/* SPDX-License-Identifier: GPL-2.0-or-later */1/*2* Macros for accessing the [V]PCLMULQDQ-based CRC functions that are3* instantiated by crc-pclmul-template.S4*5* Copyright 2025 Google LLC6*7* Author: Eric Biggers <[email protected]>8*/9#ifndef _CRC_PCLMUL_TEMPLATE_H10#define _CRC_PCLMUL_TEMPLATE_H1112#include <asm/cpufeatures.h>13#include <asm/simd.h>14#include <linux/static_call.h>15#include "crc-pclmul-consts.h"1617#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \18crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \19const void *consts_ptr); \20crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \21const void *consts_ptr); \22crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \23const void *consts_ptr); \24DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)2526static inline bool have_vpclmul(void)27{28return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) &&29boot_cpu_has(X86_FEATURE_AVX2) &&30cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL);31}3233static inline bool have_avx512(void)34{35return boot_cpu_has(X86_FEATURE_AVX512BW) &&36boot_cpu_has(X86_FEATURE_AVX512VL) &&37!boot_cpu_has(X86_FEATURE_PREFER_YMM) &&38cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL);39}4041/*42* Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 1643* bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.44*45* 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.46* There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),47* varying by CPU and factors such as which parts of the "FPU" state userspace48* has touched, which could result in a larger cutoff being better. Indeed, a49* larger cutoff is usually better for a *single* message. However, the50* overhead of the FPU section gets amortized if multiple FPU sections get51* executed before returning to userspace, since the XSAVE and XRSTOR occur only52* once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on53* the dcache than the table-based code is, a 16-byte cutoff seems to work well.54*/55#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \56do { \57if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \58likely(irq_fpu_usable())) { \59const void *consts_ptr; \60\61consts_ptr = (consts).fold_across_128_bits_consts; \62kernel_fpu_begin(); \63crc = static_call(prefix##_pclmul)((crc), (p), (len), \64consts_ptr); \65kernel_fpu_end(); \66return crc; \67} \68} while (0)6970#endif /* _CRC_PCLMUL_TEMPLATE_H */717273