GitHub Repository: torvalds/linux
Path: blob/master/lib/crc/x86/crc-pclmul-template.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
 * instantiated by crc-pclmul-template.S
 *
 * Copyright 2025 Google LLC
 *
 * Author: Eric Biggers <[email protected]>
 */
#ifndef _CRC_PCLMUL_TEMPLATE_H
#define _CRC_PCLMUL_TEMPLATE_H

#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <linux/static_call.h>
#include "crc-pclmul-consts.h"

#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t)				\
crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len,		\
			  const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len,		\
			    const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len,	\
			      const void *consts_ptr);			\
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
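
/*
 * Example usage (illustrative, not part of the original header; the
 * "crc32_lsb" prefix and the static key name are placeholders): a CRC glue
 * file is expected to pair this declaration with a static key that gates
 * the PCLMULQDQ path, e.g.:
 *
 *	static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
 *	DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
 */
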
/* Check whether 256-bit VPCLMULQDQ (with AVX2 and OS-enabled YMM state) is usable */
static inline bool have_vpclmul(void)
{
	return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) &&
	       boot_cpu_has(X86_FEATURE_AVX2) &&
	       cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL);
}

/* Check whether the 512-bit AVX512 implementation is usable and worthwhile */
static inline bool have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX512BW) &&
	       boot_cpu_has(X86_FEATURE_AVX512VL) &&
	       !boot_cpu_has(X86_FEATURE_PREFER_YMM) &&
	       cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL);
}
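
/*
 * Example (illustrative; the prefix and static key names are placeholders,
 * not defined by this header): at initialization time, a glue file is
 * expected to enable its static key when PCLMULQDQ is present and upgrade
 * the static call to the widest usable implementation, roughly:
 *
 *	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
 *		static_branch_enable(&have_pclmulqdq);
 *		if (have_vpclmul()) {
 *			if (have_avx512())
 *				static_call_update(crc32_lsb_pclmul,
 *						   crc32_lsb_vpclmul_avx512);
 *			else
 *				static_call_update(crc32_lsb_pclmul,
 *						   crc32_lsb_vpclmul_avx2);
 *		}
 *	}
 */
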
/*
 * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
 * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
 *
 * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
 * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
 * varying by CPU and factors such as which parts of the "FPU" state userspace
 * has touched, which could result in a larger cutoff being better. Indeed, a
 * larger cutoff is usually better for a *single* message. However, the
 * overhead of the FPU section gets amortized if multiple FPU sections get
 * executed before returning to userspace, since the XSAVE and XRSTOR occur only
 * once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on
 * the dcache than the table-based code is, a 16-byte cutoff seems to work well.
 */
#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq)		\
do {									\
	if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) &&	\
	    likely(irq_fpu_usable())) {					\
		const void *consts_ptr;					\
									\
		consts_ptr = (consts).fold_across_128_bits_consts;	\
		kernel_fpu_begin();					\
		crc = static_call(prefix##_pclmul)((crc), (p), (len),	\
						   consts_ptr);		\
		kernel_fpu_end();					\
		return crc;						\
	}								\
} while (0)
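
/*
 * Example (illustrative; "crc32_lsb", the consts symbol, and the base
 * function are placeholder names, not defined here): a per-arch wrapper is
 * expected to invoke CRC_PCLMUL() first and fall back to the generic
 * table-based implementation, roughly:
 *
 *	static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 *	{
 *		CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_consts,
 *			   have_pclmulqdq);
 *		return crc32_le_base(crc, p, len);
 *	}
 *
 * Note that CRC_PCLMUL() contains a "return" statement: when the SIMD path
 * is taken, the wrapper returns the PCLMULQDQ result directly and the
 * table-based fallback below it is skipped.
 */
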
#endif /* _CRC_PCLMUL_TEMPLATE_H */