From: Alexander.Boyko Date: Fri, 29 Jun 2012 08:51:56 +0000 (+0400) Subject: LU-1339 libcfs: add crc32 pclmulqdq implementation X-Git-Tag: 2.2.60~24 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=5a562259fce98b15f519fab7dd8cb921e8445d7b;p=fs%2Flustre-release.git LU-1339 libcfs: add crc32 pclmulqdq implementation Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 disposal. This instruction present from Intel Westmere and AMD Bulldozer CPUs. Signed-off-by: Alexander Boyko Reviewed-by: Alexander Zarochentsev Reviewed-by: Alexey Lyashkov Xyratex-bug-id: MRP-314 Change-Id: Id6c88629f77cc5d389db49b7ee6e7111294c4a14 Reviewed-on: http://review.whamcloud.com/2586 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/build/autoconf/lustre-build.m4 b/build/autoconf/lustre-build.m4 index 1f355f3..b50e899 100644 --- a/build/autoconf/lustre-build.m4 +++ b/build/autoconf/lustre-build.m4 @@ -694,6 +694,9 @@ AC_SUBST(LLCPPFLAGS) LLCFLAGS="-g -Wall -fPIC -D_GNU_SOURCE" AC_SUBST(LLCFLAGS) +CCASFLAGS="-Wall -fPIC -D_GNU_SOURCE" +AC_SUBST(CCASFLAGS) + # everyone builds against lnet and lustre EXTRA_KCFLAGS="$EXTRA_KCFLAGS -g -I$PWD/$LIBCFS_INCLUDE_DIR -I$PWD/lnet/include -I$PWD/lustre/include" AC_SUBST(EXTRA_KCFLAGS) @@ -715,6 +718,7 @@ AM_CONDITIONAL(DARWIN, test x$lb_target_os = "xdarwin") AM_CONDITIONAL(CRAY_XT3, test x$enable_cray_xt3 = "xyes") AM_CONDITIONAL(SUNOS, test x$lb_target_os = "xSunOS") AM_CONDITIONAL(USES_DPKG, test x$uses_dpkg = "xyes") +AM_CONDITIONAL(ARCH_x86, test x$target_cpu != "xpowerpc") # this lets lustre cancel libsysio, per-branch or if liblustre is # disabled diff --git a/configure.ac b/configure.ac index 48e4d3c..9574d0c 100644 --- a/configure.ac +++ b/configure.ac @@ -16,4 +16,6 @@ AM_INIT_AUTOMAKE([1.9 tar-ustar]) AC_PROG_CC +AM_PROG_AS + LB_CONFIGURE diff --git a/libcfs/include/libcfs/Makefile.am b/libcfs/include/libcfs/Makefile.am index 89054ee..6c79304 100644 --- 
a/libcfs/include/libcfs/Makefile.am +++ b/libcfs/include/libcfs/Makefile.am @@ -6,7 +6,7 @@ DIST_SUBDIRS := linux posix util darwin EXTRA_DIST := curproc.h libcfs_private.h libcfs.h list.h lltrace.h \ user-lock.h user-prim.h user-time.h user-mem.h \ - user-tcpip.h user-bitops.h bitmap.h \ + user-tcpip.h user-bitops.h bitmap.h user-crypto.h \ libcfs_prim.h libcfs_time.h libcfs_hash.h libcfs_cpu.h \ libcfs_debug.h libcfsutil.h libcfs_ioctl.h \ libcfs_pack.h libcfs_unpack.h libcfs_string.h \ diff --git a/libcfs/include/libcfs/linux/linux-crypto.h b/libcfs/include/libcfs/linux/linux-crypto.h index c8a4e76..97c771c 100644 --- a/libcfs/include/libcfs/linux/linux-crypto.h +++ b/libcfs/include/libcfs/linux/linux-crypto.h @@ -41,3 +41,9 @@ void cfs_crypto_crc32_unregister(void); */ int cfs_crypto_adler32_register(void); void cfs_crypto_adler32_unregister(void); + +/** + * Functions for start/stop shash crc32 pclmulqdq + */ +int cfs_crypto_crc32_pclmul_register(void); +void cfs_crypto_crc32_pclmul_unregister(void); diff --git a/libcfs/include/libcfs/user-crypto.h b/libcfs/include/libcfs/user-crypto.h new file mode 100644 index 0000000..9f928e6 --- /dev/null +++ b/libcfs/include/libcfs/user-crypto.h @@ -0,0 +1,35 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + */ + +unsigned int crc32_pclmul_le_16(unsigned char const *buffer, size_t len, + unsigned int crc32) __attribute__((regparm(3))); + +unsigned int crc32_pclmul_le(unsigned int crc, unsigned char const *p, + size_t len); + +int crc32_pclmul_init(void); diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in index 2b040b0..bf08868 100644 --- a/libcfs/libcfs/Makefile.in +++ b/libcfs/libcfs/Makefile.in @@ -8,6 +8,21 @@ libcfs-linux-objs += linux-utils.o linux-module.o libcfs-linux-objs += linux-crypto.o linux-crypto-crc32.o libcfs-linux-objs += linux-crypto-adler.o +libcfs-pclmul-obj := + +ifeq ($(ARCH),x86) +libcfs-linux-objs += linux-crypto-crc32pclmul.o +libcfs-pclmul-obj += crc32-pclmul_asm.o +endif +ifeq ($(ARCH),i386) +libcfs-linux-objs += linux-crypto-crc32pclmul.o +libcfs-pclmul-obj += crc32-pclmul_asm.o +endif +ifeq ($(ARCH),x86_64) +libcfs-linux-objs += linux-crypto-crc32pclmul.o +libcfs-pclmul-obj += crc32-pclmul_asm.o +endif + default: all ifeq (@linux25@,no) @@ -27,7 +42,7 @@ endif libcfs-all-objs := debug.o fail.o nidstrings.o lwt.o module.o tracefile.o \ watchdog.o libcfs_string.o hash.o kernel_user_comm.o \ prng.o workitem.o upcall_cache.o libcfs_cpu.o \ - libcfs_mem.o libcfs_lock.o + libcfs_mem.o libcfs_lock.o $(libcfs-pclmul-obj) libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/libcfs/libcfs/autoMakefile.am b/libcfs/libcfs/autoMakefile.am index 295cab5..33b7ec6 100644 --- a/libcfs/libcfs/autoMakefile.am +++ b/libcfs/libcfs/autoMakefile.am @@ -47,6 +47,11 @@ libcfs_a_SOURCES= posix/posix-debug.c user-prim.c user-lock.c user-tcpip.c \ workitem.c fail.c libcfs_cpu.c libcfs_mem.c 
libcfs_lock.c \ posix/rbtree.c user-crypto.c posix/posix-crc32.c \ posix/posix-adler.c + +if ARCH_x86 +libcfs_a_SOURCES += user-crc32pclmul.c crc32-pclmul_asm.S +endif + libcfs_a_CPPFLAGS = $(LLCPPFLAGS) libcfs_a_CFLAGS = $(LLCFLAGS) endif diff --git a/libcfs/libcfs/crc32-pclmul_asm.S b/libcfs/libcfs/crc32-pclmul_asm.S new file mode 100644 index 0000000..7cc6168 --- /dev/null +++ b/libcfs/libcfs/crc32-pclmul_asm.S @@ -0,0 +1,324 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. 
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas + * Alexander Boyko + */ + +/* gcc 4.1.2 does not support pclmulqdq instruction + * Use macro definition from linux kernel 2.6.38 */ + +#define REG_NUM_INVALID 100 + .macro XMM_NUM opd xmm + \opd = REG_NUM_INVALID + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 W=0 + .if ((\opd1 | \opd2) & 8) || \W + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + +.align 16 +/* + * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ +.Lconstant_R2R1: + .octa 0x00000001c6e415960000000154442bd4 +/* + * [(x128+32 mod P(x) << 32)]' << 1 = 
0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ +.Lconstant_R4R3: + .octa 0x00000000ccaa009e00000001751997d0 +/* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ +.Lconstant_R5: + .octa 0x00000000000000000000000163cd6124 +.Lconstant_mask32: + .octa 0x000000000000000000000000FFFFFFFF +/* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ +.Lconstant_RUpoly: + .octa 0x00000001F701164100000001DB710641 + +#define CONSTANT %xmm0 + +#ifdef __x86_64__ +#define BUF %rdi +#define LEN %rsi +#define CRC %edx +#else +#warning Using 32bit code support +#define BUF %eax +#define LEN %edx +#define CRC %ecx +#endif + + + +.text +/** + * Calculate crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pclmul_le_16(unsigned char const *buffer, + * size_t len, uint crc32) + */ +.globl crc32_pclmul_le_16 +.align 4, 0x90 +crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ + movdqa (BUF), %xmm1 + movdqa 0x10(BUF), %xmm2 + movdqa 0x20(BUF), %xmm3 + movdqa 0x30(BUF), %xmm4 + movd CRC, CONSTANT + pxor CONSTANT, %xmm1 + sub $0x40, LEN + add $0x40, BUF +#ifndef __x86_64__ + /* This is for position independent code(-fPIC) support for 32bit */ + call delta +delta: + pop %ecx +#endif + cmp $0x40, LEN + jb less_64 + +#ifdef __x86_64__ + movdqa .Lconstant_R2R1(%rip), CONSTANT +#else + movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT +#endif + +loop_64:/* 64 bytes Full cache line folding */ + prefetchnta 0x40(BUF) + movdqa %xmm1, %xmm5 + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 +#ifdef __x86_64__ + movdqa %xmm4, %xmm8 +#endif + PCLMULQDQ 00, CONSTANT, %xmm1 + PCLMULQDQ 00, CONSTANT, %xmm2 + PCLMULQDQ 00, 
CONSTANT, %xmm3 +#ifdef __x86_64__ + PCLMULQDQ 00, CONSTANT, %xmm4 +#endif + PCLMULQDQ 0x11, CONSTANT, %xmm5 + PCLMULQDQ 0x11, CONSTANT, %xmm6 + PCLMULQDQ 0x11, CONSTANT, %xmm7 +#ifdef __x86_64__ + PCLMULQDQ 0x11, CONSTANT, %xmm8 +#endif + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 +#ifdef __x86_64__ + pxor %xmm8, %xmm4 +#else + /* xmm8 unsupported for x32 */ + movdqa %xmm4, %xmm5 + PCLMULQDQ 00, CONSTANT, %xmm4 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm4 +#endif + + pxor (BUF), %xmm1 + pxor 0x10(BUF), %xmm2 + pxor 0x20(BUF), %xmm3 + pxor 0x30(BUF), %xmm4 + + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jge loop_64 +less_64:/* Folding cache line into 128bit */ +#ifdef __x86_64__ + movdqa .Lconstant_R4R3(%rip), CONSTANT +#else + movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT +#endif + prefetchnta (BUF) + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm2, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm3, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + + cmp $0x10, LEN + jb fold_64 +loop_16:/* Folding rest buffer into 128bit */ + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor (BUF), %xmm1 + sub $0x10, LEN + add $0x10, BUF + cmp $0x10, LEN + jge loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ + psrldq $0x08, %xmm1 + pxor CONSTANT, %xmm1 + + /* final 32-bit fold */ + movdqa %xmm1, %xmm2 +#ifdef __x86_64__ + movdqa .Lconstant_R5(%rip), CONSTANT + movdqa .Lconstant_mask32(%rip), %xmm3 +#else + movdqa .Lconstant_R5 - delta(%ecx), CONSTANT + movdqa .Lconstant_mask32 - delta(%ecx), %xmm3 +#endif + psrldq $0x04, %xmm2 + pand 
%xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ +#ifdef __x86_64__ + movdqa .Lconstant_RUpoly(%rip), CONSTANT +#else + movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT +#endif + movdqa %xmm1, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x10, CONSTANT, %xmm1 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + + ret diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am index 8d206cf..1ca6b4d 100644 --- a/libcfs/libcfs/linux/Makefile.am +++ b/libcfs/libcfs/linux/Makefile.am @@ -1,7 +1,8 @@ EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c \ - linux-cpu.c linux-crypto.c linux-crypto-crc32.c linux-crypto-adler.c + linux-cpu.c linux-crypto.c linux-crypto-crc32.c linux-crypto-adler.c \ + linux-crypto-crc32pclmul.c diff --git a/libcfs/libcfs/linux/linux-crypto-crc32pclmul.c b/libcfs/libcfs/linux/linux-crypto-crc32pclmul.c new file mode 100644 index 0000000..fc05af9 --- /dev/null +++ b/libcfs/libcfs/linux/linux-crypto-crc32pclmul.c @@ -0,0 +1,268 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. + * + * Author: Alexander Boyko + */ +#include +#ifdef HAVE_STRUCT_SHASH_ALG +#include +#else +#include +#endif +#include +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); + +static u32 __attribute__((pure)) + crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(p, iquotient, crc); + kernel_fpu_end(); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} + +static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +#ifdef HAVE_STRUCT_SHASH_ALG +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. 
+ */ +static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pclmul_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_pclmul_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); + return 0; +} + +static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32_pclmul_setkey, + .init = crc32_pclmul_init, + .update = crc32_pclmul_update, + .final = crc32_pclmul_final, + .finup = crc32_pclmul_finup, + .digest = crc32_pclmul_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + } +}; +#else /* HAVE_STRUCT_SHASH_ALG */ 
+#ifdef HAVE_DIGEST_SETKEY_FLAGS +static int crc32_digest_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen, unsigned int *flags) +#else +static int crc32_digest_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +#endif +{ + u32 *mctx = crypto_tfm_ctx(tfm); + + if (keylen != sizeof(u32)) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static void crc32_digest_init(struct crypto_tfm *tfm) +{ + u32 *mctx = crypto_tfm_ctx(tfm); + + *mctx = 0; +} + +static void crc32_digest_update(struct crypto_tfm *tfm, const u8 *data, + unsigned int len) +{ + u32 *crcp = crypto_tfm_ctx(tfm); + + *crcp = crc32_pclmul_le(*crcp, data, len); +} + +static void crc32_digest_final(struct crypto_tfm *tfm, u8 *out) +{ + u32 *crcp = crypto_tfm_ctx(tfm); + + *(__le32 *)out = cpu_to_le32p(crcp); +} + +static struct crypto_alg alg = { + .cra_name = "crc32", + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .digest = { + .dia_digestsize = CHKSUM_DIGEST_SIZE, + .dia_setkey = crc32_digest_setkey, + .dia_init = crc32_digest_init, + .dia_update = crc32_digest_update, + .dia_final = crc32_digest_final + } + } +}; +#endif /* HAVE_STRUCT_SHASH_ALG */ + +#ifndef X86_FEATURE_PCLMULQDQ +#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ instruction */ +#endif + +int cfs_crypto_crc32_pclmul_register(void) +{ + + if (!boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { + CDEBUG(D_INFO, "PCLMULQDQ-NI instructions are not " + "detected.\n"); + return -ENODEV; + } +#ifdef HAVE_STRUCT_SHASH_ALG + return crypto_register_shash(&alg); +#else + return crypto_register_alg(&alg); +#endif +} + +void cfs_crypto_crc32_pclmul_unregister(void) +{ +#ifdef HAVE_STRUCT_SHASH_ALG + 
crypto_unregister_shash(&alg); +#else + crypto_unregister_alg(&alg); +#endif +} diff --git a/libcfs/libcfs/linux/linux-crypto.c b/libcfs/libcfs/linux/linux-crypto.c index 245fca2..bacf26a 100644 --- a/libcfs/libcfs/linux/linux-crypto.c +++ b/libcfs/libcfs/linux/linux-crypto.c @@ -339,11 +339,19 @@ static int cfs_crypto_test_hashes(void) static int crc32, adler32; +#ifdef CONFIG_X86 +static int crc32pclmul; +#endif + int cfs_crypto_register(void) { crc32 = cfs_crypto_crc32_register(); adler32 = cfs_crypto_adler32_register(); +#ifdef CONFIG_X86 + crc32pclmul = cfs_crypto_crc32_pclmul_register(); +#endif + /* check all algorithms and do perfermance test */ cfs_crypto_test_hashes(); return 0; @@ -354,5 +362,11 @@ void cfs_crypto_unregister(void) cfs_crypto_crc32_unregister(); if (adler32 == 0) cfs_crypto_adler32_unregister(); + +#ifdef CONFIG_X86 + if (crc32pclmul == 0) + cfs_crypto_crc32_pclmul_unregister(); +#endif + return; } diff --git a/libcfs/libcfs/user-crc32pclmul.c b/libcfs/libcfs/user-crc32pclmul.c new file mode 100644 index 0000000..720459d --- /dev/null +++ b/libcfs/libcfs/user-crc32pclmul.c @@ -0,0 +1,91 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + */ + +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +unsigned int crc32_pclmul_le(unsigned int crc, unsigned char const *p, + size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + crc = crc32_pclmul_le_16(p, iquotient, crc); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} +#ifndef bit_PCLMUL +#define bit_PCLMUL (1 << 1) +#endif + +int crc32_pclmul_init(void) +{ + unsigned int eax, ebx, ecx, edx, level; + + eax = ebx = ecx = edx = 0; + level = 1; + /* get cpuid */ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) \ + : "0" (level)); + + if (ecx & bit_PCLMUL) + return 1; + + return -1; +} diff --git a/libcfs/libcfs/user-crypto.c b/libcfs/libcfs/user-crypto.c index 66d0bae..9ae5aa9 100644 --- a/libcfs/libcfs/user-crypto.c +++ b/libcfs/libcfs/user-crypto.c @@ -31,6 +31,7 @@ #include #include +#include static int 
cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; @@ -89,8 +90,21 @@ static int adler_wrapper(void *ctx, const unsigned char *p, return 0; } +#if (defined i386) || (defined __amd64__) +static int crc32_pclmul_wrapper(void *ctx, const unsigned char *p, + unsigned int len) +{ + unsigned int cksum = *(unsigned int *)ctx; + + cksum = crc32_pclmul_le(cksum, p, len); + + *(unsigned int *)ctx = cksum; + return 0; +} +#endif + static int start_generic(void *ctx, unsigned char *key, - unsigned int key_len) + unsigned int key_len) { const struct cfs_crypto_hash_type *type; struct hash_desc *hd = container_of(ctx, struct hash_desc, @@ -111,7 +125,7 @@ static int start_generic(void *ctx, unsigned char *key, } static int final_generic(void *ctx, unsigned char *hash, - unsigned int hash_len) + unsigned int hash_len) { const struct cfs_crypto_hash_type *type; struct hash_desc *hd = container_of(ctx, struct hash_desc, @@ -142,7 +156,18 @@ static struct __hash_alg crypto_hash[] = { .update = adler_wrapper, .start = start_generic, .final = final_generic, - .fini = NULL} }; + .fini = NULL}, +#if (defined i386) || (defined __amd64__) + {.ha_id = CFS_HASH_ALG_CRC32, + .ha_ctx_size = sizeof(unsigned int), + .ha_priority = 100, + .init = crc32_pclmul_init, + .update = crc32_pclmul_wrapper, + .start = start_generic, + .final = final_generic, + .fini = NULL}, +#endif + }; /** * Go through hashes to find the hash with max priority