From: Shuichi Ihara Date: Thu, 23 Jun 2011 13:08:39 +0000 (+0900) Subject: LU-241 Support crc32c with hardware accelerated instruction as one of lustre checksums X-Git-Tag: 2.1.50~5 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0517160dd68ac026513ad1b8e3e6f7abd4acfdef;hp=398fd6e9da8c845efb45bdc6b0af2c8440219d7e LU-241 Support crc32c with hardware accelerated instruction as one of lustre checksums Add CRC32C as a new Lustre checksum algorithm. Intel Nehalem-based CPUs support a hardware-accelerated crc32c instruction as part of the SSE4.2 instruction set, and the new crc32c code in Lustre uses this hardware instruction. Lustre also automatically detects whether the crc32c instruction is available; if it is not, adler is used instead (the fastest checksum other than crc32c). Change-Id: I764851a46a94a879239cd127eac411e98342e67f Signed-off-by: Shuichi Ihara Reviewed-on: http://review.whamcloud.com/1009 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Johann Lombardi Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index beae3480..1443051 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -44,6 +44,8 @@ #ifdef __KERNEL__ #ifndef AUTOCONF_INCLUDED #include +#include +#include #endif #include #include diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index fcba9a1..a9f6ba5 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1220,6 +1220,7 @@ extern void lustre_swab_connect(struct obd_connect_data *ocd); typedef enum { OBD_CKSUM_CRC32 = 0x00000001, OBD_CKSUM_ADLER = 0x00000002, + OBD_CKSUM_CRC32C= 0x00000004, } cksum_type_t; /* @@ -1265,7 +1266,7 @@ enum obdo_flags { OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */ OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum 
type */ - OBD_FL_CKSUM_RSVD1 = 0x00004000, /* for future cksum types */ + OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ @@ -1273,7 +1274,10 @@ enum obdo_flags { OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ - OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER, + /* Note that while these checksum values are currently separate bits, + * in 2.x we can actually allow all values from 1-31 if we wanted. */ + OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | + OBD_FL_CKSUM_CRC32C, /* mask for local-only flag, which won't be sent over network */ OBD_FL_LOCAL_MASK = 0xF0000000, diff --git a/lustre/include/obd_cksum.h b/lustre/include/obd_cksum.h index ceae740..ef76b66 100644 --- a/lustre/include/obd_cksum.h +++ b/lustre/include/obd_cksum.h @@ -80,16 +80,82 @@ static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len) return crc; } #endif + +#ifdef HAVE_ADLER +/* Adler-32 is supported */ +#define CHECKSUM_ADLER OBD_CKSUM_ADLER +#else +#define CHECKSUM_ADLER 0 +#endif + +#ifdef X86_FEATURE_XMM4_2 +/* Call Nehalem+ CRC32C hardware acceleration instruction on individual bytes. */ +static inline __u32 crc32c_hw_byte(__u32 crc, unsigned char const *p, + size_t bytes) +{ + while (bytes--) { + __asm__ __volatile__ ( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + : "=S"(crc) + : "0"(crc), "c"(*p) + ); + p++; + } + + return crc; +} + +#if BITS_PER_LONG > 32 +#define WORD_SHIFT 3 +#define WORD_MASK 7 +#define REX "0x48, " +#else +#define WORD_SHIFT 2 +#define WORD_MASK 3 +#define REX "" +#endif + +/* Do we need to worry about unaligned input data here? 
*/ +static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p, size_t len) +{ + unsigned int words = len >> WORD_SHIFT; + unsigned int bytes = len & WORD_MASK; + long *ptmp = (long *)p; + + while (words--) { + __asm__ __volatile__( + ".byte 0xf2, " REX "0xf, 0x38, 0xf1, 0xf1;" + : "=S"(crc) + : "0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (bytes) + crc = crc32c_hw_byte(crc, (unsigned char *)ptmp, bytes); + + return crc; +} +#else +/* We should never call this unless the CPU has previously been detected to + * support this instruction in the SSE4.2 feature set. b=23549 */ +static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p,size_t len) +{ + LBUG(); +} +#endif static inline __u32 init_checksum(cksum_type_t cksum_type) { switch(cksum_type) { - case OBD_CKSUM_CRC32: + case OBD_CKSUM_CRC32C: return ~0U; #ifdef HAVE_ADLER case OBD_CKSUM_ADLER: return 1U; #endif + case OBD_CKSUM_CRC32: + return ~0U; default: CERROR("Unknown checksum type (%x)!!!\n", cksum_type); LBUG(); @@ -101,12 +167,14 @@ static inline __u32 compute_checksum(__u32 cksum, unsigned char const *p, size_t len, cksum_type_t cksum_type) { switch(cksum_type) { - case OBD_CKSUM_CRC32: - return crc32_le(cksum, p, len); + case OBD_CKSUM_CRC32C: + return crc32c_hw(cksum, p, len); #ifdef HAVE_ADLER case OBD_CKSUM_ADLER: return adler32(cksum, p, len); #endif + case OBD_CKSUM_CRC32: + return crc32_le(cksum, p, len); default: CERROR("Unknown checksum type (%x)!!!\n", cksum_type); LBUG(); @@ -114,50 +182,90 @@ static inline __u32 compute_checksum(__u32 cksum, unsigned char const *p, return 0; } +/* The OBD_FL_CKSUM_* flags are packed into 5 bits of o_flags, since there can + * only be a single checksum type per RPC. + * + * The OBD_CKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask + * since they need to represent the full range of checksum algorithms that + * both the client and server can understand. 
+ * + * In case of unsupported types/flags we fall back to CRC32 (even though + * it isn't very fast) because that is supported by all clients + * with checksum support, since 1.6.5 (or earlier via patches). + * + * These flags should be listed in order of descending performance, so that + * in case multiple algorithms are supported the best one is used. */ static inline obd_flag cksum_type_pack(cksum_type_t cksum_type) { - switch(cksum_type) { - case OBD_CKSUM_CRC32: - return OBD_FL_CKSUM_CRC32; + if (cksum_type & OBD_CKSUM_CRC32C) + return OBD_FL_CKSUM_CRC32C; #ifdef HAVE_ADLER - case OBD_CKSUM_ADLER: + if (cksum_type & OBD_CKSUM_ADLER) return OBD_FL_CKSUM_ADLER; #endif - default: + if (unlikely(cksum_type && !(cksum_type & OBD_CKSUM_CRC32))) CWARN("unknown cksum type %x\n", cksum_type); - } + return OBD_FL_CKSUM_CRC32; } static inline cksum_type_t cksum_type_unpack(obd_flag o_flags) { - o_flags &= OBD_FL_CKSUM_ALL; - if ((o_flags - 1) & o_flags) - CWARN("several checksum types are set: %x\n", o_flags); - if (o_flags & OBD_FL_CKSUM_ADLER) + switch (o_flags & OBD_FL_CKSUM_ALL) { + case OBD_FL_CKSUM_CRC32C: + return OBD_CKSUM_CRC32C; + case OBD_FL_CKSUM_ADLER: #ifdef HAVE_ADLER return OBD_CKSUM_ADLER; #else CWARN("checksum type is set to adler32, but adler32 is not " "supported (%x)\n", o_flags); + break; #endif + default: + break; + } + + /* 1.6.4- only supported CRC32 and didn't set o_flags */ return OBD_CKSUM_CRC32; } +/* Return a bitmask of the checksum types supported on this system. + * + * CRC32 is required for compatibility (starting with 1.6.5), + * after which we could move to Adler as the base checksum type. + * + * If hardware crc32c support is not available, it is slower than Adler, + * so don't include it, even if it could be emulated in software. 
b=23549 */ +static inline cksum_type_t cksum_types_supported(void) +{ + cksum_type_t ret = OBD_CKSUM_CRC32; + +#ifdef X86_FEATURE_XMM4_2 + if (cpu_has_xmm4_2) + ret |= OBD_CKSUM_CRC32C; +#endif #ifdef HAVE_ADLER -/* Default preferred checksum algorithm to use (if supported by the server) */ -#define OSC_DEFAULT_CKSUM OBD_CKSUM_ADLER -/* Adler-32 is supported */ -#define CHECKSUM_ADLER OBD_CKSUM_ADLER -#else -#define OSC_DEFAULT_CKSUM OBD_CKSUM_CRC32 -#define CHECKSUM_ADLER 0 + ret |= OBD_CKSUM_ADLER; #endif + return ret; +} -#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | CHECKSUM_ADLER) +/* Select the best checksum algorithm among those supplied in the cksum_types + * input. + * + * Currently, calling cksum_type_pack() with a mask will return the fastest + * checksum type due to its ordering, but in the future we might want to + * determine this based on benchmarking the different algorithms quickly. + * Caution is advised, however, since what is fastest on a single client may + * not be the fastest or most efficient algorithm on the server. */ +static inline cksum_type_t cksum_type_select(cksum_type_t cksum_types) +{ + return cksum_type_unpack(cksum_type_pack(cksum_types)); +} /* Checksum algorithm names. Must be defined in the same order as the * OBD_CKSUM_* flags. 
*/ -#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler"} +#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"} #endif /* __OBD_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f97250b..2a0d78a 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -411,8 +411,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY)) data->ocd_cksum_types = OBD_CKSUM_ADLER; else - /* send the list of supported checksum types */ - data->ocd_cksum_types = OBD_CKSUM_ALL; + data->ocd_cksum_types = cksum_types_supported(); } #ifdef HAVE_LRU_RESIZE_SUPPORT diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 95edc07..70ab89c 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -728,7 +728,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) /* send max bytes per rpc */ data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT; /* send the list of supported checksum types */ - data->ocd_cksum_types = OBD_CKSUM_ALL; + data->ocd_cksum_types = cksum_types_supported(); /* NB: lov_connect() needs to fill in .ocd_index for each OST */ rc = obd_connect(NULL, &mds->mds_lov_exp, mds->mds_lov_obd, &obd->obd_uuid, data, NULL); OBD_FREE(data, sizeof(*data)); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index d6a9fd8..ba5cca5 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2769,9 +2769,10 @@ static int filter_connect_internal(struct obd_export *exp, /* The client set in ocd_cksum_types the checksum types it * supports. 
We have to mask off the algorithms that we don't * support */ - if (cksum_types & OBD_CKSUM_ALL) - data->ocd_cksum_types &= OBD_CKSUM_ALL; - else + data->ocd_cksum_types &= cksum_types_supported(); + + /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */ + if (unlikely(data->ocd_cksum_types == 0)) data->ocd_cksum_types = OBD_CKSUM_CRC32; CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return " diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index be0027b..2f876d4 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1488,11 +1488,8 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer, if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & OBD_FL_MMAP) return 1; - if (oa->o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(oa->o_flags); - else - cksum_type = OBD_CKSUM_CRC32; - + cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ? + oa->o_flags : 0); new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE, cksum_type); @@ -1620,10 +1617,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) char *router; cksum_type_t cksum_type; - if (body->oa.o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(body->oa.o_flags); - else - cksum_type = OBD_CKSUM_CRC32; + cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS? 
+ body->oa.o_flags : 0); client_cksum = osc_checksum_bulk(rc, aa->aa_page_count, aa->aa_ppga, OST_READ, cksum_type); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 0452257..8dc1cec 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -885,13 +885,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) } if (body->oa.o_valid & OBD_MD_FLCKSUM) { - cksum_type_t cksum_type = OBD_CKSUM_CRC32; - - if (body->oa.o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(body->oa.o_flags); + cksum_type_t cksum_type = + cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ? + body->oa.o_flags : 0); body->oa.o_flags = cksum_type_pack(cksum_type); body->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS; - body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ, cksum_type); + body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ,cksum_type); CDEBUG(D_PAGE,"checksum at read origin: %x\n",body->oa.o_cksum); } else { body->oa.o_valid = 0; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 03311e5..b0a5292 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1010,33 +1010,25 @@ finish: /* We sent to the server ocd_cksum_types with bits set * for algorithms we understand. 
The server masked off * the checksum types it doesn't support */ - if ((ocd->ocd_cksum_types & OBD_CKSUM_ALL) == 0) { + if ((ocd->ocd_cksum_types & cksum_types_supported()) == 0) { LCONSOLE_WARN("The negotiation of the checksum " "alogrithm to use with server %s " "failed (%x/%x), disabling " "checksums\n", obd2cli_tgt(imp->imp_obd), ocd->ocd_cksum_types, - OBD_CKSUM_ALL); + cksum_types_supported()); cli->cl_checksum = 0; cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - cli->cl_cksum_type = OBD_CKSUM_CRC32; } else { cli->cl_supp_cksum_types = ocd->ocd_cksum_types; - - if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM) - cli->cl_cksum_type = OSC_DEFAULT_CKSUM; - else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER) - cli->cl_cksum_type = OBD_CKSUM_ADLER; - else - cli->cl_cksum_type = OBD_CKSUM_CRC32; } } else { /* The server does not support OBD_CONNECT_CKSUM. * Enforce CRC32 for backward compatibility*/ cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - cli->cl_cksum_type = OBD_CKSUM_CRC32; } + cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types); if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) cli->cl_max_pages_per_rpc = diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index a25f53a..20134f0 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -686,11 +686,13 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_FL_SRVLOCK == 2048); CLASSERT(OBD_FL_CKSUM_CRC32 == 4096); CLASSERT(OBD_FL_CKSUM_ADLER == 8192); + CLASSERT(OBD_FL_CKSUM_CRC32C == 16384); CLASSERT(OBD_FL_SHRINK_GRANT == 131072); CLASSERT(OBD_FL_MMAP == (0x00040000)); CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000)); CLASSERT(OBD_CKSUM_CRC32 == 1); CLASSERT(OBD_CKSUM_ADLER == 2); + CLASSERT(OBD_CKSUM_CRC32C == 4); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 14920ee..aa53e3c 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4171,13 
+4171,12 @@ set_checksums() return 0 } -export ORIG_CSUM_TYPE="" +export ORIG_CSUM_TYPE="`lctl get_param -n osc/*osc-[^mM]*/checksum_type | + sed 's/.*\[\(.*\)\].*/\1/g' | head -n1`" CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"} +[ "$ORIG_CSUM_TYPE" = "crc32c" ] && CKSUM_TYPES="$CKSUM_TYPES crc32c" set_checksum_type() { - [ "$ORIG_CSUM_TYPE" ] || \ - ORIG_CSUM_TYPE=`lctl get_param -n osc/*osc-[^mM]*/checksum_type | - sed 's/.*\[\(.*\)\].*/\1/g' | head -n1` lctl set_param -n osc.*osc-[^mM]*.checksum_type $1 log "set checksum type to $1" return 0 diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 93e1ea2..5ea06e7 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -324,11 +324,13 @@ check_obdo(void) CHECK_CVALUE(OBD_FL_SRVLOCK); CHECK_CVALUE(OBD_FL_CKSUM_CRC32); CHECK_CVALUE(OBD_FL_CKSUM_ADLER); + CHECK_CVALUE(OBD_FL_CKSUM_CRC32C); CHECK_CVALUE(OBD_FL_SHRINK_GRANT); CHECK_CVALUE(OBD_FL_MMAP); CHECK_CVALUE(OBD_FL_RECOV_RESEND); CHECK_CVALUE(OBD_CKSUM_CRC32); CHECK_CVALUE(OBD_CKSUM_ADLER); + CHECK_CVALUE(OBD_CKSUM_CRC32C); } static void diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 8cd3ae5..79fca5c 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -683,11 +683,13 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_FL_SRVLOCK == 2048); CLASSERT(OBD_FL_CKSUM_CRC32 == 4096); CLASSERT(OBD_FL_CKSUM_ADLER == 8192); + CLASSERT(OBD_FL_CKSUM_CRC32C == 16384); CLASSERT(OBD_FL_SHRINK_GRANT == 131072); CLASSERT(OBD_FL_MMAP == (0x00040000)); CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000)); CLASSERT(OBD_CKSUM_CRC32 == 1); CLASSERT(OBD_CKSUM_ADLER == 2); + CLASSERT(OBD_CKSUM_CRC32C == 4); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",