Whamcloud - gitweb
LU-241 Support crc32c with hardware accelerated instruction as one of lustre checksums
authorShuichi Ihara <sihara@ddn.com>
Thu, 23 Jun 2011 13:08:39 +0000 (22:08 +0900)
committerOleg Drokin <green@whamcloud.com>
Wed, 5 Oct 2011 08:23:34 +0000 (04:23 -0400)
Add CRC32C as a new Lustre checksum algorithm. Intel Nehalem-based
CPUs provide a hardware-accelerated crc32c instruction as part of
the SSE4.2 instruction set, and the new crc32c code in Lustre uses
this instruction. Lustre automatically detects whether the crc32c
instruction is available; if it is not, adler (the fastest checksum
other than crc32c) is used instead.

Change-Id: I764851a46a94a879239cd127eac411e98342e67f
Signed-off-by: Shuichi Ihara <sihara@ddn.com>
Reviewed-on: http://review.whamcloud.com/1009
Tested-by: Hudson
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
13 files changed:
lustre/include/linux/obd_support.h
lustre/include/lustre/lustre_idl.h
lustre/include/obd_cksum.h
lustre/llite/llite_lib.c
lustre/mds/mds_lov.c
lustre/obdfilter/filter.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/import.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index beae348..1443051 100644 (file)
@@ -44,6 +44,8 @@
 #ifdef __KERNEL__
 #ifndef AUTOCONF_INCLUDED
 #include <linux/config.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
 #endif
 #include <linux/seq_file.h>
 #include <linux/module.h>
index fcba9a1..a9f6ba5 100644 (file)
@@ -1220,6 +1220,7 @@ extern void lustre_swab_connect(struct obd_connect_data *ocd);
 typedef enum {
         OBD_CKSUM_CRC32 = 0x00000001,
         OBD_CKSUM_ADLER = 0x00000002,
+        OBD_CKSUM_CRC32C= 0x00000004,
 } cksum_type_t;
 
 /*
@@ -1265,7 +1266,7 @@ enum obdo_flags {
         OBD_FL_SRVLOCK      = 0x00000800, /* delegate DLM locking to server */
         OBD_FL_CKSUM_CRC32  = 0x00001000, /* CRC32 checksum type */
         OBD_FL_CKSUM_ADLER  = 0x00002000, /* ADLER checksum type */
-        OBD_FL_CKSUM_RSVD1  = 0x00004000, /* for future cksum types */
+        OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */
         OBD_FL_CKSUM_RSVD2  = 0x00008000, /* for future cksum types */
         OBD_FL_CKSUM_RSVD3  = 0x00010000, /* for future cksum types */
         OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
@@ -1273,7 +1274,10 @@ enum obdo_flags {
         OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
         OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
 
-        OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER,
+        /* Note that while these checksum values are currently separate bits,
+         * in 2.x we can actually allow all values from 1-31 if we wanted. */
+        OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER |
+                              OBD_FL_CKSUM_CRC32C,
 
         /* mask for local-only flag, which won't be sent over network */
         OBD_FL_LOCAL_MASK   = 0xF0000000,
index ceae740..ef76b66 100644 (file)
@@ -80,16 +80,82 @@ static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len)
         return crc;
 }
 #endif
+#ifdef HAVE_ADLER
+/* Adler-32 is supported */
+#define CHECKSUM_ADLER OBD_CKSUM_ADLER
+#else
+#define CHECKSUM_ADLER 0
+#endif
+
+#ifdef X86_FEATURE_XMM4_2
+/* Call Nehalem+ CRC32C hardware acceleration instruction on individual bytes.
+ *
+ * Folds "bytes" bytes starting at "p" into the running value "crc", issuing
+ * one instruction per input byte, and returns the updated value. With
+ * bytes == 0 the loop body never runs and "crc" is returned unchanged.
+ *
+ * The instruction is emitted as raw opcode bytes rather than a mnemonic
+ * (presumably so that assemblers lacking SSE4.2 support can still build
+ * this header - TODO confirm). */
+static inline __u32 crc32c_hw_byte(__u32 crc, unsigned char const *p,
+                                  size_t bytes)
+{
+        while (bytes--) {
+                __asm__ __volatile__ (
+                        ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" /* crc32b %cl,%esi */
+                        : "=S"(crc)         /* output: updated crc, in ESI */
+                        : "0"(crc), "c"(*p) /* inputs: crc (same reg), byte in ECX */
+                );
+                p++;
+        }
+
+        return crc;
+}
+
+/* WORD_SHIFT/WORD_MASK split a byte count into whole longs and a remainder.
+ * REX is the REX.W prefix byte (0x48) that widens the crc32 instruction in
+ * crc32c_hw() to a 64-bit source operand; it is empty on 32-bit builds. */
+#if BITS_PER_LONG > 32
+#define WORD_SHIFT 3
+#define WORD_MASK  7
+#define REX "0x48, "
+#else
+#define WORD_SHIFT 2
+#define WORD_MASK  3
+#define REX ""
+#endif
+
+/* Fold "len" bytes starting at "p" into the running CRC32C value "crc" with
+ * the SSE4.2 crc32 instruction and return the updated value.
+ *
+ * The buffer is consumed one long at a time; the remaining
+ * (len & WORD_MASK) bytes are handed to crc32c_hw_byte().
+ *
+ * Do we need to worry about unaligned input data here? */
+static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p, size_t len)
+{
+        /* size_t, not unsigned int, so a large len is not silently truncated */
+        size_t words = len >> WORD_SHIFT;
+        size_t bytes = len & WORD_MASK;
+        /* keep the const qualifier: the input buffer is only ever read */
+        long const *ptmp = (long const *)p;
+
+        while (words--) {
+                __asm__ __volatile__(
+                        /* crc32 with a long-sized source: ECX/RCX into ESI */
+                        ".byte 0xf2, " REX "0xf, 0x38, 0xf1, 0xf1;"
+                        : "=S"(crc)
+                        : "0"(crc), "c"(*ptmp)
+                );
+                ptmp++;
+        }
+
+        /* fewer than sizeof(long) bytes remain; finish them one at a time */
+        if (bytes)
+                crc = crc32c_hw_byte(crc, (unsigned char const *)ptmp, bytes);
+
+        return crc;
+}
+#else
+/* We should never call this unless the CPU has previously been detected to
+ * support this instruction in the SSE4.2 feature set. b=23549
+ *
+ * Stub used when X86_FEATURE_XMM4_2 is not defined at build time, so that
+ * compute_checksum() still compiles. Reaching it is a bug, hence LBUG().
+ * The trailing return matches compute_checksum() and ensures this non-void
+ * function never falls off its end. */
+static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p, size_t len)
+{
+        LBUG();
+        return 0;
+}
+#endif
 
 static inline __u32 init_checksum(cksum_type_t cksum_type)
 {
         switch(cksum_type) {
-        case OBD_CKSUM_CRC32:
+        case OBD_CKSUM_CRC32C:
                 return ~0U;
 #ifdef HAVE_ADLER
         case OBD_CKSUM_ADLER:
                 return 1U;
 #endif
+        case OBD_CKSUM_CRC32:
+                return ~0U;
         default:
                 CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
                 LBUG();
@@ -101,12 +167,14 @@ static inline __u32 compute_checksum(__u32 cksum, unsigned char const *p,
                                      size_t len, cksum_type_t cksum_type)
 {
         switch(cksum_type) {
-        case OBD_CKSUM_CRC32:
-                return crc32_le(cksum, p, len);
+        case OBD_CKSUM_CRC32C:
+                return crc32c_hw(cksum, p, len);
 #ifdef HAVE_ADLER
         case OBD_CKSUM_ADLER:
                 return adler32(cksum, p, len);
 #endif
+        case OBD_CKSUM_CRC32:
+                return crc32_le(cksum, p, len);
         default:
                 CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
                 LBUG();
@@ -114,50 +182,90 @@ static inline __u32 compute_checksum(__u32 cksum, unsigned char const *p,
         return 0;
 }
 
+/* The OBD_FL_CKSUM_* flags are packed into 5 bits of o_flags, since there can
+ * only be a single checksum type per RPC.
+ *
+ * The OBD_CKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask
+ * since they need to represent the full range of checksum algorithms that
+ * both the client and server can understand.
+ *
+ * In case of unsupported types/flags we fall back to CRC32 (even though
+ * it isn't very fast) because it is supported by all clients that support
+ * checksums, since 1.6.5 (or earlier via patches).
+ *
+ * These flags should be listed in order of descending performance, so that
+ * in case multiple algorithms are supported the best one is used. */
 static inline obd_flag cksum_type_pack(cksum_type_t cksum_type)
 {
-        switch(cksum_type) {
-        case OBD_CKSUM_CRC32:
-                return OBD_FL_CKSUM_CRC32;
+        if (cksum_type & OBD_CKSUM_CRC32C)
+                return OBD_FL_CKSUM_CRC32C;
 #ifdef HAVE_ADLER
-        case OBD_CKSUM_ADLER:
+        if (cksum_type & OBD_CKSUM_ADLER)
                 return OBD_FL_CKSUM_ADLER;
 #endif
-        default:
+        if (unlikely(cksum_type && !(cksum_type & OBD_CKSUM_CRC32)))
                 CWARN("unknown cksum type %x\n", cksum_type);
-        }
+
         return OBD_FL_CKSUM_CRC32;
 }
 
 static inline cksum_type_t cksum_type_unpack(obd_flag o_flags)
 {
-        o_flags &= OBD_FL_CKSUM_ALL;
-        if ((o_flags - 1) & o_flags)
-                CWARN("several checksum types are set: %x\n", o_flags);
-        if (o_flags & OBD_FL_CKSUM_ADLER)
+        switch (o_flags & OBD_FL_CKSUM_ALL) {
+        case OBD_FL_CKSUM_CRC32C:
+                return OBD_CKSUM_CRC32C;
+        case OBD_FL_CKSUM_ADLER:
 #ifdef HAVE_ADLER
                 return OBD_CKSUM_ADLER;
 #else
                 CWARN("checksum type is set to adler32, but adler32 is not "
                       "supported (%x)\n", o_flags);
+                break;
 #endif
+        default:
+                break;
+        }
+
+        /* 1.6.4- only supported CRC32 and didn't set o_flags */
         return OBD_CKSUM_CRC32;
 }
 
+/* Report which checksum algorithms this node can use, as a bitmask of
+ * OBD_CKSUM_* values.
+ *
+ * CRC32 is always included: it is required for compatibility (starting with
+ * 1.6.5), after which we could move to Adler as the base checksum type.
+ * Adler is included whenever HAVE_ADLER is defined at build time.
+ *
+ * CRC32C is included only when hardware support is detected. Without it,
+ * crc32c is slower than Adler, so it is left out even though it could be
+ * emulated in software. b=23549 */
+static inline cksum_type_t cksum_types_supported(void)
+{
+        cksum_type_t supported = OBD_CKSUM_CRC32;
+
+#ifdef HAVE_ADLER
+        supported |= OBD_CKSUM_ADLER;
+#endif
+#ifdef X86_FEATURE_XMM4_2
+        if (cpu_has_xmm4_2)
+                supported |= OBD_CKSUM_CRC32C;
+#endif
+
+        return supported;
+}
 
-#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | CHECKSUM_ADLER)
+/* Reduce a bitmask of candidate checksum types to the single preferred one.
+ *
+ * The mask is run through cksum_type_pack(), whose ordering currently picks
+ * the fastest type, and the resulting flag is converted back with
+ * cksum_type_unpack(). In the future we might want to decide this by quickly
+ * benchmarking the different algorithms instead. Caution is advised,
+ * however, since what is fastest on a single client may not be the fastest
+ * or most efficient algorithm on the server.  */
+static inline cksum_type_t cksum_type_select(cksum_type_t cksum_types)
+{
+        obd_flag best_flag = cksum_type_pack(cksum_types);
+
+        return cksum_type_unpack(best_flag);
+}
 
 /* Checksum algorithm names. Must be defined in the same order as the
  * OBD_CKSUM_* flags. */
-#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler"}
+#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"}
 
 #endif /* __OBD_H */
index f97250b..2a0d78a 100644 (file)
@@ -411,8 +411,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
                         data->ocd_cksum_types = OBD_CKSUM_ADLER;
                 else
-                        /* send the list of supported checksum types */
-                        data->ocd_cksum_types = OBD_CKSUM_ALL;
+                        data->ocd_cksum_types = cksum_types_supported();
         }
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
index 95edc07..70ab89c 100644 (file)
@@ -728,7 +728,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
         /* send max bytes per rpc */
         data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
         /* send the list of supported checksum types */
-        data->ocd_cksum_types = OBD_CKSUM_ALL;
+        data->ocd_cksum_types = cksum_types_supported();
         /* NB: lov_connect() needs to fill in .ocd_index for each OST */
         rc = obd_connect(NULL, &mds->mds_lov_exp, mds->mds_lov_obd, &obd->obd_uuid, data, NULL);
         OBD_FREE(data, sizeof(*data));
index d6a9fd8..ba5cca5 100644 (file)
@@ -2769,9 +2769,10 @@ static int filter_connect_internal(struct obd_export *exp,
                 /* The client set in ocd_cksum_types the checksum types it
                  * supports. We have to mask off the algorithms that we don't
                  * support */
-                if (cksum_types & OBD_CKSUM_ALL)
-                        data->ocd_cksum_types &= OBD_CKSUM_ALL;
-                else
+                data->ocd_cksum_types &= cksum_types_supported();
+
+                /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */
+                if (unlikely(data->ocd_cksum_types == 0))
                         data->ocd_cksum_types = OBD_CKSUM_CRC32;
 
                 CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
index be0027b..2f876d4 100644 (file)
@@ -1488,11 +1488,8 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
         if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & OBD_FL_MMAP)
                 return 1;
 
-        if (oa->o_valid & OBD_MD_FLFLAGS)
-                cksum_type = cksum_type_unpack(oa->o_flags);
-        else
-                cksum_type = OBD_CKSUM_CRC32;
-
+        cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+                                       oa->o_flags : 0);
         new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
                                       cksum_type);
 
@@ -1620,10 +1617,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                 char      *router;
                 cksum_type_t cksum_type;
 
-                if (body->oa.o_valid & OBD_MD_FLFLAGS)
-                        cksum_type = cksum_type_unpack(body->oa.o_flags);
-                else
-                        cksum_type = OBD_CKSUM_CRC32;
+                cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS?
+                                               body->oa.o_flags : 0);
                 client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
                                                  aa->aa_ppga, OST_READ,
                                                  cksum_type);
index 0452257..8dc1cec 100644 (file)
@@ -885,13 +885,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         }
 
         if (body->oa.o_valid & OBD_MD_FLCKSUM) {
-                cksum_type_t cksum_type = OBD_CKSUM_CRC32;
-
-                if (body->oa.o_valid & OBD_MD_FLFLAGS)
-                        cksum_type = cksum_type_unpack(body->oa.o_flags);
+                cksum_type_t cksum_type =
+                        cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ?
+                                          body->oa.o_flags : 0);
                 body->oa.o_flags = cksum_type_pack(cksum_type);
                 body->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
-                body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ, cksum_type);
+                body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ,cksum_type);
                 CDEBUG(D_PAGE,"checksum at read origin: %x\n",body->oa.o_cksum);
         } else {
                 body->oa.o_valid = 0;
index 03311e5..b0a5292 100644 (file)
@@ -1010,33 +1010,25 @@ finish:
                         /* We sent to the server ocd_cksum_types with bits set
                          * for algorithms we understand. The server masked off
                          * the checksum types it doesn't support */
-                        if ((ocd->ocd_cksum_types & OBD_CKSUM_ALL) == 0) {
+                        if ((ocd->ocd_cksum_types & cksum_types_supported()) == 0) {
                                 LCONSOLE_WARN("The negotiation of the checksum "
                                               "alogrithm to use with server %s "
                                               "failed (%x/%x), disabling "
                                               "checksums\n",
                                               obd2cli_tgt(imp->imp_obd),
                                               ocd->ocd_cksum_types,
-                                              OBD_CKSUM_ALL);
+                                              cksum_types_supported());
                                 cli->cl_checksum = 0;
                                 cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
-                                cli->cl_cksum_type = OBD_CKSUM_CRC32;
                         } else {
                                 cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
-
-                                if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM)
-                                        cli->cl_cksum_type = OSC_DEFAULT_CKSUM;
-                                else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER)
-                                        cli->cl_cksum_type = OBD_CKSUM_ADLER;
-                                else
-                                        cli->cl_cksum_type = OBD_CKSUM_CRC32;
                         }
                 } else {
                         /* The server does not support OBD_CONNECT_CKSUM.
                          * Enforce CRC32 for backward compatibility*/
                         cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
-                        cli->cl_cksum_type = OBD_CKSUM_CRC32;
                 }
+                cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types);
 
                 if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
                         cli->cl_max_pages_per_rpc =
index a25f53a..20134f0 100644 (file)
@@ -686,11 +686,13 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_FL_SRVLOCK == 2048);
         CLASSERT(OBD_FL_CKSUM_CRC32 == 4096);
         CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
+        CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
         CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
         CLASSERT(OBD_FL_MMAP == (0x00040000));
         CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
         CLASSERT(OBD_CKSUM_CRC32 == 1);
         CLASSERT(OBD_CKSUM_ADLER == 2);
+        CLASSERT(OBD_CKSUM_CRC32C == 4);
 
         /* Checks for struct lov_mds_md_v1 */
         LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",
index 14920ee..aa53e3c 100644 (file)
@@ -4171,13 +4171,12 @@ set_checksums()
        return 0
 }
 
-export ORIG_CSUM_TYPE=""
+export ORIG_CSUM_TYPE="`lctl get_param -n osc/*osc-[^mM]*/checksum_type |
+                        sed 's/.*\[\(.*\)\].*/\1/g' | head -n1`"
 CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"}
+[ "$ORIG_CSUM_TYPE" = "crc32c" ] && CKSUM_TYPES="$CKSUM_TYPES crc32c"
 set_checksum_type()
 {
-       [ "$ORIG_CSUM_TYPE" ] || \
-               ORIG_CSUM_TYPE=`lctl get_param -n osc/*osc-[^mM]*/checksum_type |
-                                sed 's/.*\[\(.*\)\].*/\1/g' | head -n1`
        lctl set_param -n osc.*osc-[^mM]*.checksum_type $1
        log "set checksum type to $1"
        return 0
index 93e1ea2..5ea06e7 100644 (file)
@@ -324,11 +324,13 @@ check_obdo(void)
         CHECK_CVALUE(OBD_FL_SRVLOCK);
         CHECK_CVALUE(OBD_FL_CKSUM_CRC32);
         CHECK_CVALUE(OBD_FL_CKSUM_ADLER);
+        CHECK_CVALUE(OBD_FL_CKSUM_CRC32C);
         CHECK_CVALUE(OBD_FL_SHRINK_GRANT);
         CHECK_CVALUE(OBD_FL_MMAP);
         CHECK_CVALUE(OBD_FL_RECOV_RESEND);
         CHECK_CVALUE(OBD_CKSUM_CRC32);
         CHECK_CVALUE(OBD_CKSUM_ADLER);
+        CHECK_CVALUE(OBD_CKSUM_CRC32C);
 }
 
 static void
index 8cd3ae5..79fca5c 100644 (file)
@@ -683,11 +683,13 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_FL_SRVLOCK == 2048);
         CLASSERT(OBD_FL_CKSUM_CRC32 == 4096);
         CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
+        CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
         CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
         CLASSERT(OBD_FL_MMAP == (0x00040000));
         CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
         CLASSERT(OBD_CKSUM_CRC32 == 1);
         CLASSERT(OBD_CKSUM_ADLER == 2);
+        CLASSERT(OBD_CKSUM_CRC32C == 4);
 
         /* Checks for struct lov_mds_md_v1 */
         LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",