Whamcloud - gitweb
LU-11011 osc: add preferred checksum type support 49/32349/11
author Li Xi <lixi@ddn.com>
Thu, 10 May 2018 04:25:05 +0000 (00:25 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 21 Aug 2019 05:04:07 +0000 (05:04 +0000)
Some checksum types might not work correctly even though they are
available options and have the best speeds during test. In these
circumstances, users might want to use a certain checksum type which
is known to be functional. However, "lctl conf_param XXX-YYY.osc.
checksum_type=ZZZ" won't help to enforce a certain checksum type,
because the selected checksum type is determined during OSC
connection, which will overwrite the LLOG parameter.

To solve this problem, whenever a valid checksum type is set by "lctl
conf_param" or "lctl set_param", it is remembered as the preferred
checksum type for the OSC. During the connection process, if that
checksum type is available, that checksum type will be selected as
the RPC checksum type regardless of its speed.

The semantics of the interface /proc/fs/lustre/osc/*/checksum_type are
changed slightly. If an invalid checksum name is written
into this entry, -EINVAL will be returned as before. If the written
string is a valid checksum name, even though the checksum type is
not supported by this OSC/OST pair, the checksum type will still be
remembered as the preferred checksum type, and the return value will be
-ENOTSUPP. Whenever connecting/reconnecting happens, if the preferred
checksum type is available, it will be used for the RPC checksum.

Change-Id: Ie6fdc1d8ed6c55531ad6b7c926659d644fefccaf
Signed-off-by: Li Xi <lixi@ddn.com>
Reviewed-on: https://review.whamcloud.com/32349
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd.h
lustre/include/obd_cksum.h
lustre/ldlm/ldlm_lib.c
lustre/osc/lproc_osc.c
lustre/ptlrpc/import.c
lustre/tests/sanity.sh

index b4d1b28..920a9f2 100644 (file)
@@ -342,6 +342,8 @@ struct client_obd {
         __u32                    cl_supp_cksum_types;
         /* checksum algorithm to be used */
        enum cksum_types         cl_cksum_type;
+       /* preferred checksum algorithm to be used */
+       enum cksum_types         cl_preferred_cksum_type;
 
         /* also protected by the poorly named _loi_list_lock lock above */
         struct osc_async_rc      cl_ar;
index 6e807d7..15353a3 100644 (file)
@@ -112,10 +112,17 @@ enum cksum_types obd_cksum_types_supported_server(const char *obd_name);
  * checksum type due to its benchmarking at libcfs module load.
  * Caution is advised, however, since what is fastest on a single client may
  * not be the fastest or most efficient algorithm on the server.  */
-static inline enum cksum_types
-obd_cksum_type_select(const char *obd_name, enum cksum_types cksum_types)
+static inline
+enum cksum_types obd_cksum_type_select(const char *obd_name,
+                                      enum cksum_types cksum_types,
+                                      enum cksum_types preferred)
 {
-       u32 flag = obd_cksum_type_pack(obd_name, cksum_types);
+       u32 flag;
+
+       if (preferred & cksum_types)
+               return preferred;
+
+       flag = obd_cksum_type_pack(obd_name, cksum_types);
 
        return obd_cksum_type_unpack(flag);
 }
index ebe3f33..68042ce 100644 (file)
@@ -413,7 +413,9 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
        init_waitqueue_head(&cli->cl_destroy_waitq);
        atomic_set(&cli->cl_destroy_in_flight, 0);
 
+
        cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+       cli->cl_preferred_cksum_type = 0;
 #ifdef ENABLE_CHECKSUM
        /* Turn on checksumming by default. */
        cli->cl_checksum = 1;
index b4c0e35..414d918 100644 (file)
@@ -425,28 +425,34 @@ static ssize_t osc_checksum_type_seq_write(struct file *file,
        int i;
        DECLARE_CKSUM_NAME;
        char kernbuf[10];
+       int rc = -EINVAL;
 
-        if (obd == NULL)
-                return 0;
+       if (obd == NULL)
+               return 0;
 
-        if (count > sizeof(kernbuf) - 1)
-                return -EINVAL;
+       if (count > sizeof(kernbuf) - 1)
+               return -EINVAL;
        if (copy_from_user(kernbuf, buffer, count))
-                return -EFAULT;
-        if (count > 0 && kernbuf[count - 1] == '\n')
-                kernbuf[count - 1] = '\0';
-        else
-                kernbuf[count] = '\0';
-
-        for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
-                if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
-                        continue;
-                if (!strcmp(kernbuf, cksum_name[i])) {
-                       obd->u.cli.cl_cksum_type = 1 << i;
-                       return count;
-                }
-        }
-        return -EINVAL;
+               return -EFAULT;
+
+       if (count > 0 && kernbuf[count - 1] == '\n')
+               kernbuf[count - 1] = '\0';
+       else
+               kernbuf[count] = '\0';
+
+       for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
+               if (strcmp(kernbuf, cksum_name[i]) == 0) {
+                       obd->u.cli.cl_preferred_cksum_type = BIT(i);
+                       if (obd->u.cli.cl_supp_cksum_types & BIT(i)) {
+                               obd->u.cli.cl_cksum_type = BIT(i);
+                               rc = count;
+                       } else {
+                               rc = -ENOTSUPP;
+                       }
+                       break;
+               }
+       }
+       return rc;
 }
 LPROC_SEQ_FOPS(osc_checksum_type);
 
index 0760bce..3aa6f39 100644 (file)
@@ -889,7 +889,8 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
                cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
        }
        cli->cl_cksum_type = obd_cksum_type_select(imp->imp_obd->obd_name,
-                                                  cli->cl_supp_cksum_types);
+                                                 cli->cl_supp_cksum_types,
+                                                 cli->cl_preferred_cksum_type);
 
        if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
                cli->cl_max_pages_per_rpc =
index 2ed3f57..1f346b1 100644 (file)
@@ -7747,9 +7747,22 @@ CKSUM_TYPES=${CKSUM_TYPES:-$(lctl get_param -n osc.*osc-[^mM]*.checksum_type |
 set_checksum_type()
 {
        lctl set_param -n osc.*osc-[^mM]*.checksum_type $1
-       log "set checksum type to $1"
-       return 0
+       rc=$?
+       log "set checksum type to $1, rc = $rc"
+       return $rc
+}
+
+get_osc_checksum_type()
+{
+       # argument 1: OST name, like OST0000
+       ost=$1
+       checksum_type=$(lctl get_param -n osc.*${ost}-osc-[^mM]*.checksum_type |
+                       sed 's/.*\[\(.*\)\].*/\1/g')
+       rc=$?
+       [ $rc -ne 0 ] && error "failed to get checksum type of $ost, rc = $rc, output = $checksum_type"
+       echo $checksum_type
 }
+
 F77_TMP=$TMP/f77-temp
 F77SZ=8
 setup_f77() {
@@ -8001,6 +8014,38 @@ test_77k() { # LU-10906
 }
 run_test 77k "enable/disable checksum correctly"
 
+test_77l() {
+       [ $PARALLEL == "yes" ] && skip "skip parallel run"
+       $GSS && skip_env "could not run with gss"
+
+       set_checksums 1
+       stack_trap "set_checksums $ORIG_CSUM" EXIT
+       stack_trap "set_checksum_type $ORIG_CSUM_TYPE" EXIT
+
+       set_checksum_type invalid && error "unexpected success of invalid checksum type"
+
+       $LFS setstripe -c 1 -i 0 $DIR/$tfile
+       for algo in $CKSUM_TYPES; do
+               set_checksum_type $algo || error "fail to set checksum type $algo"
+               osc_algo=$(get_osc_checksum_type OST0000)
+               [ "$osc_algo" != "$algo" ] && error "checksum type is $osc_algo after setting it to $algo"
+
+               # no locks, no reqs to let the connection idle
+               cancel_lru_locks osc
+               lru_resize_disable osc
+               wait_osc_import_state client ost1 IDLE
+
+               # ensure ost1 is connected
+               stat $DIR/$tfile >/dev/null || error "can't stat"
+               wait_osc_import_state client ost1 FULL
+
+               osc_algo=$(get_osc_checksum_type OST0000)
+               [ "$osc_algo" != "$algo" ] && error "checksum type changed from $algo to $osc_algo after reconnection"
+       done
+       return 0
+}
+run_test 77l "preferred checksum type is remembered after reconnected"
+
 [ "$ORIG_CSUM" ] && set_checksums $ORIG_CSUM || true
 rm -f $F77_TMP
 unset F77_TMP