Whamcloud - gitweb
LU-14110 obdclass: Protect cl_env_percpu[]
author Etienne AUJAMES <eaujames@ddn.com>
Thu, 14 Apr 2022 16:18:00 +0000 (09:18 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Tue, 26 Apr 2022 00:09:25 +0000 (00:09 +0000)
cl_env_percpu is not protected against multiple client mounts on the
same node: "keys_fill" could be called with the same cl_env_percpu
context by several mount processes (race on lu_context.lc_value).

This patch adds a mutex for cl_env_percpu to protect the context
"refill".

Lustre-change: https://review.whamcloud.com/40565
Lustre-commit: 881551fbb7335694b89a877072bcda0aeaf8705c

Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: Icfd6f3715899fa4ac5279e932f462e7cf29d98bd
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/47073
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/obd_support.h
lustre/llite/llite_lib.c
lustre/obdclass/cl_object.c
lustre/tests/sanityn.sh

index 0495bd6..57404b3 100644 (file)
@@ -603,6 +603,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_PCC_ATTACH_PAUSE                    0x1414
 #define OBD_FAIL_LLITE_SHORT_COMMIT                0x1415
 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2          0x1416
+#define OBD_FAIL_LLITE_RACE_MOUNT                  0x1417
 #define OBD_FAIL_LLITE_PAGE_ALLOC                  0x1418
 #define OBD_FAIL_LLITE_OPEN_DELAY                  0x1419
 
index 8fc5b95..ab9b17b 100644 (file)
@@ -1157,6 +1157,8 @@ int ll_fill_super(struct super_block *sb)
        CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
               profilenm, cfg_instance, sb);
 
+       OBD_RACE(OBD_FAIL_LLITE_RACE_MOUNT);
+
        OBD_ALLOC_PTR(cfg);
        if (cfg == NULL)
                GOTO(out_free_cfg, err = -ENOMEM);
index 730deb9..1b919c2 100644 (file)
@@ -887,6 +887,7 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 EXPORT_SYMBOL(cl_lvb2attr);
 
 static struct cl_env cl_env_percpu[NR_CPUS];
+static DEFINE_MUTEX(cl_env_percpu_mutex);
 
 static int cl_env_percpu_init(void)
 {
@@ -951,8 +952,10 @@ static void cl_env_percpu_refill(void)
 {
        int i;
 
+       mutex_lock(&cl_env_percpu_mutex);
        for_each_possible_cpu(i)
                lu_env_refill(&cl_env_percpu[i].ce_lu);
+       mutex_unlock(&cl_env_percpu_mutex);
 }
 
 void cl_env_percpu_put(struct lu_env *env)
index 6dcdf32..eee250d 100755 (executable)
@@ -5649,6 +5649,42 @@ test_112() {
 }
 run_test 112 "update max-inherit in default LMV"
 
+# LU-14110
+test_109() {
+       local i
+       local pid1 pid2
+
+       umount_client $MOUNT
+       umount_client $MOUNT2
+
+       echo "Starting race between client mount instances (50 iterations):"
+       for i in {1..50}; do
+               log "Iteration $i"
+
+#define OBD_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT        0x80001417
+               $LCTL set_param -n fail_loc=0x80001417
+
+               mount_client $MOUNT  & pid1=$!
+               mount_client $MOUNT2 & pid2=$!
+               wait $pid1 || error "Mount $MOUNT fails with $?"
+               wait $pid2 || error "Mount $MOUNT2 fails with $?"
+
+               umount_client $MOUNT  & pid1=$!
+               umount_client $MOUNT2 & pid2=$!
+               wait $pid1 || error "Umount $MOUNT fails with $?"
+               wait $pid2 || error "Umount $MOUNT2 fails with $?"
+
+               $LUSTRE_RMMOD || error "Fail to remove lustre modules"
+               load_modules
+               echo
+       done
+
+       mount_client $MOUNT
+       mount_client $MOUNT2
+}
+
+run_test 109 "Race with several mount instances on 1 node"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script