Whamcloud - gitweb
LU-14110 obdclass: Protect cl_env_percpu[] 65/40565/11
author Etienne AUJAMES <eaujames@ddn.com>
Tue, 3 Nov 2020 14:35:17 +0000 (15:35 +0100)
committer Oleg Drokin <green@whamcloud.com>
Mon, 22 Mar 2021 16:25:49 +0000 (16:25 +0000)
cl_env_percpu is not protected against multiple concurrent client
mounts on the same node: "keys_fill" could be called with the same
cl_env_percpu context by several mount processes (race on
lu_context.lc_value).

This patch adds a mutex for cl_env_percpu to protect the contexts
"refill".

Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: Icfd6f3715899fa4ac5279e932f462e7cf29d98bd
Reviewed-on: https://review.whamcloud.com/40565
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/llite/llite_lib.c
lustre/obdclass/cl_object.c
lustre/tests/sanityn.sh

index f681534..93e9262 100644 (file)
@@ -593,6 +593,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_PCC_ATTACH_PAUSE                    0x1414
 #define OBD_FAIL_LLITE_SHORT_COMMIT                0x1415
 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2          0x1416
+#define OBD_FAIL_LLITE_RACE_MOUNT                  0x1417
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 0fbf1b1..f2d433a 100644 (file)
@@ -1216,6 +1216,8 @@ int ll_fill_super(struct super_block *sb)
        CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
               profilenm, cfg_instance, sb);
 
+       OBD_RACE(OBD_FAIL_LLITE_RACE_MOUNT);
+
        OBD_ALLOC_PTR(cfg);
        if (cfg == NULL)
                GOTO(out_free_cfg, err = -ENOMEM);
index ca989fa..ed6dc93 100644 (file)
@@ -874,6 +874,7 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 EXPORT_SYMBOL(cl_lvb2attr);
 
 static struct cl_env cl_env_percpu[NR_CPUS];
+static DEFINE_MUTEX(cl_env_percpu_mutex);
 
 static int cl_env_percpu_init(void)
 {
@@ -938,8 +939,10 @@ static void cl_env_percpu_refill(void)
 {
        int i;
 
+       mutex_lock(&cl_env_percpu_mutex);
        for_each_possible_cpu(i)
                lu_env_refill(&cl_env_percpu[i].ce_lu);
+       mutex_unlock(&cl_env_percpu_mutex);
 }
 
 void cl_env_percpu_put(struct lu_env *env)
index 7b2a1f6..f7e3bfc 100755 (executable)
@@ -5567,6 +5567,42 @@ test_108a() {
 }
 run_test 108a "lseek: parallel updates"
 
+# LU-14110
+test_109() {
+       local i
+       local pid1 pid2
+
+       umount_client $MOUNT
+       umount_client $MOUNT2
+
+       echo "Starting race between client mount instances (50 iterations):"
+       for i in {1..50}; do
+               log "Iteration $i"
+
+#define OBD_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT        0x80001417
+               $LCTL set_param -n fail_loc=0x80001417
+
+               mount_client $MOUNT  & pid1=$!
+               mount_client $MOUNT2 & pid2=$!
+               wait $pid1 || error "Mount $MOUNT fails with $?"
+               wait $pid2 || error "Mount $MOUNT2 fails with $?"
+
+               umount_client $MOUNT  & pid1=$!
+               umount_client $MOUNT2 & pid2=$!
+               wait $pid1 || error "Umount $MOUNT fails with $?"
+               wait $pid2 || error "Umount $MOUNT2 fails with $?"
+
+               $LUSTRE_RMMOD || error "Fail to remove lustre modules"
+               load_modules
+               echo
+       done
+
+       mount_client $MOUNT
+       mount_client $MOUNT2
+}
+
+run_test 109 "Race with several mount instances on 1 node"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarentee script finish, but command in script