From 881551fbb7335694b89a877072bcda0aeaf8705c Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Tue, 3 Nov 2020 15:35:17 +0100 Subject: [PATCH] LU-14110 obdclass: Protect cl_env_percpu[] cl_env_percpu is not protected against multiple client mounts on the same node: "keys_fill" could be called with the same cl_env_percpu context by several mount processes (race on lu_context.lc_value). This patch adds a mutex for cl_env_percpu to protect the context "refill". Signed-off-by: Etienne AUJAMES Change-Id: Icfd6f3715899fa4ac5279e932f462e7cf29d98bd Reviewed-on: https://review.whamcloud.com/40565 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/llite/llite_lib.c | 2 ++ lustre/obdclass/cl_object.c | 3 +++ lustre/tests/sanityn.sh | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f681534..93e9262 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -593,6 +593,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_PCC_ATTACH_PAUSE 0x1414 #define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2 0x1416 +#define OBD_FAIL_LLITE_RACE_MOUNT 0x1417 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 0fbf1b1..f2d433a 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1216,6 +1216,8 @@ int ll_fill_super(struct super_block *sb) CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n", profilenm, cfg_instance, sb); + OBD_RACE(OBD_FAIL_LLITE_RACE_MOUNT); + OBD_ALLOC_PTR(cfg); if (cfg == NULL) GOTO(out_free_cfg, err = -ENOMEM); diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c index ca989fa..ed6dc93 100644 --- a/lustre/obdclass/cl_object.c +++ 
b/lustre/obdclass/cl_object.c @@ -874,6 +874,7 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb) EXPORT_SYMBOL(cl_lvb2attr); static struct cl_env cl_env_percpu[NR_CPUS]; +static DEFINE_MUTEX(cl_env_percpu_mutex); static int cl_env_percpu_init(void) { @@ -938,8 +939,10 @@ static void cl_env_percpu_refill(void) { int i; + mutex_lock(&cl_env_percpu_mutex); for_each_possible_cpu(i) lu_env_refill(&cl_env_percpu[i].ce_lu); + mutex_unlock(&cl_env_percpu_mutex); } void cl_env_percpu_put(struct lu_env *env) diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 7b2a1f6..f7e3bfc 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -5567,6 +5567,42 @@ test_108a() { } run_test 108a "lseek: parallel updates" +# LU-14110 +test_109() { + local i + local pid1 pid2 + + umount_client $MOUNT + umount_client $MOUNT2 + + echo "Starting race between client mount instances (50 iterations):" + for i in {1..50}; do + log "Iteration $i" + +#define OBD_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT 0x80001417 + $LCTL set_param -n fail_loc=0x80001417 + + mount_client $MOUNT & pid1=$! + mount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Mount $MOUNT fails with $?" + wait $pid2 || error "Mount $MOUNT2 fails with $?" + + umount_client $MOUNT & pid1=$! + umount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Umount $MOUNT fails with $?" + wait $pid2 || error "Umount $MOUNT2 fails with $?" + + $LUSTRE_RMMOD || error "Fail to remove lustre modules" + load_modules + echo + done + + mount_client $MOUNT + mount_client $MOUNT2 +} + +run_test 109 "Race with several mount instances on 1 node" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script -- 1.8.3.1