From e4fd618ff498814145002b2c3f56746b3d172e07 Mon Sep 17 00:00:00 2001 From: Yang Sheng Date: Tue, 10 Mar 2020 21:53:26 +0800 Subject: [PATCH] LU-11814 obd: Crashed while mount in parallel Ensure obd_type initialization has finished before obd_setup starts. Also include a test case to reproduce the failure. (osd_handler.c:7132:osd_device_init0()) ASSERTION( info ) failed: (osd_handler.c:7132:osd_device_init0()) LBUG Pid: 2457, comm: mount.lustre 3.10.0-862.9.1.el7_lustre.ddn1.x86_64 Call Trace: [] libcfs_call_trace+0x8c/0xc0 [libcfs] [] lbug_with_loc+0x4c/0xa0 [libcfs] [] osd_device_alloc+0x615/0x770 [osd_ldiskfs] [] obd_setup+0x11a/0x2b0 [obdclass] [] class_setup+0x2a8/0x840 [obdclass] [] class_process_config+0x196d/0x2420 [obdclass] [] do_lcfg+0x258/0x500 [obdclass] [] lustre_start_simple+0x88/0x210 [obdclass] [] server_fill_super+0xf34/0x185a [obdclass] [] lustre_fill_super+0x328/0x950 [obdclass] [] mount_nodev+0x4f/0xb0 [] lustre_mount+0x38/0x60 [obdclass] [] mount_fs+0x3e/0x1b0 [] vfs_kern_mount+0x67/0x110 [] do_mount+0x1ef/0xce0 [] SyS_mount+0x83/0xd0 [] system_call_fastpath+0x1c/0x21 Signed-off-by: Yang Sheng Change-Id: If3129df5fd11226636fb84a9275f481cefb749f7 Reviewed-on: https://review.whamcloud.com/37974 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Neil Brown --- libcfs/libcfs/module.c | 4 ++- lustre/include/obd.h | 1 + lustre/include/obd_class.h | 58 +++++++++++++++++++++++--------------------- lustre/include/obd_support.h | 1 + lustre/obdclass/genops.c | 4 ++- lustre/obdclass/lu_object.c | 4 +++ lustre/tests/conf-sanity.sh | 18 ++++++++++++++ 7 files changed, 61 insertions(+), 29 deletions(-) diff --git a/libcfs/libcfs/module.c b/libcfs/libcfs/module.c index 81c1f6f..fd6f34b 100644 --- a/libcfs/libcfs/module.c +++ b/libcfs/libcfs/module.c @@ -387,8 +387,10 @@ static int proc_fail_loc(struct ctl_table *table, int write, long old_fail_loc = cfs_fail_loc; rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if 
(old_fail_loc != cfs_fail_loc) + if (old_fail_loc != cfs_fail_loc) { + cfs_race_state = 1; wake_up(&cfs_race_waitq); + } return rc; } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index be44801..1c8bfa3 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -111,6 +111,7 @@ struct obd_type { struct kobject typ_kobj; }; #define typ_name typ_kobj.name +#define OBD_LU_TYPE_SETUP ((void *)0x01UL) struct brw_page { u64 off; diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index a077f8a..a9222b4 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -531,42 +531,46 @@ static inline int obd_set_info_async(const struct lu_env *env, static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) { int rc; - struct lu_device_type *ldt = obd->obd_type->typ_lu; - struct lu_device *d; + struct obd_type *type = obd->obd_type; + struct lu_device_type *ldt; - ENTRY; + ENTRY; - if (ldt != NULL) { - struct lu_context session_ctx; - struct lu_env env; - lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); - session_ctx.lc_thread = NULL; - lu_context_enter(&session_ctx); + wait_var_event(&type->typ_lu, + smp_load_acquire(&type->typ_lu) != OBD_LU_TYPE_SETUP); + ldt = type->typ_lu; + if (ldt != NULL) { + struct lu_context session_ctx; + struct lu_env env; - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - env.le_ses = &session_ctx; - d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg); - lu_env_fini(&env); - if (!IS_ERR(d)) { - obd->obd_lu_dev = d; - d->ld_obd = obd; - rc = 0; - } else - rc = PTR_ERR(d); - } - lu_context_exit(&session_ctx); - lu_context_fini(&session_ctx); + lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); + session_ctx.lc_thread = NULL; + lu_context_enter(&session_ctx); - } else { + rc = lu_env_init(&env, ldt->ldt_ctx_tags); + if (rc == 0) { + struct lu_device *dev; + env.le_ses = &session_ctx; + dev = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg); + 
lu_env_fini(&env); + if (!IS_ERR(dev)) { + obd->obd_lu_dev = dev; + dev->ld_obd = obd; + rc = 0; + } else + rc = PTR_ERR(dev); + } + lu_context_exit(&session_ctx); + lu_context_fini(&session_ctx); + } else { if (!obd->obd_type->typ_dt_ops->o_setup) { CERROR("%s: no %s operation\n", obd->obd_name, __func__); RETURN(-EOPNOTSUPP); } - rc = OBP(obd, setup)(obd, cfg); - } - RETURN(rc); + rc = OBP(obd, setup)(obd, cfg); + } + RETURN(rc); } static inline int obd_precleanup(struct obd_device *obd) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index baf90c3..d1bb8db 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -462,6 +462,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OBDCLASS_MODULE_LOAD 0x60a #define OBD_FAIL_OBD_ZERO_NLINK_RACE 0x60b #define OBD_FAIL_OBD_STOP_MDS_RACE 0x60c +#define OBD_FAIL_OBD_SETUP 0x60d #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 75388ed..97672f9 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -251,6 +251,7 @@ int class_register_type(const struct obd_ops *dt_ops, if (type == NULL) RETURN(-ENOMEM); + type->typ_lu = ldt ? OBD_LU_TYPE_SETUP : NULL; type->typ_kobj.kset = lustre_kset; kobject_init(&type->typ_kobj, &class_ktype); #ifdef HAVE_SERVER_SUPPORT @@ -289,8 +290,9 @@ dir_exist: setup_ldt: #endif if (ldt) { - type->typ_lu = ldt; rc = lu_device_type_init(ldt); + smp_store_release(&type->typ_lu, rc ? 
NULL : ldt); + wake_up_var(&type->typ_lu); if (rc) GOTO(failed, rc); } diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index b67bed1..916ad94 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -1486,6 +1486,10 @@ int lu_context_key_register(struct lu_context_key *key) if (lu_keys[i]) continue; key->lct_index = i; + + if (strncmp("osd_", module_name(key->lct_owner), 4) == 0) + CFS_RACE_WAIT(OBD_FAIL_OBD_SETUP); + if (cmpxchg(&lu_keys[i], NULL, key) != NULL) continue; diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 5c86ba9..d86fe47 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -8906,6 +8906,24 @@ test_125() } run_test 125 "check l_tunedisk only tunes OSTs and their slave devices" +test_126() { + [[ "$MDS1_VERSION" -ge $(version_code 2.13.52) ]] || + skip "Need MDS version at least 2.13.52" + + cleanup + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_module ../libcfs/libcfs/libcfs + #define OBD_FAIL_OBD_SETUP 0x60d + do_facet mds1 $LCTL set_param fail_loc=0x60d + do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules & + for i in {1..40}; do + do_facet mds1 lsmod | grep -q osd_$mds1_FSTYPE && break + sleep 1 + done + clear_failloc $SINGLEMDS 20 & + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS +} +run_test 126 "mount in parallel shouldn't cause a crash" + if ! combined_mgs_mds ; then stop mgs fi -- 1.8.3.1