From c67a74b55c126ec1be6c195cb2e8cb8c2e6cf868 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Wed, 13 Apr 2016 17:40:43 -0400 Subject: [PATCH] LU-8044 mgs: Only add OSP for registered MDT Only add OSP for those MDTs that are really registered, by checking whether their llog is empty; otherwise it will incorrectly add a "setup OSP" record at the beginning of the config log, which will cause setup failure. Signed-off-by: Di Wang Change-Id: I9df26a5e6653b4ac0413dad01ea2abfda6f1c1fb Reviewed-on: http://review.whamcloud.com/19658 Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/mgs/mgs_llog.c | 83 ++++++++++++++++++++++++++++++-------------- lustre/tests/conf-sanity.sh | 29 ++++++++++++++++ 3 files changed, 86 insertions(+), 27 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 71b4792..5103f66 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -466,6 +466,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MGS_TARGET_DEL_NET 0x90b #define OBD_FAIL_MGS_CONFIG_READ_NET 0x90c #define OBD_FAIL_MGS_LDLM_REPLY_NET 0x90d +#define OBD_FAIL_MGS_WRITE_TARGET_DELAY 0x90e #define OBD_FAIL_QUOTA_DQACQ_NET 0xA01 #define OBD_FAIL_QUOTA_EDQUOT 0xA02 diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 70a83dc..f9cb376 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -2293,34 +2293,34 @@ static int mgs_write_log_mdt(const struct lu_env *env, #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client */ - /* copy client info about lov/lmv */ - mgi->mgi_comp.comp_mti = mti; - mgi->mgi_comp.comp_fsdb = fsdb; + /* copy client info about lov/lmv */ + mgi->mgi_comp.comp_mti = mti; + mgi->mgi_comp.comp_fsdb = fsdb; - rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname, - &mgi->mgi_comp); - if (rc) - GOTO(out_free, rc); - rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname, - fsdb->fsdb_clilmv); 
- if (rc) - GOTO(out_free, rc); + rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname, + &mgi->mgi_comp); + if (rc) + GOTO(out_free, rc); + rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname, + fsdb->fsdb_clilmv); + if (rc) + GOTO(out_free, rc); - /* add mountopts */ - rc = record_start_log(env, mgs, &llh, cliname); - if (rc) - GOTO(out_free, rc); + /* add mountopts */ + rc = record_start_log(env, mgs, &llh, cliname); + if (rc) + GOTO(out_free, rc); - rc = record_marker(env, llh, fsdb, CM_START, cliname, - "mount opts"); - if (rc) - GOTO(out_end, rc); - rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov, - fsdb->fsdb_clilmv); - if (rc) - GOTO(out_end, rc); - rc = record_marker(env, llh, fsdb, CM_END, cliname, - "mount opts"); + rc = record_marker(env, llh, fsdb, CM_START, cliname, + "mount opts"); + if (rc) + GOTO(out_end, rc); + rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov, + fsdb->fsdb_clilmv); + if (rc) + GOTO(out_end, rc); + rc = record_marker(env, llh, fsdb, CM_END, cliname, + "mount opts"); if (rc) GOTO(out_end, rc); @@ -2335,8 +2335,34 @@ static int mgs_write_log_mdt(const struct lu_env *env, if (rc) GOTO(out_end, rc); - rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti, - i, logname); + /* NB: If the log for the MDT is empty, it means + * the MDT is only added to the index + * map, and not being processed yet, i.e. this + * is an unregistered MDT, see mgs_write_log_target(). + * so we should skip it. Otherwise + * + * 1. MGS get register request for MDT1 and MDT2. + * + * 2. Then both MDT1 and MDT2 are added into + * fsdb_mdt_index_map. (see mgs_set_index()). + * + * 3. Then MDT1 get the lock of fsdb_mutex, then + * generate the config log, here, it will regard MDT2 + * as an existent MDT, and generate "add osp" for + * lustre-MDT0001-osp-MDT0002. Note: at the moment + * MDT0002 config log is still empty, so it will + * add "add osp" even before "lov setup", which + * will definitely cause trouble. + * + * 4. 
MDT1 registration finished, fsdb_mutex is + * released, then MDT2 get in, then in above + * mgs_steal_llog_for_mdt_from_client(), it will + * add another osp log for lustre-MDT0001-osp-MDT0002, + * which will cause another trouble.*/ + if (!mgs_log_is_empty(env, mgs, logname)) + rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, + mti, i, logname); + name_destroy(&logname); if (rc) GOTO(out_end, rc); @@ -3745,6 +3771,9 @@ int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs, rc = 0; } + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ? + cfs_fail_val : 10); + mutex_lock(&fsdb->fsdb_mutex); if (mti->mti_flags & diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 9d0f17e..ae35999 100755 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -6421,6 +6421,35 @@ test_92() { } run_test 92 "ldev returns MGS NID correctly in command substitution" +test_93() { + [ $MDSCOUNT -lt 3 ] && skip "needs >= 3 MDTs" && return + + reformat + #start mgs or mgs/mdt0 + if ! combined_mgs_mds ; then + start_mgs + start_mdt 1 + else + start_mdt 1 + fi + + start_ost || error "OST0 start fail" + + #define OBD_FAIL_MGS_WRITE_TARGET_DELAY 0x90e + do_facet mgs "$LCTL set_param fail_val=10 fail_loc=0x8000090e" + for num in $(seq 2 $MDSCOUNT); do + start_mdt $num & + done + + mount_client $MOUNT || error "mount client fails" + wait_osc_import_state mds ost FULL + wait_osc_import_state client ost FULL + check_mount || error "check_mount failed" + + cleanup || error "cleanup failed with $?" +} +run_test 93 "register mulitple MDT at the same time" + if ! combined_mgs_mds ; then stop mgs fi -- 1.8.3.1