From 5bf8c0eaf9101f98b49029a8651b73bce436db17 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Tue, 16 Apr 2019 22:32:43 +0900 Subject: [PATCH] LU-12098 mdd: explicitly clear changelogs on deregister In case of an MDS crash in the middle of changelog_deregister, the system can end up with the changelog user deregistered, but the changelog entries not actually cleared. Then the only way to get rid of the remaining changelogs not used anymore by any user is to register a new changelog user and then deregister it. To protect from this scenario, explicitly clear changelogs used by the user, before actually deregistering it. Also add recovery-small test_136 as a regression test. Lustre-change: https://review.whamcloud.com/34688 Lustre-commit: 83ffa859bc629e246de9fcdfc82838b14c6d0ea3 Signed-off-by: Sebastien Buisson Change-Id: I14576180c9351337fc4d9ed0e1b176d352584750 Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/34921 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/mdd/mdd_device.c | 12 ++++++++- lustre/tests/recovery-small.sh | 55 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 12220c4..1b6e346 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -542,6 +542,7 @@ extern char obd_jobid_var[]; #define CFS_FAIL_CHLOG_USER_REG_UNREG_RACE 0x1315 #define OBD_FAIL_FORCE_GC_THREAD 0x1316 #define OBD_FAIL_LLOG_PROCESS_TIMEOUT 0x1317 +#define OBD_FAIL_LLOG_PURGE_DELAY 0x1318 #define OBD_FAIL_LLITE 0x1400 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401 diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index ac5ee30..4179749 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -1639,6 +1639,8 @@ int mdd_changelog_user_purge(const struct 
lu_env *env, mdd_changelog_user_purge_cb, &mcup, 0, 0); + OBD_FAIL_TIMEOUT(OBD_FAIL_LLOG_PURGE_DELAY, cfs_fail_val); + if ((rc == 0) && (mcup.mcup_usercount == 0)) { spin_lock(&mdd->mdd_cl.mc_user_lock); if (mdd->mdd_cl.mc_users == 0) { @@ -1886,7 +1888,15 @@ static int mdd_iocontrol(const struct lu_env *env, struct md_device *m, if (unlikely(!barrier_entry(mdd->mdd_bottom))) RETURN(-EINPROGRESS); - rc = mdd_changelog_user_purge(env, mdd, data->ioc_u32_1); + /* explicitly clear changelog first, to protect from crash in + * the middle of purge that would lead to unregistered consumer + * but pending changelog entries + */ + rc = mdd_changelog_clear(env, mdd, data->ioc_u32_1, 0); + if (!rc) + rc = mdd_changelog_user_purge(env, + mdd, data->ioc_u32_1); + barrier_exit(mdd->mdd_bottom); break; default: diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 12ae698..892a313 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2739,6 +2739,61 @@ test_134() { } run_test 134 "race between failover and search for reply data free slot" +test_136() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $MDS1_VERSION -ge $(version_code 2.12.52) ]] || + skip "Need MDS version at least 2.12.52" + + local mdts=$(comma_list $(mdts_nodes)) + local MDT0=$(facet_svc $SINGLEMDS) + + local clog=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n) + [ -n "$clog" ] || error "changelog_register failed" + cl_mask=$(do_facet mds1 $LCTL get_param \ + mdd.$MDT0.changelog_mask -n) + changelog_chmask "ALL" + + # generate some changelog records to accumulate + test_mkdir -i 0 -c 0 $DIR/$tdir || error "mkdir $tdir failed" + createmany -m $DIR/$tdir/$tfile 10000 || + error "create $DIR/$tdir/$tfile failed" + + local size1=$(do_facet $SINGLEMDS \ + $LCTL get_param -n mdd.$MDT0.changelog_size) + echo "Changelog size $size1" + + #define OBD_FAIL_LLOG_PURGE_DELAY 0x1318 + do_nodes $mdts $LCTL set_param fail_loc=0x1318 
fail_val=30 + + # launch changelog_deregister in background on MDS + do_facet mds1 "nohup $LCTL --device $MDT0 changelog_deregister $clog \ + > foo.out 2> foo.err < /dev/null &" + # give time to reach fail_loc + sleep 15 + + # fail_loc will make MDS sleep in the middle of changelog_deregister + # take this opportunity to abruptly kill MDS + FAILURE_MODE_save=$FAILURE_MODE + FAILURE_MODE=HARD + fail mds1 + FAILURE_MODE=$FAILURE_MODE_save + + do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0 + + local size2=$(do_facet $SINGLEMDS \ + $LCTL get_param -n mdd.$MDT0.changelog_size) + echo "Changelog size $size2" + local clog2=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.$MDT0.changelog_users | grep $clog") + echo "After crash, changelog user $clog2" + + [ -n "$clog2" -o $size2 -lt $size1 ] || + error "changelog record count unchanged" + + do_facet mds1 $LCTL set_param mdd.$MDT0.changelog_mask=\'$cl_mask\' -n +} +run_test 136 "changelog_deregister leaving pending records" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1