Whamcloud - gitweb
LU-12098 mdd: explicitly clear changelogs on deregister 21/34921/2
author: Sebastien Buisson <sbuisson@ddn.com>
Tue, 16 Apr 2019 13:32:43 +0000 (22:32 +0900)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 8 Jun 2019 02:35:07 +0000 (02:35 +0000)
If the MDS crashes in the middle of changelog_deregister, the system
can end up with the changelog user deregistered but the changelog
entries not actually cleared. The only way to get rid of the leftover
changelog records, which are no longer used by any user, is then to
register a new changelog user and deregister it.
To protect against this scenario, explicitly clear the changelog
records used by the user before actually deregistering it.

Also add recovery-small test_136 as a regression test.

Lustre-change: https://review.whamcloud.com/34688
Lustre-commit: 83ffa859bc629e246de9fcdfc82838b14c6d0ea3

Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I14576180c9351337fc4d9ed0e1b176d352584750
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Signed-off-by: Minh Diep <mdiep@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34921
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdd/mdd_device.c
lustre/tests/recovery-small.sh

index 12220c4..1b6e346 100644 (file)
@@ -542,6 +542,7 @@ extern char obd_jobid_var[];
 #define CFS_FAIL_CHLOG_USER_REG_UNREG_RACE         0x1315
 #define OBD_FAIL_FORCE_GC_THREAD                   0x1316
 #define OBD_FAIL_LLOG_PROCESS_TIMEOUT              0x1317
+#define OBD_FAIL_LLOG_PURGE_DELAY                  0x1318
 
 #define OBD_FAIL_LLITE                              0x1400
 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE             0x1401
index ac5ee30..4179749 100644 (file)
@@ -1639,6 +1639,8 @@ int mdd_changelog_user_purge(const struct lu_env *env,
                              mdd_changelog_user_purge_cb, &mcup,
                              0, 0);
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_LLOG_PURGE_DELAY, cfs_fail_val);
+
        if ((rc == 0) && (mcup.mcup_usercount == 0)) {
                spin_lock(&mdd->mdd_cl.mc_user_lock);
                if (mdd->mdd_cl.mc_users == 0) {
@@ -1886,7 +1888,15 @@ static int mdd_iocontrol(const struct lu_env *env, struct md_device *m,
                if (unlikely(!barrier_entry(mdd->mdd_bottom)))
                        RETURN(-EINPROGRESS);
 
-               rc = mdd_changelog_user_purge(env, mdd, data->ioc_u32_1);
+               /* explicitly clear changelog first, to protect from crash in
+                * the middle of purge that would lead to unregistered consumer
+                * but pending changelog entries
+                */
+               rc = mdd_changelog_clear(env, mdd, data->ioc_u32_1, 0);
+               if (!rc)
+                       rc = mdd_changelog_user_purge(env,
+                                                     mdd, data->ioc_u32_1);
+
                barrier_exit(mdd->mdd_bottom);
                break;
        default:
index 12ae698..892a313 100755 (executable)
@@ -2739,6 +2739,61 @@ test_134() {
 }
 run_test 134 "race between failover and search for reply data free slot"
 
+test_136() {
+       remote_mds_nodsh && skip "remote MDS with nodsh" && return
+       [[ $MDS1_VERSION -ge $(version_code 2.12.52) ]] ||
+               skip "Need MDS version at least 2.12.52"
+
+       local mdts=$(comma_list $(mdts_nodes))
+       local MDT0=$(facet_svc $SINGLEMDS)
+
+       local clog=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n)
+       [ -n "$clog" ] || error "changelog_register failed"
+       cl_mask=$(do_facet mds1 $LCTL get_param \
+                               mdd.$MDT0.changelog_mask -n)
+       changelog_chmask "ALL"
+
+       # generate some changelog records to accumulate
+       test_mkdir -i 0 -c 0 $DIR/$tdir || error "mkdir $tdir failed"
+       createmany -m $DIR/$tdir/$tfile 10000 ||
+               error "create $DIR/$tdir/$tfile failed"
+
+       local size1=$(do_facet $SINGLEMDS \
+                     $LCTL get_param -n mdd.$MDT0.changelog_size)
+       echo "Changelog size $size1"
+
+       #define OBD_FAIL_LLOG_PURGE_DELAY                   0x1318
+       do_nodes $mdts $LCTL set_param fail_loc=0x1318 fail_val=30
+
+       # launch changelog_deregister in background on MDS
+       do_facet mds1 "nohup $LCTL --device $MDT0 changelog_deregister $clog \
+                       > foo.out 2> foo.err < /dev/null &"
+       # give time to reach fail_loc
+       sleep 15
+
+       # fail_loc will make MDS sleep in the middle of changelog_deregister
+       # take this opportunity to abruptly kill MDS
+       FAILURE_MODE_save=$FAILURE_MODE
+       FAILURE_MODE=HARD
+       fail mds1
+       FAILURE_MODE=$FAILURE_MODE_save
+
+       do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0
+
+       local size2=$(do_facet $SINGLEMDS \
+                     $LCTL get_param -n mdd.$MDT0.changelog_size)
+       echo "Changelog size $size2"
+       local clog2=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+                       mdd.$MDT0.changelog_users | grep $clog")
+       echo "After crash, changelog user $clog2"
+
+       [ -n "$clog2" -o $size2 -lt $size1 ] ||
+               error "changelog record count unchanged"
+
+       do_facet mds1 $LCTL set_param mdd.$MDT0.changelog_mask=\'$cl_mask\' -n
+}
+run_test 136 "changelog_deregister leaving pending records"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status