Whamcloud - gitweb
LU-14688 mdt: changelog purge deletes plain llog 19/43719/2
authorAlexander Boyko <alexander.boyko@hpe.com>
Mon, 17 May 2021 13:29:01 +0000 (09:29 -0400)
committerOleg Drokin <green@whamcloud.com>
Wed, 2 Jun 2021 17:49:20 +0000 (17:49 +0000)
When a large number of records is cancelled, changelog purge can now
delete a whole plain llog file instead of cancelling its records one by one.
The patch also fixes the race between llog_destroy and llog_next_block.

HPE-bug-id: LUS-9950
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I47c2ed97945e979745255381f83b6a417d7ba8b1
Reviewed-on: https://review.whamcloud.com/43719
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdd/mdd_device.c
lustre/obdclass/llog.c
lustre/obdclass/llog_osd.c
lustre/tests/sanity.sh

index 87d8abd..29dafee 100644 (file)
@@ -245,6 +245,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_REINT_OPEN                 0x169
 #define OBD_FAIL_MDS_REINT_OPEN2        0x16a
 #define OBD_FAIL_MDS_COMMITRW_DELAY     0x16b
 #define OBD_FAIL_MDS_REINT_OPEN                 0x169
 #define OBD_FAIL_MDS_REINT_OPEN2        0x16a
 #define OBD_FAIL_MDS_COMMITRW_DELAY     0x16b
+#define OBD_FAIL_MDS_CHANGELOG_DEL      0x16c
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index a83d770..e7923f1 100644 (file)
@@ -344,6 +344,28 @@ static int llog_changelog_cancel_cb(const struct lu_env *env,
                        OBD_RACE(OBD_FAIL_MDS_CHANGELOG_RACE);
        }
 
                        OBD_RACE(OBD_FAIL_MDS_CHANGELOG_RACE);
        }
 
+       /* Records follow one by one, cr_index++. So we can calculate the
+        * last cr_index in this plain llog, and if it is less than the
+        * cookie endrec, cancel the whole file.
+        */
+       if ((LLOG_HDR_BITMAP_SIZE(llh->lgh_hdr) - hdr->lrh_index +
+            rec->cr.cr_index) < cl_cookie->endrec) {
+               int rc;
+
+               if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_MDS_CHANGELOG_DEL))) {
+                       if (cfs_fail_val == 0) {
+                               cfs_fail_val = (unsigned long)llh & 0xFFFFFFFF;
+                               OBD_RACE(OBD_FAIL_MDS_CHANGELOG_DEL);
+                       }
+               }
+               rc = llog_destroy(env, llh);
+               if (!rc) {
+                       CDEBUG(D_HA, "Changelog destroyed plain "DFID"\n",
+                              PFID(&llh->lgh_id.lgl_oi.oi_fid));
+                       RETURN(LLOG_DEL_PLAIN);
+               }
+       }
+
        /* cancel them one at a time.  I suppose we could store up the cookies
         * and cancel them all at once; probably more efficient, but this is
         * done as a user call, so who cares... */
        /* cancel them one at a time.  I suppose we could store up the cookies
         * and cancel them all at once; probably more efficient, but this is
         * done as a user call, so who cares... */
index 5580286..12e179e 100644 (file)
@@ -560,6 +560,8 @@ repeat:
                        CDEBUG(D_OTHER, "cur_offset %llu, chunk_offset %llu,"
                               " buf_offset %u, rc = %d\n", cur_offset,
                               (__u64)chunk_offset, buf_offset, rc);
                        CDEBUG(D_OTHER, "cur_offset %llu, chunk_offset %llu,"
                               " buf_offset %u, rc = %d\n", cur_offset,
                               (__u64)chunk_offset, buf_offset, rc);
+               if (rc == -ESTALE)
+                       GOTO(out, rc = 0);
                /* we`ve tried to reread the chunk, but there is no
                 * new records */
                if (rc == -EIO && repeated && (chunk_offset + buf_offset) ==
                /* we`ve tried to reread the chunk, but there is no
                 * new records */
                if (rc == -EIO && repeated && (chunk_offset + buf_offset) ==
index 6021396..683b622 100644 (file)
@@ -903,9 +903,18 @@ static int llog_osd_next_block(const struct lu_env *env,
        LASSERT(loghandle);
        LASSERT(loghandle->lgh_ctxt);
 
        LASSERT(loghandle);
        LASSERT(loghandle->lgh_ctxt);
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_CHANGELOG_DEL) &&
+           cfs_fail_val == ((unsigned long)loghandle & 0xFFFFFFFF)) {
+               OBD_RACE(OBD_FAIL_MDS_CHANGELOG_DEL);
+               msleep(MSEC_PER_SEC >> 2);
+       }
+
        o = loghandle->lgh_obj;
        LASSERT(o);
        o = loghandle->lgh_obj;
        LASSERT(o);
-       LASSERT(llog_osd_exist(loghandle));
+       dt_read_lock(env, o, 0);
+       if (!llog_osd_exist(loghandle))
+               GOTO(out, rc = -ESTALE); //object was destroyed
+
        dt = lu2dt_dev(o->do_lu.lo_dev);
        LASSERT(dt);
 
        dt = lu2dt_dev(o->do_lu.lo_dev);
        LASSERT(dt);
 
@@ -1050,6 +1059,7 @@ retry:
        }
        GOTO(out, rc = -EIO);
 out:
        }
        GOTO(out, rc = -EIO);
 out:
+       dt_read_unlock(env, o);
        return rc;
 }
 
        return rc;
 }
 
@@ -1094,7 +1104,10 @@ static int llog_osd_prev_block(const struct lu_env *env,
 
        o = loghandle->lgh_obj;
        LASSERT(o);
 
        o = loghandle->lgh_obj;
        LASSERT(o);
-       LASSERT(llog_osd_exist(loghandle));
+       dt_read_lock(env, o, 0);
+       if (!llog_osd_exist(loghandle))
+               GOTO(out, rc = -ESTALE);
+
        dt = lu2dt_dev(o->do_lu.lo_dev);
        LASSERT(dt);
 
        dt = lu2dt_dev(o->do_lu.lo_dev);
        LASSERT(dt);
 
@@ -1177,6 +1190,7 @@ static int llog_osd_prev_block(const struct lu_env *env,
        }
        GOTO(out, rc = -EIO);
 out:
        }
        GOTO(out, rc = -EIO);
 out:
+       dt_read_unlock(env, o);
        return rc;
 }
 
        return rc;
 }
 
index 8306005..625bc80 100755 (executable)
@@ -16082,6 +16082,56 @@ test_160m() {
 }
 run_test 160m "Changelog clear race"
 
 }
 run_test 160m "Changelog clear race"
 
+test_160n() {
+       remote_mds_nodsh && skip "remote MDS with nodsh" && return
+       [[ $MDS1_VERSION -ge $(version_code 2.14.51) ]] ||
+               skip "Need MDS version at least 2.14.51"
+       local cl_users
+       local cl_user1
+       local cl_user2
+       local pid1
+       local first_rec
+       local last_rec=0
+
+       # Create a user
+       changelog_register || error "first changelog_register failed"
+
+       cl_users=(${CL_USERS[mds1]})
+       cl_user1="${cl_users[0]}"
+
+       # generate some changelog records to accumulate on MDT0
+       test_mkdir -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed"
+       first_rec=$(changelog_users $SINGLEMDS |
+                       awk '/^current.index:/ { print $NF }')
+       while (( last_rec < (( first_rec + 65000)) )); do
+               createmany -m $DIR/$tdir/$tfile 10000 ||
+                       error "create $DIR/$tdir/$tfile failed"
+
+               for i in $(seq 0 10000); do
+                       mrename $DIR/$tdir/$tfile$i $DIR/$tdir/$tfile-new$i \
+                               > /dev/null
+               done
+
+               unlinkmany $DIR/$tdir/$tfile-new 10000 ||
+                       error "unlinkmany failed unlink"
+               last_rec=$(changelog_users $SINGLEMDS |
+                       awk '/^current.index:/ { print $NF }')
+               echo last record $last_rec
+               (( last_rec == 0 )) && error "no changelog found"
+       done
+
+#define OBD_FAIL_MDS_CHANGELOG_DEL      0x16c
+       do_facet mds1 $LCTL set_param fail_loc=0x8000016c fail_val=0
+
+       __changelog_clear mds1 $cl_user1 0 &
+       pid1=$!
+       sleep 2
+       __changelog_clear mds1 $cl_user1 0 ||
+               error "fail to cancel record for $cl_user1"
+       wait $pid1
+       [[ $? -eq 0 ]] || error "fail to cancel record for $cl_user2"
+}
+run_test 160n "Changelog destroy race"
 
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
 
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"