Whamcloud - gitweb
LU-17261 lov: unlink can handle bogus striping
author Alex Zhuravlev <bzzz@whamcloud.com>
Sat, 23 Mar 2024 17:13:32 +0000 (20:13 +0300)
committer Andreas Dilger <adilger@whamcloud.com>
Mon, 15 Apr 2024 09:55:12 +0000 (09:55 +0000)
Allow removing a file which has uninitialized OST objects in the
layout, possibly because LFSCK reconnected an orphan object back
into a mirrored file after the mirror had been deleted.

Don't wait and retry accessing the bogus OST or MDT index in this
case: the target will never appear, so waiting is futile.

Lustre-change: https://review.whamcloud.com/54544
Lustre-commit: 4ae823762db40d790ddd00c29e969b5c8e376430

Lustre-change: https://review.whamcloud.com/54719
Lustre-commit: 47573f85e60ac91f69c09b9edfbffc3f74fef298

Test-Parameters: testlist=sanity-flr,sanity-flr,sanity-flr,sanity-flr
Fixes: 94a4663db9 ("LU-17334 lmv: handle object created on newly added MDT")
Fixes: f35f897ec8 ("LU-17334 lov: handle object created on newly added OST")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I90b97c0e2d560d71b2a4c32a47fcfd7ae4e5535d
Reviewed-by: Zhenyu Xu <bobijam@hotmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54752
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/lmv/lmv_obd.c
lustre/lov/lov_ea.c
lustre/tests/sanity-flr.sh

index fa74db9..0a4e831 100644 (file)
@@ -159,6 +159,10 @@ retry:
                     "%s: MDT index %u/%u not configured\n" :
                     "%s: MDT index %u more than MDT count %u\n",
                     obd->obd_name, index, lmv->lmv_mdt_count);
+
+       if (index >= LOV_V1_INSANE_STRIPE_COUNT)
+               return NULL;
+
        if (now > next_print) {
                LCONSOLE_INFO("%s: wait %ds while client connects to new MDT\n",
                              obd->obd_name, (int)(retry_limit - now));
index 14b0852..f99502e 100644 (file)
@@ -286,7 +286,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
                        continue;
 
 retry_new_ost:
-               if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
+               if (unlikely((u32)loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
                             !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) {
                        time64_t now = ktime_get_seconds();
 
@@ -302,11 +302,15 @@ retry_new_ost:
 
                        /* log debug every loop, just to see it is trying */
                        CDEBUG_LIMIT(level,
-                               loi->loi_ost_idx < lov->desc.ld_tgt_count ?
+                               (u32)loi->loi_ost_idx < lov->desc.ld_tgt_count ?
                                "%s: FID "DOSTID" OST index %d/%u missing\n" :
                                "%s: FID "DOSTID" OST index %d more than OST count %u\n",
                                lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi),
                                loi->loi_ost_idx, lov->desc.ld_tgt_count);
+
+                       if ((u32)loi->loi_ost_idx >= LOV_V1_INSANE_STRIPE_COUNT)
+                               GOTO(out_lsme, rc = -EINVAL);
+
                        if (now > next_print) {
                                LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n",
                                              lov->desc.ld_uuid.uuid,
index e2d82f6..9c06938 100644 (file)
@@ -4360,6 +4360,44 @@ test_210a() {
 }
 run_test 210a "handle broken mirrored lovea"
 
+test_210b() {
+       # Check that a file whose striping info carries a bogus OST index
+       # can still be removed, and that the OST object looked up before
+       # the mirror was added is no longer found on ost1 afterwards.
+       local tf=$DIR/$tfile
+
+       # Not run on ZFS.
+       # NOTE(review): presumably because the object lookups below rely
+       # on $DEBUGFS - confirm.
+       [ "$FSTYPE" != "zfs" ] || skip "ZFS file number is not accurate"
+
+       # one stripe (-c1) starting at OST index 0 (-i0), then 1MiB of zeros
+       $LFS setstripe -i0 -c1 $tf || error "can't create file"
+       dd if=/dev/zero of=$tf bs=1M count=1 || error "can't dd"
+
+       local ostdev=$(ostdevname 1)
+       # Split the getstripe line(s) containing "0x" into words: word 1 is
+       # used as the object id, word 3 (minus its "0x" prefix) as the
+       # sequence.
+       local fid=($($LFS getstripe $DIR/$tfile | grep 0x))
+       local seq=${fid[3]#0x}
+       local oid=${fid[1]}
+       local oid_hex
+       # With sequence 0 the object name is word 1 itself; otherwise it is
+       # word 2 without its "0x" prefix.
+       if [ $seq == 0 ]; then
+               oid_hex=${fid[1]}
+       else
+               oid_hex=${fid[2]#0x}
+       fi
+       # path handed to debugfs "stat": O/<seq>/d<oid % 32>/<object name>
+       local objpath="O/$seq/d$(($oid % 32))/$oid_hex"
+       # The quotes around the stat request are escaped so that they are
+       # still present when the command is run through do_facet.
+       # NOTE(review): confirm the escaping depth against do_facet.
+       local cmd="$DEBUGFS -c -R \\\"stat $objpath\\\" $ostdev"
+
+       # precondition: debugfs on ost1 must print an "Inode:" line for the
+       # object before anything is broken
+       local ino=$(do_facet ost1 $cmd | grep Inode:)
+       [[ -n $ino ]] || error "can't access obj object: $objpath"
+
+       # The "#define" line below is a shell comment naming fail_loc 0x1428.
+       # NOTE(review): presumably this fail_loc makes the mirror extend
+       # record an invalid OST index in the layout - confirm against the
+       # OBD_FAIL_LOV_INVALID_OSTIDX handler.
+#define OBD_FAIL_LOV_INVALID_OSTIDX                0x1428
+       do_facet mds1 "$LCTL set_param fail_loc=0x1428"
+       $LFS mirror extend -N $tf || error "can't mirror"
+
+       # now remove the file with bogus ostidx in the striping info
+       rm $tf || error "can't remove"
+       [[ -f $tf ]] && error "rm failed"
+       wait_delete_completed
+
+       # the same debugfs lookup must no longer print an "Inode:" line
+       local ino=$(do_facet ost1 $cmd | grep Inode:)
+       [[ -z $ino ]] || error "still CAN access obj object: $objpath"
+}
+# register the test with the framework under id 210b
+run_test 210b "handle broken mirrored lovea (unlink)"
+
 complete_test $SECONDS
 check_and_cleanup_lustre
 exit_status