From 4ae823762db40d790ddd00c29e969b5c8e376430 Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Sat, 23 Mar 2024 20:13:32 +0300
Subject: [PATCH] LU-17261 lov: unlink can handle bogus striping

Allow removing a file which has uninitialized OST objects in
the layout, possibly because LFSCK reconnected an orphan object
back into a mirrored file after the mirror had been deleted.

Don't wait and retry to access the bogus OST or MDT index in
this case, because the target will never appear, so waiting
is futile.

Test-Parameters: testlist=sanity-flr,sanity-flr,sanity-flr,sanity-flr
Fixes: 94a4663db9 ("LU-17334 lmv: handle object created on newly added MDT")
Fixes: f35f897ec8 ("LU-17334 lov: handle object created on newly added OST")
Signed-off-by: Alex Zhuravlev
Change-Id: I90b97c0e2d560d71b2a4c32a47fcfd7ae4e5535d
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54544
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Zhenyu Xu
Reviewed-by: Oleg Drokin
Reviewed-by: Andreas Dilger
---
 lustre/lmv/lmv_obd.c       |  4 ++++
 lustre/lov/lov_ea.c        |  8 ++++++--
 lustre/tests/sanity-flr.sh | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c
index 74b5bc4..ab1d9e3 100644
--- a/lustre/lmv/lmv_obd.c
+++ b/lustre/lmv/lmv_obd.c
@@ -159,6 +159,10 @@ retry:
 			     "%s: MDT index %u/%u not configured\n" :
 			     "%s: MDT index %u more than MDT count %u\n",
 			     obd->obd_name, index, lmv->lmv_mdt_count);
+
+		if (index >= LOV_V1_INSANE_STRIPE_COUNT)
+			return NULL;
+
 		if (now > next_print) {
 			LCONSOLE_INFO("%s: wait %ds while client connects to new MDT\n",
 				      obd->obd_name, (int)(retry_limit - now));
diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c
index c9dfe73..a1ccd9d 100644
--- a/lustre/lov/lov_ea.c
+++ b/lustre/lov/lov_ea.c
@@ -276,7 +276,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
 			continue;
 
 retry_new_ost:
-		if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
+		if (unlikely((u32)loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
 			     !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) {
 			time64_t now = ktime_get_seconds();
 
@@ -292,11 +292,15 @@ retry_new_ost:
 
 			/* log debug every loop, just to see it is trying */
 			CDEBUG_LIMIT(level,
-				loi->loi_ost_idx < lov->desc.ld_tgt_count ?
+				(u32)loi->loi_ost_idx < lov->desc.ld_tgt_count ?
 				"%s: FID "DOSTID" OST index %d/%u missing\n" :
 				"%s: FID "DOSTID" OST index %d more than OST count %u\n",
 				lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi),
 				loi->loi_ost_idx, lov->desc.ld_tgt_count);
+
+			if ((u32)loi->loi_ost_idx >= LOV_V1_INSANE_STRIPE_COUNT)
+				GOTO(out_lsme, rc = -EINVAL);
+
 			if (now > next_print) {
 				LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n",
 					      lov->desc.ld_uuid.uuid,
diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh
index 26285ca..9c6c2cb 100644
--- a/lustre/tests/sanity-flr.sh
+++ b/lustre/tests/sanity-flr.sh
@@ -4454,6 +4454,42 @@ test_210a() {
 }
 run_test 210a "handle broken mirrored lovea"
 
+test_210b() {
+	local tf=$DIR/$tfile
+	local after
+	local mirrored
+	local before
+
+	[ "$FSTYPE" != "zfs" ] || skip "ZFS file number is not accurate"
+
+	stack_trap "rm -f $tf"
+
+	# XXX: how to avoid precreate
+	wait_delete_completed
+	$LFS df -i | grep OST
+	before=$($LFS df -i|grep OST|awk '{sum=sum+$3}END{print sum}')
+	echo "before file creation: $before"
+	$LFS setstripe -c 2 $tf || error "can't create file"
+	dd if=/dev/zero of=$tf bs=1M count=1 || error "can't dd"
+
+#define OBD_FAIL_LOV_INVALID_OSTIDX			0x1428
+	do_facet mds1 "$LCTL set_param fail_loc=0x1428"
+	$LFS mirror extend -N $tf || error "can't mirror"
+	$LFS df -i | grep OST
+	mirrored=$($LFS df -i|grep OST|awk '{sum=sum+$3}END{print sum}')
+	echo "after mirror: $mirrored"
+
+	rm $tf || error "can't remove"
+	[[ -f $tf ]] && error "rm failed"
+	wait_delete_completed
+
+	$LFS df -i | grep OST
+	after=$($LFS df -i|grep OST|awk '{sum=sum+$3}END{print sum}')
+	echo "after rm: $after"
+	(( after < before )) || error "something went wrong with unlink"
+}
+run_test 210b "handle broken mirrored lovea (unlink)"
+
 complete_test $SECONDS
 check_and_cleanup_lustre
 exit_status
-- 
1.8.3.1