From afa39b3cceabccd19e7c412ff90667e95cbfe3e8 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Wed, 26 Aug 2020 22:47:14 +0800 Subject: [PATCH] LU-12295 mdd: don't LBUG() if dir nlink is wrong Sometimes a directory's nlink may not be decreased correctly: when a remote subdirectory is unlinked, its dirent is removed, but decreasing the parent's nlink may fail. Don't assert on the link count in osd_destroy(); instead print an error message and continue, since the directory has already been checked to be empty. Add OBD_FAIL_OSD_REF_DEL to simulate the error above. Add sanity 48f. Signed-off-by: Lai Siyao Change-Id: I483aaf7a62b7761868b5e2af8dbfa92929fda78c Reviewed-on: https://review.whamcloud.com/39734 Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev --- lustre/include/obd_support.h | 3 +-- lustre/osd-ldiskfs/osd_handler.c | 19 +++++++------------ lustre/tests/sanity.sh | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index a3fb3c1..bca90b0 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -271,10 +271,9 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSD_OST_EA_FID_SET 0x197 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198 #define OBD_FAIL_OSD_INDEX_CRASH 0x199 - #define OBD_FAIL_OSD_TXN_START 0x19a - #define OBD_FAIL_OSD_DUPLICATE_MAP 0x19b +#define OBD_FAIL_OSD_REF_DEL 0x19c #define OBD_FAIL_OFD_SET_OID 0x1e0 diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index d869811..6ccd372 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1621,16 +1621,6 @@ static void osd_index_fini(struct osd_object *o) } } -/* - * Concurrency: no concurrent access is possible that late in object - * life-cycle (for all existing callers, that is. New callers have to provide - * their own locking.) 
- */ -static int osd_inode_unlinked(const struct inode *inode) -{ - return inode->i_nlink == 0; -} - enum { OSD_TXN_OI_DELETE_CREDITS = 20, OSD_TXN_INODE_DELETE_CREDITS = 20 @@ -3672,8 +3662,10 @@ static int osd_destroy(const struct lu_env *env, struct dt_object *dt, } if (S_ISDIR(inode->i_mode)) { - LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1 || - inode->i_nlink == 2); + if (inode->i_nlink > 2) + CERROR("%s: directory "DFID" ino %lu link count is %u at unlink. run e2fsck to repair\n", + osd_name(osd), PFID(fid), inode->i_ino, + inode->i_nlink); spin_lock(&obj->oo_guard); clear_nlink(inode); @@ -4221,6 +4213,9 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_is_write_locked(env, obj)); LASSERT(th != NULL); + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL)) + return -EIO; + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d9012f1..c3ae827 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -5131,6 +5131,27 @@ test_48e() { # bug 4134 } run_test 48e "Access to recreated parent subdir (should return errors)" +test_48f() { + [[ $MDS1_VERSION -ge $(version_code 2.13.55) ]] || + skip "need MDS >= 2.13.55" + [[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs" + [[ "$(facet_host mds1)" != "$(facet_host mds2)" ]] || + skip "needs different host for mdt1 mdt2" + [[ $(facet_fstype mds1) == ldiskfs ]] || skip "ldiskfs only" + + $LFS mkdir -i0 $DIR/$tdir + $LFS mkdir -i 1 $DIR/$tdir/sub1 $DIR/$tdir/sub2 $DIR/$tdir/sub3 + + for d in sub1 sub2 sub3; do + #define OBD_FAIL_OSD_REF_DEL 0x19c + do_facet mds1 $LCTL set_param fail_loc=0x8000019c + rm -rf $DIR/$tdir/$d && error "rm $d should fail" + done + + rm -d --interactive=never $DIR/$tdir || error "rm $tdir fail" +} +run_test 48f "non-zero nlink dir unlink won't LBUG()" + test_49() { # LU-1030 [ $PARALLEL == "yes" ] && skip "skip parallel run" remote_ost_nodsh && skip "remote 
OST with nodsh" -- 1.8.3.1