Whamcloud - gitweb
LU-12295 mdd: don't LBUG() if dir nlink is wrong 34/39734/7
author: Lai Siyao <lai.siyao@whamcloud.com>
Wed, 26 Aug 2020 14:47:14 +0000 (22:47 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 12 Sep 2020 15:43:44 +0000 (15:43 +0000)
Sometimes a directory's nlink is not decreased correctly: when a remote
subdirectory is unlinked, its dirent is removed, but decreasing the
parent's nlink fails.

Don't assert on this in osd_destroy(); instead print an error message
and continue, since we've already checked that the directory is empty.

Add OBD_FAIL_OSD_REF_DEL to simulate the error above.

Add sanity 48f.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I483aaf7a62b7761868b5e2af8dbfa92929fda78c
Reviewed-on: https://review.whamcloud.com/39734
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
lustre/include/obd_support.h
lustre/osd-ldiskfs/osd_handler.c
lustre/tests/sanity.sh

index a3fb3c1..bca90b0 100644 (file)
@@ -271,10 +271,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSD_OST_EA_FID_SET                    0x197
 #define OBD_FAIL_OSD_NO_OI_ENTRY                       0x198
 #define OBD_FAIL_OSD_INDEX_CRASH                       0x199
 #define OBD_FAIL_OSD_OST_EA_FID_SET                    0x197
 #define OBD_FAIL_OSD_NO_OI_ENTRY                       0x198
 #define OBD_FAIL_OSD_INDEX_CRASH                       0x199
-
 #define OBD_FAIL_OSD_TXN_START                         0x19a
 #define OBD_FAIL_OSD_TXN_START                         0x19a
-
 #define OBD_FAIL_OSD_DUPLICATE_MAP                     0x19b
 #define OBD_FAIL_OSD_DUPLICATE_MAP                     0x19b
+#define OBD_FAIL_OSD_REF_DEL                           0x19c
 
 #define OBD_FAIL_OFD_SET_OID                           0x1e0
 
 
 #define OBD_FAIL_OFD_SET_OID                           0x1e0
 
index d869811..6ccd372 100644 (file)
@@ -1621,16 +1621,6 @@ static void osd_index_fini(struct osd_object *o)
        }
 }
 
        }
 }
 
-/*
- * Concurrency: no concurrent access is possible that late in object
- * life-cycle (for all existing callers, that is. New callers have to provide
- * their own locking.)
- */
-static int osd_inode_unlinked(const struct inode *inode)
-{
-       return inode->i_nlink == 0;
-}
-
 enum {
        OSD_TXN_OI_DELETE_CREDITS    = 20,
        OSD_TXN_INODE_DELETE_CREDITS = 20
 enum {
        OSD_TXN_OI_DELETE_CREDITS    = 20,
        OSD_TXN_INODE_DELETE_CREDITS = 20
@@ -3672,8 +3662,10 @@ static int osd_destroy(const struct lu_env *env, struct dt_object *dt,
        }
 
        if (S_ISDIR(inode->i_mode)) {
        }
 
        if (S_ISDIR(inode->i_mode)) {
-               LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1 ||
-                       inode->i_nlink == 2);
+               if (inode->i_nlink > 2)
+                       CERROR("%s: directory "DFID" ino %lu link count is %u at unlink. run e2fsck to repair\n",
+                              osd_name(osd), PFID(fid), inode->i_ino,
+                              inode->i_nlink);
 
                spin_lock(&obj->oo_guard);
                clear_nlink(inode);
 
                spin_lock(&obj->oo_guard);
                clear_nlink(inode);
@@ -4221,6 +4213,9 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt,
        LASSERT(osd_is_write_locked(env, obj));
        LASSERT(th != NULL);
 
        LASSERT(osd_is_write_locked(env, obj));
        LASSERT(th != NULL);
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL))
+               return -EIO;
+
        oh = container_of(th, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle != NULL);
 
        oh = container_of(th, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle != NULL);
 
index d9012f1..c3ae827 100755 (executable)
@@ -5131,6 +5131,27 @@ test_48e() { # bug 4134
 }
 run_test 48e "Access to recreated parent subdir (should return errors)"
 
 }
 run_test 48e "Access to recreated parent subdir (should return errors)"
 
+test_48f() {
+       [[ $MDS1_VERSION -ge $(version_code 2.13.55) ]] ||
+               skip "need MDS >= 2.13.55"
+       [[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs"
+       [[ "$(facet_host mds1)" != "$(facet_host mds2)" ]] ||
+               skip "needs different host for mdt1 mdt2"
+       [[ $(facet_fstype mds1) == ldiskfs ]] || skip "ldiskfs only"
+
+       $LFS mkdir -i0 $DIR/$tdir
+       $LFS mkdir -i 1 $DIR/$tdir/sub1 $DIR/$tdir/sub2 $DIR/$tdir/sub3
+
+       for d in sub1 sub2 sub3; do
+               #define OBD_FAIL_OSD_REF_DEL    0x19c
+               do_facet mds1 $LCTL set_param fail_loc=0x8000019c
+               rm -rf $DIR/$tdir/$d && error "rm $d should fail"
+       done
+
+       rm -d --interactive=never $DIR/$tdir || error "rm $tdir fail"
+}
+run_test 48f "non-zero nlink dir unlink won't LBUG()"
+
 test_49() { # LU-1030
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
        remote_ost_nodsh && skip "remote OST with nodsh"
 test_49() { # LU-1030
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
        remote_ost_nodsh && skip "remote OST with nodsh"