Whamcloud - gitweb
LU-14694 mdt: do not remove orphans at umount 83/43783/20
authorAlex Zhuravlev <bzzz@whamcloud.com>
Tue, 25 May 2021 15:39:14 +0000 (18:39 +0300)
committerOleg Drokin <green@whamcloud.com>
Tue, 10 Aug 2021 08:07:41 +0000 (08:07 +0000)
Do not remove orphans at umount, as it is very likely that another MDT
is being unmounted as well, and such a removal can get stuck if the
object being removed is a striped directory.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I0417b1b4447887e166c144605bbfa3249126eacd
Reviewed-on: https://review.whamcloud.com/43783
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdt/mdt_handler.c
lustre/tests/recovery-small.sh

index a54cef4..e2fda89 100644 (file)
@@ -6518,7 +6518,8 @@ static int mdt_export_cleanup(struct obd_export *exp)
                                rc = mdt_ctxt_add_dirty_flag(&env, info, mfd);
 
                        /* Don't unlink orphan on failover umount, LU-184 */
-                       if (exp->exp_flags & OBD_OPT_FAILOVER) {
+                       if (exp->exp_flags & OBD_OPT_FAILOVER ||
+                           exp->exp_obd->obd_stopping) {
                                ma->ma_valid = MA_FLAGS;
                                ma->ma_attr_flags |= MDS_KEEP_ORPHAN;
                        }
index a17a8fd..1c1b86d 100755 (executable)
@@ -3154,6 +3154,35 @@ test_148() {
 }
 run_test 148 "data corruption through resend"
 
+test_149() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+
+       test_mkdir -i 0 -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed"
+
+       # make an orphan striped dir: keep it open, then unlink it
+       $MULTIOP $DIR/$tdir D_c &
+       local PID=$!
+       sleep 0.3
+       rmdir $DIR/$tdir || error "can't rmdir"
+
+       # stop a slave MDT where one of the stripes is located
+       stop mds2 -f
+
+       # stopping mds1 should not trigger orphan removal, as another
+       # MDT may already be stopped
+       stop mds1 -f
+
+       start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail"
+       start mds2 $(mdsdevname 2) $MDS_MOUNT_OPTS || error "mds2 start fail"
+
+       kill -USR1 $PID
+       wait $PID
+
+       clients_up
+       return 0
+}
+run_test 149 "skip orphan removal at umount"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status