Whamcloud - gitweb
LU-9285 osp: revert patches LU-8367 and LU-8972 25/25925/8
author Alex Zhuravlev <alexey.zhuravlev@intel.com>
Fri, 10 Mar 2017 08:50:21 +0000 (11:50 +0300)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 9 May 2017 03:45:04 +0000 (03:45 +0000)
Both patches are reverted; another solution will be proposed.

Revert "LU-8972 osp: skip subsequent orphan cleanups"

This reverts commit 6f56f71b407a8c14db4c2accd37da5b4feecde1a.

Revert "LU-8367 osp: do not block orphan cleanup"

This reverts commit 2ce0d5b0640e3e440822080e407eee1ce1cafd75.

Change-Id: I4fb215d4dcdbe0edac0c25998b7deebf02a427c0
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-on: https://review.whamcloud.com/25925
Tested-by: Jenkins
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c
lustre/tests/conf-sanity.sh
lustre/tests/recovery-small.sh
lustre/tests/sanity-lfsck.sh

index 9f10001..4a0ca32 100644 (file)
@@ -249,7 +249,6 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_XATTR_REP                 0x161
 #define OBD_FAIL_MDS_TRACK_OVERFLOW     0x162
 #define OBD_FAIL_MDS_LOV_CREATE_RACE    0x163
-#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT         0x164
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index 9d3a650..eb88b68 100644 (file)
@@ -88,7 +88,6 @@ struct osp_precreate {
        int                              osp_pre_create_slow;
        /* cleaning up orphans or recreating missing objects */
        int                              osp_pre_recovering;
-       int                              osp_pre_delorphan_sent;
 };
 
 struct osp_update_request_sub {
index a89c9f5..4d867fa 100644 (file)
@@ -577,7 +577,6 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
                RETURN(rc);
        }
 
-       LASSERT(d->opd_pre->osp_pre_delorphan_sent != 0);
        spin_lock(&d->opd_pre_lock);
        if (d->opd_pre_create_count > d->opd_pre_max_create_count / 2)
                d->opd_pre_create_count = d->opd_pre_max_create_count / 2;
@@ -783,20 +782,18 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        int                      update_status = 0;
        int                      rc;
        int                      diff;
-       struct lu_fid            fid;
 
        ENTRY;
 
        /*
-        * wait for local recovery to finish, so we can cleanup orphans.
-        * orphans are all objects since "last used" (assigned).
-        * consider reserved objects as created otherwise we can get into
-        * a livelock when one blocked thread holding a reservation can
-        * block recovery. see LU-8367 for the details. in some cases this
-        * can result in gaps (i.e. leaked objects), but we've got LFSCK...
-        *
-        * do not allow new reservations because they may end up getting
-        * orphans being cleaned up below. so we block new reservations.
+        * wait for local recovery to finish, so we can cleanup orphans.
+        * orphans are all objects since "last used" (assigned), but
+        * there might be objects reserved and in some cases they won't
+        * be used. we can't cleanup them till we're sure they won't be
+        * used. also we can't allow new reservations because they may
+        * end up getting orphans being cleaned up below. so we block
+        * new reservations and wait till all reserved objects are
+        * either used or released.
         */
        spin_lock(&d->opd_pre_lock);
        d->opd_pre_recovering = 1;
@@ -806,12 +803,16 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
         * catch all osp_precreate_reserve() calls who find
         * "!opd_pre_recovering".
         */
-       l_wait_event(d->opd_pre_waitq, d->opd_recovery_completed ||
+       l_wait_event(d->opd_pre_waitq,
+                    (!d->opd_pre_reserved && d->opd_recovery_completed) ||
                     !osp_precreate_running(d) || d->opd_got_disconnected,
                     &lwi);
        if (!osp_precreate_running(d) || d->opd_got_disconnected)
                GOTO(out, rc = -EAGAIN);
 
+       CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
+              d->opd_obd->obd_name, PFID(&d->opd_last_used_fid));
+
        *last_fid = d->opd_last_used_fid;
        /* The OSP should already get the valid seq now */
        LASSERT(!fid_is_zero(last_fid));
@@ -840,24 +841,10 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        if (body == NULL)
                GOTO(out, rc = -EPROTO);
 
-       body->oa.o_flags = 0;
+       body->oa.o_flags = OBD_FL_DELORPHAN;
        body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
 
-       /* unless this is the very first DELORPHAN (when we really
-        * can destroy some orphans), just tell OST to recreate
-        * missing objects in our precreate pool */
-       spin_lock(&d->opd_pre_lock);
-       if (d->opd_pre->osp_pre_delorphan_sent) {
-               fid = d->opd_pre_last_created_fid;
-       } else {
-               fid = d->opd_last_used_fid;
-               body->oa.o_flags = OBD_FL_DELORPHAN;
-       }
-       spin_unlock(&d->opd_pre_lock);
-       fid_to_ostid(&fid, &body->oa.o_oi);
-
-       CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
-              d->opd_obd->obd_name, PFID(&fid));
+       fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi);
 
        ptlrpc_request_set_replen(req);
 
@@ -880,10 +867,10 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index);
 
        spin_lock(&d->opd_pre_lock);
-       diff = osp_fid_diff(&fid, last_fid);
+       diff = osp_fid_diff(&d->opd_last_used_fid, last_fid);
        if (diff > 0) {
                d->opd_pre_create_count = OST_MIN_PRECREATE + diff;
-               d->opd_pre_last_created_fid = *last_fid;
+               d->opd_pre_last_created_fid = d->opd_last_used_fid;
        } else {
                d->opd_pre_create_count = OST_MIN_PRECREATE;
                d->opd_pre_last_created_fid = *last_fid;
@@ -894,11 +881,9 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
         */
        LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
                LUSTRE_DATA_SEQ_MAX_WIDTH);
-       if (d->opd_pre->osp_pre_delorphan_sent == 0)
-               d->opd_pre_used_fid = d->opd_pre_last_created_fid;
+       d->opd_pre_used_fid = d->opd_pre_last_created_fid;
        d->opd_pre_create_slow = 0;
        spin_unlock(&d->opd_pre_lock);
-       d->opd_pre->osp_pre_delorphan_sent = 1;
 
        CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
               "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid),
@@ -1386,12 +1371,6 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
        if (d->opd_pre_max_create_count == 0)
                RETURN(-ENOBUFS);
 
-       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_OSP_PRECREATE_WAIT)) {
-               if (d->opd_index == cfs_fail_val)
-                       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_OSP_PRECREATE_WAIT,
-                                        obd_timeout);
-       }
-
        /*
         * wait till:
         *  - preallocation is done
index 975d3e6..cd25613 100755 (executable)
@@ -5,7 +5,7 @@ set -e
 ONLY=${ONLY:-"$*"}
 
 # bug number for skipped test:  LU-8972
-ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT"
+ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT 101"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 is_sles11()                                            # LU-2181
index 4ae5c42..e8cf128 100755 (executable)
@@ -2685,53 +2685,6 @@ test_133() {
 }
 run_test 133 "don't fail on flock resend"
 
-test_134() {
-       local file1
-       local pid1
-       local pid2
-       local i
-
-       [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return 0
-       [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.59) ]] &&
-               skip "Need MDS version at least 2.8.59" && return
-
-       test_mkdir -p $DIR/$tdir
-       file1="$DIR/$tdir/file1"
-       file2="$DIR/$tdir/file2"
-
-#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT         0x164
-       # reserve stripe on ost1, block on ost2
-       do_facet $SINGLEMDS \
-               "lctl set_param fail_loc=0x80000164 fail_val=1"
-       $SETSTRIPE  -c 2 -o 0,1 $file1 &
-       pid1=$!
-       sleep 1
-
-       # initiate recovery with orphan cleanup on ost1
-       facet_failover ost1
-
-       # when OST1 recovery is over, the first setstripe should still
-       # have the object reserved, but that should not block new creates
-       # on OST1
-       $SETSTRIPE  -c 1 -o 0 $file2 &
-       pid2=$!
-       for ((i=0;i<$((TIMEOUT/2));i++)); do
-               if ! stat /proc/$pid2 >&/dev/null; then
-                       echo "DONE!"
-                       break
-               fi
-               echo "WAITING ..."
-               sleep 1
-       done
-       if let "i >= (TIMEOUT/2)"; then
-               error "create seem to get blocked by recovery"
-       fi
-       wait $pid1
-       wait $pid2
-       return 0
-}
-run_test 134 "MDT<>OST recovery don't block multistripe file creation"
-
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status
index 17746c5..8e3fe72 100644 (file)
@@ -1335,14 +1335,6 @@ test_11b() {
        umount_client $MOUNT
        stop ost1 || error "(1) Fail to stop ost1"
 
-       # stop MDS to forget last precreated object
-       echo "stop $SINGLEMDS"
-       stop $SINGLEMDS > /dev/null || error "(11) Fail to stop MDS!"
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(12) Fail to start MDS!"
-
        #define OBD_FAIL_OST_ENOSPC              0x215
        do_facet ost1 $LCTL set_param fail_loc=0x215