Whamcloud - gitweb
LU-9498 osp: revert till a fix 47/27447/3
authorAlex Zhuravlev <alexey.zhuravlev@intel.com>
Tue, 6 Jun 2017 08:05:57 +0000 (12:05 +0400)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 13 Jun 2017 16:54:23 +0000 (16:54 +0000)
Revert "LU-8367 osp: orphan cleanup do not wait for reserved"

LU-9498 was discovered with soak testing, the root cause is unknown
at the moment.

This reverts commit 1b3028ab142a1f605e6274a6019bb39d89ae8d25.

Change-Id: I65016876f3e42345a06194771c9731e28a850adc
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-on: https://review.whamcloud.com/27447
Tested-by: Jenkins
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c
lustre/tests/recovery-small.sh

index 0a3813b..433c2f8 100644 (file)
@@ -249,7 +249,6 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_XATTR_REP                 0x161
 #define OBD_FAIL_MDS_TRACK_OVERFLOW     0x162
 #define OBD_FAIL_MDS_LOV_CREATE_RACE    0x163
-#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT         0x164
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index 546b6ba..7e3819f 100644 (file)
@@ -747,7 +747,13 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
         * how many objects are available for immediate creation
         */
        spin_lock(&d->opd_pre_lock);
-       sfs->os_fprecreated = osp_objs_precreated(env, d);
+       LASSERTF(fid_seq(&d->opd_pre_last_created_fid) ==
+                fid_seq(&d->opd_pre_used_fid),
+                "last_created "DFID", next_fid "DFID"\n",
+                PFID(&d->opd_pre_last_created_fid),
+                PFID(&d->opd_pre_used_fid));
+       sfs->os_fprecreated = fid_oid(&d->opd_pre_last_created_fid) -
+                             fid_oid(&d->opd_pre_used_fid);
        sfs->os_fprecreated -= d->opd_pre_reserved;
        LASSERTF(sfs->os_fprecreated <= OST_MAX_PRECREATE * 2,
                 "last_created "DFID", next_fid "DFID", reserved %llu\n",
index 774f37e..49ea455 100644 (file)
@@ -602,28 +602,6 @@ static inline int osp_is_fid_client(struct osp_device *osp)
        return imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_FID;
 }
 
-/**
- * Return number of precreated objects
- *
- * A simple helper to calculate the number of precreated objects on the device.
- *
- * \param[in] env      LU environment provided by the caller
- * \param[in] osp      OSP device
- *
- * \retval             the number of the precreated objects
- */
-static inline int osp_objs_precreated(const struct lu_env *env,
-                                     struct osp_device *osp)
-{
-       /* if pre_used_fid isn't initialized yet,
-        * then precreated hasn't started and no
-        * objects have been precreated */
-       if (fid_seq(&osp->opd_pre_used_fid) == 0)
-               return 0;
-       return osp_fid_diff(&osp->opd_pre_last_created_fid,
-                           &osp->opd_pre_used_fid);
-}
-
 struct object_update *
 update_buffer_get_update(struct object_update_request *request,
                         unsigned int index);
index c88eeb1..5e0c8c7 100644 (file)
@@ -239,6 +239,23 @@ void osp_statfs_need_now(struct osp_device *d)
 }
 
 /**
+ * Return number of precreated objects
+ *
+ * A simple helper to calculate the number of precreated objects on the device.
+ *
+ * \param[in] env      LU environment provided by the caller
+ * \param[in] osp      OSP device
+ *
+ * \retval             the number of the precreated objects
+ */
+static inline int osp_objs_precreated(const struct lu_env *env,
+                                     struct osp_device *osp)
+{
+       return osp_fid_diff(&osp->opd_pre_last_created_fid,
+                           &osp->opd_pre_used_fid);
+}
+
+/**
  * Check pool of precreated objects is nearly empty
  *
  * We should not wait till the pool of the precreated objects is exhausted,
@@ -758,7 +775,6 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
 {
        struct osp_thread_info  *osi = osp_env_info(env);
        struct lu_fid           *last_fid = &osi->osi_fid;
-       struct lu_fid            tmp;
        struct ptlrpc_request   *req = NULL;
        struct obd_import       *imp;
        struct ost_body         *body;
@@ -771,15 +787,24 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
 
        /*
         * wait for local recovery to finish, so we can cleanup orphans
-        * orphans are all objects since "last used" (assigned). we do not
-        * block waiting for all reservations as this can lead to a deadlock
-        * see LU-8972 for the details.
+        * orphans are all objects since "last used" (assigned), but
+        * there might be objects reserved and in some cases they won't
+        * be used. we can't cleanup them till we're sure they won't be
+        * used. also can't we allow new reservations because they may
+        * end up getting orphans being cleaned up below. so we block
+        * new reservations and wait till all reserved objects either
+        * user or released.
         */
        spin_lock(&d->opd_pre_lock);
        d->opd_pre_recovering = 1;
        spin_unlock(&d->opd_pre_lock);
-
-       l_wait_event(d->opd_pre_waitq, d->opd_recovery_completed ||
+       /*
+        * The locking above makes sure the opd_pre_reserved check below will
+        * catch all osp_precreate_reserve() calls who find
+        * "!opd_pre_recovering".
+        */
+       l_wait_event(d->opd_pre_waitq,
+                    (!d->opd_pre_reserved && d->opd_recovery_completed) ||
                     !osp_precreate_running(d) || d->opd_got_disconnected,
                     &lwi);
        if (!osp_precreate_running(d) || d->opd_got_disconnected)
@@ -793,7 +818,6 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        LASSERT(!fid_is_zero(last_fid));
        if (fid_oid(&d->opd_last_used_fid) < 2) {
                /* lastfid looks strange... ask OST */
-               LCONSOLE_WARN("%s: refresh last id\n", d->opd_obd->obd_name);
                rc = osp_get_lastfid_from_ost(env, d);
                if (rc)
                        GOTO(out, rc);
@@ -820,21 +844,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        body->oa.o_flags = OBD_FL_DELORPHAN;
        body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
 
-       /* cleanup objects upto used+reserved as we do not
-        * want to block the orphan cleanup procedure */
-       spin_lock(&d->opd_pre_lock);
-       if (fid_seq(&d->opd_pre_used_fid) != 0) {
-               tmp = d->opd_pre_used_fid;
-               tmp.f_oid += d->opd_pre_reserved;
-               /* shrink current precreate window to let reserved
-                * already objects be created and block new
-                * precreations */
-               d->opd_pre_last_created_fid = tmp;
-       } else {
-               tmp = d->opd_last_used_fid;
-       }
-       fid_to_ostid(&tmp, &body->oa.o_oi);
-       spin_unlock(&d->opd_pre_lock);
+       fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi);
 
        ptlrpc_request_set_replen(req);
 
@@ -851,9 +861,30 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        if (body == NULL)
                GOTO(out, rc = -EPROTO);
 
-       /* OST provides us with id new pool starts from in body->oa.o_id */
+       /*
+        * OST provides us with id new pool starts from in body->oa.o_id
+        */
        ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index);
 
+       spin_lock(&d->opd_pre_lock);
+       diff = osp_fid_diff(&d->opd_last_used_fid, last_fid);
+       if (diff > 0) {
+               d->opd_pre_create_count = OST_MIN_PRECREATE + diff;
+               d->opd_pre_last_created_fid = d->opd_last_used_fid;
+       } else {
+               d->opd_pre_create_count = OST_MIN_PRECREATE;
+               d->opd_pre_last_created_fid = *last_fid;
+       }
+       /*
+        * This empties the pre-creation pool and effectively blocks any new
+        * reservations.
+        */
+       LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
+               LUSTRE_DATA_SEQ_MAX_WIDTH);
+       d->opd_pre_used_fid = d->opd_pre_last_created_fid;
+       d->opd_pre_create_slow = 0;
+       spin_unlock(&d->opd_pre_lock);
+
        CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
               "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid),
               PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_last_used_fid));
@@ -879,41 +910,12 @@ out:
                } else {
                        wake_up(&d->opd_pre_user_waitq);
                }
-               GOTO(ret, rc);
-       }
-
-       spin_lock(&d->opd_pre_lock);
-       d->opd_pre_recovering = 0;
-       spin_unlock(&d->opd_pre_lock);
-
-       /* now we wait until all reserved objects are consumed or released,
-        * so that the window doesn't change. otherwise we can get objects
-        * with wrong FIDs */
-       l_wait_event(d->opd_pre_waitq, d->opd_pre_reserved == 0 ||
-                    !osp_precreate_running(d) || d->opd_got_disconnected, &lwi);
-       if (!osp_precreate_running(d))
-               GOTO(ret, rc = 0);
-
-       spin_lock(&d->opd_pre_lock);
-       diff = osp_fid_diff(&d->opd_last_used_fid, last_fid);
-       if (diff > 0) {
-               d->opd_pre_create_count = OST_MIN_PRECREATE + diff;
-               d->opd_pre_last_created_fid = d->opd_last_used_fid;
        } else {
-               d->opd_pre_create_count = OST_MIN_PRECREATE;
-               d->opd_pre_last_created_fid = *last_fid;
+               spin_lock(&d->opd_pre_lock);
+               d->opd_pre_recovering = 0;
+               spin_unlock(&d->opd_pre_lock);
        }
-       /*
-        * This empties the pre-creation pool and effectively blocks any new
-        * reservations.
-        */
-       LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
-                       LUSTRE_DATA_SEQ_MAX_WIDTH);
-       d->opd_pre_used_fid = d->opd_pre_last_created_fid;
-       d->opd_pre_create_slow = 0;
-       spin_unlock(&d->opd_pre_lock);
 
-ret:
        RETURN(rc);
 }
 
@@ -1369,12 +1371,6 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
        if (d->opd_pre_max_create_count == 0)
                RETURN(-ENOBUFS);
 
-       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_OSP_PRECREATE_WAIT)) {
-               if (d->opd_index == cfs_fail_val)
-                       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_OSP_PRECREATE_WAIT,
-                                        obd_timeout);
-       }
-
        /*
         * wait till:
         *  - preallocation is done
index 4ae5c42..e8cf128 100755 (executable)
@@ -2685,53 +2685,6 @@ test_133() {
 }
 run_test 133 "don't fail on flock resend"
 
-test_134() {
-       local file1
-       local pid1
-       local pid2
-       local i
-
-       [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return 0
-       [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.59) ]] &&
-               skip "Need MDS version at least 2.8.59" && return
-
-       test_mkdir -p $DIR/$tdir
-       file1="$DIR/$tdir/file1"
-       file2="$DIR/$tdir/file2"
-
-#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT         0x164
-       # reserve stripe on ost1, block on ost2
-       do_facet $SINGLEMDS \
-               "lctl set_param fail_loc=0x80000164 fail_val=1"
-       $SETSTRIPE  -c 2 -o 0,1 $file1 &
-       pid1=$!
-       sleep 1
-
-       # initiate recovery with orphan cleanup on ost1
-       facet_failover ost1
-
-       # when OST1 recovery is over, the first setstripe should still
-       # have the object reserved, but that should not block new creates
-       # on OST1
-       $SETSTRIPE  -c 1 -o 0 $file2 &
-       pid2=$!
-       for ((i=0;i<$((TIMEOUT/2));i++)); do
-               if ! stat /proc/$pid2 >&/dev/null; then
-                       echo "DONE!"
-                       break
-               fi
-               echo "WAITING ..."
-               sleep 1
-       done
-       if let "i >= (TIMEOUT/2)"; then
-               error "create seem to get blocked by recovery"
-       fi
-       wait $pid1
-       wait $pid2
-       return 0
-}
-run_test 134 "MDT<>OST recovery don't block multistripe file creation"
-
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status