Whamcloud - gitweb
LU-15133 osp: only deactivate OSP on LAST_FID error 09/45309/7
author Lai Siyao <lai.siyao@whamcloud.com>
Wed, 20 Oct 2021 05:46:17 +0000 (01:46 -0400)
committer Oleg Drokin <green@whamcloud.com>
Thu, 6 Jan 2022 22:02:44 +0000 (22:02 +0000)
ofd_get_info_hdl() should return -EFAULT upon LAST_FID error, which
is the same as LAST_ID error.

osp_get_lastfid_from_ost() should deactivate OSP only upon -EFAULT,
which means reading LAST_FID on OST failed. This can avoid unnecessary
admin intervention.

Add sanity 27S.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Ib78c8994c0398dd4b4db32005abd018933ef3a7c
Reviewed-on: https://review.whamcloud.com/45309
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/ofd/ofd_dev.c
lustre/osp/osp_precreate.c
lustre/tests/sanity.sh

index 1774f7e..4322ae2 100644 (file)
@@ -350,6 +350,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_SEEK_NET           0x24a
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
 #define OBD_FAIL_OST_SEEK_NET           0x24a
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
+#define OBD_FAIL_OST_GET_LAST_FID       0x252
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -721,6 +722,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSP_INVALID_LOGID             0x2106
 #define OBD_FAIL_OSP_CON_EVENT_DELAY           0x2107
 #define OBD_FAIL_OSP_PRECREATE_PAUSE           0x2108
 #define OBD_FAIL_OSP_INVALID_LOGID             0x2106
 #define OBD_FAIL_OSP_CON_EVENT_DELAY           0x2107
 #define OBD_FAIL_OSP_PRECREATE_PAUSE           0x2108
+#define OBD_FAIL_OSP_GET_LAST_FID              0x2109
 
 /* barrier */
 #define OBD_FAIL_MGS_BARRIER_READ_NET          0x2200
 
 /* barrier */
 #define OBD_FAIL_MGS_BARRIER_READ_NET          0x2200
index cb7a1b5..c0914a6 100644 (file)
@@ -1170,6 +1170,9 @@ static int ofd_get_info_hdl(struct tgt_session_info *tsi)
                if (rc)
                        RETURN(err_serious(rc));
 
                if (rc)
                        RETURN(err_serious(rc));
 
+               if (OBD_FAIL_CHECK(OBD_FAIL_OST_GET_LAST_FID))
+                       RETURN(-EAGAIN);
+
                fid = req_capsule_client_get(tsi->tsi_pill, &RMF_FID);
                if (fid == NULL)
                        RETURN(err_serious(-EPROTO));
                fid = req_capsule_client_get(tsi->tsi_pill, &RMF_FID);
                if (fid == NULL)
                        RETURN(err_serious(-EPROTO));
@@ -1183,12 +1186,12 @@ static int ofd_get_info_hdl(struct tgt_session_info *tsi)
                oseq = ofd_seq_load(tsi->tsi_env, ofd,
                                    ostid_seq(&fti->fti_ostid));
                if (IS_ERR(oseq))
                oseq = ofd_seq_load(tsi->tsi_env, ofd,
                                    ostid_seq(&fti->fti_ostid));
                if (IS_ERR(oseq))
-                       RETURN(PTR_ERR(oseq));
+                       RETURN(-EFAULT);
 
                rc = ostid_to_fid(fid, &oseq->os_oi,
                                  ofd->ofd_lut.lut_lsd.lsd_osd_index);
                if (rc != 0)
 
                rc = ostid_to_fid(fid, &oseq->os_oi,
                                  ofd->ofd_lut.lut_lsd.lsd_osd_index);
                if (rc != 0)
-                       GOTO(out_put, rc);
+                       GOTO(out_put, rc = -EFAULT);
 
                CDEBUG(D_HA, "%s: LAST FID is "DFID"\n", ofd_name(ofd),
                       PFID(fid));
 
                CDEBUG(D_HA, "%s: LAST FID is "DFID"\n", ofd_name(ofd),
                       PFID(fid));
index 74f144e..124ef8d 100644 (file)
@@ -782,12 +782,11 @@ static int osp_get_lastfid_from_ost(const struct lu_env *env,
 
        rc = ptlrpc_queue_wait(req);
        if (rc) {
 
        rc = ptlrpc_queue_wait(req);
        if (rc) {
-               /* bad-bad OST.. let sysadm sort this out */
-               if (rc == -ENOTSUPP) {
-                       CERROR("%s: server does not support FID: rc = %d\n",
-                              d->opd_obd->obd_name, -ENOTSUPP);
-               }
-               ptlrpc_set_import_active(imp, 0);
+               /* -EFAULT means reading LAST_FID failed (see ofd_get_info_hdl),
+                * let sysadm sort this out.
+                */
+               if (rc == -EFAULT)
+                       ptlrpc_set_import_active(imp, 0);
                GOTO(out, rc);
        }
 
                GOTO(out, rc);
        }
 
@@ -879,7 +878,8 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        *last_fid = d->opd_last_used_fid;
        /* The OSP should already get the valid seq now */
        LASSERT(!fid_is_zero(last_fid));
        *last_fid = d->opd_last_used_fid;
        /* The OSP should already get the valid seq now */
        LASSERT(!fid_is_zero(last_fid));
-       if (fid_oid(&d->opd_last_used_fid) < 2) {
+       if (fid_oid(&d->opd_last_used_fid) < 2 ||
+           OBD_FAIL_CHECK(OBD_FAIL_OSP_GET_LAST_FID)) {
                /* lastfid looks strange... ask OST */
                rc = osp_get_lastfid_from_ost(env, d);
                if (rc)
                /* lastfid looks strange... ask OST */
                rc = osp_get_lastfid_from_ost(env, d);
                if (rc)
@@ -1304,6 +1304,11 @@ static int osp_precreate_thread(void *_args)
                        if (d->opd_pre == NULL)
                                continue;
 
                        if (d->opd_pre == NULL)
                                continue;
 
+                       if (OBD_FAIL_CHECK(OBD_FAIL_OSP_GET_LAST_FID)) {
+                               d->opd_pre_recovering = 1;
+                               break;
+                       }
+
                        /* To avoid handling different seq in precreate/orphan
                         * cleanup, it will hold precreate until current seq is
                         * used up. */
                        /* To avoid handling different seq in precreate/orphan
                         * cleanup, it will hold precreate until current seq is
                         * used up. */
index 914c4a3..e788b63 100755 (executable)
@@ -3373,6 +3373,31 @@ test_27R() {
 }
 run_test 27R "test max_stripecount limitation when stripe count is set to -1"
 
 }
 run_test 27R "test max_stripecount limitation when stripe count is set to -1"
 
+test_27S() {
+       (( $MDS1_VERSION >= $(version_code 2.14.54) )) ||
+               skip "Need MDS version at least 2.14.54"
+       [[ "$(facet_host mds1)" != "$(facet_host ost1)" ]] ||
+               skip "needs different host for mdt1 ost1"
+
+       local count=$(precreated_ost_obj_count 0 0)
+
+       echo "precreate count $count"
+       mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
+       $LFS setstripe -i 0 -c 1 $DIR/$tdir || error "setstripe $tdir failed"
+       #define OBD_FAIL_OSP_GET_LAST_FID       0x2109
+       do_facet mds1 $LCTL set_param fail_loc=0x2109
+       #define OBD_FAIL_OST_GET_LAST_FID       0x252
+       do_facet ost1 $LCTL set_param fail_loc=0x252
+       createmany -o $DIR/$tdir/f $count &
+       pid=$!
+       echo "precreate count $(precreated_ost_obj_count 0 0)"
+       do_facet mds1 $LCTL set_param fail_loc=0
+       do_facet ost1 $LCTL set_param fail_loc=0
+       wait $pid || error "createmany failed"
+       echo "precreate count $(precreated_ost_obj_count 0 0)"
+}
+run_test 27S "don't deactivate OSP on network issue"
+
 # createtest also checks that device nodes are created and
 # then visible correctly (#2091)
 test_28() { # bug 2091
 # createtest also checks that device nodes are created and
 # then visible correctly (#2091)
 test_28() { # bug 2091