Whamcloud - gitweb
LU-5604 tgt: return missed fail ids 32/12232/7
authorLiang Zhen <liang.zhen@intel.com>
Mon, 17 Nov 2014 15:35:54 +0000 (23:35 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 18 Feb 2015 22:26:51 +0000 (22:26 +0000)
OBD_FAIL_LDLM_REPLY is missing from tgt_enqueue, and it's actually
not suitable for tgt_enqueue anymore because tgt_enqueue() is a
common handler now.

This patch includes a few changes:
- tgt_enqueue sets tgt_session_info::tsi_reply_fail_id to
  OBD_FAIL_MGS/MDS/OST_LDLM_REPLY_NET based on the type of target.

- rewrite test_52 of replay-single; the only reason test_52 could
  pass was a typo:

  $CHECKSTAT -t file $DIR/$tfile-* which should be $DIR/$tfile

- add definitions for OBD_FAIL_LDLM_SRV_CP/BL/GL_AST and resolve
  OBD_FAIL conflicts

- OBD_FAIL_UPDATE_OBJ_NET_REP was renamed to
  OBD_FAIL_OUT_UPDATE_NET_REP but was still referenced by its old
  name in tests.

- The OBD_FAIL_MDS_FAIL_LOV_LOG_ADD check is obsolete, as are the
  tests that used it. Meanwhile the OSP code was updated to fix a
  panic in case of error.

- OBD_FAIL_TGT_LAST_REPLAY is removed along with its test. It was
  never used and seems to have been introduced by mistake.

Test-Parameters: envdefinitions=SLOW=yes alwaysuploadlogs testlist=replay-dual,replay-single
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Signed-off-by: Mikhail Pershin <mike.pershin@intel.com>
Change-Id: If5113e459f5628047e17114b6bc20ba910f3c142
Reviewed-on: http://review.whamcloud.com/12232
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/ldlm/ldlm_lockd.c
lustre/osp/osp_sync.c
lustre/target/tgt_handler.c
lustre/tests/replay-dual.sh
lustre/tests/replay-single.sh

index 6c43bb4..e896549 100644 (file)
@@ -246,6 +246,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MDS_RENAME2             0x154
 #define OBD_FAIL_MDS_RENAME3             0x155
 #define OBD_FAIL_MDS_RENAME4             0x156
+#define OBD_FAIL_MDS_LDLM_REPLY_NET     0x157
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
@@ -354,6 +355,10 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_LDLM_CP_CB_WAIT4        0x322
 #define OBD_FAIL_LDLM_CP_CB_WAIT5        0x323
 
+#define OBD_FAIL_LDLM_SRV_BL_AST        0x324
+#define OBD_FAIL_LDLM_SRV_CP_AST        0x325
+#define OBD_FAIL_LDLM_SRV_GL_AST        0x326
+
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
 
@@ -421,7 +426,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_TGT_REPLAY_DROP         0x707
 #define OBD_FAIL_TGT_FAKE_EXP            0x708
 #define OBD_FAIL_TGT_REPLAY_DELAY        0x709
-#define OBD_FAIL_TGT_LAST_REPLAY         0x710
+/* #define OBD_FAIL_TGT_LAST_REPLAY         0x710 (obsoleted) */
 #define OBD_FAIL_TGT_CLIENT_ADD          0x711
 #define OBD_FAIL_TGT_RCVG_FLAG           0x712
 #define OBD_FAIL_TGT_DELAY_CONDITIONAL  0x713
@@ -448,6 +453,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MGS_TARGET_REG_NET     0x90a
 #define OBD_FAIL_MGS_TARGET_DEL_NET     0x90b
 #define OBD_FAIL_MGS_CONFIG_READ_NET    0x90c
+#define OBD_FAIL_MGS_LDLM_REPLY_NET     0x90d
 
 #define OBD_FAIL_QUOTA_DQACQ_NET                       0xA01
 #define OBD_FAIL_QUOTA_EDQUOT            0xA02
@@ -599,6 +605,11 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_ONCE                           CFS_FAIL_ONCE
 #define OBD_FAILED                              CFS_FAILED
 
+#define LUT_FAIL_CLASS(fail_id)                        (((fail_id) >> 8) << 16)
+#define LUT_FAIL_MGT                           LUT_FAIL_CLASS(OBD_FAIL_MGS)
+#define LUT_FAIL_MDT                           LUT_FAIL_CLASS(OBD_FAIL_MDS)
+#define LUT_FAIL_OST                           LUT_FAIL_CLASS(OBD_FAIL_OST)
+
 extern atomic_t libcfs_kmemory;
 
 #ifdef CONFIG_PROC_FS
index a277287..7f70349 100644 (file)
@@ -920,7 +920,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         LASSERT(lock != NULL);
         LASSERT(data != NULL);
 
-       if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) {
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_SRV_CP_AST)) {
                LDLM_DEBUG(lock, "dropping CP AST");
                RETURN(0);
        }
index 00c4955..d0b6d7c 100644 (file)
@@ -344,25 +344,23 @@ static int osp_sync_add_rec(const struct lu_env *env, struct osp_device *d,
        ctxt = llog_get_context(d->opd_obd, LLOG_MDS_OST_ORIG_CTXT);
        if (ctxt == NULL)
                RETURN(-ENOMEM);
+
        rc = llog_add(env, ctxt->loc_handle, &osi->osi_hdr, &osi->osi_cookie,
                      th);
        llog_ctxt_put(ctxt);
 
-       CDEBUG(D_OTHER, "%s: new record "DOSTID":%lu/%lu: %d\n",
-              d->opd_obd->obd_name, POSTID(&osi->osi_cookie.lgc_lgl.lgl_oi),
-              (unsigned long) osi->osi_cookie.lgc_lgl.lgl_ogen,
-              (unsigned long) osi->osi_cookie.lgc_index, rc);
-
-       if (rc > 0)
-               rc = 0;
-
-       if (likely(rc == 0)) {
+       if (likely(rc >= 0)) {
+               CDEBUG(D_OTHER, "%s: new record "DOSTID":%lu/%lu: %d\n",
+                      d->opd_obd->obd_name,
+                      POSTID(&osi->osi_cookie.lgc_lgl.lgl_oi),
+                      (unsigned long)osi->osi_cookie.lgc_lgl.lgl_ogen,
+                      (unsigned long)osi->osi_cookie.lgc_index, rc);
                spin_lock(&d->opd_syn_lock);
                d->opd_syn_changes++;
                spin_unlock(&d->opd_syn_lock);
        }
-
-       RETURN(rc);
+       /* return 0 always here, error case just cause no llog record */
+       RETURN(0);
 }
 
 int osp_sync_add(const struct lu_env *env, struct osp_object *o,
index dd93805..55b1869 100644 (file)
@@ -1237,6 +1237,20 @@ int tgt_enqueue(struct tgt_session_info *tsi)
        if (rc)
                RETURN(err_serious(rc));
 
+       switch (LUT_FAIL_CLASS(tsi->tsi_reply_fail_id)) {
+       case LUT_FAIL_MDT:
+               tsi->tsi_reply_fail_id = OBD_FAIL_MDS_LDLM_REPLY_NET;
+               break;
+       case LUT_FAIL_OST:
+               tsi->tsi_reply_fail_id = OBD_FAIL_OST_LDLM_REPLY_NET;
+               break;
+       case LUT_FAIL_MGT:
+               tsi->tsi_reply_fail_id = OBD_FAIL_MGS_LDLM_REPLY_NET;
+               break;
+       default:
+               tsi->tsi_reply_fail_id = OBD_FAIL_LDLM_REPLY;
+               break;
+       }
        RETURN(req->rq_status);
 }
 EXPORT_SYMBOL(tgt_enqueue);
index 5cab1a4..69c7b20 100755 (executable)
@@ -901,9 +901,15 @@ test_25() {
        drop_ldlm_cancel "multiop $DIR2/$tfile Ow512" &
        sleep 1
 
-#define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
        # failover, replay and resend replayed waiting locks
-       do_facet ost1 lctl set_param fail_loc=0x80000213
+       if [ $(lustre_version_code ost1) -ge $(version_code 2.6.90) ]; then
+               #define OBD_FAIL_LDLM_SRV_CP_AST      0x325
+               do_facet ost1 lctl set_param fail_loc=0x80000325
+       else
+               #define OBD_FAIL_OST_LDLM_REPLY_NET     0x213
+               do_facet ost1 lctl set_param fail_loc=0x80000213
+       fi
+
        fail ost1
 
        # multiop does not finish because CP AST is skipped;
index 2dfe615..25f49a7 100755 (executable)
@@ -1069,17 +1069,28 @@ run_test 50 "Double OSC recovery, don't LASSERT (3812)"
 
 # b3764 timed out lock replay
 test_52() {
-    touch $DIR/$tfile
-    cancel_lru_locks mdc
+       [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.90) ] &&
+               skip "MDS prior to 2.6.90 handle LDLM_REPLY_NET incorrectly" &&
+               return 0
 
-    multiop $DIR/$tfile s || return 1
-    replay_barrier $SINGLEMDS
-#define OBD_FAIL_LDLM_REPLY              0x30c
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
-    fail $SINGLEMDS || return 2
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+       touch $DIR/$tfile
+       cancel_lru_locks mdc
+
+       multiop_bg_pause $DIR/$tfile s_s || return 1
+       mpid=$!
+
+       #define OBD_FAIL_MDS_LDLM_REPLY_NET     0x157
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "0"
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000157"
 
-    $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
+       fail $SINGLEMDS || return 2
+       kill -USR1 $mpid
+       wait $mpid || return 3
+
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+       lctl set_param fail_loc=0x0
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+       rm -f $DIR/$tfile
 }
 run_test 52 "time out lock replay (3764)"
 
@@ -1988,22 +1999,6 @@ test_73b() {
 }
 run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
 
-test_73c() {
-    multiop_bg_pause $DIR/$tfile O_tSc || return 3
-    pid=$!
-    rm -f $DIR/$tfile
-
-    replay_barrier $SINGLEMDS
-#define OBD_FAIL_TGT_LAST_REPLAY       0x710
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000710"
-    fail $SINGLEMDS
-    kill -USR1 $pid
-    wait $pid || return 1
-    [ -e $DIR/$tfile ] && return 2
-    return 0
-}
-run_test 73c "open(O_CREAT), unlink, replay, reconnect at last_replay, close"
-
 # bug 18554
 test_74() {
     local clients=${CLIENTS:-$HOSTNAME}
@@ -2043,7 +2038,7 @@ test_80a() {
        local remote_dir=$DIR/$tdir/remote_dir
 
        mkdir -p $DIR/$tdir
-       #define OBD_FAIL_UPDATE_OBJ_NET_REP     0x1701
+       #define OBD_FAIL_OUT_UPDATE_NET_REP     0x1701
        do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
        $LFS mkdir -i $MDTIDX $remote_dir &
        local CLIENT_PID=$!
@@ -2487,26 +2482,6 @@ test_81h() {
 }
 run_test 81h "DNE: unlink remote dir, drop request reply, fail 2 MDTs"
 
-test_83a() {
-    mkdir -p $DIR/$tdir
-    createmany -o $DIR/$tdir/$tfile- 10 || return 1
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD       0x140
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
-    unlinkmany $DIR/$tdir/$tfile- 10 || return 2
-}
-run_test 83a "fail log_add during unlink recovery"
-
-test_83b() {
-    mkdir -p $DIR/$tdir
-    createmany -o $DIR/$tdir/$tfile- 10 || return 1
-    replay_barrier $SINGLEMDS
-    unlinkmany $DIR/$tdir/$tfile- 10 || return 2
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD       0x140
-    do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
-    fail $SINGLEMDS
-}
-run_test 83b "fail log_add during unlink recovery"
-
 test_84a() {
 #define OBD_FAIL_MDS_OPEN_WAIT_CREATE  0x144
     do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144"
@@ -2870,7 +2845,7 @@ test_100a() {
        #otherwise it may create single stripe dir here
        $LFS setdirstripe -i1 $DIR/$tdir/remote_dir
 
-       #define OBD_FAIL_UPDATE_OBJ_NET_REP     0x1701
+       #define OBD_FAIL_OUT_UPDATE_NET_REP     0x1701
        do_facet mds$((MDTIDX+1)) lctl set_param fail_loc=0x1701
        $LFS setdirstripe -i0 -c2 $striped_dir &
        local CLIENT_PID=$!