Whamcloud - gitweb
LU-10395 tests: add test_280 sanity 35/37635/6
author Alexander Boyko <c17825@cray.com>
Thu, 20 Feb 2020 10:12:03 +0000 (05:12 -0500)
committer Oleg Drokin <green@whamcloud.com>
Tue, 7 Apr 2020 17:21:04 +0000 (17:21 +0000)
The test reproduces a race between client llog processing
and MGS umount.

osd_fid2oi()) ASSERTION( osd->od_oi_table != NULL &&
osd->od_oi_count >= 1 ) failed: [0xa:0xb:0x0]

 #3 lbug_with_loc at ffffffffc0c098cb [libcfs]
 #4 __osd_oi_lookup at ffffffffc13e5610 [osd_ldiskfs]
 #5 osd_oi_lookup at ffffffffc13e76fd [osd_ldiskfs]
 #6 osd_fid_lookup at ffffffffc13e2cc5 [osd_ldiskfs]
 #7 osd_object_init at ffffffffc13e44e1 [osd_ldiskfs]
 #8 lu_object_start at ffffffffc0e262ab [obdclass]
 #9 lu_object_find_at at ffffffffc0e2a121 [obdclass]
 #10 dt_locate_at at ffffffffc0e2b6dd [obdclass]
 #11 llog_osd_open at ffffffffc0deae9e [obdclass]
 #12 llog_open at ffffffffc0dd8f3a [obdclass]
 #13 llog_origin_handle_next_block at ffffffffc1200a06 [ptlrpc]
 #14 tgt_llog_next_block at ffffffffc124b6c3 [ptlrpc]

Cray-bug-id: LUS-8505
Test-Parameters: standalonemgs=false trivial testlist=sanity envdefinitions=ONLY=280,ONLY_REPEAT=5
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I449b18779bfe391784ad532451213d5d5456c1b1
Reviewed-on: https://review.whamcloud.com/37635
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/obdclass/llog_osd.c
lustre/obdclass/obd_mount_server.c
lustre/osd-ldiskfs/osd_handler.c
lustre/ptlrpc/llog_server.c
lustre/tests/sanity.sh

index 3d4f445..1efac3f 100644 (file)
@@ -237,6 +237,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b
 #define OBD_FAIL_MDS_FLD_LOOKUP                        0x15c
 #define OBD_FAIL_MDS_CHANGELOG_REORDER 0x15d
+#define OBD_FAIL_MDS_LLOG_UMOUNT_RACE   0x15e
 #define OBD_FAIL_MDS_INTENT_DELAY              0x160
 #define OBD_FAIL_MDS_XATTR_REP                 0x161
 #define OBD_FAIL_MDS_TRACK_OVERFLOW     0x162
@@ -702,7 +703,7 @@ extern char obd_jobid_var[];
 
 /* LNet is allocated failure locations 0xe000 to 0xffff */
 /* Assign references to moved code to reduce code changes */
-#define OBD_FAIL_PRECHECK(id)                   CFS_FAIL_PRECHECK(id)
+#define OBD_FAIL_PRECHECK(id)                   (unlikely(CFS_FAIL_PRECHECK(id)))
 #define OBD_FAIL_CHECK(id)                      CFS_FAIL_CHECK(id)
 #define OBD_FAIL_CHECK_VALUE(id, value)         CFS_FAIL_CHECK_VALUE(id, value)
 #define OBD_FAIL_CHECK_ORSET(id, value)         CFS_FAIL_CHECK_ORSET(id, value)
index edc1edc..c634e45 100644 (file)
@@ -1309,7 +1309,12 @@ generate:
                        GOTO(out, rc);
                new_id = true;
        }
-
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) &&
+           cfs_fail_val == 1) {
+               cfs_fail_val = 2;
+               OBD_RACE(OBD_FAIL_MDS_LLOG_UMOUNT_RACE);
+               msleep(MSEC_PER_SEC);
+       }
        o = ls_locate(env, ls, &lgi->lgi_fid, NULL);
        if (IS_ERR(o))
                GOTO(out_name, rc = PTR_ERR(o));
index cff638f..24bf5c7 100644 (file)
@@ -140,6 +140,7 @@ static int server_deregister_mount(const char *name)
        OBD_FREE(lmi, sizeof(*lmi));
        mutex_unlock(&lustre_mount_info_lock);
 
+       OBD_RACE(OBD_FAIL_MDS_LLOG_UMOUNT_RACE);
        RETURN(0);
 }
 
index 273b3c1..38cf694 100644 (file)
@@ -1400,6 +1400,13 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l,
 
        LINVRNT(osd_invariant(obj));
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) &&
+           cfs_fail_val == 2) {
+               struct osd_thread_info *info = osd_oti_get(env);
+               struct osd_idmap_cache *oic = &info->oti_cache;
+               /* invalidate thread cache */
+               memset(&oic->oic_fid, 0, sizeof(oic->oic_fid));
+       }
        if (fid_is_otable_it(&l->lo_header->loh_fid)) {
                obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
                l->lo_header->loh_attr |= LOHA_EXISTS;
index ca91a1c..e5391a5 100644 (file)
@@ -142,6 +142,8 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req)
        ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx);
        if (ctxt == NULL)
                RETURN(-ENODEV);
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE))
+               cfs_fail_val = 1;
 
        rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle,
                       &body->lgd_logid, NULL, LLOG_OPEN_EXISTS);
index 18a1390..e54435d 100755 (executable)
@@ -19537,6 +19537,25 @@ test_278() {
 }
 run_test 278 "Race starting MDS between MDTs stop/start"
 
+test_280() {
+       [ $MGS_VERSION -lt $(version_code 2.13.52) ] &&
+               skip "Need MGS version at least 2.13.52"
+       [ $PARALLEL == "yes" ] && skip "skip parallel run"
+       combined_mgs_mds || skip "needs combined MGS/MDT"
+
+       umount_client $MOUNT
+#define OBD_FAIL_MDS_LLOG_UMOUNT_RACE   0x15e
+       do_facet mgs $LCTL set_param fail_loc=0x8000015e fail_val=0
+
+       mount_client $MOUNT &
+       sleep 1
+       stop mgs || error "stop mgs failed"
+       #for a race mgs would crash
+       start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "start mgs failed"
+       mount_client $MOUNT || error "mount client failed"
+}
+run_test 280 "Race between MGS umount and client llog processing"
+
 cleanup_test_300() {
        trap 0
        umask $SAVE_UMASK