From f4eeadee5ba5d4ab9d04918d8d81d18907daa831 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Thu, 20 Feb 2020 05:12:03 -0500 Subject: [PATCH] LU-10395 tests: add test_280 sanity The test reproduces a race between client llog processing and MGS umount. osd_fid2oi()) ASSERTION( osd->od_oi_table != NULL && osd->od_oi_count >= 1 ) failed: [0xa:0xb:0x0] #3 lbug_with_loc at ffffffffc0c098cb [libcfs] #4 __osd_oi_lookup at ffffffffc13e5610 [osd_ldiskfs] #5 osd_oi_lookup at ffffffffc13e76fd [osd_ldiskfs] #6 osd_fid_lookup at ffffffffc13e2cc5 [osd_ldiskfs] #7 osd_object_init at ffffffffc13e44e1 [osd_ldiskfs] #8 lu_object_start at ffffffffc0e262ab [obdclass] #9 lu_object_find_at at ffffffffc0e2a121 [obdclass] #10 dt_locate_at at ffffffffc0e2b6dd [obdclass] #11 llog_osd_open at ffffffffc0deae9e [obdclass] #12 llog_open at ffffffffc0dd8f3a [obdclass] #13 llog_origin_handle_next_block at ffffffffc1200a06[ptlrpc] #14 tgt_llog_next_block at ffffffffc124b6c3 [ptlrpc] Cray-bug-id: LUS-8505 Test-Parameters: standalonemgs=false trivial testlist=sanity envdefinitions=ONLY=280,ONLY_REPEAT=5 Signed-off-by: Alexander Boyko Change-Id: I449b18779bfe391784ad532451213d5d5456c1b1 Reviewed-on: https://review.whamcloud.com/37635 Reviewed-by: Alexey Lyashkov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 3 ++- lustre/obdclass/llog_osd.c | 7 ++++++- lustre/obdclass/obd_mount_server.c | 1 + lustre/osd-ldiskfs/osd_handler.c | 7 +++++++ lustre/ptlrpc/llog_server.c | 2 ++ lustre/tests/sanity.sh | 19 +++++++++++++++++++ 6 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3d4f445..1efac3f 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -237,6 +237,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b #define OBD_FAIL_MDS_FLD_LOOKUP 0x15c #define OBD_FAIL_MDS_CHANGELOG_REORDER 0x15d 
+#define OBD_FAIL_MDS_LLOG_UMOUNT_RACE 0x15e #define OBD_FAIL_MDS_INTENT_DELAY 0x160 #define OBD_FAIL_MDS_XATTR_REP 0x161 #define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162 @@ -702,7 +703,7 @@ extern char obd_jobid_var[]; /* LNet is allocated failure locations 0xe000 to 0xffff */ /* Assign references to moved code to reduce code changes */ -#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) +#define OBD_FAIL_PRECHECK(id) (unlikely(CFS_FAIL_PRECHECK(id))) #define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id) #define OBD_FAIL_CHECK_VALUE(id, value) CFS_FAIL_CHECK_VALUE(id, value) #define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value) diff --git a/lustre/obdclass/llog_osd.c b/lustre/obdclass/llog_osd.c index edc1edc..c634e45 100644 --- a/lustre/obdclass/llog_osd.c +++ b/lustre/obdclass/llog_osd.c @@ -1309,7 +1309,12 @@ generate: GOTO(out, rc); new_id = true; } - + if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) && + cfs_fail_val == 1) { + cfs_fail_val = 2; + OBD_RACE(OBD_FAIL_MDS_LLOG_UMOUNT_RACE); + msleep(MSEC_PER_SEC); + } o = ls_locate(env, ls, &lgi->lgi_fid, NULL); if (IS_ERR(o)) GOTO(out_name, rc = PTR_ERR(o)); diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index cff638f..24bf5c7 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -140,6 +140,7 @@ static int server_deregister_mount(const char *name) OBD_FREE(lmi, sizeof(*lmi)); mutex_unlock(&lustre_mount_info_lock); + OBD_RACE(OBD_FAIL_MDS_LLOG_UMOUNT_RACE); RETURN(0); } diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 273b3c1..38cf694 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1400,6 +1400,13 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l, LINVRNT(osd_invariant(obj)); + if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) && + cfs_fail_val == 2) { + struct osd_thread_info *info = osd_oti_get(env); + struct 
osd_idmap_cache *oic = &info->oti_cache; + /* invalidate thread cache */ + memset(&oic->oic_fid, 0, sizeof(oic->oic_fid)); + } if (fid_is_otable_it(&l->lo_header->loh_fid)) { obj->oo_dt.do_ops = &osd_obj_otable_it_ops; l->lo_header->loh_attr |= LOHA_EXISTS; diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index ca91a1c..e5391a5 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -142,6 +142,8 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) ctxt = llog_get_context(req->rq_export->exp_obd, body->lgd_ctxt_idx); if (ctxt == NULL) RETURN(-ENODEV); + if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE)) + cfs_fail_val = 1; rc = llog_open(req->rq_svc_thread->t_env, ctxt, &loghandle, &body->lgd_logid, NULL, LLOG_OPEN_EXISTS); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 18a1390..e54435d 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -19537,6 +19537,25 @@ test_278() { } run_test 278 "Race starting MDS between MDTs stop/start" +test_280() { + [ $MGS_VERSION -lt $(version_code 2.13.52) ] && + skip "Need MGS version at least 2.13.52" + [ $PARALLEL == "yes" ] && skip "skip parallel run" + combined_mgs_mds || skip "needs combined MGS/MDT" + + umount_client $MOUNT +#define OBD_FAIL_MDS_LLOG_UMOUNT_RACE 0x15e + do_facet mgs $LCTL set_param fail_loc=0x8000015e fail_val=0 + + mount_client $MOUNT & + sleep 1 + stop mgs || error "stop mgs failed" + #for a race mgs would crash + start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "start mgs failed" + mount_client $MOUNT || error "mount client failed" +} +run_test 280 "Race between MGS umount and client llog processing" + cleanup_test_300() { trap 0 umask $SAVE_UMASK -- 1.8.3.1