From 4597fa7d884de0f1a1b030052d4d34983fed6109 Mon Sep 17 00:00:00 2001 From: Hongchao Zhang Date: Wed, 18 Dec 2019 21:52:29 -0500 Subject: [PATCH] LU-13061 osp: check catlog FID after reading in In osp_sync_llog_init, the catlog FID read from "CATALOGS" should be checked for sanity before it is used. Change-Id: I4342b21b7d5c6d408a9ab52a1e30815ae1d5f563 Signed-off-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/36998 Tested-by: jenkins Reviewed-by: Yang Sheng Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 3 ++- lustre/osp/osp_sync.c | 29 +++++++++++++++++++++++------ lustre/tests/recovery-small.sh | 15 +++++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index b81d22a..2a78d5a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -687,8 +687,9 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSP_FAKE_PRECREATE 0x2102 #define OBD_FAIL_OSP_RPCS_SEM 0x2104 #define OBD_FAIL_OSP_CANT_PROCESS_LLOG 0x2105 +#define OBD_FAIL_OSP_INVALID_LOGID 0x2106 - /* barrier */ +/* barrier */ #define OBD_FAIL_MGS_BARRIER_READ_NET 0x2200 #define OBD_FAIL_MGS_BARRIER_NOTIFY_NET 0x2201 diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index f5e18f6..5d20e0b 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -1421,13 +1421,30 @@ static int osp_sync_llog_init(const struct lu_env *env, struct osp_device *d) LASSERT(ctxt); if (likely(logid_id(&osi->osi_cid.lci_logid) != 0)) { - rc = llog_open(env, ctxt, &lgh, &osi->osi_cid.lci_logid, NULL, - LLOG_OPEN_EXISTS); - /* re-create llog if it is missing */ - if (rc == -ENOENT) + struct lu_fid fid_temp; + + if (CFS_FAIL_CHECK(OBD_FAIL_OSP_INVALID_LOGID)) { + memset(&osi->osi_cid, 0, sizeof(osi->osi_cid)); + logid_set_id(&osi->osi_cid.lci_logid, cfs_fail_val); + } + + logid_to_fid(&osi->osi_cid.lci_logid, &fid_temp); + if (fid_is_sane(&fid_temp)) { + rc = llog_open(env, ctxt, 
&lgh, &osi->osi_cid.lci_logid, + NULL, LLOG_OPEN_EXISTS); + + /* re-create llog if it is missing */ + if (rc == -ENOENT) + logid_set_id(&osi->osi_cid.lci_logid, 0); + else if (rc < 0) + GOTO(out_cleanup, rc); + } else { + CERROR("%s: the catid "DFID" for init llog %d is bad\n", + obd->obd_name, PFID(&fid_temp), d->opd_index); + + /* it will be recreated later */ logid_set_id(&osi->osi_cid.lci_logid, 0); - else if (rc < 0) - GOTO(out_cleanup, rc); + } } if (unlikely(logid_id(&osi->osi_cid.lci_logid) == 0)) { diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 389d530..ec35eec 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2904,6 +2904,21 @@ test_138() { } run_test 138 "Umount MDT during recovery" +test_139() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.13.50) ] && + skip "Need MDS version at least 2.13.50" + + mdt_dev=$(mdsdevname 1) + + stop $SINGLEMDS || error "stop $SINGLEMDS failed" + +#define OBD_FAIL_OSP_INVALID_LOGID 0x2106 + do_facet $SINGLEMDS $LCTL set_param fail_val=0x68 fail_loc=0x80002106 + start $SINGLEMDS $mdt_dev $MDS_MOUNT_OPTS || error "Fail to start MDT" +} +run_test 139 "corrupted catid won't cause crash" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1