Whamcloud - gitweb
LU-13061 osp: check catlog FID after reading in 98/36998/4
author Hongchao Zhang <hongchao@whamcloud.com>
Thu, 19 Dec 2019 02:52:29 +0000 (21:52 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 10 Jan 2020 07:40:24 +0000 (07:40 +0000)
In osp_sync_llog_init(), the catlog FID read from "CATALOGS"
should be checked for sanity before it is used to open the llog.

Change-Id: I4342b21b7d5c6d408a9ab52a1e30815ae1d5f563
Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/36998
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Yang Sheng <ys@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd_support.h
lustre/osp/osp_sync.c
lustre/tests/recovery-small.sh

index b81d22a..2a78d5a 100644 (file)
@@ -687,8 +687,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSP_FAKE_PRECREATE            0x2102
 #define OBD_FAIL_OSP_RPCS_SEM                  0x2104
 #define OBD_FAIL_OSP_CANT_PROCESS_LLOG         0x2105
+#define OBD_FAIL_OSP_INVALID_LOGID             0x2106
 
- /* barrier */
+/* barrier */
 #define OBD_FAIL_MGS_BARRIER_READ_NET          0x2200
 #define OBD_FAIL_MGS_BARRIER_NOTIFY_NET                0x2201
 
index f5e18f6..5d20e0b 100644 (file)
@@ -1421,13 +1421,30 @@ static int osp_sync_llog_init(const struct lu_env *env, struct osp_device *d)
        LASSERT(ctxt);
 
        if (likely(logid_id(&osi->osi_cid.lci_logid) != 0)) {
-               rc = llog_open(env, ctxt, &lgh, &osi->osi_cid.lci_logid, NULL,
-                              LLOG_OPEN_EXISTS);
-               /* re-create llog if it is missing */
-               if (rc == -ENOENT)
+               struct lu_fid fid_temp;
+
+               if (CFS_FAIL_CHECK(OBD_FAIL_OSP_INVALID_LOGID)) {
+                       memset(&osi->osi_cid, 0, sizeof(osi->osi_cid));
+                       logid_set_id(&osi->osi_cid.lci_logid, cfs_fail_val);
+               }
+
+               logid_to_fid(&osi->osi_cid.lci_logid, &fid_temp);
+               if (fid_is_sane(&fid_temp)) {
+                       rc = llog_open(env, ctxt, &lgh, &osi->osi_cid.lci_logid,
+                                      NULL, LLOG_OPEN_EXISTS);
+
+                       /* re-create llog if it is missing */
+                       if (rc == -ENOENT)
+                               logid_set_id(&osi->osi_cid.lci_logid, 0);
+                       else if (rc < 0)
+                               GOTO(out_cleanup, rc);
+               } else {
+                       CERROR("%s: the catid "DFID" for init llog %d is bad\n",
+                              obd->obd_name, PFID(&fid_temp), d->opd_index);
+
+                       /* it will be recreated later */
                        logid_set_id(&osi->osi_cid.lci_logid, 0);
-               else if (rc < 0)
-                       GOTO(out_cleanup, rc);
+               }
        }
 
        if (unlikely(logid_id(&osi->osi_cid.lci_logid) == 0)) {
index 389d530..ec35eec 100755 (executable)
@@ -2904,6 +2904,21 @@ test_138() {
 }
 run_test 138 "Umount MDT during recovery"
 
+test_139() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       [ $MDS1_VERSION -lt $(version_code 2.13.50) ] &&
+               skip "Need MDS version at least 2.13.50"
+
+       mdt_dev=$(mdsdevname 1)
+
+       stop $SINGLEMDS || error "stop $SINGLEMDS failed"
+
+#define OBD_FAIL_OSP_INVALID_LOGID             0x2106
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0x68 fail_loc=0x80002106
+       start $SINGLEMDS $mdt_dev $MDS_MOUNT_OPTS || error "Fail to start MDT"
+}
+run_test 139 "corrupted catid won't cause crash"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status