Whamcloud - gitweb
LU-13535 lfsck: fix possible PFL layout corruption
[fs/lustre-release.git] / lustre / lfsck / lfsck_layout.c
index 4f250e0..68ca022 100644 (file)
@@ -368,7 +368,7 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        }
 #endif
 
-       if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
+       if (!lov_pattern_supported_normal_comp(lov_pattern(pattern))) {
                CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file "
                       DFID" in the component %x\n",
                       pattern, PFID(lfsck_dto2fid(obj)), comp_id);
@@ -379,8 +379,29 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        return 0;
 }
 
+static int lfsck_layout_verify_header_foreign(struct dt_object *obj,
+                                             struct lov_foreign_md *lfm,
+                                             size_t len)
+{
+       /* the caller has already matched lfm_magic; only lfm_length is used */
+       __u32 value_len = le32_to_cpu(lfm->lfm_length);
+       /* lfm_type and lfm_flags are only logged, not validated, for now */
+
+       CDEBUG(D_INFO, "foreign LOV EA, magic %x, len %u, type %x, flags %x, for file "DFID"\n",
+              le32_to_cpu(lfm->lfm_magic), value_len,
+              le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags),
+              PFID(lfsck_dto2fid(obj)));
+
+       if (len != value_len + offsetof(typeof(*lfm), lfm_value))
+               CDEBUG(D_LFSCK, "foreign LOV EA internal size %u does not match EA full size %zu for file "DFID"\n",
+                      value_len, len, PFID(lfsck_dto2fid(obj)));
+
+       /* a size mismatch is only logged; always -ENODATA, nothing to repair */
+       return -ENODATA;
+}
+
 static int lfsck_layout_verify_header(struct dt_object *obj,
-                                     struct lov_mds_md_v1 *lmm)
+                                     struct lov_mds_md_v1 *lmm, size_t len)
 {
        int rc = 0;
 
@@ -430,6 +451,10 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                                        le32_to_cpu(lcme->lcme_offset)), start,
                                        comp_id);
                }
+       } else if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_FOREIGN) {
+               rc = lfsck_layout_verify_header_foreign(obj,
+                                               (struct lov_foreign_md *)lmm,
+                                               len);
        } else {
                rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0);
        }
@@ -468,7 +493,7 @@ again:
                goto again;
        }
 
-       rc1 = lfsck_layout_verify_header(obj, buf->lb_buf);
+       rc1 = lfsck_layout_verify_header(obj, buf->lb_buf, rc);
 
        return rc1 ? rc1 : rc;
 }
@@ -622,7 +647,7 @@ lfsck_rbtree_insert(struct lfsck_layout_slave_data *llsd,
        return lrn;
 }
 
-extern const struct dt_index_operations lfsck_orphan_index_ops;
+static const struct dt_index_operations lfsck_orphan_index_ops;
 
 static int lfsck_rbtree_setup(const struct lu_env *env,
                              struct lfsck_component *com)
@@ -1366,23 +1391,15 @@ lfsck_layout_lastid_load(const struct lu_env *env,
 
                        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) &&
                            cfs_fail_val > 0) {
-                               struct l_wait_info lwi = LWI_TIMEOUT(
-                                               cfs_time_seconds(cfs_fail_val),
-                                               NULL, NULL);
-
-                               /* Some others may changed the cfs_fail_val
-                                * as zero after above check, re-check it for
-                                * sure to avoid falling into wait for ever. */
-                               if (likely(lwi.lwi_timeout > 0)) {
-                                       struct ptlrpc_thread *thread =
-                                               &lfsck->li_thread;
-
-                                       up_write(&com->lc_sem);
-                                       l_wait_event(thread->t_ctl_waitq,
-                                                    !thread_is_running(thread),
-                                                    &lwi);
-                                       down_write(&com->lc_sem);
-                               }
+                               struct ptlrpc_thread *thread =
+                                       &lfsck->li_thread;
+
+                               up_write(&com->lc_sem);
+                               wait_event_idle_timeout(
+                                       thread->t_ctl_waitq,
+                                       !thread_is_running(thread),
+                                       cfs_time_seconds(cfs_fail_val));
+                               down_write(&com->lc_sem);
                        }
                }
 
@@ -2312,6 +2329,9 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env,
        int                              rc     = 0;
        ENTRY;
 
+       if (lfsck_is_dryrun(lfsck))
+               GOTO(log, rc = 0);
+
        if (unlikely(lpf == NULL))
                GOTO(log, rc = -ENXIO);
 
@@ -2712,6 +2732,9 @@ static int lfsck_layout_conflict_create(const struct lu_env *env,
        if (rc != 0 && rc != -ENOENT)
                GOTO(unlock, rc);
 
+       if (lfsck_is_dryrun(com->lc_lfsck))
+               GOTO(unlock, rc = 0);
+
        th = dt_trans_create(env, dev);
        if (IS_ERR(th))
                GOTO(unlock, rc = PTR_ERR(th));
@@ -2791,6 +2814,9 @@ static int lfsck_layout_recreate_lovea(const struct lu_env *env,
        bool new_mirror = true;
        ENTRY;
 
+       if (lfsck_is_dryrun(lfsck))
+               RETURN(0);
+
        rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
                              MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR,
                              LCK_EX);
@@ -2881,7 +2907,7 @@ again:
        }
 
        lmm = buf->lb_buf;
-       rc1 = lfsck_layout_verify_header(parent, lmm);
+       rc1 = lfsck_layout_verify_header(parent, lmm, lovea_size);
 
        /* If the LOV EA crashed, the rebuild it. */
        if (rc1 == -EINVAL) {
@@ -4040,6 +4066,11 @@ log:
        return rc;
 }
 
+#define CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid, msg)              \
+       CDEBUG(D_LFSCK, "%s:("DFID"|"DFID")/"DFID":XATTR %s: %s\n",      \
+              lfsck_lfsck2name(lfsck), PFID(&(lso)->lso_fid), PFID(pfid), \
+              PFID(cfid), XATTR_NAME_FID, msg) /* no ';': the caller adds it */
+
 /* Check whether the OST-object correctly back points to the
  * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
 static int lfsck_layout_check_parent(const struct lu_env *env,
@@ -4057,6 +4088,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
        struct lov_mds_md_v1            *lmm;
        struct lov_ost_data_v1          *objs;
        struct lustre_handle             lh     = { 0 };
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
        int                              rc;
        int                              i;
        __u32                            magic;
@@ -4068,13 +4100,20 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
        idx = pfid->f_stripe_idx;
        pfid->f_ver = 0;
 
-       if (unlikely(!fid_is_sane(pfid)))
+       if (unlikely(!fid_is_sane(pfid))) {
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent FID is invalid");
+
                RETURN(LLIT_UNMATCHED_PAIR);
+       }
 
        if (lu_fid_eq(pfid, &lso->lso_fid)) {
                if (likely(llr->llr_lov_idx == idx))
                        RETURN(0);
 
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the stripe index is unmatched");
+
                RETURN(LLIT_UNMATCHED_PAIR);
        }
 
@@ -4082,17 +4121,38 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
        if (IS_ERR(tobj))
                RETURN(PTR_ERR(tobj));
 
-       if (dt_object_exists(tobj) == 0 || lfsck_is_dead_obj(tobj) ||
-           !S_ISREG(lfsck_object_type(tobj)))
+       if (dt_object_exists(tobj) == 0) {
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent is nonexistent");
+
+               GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+       }
+
+       if (lfsck_is_dead_obj(tobj)) {
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent is dead object");
+
+               GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+       }
+
+       if (!S_ISREG(lfsck_object_type(tobj))) {
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent is not a regular file");
+
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+       }
 
        /* Load the tobj's layout EA, in spite of it is a local MDT-object or
         * remote one on another MDT. Then check whether the given OST-object
         * is in such layout. If yes, it is multiple referenced, otherwise it
         * is unmatched referenced case. */
        rc = lfsck_layout_get_lovea(env, tobj, buf);
-       if (rc == 0 || rc == -ENODATA || rc == -ENOENT)
+       if (rc == 0 || rc == -ENODATA || rc == -ENOENT) {
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent has no stripe data");
+
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+       }
 
        if (unlikely(rc == -EOPNOTSUPP))
                GOTO(out, rc = LLIT_NONE);
@@ -4106,8 +4166,12 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
                struct lov_comp_md_v1 *lcm = buf->lb_buf;
                struct lov_comp_md_entry_v1 *lcme;
 
-               if (ff->ff_layout.ol_comp_id == 0)
+               if (ff->ff_layout.ol_comp_id == 0) {
+                       CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                             "the parent has incorrect comp_id");
+
                        GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+               }
 
                count = le16_to_cpu(lcm->lcm_entry_count);
                for (i = 0; i < count; i++) {
@@ -4118,13 +4182,21 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
                                        le32_to_cpu(lcme->lcme_offset);
                                magic = le32_to_cpu(lmm->lmm_magic);
                                if (!(le32_to_cpu(lcme->lcme_flags) &
-                                     LCME_FL_INIT))
+                                     LCME_FL_INIT)) {
+                                       CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid,
+                                                             cfid,
+                                                             "the parent has uninitialized component");
+
                                        GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+                               }
 
                                goto further;
                        }
                }
 
+               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                     "the parent has no matched comp_id");
+
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
        }
 
@@ -4172,10 +4244,15 @@ further:
                         * after taken the lock. */
                        if (!dt_object_remote(tobj)) {
                                if (dt_object_exists(tobj) == 0 ||
-                                   lfsck_is_dead_obj(tobj))
+                                   lfsck_is_dead_obj(tobj)) {
+                                       CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid,
+                                                             cfid,
+                                                             "the parent doesn't exist anymore after lock");
+
                                        rc = LLIT_UNMATCHED_PAIR;
-                               else
+                               } else {
                                        rc = LLIT_MULTIPLE_REFERENCED;
+                               }
 
                                GOTO(unlock, rc);
                        }
@@ -4190,15 +4267,22 @@ further:
                         * has been been removed or not. */
                        rc = dt_xattr_get(env, tobj, &LU_BUF_NULL,
                                          XATTR_NAME_DUMMY);
-                       if (unlikely(rc == -ENOENT || rc >= 0))
+                       if (unlikely(rc == -ENOENT || rc >= 0)) {
+                               CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                                                     "the parent is remote object and nonexistent after lock");
+
                                rc = LLIT_UNMATCHED_PAIR;
-                       else if (rc == -ENODATA)
+                       } else if (rc == -ENODATA) {
                                rc = LLIT_MULTIPLE_REFERENCED;
+                       }
 
                        GOTO(unlock, rc);
                }
        }
 
+       CDEBUG_UNMATCHED_PAIR(lfsck, lso, pfid, cfid,
+                             "the parent has no matched stripe");
+
        GOTO(out, rc = LLIT_UNMATCHED_PAIR);
 
 unlock:
@@ -4259,6 +4343,12 @@ static int lfsck_layout_assistant_handler_p1(const struct lu_env *env,
        lfsck_buf_init(&buf, ff, sizeof(*ff));
        rc = dt_xattr_get(env, child, &buf, XATTR_NAME_FID);
        if (unlikely(rc > 0 && rc < sizeof(struct lu_fid))) {
+               CDEBUG(D_LFSCK, "%s:"DFID"/"DFID": "
+                      "the child object's %s is corrupted\n",
+                      lfsck_lfsck2name(lfsck), PFID(&lso->lso_fid),
+                      PFID(lu_object_fid(&child->do_lu)),
+                      XATTR_NAME_FID);
+
                type = LLIT_UNMATCHED_PAIR;
                goto repair;
        }
@@ -4657,7 +4747,7 @@ static int lfsck_layout_async_query(const struct lu_env *env,
        *tmp = *lr;
        ptlrpc_request_set_replen(req);
 
-       llsaa = ptlrpc_req_async_args(req);
+       llsaa = ptlrpc_req_async_args(llsaa, req);
        llsaa->llsaa_exp = exp;
        llsaa->llsaa_com = lfsck_component_get(com);
        llsaa->llsaa_llst = llst;
@@ -5331,7 +5421,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
                                     struct dt_object *parent,
                                     struct lov_mds_md_v1 *lmm, __u32 comp_id)
 {
-       struct lfsck_thread_info        *info    = lfsck_env_info(env);
+       struct lfsck_thread_info        *info    = lfsck_env_info(env);
        struct lfsck_instance           *lfsck   = com->lc_lfsck;
        struct lfsck_bookmark           *bk      = &lfsck->li_bookmark_ram;
        struct lfsck_layout             *lo      = com->lc_file_ram;
@@ -5341,7 +5431,6 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
        struct lfsck_tgt_descs          *ltds    = &lfsck->li_ost_descs;
        struct ptlrpc_thread            *mthread = &lfsck->li_thread;
        struct ptlrpc_thread            *athread = &lad->lad_thread;
-       struct l_wait_info               lwi     = { 0 };
        struct lu_buf                    buf;
        int                              rc      = 0;
        int                              i;
@@ -5371,11 +5460,10 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
                if (unlikely(lovea_slot_is_dummy(objs)))
                        continue;
 
-               l_wait_event(mthread->t_ctl_waitq,
-                            lad->lad_prefetched < bk->lb_async_windows ||
-                            !thread_is_running(mthread) ||
-                            thread_is_stopped(athread),
-                            &lwi);
+               wait_event_idle(mthread->t_ctl_waitq,
+                               lad->lad_prefetched < bk->lb_async_windows ||
+                               !thread_is_running(mthread) ||
+                               thread_is_stopped(athread));
 
                if (unlikely(!thread_is_running(mthread)) ||
                             thread_is_stopped(athread))
@@ -5583,14 +5671,15 @@ again:
        lmm = buf->lb_buf;
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_COMP_V1) {
+               struct lov_mds_md_v1 *v1;
                int i;
 
                lcm = buf->lb_buf;
                count = le16_to_cpu(lcm->lcm_entry_count);
                for (i = 0; i < count; i++) {
                        lcme = &lcm->lcm_entries[i];
-                       lmm = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
-                       if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) != 0)
+                       v1 = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
+                       if (memcmp(oi, &v1->lmm_oi, sizeof(*oi)) != 0)
                                goto fix;
                }
 
@@ -5622,7 +5711,7 @@ fix:
                if (IS_ERR(handle))
                        GOTO(out, rc = PTR_ERR(handle));
 
-               lfsck_buf_init(&ea_buf, lmm, size);
+               lfsck_buf_init(&ea_buf, buf->lb_buf, size);
                rc = dt_declare_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV,
                                          LU_XATTR_REPLACE, handle);
                if (rc != 0)
@@ -5639,12 +5728,13 @@ fix:
        }
 
        if (magic == LOV_MAGIC_COMP_V1) {
+               struct lov_mds_md_v1 *v1;
                int i;
 
                for (i = 0; i < count; i++) {
                        lcme = &lcm->lcm_entries[i];
-                       lmm = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
-                       lmm->lmm_oi = *oi;
+                       v1 = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
+                       v1->lmm_oi = *oi;
                }
        } else {
                lmm->lmm_oi = *oi;
@@ -5725,13 +5815,11 @@ static int lfsck_layout_slave_exec_oit(const struct lu_env *env,
 
        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY5) &&
            cfs_fail_val == lfsck_dev_idx(lfsck)) {
-               struct l_wait_info       lwi = LWI_TIMEOUT(cfs_time_seconds(1),
-                                                          NULL, NULL);
                struct ptlrpc_thread    *thread = &lfsck->li_thread;
 
-               l_wait_event(thread->t_ctl_waitq,
-                            !thread_is_running(thread),
-                            &lwi);
+               wait_event_idle_timeout(thread->t_ctl_waitq,
+                                       !thread_is_running(thread),
+                                       cfs_time_seconds(1));
        }
 
        lfsck_rbtree_update_bitmap(env, com, fid, false);
@@ -6033,8 +6121,8 @@ static void lfsck_layout_dump(const struct lu_env *env,
                           "run_time_phase2: %lld seconds\n"
                           "average_speed_phase1: %llu items/sec\n"
                           "average_speed_phase2: N/A\n"
-                          "real-time_speed_phase1: %llu items/sec\n"
-                          "real-time_speed_phase2: N/A\n",
+                          "real_time_speed_phase1: %llu items/sec\n"
+                          "real_time_speed_phase2: N/A\n",
                           checked,
                           lo->ll_objs_checked_phase2,
                           rtime,
@@ -6080,8 +6168,8 @@ static void lfsck_layout_dump(const struct lu_env *env,
                           "run_time_phase2: %lld seconds\n"
                           "average_speed_phase1: %llu items/sec\n"
                           "average_speed_phase2: %llu items/sec\n"
-                          "real-time_speed_phase1: N/A\n"
-                          "real-time_speed_phase2: %llu items/sec\n"
+                          "real_time_speed_phase1: N/A\n"
+                          "real_time_speed_phase2: %llu items/sec\n"
                           "current_position: "DFID"\n",
                           lo->ll_objs_checked_phase1,
                           checked,
@@ -6105,8 +6193,8 @@ static void lfsck_layout_dump(const struct lu_env *env,
                           "run_time_phase2: %lld seconds\n"
                           "average_speed_phase1: %llu items/sec\n"
                           "average_speed_phase2: %llu objs/sec\n"
-                          "real-time_speed_phase1: N/A\n"
-                          "real-time_speed_phase2: N/A\n"
+                          "real_time_speed_phase1: N/A\n"
+                          "real_time_speed_phase2: N/A\n"
                           "current_position: N/A\n",
                           lo->ll_objs_checked_phase1,
                           lo->ll_objs_checked_phase2,
@@ -6182,9 +6270,6 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
                                       LFSCK_CHECKPOINT_INTERVAL;
 
        while (1) {
-               struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30),
-                                                    NULL, NULL);
-
                rc = lfsck_layout_slave_query_master(env, com);
                if (list_empty(&llsd->llsd_master_list)) {
                        if (unlikely(!thread_is_running(thread)))
@@ -6198,21 +6283,22 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
                if (rc < 0)
                        GOTO(done, rc);
 
-               rc = l_wait_event(thread->t_ctl_waitq,
-                                 !thread_is_running(thread) ||
-                                 lo->ll_flags & LF_INCOMPLETE ||
-                                 list_empty(&llsd->llsd_master_list),
-                                 &lwi);
+               rc = wait_event_idle_timeout(
+                       thread->t_ctl_waitq,
+                       !thread_is_running(thread) ||
+                       lo->ll_flags & LF_INCOMPLETE ||
+                       list_empty(&llsd->llsd_master_list),
+                       cfs_time_seconds(30));
                if (unlikely(!thread_is_running(thread)))
                        GOTO(done, rc = 0);
 
                if (lo->ll_flags & LF_INCOMPLETE)
                        GOTO(done, rc = 1);
 
-               if (rc == -ETIMEDOUT)
+               if (rc == 0)
                        continue;
 
-               GOTO(done, rc = (rc < 0 ? rc : 1));
+               GOTO(done, rc = 1);
        }
 
 done:
@@ -7496,7 +7582,7 @@ static int lfsck_orphan_it_key_rec(const struct lu_env *env,
        return 0;
 }
 
-const struct dt_index_operations lfsck_orphan_index_ops = {
+static const struct dt_index_operations lfsck_orphan_index_ops = {
        .dio_lookup             = lfsck_orphan_index_lookup,
        .dio_declare_insert     = lfsck_orphan_index_declare_insert,
        .dio_insert             = lfsck_orphan_index_insert,