Whamcloud - gitweb
LU-4941 lfsck: check LOV EA header properly
[fs/lustre-release.git] / lustre / lfsck / lfsck_layout.c
index de96726..e9318a1 100644 (file)
@@ -349,19 +349,40 @@ again:
 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
 {
        __u32 magic;
-       __u32 patten;
+       __u32 pattern;
 
        magic = le32_to_cpu(lmm->lmm_magic);
        /* If magic crashed, keep it there. Sometime later, during OST-object
         * orphan handling, if some OST-object(s) back-point to it, it can be
         * verified and repaired. */
-       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
-               return -EINVAL;
+       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3) {
+               struct ost_id   oi;
+               int             rc;
+
+               lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi);
+               if ((magic & LOV_MAGIC_MASK) == LOV_MAGIC_MAGIC)
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = -EINVAL;
 
-       patten = le32_to_cpu(lmm->lmm_pattern);
+               CDEBUG(D_LFSCK, "%s LOV EA magic %u on "DOSTID"\n",
+                      rc == -EINVAL ? "Unknown" : "Unsupported",
+                      magic, POSTID(&oi));
+
+               return rc;
+       }
+
+       pattern = le32_to_cpu(lmm->lmm_pattern);
        /* XXX: currently, we only support LOV_PATTERN_RAID0. */
-       if (patten != LOV_PATTERN_RAID0)
+       if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
+               struct ost_id oi;
+
+               lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi);
+               CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u on "DOSTID"\n",
+                      pattern, POSTID(&oi));
+
                return -EOPNOTSUPP;
+       }
 
        return 0;
 }
@@ -1153,6 +1174,17 @@ out:
        return rc;
 }
 
+static void lfsck_layout_record_failure(const struct lu_env *env,
+                                                struct lfsck_instance *lfsck,
+                                                struct lfsck_layout *lo)
+{
+       lo->ll_objs_failed_phase1++;
+       if (unlikely(lo->ll_pos_first_inconsistent == 0))
+               lo->ll_pos_first_inconsistent =
+                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
+                                                       lfsck->li_di_oit);
+}
+
 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                                               struct ptlrpc_request *req,
                                               void *args, int rc)
@@ -1699,14 +1731,14 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env,
                                     struct dt_object *parent,
                                     struct lu_fid *cfid,
                                     struct lu_buf *buf, int fl,
-                                    __u32 ost_idx, __u32 ea_off)
+                                    __u32 ost_idx, __u32 ea_off, bool reset)
 {
        struct lov_mds_md_v1    *lmm    = buf->lb_buf;
        struct lov_ost_data_v1  *objs;
        int                      rc;
        ENTRY;
 
-       if (fl == LU_XATTR_CREATE) {
+       if (fl == LU_XATTR_CREATE || reset) {
                LASSERT(buf->lb_len == lov_mds_md_size(ea_off + 1,
                                                       LOV_MAGIC_V1));
 
@@ -1995,7 +2027,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env,
                /* 3b. Add layout EA for the MDT-object. */
                rc = lfsck_layout_extend_lovea(env, th, pobj, cfid, ea_buf,
                                               LU_XATTR_CREATE, ltd->ltd_index,
-                                              ea_off);
+                                              ea_off, false);
        dt_write_unlock(env, pobj);
        if (rc < 0)
                GOTO(stop, rc);
@@ -2395,13 +2427,30 @@ again:
 
                buf->lb_len = rc;
                rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
-                                              fl, ost_idx, ea_off);
+                                              fl, ost_idx, ea_off, false);
 
                GOTO(unlock_parent, rc);
        }
 
        lmm = buf->lb_buf;
        rc1 = lfsck_layout_verify_header(lmm);
+
+       /* If the LOV EA crashed, then rebuild it. */
+       if (rc1 == -EINVAL) {
+               if (bk->lb_param & LPF_DRYRUN)
+                       GOTO(unlock_parent, rc = 1);
+
+               LASSERT(buf->lb_len >= rc);
+
+               buf->lb_len = rc;
+               memset(lmm, 0, buf->lb_len);
+               rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
+                                              fl, ost_idx, ea_off, true);
+
+               GOTO(unlock_parent, rc);
+       }
+
+       /* For other unknown magic/pattern, keep the current LOV EA. */
        if (rc1 != 0)
                GOTO(unlock_parent, rc = rc1);
 
@@ -2434,7 +2483,7 @@ again:
 
                buf->lb_len = rc;
                rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
-                                              fl, ost_idx, ea_off);
+                                              fl, ost_idx, ea_off, false);
                GOTO(unlock_parent, rc);
        }
 
@@ -2936,10 +2985,6 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env,
                GOTO(unlock2, rc = 0);
 
        lmm = buf->lb_buf;
-       rc = lfsck_layout_verify_header(lmm);
-       if (rc != 0)
-               GOTO(unlock2, rc);
-
        /* Someone change layout during the LFSCK, no need to repair then. */
        if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen)
                GOTO(unlock2, rc = 0);
@@ -3126,14 +3171,6 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
                GOTO(out, rc);
 
        lmm = buf->lb_buf;
-       rc = lfsck_layout_verify_header(lmm);
-       if (rc != 0)
-               GOTO(out, rc);
-
-       /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
-        * been verified in lfsck_layout_verify_header() already. If some
-        * new magic introduced in the future, then layout LFSCK needs to
-        * be updated also. */
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_V1) {
                objs = &(lmm->lmm_objects[0]);
@@ -3299,13 +3336,18 @@ out:
                        lo->ll_objs_skipped++;
                        rc = 0;
                } else {
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                }
        } else if (rc > 0) {
                LASSERTF(type > LLIT_NONE && type <= LLIT_MAX,
                         "unknown type = %d\n", type);
 
                lo->ll_objs_repaired[type - 1]++;
+               if (bk->lb_param & LPF_DRYRUN &&
+                   unlikely(lo->ll_pos_first_inconsistent == 0))
+                       lo->ll_pos_first_inconsistent =
+                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
+                                                       lfsck->li_di_oit);
        }
        up_write(&com->lc_sem);
 
@@ -4062,14 +4104,7 @@ static void lfsck_layout_fail(const struct lu_env *env,
        down_write(&com->lc_sem);
        if (new_checked)
                com->lc_new_checked++;
-       lo->ll_objs_failed_phase1++;
-       if (lo->ll_pos_first_inconsistent == 0) {
-               struct lfsck_instance *lfsck = com->lc_lfsck;
-
-               lo->ll_pos_first_inconsistent =
-                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
-                                                       lfsck->li_di_oit);
-       }
+       lfsck_layout_record_failure(env, com->lc_lfsck, lo);
        up_write(&com->lc_sem);
 }
 
@@ -4408,7 +4443,7 @@ next:
                down_write(&com->lc_sem);
                com->lc_new_checked++;
                if (rc < 0)
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                up_write(&com->lc_sem);
 
                if (cobj != NULL && !IS_ERR(cobj))
@@ -4478,6 +4513,8 @@ again:
        buf->lb_len = rc;
        lmm = buf->lb_buf;
        rc = lfsck_layout_verify_header(lmm);
+       /* If the LOV EA crashed, then it may be rebuilt later when
+        * handling orphan OST-objects. */
        if (rc != 0)
                GOTO(out, rc);
 
@@ -4555,7 +4592,7 @@ out:
                down_write(&com->lc_sem);
                com->lc_new_checked++;
                if (rc < 0)
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                up_write(&com->lc_sem);
        }
        buf->lb_len = buflen;
@@ -4891,7 +4928,8 @@ static int lfsck_layout_dump(const struct lu_env *env,
                const struct dt_it_ops *iops;
                cfs_duration_t duration = cfs_time_current() -
                                          lfsck->li_time_last_checkpoint;
-               __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
+               __u64 checked = lo->ll_objs_checked_phase1 +
+                               com->lc_new_checked;
                __u64 speed = checked;
                __u64 new_checked = com->lc_new_checked * HZ;
                __u32 rtime = lo->ll_run_time_phase1 +
@@ -4942,31 +4980,36 @@ static int lfsck_layout_dump(const struct lu_env *env,
        } else if (lo->ll_status == LS_SCANNING_PHASE2) {
                cfs_duration_t duration = cfs_time_current() -
                                          lfsck->li_time_last_checkpoint;
-               __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
-               __u64 speed = checked;
+               __u64 checked = lo->ll_objs_checked_phase2 +
+                               com->lc_new_checked;
+               __u64 speed1 = lo->ll_objs_checked_phase1;
+               __u64 speed2 = checked;
                __u64 new_checked = com->lc_new_checked * HZ;
-               __u32 rtime = lo->ll_run_time_phase1 +
+               __u32 rtime = lo->ll_run_time_phase2 +
                              cfs_duration_sec(duration + HALF_SEC);
 
                if (duration != 0)
                        do_div(new_checked, duration);
+               if (lo->ll_run_time_phase1 != 0)
+                       do_div(speed1, lo->ll_run_time_phase1);
                if (rtime != 0)
-                       do_div(speed, rtime);
+                       do_div(speed2, rtime);
                rc = snprintf(buf, len,
                              "checked_phase1: "LPU64"\n"
                              "checked_phase2: "LPU64"\n"
                              "run_time_phase1: %u seconds\n"
                              "run_time_phase2: %u seconds\n"
                              "average_speed_phase1: "LPU64" items/sec\n"
-                             "average_speed_phase2: N/A\n"
-                             "real-time_speed_phase1: "LPU64" items/sec\n"
-                             "real-time_speed_phase2: N/A\n"
+                             "average_speed_phase2: "LPU64" items/sec\n"
+                             "real-time_speed_phase1: N/A\n"
+                             "real-time_speed_phase2: "LPU64" items/sec\n"
                              "current_position: "DFID"\n",
+                             lo->ll_objs_checked_phase1,
                              checked,
-                             lo->ll_objs_checked_phase2,
+                             lo->ll_run_time_phase1,
                              rtime,
-                             lo->ll_run_time_phase2,
-                             speed,
+                             speed1,
+                             speed2,
                              new_checked,
                              PFID(&com->lc_fid_latest_scanned_phase2));
                if (rc <= 0)