Whamcloud - gitweb
LU-4941 lfsck: check LOV EA header properly
[fs/lustre-release.git] / lustre / lfsck / lfsck_layout.c
index d658c60..e9318a1 100644 (file)
@@ -349,26 +349,47 @@ again:
 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
 {
        __u32 magic;
-       __u32 patten;
+       __u32 pattern;
 
        magic = le32_to_cpu(lmm->lmm_magic);
        /* If magic crashed, keep it there. Sometime later, during OST-object
         * orphan handling, if some OST-object(s) back-point to it, it can be
         * verified and repaired. */
-       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
-               return -EINVAL;
+       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3) {
+               struct ost_id   oi;
+               int             rc;
+
+               lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi);
+               if ((magic & LOV_MAGIC_MASK) == LOV_MAGIC_MAGIC)
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = -EINVAL;
+
+               CDEBUG(D_LFSCK, "%s LOV EA magic %u on "DOSTID"\n",
+                      rc == -EINVAL ? "Unknown" : "Unsupported",
+                      magic, POSTID(&oi));
+
+               return rc;
+       }
 
-       patten = le32_to_cpu(lmm->lmm_pattern);
+       pattern = le32_to_cpu(lmm->lmm_pattern);
        /* XXX: currently, we only support LOV_PATTERN_RAID0. */
-       if (patten != LOV_PATTERN_RAID0)
+       if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
+               struct ost_id oi;
+
+               lmm_oi_cpu_to_le(&oi, &lmm->lmm_oi);
+               CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u on "DOSTID"\n",
+                      pattern, POSTID(&oi));
+
                return -EOPNOTSUPP;
+       }
 
        return 0;
 }
 
 #define LFSCK_RBTREE_BITMAP_SIZE       PAGE_CACHE_SIZE
 #define LFSCK_RBTREE_BITMAP_WIDTH      (LFSCK_RBTREE_BITMAP_SIZE << 3)
-#define LFSCK_RBTREE_BITMAP_MASK       (LFSCK_RBTREE_BITMAP_SIZE - 1)
+#define LFSCK_RBTREE_BITMAP_MASK       (LFSCK_RBTREE_BITMAP_WIDTH - 1)
 
 struct lfsck_rbtree_node {
        struct rb_node   lrn_node;
@@ -392,7 +413,7 @@ static inline int lfsck_rbtree_cmp(struct lfsck_rbtree_node *lrn,
        if (oid < lrn->lrn_first_oid)
                return -1;
 
-       if (oid >= lrn->lrn_first_oid + LFSCK_RBTREE_BITMAP_WIDTH)
+       if (oid - lrn->lrn_first_oid >= LFSCK_RBTREE_BITMAP_WIDTH)
                return 1;
 
        return 0;
@@ -492,19 +513,19 @@ static struct lfsck_rbtree_node *
 lfsck_rbtree_insert(struct lfsck_layout_slave_data *llsd,
                    struct lfsck_rbtree_node *lrn)
 {
-       struct rb_node           **pos    = &(llsd->llsd_rb_root.rb_node);
+       struct rb_node           **pos    = &llsd->llsd_rb_root.rb_node;
        struct rb_node            *parent = NULL;
        struct lfsck_rbtree_node  *tmp;
        int                        rc;
 
-       while (*pos) {
+       while (*pos != NULL) {
                parent = *pos;
-               tmp = rb_entry(*pos, struct lfsck_rbtree_node, lrn_node);
+               tmp = rb_entry(parent, struct lfsck_rbtree_node, lrn_node);
                rc = lfsck_rbtree_cmp(tmp, lrn->lrn_seq, lrn->lrn_first_oid);
                if (rc < 0)
-                       pos = &((*pos)->rb_left);
+                       pos = &(*pos)->rb_left;
                else if (rc > 0)
-                       pos = &((*pos)->rb_right);
+                       pos = &(*pos)->rb_right;
                else
                        return tmp;
        }
@@ -1049,7 +1070,8 @@ lfsck_layout_lastid_store(const struct lu_env *env,
                lastid = cpu_to_le64(lls->lls_lastid);
                rc = dt_declare_record_write(env, lls->lls_lastid_obj,
                                             lfsck_buf_get(env, &lastid,
-                                            sizeof(lastid)), pos, th);
+                                                          sizeof(lastid)),
+                                            pos, th);
                if (rc != 0)
                        goto stop;
 
@@ -1152,6 +1174,17 @@ out:
        return rc;
 }
 
+static void lfsck_layout_record_failure(const struct lu_env *env,
+                                                struct lfsck_instance *lfsck,
+                                                struct lfsck_layout *lo)
+{
+       lo->ll_objs_failed_phase1++;
+       if (unlikely(lo->ll_pos_first_inconsistent == 0))
+               lo->ll_pos_first_inconsistent =
+                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
+                                                       lfsck->li_di_oit);
+}
+
 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                                               struct ptlrpc_request *req,
                                               void *args, int rc)
@@ -1576,7 +1609,6 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env,
        struct lfsck_bookmark   *bk    = &lfsck->li_bookmark_ram;
 
        down_write(&com->lc_sem);
-
        lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
                                HALF_SEC - lfsck->li_time_last_checkpoint);
        lo->ll_time_last_checkpoint = cfs_time_current_sec();
@@ -1600,15 +1632,7 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env,
                lo->ll_status = LS_FAILED;
        }
 
-       if (lo->ll_status != LS_PAUSED) {
-               spin_lock(&lfsck->li_lock);
-               list_del_init(&com->lc_link);
-               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
-               spin_unlock(&lfsck->li_lock);
-       }
-
        rc = lfsck_layout_store(env, com);
-
        up_write(&com->lc_sem);
 
        return rc;
@@ -1707,14 +1731,14 @@ static int lfsck_layout_extend_lovea(const struct lu_env *env,
                                     struct dt_object *parent,
                                     struct lu_fid *cfid,
                                     struct lu_buf *buf, int fl,
-                                    __u32 ost_idx, __u32 ea_off)
+                                    __u32 ost_idx, __u32 ea_off, bool reset)
 {
        struct lov_mds_md_v1    *lmm    = buf->lb_buf;
        struct lov_ost_data_v1  *objs;
        int                      rc;
        ENTRY;
 
-       if (fl == LU_XATTR_CREATE) {
+       if (fl == LU_XATTR_CREATE || reset) {
                LASSERT(buf->lb_len == lov_mds_md_size(ea_off + 1,
                                                       LOV_MAGIC_V1));
 
@@ -2003,7 +2027,7 @@ static int lfsck_layout_recreate_parent(const struct lu_env *env,
                /* 3b. Add layout EA for the MDT-object. */
                rc = lfsck_layout_extend_lovea(env, th, pobj, cfid, ea_buf,
                                               LU_XATTR_CREATE, ltd->ltd_index,
-                                              ea_off);
+                                              ea_off, false);
        dt_write_unlock(env, pobj);
        if (rc < 0)
                GOTO(stop, rc);
@@ -2048,7 +2072,7 @@ static int lfsck_layout_master_conditional_destroy(const struct lu_env *env,
 
        ltd = lfsck_tgt_get(&lfsck->li_ost_descs, index);
        if (unlikely(ltd == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        exp = ltd->ltd_exp;
        if (!(exp_connect_flags(exp) & OBD_CONNECT_LFSCK))
@@ -2403,13 +2427,30 @@ again:
 
                buf->lb_len = rc;
                rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
-                                              fl, ost_idx, ea_off);
+                                              fl, ost_idx, ea_off, false);
 
                GOTO(unlock_parent, rc);
        }
 
        lmm = buf->lb_buf;
        rc1 = lfsck_layout_verify_header(lmm);
+
+       /* If the LOV EA crashed, then rebuild it. */
+       if (rc1 == -EINVAL) {
+               if (bk->lb_param & LPF_DRYRUN)
+                       GOTO(unlock_parent, rc = 1);
+
+               LASSERT(buf->lb_len >= rc);
+
+               buf->lb_len = rc;
+               memset(lmm, 0, buf->lb_len);
+               rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
+                                              fl, ost_idx, ea_off, true);
+
+               GOTO(unlock_parent, rc);
+       }
+
+       /* For an unsupported magic/pattern, keep the current LOV EA. */
        if (rc1 != 0)
                GOTO(unlock_parent, rc = rc1);
 
@@ -2442,7 +2483,7 @@ again:
 
                buf->lb_len = rc;
                rc = lfsck_layout_extend_lovea(env, handle, parent, cfid, buf,
-                                              fl, ost_idx, ea_off);
+                                              fl, ost_idx, ea_off, false);
                GOTO(unlock_parent, rc);
        }
 
@@ -2944,10 +2985,6 @@ static int lfsck_layout_repair_multiple_references(const struct lu_env *env,
                GOTO(unlock2, rc = 0);
 
        lmm = buf->lb_buf;
-       rc = lfsck_layout_verify_header(lmm);
-       if (rc != 0)
-               GOTO(unlock2, rc);
-
        /* Someone change layout during the LFSCK, no need to repair then. */
        if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen)
                GOTO(unlock2, rc = 0);
@@ -3134,14 +3171,6 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
                GOTO(out, rc);
 
        lmm = buf->lb_buf;
-       rc = lfsck_layout_verify_header(lmm);
-       if (rc != 0)
-               GOTO(out, rc);
-
-       /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
-        * been verified in lfsck_layout_verify_header() already. If some
-        * new magic introduced in the future, then layout LFSCK needs to
-        * be updated also. */
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_V1) {
                objs = &(lmm->lmm_objects[0]);
@@ -3292,23 +3321,33 @@ repair:
 out:
        down_write(&com->lc_sem);
        if (rc < 0) {
-               /* If cannot touch the target server,
-                * mark the LFSCK as INCOMPLETE. */
-               if (rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -ETIMEDOUT ||
-                   rc == -EHOSTDOWN || rc == -EHOSTUNREACH) {
+               struct lfsck_layout_master_data *llmd = com->lc_data;
+
+               if (unlikely(llmd->llmd_exit)) {
+                       rc = 0;
+               } else if (rc == -ENOTCONN || rc == -ESHUTDOWN ||
+                          rc == -ETIMEDOUT || rc == -EHOSTDOWN ||
+                          rc == -EHOSTUNREACH) {
+                       /* If the target server cannot be reached,
+                        * mark the LFSCK as INCOMPLETE. */
                        CERROR("%s: Fail to talk with OST %x: rc = %d.\n",
                               lfsck_lfsck2name(lfsck), llr->llr_ost_idx, rc);
                        lo->ll_flags |= LF_INCOMPLETE;
                        lo->ll_objs_skipped++;
                        rc = 0;
                } else {
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                }
        } else if (rc > 0) {
                LASSERTF(type > LLIT_NONE && type <= LLIT_MAX,
                         "unknown type = %d\n", type);
 
                lo->ll_objs_repaired[type - 1]++;
+               if (bk->lb_param & LPF_DRYRUN &&
+                   unlikely(lo->ll_pos_first_inconsistent == 0))
+                       lo->ll_pos_first_inconsistent =
+                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
+                                                       lfsck->li_di_oit);
        }
        up_write(&com->lc_sem);
 
@@ -3362,7 +3401,8 @@ static int lfsck_layout_assistant(void *args)
                while (!list_empty(&llmd->llmd_req_list)) {
                        bool wakeup = false;
 
-                       if (unlikely(llmd->llmd_exit))
+                       if (unlikely(llmd->llmd_exit ||
+                                    !thread_is_running(mthread)))
                                GOTO(cleanup1, rc = llmd->llmd_post_result);
 
                        llr = list_entry(llmd->llmd_req_list.next,
@@ -3575,7 +3615,7 @@ cleanup2:
        /* Under force exit case, some requests may be just freed without
         * verification, those objects should be re-handled when next run.
         * So not update the on-disk tracing file under such case. */
-       if (!llmd->llmd_exit)
+       if (llmd->llmd_in_double_scan && !llmd->llmd_exit)
                rc1 = lfsck_layout_double_scan_result(env, com, rc);
 
 fini:
@@ -4064,14 +4104,7 @@ static void lfsck_layout_fail(const struct lu_env *env,
        down_write(&com->lc_sem);
        if (new_checked)
                com->lc_new_checked++;
-       lo->ll_objs_failed_phase1++;
-       if (lo->ll_pos_first_inconsistent == 0) {
-               struct lfsck_instance *lfsck = com->lc_lfsck;
-
-               lo->ll_pos_first_inconsistent =
-                       lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
-                                                       lfsck->li_di_oit);
-       }
+       lfsck_layout_record_failure(env, com->lc_lfsck, lo);
        up_write(&com->lc_sem);
 }
 
@@ -4410,7 +4443,7 @@ next:
                down_write(&com->lc_sem);
                com->lc_new_checked++;
                if (rc < 0)
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                up_write(&com->lc_sem);
 
                if (cobj != NULL && !IS_ERR(cobj))
@@ -4480,6 +4513,8 @@ again:
        buf->lb_len = rc;
        lmm = buf->lb_buf;
        rc = lfsck_layout_verify_header(lmm);
+       /* If the LOV EA crashed, then it may be rebuilt later
+        * when handling orphan OST-objects. */
        if (rc != 0)
                GOTO(out, rc);
 
@@ -4557,7 +4592,7 @@ out:
                down_write(&com->lc_sem);
                com->lc_new_checked++;
                if (rc < 0)
-                       lo->ll_objs_failed_phase1++;
+                       lfsck_layout_record_failure(env, lfsck, lo);
                up_write(&com->lc_sem);
        }
        buf->lb_len = buflen;
@@ -4893,7 +4928,8 @@ static int lfsck_layout_dump(const struct lu_env *env,
                const struct dt_it_ops *iops;
                cfs_duration_t duration = cfs_time_current() -
                                          lfsck->li_time_last_checkpoint;
-               __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
+               __u64 checked = lo->ll_objs_checked_phase1 +
+                               com->lc_new_checked;
                __u64 speed = checked;
                __u64 new_checked = com->lc_new_checked * HZ;
                __u32 rtime = lo->ll_run_time_phase1 +
@@ -4944,31 +4980,36 @@ static int lfsck_layout_dump(const struct lu_env *env,
        } else if (lo->ll_status == LS_SCANNING_PHASE2) {
                cfs_duration_t duration = cfs_time_current() -
                                          lfsck->li_time_last_checkpoint;
-               __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
-               __u64 speed = checked;
+               __u64 checked = lo->ll_objs_checked_phase2 +
+                               com->lc_new_checked;
+               __u64 speed1 = lo->ll_objs_checked_phase1;
+               __u64 speed2 = checked;
                __u64 new_checked = com->lc_new_checked * HZ;
-               __u32 rtime = lo->ll_run_time_phase1 +
+               __u32 rtime = lo->ll_run_time_phase2 +
                              cfs_duration_sec(duration + HALF_SEC);
 
                if (duration != 0)
                        do_div(new_checked, duration);
+               if (lo->ll_run_time_phase1 != 0)
+                       do_div(speed1, lo->ll_run_time_phase1);
                if (rtime != 0)
-                       do_div(speed, rtime);
+                       do_div(speed2, rtime);
                rc = snprintf(buf, len,
                              "checked_phase1: "LPU64"\n"
                              "checked_phase2: "LPU64"\n"
                              "run_time_phase1: %u seconds\n"
                              "run_time_phase2: %u seconds\n"
                              "average_speed_phase1: "LPU64" items/sec\n"
-                             "average_speed_phase2: N/A\n"
-                             "real-time_speed_phase1: "LPU64" items/sec\n"
-                             "real-time_speed_phase2: N/A\n"
+                             "average_speed_phase2: "LPU64" items/sec\n"
+                             "real-time_speed_phase1: N/A\n"
+                             "real-time_speed_phase2: "LPU64" items/sec\n"
                              "current_position: "DFID"\n",
+                             lo->ll_objs_checked_phase1,
                              checked,
-                             lo->ll_objs_checked_phase2,
+                             lo->ll_run_time_phase1,
                              rtime,
-                             lo->ll_run_time_phase2,
-                             speed,
+                             speed1,
+                             speed2,
                              new_checked,
                              PFID(&com->lc_fid_latest_scanned_phase2));
                if (rc <= 0)
@@ -5236,7 +5277,7 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env,
        if (ltd == NULL) {
                spin_unlock(&ltds->ltd_lock);
 
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
        }
 
        list_del_init(&ltd->ltd_layout_phase_list);
@@ -5362,7 +5403,7 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env,
 
        llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index, true);
        if (llst == NULL)
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        lfsck_layout_llst_put(llst);
        if (list_empty(&llsd->llsd_master_list))
@@ -5794,7 +5835,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env,
 
        lfsck = lfsck_instance_find(dev, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(ERR_PTR(-ENODEV));
+               RETURN(ERR_PTR(-ENXIO));
 
        com = lfsck_component_find(lfsck, LT_LAYOUT);
        if (unlikely(com == NULL))
@@ -5810,7 +5851,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env,
 
        it->loi_llst = lfsck_layout_llst_find_and_del(llsd, attr, false);
        if (it->loi_llst == NULL)
-               GOTO(out, rc = -ENODEV);
+               GOTO(out, rc = -ENXIO);
 
        if (dev->dd_record_fid_accessed) {
                /* The first iteration against the rbtree, scan the whole rbtree