Whamcloud - gitweb
LU-5707 lfsck: store namespace LFSCK statistics info in new EA
[fs/lustre-release.git] / lustre / lfsck / lfsck_namespace.c
index f4308bc..9b67144 100644 (file)
@@ -319,9 +319,19 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env,
 }
 
 /**
- * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
- * \retval 0: succeed.
- * \retval -ve: failed cases.
+ * Load namespace LFSCK statistics information from the trace file.
+ *
+ * In old releases (Lustre-2.6 or older), the statistics information was
+ * stored in the XATTR_NAME_LFSCK_NAMESPACE_OLD EA. Lustre-2.7 needs more
+ * statistics information. To avoid confusing an old MDT after a downgrade,
+ * Lustre-2.7 stores the namespace LFSCK statistics information in the new
+ * XATTR_NAME_LFSCK_NAMESPACE EA.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ *
+ * \retval             0 for success
+ * \retval             negative error number on failure
  */
 static int lfsck_namespace_load(const struct lu_env *env,
                                struct lfsck_component *com)
@@ -341,7 +351,7 @@ static int lfsck_namespace_load(const struct lu_env *env,
                        CDEBUG(D_LFSCK, "%s: invalid lfsck_namespace magic "
                               "%#x != %#x\n", lfsck_lfsck2name(com->lc_lfsck),
                               ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
-                       rc = 1;
+                       rc = -ESTALE;
                } else {
                        rc = 0;
                }
@@ -350,13 +360,22 @@ static int lfsck_namespace_load(const struct lu_env *env,
                       "expected = %d: rc = %d\n",
                       lfsck_lfsck2name(com->lc_lfsck), len, rc);
                if (rc >= 0)
-                       rc = 1;
+                       rc = -ESTALE;
+       } else {
+               /* Check whether this is an old-format trace file.
+                * If so, return -ESTALE so that the caller resets it. */
+               rc = dt_xattr_get(env, com->lc_obj,
+                                 lfsck_buf_get(env, com->lc_file_disk, len),
+                                 XATTR_NAME_LFSCK_NAMESPACE_OLD, BYPASS_CAPA);
+               if (rc >= 0)
+                       rc = -ESTALE;
        }
+
        return rc;
 }
 
 static int lfsck_namespace_store(const struct lu_env *env,
-                                struct lfsck_component *com)
+                                struct lfsck_component *com, bool init)
 {
        struct dt_object                *obj    = com->lc_obj;
        struct lfsck_instance           *lfsck  = com->lc_lfsck;
@@ -367,6 +386,9 @@ static int lfsck_namespace_store(const struct lu_env *env,
        __u32                            nbits  = 0;
        int                              len    = com->lc_file_size;
        int                              rc;
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
+       struct lu_buf            tbuf   = { &len, sizeof(len) };
+#endif
        ENTRY;
 
        if (lad != NULL) {
@@ -398,6 +420,20 @@ static int lfsck_namespace_store(const struct lu_env *env,
                        GOTO(out, rc);
        }
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
+       /* To stay compatible with old Lustre-2.x MDTs (x <= 6), generate a
+        * dummy XATTR_NAME_LFSCK_NAMESPACE_OLD EA. After a downgrade to
+        * Lustre-2.x, the old LFSCK will find that "invalid" EA and then
+        * reset the namespace LFSCK trace file. */
+       if (init) {
+               rc = dt_declare_xattr_set(env, obj, &tbuf,
+                                         XATTR_NAME_LFSCK_NAMESPACE_OLD,
+                                         LU_XATTR_CREATE, handle);
+               if (rc != 0)
+                       GOTO(out, rc);
+       }
+#endif
+
        rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
        if (rc != 0)
                GOTO(out, rc);
@@ -411,6 +447,13 @@ static int lfsck_namespace_store(const struct lu_env *env,
                                  XATTR_NAME_LFSCK_BITMAP, 0, handle,
                                  BYPASS_CAPA);
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
+       if (rc == 0 && init)
+               rc = dt_xattr_set(env, obj, &tbuf,
+                                 XATTR_NAME_LFSCK_NAMESPACE_OLD,
+                                 LU_XATTR_CREATE, handle, BYPASS_CAPA);
+#endif
+
        GOTO(out, rc);
 
 out:
@@ -433,7 +476,7 @@ static int lfsck_namespace_init(const struct lu_env *env,
        ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
        ns->ln_status = LS_INIT;
        down_write(&com->lc_sem);
-       rc = lfsck_namespace_store(env, com);
+       rc = lfsck_namespace_store(env, com, true);
        up_write(&com->lc_sem);
        return rc;
 }
@@ -547,9 +590,9 @@ unlock:
        return rc;
 }
 
-static int lfsck_namespace_check_exist(const struct lu_env *env,
-                                      struct dt_object *dir,
-                                      struct dt_object *obj, const char *name)
+int lfsck_namespace_check_exist(const struct lu_env *env,
+                               struct dt_object *dir,
+                               struct dt_object *obj, const char *name)
 {
        struct lu_fid    *fid = &lfsck_env_info(env)->lti_fid;
        int               rc;
@@ -2554,6 +2597,9 @@ lost_parent:
                GOTO(out, rc);
        }
 
+       if (fid_is_zero(pfid))
+               GOTO(out, rc = 0);
+
        /* The ".." name entry is wrong, update it. */
        if (!lu_fid_eq(pfid, lfsck_dto2fid(parent))) {
                if (!lustre_handle_is_used(lh) && retry != NULL) {
@@ -2619,7 +2665,8 @@ lfsck_namespace_dsd_multiple(const struct lu_env *env,
        struct lfsck_bookmark    *bk            = &lfsck->li_bookmark_ram;
        struct dt_object         *parent        = NULL;
        struct linkea_data        ldata_new     = { 0 };
-       int                       count         = 0;
+       int                       dirent_count  = 0;
+       int                       linkea_count  = 0;
        int                       rc            = 0;
        bool                      once          = true;
        ENTRY;
@@ -2633,6 +2680,7 @@ again:
                /* Drop invalid linkEA entry. */
                if (!fid_is_sane(tfid)) {
                        linkea_del_buf(ldata, cname);
+                       linkea_count++;
                        continue;
                }
 
@@ -2666,6 +2714,7 @@ again:
                                 * child to be visible via other parent, then
                                 * remove this linkEA entry. */
                                linkea_del_buf(ldata, cname);
+                               linkea_count++;
                                continue;
                        }
 
@@ -2676,6 +2725,7 @@ again:
                if (unlikely(!dt_try_as_dir(env, parent))) {
                        lfsck_object_put(env, parent);
                        linkea_del_buf(ldata, cname);
+                       linkea_count++;
                        continue;
                }
 
@@ -2723,6 +2773,7 @@ rebuild:
                                RETURN(rc);
 
                        linkea_del_buf(ldata, cname);
+                       linkea_count++;
                        linkea_first_entry(ldata);
                        /* There may be some invalid dangling name entries under
                         * other parent directories, remove all of them. */
@@ -2759,13 +2810,13 @@ rebuild:
                                        goto next;
                                }
 
-                               count += rc;
+                               dirent_count += rc;
 
 next:
                                linkea_del_buf(ldata, cname);
                        }
 
-                       ns->ln_dirent_repaired += count;
+                       ns->ln_dirent_repaired += dirent_count;
 
                        RETURN(rc);
                }
@@ -2786,10 +2837,15 @@ next:
                linkea_del_buf(ldata, cname);
        }
 
+       linkea_first_entry(ldata);
        if (ldata->ld_leh->leh_reccount == 1) {
                rc = lfsck_namespace_dsd_single(env, com, child, pfid, ldata,
                                                lh, type, NULL);
 
+               if (rc == 0 && fid_is_zero(pfid) && linkea_count > 0)
+                       rc = lfsck_namespace_rebuild_linkea(env, com, child,
+                                                           ldata);
+
                RETURN(rc);
        }
 
@@ -2802,7 +2858,6 @@ next:
                RETURN(rc);
        }
 
-       linkea_first_entry(ldata);
        /* If the dangling name entry for the orphan directory object has
         * been remvoed, then just check whether the directory object is
         * still under the .lustre/lost+found/MDTxxxx/ or not. */
@@ -3003,6 +3058,22 @@ static int lfsck_namespace_double_scan_dir(const struct lu_env *env,
 
        LASSERT(!dt_object_remote(child));
 
+       if (flags & LNTF_UNCERTAIN_LMV) {
+               if (flags & LNTF_RECHECK_NAME_HASH) {
+                       rc = lfsck_namespace_scan_shard(env, com, child);
+                       if (rc < 0)
+                               RETURN(rc);
+
+                       ns->ln_striped_shards_scanned++;
+               } else {
+                       ns->ln_striped_shards_skipped++;
+               }
+       }
+
+       flags &= ~(LNTF_RECHECK_NAME_HASH | LNTF_UNCERTAIN_LMV);
+       if (flags == 0)
+               RETURN(0);
+
        if (flags & (LNTF_CHECK_LINKEA | LNTF_CHECK_PARENT) &&
            !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) {
                CDEBUG(D_LFSCK, "%s: some MDT(s) maybe NOT take part in the"
@@ -3055,6 +3126,8 @@ lock:
        } else if (lfsck->li_lpf_obj != NULL &&
                   lu_fid_eq(pfid, lfsck_dto2fid(lfsck->li_lpf_obj))) {
                lpf = true;
+       } else if (unlikely(!fid_is_sane(pfid))) {
+               fid_zero(pfid);
        }
 
        rc = lfsck_links_read(env, child, &ldata);
@@ -3622,6 +3695,29 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m,
                      time_phase2);
 }
 
+static void lfsck_namespace_release_lmv(const struct lu_env *env,
+                                       struct lfsck_component *com)
+{      /* Put every lfsck_lmv_unit still linked on lfsck->li_list_lmv. */
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct lfsck_namespace          *ns     = com->lc_file_ram;
+
+       while (!list_empty(&lfsck->li_list_lmv)) {
+               struct lfsck_lmv_unit   *llu;
+               struct lfsck_lmv        *llmv;
+
+               llu = list_entry(lfsck->li_list_lmv.next,
+                                struct lfsck_lmv_unit, llu_link);
+               llmv = &llu->llu_lmv; /* LMV embedded in the list unit */
+
+               LASSERTF(atomic_read(&llmv->ll_ref) == 1,
+                        "still in using: %u\n",
+                        atomic_read(&llmv->ll_ref)); /* exactly one ref */
+
+               ns->ln_striped_dirs_skipped++; /* still listed => skipped */
+               lfsck_lmv_put(env, llmv); /* NOTE(review): must unlink llu */
+       }
+}
+
 /* namespace APIs */
 
 static int lfsck_namespace_reset(const struct lu_env *env,
@@ -3678,7 +3774,7 @@ static int lfsck_namespace_reset(const struct lu_env *env,
        lad->lad_incomplete = 0;
        CFS_RESET_BITMAP(lad->lad_bitmap);
 
-       rc = lfsck_namespace_store(env, com);
+       rc = lfsck_namespace_store(env, com, true);
 
        GOTO(out, rc);
 
@@ -3815,7 +3911,7 @@ static int lfsck_namespace_checkpoint(const struct lu_env *env,
                com->lc_new_checked = 0;
        }
 
-       rc = lfsck_namespace_store(env, com);
+       rc = lfsck_namespace_store(env, com, false);
        up_write(&com->lc_sem);
 
 log:
@@ -4100,6 +4196,8 @@ static int lfsck_namespace_post(const struct lu_env *env,
        lfsck_post_generic(env, com, &result);
 
        down_write(&com->lc_sem);
+       lfsck_namespace_release_lmv(env, com);
+
        spin_lock(&lfsck->li_lock);
        if (!init)
                ns->ln_pos_last_checkpoint = lfsck->li_pos_checkpoint;
@@ -4133,7 +4231,7 @@ static int lfsck_namespace_post(const struct lu_env *env,
                com->lc_new_checked = 0;
        }
 
-       rc = lfsck_namespace_store(env, com);
+       rc = lfsck_namespace_store(env, com, false);
        up_write(&com->lc_sem);
 
        CDEBUG(D_LFSCK, "%s: namespace LFSCK post done: rc = %d\n",
@@ -4205,7 +4303,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                                          lfsck->li_time_last_checkpoint;
                __u64 checked = ns->ln_items_checked + com->lc_new_checked;
                __u64 speed = checked;
-               __u64 new_checked = com->lc_new_checked * HZ;
+               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+                                                    MSEC_PER_SEC);
                __u32 rtime = ns->ln_run_time_phase1 +
                              cfs_duration_sec(duration + HALF_SEC);
 
@@ -4259,7 +4358,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                                com->lc_new_checked;
                __u64 speed1 = ns->ln_items_checked;
                __u64 speed2 = checked;
-               __u64 new_checked = com->lc_new_checked * HZ;
+               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
+                                                    MSEC_PER_SEC);
                __u32 rtime = ns->ln_run_time_phase2 +
                              cfs_duration_sec(duration + HALF_SEC);
 
@@ -4348,6 +4448,7 @@ static void lfsck_namespace_data_release(const struct lu_env *env,
        LASSERT(list_empty(&lad->lad_req_list));
 
        com->lc_data = NULL;
+       lfsck_namespace_release_lmv(env, com);
 
        spin_lock(&ltds->ltd_lock);
        list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list,
@@ -4386,6 +4487,8 @@ static void lfsck_namespace_quit(const struct lu_env *env,
                thread_is_stopped(&lad->lad_thread));
        LASSERT(list_empty(&lad->lad_req_list));
 
+       lfsck_namespace_release_lmv(env, com);
+
        spin_lock(&ltds->ltd_lock);
        list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list,
                                 ltd_namespace_phase_list) {
@@ -4521,6 +4624,28 @@ log:
 
                return 0;
        }
+       case LE_SET_LMV_MASTER: {
+               struct dt_object        *obj;
+
+               obj = lfsck_object_find_by_dev(env, lfsck->li_bottom,
+                                              &lr->lr_fid);
+               if (IS_ERR(obj))
+                       RETURN(PTR_ERR(obj));
+
+               rc = lfsck_namespace_notify_lmv_master_local(env, com, obj);
+               lfsck_object_put(env, obj);
+
+               RETURN(rc > 0 ? 0 : rc);
+       }
+       case LE_SET_LMV_SLAVE: {
+               if (!(lr->lr_flags & LEF_RECHECK_NAME_HASH))
+                       ns->ln_striped_shards_repaired++;
+
+               rc = lfsck_namespace_trace_update(env, com, &lr->lr_fid,
+                                                 LNTF_RECHECK_NAME_HASH, true);
+
+               RETURN(rc > 0 ? 0 : rc);
+       }
        case LE_PHASE1_DONE:
        case LE_PHASE2_DONE:
        case LE_PEER_EXIT:
@@ -4921,6 +5046,12 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
            (lnr->lnr_namelen == 1 || fid_seq_is_dot(fid_seq(&lnr->lnr_fid))))
                GOTO(out, rc = 0);
 
+       if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) {
+               rc = lfsck_namespace_handle_striped_master(env, com, lnr);
+
+               RETURN(rc);
+       }
+
        idx = lfsck_find_mdt_idx_by_fid(env, lfsck, &lnr->lnr_fid);
        if (idx < 0)
                GOTO(out, rc = idx);
@@ -5648,6 +5779,116 @@ out:
        lu_object_put(env, &parent->do_lu);
 }
 
+/**
+ * Rescan the striped directory after the master LMV EA reset.
+ *
+ * Sometimes, the master LMV EA of the striped directory may be lost, so when
+ * the namespace LFSCK engine scans the striped directory for the first time,
+ * it will be regarded as a normal directory. As the LFSCK proceeds, some
+ * other LFSCK instance on another MDT will find a shard of this striped dir,
+ * and find that the master MDT-object of the striped directory lost its LMV
+ * EA, then that remote LFSCK instance will regenerate the master LMV EA and
+ * notify the LFSCK instance on this MDT to rescan the striped directory.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] llu      the lfsck_lmv_unit that contains the striped directory
+ *                     to be rescanned.
+ *
+ * \retval             positive number for success
+ * \retval             0 for LFSCK stopped/paused
+ * \retval             negative error number on failure
+ */
+static int lfsck_namespace_rescan_striped_dir(const struct lu_env *env,
+                                             struct lfsck_component *com,
+                                             struct lfsck_lmv_unit *llu)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct lfsck_assistant_data     *lad    = com->lc_data;
+       struct dt_object                *dir;
+       const struct dt_it_ops          *iops;
+       struct dt_it                    *di;
+       struct lu_dirent                *ent    =
+                       (struct lu_dirent *)info->lti_key;
+       struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
+       struct ptlrpc_thread            *thread = &lfsck->li_thread;
+       struct lfsck_namespace_req      *lnr;
+       struct lfsck_assistant_req      *lar;
+       int                              rc;
+       __u16                            type;
+       ENTRY;
+
+       LASSERT(list_empty(&lad->lad_req_list));
+
+       lfsck->li_lmv = &llu->llu_lmv;
+       lfsck->li_obj_dir = lfsck_object_get(llu->llu_obj);
+       rc = lfsck_open_dir(env, lfsck, 0);
+       if (rc != 0)
+               RETURN(rc); /* NOTE(review): li_obj_dir ref still held? */
+
+       dir = lfsck->li_obj_dir;
+       di = lfsck->li_di_dir;
+       iops = &dir->do_index_ops->dio_it;
+       do {
+               rc = iops->rec(env, di, (struct dt_rec *)ent,
+                              lfsck->li_args_dir);
+               if (rc == 0)
+                       rc = lfsck_unpack_ent(ent, &lfsck->li_cookie_dir,
+                                             &type);
+
+               if (rc != 0) {
+                       if (bk->lb_param & LPF_FAILOUT)
+                               GOTO(out, rc);
+
+                       goto next; /* no FAILOUT: skip this entry */
+               }
+
+               if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen))
+                       goto next;
+
+               lnr = lfsck_namespace_assistant_req_init(lfsck, ent, type);
+               if (IS_ERR(lnr)) {
+                       if (bk->lb_param & LPF_FAILOUT)
+                               GOTO(out, rc = PTR_ERR(lnr));
+
+                       goto next; /* no FAILOUT: skip this entry */
+               }
+
+               lar = &lnr->lnr_lar;
+               rc = lfsck_namespace_assistant_handler_p1(env, com, lar);
+               lfsck_namespace_assistant_req_fini(env, lar);
+               if (rc != 0 && bk->lb_param & LPF_FAILOUT)
+                       GOTO(out, rc);
+
+               if (unlikely(!thread_is_running(thread)))
+                       GOTO(out, rc = 0); /* LFSCK stopped/paused */
+
+next:
+               rc = iops->next(env, di);
+       } while (rc == 0); /* NOTE(review): rc > 0 looks like end-of-dir */
+
+out:
+       lfsck_close_dir(env, lfsck, rc);
+       if (rc <= 0)
+               RETURN(rc); /* stopped/paused (0) or failed (< 0) */
+
+       /* The close_dir() may insert a dummy lnr in the lad->lad_req_list. */
+       if (list_empty(&lad->lad_req_list))
+               RETURN(1); /* nothing was queued: success */
+
+       spin_lock(&lad->lad_lock);
+       lar = list_entry(lad->lad_req_list.next, struct lfsck_assistant_req,
+                         lar_list);
+       list_del_init(&lar->lar_list);
+       spin_unlock(&lad->lad_lock);
+
+       rc = lfsck_namespace_assistant_handler_p1(env, com, lar);
+       lfsck_namespace_assistant_req_fini(env, lar);
+
+       RETURN(rc == 0 ? 1 : rc);
+}
+
 static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env,
                                                struct lfsck_component *com)
 {
@@ -5665,6 +5906,20 @@ static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env,
        __u8                     flags  = 0;
        ENTRY;
 
+       while (!list_empty(&lfsck->li_list_lmv)) {
+               struct lfsck_lmv_unit *llu;
+
+               spin_lock(&lfsck->li_lock);
+               llu = list_entry(lfsck->li_list_lmv.next,
+                                struct lfsck_lmv_unit, llu_link);
+               list_del_init(&llu->llu_link);
+               spin_unlock(&lfsck->li_lock);
+
+               rc = lfsck_namespace_rescan_striped_dir(env, com, llu);
+               if (rc <= 0)
+                       RETURN(rc);
+       }
+
        CDEBUG(D_LFSCK, "%s: namespace LFSCK phase2 scan start\n",
               lfsck_lfsck2name(lfsck));
 
@@ -5754,7 +6009,7 @@ checkpoint:
                        ns->ln_time_last_checkpoint = cfs_time_current_sec();
                        ns->ln_objs_checked_phase2 += com->lc_new_checked;
                        com->lc_new_checked = 0;
-                       rc = lfsck_namespace_store(env, com);
+                       rc = lfsck_namespace_store(env, com, false);
                        up_write(&com->lc_sem);
                        if (rc != 0)
                                GOTO(put, rc);
@@ -5836,7 +6091,7 @@ static int lfsck_namespace_double_scan_result(const struct lu_env *env,
                ns->ln_status = LS_FAILED;
        }
 
-       rc = lfsck_namespace_store(env, com);
+       rc = lfsck_namespace_store(env, com, false);
        up_write(&com->lc_sem);
 
        return rc;
@@ -6287,10 +6542,10 @@ int lfsck_namespace_setup(const struct lu_env *env,
                GOTO(out, rc);
 
        rc = lfsck_namespace_load(env, com);
-       if (rc > 0)
-               rc = lfsck_namespace_reset(env, com, true);
-       else if (rc == -ENODATA)
+       if (rc == -ENODATA)
                rc = lfsck_namespace_init(env, com);
+       else if (rc < 0)
+               rc = lfsck_namespace_reset(env, com, true);
        if (rc != 0)
                GOTO(out, rc);