Whamcloud - gitweb
LU-13124 scrub: check for multiple linked file
[fs/lustre-release.git] / lustre / lfsck / lfsck_striped_dir.c
index 280c960..63a8754 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2014, 2015, Intel Corporation.
+ * Copyright (c) 2014, 2017, Intel Corporation.
  */
 /*
  * lustre/lfsck/lfsck_striped_dir.c
 
 #define DEBUG_SUBSYSTEM S_LFSCK
 
-#include <lustre/lustre_idl.h>
 #include <lu_object.h>
 #include <dt_object.h>
 #include <md_object.h>
 #include <lustre_lib.h>
 #include <lustre_net.h>
 #include <lustre_lmv.h>
-#include <lustre/lustre_user.h>
 
 #include "lfsck_internal.h"
 
@@ -170,14 +168,13 @@ void lfsck_lmv_put(const struct lu_env *env, struct lfsck_lmv *llmv)
 
                        LASSERT(llmv->ll_lslr != NULL);
 
-                       OBD_FREE_LARGE(llmv->ll_lslr,
-                                      sizeof(*llmv->ll_lslr) *
-                                      llmv->ll_stripes_allocated);
+                       OBD_FREE_PTR_ARRAY_LARGE(llmv->ll_lslr,
+                                                llmv->ll_stripes_allocated);
                        OBD_FREE_PTR(llu);
                } else {
                        if (llmv->ll_lslr != NULL)
-                               OBD_FREE_LARGE(llmv->ll_lslr,
-                                       sizeof(*llmv->ll_lslr) *
+                               OBD_FREE_PTR_ARRAY_LARGE(
+                                       llmv->ll_lslr,
                                        llmv->ll_stripes_allocated);
 
                        OBD_FREE_PTR(llmv);
@@ -211,7 +208,7 @@ static int lfsck_disable_master_lmv(const struct lu_env *env,
        int                              rc     = 0;
        ENTRY;
 
-       th = dt_trans_create(env, dev);
+       th = lfsck_trans_create(env, dev, lfsck);
        if (IS_ERR(th))
                GOTO(log, rc = PTR_ERR(th));
 
@@ -512,7 +509,7 @@ static int lfsck_record_lmv(const struct lu_env *env,
                int new_stripes = index + 1;
                size_t old_size = sizeof(*lslr) * llmv->ll_stripes_allocated;
 
-               OBD_ALLOC_LARGE(new_lslr, sizeof(*new_lslr) * new_stripes);
+               OBD_ALLOC_PTR_ARRAY_LARGE(new_lslr, new_stripes);
                if (new_lslr == NULL) {
                        llmv->ll_failed = 1;
 
@@ -838,26 +835,92 @@ out:
        return rc > 0 ? 0 : rc;
 }
 
-int lfsck_read_stripe_lmv(const struct lu_env *env, struct dt_object *obj,
+/**
+ * Read LMV from bottom object, so it doesn't contain stripe FIDs.
+ *
+ * TODO: test migrating/foreign directory lfsck
+ *
+ * \param[in] env      thread env
+ * \param[in] lfsck    lfsck instance
+ * \param[in] obj      dt object
+ * \param[out] lmv     LMV data pointer
+ *
+ * \retval             0 on success
+ * \retval             -ENODATA on no LMV, corrupt LMV, dir is dead or foreign
+ * \retval             negative errno on other failures
+ */
+int lfsck_read_stripe_lmv(const struct lu_env *env,
+                         struct lfsck_instance *lfsck,
+                         struct dt_object *obj,
                          struct lmv_mds_md_v1 *lmv)
 {
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lu_buf *buf = &info->lti_buf;
+       struct lmv_foreign_md *lfm;
        int rc;
 
+       /* use bottom object to avoid reading in shard FIDs */
+       obj = lfsck_object_find_bottom(env, lfsck, lu_object_fid(&obj->do_lu));
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
        dt_read_lock(env, obj, 0);
-       rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lmv, sizeof(*lmv)),
-                         XATTR_NAME_LMV);
+       buf->lb_buf = lmv;
+       buf->lb_len = sizeof(*lmv);
+       rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV);
+       if (unlikely(rc == -ERANGE)) {
+               buf = &info->lti_big_buf;
+               /* this may be a foreign LMV */
+               rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LMV);
+               if (rc > sizeof(*lmv)) {
+                       int rc1;
+
+                       lu_buf_check_and_alloc(buf, rc);
+                       rc1 = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV);
+                       if (rc != rc1)
+                               rc = -ENODATA;
+               } else {
+                       rc = -ENODATA;
+               }
+       }
        dt_read_unlock(env, obj);
-       if (rc != sizeof(*lmv))
-               return rc > 0 ? -EINVAL : rc;
 
-       lfsck_lmv_header_le_to_cpu(lmv, lmv);
-       if ((lmv->lmv_magic == LMV_MAGIC &&
-            !(lmv->lmv_hash_type & LMV_HASH_FLAG_MIGRATION)) ||
-           (lmv->lmv_magic == LMV_MAGIC_STRIPE &&
-            !(lmv->lmv_hash_type & LMV_HASH_FLAG_DEAD)))
-               return 0;
+       lfsck_object_put(env, obj);
 
-       return -ENODATA;
+       if (rc > offsetof(typeof(*lfm), lfm_value) &&
+           *((__u32 *)buf->lb_buf) == LMV_MAGIC_FOREIGN) {
+               __u32 value_len;
+
+               lfm = buf->lb_buf;
+               value_len = le32_to_cpu(lfm->lfm_length);
+               CDEBUG(D_INFO,
+                      "foreign LMV EA, magic %x, len %u, type %x, flags %x, for dir "DFID"\n",
+                      le32_to_cpu(lfm->lfm_magic), value_len,
+                      le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags),
+                      PFID(lfsck_dto2fid(obj)));
+
+               if (rc != value_len + offsetof(typeof(*lfm), lfm_value))
+                       CDEBUG(D_LFSCK,
+                              "foreign LMV EA internal size %u does not match EA full size %d for dir "DFID"\n",
+                              value_len, rc, PFID(lfsck_dto2fid(obj)));
+
+               /* no further usage/decode of foreign LMV outside */
+               return -ENODATA;
+       }
+
+       if (rc == sizeof(*lmv)) {
+               rc = 0;
+               lfsck_lmv_header_le_to_cpu(lmv, lmv);
+               /* if LMV is corrupt, return -ENODATA */
+               if (lmv->lmv_magic != LMV_MAGIC_V1 &&
+                   lmv->lmv_magic != LMV_MAGIC_STRIPE) 
+                       rc = -ENODATA;
+       } else if (rc >= 0) {
+               /* LMV is corrupt */
+               rc = -ENODATA;
+       }
+
+       return rc;
 }
 
 /**
@@ -907,24 +970,30 @@ int lfsck_shard_name_to_index(const struct lu_env *env, const char *name,
        return idx;
 }
 
+static inline bool lfsck_name_hash_match(struct lmv_mds_md_v1 *lmv,
+                                        const char *name, int namelen)
+{
+       int idx;
+
+       idx = lmv_name_to_stripe_index_old(lmv, name, namelen);
+       if (idx == lmv->lmv_master_mdt_index)
+               return true;
+
+       if (!lmv_hash_is_layout_changing(lmv->lmv_hash_type))
+               return false;
+
+       idx = lmv_name_to_stripe_index(lmv, name, namelen);
+       return (idx == lmv->lmv_master_mdt_index);
+}
+
 bool lfsck_is_valid_slave_name_entry(const struct lu_env *env,
                                     struct lfsck_lmv *llmv,
                                     const char *name, int namelen)
 {
-       struct lmv_mds_md_v1    *lmv;
-       int                      idx;
-
        if (llmv == NULL || !llmv->ll_lmv_slave || !llmv->ll_lmv_verified)
                return true;
 
-       lmv = &llmv->ll_lmv;
-       idx = lmv_name_to_stripe_index(lmv->lmv_hash_type,
-                                      lmv->lmv_stripe_count,
-                                      name, namelen);
-       if (unlikely(idx != lmv->lmv_master_mdt_index))
-               return false;
-
-       return true;
+       return lfsck_name_hash_match(&llmv->ll_lmv, name, namelen);
 }
 
 /**
@@ -946,15 +1015,15 @@ bool lfsck_is_valid_slave_name_entry(const struct lu_env *env,
  * \retval             negative error number on failure
  */
 int lfsck_namespace_check_name(const struct lu_env *env,
+                              struct lfsck_instance *lfsck,
                               struct dt_object *parent,
                               struct dt_object *child,
                               const struct lu_name *cname)
 {
-       struct lmv_mds_md_v1    *lmv = &lfsck_env_info(env)->lti_lmv;
-       int                      idx;
-       int                      rc;
+       struct lmv_mds_md_v1 *lmv = &lfsck_env_info(env)->lti_lmv;
+       int rc;
 
-       rc = lfsck_read_stripe_lmv(env, parent, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv);
        if (rc != 0)
                RETURN(rc == -ENODATA ? 0 : rc);
 
@@ -962,11 +1031,8 @@ int lfsck_namespace_check_name(const struct lu_env *env,
                if (!lfsck_is_valid_slave_lmv(lmv))
                        return 0;
 
-               idx = lmv_name_to_stripe_index(lmv->lmv_hash_type,
-                                              lmv->lmv_stripe_count,
-                                              cname->ln_name,
-                                              cname->ln_namelen);
-               if (unlikely(idx != lmv->lmv_master_mdt_index))
+               if (!lfsck_name_hash_match(lmv, cname->ln_name,
+                                          cname->ln_namelen))
                        return 1;
        } else if (lfsck_shard_name_to_index(env, cname->ln_name,
                        cname->ln_namelen, lfsck_object_type(child),
@@ -1019,7 +1085,7 @@ int lfsck_namespace_update_lmv(const struct lu_env *env,
                        GOTO(log, rc);
        }
 
-       th = dt_trans_create(env, dev);
+       th = lfsck_trans_create(env, dev, lfsck);
        if (IS_ERR(th))
                GOTO(log, rc = PTR_ERR(th));
 
@@ -1278,7 +1344,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env,
        if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                RETURN(0);
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv4);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv4);
        if (rc != 0)
                RETURN(rc);
 
@@ -1293,7 +1359,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env,
        else
                count = lmv4->lmv_stripe_count;
 
-       OBD_ALLOC_LARGE(lslr, sizeof(struct lfsck_slave_lmv_rec) * count);
+       OBD_ALLOC_PTR_ARRAY_LARGE(lslr, count);
        if (lslr == NULL) {
                OBD_FREE_PTR(llu);
 
@@ -1320,6 +1386,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env,
                lfsck_lmv_put(env, llmv);
        } else {
                ns->ln_striped_dirs_repaired++;
+               llmv->ll_counted = 1;
                spin_lock(&lfsck->li_lock);
                list_add_tail(&llu->llu_link, &lfsck->li_list_lmv);
                spin_unlock(&lfsck->li_lock);
@@ -1386,7 +1453,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env,
        if (rc != 0)
                GOTO(log, rc);
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv3);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv3);
        if (rc == -ENODATA) {
                if (!(flags & LEF_SET_LMV_ALL))
                        GOTO(log, rc);
@@ -1404,6 +1471,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env,
 
        lmv3->lmv_magic = LMV_MAGIC;
        lmv3->lmv_master_mdt_index = pidx;
+       lmv3->lmv_layout_version++;
 
        if (flags & LEF_SET_LMV_ALL) {
                rc = lfsck_allow_regenerate_master_lmv(env, com, obj,
@@ -1484,8 +1552,7 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env,
        int                              rc     = 0;
        ENTRY;
 
-       rc = dt_lookup(env, shard, (struct dt_rec *)pfid,
-                      (const struct dt_key *)dotdot);
+       rc = dt_lookup_dir(env, shard, dotdot, pfid);
        if (rc != 0 || !fid_is_sane(pfid))
                GOTO(log, rc);
 
@@ -1563,7 +1630,7 @@ int lfsck_namespace_scan_shard(const struct lu_env *env,
        __u16                            type;
        ENTRY;
 
-       rc = lfsck_read_stripe_lmv(env, child, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
        if (rc != 0)
                RETURN(rc == -ENODATA ? 1 : rc);
 
@@ -1694,8 +1761,7 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
                GOTO(out, rc);
        }
 
-       rc = dt_lookup(env, obj, (struct dt_rec *)pfid,
-                      (const struct dt_key *)dotdot);
+       rc = dt_lookup_dir(env, obj, dotdot, pfid);
        if (rc != 0 || !fid_is_sane(pfid)) {
                rc = lfsck_namespace_trace_update(env, com, cfid,
                                        LNTF_UNCERTAIN_LMV, true);
@@ -1703,6 +1769,8 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
                GOTO(out, rc);
        }
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ENGINE_DELAY, cfs_fail_val);
+
        parent = lfsck_object_find_bottom(env, lfsck, pfid);
        if (IS_ERR(parent)) {
                rc = lfsck_namespace_trace_update(env, com, cfid,
@@ -1717,7 +1785,7 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
        if (unlikely(!dt_try_as_dir(env, parent)))
                GOTO(out, rc = -ENOTDIR);
 
-       rc = lfsck_read_stripe_lmv(env, parent, plmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, parent, plmv);
        if (rc != 0) {
                int rc1;
 
@@ -1773,8 +1841,7 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
                GOTO(out, rc);
        }
 
-       rc = dt_lookup(env, parent, (struct dt_rec *)&tfid,
-                      (const struct dt_key *)name2);
+       rc = dt_lookup_dir(env, parent, name2, &tfid);
        if (rc != 0 || !lu_fid_eq(cfid, &tfid))
                rc = lfsck_namespace_trace_update(env, com, cfid,
                                                  LNTF_UNCERTAIN_LMV, true);
@@ -1903,7 +1970,8 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env,
                        RETURN(rc);
 
                ns->ln_striped_dirs_scanned++;
-               ns->ln_striped_dirs_repaired++;
+               if (!llmv->ll_counted)
+                       ns->ln_striped_dirs_repaired++;
        }
 
        fld_range_set_mdt(range);
@@ -1912,7 +1980,6 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env,
                const struct lu_fid *cfid = &lslr->lslr_fid;
                const struct lu_name *cname;
                struct linkea_data ldata = { NULL };
-               int len;
                int rc1 = 0;
                bool repair_linkea = false;
                bool repair_lmvea = false;
@@ -1927,12 +1994,13 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env,
                if (fid_is_zero(cfid))
                        continue;
 
-               len = snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf),
-                              DFID":%u", PFID(cfid), i);
-               cname = lfsck_name_get_const(env, info->lti_tmpbuf, len);
-               memcpy(lnr->lnr_name, info->lti_tmpbuf, len);
-
-               obj = lfsck_object_find_bottom_nowait(env, lfsck, cfid);
+               lnr->lnr_fid = *cfid;
+               lnr->lnr_namelen = scnprintf(lnr->lnr_name,
+                                            lnr->lnr_size - sizeof(*lnr),
+                                            DFID":%u", PFID(cfid), i);
+               cname = lfsck_name_get_const(env, lnr->lnr_name,
+                                            lnr->lnr_namelen);
+               obj = lfsck_object_find_bottom(env, lfsck, cfid);
                if (IS_ERR(obj)) {
                        if (dir == NULL) {
                                dir = lfsck_assistant_object_load(env, lfsck,
@@ -1984,7 +2052,7 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env,
                        break;
                }
 
-               rc1 = lfsck_links_read(env, obj, &ldata);
+               rc1 = lfsck_links_read_with_rec(env, obj, &ldata);
                if (rc1 == -ENOENT) {
                        create = true;
                        goto repair;
@@ -2122,10 +2190,6 @@ repair:
                if (repair_linkea) {
                        struct lustre_handle lh = { 0 };
 
-                       rc1 = linkea_data_new(&ldata, &info->lti_big_buf);
-                       if (rc1 != 0)
-                               goto next;
-
                        if (dir == NULL) {
                                dir = lfsck_assistant_object_load(env, lfsck,
                                                                  lso);
@@ -2140,7 +2204,8 @@ repair:
                                }
                        }
 
-                       rc1 = linkea_add_buf(&ldata, cname, lfsck_dto2fid(dir));
+                       rc1 = linkea_links_new(&ldata, &info->lti_big_buf,
+                                              cname, lfsck_dto2fid(dir));
                        if (rc1 != 0)
                                goto next;
 
@@ -2161,17 +2226,22 @@ repair:
                }
 
 next:
-               CDEBUG(D_LFSCK, "%s: namespace LFSCK repair the shard "
-                     "%d "DFID" of the striped directory "DFID" with "
-                     "dangling %s/%s, rename %s/%s, llinkea %s/%s, "
-                     "repair_lmvea %s/%s: rc = %d\n", lfsck_lfsck2name(lfsck),
-                     i, PFID(cfid), PFID(&lnr->lnr_fid),
-                     create ? "yes" : "no", create_repaired ? "yes" : "no",
-                     rename ? "yes" : "no", rename_repaired ? "yes" : "no",
-                     repair_linkea ? "yes" : "no",
-                     linkea_repaired ? "yes" : "no",
-                     repair_lmvea ? "yes" : "no",
-                     lmvea_repaired ? "yes" : "no", rc1);
+               if (create || rename || repair_linkea || repair_lmvea) {
+                       CDEBUG(D_LFSCK, "%s: namespace LFSCK repair the shard "
+                              "%d "DFID" of the striped directory "DFID" with "
+                              "dangling %s/%s, rename %s/%s, llinkea %s/%s, "
+                              "repair_lmvea %s/%s: rc = %d\n",
+                              lfsck_lfsck2name(lfsck),
+                              i, PFID(cfid), PFID(pfid),
+                              create ? "yes" : "no",
+                              create_repaired ? "yes" : "no",
+                              rename ? "yes" : "no",
+                              rename_repaired ? "yes" : "no",
+                              repair_linkea ? "yes" : "no",
+                              linkea_repaired ? "yes" : "no",
+                              repair_lmvea ? "yes" : "no",
+                              lmvea_repaired ? "yes" : "no", rc1);
+               }
 
                if (obj != NULL && !IS_ERR(obj)) {
                        lfsck_object_put(env, obj);
@@ -2312,7 +2382,7 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env,
                dev = ltd->ltd_tgt;
        }
 
-       obj = lfsck_object_find_by_dev_nowait(env, dev, &lnr->lnr_fid);
+       obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid);
        if (IS_ERR(obj)) {
                if (lfsck_is_dead_obj(dir))
                        RETURN(0);
@@ -2350,7 +2420,7 @@ dangling:
                GOTO(out, rc = 0);
        }
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv);
        if (unlikely(rc == -ENOENT))
                /* It may happen when the remote object has been removed,
                 * but the local MDT does not aware of that. */