Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / lfsck / lfsck_striped_dir.c
index 5cffac7..b53e2f7 100644 (file)
@@ -836,42 +836,92 @@ out:
        return rc > 0 ? 0 : rc;
 }
 
-int lfsck_read_stripe_lmv(const struct lu_env *env, struct dt_object *obj,
+/**
+ * Read the LMV EA header of a directory from its bottom object, so that
+ * the result doesn't contain stripe FIDs.
+ *
+ * On success \a lmv holds the header converted to CPU byte order.
+ *
+ * TODO: test migrating/foreign directory lfsck
+ *
+ * \param[in] env      thread env
+ * \param[in] lfsck    lfsck instance
+ * \param[in] obj      dt object; only its FID is used, to look up the
+ *                     bottom object that the EA is actually read from
+ * \param[out] lmv     LMV data pointer
+ *
+ * \retval             0 on success
+ * \retval             -ENODATA if the LMV EA is foreign, has an unexpected
+ *                     size, or its magic is neither LMV_MAGIC_V1 nor
+ *                     LMV_MAGIC_STRIPE (presumably also what the xattr
+ *                     read returns when there is no LMV EA at all)
+ * \retval             negative error number on other failures
+ */
+int lfsck_read_stripe_lmv(const struct lu_env *env,
+                         struct lfsck_instance *lfsck,
+                         struct dt_object *obj,
                          struct lmv_mds_md_v1 *lmv)
 {
        struct lfsck_thread_info *info = lfsck_env_info(env);
        struct lu_buf *buf = &info->lti_buf;
-       int size = sizeof(*lmv) + sizeof(struct lu_fid) * 2;
+       /* Taken from the caller's object, which outlives this call, so it
+        * stays usable for logging after the bottom object is released. */
+       const struct lu_fid *fid = lu_object_fid(&obj->do_lu);
+       struct lmv_foreign_md *lfm;
        int rc;
 
+       /* use bottom object to avoid reading in shard FIDs */
+       obj = lfsck_object_find_bottom(env, lfsck, fid);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
        dt_read_lock(env, obj, 0);
        buf->lb_buf = lmv;
        buf->lb_len = sizeof(*lmv);
        rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV);
        if (unlikely(rc == -ERANGE)) {
                buf = &info->lti_big_buf;
-               lu_buf_check_and_alloc(buf, size);
-               rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LMV);
-               /* For the in-migration directory, its LMV EA contains
-                * not only the LMV header, but also the FIDs for both
-                * source and target. So the LMV EA size is larger. */
-               if (rc == size) {
-                       rc = sizeof(*lmv);
-                       memcpy(lmv, buf->lb_buf, rc);
+               /* this may be a foreign LMV */
+               rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LMV);
+               /* Compare as int: against a size_t a negative errno would
+                * be converted to a huge value and look like a big EA. */
+               if (rc > (int)sizeof(*lmv)) {
+                       lu_buf_check_and_alloc(buf, rc);
+                       /* NOTE(review): no status is returned above;
+                        * presumably a failed allocation leaves lb_buf
+                        * NULL - confirm. */
+                       if (buf->lb_buf == NULL) {
+                               rc = -ENOMEM;
+                       } else {
+                               int rc1 = dt_xattr_get(env, obj, buf,
+                                                      XATTR_NAME_LMV);
+
+                               /* EA changed between the two reads */
+                               if (rc != rc1)
+                                       rc = -ENODATA;
+                       }
+               } else if (rc >= 0) {
+                       /* -ERANGE first, yet it fits now: inconsistent */
+                       rc = -ENODATA;
                }
        }
        dt_read_unlock(env, obj);
-       if (rc != sizeof(*lmv))
-               return rc > 0 ? -EINVAL : rc;
 
-       lfsck_lmv_header_le_to_cpu(lmv, lmv);
-       if ((lmv->lmv_magic == LMV_MAGIC &&
-            !(lmv->lmv_hash_type & LMV_HASH_FLAG_MIGRATION)) ||
-           (lmv->lmv_magic == LMV_MAGIC_STRIPE &&
-            !(lmv->lmv_hash_type & LMV_HASH_FLAG_DEAD)))
-               return 0;
+       /* The bottom object must not be touched after this point. */
+       lfsck_object_put(env, obj);
+
+       /* Only look at the buffer when the read succeeded (rc > 0); the EA
+        * is still in little-endian form here, so convert before comparing. */
+       if (rc > (int)offsetof(typeof(*lfm), lfm_value) &&
+           le32_to_cpu(*((__u32 *)buf->lb_buf)) == LMV_MAGIC_FOREIGN) {
+               __u32 value_len;
+
+               lfm = buf->lb_buf;
+               value_len = le32_to_cpu(lfm->lfm_length);
+               CDEBUG(D_INFO,
+                      "foreign LMV EA, magic %x, len %u, type %x, flags %x, for dir "DFID"\n",
+                      le32_to_cpu(lfm->lfm_magic), value_len,
+                      le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags),
+                      PFID(fid));
+
+               if (rc != value_len + offsetof(typeof(*lfm), lfm_value))
+                       CDEBUG(D_LFSCK,
+                              "foreign LMV EA internal size %u does not match EA full size %d for dir "DFID"\n",
+                              value_len, rc, PFID(fid));
+
+               /* no further usage/decode of foreign LMV outside */
+               return -ENODATA;
+       }
+
+       if (rc == sizeof(*lmv)) {
+               rc = 0;
+               lfsck_lmv_header_le_to_cpu(lmv, lmv);
+               /* if LMV is corrupt, return -ENODATA */
+               if (lmv->lmv_magic != LMV_MAGIC_V1 &&
+                   lmv->lmv_magic != LMV_MAGIC_STRIPE)
+                       rc = -ENODATA;
+       } else if (rc >= 0) {
+               /* LMV is corrupt */
+               rc = -ENODATA;
+       }
 
-       return -ENODATA;
+       return rc;
 }
 
 /**
@@ -960,6 +1010,7 @@ bool lfsck_is_valid_slave_name_entry(const struct lu_env *env,
  * \retval             negative error number on failure
  */
 int lfsck_namespace_check_name(const struct lu_env *env,
+                              struct lfsck_instance *lfsck,
                               struct dt_object *parent,
                               struct dt_object *child,
                               const struct lu_name *cname)
@@ -968,7 +1019,7 @@ int lfsck_namespace_check_name(const struct lu_env *env,
        int                      idx;
        int                      rc;
 
-       rc = lfsck_read_stripe_lmv(env, parent, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv);
        if (rc != 0)
                RETURN(rc == -ENODATA ? 0 : rc);
 
@@ -1292,7 +1343,7 @@ int lfsck_namespace_notify_lmv_master_local(const struct lu_env *env,
        if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                RETURN(0);
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv4);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv4);
        if (rc != 0)
                RETURN(rc);
 
@@ -1401,7 +1452,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env,
        if (rc != 0)
                GOTO(log, rc);
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv3);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv3);
        if (rc == -ENODATA) {
                if (!(flags & LEF_SET_LMV_ALL))
                        GOTO(log, rc);
@@ -1419,6 +1470,7 @@ static int lfsck_namespace_set_lmv_master(const struct lu_env *env,
 
        lmv3->lmv_magic = LMV_MAGIC;
        lmv3->lmv_master_mdt_index = pidx;
+       lmv3->lmv_layout_version++;
 
        if (flags & LEF_SET_LMV_ALL) {
                rc = lfsck_allow_regenerate_master_lmv(env, com, obj,
@@ -1578,7 +1630,7 @@ int lfsck_namespace_scan_shard(const struct lu_env *env,
        __u16                            type;
        ENTRY;
 
-       rc = lfsck_read_stripe_lmv(env, child, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
        if (rc != 0)
                RETURN(rc == -ENODATA ? 1 : rc);
 
@@ -1718,6 +1770,8 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
                GOTO(out, rc);
        }
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ENGINE_DELAY, cfs_fail_val);
+
        parent = lfsck_object_find_bottom(env, lfsck, pfid);
        if (IS_ERR(parent)) {
                rc = lfsck_namespace_trace_update(env, com, cfid,
@@ -1732,7 +1786,7 @@ int lfsck_namespace_verify_stripe_slave(const struct lu_env *env,
        if (unlikely(!dt_try_as_dir(env, parent)))
                GOTO(out, rc = -ENOTDIR);
 
-       rc = lfsck_read_stripe_lmv(env, parent, plmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, parent, plmv);
        if (rc != 0) {
                int rc1;
 
@@ -2178,7 +2232,7 @@ next:
                      "%d "DFID" of the striped directory "DFID" with "
                      "dangling %s/%s, rename %s/%s, llinkea %s/%s, "
                      "repair_lmvea %s/%s: rc = %d\n", lfsck_lfsck2name(lfsck),
-                     i, PFID(cfid), PFID(&lnr->lnr_fid),
+                     i, PFID(cfid), PFID(pfid),
                      create ? "yes" : "no", create_repaired ? "yes" : "no",
                      rename ? "yes" : "no", rename_repaired ? "yes" : "no",
                      repair_linkea ? "yes" : "no",
@@ -2363,7 +2417,7 @@ dangling:
                GOTO(out, rc = 0);
        }
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv);
        if (unlikely(rc == -ENOENT))
                /* It may happen when the remote object has been removed,
                 * but the local MDT is not aware of that. */