Whamcloud - gitweb
LU-3591 lfsck: repair unmatched MDT-OST objects pairs
[fs/lustre-release.git] / lustre / lfsck / lfsck_layout.c
index 15e9c05..ea0450d 100644 (file)
@@ -1472,12 +1472,204 @@ unlock1:
        return rc;
 }
 
+/* If the OST-object does not recognize the MDT-object as its parent, and
+ * there is no other MDT-object claims as its parent, then just trust the
+ * given MDT-object as its parent. So update the OST-object filter_fid. */
+static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env,
+                                             struct lfsck_component *com,
+                                             struct lfsck_layout_req *llr,
+                                             const struct lu_attr *pla)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct filter_fid               *pfid   = &info->lti_new_pfid;
+       struct lu_attr                  *tla    = &info->lti_la3;
+       struct dt_object                *parent = llr->llr_parent->llo_obj;
+       struct dt_object                *child  = llr->llr_child;
+       struct dt_device                *dev    = lfsck_obj2dt_dev(child);
+       const struct lu_fid             *tfid   = lu_object_fid(&parent->do_lu);
+       struct thandle                  *handle;
+       struct lu_buf                   *buf;
+       struct lustre_handle             lh     = { 0 };
+       int                              rc;
+       ENTRY;
+
+       CDEBUG(D_LFSCK, "Repair unmatched MDT-OST pair for: parent "DFID
+              ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
+              PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
+              llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid);
+
+       rc = lfsck_layout_lock(env, com, parent, &lh,
+                              MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
+       if (rc != 0)
+               RETURN(rc);
+
+       handle = dt_trans_create(env, dev);
+       if (IS_ERR(handle))
+               GOTO(unlock1, rc = PTR_ERR(handle));
+
+       pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
+       pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
+       /* The ff_parent->f_ver is not the real parent fid->f_ver. Instead,
+        * it is the OST-object index in the parent MDT-object layout. */
+       pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
+       buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
+
+       rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       tla->la_valid = LA_UID | LA_GID;
+       tla->la_uid = pla->la_uid;
+       tla->la_gid = pla->la_gid;
+       rc = dt_declare_attr_set(env, child, tla, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, dev, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, parent, 0);
+       if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
+               GOTO(unlock2, rc = 1);
+
+       rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle,
+                         BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(unlock2, rc);
+
+       /* Get the latest parent's owner. */
+       rc = dt_attr_get(env, parent, tla, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(unlock2, rc);
+
+       tla->la_valid = LA_UID | LA_GID;
+       rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA);
+
+       GOTO(unlock2, rc);
+
+unlock2:
+       dt_write_unlock(env, parent);
+
+stop:
+       rc = lfsck_layout_trans_stop(env, dev, handle, rc);
+
+unlock1:
+       lfsck_layout_unlock(&lh);
+
+       return rc;
+}
+
+/* Check whether the OST-object correctly back points to the
+ * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
+static int lfsck_layout_check_parent(const struct lu_env *env,
+                                    struct lfsck_component *com,
+                                    struct dt_object *parent,
+                                    const struct lu_fid *pfid,
+                                    const struct lu_fid *cfid,
+                                    const struct lu_attr *pla,
+                                    const struct lu_attr *cla,
+                                    struct lfsck_layout_req *llr,
+                                    struct lu_buf *lov_ea, __u32 idx)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_buf                   *buf    = &info->lti_big_buf;
+       struct dt_object                *tobj;
+       struct lov_mds_md_v1            *lmm;
+       struct lov_ost_data_v1          *objs;
+       int                              rc;
+       int                              i;
+       __u32                            magic;
+       __u16                            count;
+       ENTRY;
+
+       if (fid_is_zero(pfid)) {
+               /* client never wrote. */
+               if (cla->la_size == 0 && cla->la_blocks == 0)
+                       RETURN(0);
+
+               RETURN(LLIT_UNMATCHED_PAIR);
+       }
+
+       if (unlikely(!fid_is_sane(pfid)))
+               RETURN(LLIT_UNMATCHED_PAIR);
+
+       if (lu_fid_eq(pfid, lu_object_fid(&parent->do_lu))) {
+               if (llr->llr_lov_idx == idx)
+                       RETURN(0);
+
+               RETURN(LLIT_UNMATCHED_PAIR);
+       }
+
+       tobj = lfsck_object_find(env, com->lc_lfsck, pfid);
+       if (tobj == NULL)
+               RETURN(LLIT_UNMATCHED_PAIR);
+
+       if (IS_ERR(tobj))
+               RETURN(PTR_ERR(tobj));
+
+       if (!dt_object_exists(tobj))
+               GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+
+       /* Load the tobj's layout EA, in spite of it is a local MDT-object or
+        * remote one on another MDT. Then check whether the given OST-object
+        * is in such layout. If yes, it is multiple referenced, otherwise it
+        * is unmatched referenced case. */
+       rc = lfsck_layout_get_lovea(env, tobj, buf, NULL);
+       if (rc == 0)
+               GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+
+       if (rc < 0)
+               GOTO(out, rc);
+
+       lmm = buf->lb_buf;
+       rc = lfsck_layout_verify_header(lmm);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
+        * been verified in lfsck_layout_verify_header() already. If some
+        * new magic introduced in the future, then layout LFSCK needs to
+        * be updated also. */
+       magic = le32_to_cpu(lmm->lmm_magic);
+       if (magic == LOV_MAGIC_V1) {
+               objs = &(lmm->lmm_objects[0]);
+       } else {
+               LASSERT(magic == LOV_MAGIC_V3);
+               objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
+       }
+
+       count = le16_to_cpu(lmm->lmm_stripe_count);
+       for (i = 0; i < count; i++, objs++) {
+               struct lu_fid           *tfid   = &info->lti_fid2;
+               struct ost_id           *oi     = &info->lti_oi;
+
+               ostid_le_to_cpu(&objs->l_ost_oi, oi);
+               ostid_to_fid(tfid, oi, le32_to_cpu(objs->l_ost_idx));
+               if (lu_fid_eq(cfid, tfid)) {
+                       *lov_ea = *buf;
+
+                       GOTO(out, rc = LLIT_MULTIPLE_REFERENCED);
+               }
+       }
+
+       GOTO(out, rc = LLIT_UNMATCHED_PAIR);
+
+out:
+       lfsck_object_put(env, tobj);
+
+       return rc;
+}
+
 static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
                                             struct lfsck_component *com,
                                             struct lfsck_layout_req *llr)
 {
        struct lfsck_layout                  *lo     = com->lc_file_ram;
        struct lfsck_thread_info             *info   = lfsck_env_info(env);
+       struct filter_fid_old                *pea    = &info->lti_old_pfid;
+       struct lu_fid                        *pfid   = &info->lti_fid;
+       struct lu_buf                        *buf;
        struct dt_object                     *parent = llr->llr_parent->llo_obj;
        struct dt_object                     *child  = llr->llr_child;
        struct lu_attr                       *pla    = &info->lti_la;
@@ -1485,6 +1677,7 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
        struct lfsck_instance                *lfsck  = com->lc_lfsck;
        struct lfsck_bookmark                *bk     = &lfsck->li_bookmark_ram;
        enum lfsck_layout_inconsistency_type  type   = LLIT_NONE;
+       __u32                                 idx    = 0;
        int                                   rc;
        ENTRY;
 
@@ -1508,6 +1701,39 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
        if (rc != 0)
                GOTO(out, rc);
 
+       buf = lfsck_buf_get(env, pea, sizeof(struct filter_fid_old));
+       rc= dt_xattr_get(env, child, buf, XATTR_NAME_FID, BYPASS_CAPA);
+       if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) &&
+                    rc != sizeof(struct filter_fid))) {
+               type = LLIT_UNMATCHED_PAIR;
+               goto repair;
+       }
+
+       if (rc < 0 && rc != -ENODATA)
+               GOTO(out, rc);
+
+       if (rc == -ENODATA) {
+               fid_zero(pfid);
+       } else {
+               fid_le_to_cpu(pfid, &pea->ff_parent);
+               /* OST-object does not save parent FID::f_ver, instead,
+                * the OST-object index in the parent MDT-object layout
+                * EA reuses the pfid->f_ver. */
+               idx = pfid->f_ver;
+               pfid->f_ver = 0;
+       }
+
+       rc = lfsck_layout_check_parent(env, com, parent, pfid,
+                                      lu_object_fid(&child->do_lu),
+                                      pla, cla, llr, buf, idx);
+       if (rc > 0) {
+               type = rc;
+               goto repair;
+       }
+
+       if (rc < 0)
+               GOTO(out, rc);
+
        /* XXX: other inconsistency will be checked in other patches. */
 
 repair:
@@ -1528,11 +1754,12 @@ repair:
                                LA_ATIME | LA_MTIME | LA_CTIME;
                rc = lfsck_layout_recreate_ostobj(env, com, llr, cla);
                break;
+       case LLIT_UNMATCHED_PAIR:
+               rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
+               break;
 
        /* XXX: other inconsistency will be fixed in other patches. */
 
-       case LLIT_UNMATCHED_PAIR:
-               break;
        case LLIT_MULTIPLE_REFERENCED:
                break;
        case LLIT_INCONSISTENT_OWNER:
@@ -2345,6 +2572,7 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
        struct lu_buf                   *buf;
        int                              rc      = 0;
        int                              i;
+       __u32                            magic;
        __u16                            count;
        __u16                            gen;
        ENTRY;
@@ -2353,10 +2581,17 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
                            sizeof(struct filter_fid_old));
        count = le16_to_cpu(lmm->lmm_stripe_count);
        gen = le16_to_cpu(lmm->lmm_layout_gen);
-       if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1)
+       /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
+        * been verified in lfsck_layout_verify_header() already. If some
+        * new magic introduced in the future, then layout LFSCK needs to
+        * be updated also. */
+       magic = le32_to_cpu(lmm->lmm_magic);
+       if (magic == LOV_MAGIC_V1) {
                objs = &(lmm->lmm_objects[0]);
-       else
+       } else {
+               LASSERT(magic == LOV_MAGIC_V3);
                objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
+       }
 
        for (i = 0; i < count; i++, objs++) {
                struct lu_fid           *fid    = &info->lti_fid;