Whamcloud - gitweb
LU-3594 lfsck: repair inconsistent owner and multiple referenced cases 24/7524/30
author Fan Yong <fan.yong@intel.com>
Mon, 10 Feb 2014 13:16:50 +0000 (21:16 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 25 Feb 2014 00:16:28 +0000 (00:16 +0000)
Sometimes, the OST-object owner information is inconsistent with the
MDT-object owner information because of an incomplete chown/chgrp or
a system crash. In such cases, the MDT-object owner information
is trusted over the OST-object's. Because the chown/chgrp processing
order is: client => MDT => OST, it is possible that the OST-object
owner information is stale rather than the MDT-object's. Also, the
MDT-object's owner information is visible to users and can be directly
repaired by the system administrator, while the OST-object's owner
information is only used internally by quota. So the LFSCK will update
the OST-object owner information according to the MDT-object's owner.

If both MDT-object1 and MDT-object2 claim OST-object1 as one
of their child OST-objects, but OST-object1 only recognizes
MDT-object1, then the LFSCK will create a new OST-object and fix
MDT-object2's layout information to reference the newly created
OST-object.

Replace is_remote_trans() with is_only_remote_trans(), then drop the
compat patch http://review.whamcloud.com/9361

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I6b148180b5a2d68650b291250c03aac651e5f6e9
Reviewed-on: http://review.whamcloud.com/7524
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/lfsck/lfsck_layout.c
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/mdd/mdd_object.c
lustre/osp/osp_internal.h
lustre/osp/osp_object.c
lustre/osp/osp_trans.c
lustre/target/out_lib.c
lustre/tests/sanity-lfsck.sh

index 7b636b5..a29a458 100644 (file)
@@ -504,6 +504,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_LFSCK_DANGLING        0x1610
 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
 #define OBD_FAIL_LFSCK_DANGLING        0x1610
 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR1 0x1611
 #define OBD_FAIL_LFSCK_UNMATCHED_PAIR2 0x1612
+#define OBD_FAIL_LFSCK_BAD_OWNER       0x1613
+#define OBD_FAIL_LFSCK_MULTIPLE_REF    0x1614
 
 #define OBD_FAIL_LFSCK_NOTIFY_NET      0x16f0
 #define OBD_FAIL_LFSCK_QUERY_NET       0x16f1
 
 #define OBD_FAIL_LFSCK_NOTIFY_NET      0x16f0
 #define OBD_FAIL_LFSCK_QUERY_NET       0x16f1
index ea0450d..07b6d6b 100644 (file)
@@ -1560,6 +1560,205 @@ unlock1:
        return rc;
 }
 
        return rc;
 }
 
+/* Repair one MDT-object that references an OST-object which does not
+ * recognize it as its parent.
+ *
+ * If more than one MDT-object claims to be the OST-object's parent, and
+ * the OST-object only recognizes one of them, then we need to generate
+ * new OST-object(s) with new fid(s) for the non-recognized MDT-object(s).
+ *
+ * For the parent in @llr (llr->llr_parent->llo_obj) this function:
+ * - takes the layout lock on the parent (MDS_INODELOCK_LAYOUT |
+ *   MDS_INODELOCK_XATTR) and creates a transaction on
+ *   com->lc_lfsck->li_next;
+ * - allocates an object through lu_object_anon() on the device returned
+ *   by lfsck_obj2dt_dev(llr->llr_child) and declares/creates it with the
+ *   uid/gid from @la (la->la_valid is overwritten with LA_UID | LA_GID);
+ * - under the parent's dt write lock, re-reads XATTR_NAME_LOV into @buf,
+ *   verifies the header and the layout generation, then rewrites the
+ *   stripe slot llr->llr_lov_idx with the new object's ost_id, a zero
+ *   l_ost_gen and llr->llr_ost_idx, and stores llo_gen + 1 as the new
+ *   lmm_layout_gen.
+ *
+ * Return value:
+ * - 1 if the layout xattr was replaced successfully;
+ * - 0 if nothing was repaired: the parent is dying, dt_xattr_get()
+ *   returned 0, -ENODATA or -ERANGE, or the layout generation changed
+ *   since the LFSCK recorded it in llr_parent;
+ * - otherwise the non-zero result of the step that failed.
+ *
+ * NOTE(review): any other negative dt_xattr_get() result is not treated
+ * as a failure and falls through to lfsck_layout_verify_header() -
+ * confirm this is intended.
+ * NOTE(review): dt_declare_create() is passed NULL for the hint and the
+ * object format while dt_create() is passed both - confirm. */
+static int lfsck_layout_repair_multiple_references(const struct lu_env *env,
+                                                  struct lfsck_component *com,
+                                                  struct lfsck_layout_req *llr,
+                                                  struct lu_attr *la,
+                                                  struct lu_buf *buf)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct dt_allocation_hint       *hint   = &info->lti_hint;
+       struct dt_object_format         *dof    = &info->lti_dof;
+       struct dt_device                *pdev   = com->lc_lfsck->li_next;
+       struct ost_id                   *oi     = &info->lti_oi;
+       struct dt_object                *parent = llr->llr_parent->llo_obj;
+       struct dt_device                *cdev   = lfsck_obj2dt_dev(llr->llr_child);
+       struct dt_object                *child  = NULL;
+       struct lu_device                *d      = &cdev->dd_lu_dev;
+       struct lu_object                *o      = NULL;
+       struct thandle                  *handle;
+       struct lov_mds_md_v1            *lmm;
+       struct lov_ost_data_v1          *objs;
+       struct lustre_handle             lh     = { 0 };
+       __u32                            magic;
+       int                              rc;
+       ENTRY;
+
+       CDEBUG(D_LFSCK, "Repair multiple references for: parent "DFID
+              ", OST-index %u, stripe-index %u, owner %u:%u\n",
+              PFID(lfsck_dto2fid(parent)), llr->llr_ost_idx,
+              llr->llr_lov_idx, la->la_uid, la->la_gid);
+
+       rc = lfsck_layout_lock(env, com, parent, &lh,
+                              MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
+       if (rc != 0)
+               RETURN(rc);
+
+       handle = dt_trans_create(env, pdev);
+       if (IS_ERR(handle))
+               GOTO(unlock1, rc = PTR_ERR(handle));
+
+       o = lu_object_anon(env, d, NULL);
+       if (IS_ERR(o))
+               GOTO(stop, rc = PTR_ERR(o));
+
+       child = container_of(o, struct dt_object, do_lu);
+       o = lu_object_locate(o->lo_header, d->ld_type);
+       if (unlikely(o == NULL))
+               GOTO(stop, rc = -EINVAL);
+
+       child = container_of(o, struct dt_object, do_lu);
+       la->la_valid = LA_UID | LA_GID;
+       hint->dah_parent = NULL;
+       hint->dah_mode = 0;
+       dof->dof_type = DFT_REGULAR;
+       rc = dt_declare_create(env, child, la, NULL, NULL, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_declare_xattr_set(env, parent, buf, XATTR_NAME_LOV,
+                                 LU_XATTR_REPLACE, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, pdev, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, parent, 0);
+       if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
+               GOTO(unlock2, rc = 0);
+
+       rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA);
+       if (unlikely(rc == 0 || rc == -ENODATA || rc == -ERANGE))
+               GOTO(unlock2, rc = 0);
+
+       lmm = buf->lb_buf;
+       rc = lfsck_layout_verify_header(lmm);
+       if (rc != 0)
+               GOTO(unlock2, rc);
+
+       /* Someone changed the layout during the LFSCK: the generation
+        * stored in the LOV xattr no longer matches the one recorded in
+        * llr_parent, so there is no need to repair. */
+       if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen)
+               GOTO(unlock2, rc = 0);
+
+       rc = dt_create(env, child, la, hint, dof, handle);
+       if (rc != 0)
+               GOTO(unlock2, rc);
+
+       /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3, which has
+        * been verified in lfsck_layout_verify_header() already. If some
+        * new magic is introduced in the future, then layout LFSCK needs
+        * to be updated as well. */
+       magic = le32_to_cpu(lmm->lmm_magic);
+       if (magic == LOV_MAGIC_V1) {
+               objs = &(lmm->lmm_objects[0]);
+       } else {
+               LASSERT(magic == LOV_MAGIC_V3);
+               objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
+       }
+
+       lmm->lmm_layout_gen = cpu_to_le16(llr->llr_parent->llo_gen + 1);
+       fid_to_ostid(lu_object_fid(&child->do_lu), oi);
+       ostid_cpu_to_le(oi, &objs[llr->llr_lov_idx].l_ost_oi);
+       objs[llr->llr_lov_idx].l_ost_gen = cpu_to_le32(0);
+       objs[llr->llr_lov_idx].l_ost_idx = cpu_to_le32(llr->llr_ost_idx);
+       rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV,
+                         LU_XATTR_REPLACE, handle, BYPASS_CAPA);
+
+       GOTO(unlock2, rc = (rc == 0 ? 1 : rc));
+
+unlock2:
+       dt_write_unlock(env, parent);
+
+stop:
+       if (child != NULL)
+               lu_object_put(env, &child->do_lu);
+
+       dt_trans_stop(env, pdev, handle);
+
+unlock1:
+       lfsck_layout_unlock(&lh);
+
+       return rc;
+}
+
+/* Make the OST-object's owner match the MDT-object's owner.
+ *
+ * If the MDT-object and the OST-object have different owner information,
+ * then trust the MDT-object, because the normal chown/chgrp handling order
+ * is from MDT to OST, and it is possible that some chown/chgrp operation
+ * was only partly done.
+ *
+ * @pla carries the parent's uid/gid as seen by the caller. The function
+ * creates a transaction on the child's device (lfsck_obj2dt_dev(child)),
+ * declares an attr_set of LA_UID | LA_GID on the child, and then, under
+ * the parent's dt read lock, re-reads the parent's attributes into the
+ * per-thread scratch attribute info->lti_la3. The child is only updated
+ * when the parent is not dying and its current uid/gid still equal @pla.
+ *
+ * Before the transaction is stopped, rc is:
+ * - 1 if the parent is dying or its owner changed since @pla was read
+ *   (nothing is written);
+ * - the result of dt_attr_set() on the child otherwise;
+ * - the error of the step that failed.
+ * That rc is handed to lfsck_layout_trans_stop(), whose result is what
+ * this function returns.
+ *
+ * NOTE(review): lfsck_layout_trans_stop() is not visible here; confirm
+ * how it maps rc == 1 ("nothing to do") and rc == 0 ("attr set") onto the
+ * value the caller expects. */
+static int lfsck_layout_repair_owner(const struct lu_env *env,
+                                    struct lfsck_component *com,
+                                    struct lfsck_layout_req *llr,
+                                    struct lu_attr *pla)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_attr                  *tla    = &info->lti_la3;
+       struct dt_object                *parent = llr->llr_parent->llo_obj;
+       struct dt_object                *child  = llr->llr_child;
+       struct dt_device                *dev    = lfsck_obj2dt_dev(child);
+       struct thandle                  *handle;
+       int                              rc;
+       ENTRY;
+
+       CDEBUG(D_LFSCK, "Repair inconsistent file owner for: parent "DFID
+              ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
+              PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
+              llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid);
+
+       handle = dt_trans_create(env, dev);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       tla->la_uid = pla->la_uid;
+       tla->la_gid = pla->la_gid;
+       tla->la_valid = LA_UID | LA_GID;
+       rc = dt_declare_attr_set(env, child, tla, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, dev, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* Use the parent's dt_object lock to serialize with destroy and
+        * attr_set on the parent while its owner is re-checked and the
+        * child is updated. */
+       dt_read_lock(env, parent, 0);
+       if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
+               GOTO(unlock, rc = 1);
+
+       /* Get the latest parent's owner; this overwrites the uid/gid that
+        * were copied from @pla into the scratch attribute above. */
+       rc = dt_attr_get(env, parent, tla, BYPASS_CAPA);
+       if (rc != 0) {
+               CWARN("%s: fail to get the latest parent's ("DFID") owner, "
+                     "not sure whether some others chown/chgrp during the "
+                     "LFSCK: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck),
+                     PFID(lfsck_dto2fid(parent)), rc);
+
+               GOTO(unlock, rc);
+       }
+
+       /* Someone else ran chown/chgrp during the LFSCK: the parent's
+        * owner no longer matches @pla, so nothing needs to be done. */
+       if (unlikely(tla->la_uid != pla->la_uid ||
+                    tla->la_gid != pla->la_gid))
+               GOTO(unlock, rc = 1);
+
+       tla->la_valid = LA_UID | LA_GID;
+       rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA);
+
+       GOTO(unlock, rc);
+
+unlock:
+       dt_read_unlock(env, parent);
+
+stop:
+       rc = lfsck_layout_trans_stop(env, dev, handle, rc);
+
+       return rc;
+}
+
 /* Check whether the OST-object correctly back points to the
  * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
 static int lfsck_layout_check_parent(const struct lu_env *env,
 /* Check whether the OST-object correctly back points to the
  * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
 static int lfsck_layout_check_parent(const struct lu_env *env,
@@ -1585,8 +1784,13 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
 
        if (fid_is_zero(pfid)) {
                /* client never wrote. */
 
        if (fid_is_zero(pfid)) {
                /* client never wrote. */
-               if (cla->la_size == 0 && cla->la_blocks == 0)
+               if (cla->la_size == 0 && cla->la_blocks == 0) {
+                       if (unlikely(cla->la_uid != pla->la_uid ||
+                                    cla->la_gid != pla->la_gid))
+                               RETURN (LLIT_INCONSISTENT_OWNER);
+
                        RETURN(0);
                        RETURN(0);
+               }
 
                RETURN(LLIT_UNMATCHED_PAIR);
        }
 
                RETURN(LLIT_UNMATCHED_PAIR);
        }
@@ -1669,7 +1873,7 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
        struct lfsck_thread_info             *info   = lfsck_env_info(env);
        struct filter_fid_old                *pea    = &info->lti_old_pfid;
        struct lu_fid                        *pfid   = &info->lti_fid;
        struct lfsck_thread_info             *info   = lfsck_env_info(env);
        struct filter_fid_old                *pea    = &info->lti_old_pfid;
        struct lu_fid                        *pfid   = &info->lti_fid;
-       struct lu_buf                        *buf;
+       struct lu_buf                        *buf    = NULL;
        struct dt_object                     *parent = llr->llr_parent->llo_obj;
        struct dt_object                     *child  = llr->llr_child;
        struct lu_attr                       *pla    = &info->lti_la;
        struct dt_object                     *parent = llr->llr_parent->llo_obj;
        struct dt_object                     *child  = llr->llr_child;
        struct lu_attr                       *pla    = &info->lti_la;
@@ -1734,7 +1938,11 @@ static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
        if (rc < 0)
                GOTO(out, rc);
 
        if (rc < 0)
                GOTO(out, rc);
 
-       /* XXX: other inconsistency will be checked in other patches. */
+       if (unlikely(cla->la_uid != pla->la_uid ||
+                    cla->la_gid != pla->la_gid)) {
+               type = LLIT_INCONSISTENT_OWNER;
+               goto repair;
+       }
 
 repair:
        if (bk->lb_param & LPF_DRYRUN) {
 
 repair:
        if (bk->lb_param & LPF_DRYRUN) {
@@ -1757,12 +1965,12 @@ repair:
        case LLIT_UNMATCHED_PAIR:
                rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
                break;
        case LLIT_UNMATCHED_PAIR:
                rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
                break;
-
-       /* XXX: other inconsistency will be fixed in other patches. */
-
        case LLIT_MULTIPLE_REFERENCED:
        case LLIT_MULTIPLE_REFERENCED:
+               rc = lfsck_layout_repair_multiple_references(env, com, llr,
+                                                            pla, buf);
                break;
        case LLIT_INCONSISTENT_OWNER:
                break;
        case LLIT_INCONSISTENT_OWNER:
+               rc = lfsck_layout_repair_owner(env, com, llr, pla);
                break;
        default:
                rc = 0;
                break;
        default:
                rc = 0;
index a8b738b..bfa8b55 100644 (file)
@@ -556,14 +556,21 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        }
 
        for (i = 0; i < lo->ldo_stripenr; i++) {
        }
 
        for (i = 0; i < lo->ldo_stripenr; i++) {
-               const struct lu_fid     *fid;
+               struct lu_fid           *fid    = &info->lti_fid;
                struct lod_device       *lod;
                __u32                   index;
                int                     type    = LU_SEQ_RANGE_OST;
 
                lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
                LASSERT(lo->ldo_stripe[i]);
                struct lod_device       *lod;
                __u32                   index;
                int                     type    = LU_SEQ_RANGE_OST;
 
                lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
                LASSERT(lo->ldo_stripe[i]);
-               fid = lu_object_fid(&lo->ldo_stripe[i]->do_lu);
+
+               *fid = *lu_object_fid(&lo->ldo_stripe[i]->do_lu);
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_MULTIPLE_REF)) {
+                       if (cfs_fail_val == 0)
+                               cfs_fail_val = fid->f_oid;
+                       else
+                               fid->f_oid = cfs_fail_val;
+               }
 
                rc = fid_to_ostid(fid, &info->lti_ostid);
                LASSERT(rc == 0);
 
                rc = fid_to_ostid(fid, &info->lti_ostid);
                LASSERT(rc == 0);
index 2cbd028..5f69061 100644 (file)
@@ -303,6 +303,9 @@ static int lod_declare_attr_set(const struct lu_env *env,
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
                if (!(attr->la_valid & (LA_UID | LA_GID)))
                        RETURN(rc);
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
                if (!(attr->la_valid & (LA_UID | LA_GID)))
                        RETURN(rc);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
+                       RETURN(0);
        } else {
                if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
                                        LA_ATIME | LA_MTIME | LA_CTIME)))
        } else {
                if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
                                        LA_ATIME | LA_MTIME | LA_CTIME)))
@@ -380,6 +383,9 @@ static int lod_attr_set(const struct lu_env *env,
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
                if (!(attr->la_valid & (LA_UID | LA_GID)))
                        RETURN(rc);
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
                if (!(attr->la_valid & (LA_UID | LA_GID)))
                        RETURN(rc);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
+                       RETURN(0);
        } else {
                if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
                                        LA_ATIME | LA_MTIME | LA_CTIME)))
        } else {
                if (!(attr->la_valid & (LA_UID | LA_GID | LA_MODE |
                                        LA_ATIME | LA_MTIME | LA_CTIME)))
index b27306f..9e86b9e 100644 (file)
@@ -888,20 +888,24 @@ int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
                 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
                       la->la_mtime, la->la_ctime);
 
                 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
                       la->la_mtime, la->la_ctime);
 
-        if (la_copy->la_valid & LA_FLAGS) {
+       mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
+       if (la_copy->la_valid & LA_FLAGS) {
                rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
                rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
-                if (rc == 0)
-                        mdd_flags_xlate(mdd_obj, la_copy->la_flags);
-        } else if (la_copy->la_valid) {            /* setattr */
+               if (rc == 0)
+                       mdd_flags_xlate(mdd_obj, la_copy->la_flags);
+       } else if (la_copy->la_valid) { /* setattr */
                rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
                rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
-        }
+       }
+       mdd_write_unlock(env, mdd_obj);
+
+       if (rc == 0)
+               rc = mdd_attr_set_changelog(env, obj, handle, la->la_valid);
+
+       GOTO(stop, rc);
 
 
-        if (rc == 0)
-                rc = mdd_attr_set_changelog(env, obj, handle,
-                                           la->la_valid);
 stop:
 stop:
-        mdd_trans_stop(env, mdd, rc, handle);
-        RETURN(rc);
+       mdd_trans_stop(env, mdd, rc, handle);
+       return rc;
 }
 
 static int mdd_xattr_sanity_check(const struct lu_env *env,
 }
 
 static int mdd_xattr_sanity_check(const struct lu_env *env,
index b699f74..2af16c0 100644 (file)
@@ -277,10 +277,8 @@ struct osp_thread_info {
  * no local updates at all */
 static inline bool is_only_remote_trans(struct thandle *th)
 {
  * no local updates at all */
 static inline bool is_only_remote_trans(struct thandle *th)
 {
-       return th->th_dev->dd_ops == &osp_dt_ops;
+       return th->th_dev != NULL && th->th_dev->dd_ops == &osp_dt_ops;
 }
 }
-/* compat define for lfsck, to be removed soon */
-#define is_remote_trans(a) is_only_remote_trans(a)
 
 static inline void osp_objid_buf_prep(struct lu_buf *buf, loff_t *off,
                                      __u32 *id, int index)
 
 static inline void osp_objid_buf_prep(struct lu_buf *buf, loff_t *off,
                                      __u32 *id, int index)
index 5ffb7e4..aaf7546 100644 (file)
@@ -431,7 +431,7 @@ static int osp_declare_attr_set(const struct lu_env *env, struct dt_object *dt,
        if (!(attr->la_valid & (LA_UID | LA_GID)))
                RETURN(0);
 
        if (!(attr->la_valid & (LA_UID | LA_GID)))
                RETURN(0);
 
-       if (!is_remote_trans(th))
+       if (!is_only_remote_trans(th))
                /*
                 * track all UID/GID changes via llog
                 */
                /*
                 * track all UID/GID changes via llog
                 */
@@ -482,7 +482,7 @@ static int osp_attr_set(const struct lu_env *env, struct dt_object *dt,
                RETURN(0);
        }
 
                RETURN(0);
        }
 
-       if (!is_remote_trans(th))
+       if (!is_only_remote_trans(th))
                /*
                 * once transaction is committed put proper command on
                 * the queue going to our OST
                /*
                 * once transaction is committed put proper command on
                 * the queue going to our OST
@@ -879,7 +879,7 @@ static int osp_declare_object_create(const struct lu_env *env,
 
        ENTRY;
 
 
        ENTRY;
 
-       if (is_remote_trans(th)) {
+       if (is_only_remote_trans(th)) {
                LASSERT(fid_is_sane(fid));
 
                rc = osp_md_declare_object_create(env, dt, attr, hint, dof, th);
                LASSERT(fid_is_sane(fid));
 
                rc = osp_md_declare_object_create(env, dt, attr, hint, dof, th);
@@ -953,7 +953,7 @@ static int osp_object_create(const struct lu_env *env, struct dt_object *dt,
        struct lu_fid           *fid = &osi->osi_fid;
        ENTRY;
 
        struct lu_fid           *fid = &osi->osi_fid;
        ENTRY;
 
-       if (is_remote_trans(th)) {
+       if (is_only_remote_trans(th)) {
                LASSERT(fid_is_sane(lu_object_fid(&dt->do_lu)));
 
                rc = osp_md_object_create(env, dt, attr, hint, dof, th);
                LASSERT(fid_is_sane(lu_object_fid(&dt->do_lu)));
 
                rc = osp_md_object_create(env, dt, attr, hint, dof, th);
index 902de54..dc3481c 100644 (file)
@@ -228,7 +228,7 @@ struct thandle *osp_trans_create(const struct lu_env *env, struct dt_device *d)
 {
        struct thandle *th = NULL;
        struct thandle_update *tu = NULL;
 {
        struct thandle *th = NULL;
        struct thandle_update *tu = NULL;
-       int rc;
+       int rc = 0;
 
        OBD_ALLOC_PTR(th);
        if (unlikely(th == NULL))
 
        OBD_ALLOC_PTR(th);
        if (unlikely(th == NULL))
@@ -245,6 +245,8 @@ struct thandle *osp_trans_create(const struct lu_env *env, struct dt_device *d)
 
        INIT_LIST_HEAD(&tu->tu_remote_update_list);
        tu->tu_only_remote_trans = 1;
 
        INIT_LIST_HEAD(&tu->tu_remote_update_list);
        tu->tu_only_remote_trans = 1;
+       th->th_update = tu;
+
 out:
        if (rc != 0) {
                if (tu != NULL)
 out:
        if (rc != 0) {
                if (tu != NULL)
@@ -267,7 +269,7 @@ static int osp_trans_trigger(const struct lu_env *env, struct osp_device *osp,
 
        /* If the transaction only includes remote update, it should
         * still be asynchronous */
 
        /* If the transaction only includes remote update, it should
         * still be asynchronous */
-       if (tu->tu_only_remote_trans) {
+       if (is_only_remote_trans(th)) {
                struct osp_async_update_args    *args;
                struct ptlrpc_request           *req;
 
                struct osp_async_update_args    *args;
                struct ptlrpc_request           *req;
 
@@ -323,7 +325,7 @@ int osp_trans_start(const struct lu_env *env, struct dt_device *dt,
         * If it is remote unlink, it will send the remote req before
         * the local transaction, i.e. delete the name entry remote
         * first, then destroy the local object. */
         * If it is remote unlink, it will send the remote req before
         * the local transaction, i.e. delete the name entry remote
         * first, then destroy the local object. */
-       if (!tu->tu_only_remote_trans && !tu->tu_sent_after_local_trans)
+       if (!is_only_remote_trans(th) && !tu->tu_sent_after_local_trans)
                rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th);
 
        return rc;
                rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th);
 
        return rc;
@@ -339,26 +341,34 @@ int osp_trans_stop(const struct lu_env *env, struct dt_device *dt,
        LASSERT(tu != NULL);
        /* Check whether there are updates related with this OSP */
        update = out_find_update(tu, dt);
        LASSERT(tu != NULL);
        /* Check whether there are updates related with this OSP */
        update = out_find_update(tu, dt);
-       if (update == NULL)
-               return rc;
+       if (update == NULL) {
+               if (!is_only_remote_trans(th))
+                       return rc;
+               goto put;
+       }
 
 
-       if (update->ur_buf->ub_count == 0)
-               GOTO(free, rc);
+       if (update->ur_buf->ub_count == 0) {
+               out_destroy_update_req(update);
+               goto put;
+       }
 
 
-       if (tu->tu_only_remote_trans) {
-               if (th->th_result == 0)
+       if (is_only_remote_trans(th)) {
+               if (th->th_result == 0) {
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
                                               update, th);
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
                                               update, th);
-               else
+               } else {
                        rc = th->th_result;
                        rc = th->th_result;
+                       out_destroy_update_req(update);
+               }
        } else {
                if (tu->tu_sent_after_local_trans)
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
                                               update, th);
                rc = update->ur_rc;
        } else {
                if (tu->tu_sent_after_local_trans)
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
                                               update, th);
                rc = update->ur_rc;
+               out_destroy_update_req(update);
        }
        }
-free:
-       out_destroy_update_req(update);
+
+put:
        thandle_put(th);
        return rc;
 }
        thandle_put(th);
        return rc;
 }
index c2d41f7..8ba1fd6 100644 (file)
@@ -123,7 +123,8 @@ struct update_request *out_find_create_update_loc(struct thandle *th,
 
        list_add_tail(&update->ur_list, &tu->tu_remote_update_list);
 
 
        list_add_tail(&update->ur_list, &tu->tu_remote_update_list);
 
-       thandle_get(th);
+       if (!tu->tu_only_remote_trans)
+               thandle_get(th);
 
        RETURN(update);
 }
 
        RETURN(update);
 }
index 2a40d1c..405422f 100644 (file)
@@ -43,7 +43,7 @@ check_and_setup_lustre
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
 
 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c"
 
 [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] &&
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15"
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17"
 
 build_test_filter
 
 
 build_test_filter
 
@@ -1417,6 +1417,117 @@ test_15b() {
 }
 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
 
 }
 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
 
+test_16() {
+       echo "#####"
+       echo "If the OST-object's owner information does not match the owner"
+       echo "information stored in the MDT-object, then the LFSCK trust the"
+       echo "MDT-object and update the OST-object's owner information."
+       echo "#####"
+
+       echo "stopall"
+       stopall > /dev/null
+       echo "formatall"
+       formatall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       mkdir -p $DIR/$tdir
+       $LFS setstripe -c 1 -i 0 $DIR/$tdir
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1
+       cancel_lru_locks osc
+       sync
+       sleep 2
+
+       echo "Inject failure stub to skip OST-object owner changing"
+       #define OBD_FAIL_LFSCK_BAD_OWNER        0x1613 (lod_declare_attr_set()/lod_attr_set() return 0 early for uid/gid changes on non-directories)
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1613
+       chown 1.1 $DIR/$tdir/f0
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       echo "Trigger layout LFSCK to find out inconsistent OST-object owner"
+       echo "and fix them"
+
+       $START_LAYOUT || error "(1) Fail to start LFSCK for layout!"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_layout |
+               awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2
+
+       local repaired=$($SHOW_LAYOUT |
+                        awk '/^repaired_inconsistent_owner/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(3) Fail to repair inconsistent owner: $repaired"
+}
+run_test 16 "LFSCK can repair inconsistent MDT-object/OST-object owner"
+
+test_17() {
+       echo "#####"
+       echo "If more than one MDT-objects reference the same OST-object,"
+       echo "and the OST-object only recognizes one MDT-object, then the"
+       echo "LFSCK should create new OST-objects for such non-recognized"
+       echo "MDT-objects."
+       echo "#####"
+
+       echo "stopall"
+       stopall > /dev/null
+       echo "formatall"
+       formatall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       mkdir -p $DIR/$tdir
+       $LFS setstripe -c 1 -i 0 $DIR/$tdir
+
+       echo "Inject failure stub to make two MDT-objects to refernce"
+       echo "the OST-object"
+
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0
+       #define OBD_FAIL_LFSCK_MULTIPLE_REF     0x1614 (with fail_val=0, lod_generate_and_set_lovea() saves the first stripe's f_oid in fail_val and reuses it for later stripes)
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1614
+
+       dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1
+       cancel_lru_locks osc
+       sync
+       sleep 2
+
+       createmany -o $DIR/$tdir/f 1 > /dev/null 2>&1
+
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0
+
+       echo "stopall to cleanup object cache"
+       stopall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects"
+       local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }')
+       [ $size -eq 1048576 ] ||
+               error "(1) f0 (wrong) size should be 1048576, but got $size"
+
+       echo "Trigger layout LFSCK to find out multiple refenced MDT-objects"
+       echo "and fix them"
+
+       $START_LAYOUT || error "(2) Fail to start LFSCK for layout!"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_layout |
+               awk '/^status/ { print \\\$2 }'" "completed" 3 || return 3
+
+       local repaired=$($SHOW_LAYOUT |
+                        awk '/^repaired_multiple_referenced/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(4) Fail to repair multiple references: $repaired"
+
+       echo "$DIR/$tdir/f0 and $DIR/$tdir/guard should use diff OST-objects"
+       dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=2 ||
+               error "(5) Fail to write f0."
+       size=$(ls -l $DIR/$tdir/guard | awk '{ print $5 }')
+       [ $size -eq 1048576 ] ||
+               error "(6) guard size should be 1048576, but got $size"
+}
+run_test 17 "LFSCK can repair multiple references"
+
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size