Whamcloud - gitweb
LU-3335 scrub: OI scrub on OST 69/6669/13
authorFan Yong <fan.yong@intel.com>
Wed, 19 Jun 2013 16:14:29 +0000 (00:14 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Fri, 26 Jul 2013 05:23:07 +0000 (05:23 +0000)
OI scrub should have the ability to handle all kinds of OI, including
both the OI files on the MDT and the /O directory on the OST.

We trust the FID in the LMA for both MDT objects and OST objects. So
if some /O sub-item does not match the related LMA, then the /O entry
will be updated instead of the LMA.

To guarantee that the OI scrub can run without involving MDT0 for
FLDB, the OST object needs to store a flag in its LMA to tell the
OI scrub that it is an OST object, so there is no need to query MDT0.

Test-Parameters: testlist=sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I2da3816b28a7c6d5ad0b7f8d5b43f98a9886ff0c
Reviewed-on: http://review.whamcloud.com/6669
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
15 files changed:
lustre/include/lustre/lustre_idl.h
lustre/include/obd_support.h
lustre/mdd/mdd_compat.c
lustre/obdclass/md_attrs.c
lustre/osd-ldiskfs/osd_compat.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_oi.c
lustre/osd-ldiskfs/osd_oi.h
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-zfs/osd_object.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity-scrub.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index fd0fb2a..c3352bb 100644 (file)
@@ -321,8 +321,11 @@ static inline int range_compare_loc(const struct lu_seq_range *r1,
  * xattr.
  */
 enum lma_compat {
-        LMAC_HSM = 0x00000001,
-        LMAC_SOM = 0x00000002,
+       LMAC_HSM        = 0x00000001,
+       LMAC_SOM        = 0x00000002,
+       LMAC_NOT_IN_OI  = 0x00000004, /* the object does NOT need OI mapping */
+       LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
+                                      * under /O/<seq>/d<x>. */
 };
 
 /**
@@ -331,16 +334,17 @@ enum lma_compat {
  * This information is stored in lustre_mdt_attrs::lma_incompat.
  */
 enum lma_incompat {
-       LMAI_RELEASED = 0x0000001, /* file is released */
-       LMAI_AGENT = 0x00000002, /* agent inode */
-       LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
-                                           is on the remote MDT */
+       LMAI_RELEASED           = 0x00000001, /* file is released */
+       LMAI_AGENT              = 0x00000002, /* agent inode */
+       LMAI_REMOTE_PARENT      = 0x00000004, /* the parent of the object
+                                                is on the remote MDT */
 };
 #define LMA_INCOMPAT_SUPP      (LMAI_AGENT | LMAI_REMOTE_PARENT)
 
 extern void lustre_lma_swab(struct lustre_mdt_attrs *lma);
 extern void lustre_lma_init(struct lustre_mdt_attrs *lma,
-                           const struct lu_fid *fid, __u32 incompat);
+                           const struct lu_fid *fid,
+                           __u32 compat, __u32 incompat);
 /**
  * SOM on-disk attributes stored in a separate xattr.
  */
index 75a5cd3..b9ec568 100644 (file)
@@ -264,6 +264,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OSD_SCRUB_FATAL                       0x192
 #define OBD_FAIL_OSD_FID_MAPPING                       0x193
 #define OBD_FAIL_OSD_LMA_INCOMPAT                      0x194
+#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY              0x195
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
index ae50e30..fedc877 100644 (file)
@@ -182,7 +182,7 @@ static int mdd_convert_lma(const struct lu_env *env, struct mdd_device *mdd,
        lu_root_fid(&fid);
 
        lma = (struct lustre_mdt_attrs *)&mdd_env_info(env)->mti_xattr_buf;
-       lustre_lma_init(lma, &fid, 0);
+       lustre_lma_init(lma, &fid, 0, 0);
        lustre_lma_swab(lma);
        buf.lb_buf = lma;
        buf.lb_len = sizeof(*lma);
index 6a001f0..111d707 100644 (file)
@@ -39,9 +39,9 @@
  * \param incompat - features that MDS must understand to access object
  */
 void lustre_lma_init(struct lustre_mdt_attrs *lma, const struct lu_fid *fid,
-                    __u32 incompat)
+                    __u32 compat, __u32 incompat)
 {
-       lma->lma_compat   = 0;
+       lma->lma_compat   = compat;
        lma->lma_incompat = incompat;
        lma->lma_self_fid = *fid;
 
index 2505868..2403aa1 100644 (file)
@@ -172,7 +172,7 @@ static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
        struct osd_mdobj_map    *omm;
        struct dentry           *d;
        struct osd_thread_info  *info = osd_oti_get(env);
-       struct lu_fid           *fid = &info->oti_fid;
+       struct lu_fid           *fid = &info->oti_fid3;
        int                     rc = 0;
        ENTRY;
 
@@ -197,9 +197,10 @@ static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
 
        /* Set LMA for remote parent inode */
        lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
-       rc = osd_ea_fid_set(info, d->d_inode, fid, 0);
-       if (rc != 0)
-               GOTO(cleanup, rc);
+       rc = osd_ea_fid_set(info, d->d_inode, fid, LMAC_NOT_IN_OI, 0);
+
+       GOTO(cleanup, rc);
+
 cleanup:
        pop_ctxt(&save, &new, NULL);
        if (rc) {
@@ -208,7 +209,7 @@ cleanup:
                OBD_FREE_PTR(omm);
                dev->od_mdt_map = NULL;
        }
-       RETURN(rc);
+       return rc;
 }
 
 static void osd_mdt_fini(struct osd_device *osd)
@@ -362,13 +363,16 @@ int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
  * CONFIGS
  *
  */
-static int osd_ost_init(struct osd_device *dev)
+static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
 {
-       struct lvfs_run_ctxt  new;
-       struct lvfs_run_ctxt  save;
-       struct dentry        *rootd = osd_sb(dev)->s_root;
-       struct dentry        *d;
-       int                   rc;
+       struct lvfs_run_ctxt     new;
+       struct lvfs_run_ctxt     save;
+       struct dentry           *rootd = osd_sb(dev)->s_root;
+       struct dentry           *d;
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct inode            *inode;
+       struct lu_fid           *fid = &info->oti_fid3;
+       int                      rc;
        ENTRY;
 
        OBD_ALLOC_PTR(dev->od_ost_map);
@@ -396,17 +400,25 @@ static int osd_ost_init(struct osd_device *dev)
        if (IS_ERR(d))
                GOTO(cleanup, rc = PTR_ERR(d));
 
-       ldiskfs_set_inode_state(d->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       inode = d->d_inode;
+       ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
        dev->od_ost_map->om_root = d;
 
+       /* The actual value of @fid is not important, because the object
+        * has no OI mapping and is only visible inside the OSD. */
+       lu_igif_build(fid, inode->i_ino, inode->i_generation);
+       rc = osd_ea_fid_set(info, inode, fid,
+                           LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+
+       GOTO(cleanup, rc);
+
 cleanup:
        osd_pop_ctxt(dev, &new, &save);
         if (IS_ERR(d)) {
                 OBD_FREE_PTR(dev->od_ost_map);
                 RETURN(PTR_ERR(d));
         }
-
-       RETURN(rc);
+       return rc;
 }
 
 static void osd_seq_free(struct osd_obj_map *map,
@@ -461,7 +473,7 @@ int osd_obj_map_init(const struct lu_env *env, struct osd_device *dev)
        ENTRY;
 
        /* prepare structures for OST */
-       rc = osd_ost_init(dev);
+       rc = osd_ost_init(env, dev);
        if (rc)
                RETURN(rc);
 
@@ -498,6 +510,111 @@ void osd_obj_map_fini(struct osd_device *dev)
        osd_mdt_fini(dev);
 }
 
+/**
+ * Update the specified OI mapping.
+ *
+ * Look up the entry \a name under \a dir and, unless it already references
+ * the inode number id->oii_ino, rewrite the entry's inode number to
+ * id->oii_ino through the journal handle of \a th. Both the lookup and the
+ * rewrite are done with the i_mutex of dir->d_inode held.
+ *
+ * Before the entry is rewritten, the object it currently references is
+ * inspected: its self FID is read from the LMA via osd_get_lma() or, when
+ * that returns -ENODATA, via osd_get_idif(). The entry is rewritten when
+ * that object cannot be loaded (-ENOENT or -ESTALE from osd_iget()), when
+ * no self FID can be obtained for it, or when its self FID differs from
+ * \a fid. When its self FID equals \a fid, an error is logged and the
+ * entry is left untouched.
+ *
+ * \param info - per-thread scratch area; oti_child_dentry, oti_obj_dentry,
+ *               oti_id3 and oti_mdt_attrs are overwritten here
+ * \param osd  - the OSD device, used to load the currently referenced inode
+ * \param dir  - dentry of the directory that holds the entry
+ * \param name - NUL-terminated name of the entry
+ * \param fid  - FID compared against the self FID of the object that the
+ *               entry currently references
+ * \param id   - inode id whose oii_ino is stored into the entry
+ * \param th   - transaction handle; its journal handle and the handle's
+ *               transaction must be non-NULL (asserted)
+ *
+ * \retval   1, changed nothing (the entry already references id->oii_ino)
+ * \retval   0, changed successfully
+ * \retval -ve, on error: -ENOENT if \a name is not found under \a dir,
+ *              -EEXIST if the currently referenced object has the same
+ *              self FID as \a fid, or any other failure returned by the
+ *              inode load, the xattr reads or the journal calls
+ */
+static int osd_obj_update_entry(struct osd_thread_info *info,
+                               struct osd_device *osd,
+                               struct dentry *dir, const char *name,
+                               const struct lu_fid *fid,
+                               const struct osd_inode_id *id,
+                               struct thandle *th)
+{
+       struct inode               *parent = dir->d_inode;
+       struct osd_thandle         *oh;
+       struct dentry              *child;
+       struct ldiskfs_dir_entry_2 *de;
+       struct buffer_head         *bh;
+       struct inode               *inode;
+       struct dentry              *dentry = &info->oti_obj_dentry;
+       struct osd_inode_id        *oi_id  = &info->oti_id3;
+       struct lustre_mdt_attrs    *lma    = &info->oti_mdt_attrs;
+       struct lu_fid              *oi_fid = &lma->lma_self_fid;
+       int                         rc;
+       ENTRY;
+
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle != NULL);
+       LASSERT(oh->ot_handle->h_transaction != NULL);
+
+       child = &info->oti_child_dentry;
+       child->d_parent = dir;
+       child->d_name.hash = 0;
+       child->d_name.name = name;
+       child->d_name.len = strlen(name);
+
+       ll_vfs_dq_init(parent);
+       mutex_lock(&parent->i_mutex);
+       bh = osd_ldiskfs_find_entry(parent, child, &de, NULL);
+       if (bh == NULL)
+               GOTO(out, rc = -ENOENT);
+
+       if (le32_to_cpu(de->inode) == id->oii_ino)
+               GOTO(out, rc = 1);
+
+       osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
+       inode = osd_iget(info, osd, oi_id);
+       if (IS_ERR(inode)) {
+               rc = PTR_ERR(inode);
+               if (rc == -ENOENT || rc == -ESTALE)
+                       goto update;
+               GOTO(out, rc);
+       }
+
+       rc = osd_get_lma(info, inode, dentry, lma);
+       if (rc == -ENODATA) {
+               rc = osd_get_idif(info, inode, dentry, oi_fid);
+               if (rc > 0) {
+                       oi_fid = NULL;
+                       rc = 0;
+               }
+       }
+       iput(inode);
+
+       /* If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
+        * either a crashed object or an uninitialized one. Replace it. */
+       if (rc == -ENODATA || oi_fid == NULL)
+               goto update;
+
+       if (rc != 0)
+               GOTO(out, rc);
+
+       if (lu_fid_eq(fid, oi_fid)) {
+               CERROR("%s: the FID "DFID" is used by two objects: "
+                      "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
+                      oi_id->oii_ino, oi_id->oii_gen,
+                      id->oii_ino, id->oii_gen);
+               GOTO(out, rc = -EEXIST);
+       }
+
+update:
+       /* There may be temporary inconsistency: On one hand, the new
+        * object may be referenced by multiple entries, which is out
+        * of our control unless we traverse the whole /O completely,
+        * which is in non-flat order and inefficient, so it should be
+        * avoided; on the other hand, the old object may become an
+        * orphan if it is still valid. Since it was referenced by an
+        * invalid entry, making it temporarily invisible is probably
+        * no worse. OI scrub will process it later. */
+       rc = ldiskfs_journal_get_write_access(oh->ot_handle, bh);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       de->inode = cpu_to_le32(id->oii_ino);
+       rc = ldiskfs_journal_dirty_metadata(oh->ot_handle, bh);
+
+       GOTO(out, rc);
+
+out:
+       brelse(bh);
+       mutex_unlock(&parent->i_mutex);
+       return rc;
+}
+
 static int osd_obj_del_entry(struct osd_thread_info *info,
                             struct osd_device *osd,
                             struct dentry *dird, char *name,
@@ -565,6 +682,9 @@ int osd_obj_add_entry(struct osd_thread_info *info,
         child->d_parent = dir;
         child->d_inode = inode;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
+               inode->i_ino++;
+
        ll_vfs_dq_init(dir->d_inode);
        mutex_lock(&dir->d_inode->i_mutex);
        rc = osd_ldiskfs_add_entry(oh->ot_handle, child, inode, NULL);
@@ -598,11 +718,14 @@ static inline void osd_oid_name(char *name, size_t name_size,
 }
 
 /* external locking is required */
-static int osd_seq_load_locked(struct osd_device *osd,
+static int osd_seq_load_locked(struct osd_thread_info *info,
+                              struct osd_device *osd,
                               struct osd_obj_seq *osd_seq)
 {
        struct osd_obj_map  *map = osd->od_ost_map;
        struct dentry       *seq_dir;
+       struct inode        *inode;
+       struct lu_fid       *fid = &info->oti_fid3;
        int                 rc = 0;
        int                 i;
        char                dir_name[32];
@@ -622,9 +745,18 @@ static int osd_seq_load_locked(struct osd_device *osd,
        else if (seq_dir->d_inode == NULL)
                GOTO(out_put, rc = -EFAULT);
 
-       ldiskfs_set_inode_state(seq_dir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       inode = seq_dir->d_inode;
+       ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
        osd_seq->oos_root = seq_dir;
 
+       /* The actual value of @fid is not important, because the object
+        * has no OI mapping and is only visible inside the OSD. */
+       lu_igif_build(fid, inode->i_ino, inode->i_generation);
+       rc = osd_ea_fid_set(info, inode, fid,
+                           LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+       if (rc != 0)
+               GOTO(out_put, rc);
+
        LASSERT(osd_seq->oos_dirs == NULL);
        OBD_ALLOC(osd_seq->oos_dirs,
                  sizeof(seq_dir) * osd_seq->oos_subdir_count);
@@ -644,8 +776,17 @@ static int osd_seq_load_locked(struct osd_device *osd,
                        GOTO(out_free, rc = -EFAULT);
                }
 
-               ldiskfs_set_inode_state(dir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+               inode = dir->d_inode;
+               ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
                osd_seq->oos_dirs[i] = dir;
+
+               /* The actual value of @fid is not important, because the
+                * object has no OI mapping and is only visible inside the
+                * OSD. */
+               lu_igif_build(fid, inode->i_ino, inode->i_generation);
+               rc = osd_ea_fid_set(info, inode, fid,
+                                   LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+               if (rc != 0)
+                       GOTO(out_free, rc);
        }
 
        if (rc != 0) {
@@ -664,7 +805,8 @@ out_err:
        RETURN(rc);
 }
 
-struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq)
+static struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
+                                       struct osd_device *osd, obd_seq seq)
 {
        struct osd_obj_map      *map;
        struct osd_obj_seq      *osd_seq;
@@ -700,7 +842,7 @@ struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq)
        /* Init subdir count to be 32, but each seq can have
         * different subdir count */
        osd_seq->oos_subdir_count = map->om_subdir_count;
-       rc = osd_seq_load_locked(osd, osd_seq);
+       rc = osd_seq_load_locked(info, osd, osd_seq);
        if (rc != 0)
                GOTO(cleanup, rc);
 
@@ -742,7 +884,7 @@ int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *dev,
        LASSERT(map->om_root);
 
         fid_to_ostid(fid, ostid);
-       osd_seq = osd_seq_load(dev, ostid_seq(ostid));
+       osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                RETURN(PTR_ERR(osd_seq));
 
@@ -788,28 +930,40 @@ int osd_obj_map_insert(struct osd_thread_info *info,
        struct osd_obj_seq      *osd_seq;
        struct dentry           *d;
        struct ost_id           *ostid = &info->oti_ostid;
+       obd_id                   oid;
        int                     dirn, rc = 0;
        char                    name[32];
-        ENTRY;
+       ENTRY;
 
-        map = osd->od_ost_map;
-        LASSERT(map);
+       map = osd->od_ost_map;
+       LASSERT(map);
 
        /* map fid to seq:objid */
-        fid_to_ostid(fid, ostid);
+       fid_to_ostid(fid, ostid);
 
-       osd_seq = osd_seq_load(osd, ostid_seq(ostid));
+       oid = ostid_id(ostid);
+       osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                RETURN(PTR_ERR(osd_seq));
 
-       dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
+       dirn = oid & (osd_seq->oos_subdir_count - 1);
        d = osd_seq->oos_dirs[dirn];
-        LASSERT(d);
+       LASSERT(d);
 
-       osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+       osd_oid_name(name, sizeof(name), fid, oid);
+
+again:
        rc = osd_obj_add_entry(info, osd, d, name, id, th);
+       if (rc == -EEXIST) {
+               rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
+               if (unlikely(rc == -ENOENT))
+                       goto again;
 
-        RETURN(rc);
+               if (unlikely(rc == 1))
+                       RETURN(0);
+       }
+
+       RETURN(rc);
 }
 
 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
@@ -829,7 +983,7 @@ int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
        /* map fid to seq:objid */
         fid_to_ostid(fid, ostid);
 
-       osd_seq = osd_seq_load(osd, ostid_seq(ostid));
+       osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
        if (IS_ERR(osd_seq))
                GOTO(cleanup, rc = PTR_ERR(osd_seq));
 
@@ -843,33 +997,98 @@ cleanup:
         RETURN(rc);
 }
 
-int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
-                       const struct lu_fid *fid,
-                       const struct osd_inode_id *id,
-                       struct thandle *th)
+int osd_obj_map_update(struct osd_thread_info *info,
+                      struct osd_device *osd,
+                      const struct lu_fid *fid,
+                      const struct osd_inode_id *id,
+                      struct thandle *th)
 {
-       struct osd_obj_map      *map = osd->od_ost_map;
-       struct dentry           *root = osd_sb(osd)->s_root;
-       char                    *name;
-       int                     rc = 0;
+       struct osd_obj_seq      *osd_seq;
+       struct dentry           *d;
+       struct ost_id           *ostid = &info->oti_ostid;
+       int                     dirn, rc = 0;
+       char                    name[32];
        ENTRY;
 
+       fid_to_ostid(fid, ostid);
+       osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
+       if (IS_ERR(osd_seq))
+               RETURN(PTR_ERR(osd_seq));
+
+       dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
+       d = osd_seq->oos_dirs[dirn];
+       LASSERT(d);
+
+       osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+       rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
+
+       RETURN(rc);
+}
+
+static struct dentry *
+osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
+                    const struct lu_fid *fid, char **name)
+{
+       struct dentry *root = ERR_PTR(-ENOENT);
+
        if (fid_is_last_id(fid)) {
-               struct osd_obj_seq      *osd_seq;
+               struct osd_obj_seq *osd_seq;
 
                /* on creation of LAST_ID we create O/<seq> hierarchy */
-               LASSERT(map);
-               osd_seq = osd_seq_load(osd, fid_seq(fid));
+               osd_seq = osd_seq_load(info, osd, fid_seq(fid));
                if (IS_ERR(osd_seq))
-                       RETURN(PTR_ERR(osd_seq));
-               rc = osd_obj_add_entry(info, osd, osd_seq->oos_root,
-                                      "LAST_ID", id, th);
+                       RETURN((struct dentry *)osd_seq);
+
+               *name = "LAST_ID";
+               root = osd_seq->oos_root;
        } else {
-               name = osd_lf_fid2name(fid);
-               if (name == NULL)
+               *name = osd_lf_fid2name(fid);
+               if (*name == NULL)
                        CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
-               else if (name[0])
-                       rc = osd_obj_add_entry(info, osd, root, name, id, th);
+               else if ((*name)[0])
+                       root = osd_sb(osd)->s_root;
+       }
+
+       return root;
+}
+
+int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       struct thandle *th)
+{
+       struct dentry   *root;
+       char            *name;
+       int              rc;
+       ENTRY;
+
+       root = osd_object_spec_find(info, osd, fid, &name);
+       if (!IS_ERR(root)) {
+               rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
+       } else {
+               rc = PTR_ERR(root);
+               if (rc == -ENOENT)
+                       rc = 1;
+       }
+
+       RETURN(rc);
+}
+
+int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       struct thandle *th)
+{
+       struct dentry   *root;
+       char            *name;
+       int              rc;
+       ENTRY;
+
+       root = osd_object_spec_find(info, osd, fid, &name);
+       if (!IS_ERR(root)) {
+               rc = osd_obj_add_entry(info, osd, root, name, id, th);
+       } else {
+               rc = PTR_ERR(root);
+               if (rc == -ENOENT)
+                       rc = 0;
        }
 
        RETURN(rc);
@@ -888,7 +1107,7 @@ int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
        if (fid_is_last_id(fid)) {
                struct osd_obj_seq *osd_seq;
 
-               osd_seq = osd_seq_load(osd, fid_seq(fid));
+               osd_seq = osd_seq_load(info, osd, fid_seq(fid));
                if (IS_ERR(osd_seq))
                        RETURN(PTR_ERR(osd_seq));
                root = osd_seq->oos_root;
index f5e3edf..523cfdf 100644 (file)
@@ -99,6 +99,7 @@ int osd_trans_declare_op2rb[] = {
        [OSD_OT_WRITE]          = OSD_OT_WRITE,
        [OSD_OT_INSERT]         = OSD_OT_DELETE,
        [OSD_OT_DELETE]         = OSD_OT_INSERT,
+       [OSD_OT_UPDATE]         = OSD_OT_MAX,
        [OSD_OT_QUOTA]          = OSD_OT_MAX,
 };
 
@@ -170,14 +171,6 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env,
         }
 }
 
-static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
-                                 const char *name, void *buf, int len)
-{
-       dentry->d_inode = inode;
-       dentry->d_sb = inode->i_sb;
-       return inode->i_op->getxattr(dentry, name, buf, len);
-}
-
 int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
                struct dentry *dentry, struct lustre_mdt_attrs *lma)
 {
@@ -280,35 +273,90 @@ osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
        return inode;
 }
 
-static struct inode *
-osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev,
-               struct osd_inode_id *id, const struct lu_fid *fid)
+/**
+ * Try to get the self FID of \a inode from its XATTR_NAME_FID xattr.
+ *
+ * The xattr is read into info->oti_ff with __osd_xattr_get(). When the
+ * returned size equals sizeof(struct filter_fid_old), the little-endian
+ * ff_seq and ff_objid fields are converted (using info->oti_ostid as
+ * scratch space) into \a fid, with 0 passed as the OST index. When the
+ * returned size equals sizeof(struct filter_fid), \a fid is not written.
+ *
+ * \retval +v: new filter_fid, does not contain self-fid
+ * \retval 0:  filter_fid_old, contains self-fid
+ * \retval -v: other failure cases: -EINVAL when the xattr has any other
+ *             non-negative size, otherwise the negative value returned
+ *             by the xattr read
+ */
+int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
+                struct dentry *dentry, struct lu_fid *fid)
 {
-       struct lustre_mdt_attrs *lma   = &info->oti_mdt_attrs;
-       struct inode            *inode;
+       struct filter_fid_old   *ff     = &info->oti_ff;
+       struct ost_id           *ostid  = &info->oti_ostid;
        int                      rc;
 
-       inode = osd_iget(info, dev, id);
-       if (IS_ERR(inode))
-               return inode;
+       rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
+       if (rc == sizeof(*ff)) {
+               rc = 0;
+               ostid_set_seq(ostid, le64_to_cpu(ff->ff_seq));
+               ostid_set_id(ostid, le64_to_cpu(ff->ff_objid));
+               /* XXX: should use real OST index in the future. LU-3569 */
+               ostid_to_fid(fid, ostid, 0);
+       } else if (rc == sizeof(struct filter_fid)) {
+               rc = 1;
+       } else if (rc >= 0) {
+               rc = -EINVAL;
+       }
 
-       rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
-       if (rc == -ENODATA)
-               return inode;
+       return rc;
+}
 
-       if (rc != 0) {
-               iput(inode);
-               return ERR_PTR(rc);
+static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
+{
+       struct osd_thread_info  *info   = osd_oti_get(env);
+       struct lustre_mdt_attrs *lma    = &info->oti_mdt_attrs;
+       struct inode            *inode  = obj->oo_inode;
+       struct dentry           *dentry = &info->oti_obj_dentry;
+       struct lu_fid           *fid    = NULL;
+       int                      rc;
+       ENTRY;
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
+               RETURN(0);
+
+       CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
+       rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
+                            info->oti_mdt_attrs_old, LMA_OLD_SIZE);
+       if (rc == -ENODATA) {
+               fid = &lma->lma_self_fid;
+               rc = osd_get_idif(info, inode, dentry, fid);
+               if (rc > 0)
+                       rc = 0;
        }
 
-       if (!lu_fid_eq(fid, &lma->lma_self_fid)) {
-               CDEBUG(D_LFSCK, "inconsistent obj: "DFID", %lu, "DFID"\n",
-                      PFID(&lma->lma_self_fid), inode->i_ino, PFID(fid));
-               iput(inode);
-               return ERR_PTR(-EREMCHG);
+       if (unlikely(rc == -ENODATA))
+               RETURN(0);
+
+       if (rc < 0)
+               RETURN(rc);
+
+       if (rc > 0) {
+               rc = 0;
+               lustre_lma_swab(lma);
+               if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
+                            CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
+                       CWARN("%s: unsupported incompat LMA feature(s) %#x for "
+                             "fid = "DFID", ino = %lu\n",
+                             osd_obj2dev(obj)->od_svname,
+                             lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
+                             PFID(lu_object_fid(&obj->oo_dt.do_lu)),
+                             inode->i_ino);
+                       rc = -EOPNOTSUPP;
+               } else if (!(lma->lma_compat & LMAC_NOT_IN_OI)) {
+                       fid = &lma->lma_self_fid;
+               }
        }
 
-       return inode;
+       if (fid != NULL &&
+           unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu), fid))) {
+               CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
+                      osd_obj2dev(obj)->od_svname,
+                      PFID(lu_object_fid(&obj->oo_dt.do_lu)),
+                      PFID(&lma->lma_self_fid));
+               rc = -EREMCHG;
+       }
+
+       RETURN(rc);
 }
 
 static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
@@ -325,7 +373,6 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
        struct scrub_file      *sf;
        int                     result;
        int                     saved  = 0;
-       bool                    verify = false;
        bool                    in_oi  = false;
        bool                    triggered = false;
        ENTRY;
@@ -358,9 +405,6 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
                        goto iget;
        }
 
-       if (sf->sf_flags & SF_INCONSISTENT)
-               verify = true;
-
        /*
         * Objects are created as locking anchors or place holders for objects
         * yet to be created. No need to osd_oi_lookup() at here because FID
@@ -372,9 +416,10 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
                GOTO(out, result = 0);
 
        /* Search order: 3. OI files. */
-       result = osd_oi_lookup(info, dev, fid, id, true);
+       result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
        if (result == -ENOENT) {
-               if (!fid_is_norm(fid) || fid_is_on_ost(info, dev, fid) ||
+               if (!fid_is_norm(fid) ||
+                   fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) ||
                    !ldiskfs_test_bit(osd_oi_fid2idx(dev,fid),
                                      sf->sf_oi_bitmap))
                        GOTO(out, result = 0);
@@ -388,10 +433,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
        in_oi = true;
 
 iget:
-       if (!verify)
-               inode = osd_iget(info, dev, id);
-       else
-               inode = osd_iget_verify(info, dev, id, fid);
+       inode = osd_iget(info, dev, id);
        if (IS_ERR(inode)) {
                result = PTR_ERR(inode);
                if (result == -ENOENT || result == -ESTALE) {
@@ -460,7 +502,6 @@ trigger:
                                                             fid, id);
                        if (result == 0) {
                                in_oi = false;
-                               verify = false;
                                goto iget;
                        }
 
@@ -473,6 +514,16 @@ trigger:
         obj->oo_inode = inode;
         LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
 
+       result = osd_check_lma(env, obj);
+       if (result != 0) {
+               iput(inode);
+               obj->oo_inode = NULL;
+               if (result == -EREMCHG)
+                       goto trigger;
+
+               GOTO(out, result);
+       }
+
        obj->oo_compat_dot_created = 1;
        obj->oo_compat_dotdot_created = 1;
 
@@ -504,50 +555,6 @@ static void osd_object_init0(struct osd_object *obj)
                 (LOHA_EXISTS | (obj->oo_inode->i_mode & S_IFMT));
 }
 
-static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
-{
-       struct osd_thread_info  *info   = osd_oti_get(env);
-       struct lustre_mdt_attrs *lma    = &info->oti_mdt_attrs;
-       int                     rc;
-       ENTRY;
-
-       CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
-       rc = __osd_xattr_get(obj->oo_inode, &info->oti_obj_dentry,
-                            XATTR_NAME_LMA, info->oti_mdt_attrs_old,
-                            LMA_OLD_SIZE);
-       if (rc > 0) {
-               rc = 0;
-               lustre_lma_swab(lma);
-               if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
-                            CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
-                       rc = -EOPNOTSUPP;
-                       CWARN("%s: unsupported incompat LMA feature(s) %#x for "
-                             "fid = "DFID", ino = %lu: rc = %d\n",
-                             osd_obj2dev(obj)->od_svname,
-                             lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
-                             PFID(lu_object_fid(&obj->oo_dt.do_lu)),
-                             obj->oo_inode->i_ino, rc);
-               }
-               if (unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu),
-                                       &lma->lma_self_fid))) {
-                       CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
-                              osd_obj2dev(obj)->od_svname,
-                              PFID(lu_object_fid(&obj->oo_dt.do_lu)),
-                              PFID(&lma->lma_self_fid));
-                       if (obj->oo_inode != NULL) {
-                               iput(obj->oo_inode);
-                               obj->oo_inode = NULL;
-                       }
-                       rc = -ESTALE;
-               }
-       } else if (rc == -ENODATA) {
-               /* haven't initialize LMA xattr */
-               rc = 0;
-       }
-
-       RETURN(rc);
-}
-
 /*
  * Concurrency: no concurrent access is possible that early in object
  * life-cycle.
@@ -568,13 +575,8 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l,
 
        result = osd_fid_lookup(env, obj, lu_object_fid(l), conf);
        obj->oo_dt.do_body_ops = &osd_body_ops_new;
-       if (result == 0 && obj->oo_inode != NULL) {
-               result = osd_check_lma(env, obj);
-               if (result != 0)
-                       return result;
-
+       if (result == 0 && obj->oo_inode != NULL)
                osd_object_init0(obj);
-       }
 
        LINVRNT(osd_invariant(obj));
        return result;
@@ -2088,7 +2090,7 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj,
         LASSERT(obj->oo_inode != NULL);
 
        osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
-       return osd_oi_insert(info, osd, fid, id, th);
+       return osd_oi_insert(info, osd, fid, id, th, OI_CHECK_FLD);
 }
 
 int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
@@ -2145,7 +2147,7 @@ static int osd_declare_object_create(const struct lu_env *env,
        osd_trans_declare_op(env, oh, OSD_OT_CREATE,
                             osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
        if (!fid_is_on_ost(osd_oti_get(env), osd_dt_dev(handle->th_dev),
-                          lu_object_fid(&dt->do_lu)))
+                          lu_object_fid(&dt->do_lu), OI_CHECK_FLD))
                /* Reuse idle OI block may cause additional one OI block
                 * to be changed. */
                osd_trans_declare_op(env, oh, OSD_OT_INSERT,
@@ -2293,7 +2295,7 @@ static int osd_object_destroy(const struct lu_env *env,
 
        osd_trans_exec_op(env, th, OSD_OT_DESTROY);
 
-        result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
+        result = osd_oi_delete(osd_oti_get(env), osd, fid, th, OI_CHECK_FLD);
 
         /* XXX: add to ext3 orphan list */
         /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
@@ -2315,23 +2317,45 @@ static int osd_object_destroy(const struct lu_env *env,
  * FIXME: It is good to have/use ldiskfs_xattr_set_handle() here
  */
 int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
-                  const struct lu_fid *fid, __u64 flags)
+                  const struct lu_fid *fid, __u32 compat, __u32 incompat)
 {
        struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
        int                      rc;
+       ENTRY;
 
        if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA))
-               return 0;
+               RETURN(0);
 
-       lustre_lma_init(lma, fid, flags);
+       lustre_lma_init(lma, fid, compat, incompat);
        lustre_lma_swab(lma);
 
        rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma, sizeof(*lma),
                             XATTR_CREATE);
-       /* Someone may created the EA by race. */
-       if (unlikely(rc == -EEXIST))
-               rc = 0;
-       return rc;
+       /* LMA may already exist, but we need to check that all the
+        * desired compat/incompat flags have been added. */
+       if (unlikely(rc == -EEXIST)) {
+               if (compat == 0 && incompat == 0)
+                       RETURN(0);
+
+               rc = __osd_xattr_get(inode, &info->oti_obj_dentry,
+                                    XATTR_NAME_LMA, info->oti_mdt_attrs_old,
+                                    LMA_OLD_SIZE);
+               if (rc <= 0)
+                       RETURN(-EINVAL);
+
+               lustre_lma_swab(lma);
+               if (!(~lma->lma_compat & compat) &&
+                   !(~lma->lma_incompat & incompat))
+                       RETURN(0);
+
+               lma->lma_compat |= compat;
+               lma->lma_incompat |= incompat;
+               lustre_lma_swab(lma);
+               rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
+                                    sizeof(*lma), XATTR_REPLACE);
+       }
+
+       RETURN(rc);
 }
 
 /**
@@ -2428,7 +2452,7 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env,
        }
 
        /* Set special LMA flag for local agent inode */
-       rc = osd_ea_fid_set(info, local, fid, LMAI_AGENT);
+       rc = osd_ea_fid_set(info, local, fid, 0, LMAI_AGENT);
        if (rc != 0) {
                CERROR("%s: set LMA for "DFID" remote inode failed: rc = %d\n",
                       osd_name(osd), PFID(fid), rc);
@@ -2513,7 +2537,10 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
        if (result == 0)
-               result = osd_ea_fid_set(info, obj->oo_inode, fid, 0);
+               result = osd_ea_fid_set(info, obj->oo_inode, fid,
+                               fid_is_on_ost(info, osd_obj2dev(obj),
+                                             fid, OI_CHECK_FLD) ?
+                               LMAC_FID_ON_OST : 0, 0);
 
        if (result == 0)
                result = __osd_oi_insert(env, obj, fid, th);
@@ -3786,7 +3813,7 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev,
                RETURN_EXIT;
 
 again:
-       rc = osd_oi_lookup(oti, dev, fid, id, true);
+       rc = osd_oi_lookup(oti, dev, fid, id, OI_CHECK_FLD);
        if (rc != 0 && rc != -ENOENT)
                RETURN_EXIT;
 
@@ -4947,6 +4974,8 @@ again:
 
        rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
        if (rc == 0) {
+               LASSERT(!(lma->lma_compat & LMAC_NOT_IN_OI));
+
                if (fid_is_sane(fid)) {
                        /* FID-in-dirent is valid. */
                        if (lu_fid_eq(fid, &lma->lma_self_fid))
@@ -5030,7 +5059,7 @@ again:
                if (unlikely(fid_is_sane(fid))) {
                        /* FID-in-dirent exists, but FID-in-LMA is lost.
                         * Trust the FID-in-dirent, and add FID-in-LMA. */
-                       rc = osd_ea_fid_set(info, inode, fid, 0);
+                       rc = osd_ea_fid_set(info, inode, fid, 0, 0);
                        if (rc == 0)
                                *attr |= LUDA_REPAIR;
                } else {
@@ -5351,7 +5380,10 @@ static int osd_mount(const struct lu_env *env,
        struct file_system_type *type;
        char                    *options = NULL;
        char                    *str;
-       int                       rc = 0;
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct lu_fid           *fid = &info->oti_fid;
+       struct inode            *inode;
+       int                      rc = 0;
         ENTRY;
 
        if (o->od_mnt != NULL)
@@ -5402,8 +5434,8 @@ static int osd_mount(const struct lu_env *env,
 
        if (IS_ERR(o->od_mnt)) {
                rc = PTR_ERR(o->od_mnt);
-               CERROR("%s: can't mount %s: %d\n", name, dev, rc);
                o->od_mnt = NULL;
+               CERROR("%s: can't mount %s: %d\n", name, dev, rc);
                GOTO(out, rc);
        }
 
@@ -5411,32 +5443,41 @@ static int osd_mount(const struct lu_env *env,
        if (dev_check_rdonly(o->od_mnt->mnt_sb->s_bdev)) {
                CERROR("%s: underlying device %s is marked as read-only. "
                       "Setup failed\n", name, dev);
-               mntput(o->od_mnt);
-               o->od_mnt = NULL;
-               GOTO(out, rc = -EROFS);
+               GOTO(out_mnt, rc = -EROFS);
        }
 #endif
 
        if (!LDISKFS_HAS_COMPAT_FEATURE(o->od_mnt->mnt_sb,
            LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)) {
                CERROR("%s: device %s is mounted w/o journal\n", name, dev);
-               mntput(o->od_mnt);
-               o->od_mnt = NULL;
-               GOTO(out, rc = -EINVAL);
+               GOTO(out_mnt, rc = -EINVAL);
+       }
+
+       inode = osd_sb(o)->s_root->d_inode;
+       ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
+       rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
+       if (rc != 0) {
+               CERROR("%s: failed to set lma on %s root inode\n", name, dev);
+               GOTO(out_mnt, rc);
        }
 
-       ldiskfs_set_inode_state(osd_sb(o)->s_root->d_inode,
-                               LDISKFS_STATE_LUSTRE_NO_OI);
        if (lmd_flags & LMD_FLG_NOSCRUB)
                o->od_noscrub = 1;
 
+       GOTO(out, rc = 0);
+
+out_mnt:
+       mntput(o->od_mnt);
+       o->od_mnt = NULL;
+
 out:
        if (__page)
                OBD_PAGE_FREE(__page);
        if (rc)
                fsfilt_put_ops(o->od_fsops);
 
-        RETURN(rc);
+       return rc;
 }
 
 static struct lu_device *osd_device_fini(const struct lu_env *env,
index 6733819..3680022 100644 (file)
@@ -223,8 +223,6 @@ struct osd_otable_it {
                                 ooi_waiting:1; /* it::next is waiting. */
 };
 
-extern const int osd_dto_credits_noquota[];
-
 /*
  * osd device.
  */
@@ -313,8 +311,9 @@ enum {
        OSD_OT_WRITE            = 7,
        OSD_OT_INSERT           = 8,
        OSD_OT_DELETE           = 9,
-       OSD_OT_QUOTA            = 10,
-       OSD_OT_MAX              = 11
+       OSD_OT_UPDATE           = 10,
+       OSD_OT_QUOTA            = 11,
+       OSD_OT_MAX              = 12
 };
 
 struct osd_thandle {
@@ -493,8 +492,10 @@ struct osd_thread_info {
 
         struct lu_fid          oti_fid;
        struct lu_fid          oti_fid2;
+       struct lu_fid          oti_fid3;
        struct osd_inode_id    oti_id;
        struct osd_inode_id    oti_id2;
+       struct osd_inode_id    oti_id3;
         struct ost_id          oti_ostid;
 
         /*
@@ -594,10 +595,19 @@ struct osd_thread_info {
        bool                    oti_rollback;
 
        char                    oti_name[48];
+       struct filter_fid_old   oti_ff;
 };
 
 extern int ldiskfs_pdo;
 
+static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
+                                 const char *name, void *buf, int len)
+{
+       dentry->d_inode = inode;        /* ->getxattr() takes a dentry, so */
+       dentry->d_sb = inode->i_sb;     /* bind the caller's one to @inode */
+       return inode->i_op->getxattr(dentry, name, buf, len);
+}
+
 static inline int __osd_xattr_set(struct osd_thread_info *info,
                                  struct inode *inode, const char *name,
                                  const void *buf, int buflen, int fl)
@@ -625,11 +635,13 @@ int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
 struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_inode_id *id);
 int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
-                  const struct lu_fid *fid, __u64 flags);
+                  const struct lu_fid *fid, __u32 compat, __u32 incompat);
 int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
                struct dentry *dentry, struct lustre_mdt_attrs *lma);
 int osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd,
                     struct osd_inode_id *id, const struct lu_fid *fid);
+int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
+                struct dentry *dentry, struct lu_fid *fid);
 
 int osd_obj_map_init(const struct lu_env *env, struct osd_device *osd);
 void osd_obj_map_fini(struct osd_device *dev);
@@ -640,11 +652,17 @@ int osd_obj_map_insert(struct osd_thread_info *info, struct osd_device *osd,
                       struct thandle *th);
 int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid, struct thandle *th);
+int osd_obj_map_update(struct osd_thread_info *info, struct osd_device *osd,
+                      const struct lu_fid *fid, const struct osd_inode_id *id,
+                      struct thandle *th);
 int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid, struct osd_inode_id *id);
 int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid, const struct osd_inode_id *id,
                        struct thandle *th);
+int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       struct thandle *th);
 
 void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
 int osd_scrub_file_store(struct osd_scrub *scrub);
@@ -696,9 +714,6 @@ void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec);
 int osd_quota_migration(const struct lu_env *env, struct dt_object *dt,
                        const struct dt_index_features *feat);
 
-/* osd_compat.c */
-struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq);
-
 static inline bool is_quota_glb_feat(const struct dt_index_features *feat)
 {
        return (feat == &dt_quota_iusr_features ||
index 9194672..230f015 100644 (file)
@@ -229,6 +229,13 @@ static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd,
                 RETURN(PTR_ERR(inode));
 
        ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       /* The exact value of @fid is not important, because these objects
+        * have no OI mappings and are only visible inside the OSD. */
+       lu_igif_build(&info->oti_fid, inode->i_ino, inode->i_generation);
+       rc = osd_ea_fid_set(info, inode, &info->oti_fid, LMAC_NOT_IN_OI, 0);
+       if (rc != 0)
+               GOTO(out_inode, rc);
+
         OBD_ALLOC_PTR(oi);
         if (oi == NULL)
                 GOTO(out_inode, rc = -ENOMEM);
@@ -461,12 +468,15 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti,
 }
 
 int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
-                 const struct lu_fid *fid)
+                 const struct lu_fid *fid, enum oi_check_flags flags)
 {
        struct lu_seq_range *range = &info->oti_seq_range;
        int rc;
        ENTRY;
 
+       if (flags & OI_KNOWN_ON_OST)
+               RETURN(1);
+
        if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid) ||
                     fid_is_llog(fid)))
                RETURN(0);
@@ -474,6 +484,9 @@ int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
        if (fid_is_idif(fid) || fid_is_last_id(fid))
                RETURN(1);
 
+       if (!(flags & OI_CHECK_FLD))
+               RETURN(0);
+
        rc = osd_fld_lookup(info->oti_env, osd, fid, range);
        if (rc != 0) {
                CERROR("%s: Can not lookup fld for "DFID"\n",
@@ -490,8 +503,8 @@ int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
        RETURN(0);
 }
 
-int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
-                   const struct lu_fid *fid, struct osd_inode_id *id)
+static int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                          const struct lu_fid *fid, struct osd_inode_id *id)
 {
        struct lu_fid *oi_fid = &info->oti_fid2;
        int            rc;
@@ -510,12 +523,12 @@ int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
 
 int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
                  const struct lu_fid *fid, struct osd_inode_id *id,
-                 bool check_fld)
+                 enum oi_check_flags flags)
 {
        if (unlikely(fid_is_last_id(fid)))
                return osd_obj_spec_lookup(info, osd, fid, id);
 
-       if ((check_fld && fid_is_on_ost(info, osd, fid)) || fid_is_llog(fid))
+       if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
                return osd_obj_map_lookup(info, osd, fid, id);
 
        if (fid_is_fs_root(fid)) {
@@ -570,7 +583,7 @@ static int osd_oi_iam_refresh(struct osd_thread_info *oti, struct osd_oi *oi,
 
 int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
                  const struct lu_fid *fid, const struct osd_inode_id *id,
-                 struct thandle *th)
+                 struct thandle *th, enum oi_check_flags flags)
 {
        struct lu_fid       *oi_fid = &info->oti_fid2;
        struct osd_inode_id *oi_id  = &info->oti_id2;
@@ -579,7 +592,7 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
        if (unlikely(fid_is_last_id(fid)))
                return osd_obj_spec_insert(info, osd, fid, id, th);
 
-       if (fid_is_on_ost(info, osd, fid) || fid_is_llog(fid))
+       if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
                return osd_obj_map_insert(info, osd, fid, id, th);
 
        fid_cpu_to_be(oi_fid, fid);
@@ -594,16 +607,12 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
                if (rc != -EEXIST)
                        return rc;
 
-               rc = osd_oi_lookup(info, osd, fid, oi_id, false);
-               if (unlikely(rc != 0))
+               rc = osd_oi_lookup(info, osd, fid, oi_id, 0);
+               if (rc != 0)
                        return rc;
 
-               if (osd_id_eq(id, oi_id)) {
-                       CERROR("%.16s: the FID "DFID" is there already:%u/%u\n",
-                              LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name,
-                              PFID(fid), id->oii_ino, id->oii_gen);
-                       return -EEXIST;
-               }
+               if (unlikely(osd_id_eq(id, oi_id)))
+                       return 0;
 
                /* Check whether the mapping for oi_id is valid or not. */
                inode = osd_iget(info, osd, oi_id);
@@ -622,7 +631,8 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
                if (rc != 0)
                        return rc;
 
-               if (lu_fid_eq(fid, &lma->lma_self_fid)) {
+               if (!(lma->lma_compat & LMAC_NOT_IN_OI) &&
+                   lu_fid_eq(fid, &lma->lma_self_fid)) {
                        CERROR("%.16s: the FID "DFID" is used by two objects: "
                               "%u/%u %u/%u\n",
                               LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name,
@@ -675,7 +685,7 @@ static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi,
 
 int osd_oi_delete(struct osd_thread_info *info,
                  struct osd_device *osd, const struct lu_fid *fid,
-                 struct thandle *th)
+                 struct thandle *th, enum oi_check_flags flags)
 {
        struct lu_fid *oi_fid = &info->oti_fid2;
 
@@ -686,7 +696,7 @@ int osd_oi_delete(struct osd_thread_info *info,
        if (fid_is_last_id(fid))
                return 0;
 
-       if (fid_is_on_ost(info, osd, fid) || fid_is_llog(fid))
+       if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
                return osd_obj_map_delete(info, osd, fid, th);
 
        fid_cpu_to_be(oi_fid, fid);
@@ -694,6 +704,33 @@ int osd_oi_delete(struct osd_thread_info *info,
                                 (const struct dt_key *)oi_fid, th);
 }
 
+int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd,
+                 const struct lu_fid *fid, const struct osd_inode_id *id,
+                 struct thandle *th, enum oi_check_flags flags)
+{
+       struct lu_fid       *oi_fid = &info->oti_fid2;
+       struct osd_inode_id *oi_id  = &info->oti_id2;
+       int                  rc     = 0;
+
+       if (unlikely(fid_is_last_id(fid)))
+               return osd_obj_spec_update(info, osd, fid, id, th);
+
+       if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
+               return osd_obj_map_update(info, osd, fid, id, th);
+
+       fid_cpu_to_be(oi_fid, fid);     /* used below as the OI key */
+       osd_id_pack(oi_id, id);         /* used below as the OI record */
+       rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid),
+                              (const struct dt_rec *)oi_id,
+                              (const struct dt_key *)oi_fid, th, false);
+       if (rc != 0)
+               return rc;
+
+       if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
+               rc = osd_obj_spec_update(info, osd, fid, id, th);
+       return rc;
+}
+
 int osd_oi_mod_init(void)
 {
         if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)
index 541e472..65e85ac 100644 (file)
@@ -131,22 +131,28 @@ static inline int osd_id_eq_strict(const struct osd_inode_id *id0,
        return (id0->oii_ino == id1->oii_ino && id0->oii_gen == id1->oii_gen);
 }
 
+enum oi_check_flags {
+       OI_CHECK_FLD    = 0x00000001, /* fid_is_on_ost() may query the FLD */
+       OI_KNOWN_ON_OST = 0x00000002, /* fid_is_on_ost() returns 1 at once */
+};
+
 int osd_oi_mod_init(void);
 int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd);
 void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd);
-int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
-                   const struct lu_fid *fid, struct osd_inode_id *id);
 int  osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
                   const struct lu_fid *fid, struct osd_inode_id *id,
-                  bool check_fld);
+                  enum oi_check_flags flags);
 int  osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
                   const struct lu_fid *fid, const struct osd_inode_id *id,
-                  struct thandle *th);
+                  struct thandle *th, enum oi_check_flags flags);
 int  osd_oi_delete(struct osd_thread_info *info,
                   struct osd_device *osd, const struct lu_fid *fid,
-                  struct thandle *th);
+                  struct thandle *th, enum oi_check_flags flags);
+int  osd_oi_update(struct osd_thread_info *info, struct osd_device *osd,
+                  const struct lu_fid *fid, const struct osd_inode_id *id,
+                  struct thandle *th, enum oi_check_flags flags);
 
 int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
-                 const struct lu_fid *fid);
+                 const struct lu_fid *fid, enum oi_check_flags flags);
 #endif /* __KERNEL__ */
 #endif /* _OSD_OI_H */
index 98a3b24..95b0caf 100644 (file)
@@ -57,6 +57,9 @@
 #define SCRUB_NEXT_FATAL       6 /* simulate failure during OI scrub */
 #define SCRUB_NEXT_NOSCRUB     7 /* new created object, no scrub on it */
 #define SCRUB_NEXT_NOLMA       8 /* the inode has no FID-in-LMA */
+#define SCRUB_NEXT_OSTOBJ      9 /* for OST-object */
+#define SCRUB_NEXT_OSTOBJ_OLD  10 /* old OST-object, no LMA or no FID-on-OST
+                                   * flags in LMA */
 
 /* misc functions */
 
@@ -86,49 +89,44 @@ static inline int osd_scrub_has_window(struct osd_scrub *scrub,
 static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
                                     struct osd_device *dev,
                                     const struct lu_fid *fid,
-                                    const struct osd_inode_id *id, int ops)
+                                    const struct osd_inode_id *id,
+                                    int ops, enum oi_check_flags flags)
 {
-       struct lu_fid         *oi_fid = &info->oti_fid2;
-       struct osd_inode_id   *oi_id  = &info->oti_id2;
-       struct iam_container  *bag;
-       struct iam_path_descr *ipd;
-       handle_t              *jh;
-       int                    rc;
+       const struct lu_env *env = info->oti_env;
+       struct thandle      *th;
+       struct osd_thandle  *oh;
+       int                  rc;
        ENTRY;
 
-       fid_cpu_to_be(oi_fid, fid);
-       if (id != NULL)
-               osd_id_pack(oi_id, id);
-       jh = ldiskfs_journal_start_sb(osd_sb(dev),
-                                     osd_dto_credits_noquota[ops]);
-       if (IS_ERR(jh)) {
-               rc = PTR_ERR(jh);
-               CERROR("%.16s: fail to start trans for scrub store: rc = %d\n",
-                      LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc);
-               RETURN(rc);
-       }
+       th = dt_trans_create(env, &dev->od_dt_dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
 
-       bag = &osd_fid2oi(dev, fid)->oi_dir.od_container;
-       ipd = osd_idx_ipd_get(info->oti_env, bag);
-       if (unlikely(ipd == NULL)) {
-               ldiskfs_journal_stop(jh);
-               CERROR("%.16s: fail to get ipd for scrub store\n",
-                      LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name);
-               RETURN(-ENOMEM);
-       }
+       oh = container_of0(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle == NULL);
 
        switch (ops) {
        case DTO_INDEX_UPDATE:
-               rc = iam_update(jh, bag, (const struct iam_key *)oi_fid,
-                               (struct iam_rec *)oi_id, ipd);
+               osd_trans_declare_op(env, oh, OSD_OT_UPDATE,
+                                    osd_dto_credits_noquota[DTO_INDEX_UPDATE]);
+               rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = osd_oi_update(info, dev, fid, id, th, flags);
                if (unlikely(rc == -ENOENT)) {
                        /* Some unlink thread may removed the OI mapping. */
                        rc = 1;
                }
                break;
        case DTO_INDEX_INSERT:
-               rc = iam_insert(jh, bag, (const struct iam_key *)oi_fid,
-                               (struct iam_rec *)oi_id, ipd);
+               osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                                    osd_dto_credits_noquota[DTO_INDEX_INSERT]);
+               rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = osd_oi_insert(info, dev, fid, id, th, flags);
                if (unlikely(rc == -EEXIST)) {
                        rc = 1;
                        /* XXX: There are trouble things when adding OI
@@ -165,7 +163,13 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
                }
                break;
        case DTO_INDEX_DELETE:
-               rc = iam_delete(jh, bag, (const struct iam_key *)oi_fid, ipd);
+               osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+                                    osd_dto_credits_noquota[DTO_INDEX_DELETE]);
+               rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = osd_oi_delete(info, dev, fid, th, flags);
                if (rc == -ENOENT) {
                        /* It is normal that the unlink thread has removed the
                         * OI mapping already. */
@@ -176,9 +180,12 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
                LASSERTF(0, "Unexpected ops %d\n", ops);
                break;
        }
-       osd_ipd_put(info->oti_env, bag, ipd);
-       ldiskfs_journal_stop(jh);
-       RETURN(rc);
+
+       GOTO(stop, rc);
+
+stop:
+       dt_trans_stop(env, &dev->od_dt_dev, th);
+       return rc;
 }
 
 /* OI_scrub file ops */
@@ -440,6 +447,23 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
        if (fid_is_igif(fid))
                sf->sf_items_igif++;
 
+       if (val == SCRUB_NEXT_OSTOBJ_OLD) {
+               inode = osd_iget(info, dev, lid);
+               if (IS_ERR(inode)) {
+                       rc = PTR_ERR(inode);
+                       /* Someone removed the inode. */
+                       if (rc == -ENOENT || rc == -ESTALE)
+                               rc = 0;
+                       GOTO(out, rc);
+               }
+
+               sf->sf_flags |= SF_UPGRADE;
+               rc = osd_ea_fid_set(info, inode, fid,
+                                   LMAC_FID_ON_OST, 0);
+               if (rc != 0)
+                       GOTO(out, rc);
+       }
+
        if ((val == SCRUB_NEXT_NOLMA) &&
            (!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA)))
                GOTO(out, rc = 0);
@@ -447,54 +471,53 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
        if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA))
                goto iget;
 
-       /* XXX: Currently, no FID-in-LMA for OST object, so osd_oi_lookup()
-        *      without checking FLD is enough.
-        *
-        *      It should be updated if FID-in-LMA for OSD object introduced
-        *      in the future. */
-       rc = osd_oi_lookup(info, dev, fid, lid2, false);
+       rc = osd_oi_lookup(info, dev, fid, lid2,
+               (val == SCRUB_NEXT_OSTOBJ ||
+                val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0);
        if (rc != 0) {
                if (rc != -ENOENT)
                        GOTO(out, rc);
 
 iget:
-               inode = osd_iget(info, dev, lid);
-               if (IS_ERR(inode)) {
-                       rc = PTR_ERR(inode);
-                       /* Someone removed the inode. */
-                       if (rc == -ENOENT || rc == -ESTALE)
-                               rc = 0;
-                       GOTO(out, rc);
-               }
-
-               /* Check whether the inode to be unlinked during OI scrub. */
-               if (unlikely(inode->i_nlink == 0)) {
-                       iput(inode);
-                       GOTO(out, rc = 0);
+               if (inode == NULL) {
+                       inode = osd_iget(info, dev, lid);
+                       if (IS_ERR(inode)) {
+                               rc = PTR_ERR(inode);
+                               /* Someone removed the inode. */
+                               if (rc == -ENOENT || rc == -ESTALE)
+                                       rc = 0;
+                               GOTO(out, rc);
+                       }
                }
 
+               scrub->os_full_speed = 1;
                ops = DTO_INDEX_INSERT;
                idx = osd_oi_fid2idx(dev, fid);
-               if (val == SCRUB_NEXT_NOLMA) {
+               switch (val) {
+               case SCRUB_NEXT_NOLMA:
                        sf->sf_flags |= SF_UPGRADE;
-                       scrub->os_full_speed = 1;
-                       rc = osd_ea_fid_set(info, inode, fid, 0);
+                       rc = osd_ea_fid_set(info, inode, fid, 0, 0);
                        if (rc != 0)
                                GOTO(out, rc);
 
                        if (!(sf->sf_flags & SF_INCONSISTENT))
                                dev->od_igif_inoi = 0;
-               } else {
+                       break;
+               case SCRUB_NEXT_OSTOBJ:
+                       sf->sf_flags |= SF_INCONSISTENT;
+               case SCRUB_NEXT_OSTOBJ_OLD:
+                       break;
+               default:
                        sf->sf_flags |= SF_RECREATED;
-                       scrub->os_full_speed = 1;
                        if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap)))
                                ldiskfs_set_bit(idx, sf->sf_oi_bitmap);
+                       break;
                }
        } else if (osd_id_eq(lid, lid2)) {
                GOTO(out, rc = 0);
        } else {
-               sf->sf_flags |= SF_INCONSISTENT;
                scrub->os_full_speed = 1;
+               sf->sf_flags |= SF_INCONSISTENT;
 
                /* XXX: If the device is restored from file-level backup, then
                 *      some IGIFs may have been already in OI files, and some
@@ -511,7 +534,9 @@ iget:
                dev->od_igif_inoi = 1;
        }
 
-       rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops);
+       rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops,
+                       (val == SCRUB_NEXT_OSTOBJ ||
+                        val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0);
        if (rc == 0) {
                if (scrub->os_in_prior)
                        sf->sf_items_updated_prior++;
@@ -534,16 +559,20 @@ out:
                rc = 0;
        }
 
-       if (ops == DTO_INDEX_INSERT) {
-               /* There may be conflict unlink during the OI scrub,
-                * if happend, then remove the new added OI mapping. */
-               if (unlikely(inode->i_nlink == 0))
-                       osd_scrub_refresh_mapping(info, dev, fid, lid,
-                                                 DTO_INDEX_DELETE);
-               iput(inode);
-       }
+       /* There may be a conflicting unlink during the OI scrub;
+        * if that happened, remove the newly added OI mapping. */
+       if (ops == DTO_INDEX_INSERT && inode != NULL && !IS_ERR(inode) &&
+           unlikely(inode->i_nlink == 0))
+               osd_scrub_refresh_mapping(info, dev, fid, lid,
+                               DTO_INDEX_DELETE,
+                               (val == SCRUB_NEXT_OSTOBJ ||
+                                val == SCRUB_NEXT_OSTOBJ_OLD) ?
+                               OI_KNOWN_ON_OST : 0);
        up_write(&scrub->os_rwsem);
 
+       if (inode != NULL && !IS_ERR(inode))
+               iput(inode);
+
        if (oii != NULL) {
                LASSERT(!cfs_list_empty(&oii->oii_list));
 
@@ -659,13 +688,30 @@ static int osd_iit_next(struct osd_iit_param *param, __u32 *pos)
        }
 }
 
+/**
+ * \retval SCRUB_NEXT_OSTOBJ_OLD: FID-on-OST
+ * \retval 0: FID-on-MDT
+ */
+static int osd_scrub_check_local_fldb(struct osd_thread_info *info,
+                                     struct osd_device *dev,
+                                     struct lu_fid *fid)
+{
+       /* XXX: The initial OI scrub will scan the top level /O to generate
+        *      a small local FLDB according to the <seq>. If the given FID
+        *      is in the local FLDB, then it is FID-on-OST; otherwise it
+        *      is most likely FID-on-MDT. */
+       return 0;
+}
+
 static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
                        struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
                        struct super_block *sb, bool scrub)
 {
-       struct lustre_mdt_attrs *lma   = &info->oti_mdt_attrs;
+       struct lustre_mdt_attrs *lma            = &info->oti_mdt_attrs;
        struct inode            *inode;
-       int                      rc;
+       int                      rc             = 0;
+       bool                     has_lma        = false;
+       ENTRY;
 
        osd_id_gen(lid, pos, OSD_OII_NOGEN);
        inode = osd_iget(info, dev, lid);
@@ -674,43 +720,112 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
                /* The inode may be removed after bitmap searching, or the
                 * file is new created without inode initialized yet. */
                if (rc == -ENOENT || rc == -ESTALE)
-                       return SCRUB_NEXT_CONTINUE;
+                       RETURN(SCRUB_NEXT_CONTINUE);
 
                CERROR("%.16s: fail to read inode, ino# = %u, rc = %d\n",
                       LDISKFS_SB(sb)->s_es->s_volume_name, pos, rc);
-               return rc;
+               RETURN(rc);
        }
 
        /* If the inode has no OI mapping, then it is special locally used,
         * should be invisible to OI scrub or up layer LFSCK. */
-       if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI)) {
-               iput(inode);
-               return SCRUB_NEXT_CONTINUE;
-       }
+       if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI))
+               GOTO(put, rc = SCRUB_NEXT_CONTINUE);
 
        if (scrub &&
            ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
                /* Only skip it for the first OI scrub accessing. */
                ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
-               iput(inode);
-               return SCRUB_NEXT_NOSCRUB;
+               GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
        }
 
        rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
        if (rc == 0) {
-               if (fid_is_llog(&lma->lma_self_fid) ||
-                   (!scrub && fid_is_internal(&lma->lma_self_fid)) ||
-                   (scrub && (lma->lma_incompat & LMAI_AGENT)))
-                       rc = SCRUB_NEXT_CONTINUE;
-               else
-                       *fid = lma->lma_self_fid;
-       } else if (rc == -ENODATA) {
-               lu_igif_build(fid, inode->i_ino, inode->i_generation);
-               if (scrub)
-                       rc = SCRUB_NEXT_NOLMA;
-               else
-                       rc = 0;
+               has_lma = true;
+               if (lma->lma_compat & LMAC_NOT_IN_OI) {
+                       ldiskfs_set_inode_state(inode,
+                                               LDISKFS_STATE_LUSTRE_NO_OI);
+                       GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+               }
+
+               if (fid_is_llog(&lma->lma_self_fid))
+                       GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+
+               *fid = lma->lma_self_fid;
+               if (fid_is_internal(&lma->lma_self_fid)) {
+                       if (!scrub)
+                               rc = SCRUB_NEXT_CONTINUE;
+                       GOTO(put, rc);
+               }
+
+               if (!scrub)
+                       GOTO(put, rc);
+
+               if (fid_is_namespace_visible(fid) && !fid_is_norm(fid))
+                       GOTO(put, rc);
+
+               if (lma->lma_compat & LMAC_FID_ON_OST || fid_is_last_id(fid))
+                       GOTO(put, rc = SCRUB_NEXT_OSTOBJ);
+
+               if (fid_is_idif(fid))
+                       GOTO(put, rc = SCRUB_NEXT_OSTOBJ_OLD);
+
+               if (lma->lma_incompat & LMAI_AGENT)
+                       GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+
+               /* Here, it may be MDT-object, or may be 2.4 OST-object.
+                * Fall through. */
        }
+
+       if (rc == -ENODATA || rc == 0) {
+               rc = osd_get_idif(info, inode, &info->oti_obj_dentry, fid);
+               if (rc == 0) {
+                       if (scrub)
+                               /* It is old 2.x (x <= 3) or 1.8 OST-object. */
+                               rc = SCRUB_NEXT_OSTOBJ_OLD;
+                       GOTO(put, rc);
+               }
+
+               if (rc > 0) {
+                       if (!has_lma)
+                               /* It is FID-on-OST, but we do not know how
+                                * to generate its FID, ignore it directly. */
+                               rc = SCRUB_NEXT_CONTINUE;
+                       else
+                               /* It is 2.4 OST-object. */
+                               rc = SCRUB_NEXT_OSTOBJ_OLD;
+                       GOTO(put, rc);
+               }
+
+               if (rc != -ENODATA)
+                       GOTO(put, rc);
+
+               if (!has_lma) {
+                       if (dev->od_handle_nolma) {
+                               lu_igif_build(fid, inode->i_ino,
+                                             inode->i_generation);
+                               if (scrub)
+                                       rc = SCRUB_NEXT_NOLMA;
+                               else
+                                       rc = 0;
+                       } else {
+                               /* It may be FID-on-OST, or may be FID for
+                                * non-MDT0, anyway, we do not know how to
+                                * generate its FID, ignore it directly. */
+                               rc = SCRUB_NEXT_CONTINUE;
+                       }
+                       GOTO(put, rc);
+               }
+
+               /* For OI scrub case only: the object has LMA but has no ff
+                * (or ff crashed). It may be MDT-object, may be OST-object
+                * with crashed ff. The last check is local FLDB. */
+               rc = osd_scrub_check_local_fldb(info, dev, fid);
+       }
+
+       GOTO(put, rc);
+
+put:
        iput(inode);
        return rc;
 }
@@ -1301,20 +1416,26 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
                        lu_igif_build(&tfid, inode->i_ino, inode->i_generation);
                else
                        tfid = *fid;
-               rc = osd_ea_fid_set(info, inode, &tfid, 0);
+               rc = osd_ea_fid_set(info, inode, &tfid, 0, 0);
                if (rc != 0)
                        RETURN(rc);
        } else {
+               if (lma->lma_compat & LMAC_NOT_IN_OI)
+                       RETURN(0);
+
                tfid = lma->lma_self_fid;
        }
 
-       rc = __osd_oi_lookup(info, dev, &tfid, id2);
+       rc = osd_oi_lookup(info, dev, &tfid, id2, 0);
        if (rc != 0) {
                if (rc != -ENOENT)
                        RETURN(rc);
 
                rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
-                                              DTO_INDEX_INSERT);
+                                              DTO_INDEX_INSERT, 0);
+               if (rc > 0)
+                       rc = 0;
+
                RETURN(rc);
        }
 
@@ -1330,7 +1451,10 @@ osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
                        RETURN(rc);
        }
 
-       rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE);
+       rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
+                                      DTO_INDEX_UPDATE, 0);
+       if (rc > 0)
+               rc = 0;
 
        RETURN(rc);
 }
@@ -1437,7 +1561,19 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
        int                rc;
        ENTRY;
 
-       /* It is existing MDT device. */
+       /* It is existing MDT0 device. We only allow the case of object without
+        * LMA to happen on the MDT0, which is usually for old 1.8 MDT. Then we
+        * can generate IGIF mode FID for the object and related OI mapping. If
+        * it is on other MDTs, file-level backup/restore may have left the
+        * related OI mapping invalid, so we cannot know which is the right
+        * FID for the object. We only allow IGIF objects to reside on the MDT0.
+        *
+        * XXX: For the case of object on non-MDT0 device with neither LMA nor
+        *      "fid" xattr, then something crashed. We cannot re-generate the
+        *      FID directly, instead, the OI scrub will scan the OI structure
+        *      and try to re-generate the LMA from the OI mapping. But if the
+        *      OI mapping crashed or lost also, then we have to give up under
+        *      double failure cases. */
        dev->od_handle_nolma = 1;
        child = osd_ios_lookup_one_len(dot_lustre_name, dentry,
                                       strlen(dot_lustre_name));
@@ -1582,7 +1718,7 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info,
                        dput(child);
                else if (PTR_ERR(child) == -ENOENT)
                        osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
-                                                 NULL, DTO_INDEX_DELETE);
+                                                 NULL, DTO_INDEX_DELETE, 0);
                map++;
        }
 
@@ -1709,6 +1845,8 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        struct ldiskfs_super_block *es     = LDISKFS_SB(sb)->s_es;
        struct lvfs_run_ctxt        saved;
        struct file                *filp;
+       struct inode               *inode;
+       struct lu_fid              *fid    = &info->oti_fid;
        int                         dirty  = 0;
        int                         rc     = 0;
        ENTRY;
@@ -1726,14 +1864,26 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
 
        push_ctxt(&saved, ctxt, NULL);
        filp = filp_open(osd_scrub_name, O_RDWR | O_CREAT, 0644);
-       if (IS_ERR(filp))
+       if (IS_ERR(filp)) {
+               pop_ctxt(&saved, ctxt, NULL);
                RETURN(PTR_ERR(filp));
+       }
+
+       inode = filp->f_dentry->d_inode;
+       ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+       /* 'What the @fid is' is not important, because the object
+        * has no OI mapping, and is only visible inside the OSD. */
+       lu_igif_build(fid, inode->i_ino, inode->i_generation);
+       rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
+       if (rc != 0) {
+               filp_close(filp, 0);
+               pop_ctxt(&saved, ctxt, NULL);
+               RETURN(rc);
+       }
 
-       scrub->os_inode = igrab(filp->f_dentry->d_inode);
+       scrub->os_inode = igrab(inode);
        filp_close(filp, 0);
        pop_ctxt(&saved, ctxt, NULL);
-       ldiskfs_set_inode_state(scrub->os_inode,
-                               LDISKFS_STATE_LUSTRE_NO_OI);
 
        rc = osd_scrub_file_load(scrub);
        if (rc == -ENOENT) {
index 7fa7cb2..8cd6569 100644 (file)
@@ -1430,7 +1430,7 @@ static inline int osd_init_lma(const struct lu_env *env, struct osd_object *obj,
        struct lu_buf            buf;
        int rc;
 
-       lustre_lma_init(lma, fid, 0);
+       lustre_lma_init(lma, fid, 0, 0);
        lustre_lma_swab(lma);
        buf.lb_buf = lma;
        buf.lb_len = sizeof(*lma);
index ed024bd..1eee9bc 100644 (file)
@@ -437,6 +437,10 @@ void lustre_assert_wire_constants(void)
                (unsigned)LMAC_HSM);
        LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
                (unsigned)LMAC_SOM);
+       LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+               (unsigned)LMAC_NOT_IN_OI);
+       LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+               (unsigned)LMAC_FID_ON_OST);
        LASSERTF(OBJ_CREATE == 1, "found %lld\n",
                 (long long)OBJ_CREATE);
        LASSERTF(OBJ_DESTROY == 2, "found %lld\n",
index 97e4187..4dbf1b1 100644 (file)
@@ -42,11 +42,14 @@ check_and_setup_lustre
 build_test_filter
 
 MDT_DEV="${FSNAME}-MDT0000"
+OST_DEV="${FSNAME}-OST0000"
 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}"
 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
 SHOW_SCRUB="do_facet $SINGLEMDS \
                $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
+SHOW_SCRUB_ON_OST="do_facet ost1 \
+               $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
 MOUNT_OPTS_SCRUB="-o user_xattr"
 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
 
@@ -799,6 +802,42 @@ test_11() {
 }
 run_test 11 "OI scrub skips the new created objects only once"
 
+test_12() {
+       echo "stopall"
+       stopall > /dev/null
+       echo "formatall"
+       formatall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       mkdir -p $DIR/$tdir
+       $SETSTRIPE -c 1 -i 0 $DIR/$tdir
+
+       #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY               0x195
+       do_facet ost1 $LCTL set_param fail_loc=0x195
+       createmany -o $DIR/$tdir/f 1000
+
+       echo "stopall"
+       stopall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
+       do_facet ost1 $LCTL set_param fail_loc=0
+       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "init" ] ||
+               error "(1) Expect 'init', but got '$STATUS'"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+
+       sleep 3
+       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(3) Expect 'completed', but got '$STATUS'"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed"
+}
+run_test 12 "OI scrub can rebuild invalid /O entries"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
index 742de0d..b845d0c 100644 (file)
@@ -210,6 +210,8 @@ check_lustre_mdt_attrs(void)
 
        CHECK_VALUE_X(LMAC_HSM);
        CHECK_VALUE_X(LMAC_SOM);
+       CHECK_VALUE_X(LMAC_NOT_IN_OI);
+       CHECK_VALUE_X(LMAC_FID_ON_OST);
 }
 
 static void
index 5bb959a..7b036e5 100644 (file)
@@ -445,6 +445,10 @@ void lustre_assert_wire_constants(void)
                (unsigned)LMAC_HSM);
        LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
                (unsigned)LMAC_SOM);
+       LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+               (unsigned)LMAC_NOT_IN_OI);
+       LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+               (unsigned)LMAC_FID_ON_OST);
        LASSERTF(OBJ_CREATE == 1, "found %lld\n",
                 (long long)OBJ_CREATE);
        LASSERTF(OBJ_DESTROY == 2, "found %lld\n",