* xattr.
*/
enum lma_compat {
- LMAC_HSM = 0x00000001,
- LMAC_SOM = 0x00000002,
+ LMAC_HSM = 0x00000001,
+ LMAC_SOM = 0x00000002,
+ LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping;
+ * osd_check_lma() does not compare
+ * the LMA self FID of such an object
+ * against the FID being looked up */
+ LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
+ * under /O/<seq>/d<x>. */
};
/**
* This information is stored in lustre_mdt_attrs::lma_incompat.
*/
enum lma_incompat {
- LMAI_RELEASED = 0x0000001, /* file is released */
- LMAI_AGENT = 0x00000002, /* agent inode */
- LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
- is on the remote MDT */
+ LMAI_RELEASED = 0x00000001, /* file is released */
+ LMAI_AGENT = 0x00000002, /* agent inode */
+ LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
+ is on the remote MDT */
};
#define LMA_INCOMPAT_SUPP (LMAI_AGENT | LMAI_REMOTE_PARENT)
extern void lustre_lma_swab(struct lustre_mdt_attrs *lma);
extern void lustre_lma_init(struct lustre_mdt_attrs *lma,
- const struct lu_fid *fid, __u32 incompat);
+ const struct lu_fid *fid,
+ __u32 compat, __u32 incompat);
/**
* SOM on-disk attributes stored in a separate xattr.
*/
#define OBD_FAIL_OSD_SCRUB_FATAL 0x192
#define OBD_FAIL_OSD_FID_MAPPING 0x193
#define OBD_FAIL_OSD_LMA_INCOMPAT 0x194
+#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
lu_root_fid(&fid);
lma = (struct lustre_mdt_attrs *)&mdd_env_info(env)->mti_xattr_buf;
- lustre_lma_init(lma, &fid, 0);
+ lustre_lma_init(lma, &fid, 0, 0);
lustre_lma_swab(lma);
buf.lb_buf = lma;
buf.lb_len = sizeof(*lma);
* \param incompat - features that MDS must understand to access object
*/
void lustre_lma_init(struct lustre_mdt_attrs *lma, const struct lu_fid *fid,
- __u32 incompat)
+ __u32 compat, __u32 incompat)
{
- lma->lma_compat = 0;
+ lma->lma_compat = compat;
lma->lma_incompat = incompat;
lma->lma_self_fid = *fid;
struct osd_mdobj_map *omm;
struct dentry *d;
struct osd_thread_info *info = osd_oti_get(env);
- struct lu_fid *fid = &info->oti_fid;
+ struct lu_fid *fid = &info->oti_fid3;
int rc = 0;
ENTRY;
/* Set LMA for remote parent inode */
lu_local_obj_fid(fid, REMOTE_PARENT_DIR_OID);
- rc = osd_ea_fid_set(info, d->d_inode, fid, 0);
- if (rc != 0)
- GOTO(cleanup, rc);
+ rc = osd_ea_fid_set(info, d->d_inode, fid, LMAC_NOT_IN_OI, 0);
+
+ GOTO(cleanup, rc);
+
cleanup:
pop_ctxt(&save, &new, NULL);
if (rc) {
OBD_FREE_PTR(omm);
dev->od_mdt_map = NULL;
}
- RETURN(rc);
+ return rc;
}
static void osd_mdt_fini(struct osd_device *osd)
* CONFIGS
*
*/
-static int osd_ost_init(struct osd_device *dev)
+static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
{
- struct lvfs_run_ctxt new;
- struct lvfs_run_ctxt save;
- struct dentry *rootd = osd_sb(dev)->s_root;
- struct dentry *d;
- int rc;
+ struct lvfs_run_ctxt new;
+ struct lvfs_run_ctxt save;
+ struct dentry *rootd = osd_sb(dev)->s_root;
+ struct dentry *d;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct inode *inode;
+ struct lu_fid *fid = &info->oti_fid3;
+ int rc;
ENTRY;
OBD_ALLOC_PTR(dev->od_ost_map);
if (IS_ERR(d))
GOTO(cleanup, rc = PTR_ERR(d));
- ldiskfs_set_inode_state(d->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ inode = d->d_inode;
+ ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
dev->od_ost_map->om_root = d;
+ /* Which @fid is used is not important, because the object
+ * has no OI mapping and is only visible inside the OSD. */
+ lu_igif_build(fid, inode->i_ino, inode->i_generation);
+ rc = osd_ea_fid_set(info, inode, fid,
+ LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+
+ GOTO(cleanup, rc);
+
cleanup:
osd_pop_ctxt(dev, &new, &save);
if (IS_ERR(d)) {
OBD_FREE_PTR(dev->od_ost_map);
RETURN(PTR_ERR(d));
}
-
- RETURN(rc);
+ return rc;
}
static void osd_seq_free(struct osd_obj_map *map,
ENTRY;
/* prepare structures for OST */
- rc = osd_ost_init(dev);
+ rc = osd_ost_init(env, dev);
if (rc)
RETURN(rc);
osd_mdt_fini(dev);
}
+/**
+ * Update the specified OI mapping.
+ *
+ * Look up the entry \a name under \a dir and, if it refers to an inode
+ * other than \a id->oii_ino, overwrite the inode number stored in that
+ * entry with \a id->oii_ino as part of the transaction \a th.
+ *
+ * The object currently referenced by the entry is inspected first. The
+ * entry is replaced when that object cannot be loaded (-ENOENT/-ESTALE),
+ * when it carries neither a FID-in-LMA nor a self FID in its filter_fid
+ * xattr, or when the FID it carries differs from \a fid. If it carries
+ * the same FID as \a fid, the entry is left alone and -EEXIST is returned.
+ *
+ * The i_mutex of the parent directory is held across the lookup and the
+ * update. \a th must wrap a started journal handle (asserted on entry).
+ *
+ * \retval 1, changed nothing (the entry already refers to \a id->oii_ino)
+ * \retval 0, changed successfully
+ * \retval -ve, on error (-ENOENT if there is no entry called \a name)
+ */
+static int osd_obj_update_entry(struct osd_thread_info *info,
+ struct osd_device *osd,
+ struct dentry *dir, const char *name,
+ const struct lu_fid *fid,
+ const struct osd_inode_id *id,
+ struct thandle *th)
+{
+ struct inode *parent = dir->d_inode;
+ struct osd_thandle *oh;
+ struct dentry *child;
+ struct ldiskfs_dir_entry_2 *de;
+ struct buffer_head *bh;
+ struct inode *inode;
+ struct dentry *dentry = &info->oti_obj_dentry;
+ struct osd_inode_id *oi_id = &info->oti_id3;
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ struct lu_fid *oi_fid = &lma->lma_self_fid;
+ int rc;
+ ENTRY;
+
+ oh = container_of(th, struct osd_thandle, ot_super);
+ LASSERT(oh->ot_handle != NULL);
+ LASSERT(oh->ot_handle->h_transaction != NULL);
+
+ child = &info->oti_child_dentry; /* per-thread scratch dentry used as the lookup key */
+ child->d_parent = dir;
+ child->d_name.hash = 0;
+ child->d_name.name = name;
+ child->d_name.len = strlen(name);
+
+ ll_vfs_dq_init(parent);
+ mutex_lock(&parent->i_mutex);
+ bh = osd_ldiskfs_find_entry(parent, child, &de, NULL);
+ if (bh == NULL)
+ GOTO(out, rc = -ENOENT);
+
+ if (le32_to_cpu(de->inode) == id->oii_ino) /* entry already maps to the wanted inode */
+ GOTO(out, rc = 1);
+
+ osd_id_gen(oi_id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
+ inode = osd_iget(info, osd, oi_id);
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+ if (rc == -ENOENT || rc == -ESTALE) /* the old target is gone: just replace the entry */
+ goto update;
+ GOTO(out, rc);
+ }
+
+ rc = osd_get_lma(info, inode, dentry, lma);
+ if (rc == -ENODATA) {
+ rc = osd_get_idif(info, inode, dentry, oi_fid);
+ if (rc > 0) { /* filter_fid without a self FID: nothing to compare against */
+ oi_fid = NULL;
+ rc = 0;
+ }
+ }
+ iput(inode);
+
+ /* If the OST-object has neither FID-in-LMA nor FID-in-ff, it is
+ * either a crashed object or an uninitialized one. Replace it. */
+ if (rc == -ENODATA || oi_fid == NULL)
+ goto update;
+
+ if (rc != 0)
+ GOTO(out, rc);
+
+ if (lu_fid_eq(fid, oi_fid)) {
+ CERROR("%s: the FID "DFID" is used by two objects: "
+ "%u/%u %u/%u\n", osd_name(osd), PFID(fid),
+ oi_id->oii_ino, oi_id->oii_gen,
+ id->oii_ino, id->oii_gen);
+ GOTO(out, rc = -EEXIST);
+ }
+
+update:
+ /* There may be a temporary inconsistency. On one hand, the new
+ * object may be referenced by multiple entries; that is out of
+ * our control unless we traverse the whole /O tree, which is
+ * not in flat order, is inefficient, and should be avoided.
+ * On the other hand, the old object may become an orphan if it
+ * is still valid. Since it was referenced by an invalid entry,
+ * making it temporarily invisible is probably no worse. OI scrub
+ * will process it later. */
+ rc = ldiskfs_journal_get_write_access(oh->ot_handle, bh);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ de->inode = cpu_to_le32(id->oii_ino); /* re-point the existing entry in place */
+ rc = ldiskfs_journal_dirty_metadata(oh->ot_handle, bh);
+
+ GOTO(out, rc);
+
+out:
+ brelse(bh);
+ mutex_unlock(&parent->i_mutex);
+ return rc;
+}
+
static int osd_obj_del_entry(struct osd_thread_info *info,
struct osd_device *osd,
struct dentry *dird, char *name,
child->d_parent = dir;
child->d_inode = inode;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
+ inode->i_ino++;
+
ll_vfs_dq_init(dir->d_inode);
mutex_lock(&dir->d_inode->i_mutex);
rc = osd_ldiskfs_add_entry(oh->ot_handle, child, inode, NULL);
}
/* external locking is required */
-static int osd_seq_load_locked(struct osd_device *osd,
+static int osd_seq_load_locked(struct osd_thread_info *info,
+ struct osd_device *osd,
struct osd_obj_seq *osd_seq)
{
struct osd_obj_map *map = osd->od_ost_map;
struct dentry *seq_dir;
+ struct inode *inode;
+ struct lu_fid *fid = &info->oti_fid3;
int rc = 0;
int i;
char dir_name[32];
else if (seq_dir->d_inode == NULL)
GOTO(out_put, rc = -EFAULT);
- ldiskfs_set_inode_state(seq_dir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ inode = seq_dir->d_inode;
+ ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
osd_seq->oos_root = seq_dir;
+ /* Which @fid is used is not important, because the object
+ * has no OI mapping and is only visible inside the OSD. */
+ lu_igif_build(fid, inode->i_ino, inode->i_generation);
+ rc = osd_ea_fid_set(info, inode, fid,
+ LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+ if (rc != 0)
+ GOTO(out_put, rc);
+
LASSERT(osd_seq->oos_dirs == NULL);
OBD_ALLOC(osd_seq->oos_dirs,
sizeof(seq_dir) * osd_seq->oos_subdir_count);
GOTO(out_free, rc = -EFAULT);
}
- ldiskfs_set_inode_state(dir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ inode = dir->d_inode;
+ ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
osd_seq->oos_dirs[i] = dir;
+
+ /* Which @fid is used is not important, because the object
+ * has no OI mapping and is only visible inside the OSD. */
+ lu_igif_build(fid, inode->i_ino, inode->i_generation);
+ rc = osd_ea_fid_set(info, inode, fid,
+ LMAC_NOT_IN_OI | LMAC_FID_ON_OST, 0);
+ if (rc != 0)
+ GOTO(out_free, rc);
}
if (rc != 0) {
RETURN(rc);
}
-struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq)
+static struct osd_obj_seq *osd_seq_load(struct osd_thread_info *info,
+ struct osd_device *osd, obd_seq seq)
{
struct osd_obj_map *map;
struct osd_obj_seq *osd_seq;
/* Init subdir count to be 32, but each seq can have
* different subdir count */
osd_seq->oos_subdir_count = map->om_subdir_count;
- rc = osd_seq_load_locked(osd, osd_seq);
+ rc = osd_seq_load_locked(info, osd, osd_seq);
if (rc != 0)
GOTO(cleanup, rc);
LASSERT(map->om_root);
fid_to_ostid(fid, ostid);
- osd_seq = osd_seq_load(dev, ostid_seq(ostid));
+ osd_seq = osd_seq_load(info, dev, ostid_seq(ostid));
if (IS_ERR(osd_seq))
RETURN(PTR_ERR(osd_seq));
struct osd_obj_seq *osd_seq;
struct dentry *d;
struct ost_id *ostid = &info->oti_ostid;
+ obd_id oid;
int dirn, rc = 0;
char name[32];
- ENTRY;
+ ENTRY;
- map = osd->od_ost_map;
- LASSERT(map);
+ map = osd->od_ost_map;
+ LASSERT(map);
/* map fid to seq:objid */
- fid_to_ostid(fid, ostid);
+ fid_to_ostid(fid, ostid);
- osd_seq = osd_seq_load(osd, ostid_seq(ostid));
+ oid = ostid_id(ostid);
+ osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
if (IS_ERR(osd_seq))
RETURN(PTR_ERR(osd_seq));
- dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
+ dirn = oid & (osd_seq->oos_subdir_count - 1);
d = osd_seq->oos_dirs[dirn];
- LASSERT(d);
+ LASSERT(d);
- osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+ osd_oid_name(name, sizeof(name), fid, oid);
+
+again:
rc = osd_obj_add_entry(info, osd, d, name, id, th);
+ if (rc == -EEXIST) {
+ rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
+ if (unlikely(rc == -ENOENT))
+ goto again;
- RETURN(rc);
+ if (unlikely(rc == 1))
+ RETURN(0);
+ }
+
+ RETURN(rc);
}
int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
/* map fid to seq:objid */
fid_to_ostid(fid, ostid);
- osd_seq = osd_seq_load(osd, ostid_seq(ostid));
+ osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
if (IS_ERR(osd_seq))
GOTO(cleanup, rc = PTR_ERR(osd_seq));
RETURN(rc);
}
-int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
- const struct lu_fid *fid,
- const struct osd_inode_id *id,
- struct thandle *th)
+int osd_obj_map_update(struct osd_thread_info *info,
+ struct osd_device *osd,
+ const struct lu_fid *fid,
+ const struct osd_inode_id *id,
+ struct thandle *th)
{
- struct osd_obj_map *map = osd->od_ost_map;
- struct dentry *root = osd_sb(osd)->s_root;
- char *name;
- int rc = 0;
+ struct osd_obj_seq *osd_seq;
+ struct dentry *d;
+ struct ost_id *ostid = &info->oti_ostid;
+ int dirn, rc = 0;
+ char name[32];
ENTRY;
+ fid_to_ostid(fid, ostid); /* map fid to seq:objid, as the insert path does */
+ osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
+ if (IS_ERR(osd_seq))
+ RETURN(PTR_ERR(osd_seq));
+ /* Select the sub-directory of this sequence: the object id masked
+ * with (oos_subdir_count - 1), the same selection the insert path
+ * uses. */
+ dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
+ d = osd_seq->oos_dirs[dirn];
+ LASSERT(d);
+ /* Rewrite the existing name entry of this object so that it refers
+ * to @id; the result of osd_obj_update_entry() (1: nothing changed,
+ * 0: updated, negative: error) is passed through unchanged. */
+ osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+ rc = osd_obj_update_entry(info, osd, d, name, fid, id, th);
+
+ RETURN(rc);
+}
+/**
+ * Locate the directory and the entry name under which a special object
+ * (LAST_ID or another named local file) is kept.
+ *
+ * For a LAST_ID FID the sequence is loaded with osd_seq_load(), \a *name
+ * is set to "LAST_ID" and the root dentry of that sequence is returned.
+ * For any other FID the name comes from osd_lf_fid2name(); the filesystem
+ * root dentry is returned only when that name is non-NULL and non-empty.
+ *
+ * \retval dentry of the directory holding the entry; \a *name is valid
+ * \retval ERR_PTR(-ENOENT) if the FID has no usable name; in that case
+ * \a *name may be NULL or an empty string
+ * \retval other ERR_PTR value if osd_seq_load() failed
+ *
+ * NOTE(review): RETURN() is used below although this function has no
+ * ENTRY -- confirm that this is intended.
+ */
+static struct dentry *
+osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, char **name)
+{
+ struct dentry *root = ERR_PTR(-ENOENT);
+
if (fid_is_last_id(fid)) {
- struct osd_obj_seq *osd_seq;
+ struct osd_obj_seq *osd_seq;
/* on creation of LAST_ID we create O/<seq> hierarchy */
- LASSERT(map);
- osd_seq = osd_seq_load(osd, fid_seq(fid));
+ osd_seq = osd_seq_load(info, osd, fid_seq(fid));
if (IS_ERR(osd_seq))
- RETURN(PTR_ERR(osd_seq));
- rc = osd_obj_add_entry(info, osd, osd_seq->oos_root,
- "LAST_ID", id, th);
+ RETURN((struct dentry *)osd_seq);
+
+ *name = "LAST_ID";
+ root = osd_seq->oos_root;
} else {
- name = osd_lf_fid2name(fid);
- if (name == NULL)
+ *name = osd_lf_fid2name(fid);
+ if (*name == NULL)
CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid));
- else if (name[0])
- rc = osd_obj_add_entry(info, osd, root, name, id, th);
+ else if ((*name)[0])
+ root = osd_sb(osd)->s_root;
+ }
+
+ return root;
+}
+/**
+ * Update the name entry of a special object so that it refers to \a id.
+ *
+ * The directory and entry name are resolved with osd_object_spec_find()
+ * and the entry is then rewritten by osd_obj_update_entry().
+ *
+ * \retval 1 nothing changed; this is also returned when the FID has
+ * no name entry at all (osd_object_spec_find() gave -ENOENT)
+ * \retval 0 the entry was updated
+ * \retval -ve on error
+ */
+int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th)
+{
+ struct dentry *root;
+ char *name;
+ int rc;
+ ENTRY;
+
+ root = osd_object_spec_find(info, osd, fid, &name);
+ if (!IS_ERR(root)) {
+ rc = osd_obj_update_entry(info, osd, root, name, fid, id, th);
+ } else {
+ rc = PTR_ERR(root);
+ if (rc == -ENOENT) /* no entry to update: report "changed nothing" */
+ rc = 1;
+ }
+
+ RETURN(rc);
+}
+
+int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th)
+{
+ struct dentry *root;
+ char *name;
+ int rc;
+ ENTRY;
+
+ root = osd_object_spec_find(info, osd, fid, &name);
+ if (!IS_ERR(root)) {
+ rc = osd_obj_add_entry(info, osd, root, name, id, th);
+ } else {
+ rc = PTR_ERR(root);
+ if (rc == -ENOENT)
+ rc = 0;
}
RETURN(rc);
if (fid_is_last_id(fid)) {
struct osd_obj_seq *osd_seq;
- osd_seq = osd_seq_load(osd, fid_seq(fid));
+ osd_seq = osd_seq_load(info, osd, fid_seq(fid));
if (IS_ERR(osd_seq))
RETURN(PTR_ERR(osd_seq));
root = osd_seq->oos_root;
[OSD_OT_WRITE] = OSD_OT_WRITE,
[OSD_OT_INSERT] = OSD_OT_DELETE,
[OSD_OT_DELETE] = OSD_OT_INSERT,
+ [OSD_OT_UPDATE] = OSD_OT_MAX,
[OSD_OT_QUOTA] = OSD_OT_MAX,
};
}
}
-static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
- const char *name, void *buf, int len)
-{
- dentry->d_inode = inode;
- dentry->d_sb = inode->i_sb;
- return inode->i_op->getxattr(dentry, name, buf, len);
-}
-
int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
struct dentry *dentry, struct lustre_mdt_attrs *lma)
{
return inode;
}
-static struct inode *
-osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev,
- struct osd_inode_id *id, const struct lu_fid *fid)
+/**
+ * Derive the self FID of an object from its XATTR_NAME_FID xattr.
+ *
+ * The xattr is read into the per-thread filter_fid_old buffer. When its
+ * size is that of struct filter_fid_old, the ff_seq and ff_objid fields
+ * are converted into an ost_id and from there into \a fid (OST index 0
+ * is used for now). \a fid is written in that case only.
+ *
+ * A non-negative xattr size matching neither known layout is reported
+ * as -EINVAL. A negative result of the xattr read is returned unchanged
+ * (callers check for -ENODATA).
+ *
+ * \retval +v: new filter_fid, does not contain self-fid
+ * \retval 0: filter_fid_old, contains self-fid
+ * \retval -v: other failure cases
+ */
+int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
+ struct dentry *dentry, struct lu_fid *fid)
{
- struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
- struct inode *inode;
+ struct filter_fid_old *ff = &info->oti_ff;
+ struct ost_id *ostid = &info->oti_ostid;
int rc;
- inode = osd_iget(info, dev, id);
- if (IS_ERR(inode))
- return inode;
+ rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
+ if (rc == sizeof(*ff)) {
+ rc = 0;
+ ostid_set_seq(ostid, le64_to_cpu(ff->ff_seq));
+ ostid_set_id(ostid, le64_to_cpu(ff->ff_objid));
+ /* XXX: should use real OST index in the future. LU-3569 */
+ ostid_to_fid(fid, ostid, 0);
+ } else if (rc == sizeof(struct filter_fid)) {
+ rc = 1; /* known layout, but it carries no self FID; @fid is untouched */
+ } else if (rc >= 0) {
+ rc = -EINVAL; /* unexpected xattr size */
+ }
- rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
- if (rc == -ENODATA)
- return inode;
+ return rc;
+}
- if (rc != 0) {
- iput(inode);
- return ERR_PTR(rc);
+/**
+ * Check that the inode attached to \a obj is consistent with the FID
+ * the object was looked up by.
+ *
+ * The LMA xattr is read first. If it is absent, the self FID is derived
+ * from the filter_fid xattr via osd_get_idif(). When a self FID is
+ * available and the LMA is not flagged LMAC_NOT_IN_OI, it is compared
+ * with the FID of \a obj. Objects that provide no self FID at all (no
+ * LMA and either no filter_fid or one without a self FID) are accepted
+ * without comparison.
+ *
+ * The inode reference is not dropped here; that is left to the caller.
+ *
+ * \retval 0 consistent, or nothing to compare against
+ * \retval -EOPNOTSUPP the LMA carries unsupported incompat feature(s)
+ * \retval -EREMCHG the stored self FID differs from the FID of \a obj
+ * \retval -ve other failure while reading the xattrs
+ */
+static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
+{
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ struct inode *inode = obj->oo_inode;
+ struct dentry *dentry = &info->oti_obj_dentry;
+ struct lu_fid *fid = NULL;
+ int rc;
+ ENTRY;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
+ RETURN(0);
+
+ CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
+ rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
+ info->oti_mdt_attrs_old, LMA_OLD_SIZE);
+ if (rc == -ENODATA) {
+ fid = &lma->lma_self_fid;
+ rc = osd_get_idif(info, inode, dentry, fid);
+ if (rc > 0) {
+ /* The filter_fid has no self FID and osd_get_idif()
+ * did not write @fid, so the LMA buffer only holds
+ * stale per-thread data. Drop the pointer, as
+ * osd_obj_update_entry() does, so that the FID
+ * comparison below is skipped. */
+ fid = NULL;
+ rc = 0;
+ }
 }
- if (!lu_fid_eq(fid, &lma->lma_self_fid)) {
- CDEBUG(D_LFSCK, "inconsistent obj: "DFID", %lu, "DFID"\n",
- PFID(&lma->lma_self_fid), inode->i_ino, PFID(fid));
- iput(inode);
- return ERR_PTR(-EREMCHG);
+ /* Neither LMA nor filter_fid exists: nothing to verify. */
+ if (unlikely(rc == -ENODATA))
+ RETURN(0);
+
+ if (rc < 0)
+ RETURN(rc);
+
+ /* rc > 0 can only be the size of the LMA xattr that was read above. */
+ if (rc > 0) {
+ rc = 0;
+ lustre_lma_swab(lma);
+ if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
+ CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
+ CWARN("%s: unsupported incompat LMA feature(s) %#x for "
+ "fid = "DFID", ino = %lu\n",
+ osd_obj2dev(obj)->od_svname,
+ lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
+ PFID(lu_object_fid(&obj->oo_dt.do_lu)),
+ inode->i_ino);
+ rc = -EOPNOTSUPP;
+ } else if (!(lma->lma_compat & LMAC_NOT_IN_OI)) {
+ fid = &lma->lma_self_fid;
+ }
}
- return inode;
+ if (fid != NULL &&
+ unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu), fid))) {
+ CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
+ osd_obj2dev(obj)->od_svname,
+ PFID(lu_object_fid(&obj->oo_dt.do_lu)),
+ PFID(&lma->lma_self_fid));
+ rc = -EREMCHG;
+ }
+
+ RETURN(rc);
}
static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
struct scrub_file *sf;
int result;
int saved = 0;
- bool verify = false;
bool in_oi = false;
bool triggered = false;
ENTRY;
goto iget;
}
- if (sf->sf_flags & SF_INCONSISTENT)
- verify = true;
-
/*
* Objects are created as locking anchors or place holders for objects
* yet to be created. No need to osd_oi_lookup() at here because FID
GOTO(out, result = 0);
/* Search order: 3. OI files. */
- result = osd_oi_lookup(info, dev, fid, id, true);
+ result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
if (result == -ENOENT) {
- if (!fid_is_norm(fid) || fid_is_on_ost(info, dev, fid) ||
+ if (!fid_is_norm(fid) ||
+ fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) ||
!ldiskfs_test_bit(osd_oi_fid2idx(dev,fid),
sf->sf_oi_bitmap))
GOTO(out, result = 0);
in_oi = true;
iget:
- if (!verify)
- inode = osd_iget(info, dev, id);
- else
- inode = osd_iget_verify(info, dev, id, fid);
+ inode = osd_iget(info, dev, id);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
if (result == -ENOENT || result == -ESTALE) {
fid, id);
if (result == 0) {
in_oi = false;
- verify = false;
goto iget;
}
obj->oo_inode = inode;
LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
+ result = osd_check_lma(env, obj);
+ if (result != 0) {
+ iput(inode);
+ obj->oo_inode = NULL;
+ if (result == -EREMCHG)
+ goto trigger;
+
+ GOTO(out, result);
+ }
+
obj->oo_compat_dot_created = 1;
obj->oo_compat_dotdot_created = 1;
(LOHA_EXISTS | (obj->oo_inode->i_mode & S_IFMT));
}
-static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
-{
- struct osd_thread_info *info = osd_oti_get(env);
- struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
- int rc;
- ENTRY;
-
- CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
- rc = __osd_xattr_get(obj->oo_inode, &info->oti_obj_dentry,
- XATTR_NAME_LMA, info->oti_mdt_attrs_old,
- LMA_OLD_SIZE);
- if (rc > 0) {
- rc = 0;
- lustre_lma_swab(lma);
- if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
- CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
- rc = -EOPNOTSUPP;
- CWARN("%s: unsupported incompat LMA feature(s) %#x for "
- "fid = "DFID", ino = %lu: rc = %d\n",
- osd_obj2dev(obj)->od_svname,
- lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
- PFID(lu_object_fid(&obj->oo_dt.do_lu)),
- obj->oo_inode->i_ino, rc);
- }
- if (unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu),
- &lma->lma_self_fid))) {
- CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
- osd_obj2dev(obj)->od_svname,
- PFID(lu_object_fid(&obj->oo_dt.do_lu)),
- PFID(&lma->lma_self_fid));
- if (obj->oo_inode != NULL) {
- iput(obj->oo_inode);
- obj->oo_inode = NULL;
- }
- rc = -ESTALE;
- }
- } else if (rc == -ENODATA) {
- /* haven't initialize LMA xattr */
- rc = 0;
- }
-
- RETURN(rc);
-}
-
/*
* Concurrency: no concurrent access is possible that early in object
* life-cycle.
result = osd_fid_lookup(env, obj, lu_object_fid(l), conf);
obj->oo_dt.do_body_ops = &osd_body_ops_new;
- if (result == 0 && obj->oo_inode != NULL) {
- result = osd_check_lma(env, obj);
- if (result != 0)
- return result;
-
+ if (result == 0 && obj->oo_inode != NULL)
osd_object_init0(obj);
- }
LINVRNT(osd_invariant(obj));
return result;
LASSERT(obj->oo_inode != NULL);
osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
- return osd_oi_insert(info, osd, fid, id, th);
+ return osd_oi_insert(info, osd, fid, id, th, OI_CHECK_FLD);
}
int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
osd_trans_declare_op(env, oh, OSD_OT_CREATE,
osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
if (!fid_is_on_ost(osd_oti_get(env), osd_dt_dev(handle->th_dev),
- lu_object_fid(&dt->do_lu)))
+ lu_object_fid(&dt->do_lu), OI_CHECK_FLD))
/* Reuse idle OI block may cause additional one OI block
* to be changed. */
osd_trans_declare_op(env, oh, OSD_OT_INSERT,
osd_trans_exec_op(env, th, OSD_OT_DESTROY);
- result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
+ result = osd_oi_delete(osd_oti_get(env), osd, fid, th, OI_CHECK_FLD);
/* XXX: add to ext3 orphan list */
/* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
* FIXME: It is good to have/use ldiskfs_xattr_set_handle() here
*/
int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
- const struct lu_fid *fid, __u64 flags)
+ const struct lu_fid *fid, __u32 compat, __u32 incompat)
{
struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
int rc;
+ ENTRY;
if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA))
- return 0;
+ RETURN(0);
- lustre_lma_init(lma, fid, flags);
+ lustre_lma_init(lma, fid, compat, incompat);
lustre_lma_swab(lma);
rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma, sizeof(*lma),
XATTR_CREATE);
- /* Someone may created the EA by race. */
- if (unlikely(rc == -EEXIST))
- rc = 0;
- return rc;
+ /* LMA may already exist, but we need to check that all the
+ * desired compat/incompat flags have been added. When no flag
+ * is requested, an existing LMA is accepted as it is. */
+ if (unlikely(rc == -EEXIST)) {
+ if (compat == 0 && incompat == 0)
+ RETURN(0);
+ /* Re-read the LMA that is stored on disk.
+ * NOTE(review): the read targets oti_mdt_attrs_old while the
+ * merge below goes through @lma (oti_mdt_attrs); presumably
+ * both name the same storage -- confirm. */
+ rc = __osd_xattr_get(inode, &info->oti_obj_dentry,
+ XATTR_NAME_LMA, info->oti_mdt_attrs_old,
+ LMA_OLD_SIZE);
+ if (rc <= 0)
+ RETURN(-EINVAL);
+ /* Nothing to do if every requested bit is already set. */
+ lustre_lma_swab(lma);
+ if (!(~lma->lma_compat & compat) &&
+ !(~lma->lma_incompat & incompat))
+ RETURN(0);
+ /* OR the missing bits into the flag words and write back. */
+ lma->lma_compat |= compat;
+ lma->lma_incompat |= incompat;
+ lustre_lma_swab(lma);
+ rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma,
+ sizeof(*lma), XATTR_REPLACE);
+ }
+
+ RETURN(rc);
}
/**
}
/* Set special LMA flag for local agent inode */
- rc = osd_ea_fid_set(info, local, fid, LMAI_AGENT);
+ rc = osd_ea_fid_set(info, local, fid, 0, LMAI_AGENT);
if (rc != 0) {
CERROR("%s: set LMA for "DFID" remote inode failed: rc = %d\n",
osd_name(osd), PFID(fid), rc);
result = __osd_object_create(info, obj, attr, hint, dof, th);
if (result == 0)
- result = osd_ea_fid_set(info, obj->oo_inode, fid, 0);
+ result = osd_ea_fid_set(info, obj->oo_inode, fid,
+ fid_is_on_ost(info, osd_obj2dev(obj),
+ fid, OI_CHECK_FLD) ?
+ LMAC_FID_ON_OST : 0, 0);
if (result == 0)
result = __osd_oi_insert(env, obj, fid, th);
RETURN_EXIT;
again:
- rc = osd_oi_lookup(oti, dev, fid, id, true);
+ rc = osd_oi_lookup(oti, dev, fid, id, OI_CHECK_FLD);
if (rc != 0 && rc != -ENOENT)
RETURN_EXIT;
rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
if (rc == 0) {
+ LASSERT(!(lma->lma_compat & LMAC_NOT_IN_OI));
+
if (fid_is_sane(fid)) {
/* FID-in-dirent is valid. */
if (lu_fid_eq(fid, &lma->lma_self_fid))
if (unlikely(fid_is_sane(fid))) {
/* FID-in-dirent exists, but FID-in-LMA is lost.
* Trust the FID-in-dirent, and add FID-in-LMA. */
- rc = osd_ea_fid_set(info, inode, fid, 0);
+ rc = osd_ea_fid_set(info, inode, fid, 0, 0);
if (rc == 0)
*attr |= LUDA_REPAIR;
} else {
struct file_system_type *type;
char *options = NULL;
char *str;
- int rc = 0;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lu_fid *fid = &info->oti_fid;
+ struct inode *inode;
+ int rc = 0;
ENTRY;
if (o->od_mnt != NULL)
if (IS_ERR(o->od_mnt)) {
rc = PTR_ERR(o->od_mnt);
- CERROR("%s: can't mount %s: %d\n", name, dev, rc);
o->od_mnt = NULL;
+ CERROR("%s: can't mount %s: %d\n", name, dev, rc);
GOTO(out, rc);
}
if (dev_check_rdonly(o->od_mnt->mnt_sb->s_bdev)) {
CERROR("%s: underlying device %s is marked as read-only. "
"Setup failed\n", name, dev);
- mntput(o->od_mnt);
- o->od_mnt = NULL;
- GOTO(out, rc = -EROFS);
+ GOTO(out_mnt, rc = -EROFS);
}
#endif
if (!LDISKFS_HAS_COMPAT_FEATURE(o->od_mnt->mnt_sb,
LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)) {
CERROR("%s: device %s is mounted w/o journal\n", name, dev);
- mntput(o->od_mnt);
- o->od_mnt = NULL;
- GOTO(out, rc = -EINVAL);
+ GOTO(out_mnt, rc = -EINVAL);
+ }
+
+ inode = osd_sb(o)->s_root->d_inode;
+ ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
+ rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
+ if (rc != 0) {
+ CERROR("%s: failed to set lma on %s root inode\n", name, dev);
+ GOTO(out_mnt, rc);
}
- ldiskfs_set_inode_state(osd_sb(o)->s_root->d_inode,
- LDISKFS_STATE_LUSTRE_NO_OI);
if (lmd_flags & LMD_FLG_NOSCRUB)
o->od_noscrub = 1;
+ GOTO(out, rc = 0);
+
+out_mnt:
+ mntput(o->od_mnt);
+ o->od_mnt = NULL;
+
out:
if (__page)
OBD_PAGE_FREE(__page);
if (rc)
fsfilt_put_ops(o->od_fsops);
- RETURN(rc);
+ return rc;
}
static struct lu_device *osd_device_fini(const struct lu_env *env,
ooi_waiting:1; /* it::next is waiting. */
};
-extern const int osd_dto_credits_noquota[];
-
/*
* osd device.
*/
OSD_OT_WRITE = 7,
OSD_OT_INSERT = 8,
OSD_OT_DELETE = 9,
- OSD_OT_QUOTA = 10,
- OSD_OT_MAX = 11
+ OSD_OT_UPDATE = 10,
+ OSD_OT_QUOTA = 11,
+ OSD_OT_MAX = 12
};
struct osd_thandle {
struct lu_fid oti_fid;
struct lu_fid oti_fid2;
+ struct lu_fid oti_fid3;
struct osd_inode_id oti_id;
struct osd_inode_id oti_id2;
+ struct osd_inode_id oti_id3;
struct ost_id oti_ostid;
/*
bool oti_rollback;
char oti_name[48];
+ struct filter_fid_old oti_ff;
};
extern int ldiskfs_pdo;
+static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
+ const char *name, void *buf, int len)
+{
+ dentry->d_inode = inode; /* bind the caller-supplied dentry to @inode for this call */
+ dentry->d_sb = inode->i_sb;
+ return inode->i_op->getxattr(dentry, name, buf, len); /* result of ->getxattr() is passed through; callers treat >0 as a size and <0 as an error */
+}
+
static inline int __osd_xattr_set(struct osd_thread_info *info,
struct inode *inode, const char *name,
const void *buf, int buflen, int fl)
struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
struct osd_inode_id *id);
int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
- const struct lu_fid *fid, __u64 flags);
+ const struct lu_fid *fid, __u32 compat, __u32 incompat);
int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
struct dentry *dentry, struct lustre_mdt_attrs *lma);
int osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd,
struct osd_inode_id *id, const struct lu_fid *fid);
+int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
+ struct dentry *dentry, struct lu_fid *fid);
int osd_obj_map_init(const struct lu_env *env, struct osd_device *osd);
void osd_obj_map_fini(struct osd_device *dev);
struct thandle *th);
int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, struct thandle *th);
+int osd_obj_map_update(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th);
int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, struct osd_inode_id *id);
int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, const struct osd_inode_id *id,
struct thandle *th);
+int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th);
void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
int osd_scrub_file_store(struct osd_scrub *scrub);
int osd_quota_migration(const struct lu_env *env, struct dt_object *dt,
const struct dt_index_features *feat);
-/* osd_compat.c */
-struct osd_obj_seq *osd_seq_load(struct osd_device *osd, obd_seq seq);
-
static inline bool is_quota_glb_feat(const struct dt_index_features *feat)
{
return (feat == &dt_quota_iusr_features ||
RETURN(PTR_ERR(inode));
ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ /* Which @fid is used is not important, because these objects
+ * have no OI mappings and are only visible inside the OSD. */
+ lu_igif_build(&info->oti_fid, inode->i_ino, inode->i_generation);
+ rc = osd_ea_fid_set(info, inode, &info->oti_fid, LMAC_NOT_IN_OI, 0);
+ if (rc != 0)
+ GOTO(out_inode, rc);
+
OBD_ALLOC_PTR(oi);
if (oi == NULL)
GOTO(out_inode, rc = -ENOMEM);
}
int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
- const struct lu_fid *fid)
+ const struct lu_fid *fid, enum oi_check_flags flags)
{
struct lu_seq_range *range = &info->oti_seq_range;
int rc;
ENTRY;
+ if (flags & OI_KNOWN_ON_OST)
+ RETURN(1);
+
if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid) ||
fid_is_llog(fid)))
RETURN(0);
if (fid_is_idif(fid) || fid_is_last_id(fid))
RETURN(1);
+ if (!(flags & OI_CHECK_FLD))
+ RETURN(0);
+
rc = osd_fld_lookup(info->oti_env, osd, fid, range);
if (rc != 0) {
CERROR("%s: Can not lookup fld for "DFID"\n",
RETURN(0);
}
-int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
- const struct lu_fid *fid, struct osd_inode_id *id)
+static int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, struct osd_inode_id *id)
{
struct lu_fid *oi_fid = &info->oti_fid2;
int rc;
int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, struct osd_inode_id *id,
- bool check_fld)
+ enum oi_check_flags flags)
{
if (unlikely(fid_is_last_id(fid)))
return osd_obj_spec_lookup(info, osd, fid, id);
- if ((check_fld && fid_is_on_ost(info, osd, fid)) || fid_is_llog(fid))
+ if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
return osd_obj_map_lookup(info, osd, fid, id);
if (fid_is_fs_root(fid)) {
int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, const struct osd_inode_id *id,
- struct thandle *th)
+ struct thandle *th, enum oi_check_flags flags)
{
struct lu_fid *oi_fid = &info->oti_fid2;
struct osd_inode_id *oi_id = &info->oti_id2;
if (unlikely(fid_is_last_id(fid)))
return osd_obj_spec_insert(info, osd, fid, id, th);
- if (fid_is_on_ost(info, osd, fid) || fid_is_llog(fid))
+ if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
return osd_obj_map_insert(info, osd, fid, id, th);
fid_cpu_to_be(oi_fid, fid);
if (rc != -EEXIST)
return rc;
- rc = osd_oi_lookup(info, osd, fid, oi_id, false);
- if (unlikely(rc != 0))
+ rc = osd_oi_lookup(info, osd, fid, oi_id, 0);
+ if (rc != 0)
return rc;
- if (osd_id_eq(id, oi_id)) {
- CERROR("%.16s: the FID "DFID" is there already:%u/%u\n",
- LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name,
- PFID(fid), id->oii_ino, id->oii_gen);
- return -EEXIST;
- }
+ if (unlikely(osd_id_eq(id, oi_id)))
+ return 0;
/* Check whether the mapping for oi_id is valid or not. */
inode = osd_iget(info, osd, oi_id);
if (rc != 0)
return rc;
- if (lu_fid_eq(fid, &lma->lma_self_fid)) {
+ if (!(lma->lma_compat & LMAC_NOT_IN_OI) &&
+ lu_fid_eq(fid, &lma->lma_self_fid)) {
CERROR("%.16s: the FID "DFID" is used by two objects: "
"%u/%u %u/%u\n",
LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name,
int osd_oi_delete(struct osd_thread_info *info,
struct osd_device *osd, const struct lu_fid *fid,
- struct thandle *th)
+ struct thandle *th, enum oi_check_flags flags)
{
struct lu_fid *oi_fid = &info->oti_fid2;
if (fid_is_last_id(fid))
return 0;
- if (fid_is_on_ost(info, osd, fid) || fid_is_llog(fid))
+ if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
return osd_obj_map_delete(info, osd, fid, th);
fid_cpu_to_be(oi_fid, fid);
(const struct dt_key *)oi_fid, th);
}
+int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th, enum oi_check_flags flags)
+{
+ struct lu_fid *oi_fid = &info->oti_fid2;
+ struct osd_inode_id *oi_id = &info->oti_id2;
+ int rc = 0;
+ /* Re-point the mapping recorded for @fid at @id, passing @th to
+ * whichever helper does the work; the return value is whatever
+ * that helper returns. LAST_ID objects are handed entirely to
+ * osd_obj_spec_update(). */
+ if (unlikely(fid_is_last_id(fid)))
+ return osd_obj_spec_update(info, osd, fid, id, th);
+ /* llog FIDs, and FIDs that fid_is_on_ost() accepts for the given
+ * @flags, are handed to osd_obj_map_update() instead. */
+ if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
+ return osd_obj_map_update(info, osd, fid, id, th);
+ /* General case: build the key with fid_cpu_to_be() and the record
+ * with osd_id_pack() in the per-thread scratch buffers, then pass
+ * them to osd_oi_iam_refresh() on the OI picked by osd_fid2oi().
+ * NOTE(review): the meaning of the trailing 'false' is defined by
+ * osd_oi_iam_refresh(), which is not visible here - confirm. */
+ fid_cpu_to_be(oi_fid, fid);
+ osd_id_pack(oi_id, id);
+ rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid),
+ (const struct dt_rec *)oi_id,
+ (const struct dt_key *)oi_fid, th, false);
+ if (rc != 0)
+ return rc;
+ /* FIDs in FID_SEQ_LOCAL_FILE additionally go through
+ * osd_obj_spec_update(); its result becomes the return value. */
+ if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
+ rc = osd_obj_spec_update(info, osd, fid, id, th);
+ return rc;
+}
+
int osd_oi_mod_init(void)
{
if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)
return (id0->oii_ino == id1->oii_ino && id0->oii_gen == id1->oii_gen);
}
+enum oi_check_flags {
+ OI_CHECK_FLD = 0x00000001, /* bit form of the former 'check_fld'
+ * bool argument; presumably tells
+ * fid_is_on_ost() to consult the
+ * FLD - confirm in that function */
+ OI_KNOWN_ON_OST = 0x00000002, /* passed by the OI scrub for objects
+ * it has classified as OST-objects */
+};
+
int osd_oi_mod_init(void);
int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd);
void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd);
-int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
- const struct lu_fid *fid, struct osd_inode_id *id);
int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, struct osd_inode_id *id,
- bool check_fld);
+ enum oi_check_flags flags);
int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, const struct osd_inode_id *id,
- struct thandle *th);
+ struct thandle *th, enum oi_check_flags flags);
int osd_oi_delete(struct osd_thread_info *info,
struct osd_device *osd, const struct lu_fid *fid,
- struct thandle *th);
+ struct thandle *th, enum oi_check_flags flags);
+int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd,
+ const struct lu_fid *fid, const struct osd_inode_id *id,
+ struct thandle *th, enum oi_check_flags flags);
int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd,
- const struct lu_fid *fid);
+ const struct lu_fid *fid, enum oi_check_flags flags);
#endif /* __KERNEL__ */
#endif /* _OSD_OI_H */
#define SCRUB_NEXT_FATAL 6 /* simulate failure during OI scrub */
#define SCRUB_NEXT_NOSCRUB 7 /* new created object, no scrub on it */
#define SCRUB_NEXT_NOLMA 8 /* the inode has no FID-in-LMA */
+#define SCRUB_NEXT_OSTOBJ 9 /* for OST-object */
+#define SCRUB_NEXT_OSTOBJ_OLD 10 /* old OST-object, no LMA or no FID-on-OST
+ * flags in LMA */
/* misc functions */
static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
struct osd_device *dev,
const struct lu_fid *fid,
- const struct osd_inode_id *id, int ops)
+ const struct osd_inode_id *id,
+ int ops, enum oi_check_flags flags)
{
- struct lu_fid *oi_fid = &info->oti_fid2;
- struct osd_inode_id *oi_id = &info->oti_id2;
- struct iam_container *bag;
- struct iam_path_descr *ipd;
- handle_t *jh;
- int rc;
+ const struct lu_env *env = info->oti_env;
+ struct thandle *th;
+ struct osd_thandle *oh;
+ int rc;
ENTRY;
- fid_cpu_to_be(oi_fid, fid);
- if (id != NULL)
- osd_id_pack(oi_id, id);
- jh = ldiskfs_journal_start_sb(osd_sb(dev),
- osd_dto_credits_noquota[ops]);
- if (IS_ERR(jh)) {
- rc = PTR_ERR(jh);
- CERROR("%.16s: fail to start trans for scrub store: rc = %d\n",
- LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc);
- RETURN(rc);
- }
+ th = dt_trans_create(env, &dev->od_dt_dev);
+ if (IS_ERR(th))
+ RETURN(PTR_ERR(th));
- bag = &osd_fid2oi(dev, fid)->oi_dir.od_container;
- ipd = osd_idx_ipd_get(info->oti_env, bag);
- if (unlikely(ipd == NULL)) {
- ldiskfs_journal_stop(jh);
- CERROR("%.16s: fail to get ipd for scrub store\n",
- LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name);
- RETURN(-ENOMEM);
- }
+ oh = container_of0(th, struct osd_thandle, ot_super);
+ LASSERT(oh->ot_handle == NULL);
switch (ops) {
case DTO_INDEX_UPDATE:
- rc = iam_update(jh, bag, (const struct iam_key *)oi_fid,
- (struct iam_rec *)oi_id, ipd);
+ osd_trans_declare_op(env, oh, OSD_OT_UPDATE,
+ osd_dto_credits_noquota[DTO_INDEX_UPDATE]);
+ rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ rc = osd_oi_update(info, dev, fid, id, th, flags);
if (unlikely(rc == -ENOENT)) {
/* Some unlink thread may removed the OI mapping. */
rc = 1;
}
break;
case DTO_INDEX_INSERT:
- rc = iam_insert(jh, bag, (const struct iam_key *)oi_fid,
- (struct iam_rec *)oi_id, ipd);
+ osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+ osd_dto_credits_noquota[DTO_INDEX_INSERT]);
+ rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ rc = osd_oi_insert(info, dev, fid, id, th, flags);
if (unlikely(rc == -EEXIST)) {
rc = 1;
/* XXX: There are trouble things when adding OI
}
break;
case DTO_INDEX_DELETE:
- rc = iam_delete(jh, bag, (const struct iam_key *)oi_fid, ipd);
+ osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+ osd_dto_credits_noquota[DTO_INDEX_DELETE]);
+ rc = dt_trans_start_local(env, &dev->od_dt_dev, th);
+ if (rc != 0)
+ GOTO(stop, rc);
+
+ rc = osd_oi_delete(info, dev, fid, th, flags);
if (rc == -ENOENT) {
/* It is normal that the unlink thread has removed the
* OI mapping already. */
LASSERTF(0, "Unexpected ops %d\n", ops);
break;
}
- osd_ipd_put(info->oti_env, bag, ipd);
- ldiskfs_journal_stop(jh);
- RETURN(rc);
+
+ GOTO(stop, rc);
+
+stop:
+ dt_trans_stop(env, &dev->od_dt_dev, th);
+ return rc;
}
/* OI_scrub file ops */
if (fid_is_igif(fid))
sf->sf_items_igif++;
+ if (val == SCRUB_NEXT_OSTOBJ_OLD) {
+ inode = osd_iget(info, dev, lid);
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+ /* Someone removed the inode. */
+ if (rc == -ENOENT || rc == -ESTALE)
+ rc = 0;
+ GOTO(out, rc);
+ }
+
+ sf->sf_flags |= SF_UPGRADE;
+ rc = osd_ea_fid_set(info, inode, fid,
+ LMAC_FID_ON_OST, 0);
+ if (rc != 0)
+ GOTO(out, rc);
+ }
+
if ((val == SCRUB_NEXT_NOLMA) &&
(!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA)))
GOTO(out, rc = 0);
if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA))
goto iget;
- /* XXX: Currently, no FID-in-LMA for OST object, so osd_oi_lookup()
- * without checking FLD is enough.
- *
- * It should be updated if FID-in-LMA for OSD object introduced
- * in the future. */
- rc = osd_oi_lookup(info, dev, fid, lid2, false);
+ rc = osd_oi_lookup(info, dev, fid, lid2,
+ (val == SCRUB_NEXT_OSTOBJ ||
+ val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0);
if (rc != 0) {
if (rc != -ENOENT)
GOTO(out, rc);
iget:
- inode = osd_iget(info, dev, lid);
- if (IS_ERR(inode)) {
- rc = PTR_ERR(inode);
- /* Someone removed the inode. */
- if (rc == -ENOENT || rc == -ESTALE)
- rc = 0;
- GOTO(out, rc);
- }
-
- /* Check whether the inode to be unlinked during OI scrub. */
- if (unlikely(inode->i_nlink == 0)) {
- iput(inode);
- GOTO(out, rc = 0);
+ if (inode == NULL) {
+ inode = osd_iget(info, dev, lid);
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+ /* Someone removed the inode. */
+ if (rc == -ENOENT || rc == -ESTALE)
+ rc = 0;
+ GOTO(out, rc);
+ }
}
+ scrub->os_full_speed = 1;
ops = DTO_INDEX_INSERT;
idx = osd_oi_fid2idx(dev, fid);
- if (val == SCRUB_NEXT_NOLMA) {
+ switch (val) {
+ case SCRUB_NEXT_NOLMA:
sf->sf_flags |= SF_UPGRADE;
- scrub->os_full_speed = 1;
- rc = osd_ea_fid_set(info, inode, fid, 0);
+ rc = osd_ea_fid_set(info, inode, fid, 0, 0);
if (rc != 0)
GOTO(out, rc);
if (!(sf->sf_flags & SF_INCONSISTENT))
dev->od_igif_inoi = 0;
- } else {
+ break;
+ case SCRUB_NEXT_OSTOBJ:
+ sf->sf_flags |= SF_INCONSISTENT;
+ case SCRUB_NEXT_OSTOBJ_OLD:
+ break;
+ default:
sf->sf_flags |= SF_RECREATED;
- scrub->os_full_speed = 1;
if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap)))
ldiskfs_set_bit(idx, sf->sf_oi_bitmap);
+ break;
}
} else if (osd_id_eq(lid, lid2)) {
GOTO(out, rc = 0);
} else {
- sf->sf_flags |= SF_INCONSISTENT;
scrub->os_full_speed = 1;
+ sf->sf_flags |= SF_INCONSISTENT;
/* XXX: If the device is restored from file-level backup, then
* some IGIFs may have been already in OI files, and some
dev->od_igif_inoi = 1;
}
- rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops);
+ rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops,
+ (val == SCRUB_NEXT_OSTOBJ ||
+ val == SCRUB_NEXT_OSTOBJ_OLD) ? OI_KNOWN_ON_OST : 0);
if (rc == 0) {
if (scrub->os_in_prior)
sf->sf_items_updated_prior++;
rc = 0;
}
- if (ops == DTO_INDEX_INSERT) {
- /* There may be conflict unlink during the OI scrub,
- * if happend, then remove the new added OI mapping. */
- if (unlikely(inode->i_nlink == 0))
- osd_scrub_refresh_mapping(info, dev, fid, lid,
- DTO_INDEX_DELETE);
- iput(inode);
- }
+ /* There may be a conflicting unlink during the OI scrub;
+ * if that happened, then remove the newly added OI mapping. */
+ if (ops == DTO_INDEX_INSERT && inode != NULL && !IS_ERR(inode) &&
+ unlikely(inode->i_nlink == 0))
+ osd_scrub_refresh_mapping(info, dev, fid, lid,
+ DTO_INDEX_DELETE,
+ (val == SCRUB_NEXT_OSTOBJ ||
+ val == SCRUB_NEXT_OSTOBJ_OLD) ?
+ OI_KNOWN_ON_OST : 0);
up_write(&scrub->os_rwsem);
+ if (inode != NULL && !IS_ERR(inode))
+ iput(inode);
+
if (oii != NULL) {
LASSERT(!cfs_list_empty(&oii->oii_list));
}
}
+/**
+ * Decide from the local FLDB whether \a fid belongs to an OST-object.
+ *
+ * Placeholder: no local FLDB lookup is implemented yet, so \a info,
+ * \a dev and \a fid are unused and the function always returns 0.
+ * The values below describe the intended contract once it is filled in.
+ *
+ * \retval SCRUB_NEXT_OSTOBJ_OLD: FID-on-OST (never returned for now)
+ * \retval 0: FID-on-MDT (currently the only result)
+ */
+static int osd_scrub_check_local_fldb(struct osd_thread_info *info,
+ struct osd_device *dev,
+ struct lu_fid *fid)
+{
+ /* XXX: The initial OI scrub will scan the top level /O to generate
+ * a small local FLDB according to the <seq>. If the given FID
+ * is in the local FLDB, then it is FID-on-OST; otherwise it is
+ * most likely FID-on-MDT. Until that FLDB exists, every FID is
+ * reported as FID-on-MDT. */
+ return 0;
+}
+
static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
struct super_block *sb, bool scrub)
{
- struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
struct inode *inode;
- int rc;
+ int rc = 0;
+ bool has_lma = false;
+ ENTRY;
osd_id_gen(lid, pos, OSD_OII_NOGEN);
inode = osd_iget(info, dev, lid);
/* The inode may be removed after bitmap searching, or the
* file is new created without inode initialized yet. */
if (rc == -ENOENT || rc == -ESTALE)
- return SCRUB_NEXT_CONTINUE;
+ RETURN(SCRUB_NEXT_CONTINUE);
CERROR("%.16s: fail to read inode, ino# = %u, rc = %d\n",
LDISKFS_SB(sb)->s_es->s_volume_name, pos, rc);
- return rc;
+ RETURN(rc);
}
/* If the inode has no OI mapping, then it is special locally used,
* should be invisible to OI scrub or up layer LFSCK. */
- if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI)) {
- iput(inode);
- return SCRUB_NEXT_CONTINUE;
- }
+ if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI))
+ GOTO(put, rc = SCRUB_NEXT_CONTINUE);
if (scrub &&
ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
/* Only skip it for the first OI scrub accessing. */
ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
- iput(inode);
- return SCRUB_NEXT_NOSCRUB;
+ GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
}
rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
if (rc == 0) {
- if (fid_is_llog(&lma->lma_self_fid) ||
- (!scrub && fid_is_internal(&lma->lma_self_fid)) ||
- (scrub && (lma->lma_incompat & LMAI_AGENT)))
- rc = SCRUB_NEXT_CONTINUE;
- else
- *fid = lma->lma_self_fid;
- } else if (rc == -ENODATA) {
- lu_igif_build(fid, inode->i_ino, inode->i_generation);
- if (scrub)
- rc = SCRUB_NEXT_NOLMA;
- else
- rc = 0;
+ has_lma = true;
+ if (lma->lma_compat & LMAC_NOT_IN_OI) {
+ ldiskfs_set_inode_state(inode,
+ LDISKFS_STATE_LUSTRE_NO_OI);
+ GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+ }
+
+ if (fid_is_llog(&lma->lma_self_fid))
+ GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+
+ *fid = lma->lma_self_fid;
+ if (fid_is_internal(&lma->lma_self_fid)) {
+ if (!scrub)
+ rc = SCRUB_NEXT_CONTINUE;
+ GOTO(put, rc);
+ }
+
+ if (!scrub)
+ GOTO(put, rc);
+
+ if (fid_is_namespace_visible(fid) && !fid_is_norm(fid))
+ GOTO(put, rc);
+
+ if (lma->lma_compat & LMAC_FID_ON_OST || fid_is_last_id(fid))
+ GOTO(put, rc = SCRUB_NEXT_OSTOBJ);
+
+ if (fid_is_idif(fid))
+ GOTO(put, rc = SCRUB_NEXT_OSTOBJ_OLD);
+
+ if (lma->lma_incompat & LMAI_AGENT)
+ GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+
+ /* Here, it may be MDT-object, or may be 2.4 OST-object.
+ * Fall through. */
}
+
+ if (rc == -ENODATA || rc == 0) {
+ rc = osd_get_idif(info, inode, &info->oti_obj_dentry, fid);
+ if (rc == 0) {
+ if (scrub)
+ /* It is old 2.x (x <= 3) or 1.8 OST-object. */
+ rc = SCRUB_NEXT_OSTOBJ_OLD;
+ GOTO(put, rc);
+ }
+
+ if (rc > 0) {
+ if (!has_lma)
+ /* It is FID-on-OST, but we do not know how
+ * to generate its FID, ignore it directly. */
+ rc = SCRUB_NEXT_CONTINUE;
+ else
+ /* It is 2.4 OST-object. */
+ rc = SCRUB_NEXT_OSTOBJ_OLD;
+ GOTO(put, rc);
+ }
+
+ if (rc != -ENODATA)
+ GOTO(put, rc);
+
+ if (!has_lma) {
+ if (dev->od_handle_nolma) {
+ lu_igif_build(fid, inode->i_ino,
+ inode->i_generation);
+ if (scrub)
+ rc = SCRUB_NEXT_NOLMA;
+ else
+ rc = 0;
+ } else {
+ /* It may be FID-on-OST, or may be FID for
+ * non-MDT0, anyway, we do not know how to
+ * generate its FID, ignore it directly. */
+ rc = SCRUB_NEXT_CONTINUE;
+ }
+ GOTO(put, rc);
+ }
+
+ /* For OI scrub case only: the object has LMA but has no ff
+ * (or ff crashed). It may be MDT-object, may be OST-object
+ * with crashed ff. The last check is local FLDB. */
+ rc = osd_scrub_check_local_fldb(info, dev, fid);
+ }
+
+ GOTO(put, rc);
+
+put:
iput(inode);
return rc;
}
lu_igif_build(&tfid, inode->i_ino, inode->i_generation);
else
tfid = *fid;
- rc = osd_ea_fid_set(info, inode, &tfid, 0);
+ rc = osd_ea_fid_set(info, inode, &tfid, 0, 0);
if (rc != 0)
RETURN(rc);
} else {
+ if (lma->lma_compat & LMAC_NOT_IN_OI)
+ RETURN(0);
+
tfid = lma->lma_self_fid;
}
- rc = __osd_oi_lookup(info, dev, &tfid, id2);
+ rc = osd_oi_lookup(info, dev, &tfid, id2, 0);
if (rc != 0) {
if (rc != -ENOENT)
RETURN(rc);
rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
- DTO_INDEX_INSERT);
+ DTO_INDEX_INSERT, 0);
+ if (rc > 0)
+ rc = 0;
+
RETURN(rc);
}
RETURN(rc);
}
- rc = osd_scrub_refresh_mapping(info, dev, &tfid, id, DTO_INDEX_UPDATE);
+ rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
+ DTO_INDEX_UPDATE, 0);
+ if (rc > 0)
+ rc = 0;
RETURN(rc);
}
int rc;
ENTRY;
- /* It is existing MDT device. */
+ /* It is an existing MDT0 device. We only allow the case of an object
+ * without LMA to happen on the MDT0, which is usually for old 1.8 MDT.
+ * Then we can generate an IGIF mode FID for the object and the related
+ * OI mapping. If it is on other MDTs, then because of file-level
+ * backup/restore, the related OI mapping may be invalid already, and
+ * we do not know which is the right FID for the object. We only allow
+ * IGIF objects to reside on the MDT0.
+ *
+ * XXX: If an object on a non-MDT0 device has neither LMA nor "fid"
+ * xattr, then something has crashed. We cannot re-generate the
+ * FID directly; instead, the OI scrub will scan the OI structure
+ * and try to re-generate the LMA from the OI mapping. But if the
+ * OI mapping has crashed or been lost as well, then we have to
+ * give up under such double failure cases. */
dev->od_handle_nolma = 1;
child = osd_ios_lookup_one_len(dot_lustre_name, dentry,
strlen(dot_lustre_name));
dput(child);
else if (PTR_ERR(child) == -ENOENT)
osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
- NULL, DTO_INDEX_DELETE);
+ NULL, DTO_INDEX_DELETE, 0);
map++;
}
struct ldiskfs_super_block *es = LDISKFS_SB(sb)->s_es;
struct lvfs_run_ctxt saved;
struct file *filp;
+ struct inode *inode;
+ struct lu_fid *fid = &info->oti_fid;
int dirty = 0;
int rc = 0;
ENTRY;
push_ctxt(&saved, ctxt, NULL);
filp = filp_open(osd_scrub_name, O_RDWR | O_CREAT, 0644);
- if (IS_ERR(filp))
+ if (IS_ERR(filp)) {
+ pop_ctxt(&saved, ctxt, NULL);
RETURN(PTR_ERR(filp));
+ }
+
+ inode = filp->f_dentry->d_inode;
+ ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
+ /* What the @fid is does not matter, because the object
+ * has no OI mapping and is only visible inside the OSD. */
+ lu_igif_build(fid, inode->i_ino, inode->i_generation);
+ rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
+ if (rc != 0) {
+ filp_close(filp, 0);
+ pop_ctxt(&saved, ctxt, NULL);
+ RETURN(rc);
+ }
- scrub->os_inode = igrab(filp->f_dentry->d_inode);
+ scrub->os_inode = igrab(inode);
filp_close(filp, 0);
pop_ctxt(&saved, ctxt, NULL);
- ldiskfs_set_inode_state(scrub->os_inode,
- LDISKFS_STATE_LUSTRE_NO_OI);
rc = osd_scrub_file_load(scrub);
if (rc == -ENOENT) {
struct lu_buf buf;
int rc;
- lustre_lma_init(lma, fid, 0);
+ lustre_lma_init(lma, fid, 0, 0);
lustre_lma_swab(lma);
buf.lb_buf = lma;
buf.lb_len = sizeof(*lma);
(unsigned)LMAC_HSM);
LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
(unsigned)LMAC_SOM);
+ LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_NOT_IN_OI);
+ LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_FID_ON_OST);
LASSERTF(OBJ_CREATE == 1, "found %lld\n",
(long long)OBJ_CREATE);
LASSERTF(OBJ_DESTROY == 2, "found %lld\n",
build_test_filter
MDT_DEV="${FSNAME}-MDT0000"
+OST_DEV="${FSNAME}-OST0000"
MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}"
STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
SHOW_SCRUB="do_facet $SINGLEMDS \
$LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
+SHOW_SCRUB_ON_OST="do_facet ost1 \
+ $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
MOUNT_OPTS_SCRUB="-o user_xattr"
MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
}
run_test 11 "OI scrub skips the new created objects only once"
+test_12() {
+ echo "stopall"
+ stopall > /dev/null
+ echo "formatall"
+ formatall > /dev/null
+ echo "setupall"
+ setupall > /dev/null
+ # Default layout for the test directory: one stripe, OST index 0.
+ mkdir -p $DIR/$tdir
+ $SETSTRIPE -c 1 -i 0 $DIR/$tdir
+ # Create 1000 files while fail_loc 0x195 is set on ost1.
+ #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
+ do_facet ost1 $LCTL set_param fail_loc=0x195
+ createmany -o $DIR/$tdir/f 1000
+ # Stop and set up all targets again.
+ echo "stopall"
+ stopall > /dev/null
+ echo "setupall"
+ setupall > /dev/null
+ # (1) Clear fail_loc; the OI scrub on ost1 must still report 'init'.
+ do_facet ost1 $LCTL set_param fail_loc=0
+ local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "init" ] ||
+ error "(1) Expect 'init', but got '$STATUS'"
+ # (2) Listing the directory is expected to fail at this point.
+ ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+ # (3) After 3 seconds the OI scrub on ost1 must report 'completed'.
+ sleep 3
+ local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "completed" ] ||
+ error "(3) Expect 'completed', but got '$STATUS'"
+ # (4) Listing the directory must now succeed.
+ ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed"
+}
+run_test 12 "OI scrub can rebuild invalid /O entries"
+
# restore MDS/OST size
MDSSIZE=${SAVED_MDSSIZE}
OSTSIZE=${SAVED_OSTSIZE}
CHECK_VALUE_X(LMAC_HSM);
CHECK_VALUE_X(LMAC_SOM);
+ CHECK_VALUE_X(LMAC_NOT_IN_OI);
+ CHECK_VALUE_X(LMAC_FID_ON_OST);
}
static void
(unsigned)LMAC_HSM);
LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
(unsigned)LMAC_SOM);
+ LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_NOT_IN_OI);
+ LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_FID_ON_OST);
LASSERTF(OBJ_CREATE == 1, "found %lld\n",
(long long)OBJ_CREATE);
LASSERTF(OBJ_DESTROY == 2, "found %lld\n",