__u32 ol_comp_id;
} __attribute__((packed));
-/* keep this one for compatibility */
-struct filter_fid_old {
- struct lu_fid ff_parent;
- __u64 ff_objid;
- __u64 ff_seq;
+/* The filter_fid structure has changed several times over its lifetime.
+ * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
+ * stripe_index and the "self FID" (objid/seq) to be able to recover the
+ * OST objects in case of corruption. With the move to 2.4 and OSD-API for
+ * the OST, the "trusted.lma" xattr was added to the OST objects to store
+ * the "self FID" to be consistent with the MDT on-disk format, and the
+ * filter_fid only stored the MDT inode parent FID and stripe index.
+ *
+ * In 2.10, the addition of PFL composite layouts required more information
+ * to be stored into the filter_fid in order to be able to identify which
+ * component the OST object belonged to. As well, the stripe size may vary
+ * between components, so it was no longer safe to assume the stripe size
+ * or stripe_count of a file. This is also more robust for plain layouts.
+ *
+ * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
+ * enough space to store both the filter_fid and LMA in the inode, so they
+ * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
+ * an extra seek for every OST object access.
+ *
+ * In 2.11, FLR mirror layouts also need to store the layout version and
+ * range so that writes to old versions of the layout are not allowed.
+ * That ensures that mirrored objects are not modified by evicted clients,
+ * and ensures that the components are correctly marked stale on the MDT.
+ */
+struct filter_fid_18_23 { /* pre-2.4 trusted.fid: parent FID plus "self FID" */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ __u64 ff_objid; /* object id of the OST object itself, little-endian */
+ __u64 ff_seq; /* sequence of the OST object itself, little-endian */
+};
+
+struct filter_fid_24_29 { /* parent FID only; "self FID" lives in trusted.lma */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+};
+
+struct filter_fid_210 { /* 2.10 format: parent FID plus PFL component info */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ struct ost_layout ff_layout; /* identifies the component of this object */
};
struct filter_fid {
- struct lu_fid ff_parent;
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
struct ost_layout ff_layout;
+ __u32 ff_layout_version; /* FLR layout version of this object */
+ __u32 ff_range; /* range of layout versions in which
+ * writes are allowed */
} __attribute__((packed));
/* Userspace should treat lu_fid as opaque, and only use the following methods
dst->ol_comp_id = __le32_to_cpu(src->ol_comp_id);
}
-/* Both filter_fid_*cpu* functions not currently used */
static inline void filter_fid_cpu_to_le(struct filter_fid *dst,
const struct filter_fid *src, int size)
{
struct lustre_cfg lti_lustre_cfg;
/* used to store parent default striping in create */
struct lod_default_striping lti_def_striping;
- struct filter_fid lti_ff;
+ struct filter_fid lti_ff;
};
extern const struct lu_device_operations lod_lu_ops;
}
/**
- * \retval +v: new filter_fid, does not contain self-fid
- * \retval 0: filter_fid_old, contains self-fid
+ * \retval +v: new filter_fid does not contain self-fid
+ * \retval 0: filter_fid_18_23, contains self-fid
* \retval -v: other failure cases
*/
int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
struct dentry *dentry, struct lu_fid *fid)
{
- struct filter_fid_old *ff = &info->oti_ff;
- struct ost_id *ostid = &info->oti_ostid;
- int rc;
+ struct filter_fid *ff = &info->oti_ff;
+ struct ost_id *ostid = &info->oti_ostid;
+ int rc;
rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
- if (rc == sizeof(*ff)) {
- rc = 0;
- ostid_set_seq(ostid, le64_to_cpu(ff->ff_seq));
- rc = ostid_set_id(ostid, le64_to_cpu(ff->ff_objid));
+ if (rc == sizeof(struct filter_fid_18_23)) {
+ struct filter_fid_18_23 *ff_old = (void *)ff;
+
+ ostid_set_seq(ostid, le64_to_cpu(ff_old->ff_seq));
+ rc = ostid_set_id(ostid, le64_to_cpu(ff_old->ff_objid));
/*
* XXX: use 0 as the index for compatibility, the caller will
* handle index related issues when necessary.
*/
if (!rc)
ostid_to_fid(fid, ostid, 0);
- } else if (rc == sizeof(struct filter_fid)) {
+ /* cast to (int) so "rc" comparison is signed instead of unsigned */
+ } else if (rc >= (int)sizeof(struct filter_fid_24_29)) {
rc = 1;
} else if (rc >= 0) {
rc = -EINVAL;
if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
fid = &lma->lma_self_fid;
rc = osd_get_idif(info, inode, dentry, fid);
- if ((rc > 0) || (rc == -ENODATA && osd->od_index_in_idif)) {
+ if (rc > 0 || (rc == -ENODATA && osd->od_index_in_idif)) {
/* For the given OST-object, if it has neither LMA nor
* FID in XATTR_NAME_FID, then the given FID (which is
* contained in the @obj, from client RPC for locating
struct osd_idmap_cache oti_cache;
/* dedicated OI cache for insert (which needs inum) */
- struct osd_idmap_cache *oti_ins_cache;
- int oti_ins_cache_size;
- int oti_ins_cache_used;
-
- int oti_r_locks;
- int oti_w_locks;
- int oti_txns;
- /** used in osd_fid_set() to put xattr */
- struct lu_buf oti_buf;
- struct lu_buf oti_big_buf;
- /** used in osd_ea_fid_set() to set fid into common ea */
+ struct osd_idmap_cache *oti_ins_cache;
+ int oti_ins_cache_size;
+ int oti_ins_cache_used;
+
+ int oti_r_locks;
+ int oti_w_locks;
+ int oti_txns;
+ /** used in osd_fid_set() to put xattr */
+ struct lu_buf oti_buf;
+ struct lu_buf oti_big_buf;
+ /** used in osd_ea_fid_set() to set fid into common ea */
union {
struct lustre_ost_attrs oti_ost_attrs;
- struct filter_fid_old oti_ff;
- struct filter_fid oti_ff_new;
+ struct filter_fid_18_23 oti_ff_old;
+ struct filter_fid oti_ff;
};
/** 0-copy IO */
- struct osd_iobuf oti_iobuf;
+ struct osd_iobuf oti_iobuf;
/* used to access objects in /O */
- struct inode *oti_inode;
+ struct inode *oti_inode;
#define OSD_FID_REC_SZ 32
- char oti_ldp[OSD_FID_REC_SZ];
- char oti_ldp2[OSD_FID_REC_SZ];
+ char oti_ldp[OSD_FID_REC_SZ];
+ char oti_ldp2[OSD_FID_REC_SZ];
/* used by quota code */
union {
osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
struct inode *inode, const struct lu_fid *fid)
{
- struct filter_fid_old *ff = &info->oti_ff;
- struct dentry *dentry = &info->oti_obj_dentry;
- struct lu_fid *tfid = &info->oti_fid;
- handle_t *jh;
- int size = 0;
- int rc;
- bool reset = false;
+ struct filter_fid_18_23 *ff = &info->oti_ff_old;
+ struct dentry *dentry = &info->oti_obj_dentry;
+ struct lu_fid *tfid = &info->oti_fid;
+ bool fid_18_23 = false;
+ handle_t *jh;
+ int size = 0;
+ int rc;
ENTRY;
if (dev->od_scrub.os_file.sf_param & SP_DRYRUN)
if (rc)
GOTO(stop, rc);
- reset = true;
- } else if (rc != -ENODATA && rc != sizeof(struct filter_fid)) {
+ fid_18_23 = true;
+ } else if (rc != -ENODATA && rc < (int)sizeof(struct filter_fid_24_29)) {
GOTO(stop, rc = -EINVAL);
}
/* 3) make new LMA and add it */
rc = osd_ea_fid_set(info, inode, tfid, LMAC_FID_ON_OST, 0);
- if (reset) {
+ if (fid_18_23) {
if (rc)
/* If failed, we should try to add the old back. */
size = sizeof(*ff);
rc = osd_get_idif(info, inode, &info->oti_obj_dentry, fid);
if (rc == 0) {
if (scrub)
- /* It is old 2.x (x <= 3) or 1.8 OST-object. */
+ /* It is a 2.3 or older OST-object. */
rc = SCRUB_NEXT_OSTOBJ_OLD;
return rc;
}
* to generate its FID, ignore it directly. */
rc = SCRUB_NEXT_CONTINUE;
else
- /* It is 2.4 OST-object. */
+ /* It is a 2.4 or newer OST-object. */
rc = SCRUB_NEXT_OSTOBJ_OLD;
return rc;
}
/**
* It scans the /lost+found, and for the OST-object (with filter_fid
- * or filter_fid_old), move them back to its proper /O/<seq>/d<x>.
+ * or filter_fid_18_23), move them back to its proper /O/<seq>/d<x>.
*/
#ifdef HAVE_FILLDIR_USE_CTX
static int osd_ios_lf_fill(struct dir_context *buf,
* an old device, it can be found and cleared later.
*
* For the system with "SIF_NO_HANDLE_OLD_FID", we do not
- * need to check "filter_fid_old" and to convert it to
+ * need to check "filter_fid_18_23" and to convert it to
* "filter_fid" for each object, and all the IGIF should
* have their FID mapping in OI files already. */
if (dev->od_maybe_new)
for (i = 1; i < argc; i++) {
char buf[1024]; /* allow xattr that may be larger */
struct filter_fid *ff = (void *)buf;
+ static int printed;
int size;
size = getxattr(argv[i], "trusted.fid", buf,
}
if (size != sizeof(struct filter_fid) &&
- size != sizeof(struct filter_fid_old) &&
- size != sizeof(struct lu_fid)) {
- fprintf(stderr, "%s: warning: fid larger than expected"
- " (%d bytes), recompile?\n", argv[i], size);
- continue;
+ size != sizeof(struct filter_fid_18_23) &&
+ size != sizeof(struct filter_fid_24_29) &&
+ size != sizeof(struct filter_fid_210) && !printed) {
+ fprintf(stderr,
+ "%s: warning: ffid size is unexpected (%d bytes), recompile?\n",
+ argv[i], size);
+ printed = 1;
+
+ if (size < sizeof(struct filter_fid_24_29))
+ continue;
}
printf("%s: ", argv[i]);
- if (size == sizeof(struct filter_fid_old)) {
- struct filter_fid_old *ffo = (void *)buf;
+ if (size == sizeof(struct filter_fid_18_23)) {
+ struct filter_fid_18_23 *ffo = (void *)buf;
printf("objid=%llu seq=%llu ",
(unsigned long long)__le64_to_cpu(ffo->ff_objid),
/* this is stripe_nr actually */
__le32_to_cpu(ff->ff_parent.f_stripe_idx));
- if (size >= sizeof(struct filter_fid)) {
+ if (size >= sizeof(struct filter_fid_210)) {
struct ost_layout *ol = &ff->ff_layout;
/* new filter_fid, support PFL */
__le64_to_cpu(ol->ol_comp_start),
__le64_to_cpu(ol->ol_comp_end));
}
+ if (size >= sizeof(struct filter_fid))
+ printf(" layout_version=%u range=%u",
+ __le32_to_cpu(ff->ff_layout_version),
+ __le32_to_cpu(ff->ff_range));
printf("\n");
}