Whamcloud - gitweb
LU-10283 mdd: fix parent FID in changelog of striped directory 22/51322/5
authorDmitry Ivanov <dmitry.ivanov2@hpe.com>
Mon, 16 May 2022 18:15:19 +0000 (12:15 -0600)
committerOleg Drokin <green@whamcloud.com>
Wed, 13 Dec 2023 12:20:00 +0000 (12:20 +0000)
Changelog entries for file operations such as create, rename,
link, unlink, and mkdir reported the parent FID ("p=") as a shard's
FID when the parent was a striped directory. The same was true for
the source's parent FID ("sp="). This commit hides these Lustre
internals from the user by displaying the parent directory's FID
instead, as expected.

An object might be in a remote MDT, in which case obtaining the parent
FID via the linkEA can be an expensive operation, so the parent FID is
cached in the mdd_object, so that the cost of the cross-MDT RPC is
amortized over the lifetime of the object.

Certain userspace tools might depend on the previous behavior of
displaying the shard's parent FID in the changelog records, so this
can be enabled by setting mdd.*.enable_shard_pfid=1, if this is
required for compatibility.

HPE-bug-id: LUS-10721
Signed-off-by: Dmitry Ivanov <dmitry.ivanov2@hpe.com>
Change-Id: Iae15b49f5852f36ba62ae1706d3a5f4ebf307bc4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51322
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/doc/lfs-changelog.1
lustre/mdd/mdd_device.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_lproc.c

index bb35a70..4c71f88 100644 (file)
@@ -51,6 +51,12 @@ is only accessible to the root user.
 .TP
 .SM EIO
 Failed to read the changelog record on the MDT.
+.SH NOTES
+Certain userspace tools might rely on past Lustre behavior of displaying the
+shard's parent FID instead of the real parent FID, in changelog records related
+to striped directories or filesystem objects contained within them; if this
+behavior is needed for compatibility, please set mdd.*.enable_shard_pfid=1. This
+tunable might be deprecated in a future Lustre release.
 .SH EXAMPLE
 .TP
 Register 2 changelog consumers on the MDT0000
index 21b5f57..9db11da 100644 (file)
@@ -148,6 +148,7 @@ static int mdd_init0(const struct lu_env *env, struct mdd_device *mdd,
        /* current mask is calculated from mask above and users masks */
        mdd->mdd_cl.mc_current_mask = CHANGELOG_MINMASK;
        mdd->mdd_cl.mc_deniednext = 60; /* 60 secs by default */
+       mdd->mdd_cl.mc_enable_shard_pfid = false; /* master pFID by default */
 
        dev = lustre_cfg_string(lcfg, 0);
        if (dev == NULL)
index 71d1aa1..f893aeb 100644 (file)
@@ -1088,13 +1088,71 @@ void mdd_changelog_rec_extra_xattr(struct changelog_rec *rec,
        strlcpy(xattr->cr_xattr, xattr_name, sizeof(xattr->cr_xattr));
 }
 
+/**
+ * Set the parent FID at \a pfid for a namespace change changelog record, using
+ * XATTR_NAME_LMV and linkEA from the remote object to obtain the correct
+ * parent FID for striped directories
+ *
+ * Lookup order: if mc_enable_shard_pfid is set, \a parent's own FID is
+ * reported as-is; otherwise a non-zero cached parent->mod_striped_pfid is
+ * reused; otherwise XATTR_NAME_LMV is probed and the resulting FID is cached
+ * in parent->mod_striped_pfid so that later calls can skip the lookup.
+ *
+ * \param[in] env - environment
+ * \param[in] mdd - mdd device
+ * \param[in] parent - parent object (its mod_striped_pfid may be updated)
+ * \param[in] pattr - parent attribute, passed through to mdd_parent_fid()
+ * \param[out] pfid - parent fid
+ *
+ * \retval 0 success
+ * \retval -errno failure of mdo_xattr_get() or mdd_parent_fid()
+ */
+int mdd_changelog_ns_pfid_set(const struct lu_env *env, struct mdd_device *mdd,
+                             struct mdd_object *parent,
+                             const struct lu_attr *pattr, struct lu_fid *pfid)
+{
+       int rc = 0;
+
+       /* Certain userspace tools might rely on the previous behavior of
+        * displaying the shard's parent FID, on some changelog records related
+        * to striped directories, so use that for compatibility if needed.
+        * This path neither consults nor updates the cached FID.
+        */
+       if (mdd->mdd_cl.mc_enable_shard_pfid) {
+               *pfid = *mdd_object_fid(parent);
+               return 0;
+       }
+
+       if (!fid_is_zero(&parent->mod_striped_pfid)) {
+               *pfid = parent->mod_striped_pfid;
+               return 0;
+       }
+
+       /* is the parent dir striped? Probe XATTR_NAME_LMV with LU_BUF_NULL
+        * (presumably an existence/size query only - TODO confirm):
+        * -ENODATA means there is no LMV xattr, so the parent's own FID is
+        * used and cached; any other negative rc is returned to the caller
+        */
+       rc = mdo_xattr_get(env, parent, &LU_BUF_NULL, XATTR_NAME_LMV);
+       if (rc == -ENODATA) {
+               *pfid = *mdd_object_fid(parent);
+               parent->mod_striped_pfid = *pfid;
+               return 0;
+       }
+
+       if (rc < 0)
+               return rc;
+
+       LASSERT(!mdd_is_root(mdo2mdd(&parent->mod_obj),
+                            mdd_object_fid(parent)));
+
+       /* hide shard FID: report the FID found by mdd_parent_fid() instead,
+        * caching it in the object only when that lookup succeeded
+        */
+       rc = mdd_parent_fid(env, parent, pattr, pfid);
+       if (!rc)
+               parent->mod_striped_pfid = *pfid;
+
+       return rc;
+}
+
 /** Store a namespace change changelog record
  * If this fails, we must fail the whole transaction; we don't
  * want the change to commit without the log entry.
  * \param target - mdd_object of change
- * \param tpfid - target parent dir/object fid
+ * \param parent - target parent object
+ * \param pattr - target parent attribute
  * \param sfid - source object fid
- * \param spfid - source parent fid
+ * \param sparent - source parent object
+ * \param spattr - source parent attribute
  * \param tname - target name string
  * \param sname - source name string
  * \param handle - transaction handle
@@ -1104,9 +1162,11 @@ int mdd_changelog_ns_store(const struct lu_env *env,
                           enum changelog_rec_type type,
                           enum changelog_rec_flags clf_flags,
                           struct mdd_object *target,
-                          const struct lu_fid *tpfid,
+                          struct mdd_object *parent,
+                          const struct lu_attr *pattr,
                           const struct lu_fid *sfid,
-                          const struct lu_fid *spfid,
+                          struct mdd_object *sparent,
+                          const struct lu_attr *spattr,
                           const struct lu_name *tname,
                           const struct lu_name *sname,
                           struct thandle *handle)
@@ -1122,7 +1182,7 @@ int mdd_changelog_ns_store(const struct lu_env *env,
        if (!mdd_changelog_enabled(env, mdd, type))
                RETURN(0);
 
-       LASSERT(tpfid != NULL);
+       LASSERT(S_ISDIR(mdd_object_type(parent)));
        LASSERT(tname != NULL);
        LASSERT(handle != NULL);
 
@@ -1159,12 +1219,25 @@ int mdd_changelog_ns_store(const struct lu_env *env,
        }
 
        rec->cr.cr_type = (__u32)type;
-       rec->cr.cr_pfid = *tpfid;
+
+       rc = mdd_changelog_ns_pfid_set(env, mdd, parent, pattr,
+                                      &rec->cr.cr_pfid);
+       if (rc < 0)
+               RETURN(rc);
+
        rec->cr.cr_namelen = tname->ln_namelen;
        memcpy(changelog_rec_name(&rec->cr), tname->ln_name, tname->ln_namelen);
 
-       if (clf_flags & CLF_RENAME)
-               mdd_changelog_rec_ext_rename(&rec->cr, sfid, spfid, sname);
+       if (clf_flags & CLF_RENAME) {
+               struct lu_fid spfid;
+
+               rc = mdd_changelog_ns_pfid_set(env, mdd, sparent, spattr,
+                                              &spfid);
+               if (rc < 0)
+                       RETURN(rc);
+
+               mdd_changelog_rec_ext_rename(&rec->cr, sfid, &spfid, sname);
+       }
 
        if (clf_flags & CLF_JOBID)
                mdd_changelog_rec_ext_jobid(&rec->cr, uc->uc_jobid);
@@ -1605,8 +1678,8 @@ out_unlock:
        mdd_write_unlock(env, mdd_sobj);
        if (rc == 0)
                rc = mdd_changelog_ns_store(env, mdd, CL_HARDLINK, 0, mdd_sobj,
-                                           mdd_object_fid(mdd_tobj), NULL,
-                                           NULL, lname, NULL, handle);
+                                           mdd_tobj, tattr, NULL,
+                                           NULL, NULL, lname, NULL, handle);
 stop:
        rc = mdd_trans_stop(env, mdd, rc, handle);
        if (is_vmalloc_addr(ldata->ld_buf))
@@ -1980,8 +2053,8 @@ cleanup:
 
                rc = mdd_changelog_ns_store(env, mdd,
                        is_dir ? CL_RMDIR : CL_UNLINK, cl_flags,
-                       mdd_cobj, mdd_object_fid(mdd_pobj), NULL, NULL,
-                       lname, NULL, handle);
+                       mdd_cobj, mdd_pobj, pattr, NULL,
+                       NULL, NULL, lname, NULL, handle);
        }
 
 stop:
@@ -2933,7 +3006,7 @@ out_volatile:
                                S_ISDIR(attr->la_mode) ? CL_MKDIR :
                                S_ISREG(attr->la_mode) ? CL_CREATE :
                                S_ISLNK(attr->la_mode) ? CL_SOFTLINK : CL_MKNOD,
-                               0, son, mdd_object_fid(mdd_pobj), NULL, NULL,
+                               0, son, mdd_pobj, pattr, NULL, NULL, NULL,
                                lname, NULL, handle);
 out_stop:
        rc2 = mdd_trans_stop(env, mdd, rc, handle);
@@ -3484,8 +3557,9 @@ cleanup:
 
        if (rc == 0)
                rc = mdd_changelog_ns_store(env, mdd, CL_RENAME, cl_flags,
-                                           mdd_tobj, tpobj_fid, lf, spobj_fid,
-                                           ltname, lsname, handle);
+                                           mdd_tobj, mdd_tpobj, tpattr, lf,
+                                           mdd_spobj, pattr, ltname, lsname,
+                                           handle);
 
 stop:
        rc = mdd_trans_stop(env, mdd, rc, handle);
@@ -4558,8 +4632,8 @@ static int mdd_migrate_object(const struct lu_env *env,
 
        rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0,
                                    spec->sp_migrate_nsonly ? sobj : tobj,
-                                   mdd_object_fid(spobj), mdd_object_fid(sobj),
-                                   mdd_object_fid(tpobj), tname, sname,
+                                   spobj, spattr, mdd_object_fid(sobj),
+                                   tpobj, tpattr, tname, sname,
                                    handle);
        if (rc)
                GOTO(stop, rc);
@@ -5073,8 +5147,8 @@ static int mdd_dir_split_plain(const struct lu_env *env,
 
        /* FID changes, record it as CL_MIGRATE */
        rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj,
-                                   mdd_object_fid(pobj), mdd_object_fid(obj),
-                                   mdd_object_fid(pobj), lname, lname, handle);
+                                   pobj, pattr, mdd_object_fid(obj),
+                                   pobj, pattr, lname, lname, handle);
        RETURN(rc);
 }
 
index d1e6e65..7ba9f35 100644 (file)
@@ -109,6 +109,9 @@ struct mdd_changelog {
        unsigned int            mc_deniednext; /* interval for recording denied
                                                * accesses
                                                */
+       unsigned char           mc_enable_shard_pfid; /* master or shard pFID
+                                                      * for striped dirs
+                                                      */
 };
 
 static inline __u64 cl_time(void)
@@ -175,6 +178,9 @@ enum mod_flags {
 
 struct mdd_object {
        struct md_object        mod_obj;
+       struct lu_fid           mod_striped_pfid; /* master dir parent FID, in
+                                                  * case this is a striped dir
+                                                  */
        /* open count */
        u32                     mod_count;
        u32                     mod_valid;
@@ -368,9 +374,11 @@ int mdd_changelog_ns_store(const struct lu_env *env, struct mdd_device *mdd,
                           enum changelog_rec_type type,
                           enum changelog_rec_flags clf_flags,
                           struct mdd_object *target,
-                          const struct lu_fid *tpfid,
+                          struct mdd_object *parent,
+                          const struct lu_attr *pattr,
                           const struct lu_fid *sfid,
-                          const struct lu_fid *spfid,
+                          struct mdd_object *src_parent,
+                          const struct lu_attr *src_pattr,
                           const struct lu_name *tname,
                           const struct lu_name *sname,
                           struct thandle *handle);
index 703c72c..86e6532 100644 (file)
@@ -532,6 +532,34 @@ static ssize_t changelog_deniednext_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(changelog_deniednext);
 
+static ssize_t enable_shard_pfid_show(struct kobject *kobj,
+                                     struct attribute *attr, char *buf)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj); /* owning device */
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n", /* decimal flag + newline */
+                        mdd->mdd_cl.mc_enable_shard_pfid);
+}
+
+static ssize_t enable_shard_pfid_store(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      const char *buffer, size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj); /* owning device */
+       unsigned char val;
+       int rc;
+
+       rc = kstrtou8(buffer, 10, &val); /* parse input as base 10 */
+       if (rc)
+               return rc; /* propagate the parse error */
+
+       mdd->mdd_cl.mc_enable_shard_pfid = !!val; /* any non-zero -> 1 */
+       return count; /* report the whole write as consumed */
+}
+LUSTRE_RW_ATTR(enable_shard_pfid);
+
 static ssize_t sync_permission_show(struct kobject *kobj,
                                    struct attribute *attr, char *buf)
 {
@@ -763,6 +791,7 @@ static struct attribute *mdd_attrs[] = {
        &lustre_attr_changelog_min_gc_interval.attr,
        &lustre_attr_changelog_min_free_cat_entries.attr,
        &lustre_attr_changelog_deniednext.attr,
+       &lustre_attr_enable_shard_pfid.attr,
        &lustre_attr_lfsck_async_windows.attr,
        &lustre_attr_lfsck_speed_limit.attr,
        &lustre_attr_sync_permission.attr,