Whamcloud - gitweb
LU-17848 osd: deduplicate OSD locking 48/55148/11
authorTimothy Day <timday@amazon.com>
Sat, 18 May 2024 17:41:09 +0000 (17:41 +0000)
committerOleg Drokin <green@whamcloud.com>
Wed, 22 Jan 2025 19:30:38 +0000 (19:30 +0000)
The two OSDs currently handle locking identically using a rw
semaphore. Future OSDs will likely handle locking in the same
way, considering the need for sleeping; it doesn't make sense to
use internal filesystem locking to handle osd_object lifetimes.
Plus, these locks are only used by upper layers of Lustre - never
internally in the OSD.

Create generic dt semaphore and wrap locking into generic dt_*
functions.

Signed-off-by: Timothy Day <timday@amazon.com>
Change-Id: I14df606ee99b94b9b2a610928e8ae5b9b2d8c718
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55148
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
lustre/include/dt_object.h
lustre/obdclass/dt_object.c
lustre/ofd/ofd_internal.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_io.c
lustre/osd-zfs/osd_internal.h
lustre/osd-zfs/osd_object.c

index 6201f66..9f7d8cd 100644 (file)
@@ -1839,6 +1839,8 @@ enum dt_otable_it_flags {
 struct dt_device {
        struct lu_device                   dd_lu_dev;
        const struct dt_device_operations *dd_ops;
+
+       /* OSD specific fields */
        struct lu_client_seq              *dd_cl_seq;
 
        /*
@@ -1878,6 +1880,10 @@ struct dt_object {
        const struct dt_object_operations *do_ops;
        const struct dt_body_operations   *do_body_ops;
        const struct dt_index_operations  *do_index_ops;
+
+       /* OSD specific fields */
+       struct rw_semaphore                dd_sem;
+       struct lu_env                     *dd_owner;
 };
 
 /*
@@ -1936,6 +1942,54 @@ static inline struct dt_object *dt_object_child(struct dt_object *o)
                            struct dt_object, do_lu);
 }
 
+#define DT_MAX_PATH 1024
+
+struct dt_find_hint {
+       struct lu_fid        *dfh_fid;
+       struct dt_device     *dfh_dt;
+       struct dt_object     *dfh_o;
+};
+
+struct dt_insert_rec {
+       union {
+               const struct lu_fid     *rec_fid;
+               void                    *rec_data;
+       };
+       union {
+               struct {
+                       __u32            rec_type;
+                       __u32            rec_padding;
+               };
+               __u64                    rec_misc;
+       };
+};
+
+struct dt_thread_info {
+       char                     dti_buf[DT_MAX_PATH];
+       struct dt_find_hint      dti_dfh;
+       struct lu_attr           dti_attr;
+       struct lu_fid            dti_fid;
+       struct dt_object_format  dti_dof;
+       struct lustre_mdt_attrs  dti_lma;
+       struct lu_buf            dti_lb;
+       struct lu_object_conf    dti_conf;
+       loff_t                   dti_off;
+       struct dt_insert_rec     dti_dt_rec;
+       int                      dti_r_locks;
+       int                      dti_w_locks;
+};
+
+extern struct lu_context_key dt_key;
+
+static inline struct dt_thread_info *dt_info(const struct lu_env *env)
+{
+       struct dt_thread_info *dti;
+
+       dti = lu_context_key_get(&env->le_ctx, &dt_key);
+       LASSERT(dti);
+       return dti;
+}
+
 /*
  * This is the general purpose transaction handle.
  * 1. Transaction Life Cycle
@@ -2021,8 +2075,6 @@ typedef int (*dt_entry_func_t)(const struct lu_env *env,
                            const char *name,
                            void *pvt);
 
-#define DT_MAX_PATH 1024
-
 int dt_path_parser(const struct lu_env *env,
                   char *local, dt_entry_func_t entry_func,
                   void *data);
@@ -2335,47 +2387,96 @@ static inline void dt_read_lock(const struct lu_env *env,
                                struct dt_object *dt,
                                unsigned int role)
 {
+       struct dt_thread_info *info = dt_info(env);
+
        LASSERT(dt);
        LASSERT(dt->do_ops);
-       LASSERT(dt->do_ops->do_read_lock);
-       dt->do_ops->do_read_lock(env, dt, role);
+       LASSERT(dt->dd_owner != env);
+
+       if (dt->do_ops->do_read_lock)
+               dt->do_ops->do_read_lock(env, dt, role);
+       else
+               down_read_nested(&dt->dd_sem, role);
+
+       LASSERT(dt->dd_owner == NULL);
+       info->dti_r_locks++;
 }
 
 static inline void dt_write_lock(const struct lu_env *env,
-                               struct dt_object *dt,
-                               unsigned int role)
+                                struct dt_object *dt,
+                                unsigned int role)
 {
+       struct dt_thread_info *info = dt_info(env);
+
        LASSERT(dt);
        LASSERT(dt->do_ops);
-       LASSERT(dt->do_ops->do_write_lock);
-       dt->do_ops->do_write_lock(env, dt, role);
+       LASSERT(dt->dd_owner != env);
+
+       if (dt->do_ops->do_write_lock)
+               dt->do_ops->do_write_lock(env, dt, role);
+       else
+               down_write_nested(&dt->dd_sem, role);
+
+       LASSERT(dt->dd_owner == NULL);
+       info->dti_w_locks++;
+
+       /* TODO: Cleanup usage of const */
+       dt->dd_owner = (struct lu_env *)env;
 }
 
 static inline void dt_read_unlock(const struct lu_env *env,
-                               struct dt_object *dt)
+                                 struct dt_object *dt)
 {
+       struct dt_thread_info *info = dt_info(env);
+
        LASSERT(dt);
        LASSERT(dt->do_ops);
-       LASSERT(dt->do_ops->do_read_unlock);
-       dt->do_ops->do_read_unlock(env, dt);
+       LASSERT(info->dti_r_locks > 0);
+
+       info->dti_r_locks--;
+
+       if (dt->do_ops->do_read_unlock)
+               dt->do_ops->do_read_unlock(env, dt);
+       else
+               up_read(&dt->dd_sem);
 }
 
 static inline void dt_write_unlock(const struct lu_env *env,
-                               struct dt_object *dt)
+                                  struct dt_object *dt)
 {
+       struct dt_thread_info *info = dt_info(env);
+
        LASSERT(dt);
        LASSERT(dt->do_ops);
-       LASSERT(dt->do_ops->do_write_unlock);
-       dt->do_ops->do_write_unlock(env, dt);
+       LASSERT(dt->dd_owner == env);
+       LASSERT(info->dti_w_locks > 0);
+
+       info->dti_w_locks--;
+       dt->dd_owner = NULL;
+
+       if (dt->do_ops->do_write_unlock)
+               dt->do_ops->do_write_unlock(env, dt);
+       else
+               up_write(&dt->dd_sem);
 }
 
-static inline int dt_write_locked(const struct lu_env *env,
-                                 struct dt_object *dt)
+static inline bool dt_write_locked(const struct lu_env *env,
+                                  struct dt_object *dt)
 {
        LASSERT(dt);
        LASSERT(dt->do_ops);
-       LASSERT(dt->do_ops->do_write_locked);
-       return dt->do_ops->do_write_locked(env, dt);
+
+       if (dt->do_ops->do_write_locked)
+               return dt->do_ops->do_write_locked(env, dt);
+
+       return dt->dd_owner == env;
+}
+
+static inline bool dt_thread_no_locks(const struct lu_env *env)
+{
+       struct dt_thread_info *info = dt_info(env);
+
+       return !info->dti_r_locks && !info->dti_w_locks;
 }
 
 static inline bool dt_object_stale(struct dt_object *dt)
@@ -3000,50 +3101,6 @@ static inline int dt_layout_pccro_check(const struct lu_env *env,
        return o->do_ops->do_layout_pccro_check(env, o, mlc);
 }
 
-struct dt_find_hint {
-       struct lu_fid        *dfh_fid;
-       struct dt_device     *dfh_dt;
-       struct dt_object     *dfh_o;
-};
-
-struct dt_insert_rec {
-       union {
-               const struct lu_fid     *rec_fid;
-               void                    *rec_data;
-       };
-       union {
-               struct {
-                       __u32            rec_type;
-                       __u32            rec_padding;
-               };
-               __u64                    rec_misc;
-       };
-};
-
-struct dt_thread_info {
-       char                     dti_buf[DT_MAX_PATH];
-       struct dt_find_hint      dti_dfh;
-       struct lu_attr           dti_attr;
-       struct lu_fid            dti_fid;
-       struct dt_object_format  dti_dof;
-       struct lustre_mdt_attrs  dti_lma;
-       struct lu_buf            dti_lb;
-       struct lu_object_conf    dti_conf;
-       loff_t                   dti_off;
-       struct dt_insert_rec     dti_dt_rec;
-};
-
-extern struct lu_context_key dt_key;
-
-static inline struct dt_thread_info *dt_info(const struct lu_env *env)
-{
-       struct dt_thread_info *dti;
-
-       dti = lu_context_key_get(&env->le_ctx, &dt_key);
-       LASSERT(dti);
-       return dti;
-}
-
 int dt_global_init(void);
 void dt_global_fini(void);
 int dt_tunables_init(struct dt_device *dt, struct obd_type *type,
index ccc1442..ae79fc4 100644 (file)
@@ -37,6 +37,7 @@ struct lu_context_key dt_key = {
        .lct_init = dt_global_key_init,
        .lct_fini = dt_global_key_fini
 };
+EXPORT_SYMBOL(dt_key);
 
 /*
  * no lock is necessary to protect the list, because call-backs
index 615f89d..a85d1fd 100644 (file)
@@ -229,17 +229,17 @@ static inline struct ofd_device *ofd_obj2dev(const struct ofd_object *fo)
 static inline void ofd_read_lock(const struct lu_env *env,
                                 struct ofd_object *fo)
 {
-       struct dt_object  *next = ofd_object_child(fo);
+       struct dt_object *next = ofd_object_child(fo);
 
-       next->do_ops->do_read_lock(env, next, 0);
+       dt_read_lock(env, next, 0);
 }
 
 static inline void ofd_read_unlock(const struct lu_env *env,
                                   struct ofd_object *fo)
 {
-       struct dt_object  *next = ofd_object_child(fo);
+       struct dt_object *next = ofd_object_child(fo);
 
-       next->do_ops->do_read_unlock(env, next);
+       dt_read_unlock(env, next);
 }
 
 static inline void ofd_write_lock(const struct lu_env *env,
@@ -247,15 +247,15 @@ static inline void ofd_write_lock(const struct lu_env *env,
 {
        struct dt_object *next = ofd_object_child(fo);
 
-       next->do_ops->do_write_lock(env, next, 0);
+       dt_write_lock(env, next, 0);
 }
 
 static inline void ofd_write_unlock(const struct lu_env *env,
                                    struct ofd_object *fo)
 {
-       struct dt_object  *next = ofd_object_child(fo);
+       struct dt_object *next = ofd_object_child(fo);
 
-       next->do_ops->do_write_unlock(env, next);
+       dt_write_unlock(env, next);
 }
 
 /*
index d467c0b..253d1a8 100644 (file)
@@ -154,16 +154,6 @@ static int osd_object_invariant(const struct lu_object *l)
 }
 
 /*
- * Concurrency: doesn't matter
- */
-static int osd_is_write_locked(const struct lu_env *env, struct osd_object *o)
-{
-       struct osd_thread_info *oti = osd_oti_get(env);
-
-       return oti->oti_w_locks > 0 && o->oo_owner == env;
-}
-
-/*
  * Concurrency: doesn't access mutable data
  */
 static int osd_root_get(const struct lu_env *env,
@@ -447,7 +437,7 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env,
 
                mo->oo_dt.do_ops = &osd_obj_ops;
                l->lo_ops = &osd_lu_obj_ops;
-               init_rwsem(&mo->oo_sem);
+               init_rwsem(&mo->oo_dt.dd_sem);
                init_rwsem(&mo->oo_ext_idx_sem);
                spin_lock_init(&mo->oo_guard);
                INIT_LIST_HEAD(&mo->oo_xattr_list);
@@ -2034,12 +2024,10 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d,
        LASSERT(current->journal_info == NULL);
 
        oh = container_of(th, struct osd_thandle, ot_super);
-       LASSERT(oh != NULL);
-       LASSERT(oh->ot_handle == NULL);
-       if (unlikely(ldiskfs_track_declares_assert != 0)) {
-               LASSERT(oti->oti_r_locks == 0);
-               LASSERT(oti->oti_w_locks == 0);
-       }
+       LASSERT(oh);
+       LASSERT(!oh->ot_handle);
+       if (unlikely(ldiskfs_track_declares_assert))
+               LASSERT(dt_thread_no_locks(env));
 
        rc = dt_txn_hook_start(env, d, th);
        if (rc != 0)
@@ -2795,72 +2783,6 @@ static const struct dt_device_operations osd_dt_ops = {
        .dt_reserve_or_free_quota = osd_reserve_or_free_quota,
 };
 
-static void osd_read_lock(const struct lu_env *env, struct dt_object *dt,
-                         unsigned int role)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-       struct osd_thread_info *oti = osd_oti_get(env);
-
-       LINVRNT(osd_invariant(obj));
-
-       LASSERT(obj->oo_owner != env);
-       down_read_nested(&obj->oo_sem, role);
-
-       LASSERT(obj->oo_owner == NULL);
-       oti->oti_r_locks++;
-}
-
-static void osd_write_lock(const struct lu_env *env, struct dt_object *dt,
-                          unsigned int role)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-       struct osd_thread_info *oti = osd_oti_get(env);
-
-       LINVRNT(osd_invariant(obj));
-
-       LASSERT(obj->oo_owner != env);
-       down_write_nested(&obj->oo_sem, role);
-
-       LASSERT(obj->oo_owner == NULL);
-       obj->oo_owner = env;
-       oti->oti_w_locks++;
-}
-
-static void osd_read_unlock(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-       struct osd_thread_info *oti = osd_oti_get(env);
-
-       LINVRNT(osd_invariant(obj));
-
-       LASSERT(oti->oti_r_locks > 0);
-       oti->oti_r_locks--;
-       up_read(&obj->oo_sem);
-}
-
-static void osd_write_unlock(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-       struct osd_thread_info *oti = osd_oti_get(env);
-
-       LINVRNT(osd_invariant(obj));
-
-       LASSERT(obj->oo_owner == env);
-       LASSERT(oti->oti_w_locks > 0);
-       oti->oti_w_locks--;
-       obj->oo_owner = NULL;
-       up_write(&obj->oo_sem);
-}
-
-static int osd_write_locked(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-
-       LINVRNT(osd_invariant(obj));
-
-       return obj->oo_owner == env;
-}
-
 static void osd_inode_getattr(const struct lu_env *env,
                              struct inode *inode, struct lu_attr *attr)
 {
@@ -4432,8 +4354,8 @@ static int osd_create(const struct lu_env *env, struct dt_object *dt,
 
        LINVRNT(osd_invariant(obj));
        LASSERT(!dt_object_remote(dt));
-       LASSERT(osd_is_write_locked(env, obj));
-       LASSERT(th != NULL);
+       LASSERT(dt_write_locked(env, dt));
+       LASSERT(th);
 
        if (unlikely(fid_is_acct(fid)))
                /*
@@ -4540,8 +4462,8 @@ static int osd_ref_add(const struct lu_env *env, struct dt_object *dt,
 
        LINVRNT(osd_invariant(obj));
        LASSERT(!dt_object_remote(dt));
-       LASSERT(osd_is_write_locked(env, obj));
-       LASSERT(th != NULL);
+       LASSERT(dt_write_locked(env, dt));
+       LASSERT(th);
 
        oh = container_of(th, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle != NULL);
@@ -4616,8 +4538,8 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt,
 
        LINVRNT(osd_invariant(obj));
        LASSERT(!dt_object_remote(dt));
-       LASSERT(osd_is_write_locked(env, obj));
-       LASSERT(th != NULL);
+       LASSERT(dt_write_locked(env, dt));
+       LASSERT(th);
 
        if (CFS_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL))
                return -EIO;
@@ -5474,11 +5396,6 @@ static int osd_otable_it_attr_get(const struct lu_env *env,
 }
 
 static const struct dt_object_operations osd_obj_ops = {
-       .do_read_lock           = osd_read_lock,
-       .do_write_lock          = osd_write_lock,
-       .do_read_unlock         = osd_read_unlock,
-       .do_write_unlock        = osd_write_unlock,
-       .do_write_locked        = osd_write_locked,
        .do_attr_get            = osd_attr_get,
        .do_declare_attr_set    = osd_declare_attr_set,
        .do_attr_set            = osd_attr_set,
@@ -8194,8 +8111,6 @@ static void osd_key_exit(const struct lu_context *ctx,
 
        if (olc)
                memset(olc, 0, sizeof(*olc));
-       LASSERT(info->oti_r_locks == 0);
-       LASSERT(info->oti_w_locks == 0);
        LASSERT(info->oti_txns    == 0);
        LASSERTF(info->oti_dio_pages_used == 0, "%d\n",
                 info->oti_dio_pages_used);
index 6b91eae..a925f17 100644 (file)
@@ -119,7 +119,6 @@ struct osd_object {
         */
        struct htree_lock_head *oo_hl_head;
        struct rw_semaphore     oo_ext_idx_sem;
-       struct rw_semaphore     oo_sem;
        struct osd_directory    *oo_dir;
        /** protects inode attributes. */
        spinlock_t              oo_guard;
@@ -138,8 +137,6 @@ struct osd_object {
        __u32                   oo_lma_flags;
        atomic_t                oo_dirent_count;
 
-        const struct lu_env    *oo_owner;
-
        struct list_head        oo_xattr_list;
        struct lu_object_header *oo_header;
 };
@@ -714,8 +711,6 @@ struct osd_thread_info {
        /* inc by osd_trans_create and dec by osd_trans_stop */
        int                             oti_ins_cache_depth;
 
-       int                             oti_r_locks;
-       int                             oti_w_locks;
        int                             oti_txns;
        /** used in osd_fid_set() to put xattr */
        struct lu_buf                   oti_buf;
index 6aa305b..9c768d2 100644 (file)
@@ -1470,7 +1470,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
                LASSERT(!PageWriteback(lnb[i].lnb_page));
 
                /*
-                * Since write and truncate are serialized by oo_sem, even
+                * Since write and truncate are serialized by dd_sem, even
                 * partial-page truncate should not leave dirty pages in the
                 * page cache.
                 */
index fbbe0ee..d90d9e7 100644 (file)
@@ -428,9 +428,6 @@ struct osd_object {
        nvlist_t                *oo_sa_xattr;
        struct list_head         oo_sa_linkage;
 
-       /* used to implement osd_object_*_{lock|unlock} */
-       struct rw_semaphore      oo_sem;
-
        /* to serialize some updates: destroy vs. others,
         * xattr_set, object block size change etc
         */
index a09e785..b21e4ea 100644 (file)
@@ -313,7 +313,7 @@ struct lu_object *osd_object_alloc(const struct lu_env *env,
                l->lo_ops = &osd_lu_obj_ops;
                INIT_LIST_HEAD(&mo->oo_sa_linkage);
                INIT_LIST_HEAD(&mo->oo_unlinked_linkage);
-               init_rwsem(&mo->oo_sem);
+               init_rwsem(&mo->oo_dt.dd_sem);
                init_rwsem(&mo->oo_guard);
                rwlock_init(&mo->oo_attr_lock);
                mo->oo_destroy = OSD_DESTROY_NONE;
@@ -929,56 +929,6 @@ static int osd_object_print(const struct lu_env *env, void *cookie,
        return (*p)(env, cookie, LUSTRE_OSD_ZFS_NAME"-object@%p", o);
 }
 
-static void osd_read_lock(const struct lu_env *env, struct dt_object *dt,
-                         unsigned int role)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-
-       LASSERT(osd_invariant(obj));
-
-       down_read_nested(&obj->oo_sem, role);
-}
-
-static void osd_write_lock(const struct lu_env *env, struct dt_object *dt,
-                          unsigned int role)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-
-       LASSERT(osd_invariant(obj));
-
-       down_write_nested(&obj->oo_sem, role);
-}
-
-static void osd_read_unlock(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-
-       LASSERT(osd_invariant(obj));
-       up_read(&obj->oo_sem);
-}
-
-static void osd_write_unlock(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-
-       LASSERT(osd_invariant(obj));
-       up_write(&obj->oo_sem);
-}
-
-static int osd_write_locked(const struct lu_env *env, struct dt_object *dt)
-{
-       struct osd_object *obj = osd_dt_obj(dt);
-       int rc = 1;
-
-       LASSERT(osd_invariant(obj));
-
-       if (down_write_trylock(&obj->oo_sem)) {
-               rc = 0;
-               up_write(&obj->oo_sem);
-       }
-       return rc;
-}
-
 static int osd_attr_get(const struct lu_env *env, struct dt_object *dt,
                        struct lu_attr *attr)
 {
@@ -2179,11 +2129,6 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt,
 }
 
 static const struct dt_object_operations osd_obj_ops = {
-       .do_read_lock           = osd_read_lock,
-       .do_write_lock          = osd_write_lock,
-       .do_read_unlock         = osd_read_unlock,
-       .do_write_unlock        = osd_write_unlock,
-       .do_write_locked        = osd_write_locked,
        .do_attr_get            = osd_attr_get,
        .do_declare_attr_set    = osd_declare_attr_set,
        .do_attr_set            = osd_attr_set,