From 7dce71b0daf786dd212457250a27283dc61b7222 Mon Sep 17 00:00:00 2001 From: Timothy Day Date: Sat, 18 May 2024 17:41:09 +0000 Subject: [PATCH] LU-17848 osd: deduplicate OSD locking The two OSDs currently handle locking identically using a rw semaphore. Future OSDs will likely handle locking in the same way, considering the need for sleeping; it doesn't make sense to use internal filesystem locking to handle osd_object lifetimes. Plus, these locks are only used by upper layers of Lustre - never internally in the OSD. Create generic dt semaphore and wrap locking into generic dt_* functions. Signed-off-by: Timothy Day Change-Id: I14df606ee99b94b9b2a610928e8ae5b9b2d8c718 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55148 Reviewed-by: James Simmons Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin Tested-by: Maloo Tested-by: jenkins --- lustre/include/dt_object.h | 181 +++++++++++++++++++++++++------------- lustre/obdclass/dt_object.c | 1 + lustre/ofd/ofd_internal.h | 14 +-- lustre/osd-ldiskfs/osd_handler.c | 107 +++------------------- lustre/osd-ldiskfs/osd_internal.h | 5 -- lustre/osd-ldiskfs/osd_io.c | 2 +- lustre/osd-zfs/osd_internal.h | 3 - lustre/osd-zfs/osd_object.c | 57 +----------- 8 files changed, 140 insertions(+), 230 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 6201f66..9f7d8cd 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1839,6 +1839,8 @@ enum dt_otable_it_flags { struct dt_device { struct lu_device dd_lu_dev; const struct dt_device_operations *dd_ops; + + /* OSD specific fields */ struct lu_client_seq *dd_cl_seq; /* @@ -1878,6 +1880,10 @@ struct dt_object { const struct dt_object_operations *do_ops; const struct dt_body_operations *do_body_ops; const struct dt_index_operations *do_index_ops; + + /* OSD specific fields */ + struct rw_semaphore dd_sem; + struct lu_env *dd_owner; }; /* @@ -1936,6 +1942,54 @@ static inline struct dt_object *dt_object_child(struct dt_object *o) struct dt_object, do_lu); } +#define DT_MAX_PATH 1024 + +struct dt_find_hint { + struct lu_fid *dfh_fid; + struct dt_device *dfh_dt; + struct dt_object *dfh_o; +}; + +struct dt_insert_rec { + union { + const struct lu_fid *rec_fid; + void *rec_data; + }; + union { + struct { + __u32 rec_type; + __u32 rec_padding; + }; + __u64 rec_misc; + }; +}; + +struct dt_thread_info { + char dti_buf[DT_MAX_PATH]; + struct dt_find_hint dti_dfh; + struct lu_attr dti_attr; + struct lu_fid dti_fid; + struct dt_object_format dti_dof; + struct lustre_mdt_attrs dti_lma; + struct lu_buf dti_lb; + struct lu_object_conf dti_conf; + loff_t dti_off; + struct dt_insert_rec dti_dt_rec; + int dti_r_locks; + int dti_w_locks; +}; + +extern struct lu_context_key dt_key; + +static inline struct dt_thread_info *dt_info(const struct lu_env *env) +{ + struct dt_thread_info *dti; + + dti = lu_context_key_get(&env->le_ctx, &dt_key); + LASSERT(dti); + return dti; +} + /* * This is the general purpose transaction handle. * 1. Transaction Life Cycle @@ -2021,8 +2075,6 @@ typedef int (*dt_entry_func_t)(const struct lu_env *env, const char *name, void *pvt); -#define DT_MAX_PATH 1024 - int dt_path_parser(const struct lu_env *env, char *local, dt_entry_func_t entry_func, void *data); @@ -2335,47 +2387,96 @@ static inline void dt_read_lock(const struct lu_env *env, struct dt_object *dt, unsigned int role) { + struct dt_thread_info *info = dt_info(env); + LASSERT(dt); LASSERT(dt->do_ops); - LASSERT(dt->do_ops->do_read_lock); - dt->do_ops->do_read_lock(env, dt, role); + LASSERT(dt->dd_owner != env); + + if (dt->do_ops->do_read_lock) + dt->do_ops->do_read_lock(env, dt, role); + else + down_read_nested(&dt->dd_sem, role); + + LASSERT(dt->dd_owner == NULL); + info->dti_r_locks++; } static inline void dt_write_lock(const struct lu_env *env, - struct dt_object *dt, - unsigned int role) + struct dt_object *dt, + unsigned int role) { + struct dt_thread_info *info = dt_info(env); + LASSERT(dt); LASSERT(dt->do_ops); - LASSERT(dt->do_ops->do_write_lock); - dt->do_ops->do_write_lock(env, dt, role); + LASSERT(dt->dd_owner != env); + + if (dt->do_ops->do_write_lock) + dt->do_ops->do_write_lock(env, dt, role); + else + down_write_nested(&dt->dd_sem, role); + + LASSERT(dt->dd_owner == NULL); + info->dti_w_locks++; + + /* TODO: Cleanup usage of const */ + dt->dd_owner = (struct lu_env *)env; } static inline void dt_read_unlock(const struct lu_env *env, - struct dt_object *dt) + struct dt_object *dt) { + struct dt_thread_info *info = dt_info(env); + LASSERT(dt); LASSERT(dt->do_ops); - LASSERT(dt->do_ops->do_read_unlock); - dt->do_ops->do_read_unlock(env, dt); + LASSERT(info->dti_r_locks > 0); + + info->dti_r_locks--; + + if (dt->do_ops->do_read_unlock) + dt->do_ops->do_read_unlock(env, dt); + else + up_read(&dt->dd_sem); } static inline void dt_write_unlock(const struct lu_env *env, - struct dt_object *dt) + struct dt_object *dt) { + struct dt_thread_info *info = dt_info(env); + LASSERT(dt); LASSERT(dt->do_ops); - LASSERT(dt->do_ops->do_write_unlock); - dt->do_ops->do_write_unlock(env, dt); + LASSERT(dt->dd_owner == env); + LASSERT(info->dti_w_locks > 0); + + info->dti_w_locks--; + dt->dd_owner = NULL; + + if (dt->do_ops->do_write_unlock) + dt->do_ops->do_write_unlock(env, dt); + else + up_write(&dt->dd_sem); } -static inline int dt_write_locked(const struct lu_env *env, - struct dt_object *dt) +static inline bool dt_write_locked(const struct lu_env *env, + struct dt_object *dt) { LASSERT(dt); LASSERT(dt->do_ops); - LASSERT(dt->do_ops->do_write_locked); - return dt->do_ops->do_write_locked(env, dt); + + if (dt->do_ops->do_write_locked) + return dt->do_ops->do_write_locked(env, dt); + + return dt->dd_owner == env; +} + +static inline bool dt_thread_no_locks(const struct lu_env *env) +{ + struct dt_thread_info *info = dt_info(env); + + return !info->dti_r_locks && !info->dti_w_locks; } static inline bool dt_object_stale(struct dt_object *dt) @@ -3000,50 +3101,6 @@ static inline int dt_layout_pccro_check(const struct lu_env *env, return o->do_ops->do_layout_pccro_check(env, o, mlc); } -struct dt_find_hint { - struct lu_fid *dfh_fid; - struct dt_device *dfh_dt; - struct dt_object *dfh_o; -}; - -struct dt_insert_rec { - union { - const struct lu_fid *rec_fid; - void *rec_data; - }; - union { - struct { - __u32 rec_type; - __u32 rec_padding; - }; - __u64 rec_misc; - }; -}; - -struct dt_thread_info { - char dti_buf[DT_MAX_PATH]; - struct dt_find_hint dti_dfh; - struct lu_attr dti_attr; - struct lu_fid dti_fid; - struct dt_object_format dti_dof; - struct lustre_mdt_attrs dti_lma; - struct lu_buf dti_lb; - struct lu_object_conf dti_conf; - loff_t dti_off; - struct dt_insert_rec dti_dt_rec; -}; - -extern struct lu_context_key dt_key; - -static inline struct dt_thread_info *dt_info(const struct lu_env *env) -{ - struct dt_thread_info *dti; - - dti = lu_context_key_get(&env->le_ctx, &dt_key); - LASSERT(dti); - return dti; -} - int dt_global_init(void); void dt_global_fini(void); int dt_tunables_init(struct dt_device *dt, struct obd_type *type, diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index ccc1442..ae79fc4 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -37,6 +37,7 @@ struct lu_context_key dt_key = { .lct_init = dt_global_key_init, .lct_fini = dt_global_key_fini }; +EXPORT_SYMBOL(dt_key); /* * no lock is necessary to protect the list, because call-backs diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 615f89d..a85d1fd 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -229,17 +229,17 @@ static inline struct ofd_device *ofd_obj2dev(const struct ofd_object *fo) static inline void ofd_read_lock(const struct lu_env *env, struct ofd_object *fo) { - struct dt_object *next = ofd_object_child(fo); + struct dt_object *next = ofd_object_child(fo); - next->do_ops->do_read_lock(env, next, 0); + dt_read_lock(env, next, 0); } static inline void ofd_read_unlock(const struct lu_env *env, struct ofd_object *fo) { - struct dt_object *next = ofd_object_child(fo); + struct dt_object *next = ofd_object_child(fo); - next->do_ops->do_read_unlock(env, next); + dt_read_unlock(env, next); } static inline void ofd_write_lock(const struct lu_env *env, @@ -247,15 +247,15 @@ static inline void ofd_write_lock(const struct lu_env *env, { struct dt_object *next = ofd_object_child(fo); - next->do_ops->do_write_lock(env, next, 0); + dt_write_lock(env, next, 0); } static inline void ofd_write_unlock(const struct lu_env *env, struct ofd_object *fo) { - struct dt_object *next = ofd_object_child(fo); + struct dt_object *next = ofd_object_child(fo); - next->do_ops->do_write_unlock(env, next); + dt_write_unlock(env, next); } /* diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index d467c0b..253d1a8 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -154,16 +154,6 @@ static int osd_object_invariant(const struct lu_object *l) } /* - * Concurrency: doesn't matter - */ -static int osd_is_write_locked(const struct lu_env *env, struct osd_object *o) -{ - struct osd_thread_info *oti = osd_oti_get(env); - - return oti->oti_w_locks > 0 && o->oo_owner == env; -} - -/* * Concurrency: doesn't access mutable data */ static int osd_root_get(const struct lu_env *env, @@ -447,7 +437,7 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, mo->oo_dt.do_ops = &osd_obj_ops; l->lo_ops = &osd_lu_obj_ops; - init_rwsem(&mo->oo_sem); + init_rwsem(&mo->oo_dt.dd_sem); init_rwsem(&mo->oo_ext_idx_sem); spin_lock_init(&mo->oo_guard); INIT_LIST_HEAD(&mo->oo_xattr_list); @@ -2034,12 +2024,10 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, LASSERT(current->journal_info == NULL); oh = container_of(th, struct osd_thandle, ot_super); - LASSERT(oh != NULL); - LASSERT(oh->ot_handle == NULL); - if (unlikely(ldiskfs_track_declares_assert != 0)) { - LASSERT(oti->oti_r_locks == 0); - LASSERT(oti->oti_w_locks == 0); - } + LASSERT(oh); + LASSERT(!oh->ot_handle); + if (unlikely(ldiskfs_track_declares_assert)) + LASSERT(dt_thread_no_locks(env)); rc = dt_txn_hook_start(env, d, th); if (rc != 0) @@ -2795,72 +2783,6 @@ static const struct dt_device_operations osd_dt_ops = { .dt_reserve_or_free_quota = osd_reserve_or_free_quota, }; -static void osd_read_lock(const struct lu_env *env, struct dt_object *dt, - unsigned int role) -{ - struct osd_object *obj = osd_dt_obj(dt); - struct osd_thread_info *oti = osd_oti_get(env); - - LINVRNT(osd_invariant(obj)); - - LASSERT(obj->oo_owner != env); - down_read_nested(&obj->oo_sem, role); - - LASSERT(obj->oo_owner == NULL); - oti->oti_r_locks++; -} - -static void osd_write_lock(const struct lu_env *env, struct dt_object *dt, - unsigned int role) -{ - struct osd_object *obj = osd_dt_obj(dt); - struct osd_thread_info *oti = osd_oti_get(env); - - LINVRNT(osd_invariant(obj)); - - LASSERT(obj->oo_owner != env); - down_write_nested(&obj->oo_sem, role); - - LASSERT(obj->oo_owner == NULL); - obj->oo_owner = env; - oti->oti_w_locks++; -} - -static void osd_read_unlock(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - struct osd_thread_info *oti = osd_oti_get(env); - - LINVRNT(osd_invariant(obj)); - - LASSERT(oti->oti_r_locks > 0); - oti->oti_r_locks--; - up_read(&obj->oo_sem); -} - -static void osd_write_unlock(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - struct osd_thread_info *oti = osd_oti_get(env); - - LINVRNT(osd_invariant(obj)); - - LASSERT(obj->oo_owner == env); - LASSERT(oti->oti_w_locks > 0); - oti->oti_w_locks--; - obj->oo_owner = NULL; - up_write(&obj->oo_sem); -} - -static int osd_write_locked(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - - LINVRNT(osd_invariant(obj)); - - return obj->oo_owner == env; -} - static void osd_inode_getattr(const struct lu_env *env, struct inode *inode, struct lu_attr *attr) { @@ -4432,8 +4354,8 @@ static int osd_create(const struct lu_env *env, struct dt_object *dt, LINVRNT(osd_invariant(obj)); LASSERT(!dt_object_remote(dt)); - LASSERT(osd_is_write_locked(env, obj)); - LASSERT(th != NULL); + LASSERT(dt_write_locked(env, dt)); + LASSERT(th); if (unlikely(fid_is_acct(fid))) /* @@ -4540,8 +4462,8 @@ static int osd_ref_add(const struct lu_env *env, struct dt_object *dt, LINVRNT(osd_invariant(obj)); LASSERT(!dt_object_remote(dt)); - LASSERT(osd_is_write_locked(env, obj)); - LASSERT(th != NULL); + LASSERT(dt_write_locked(env, dt)); + LASSERT(th); oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); @@ -4616,8 +4538,8 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt, LINVRNT(osd_invariant(obj)); LASSERT(!dt_object_remote(dt)); - LASSERT(osd_is_write_locked(env, obj)); - LASSERT(th != NULL); + LASSERT(dt_write_locked(env, dt)); + LASSERT(th); if (CFS_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL)) return -EIO; @@ -5474,11 +5396,6 @@ static int osd_otable_it_attr_get(const struct lu_env *env, } static const struct dt_object_operations osd_obj_ops = { - .do_read_lock = osd_read_lock, - .do_write_lock = osd_write_lock, - .do_read_unlock = osd_read_unlock, - .do_write_unlock = osd_write_unlock, - .do_write_locked = osd_write_locked, .do_attr_get = osd_attr_get, .do_declare_attr_set = osd_declare_attr_set, .do_attr_set = osd_attr_set, @@ -8194,8 +8111,6 @@ static void osd_key_exit(const struct lu_context *ctx, if (olc) memset(olc, 0, sizeof(*olc)); - LASSERT(info->oti_r_locks == 0); - LASSERT(info->oti_w_locks == 0); LASSERT(info->oti_txns == 0); LASSERTF(info->oti_dio_pages_used == 0, "%d\n", info->oti_dio_pages_used); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 6b91eae..a925f17 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -119,7 +119,6 @@ struct osd_object { */ struct htree_lock_head *oo_hl_head; struct rw_semaphore oo_ext_idx_sem; - struct rw_semaphore oo_sem; struct osd_directory *oo_dir; /** protects inode attributes. */ spinlock_t oo_guard; @@ -138,8 +137,6 @@ struct osd_object { __u32 oo_lma_flags; atomic_t oo_dirent_count; - const struct lu_env *oo_owner; - struct list_head oo_xattr_list; struct lu_object_header *oo_header; }; @@ -714,8 +711,6 @@ struct osd_thread_info { /* inc by osd_trans_create and dec by osd_trans_stop */ int oti_ins_cache_depth; - int oti_r_locks; - int oti_w_locks; int oti_txns; /** used in osd_fid_set() to put xattr */ struct lu_buf oti_buf; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 6aa305b..9c768d2 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1470,7 +1470,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, LASSERT(!PageWriteback(lnb[i].lnb_page)); /* - * Since write and truncate are serialized by oo_sem, even + * Since write and truncate are serialized by dd_sem, even * partial-page truncate should not leave dirty pages in the * page cache. */ diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index fbbe0ee..d90d9e7 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -428,9 +428,6 @@ struct osd_object { nvlist_t *oo_sa_xattr; struct list_head oo_sa_linkage; - /* used to implement osd_object_*_{lock|unlock} */ - struct rw_semaphore oo_sem; - /* to serialize some updates: destroy vs. others, * xattr_set, object block size change etc */ diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index a09e785..b21e4ea 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -313,7 +313,7 @@ struct lu_object *osd_object_alloc(const struct lu_env *env, l->lo_ops = &osd_lu_obj_ops; INIT_LIST_HEAD(&mo->oo_sa_linkage); INIT_LIST_HEAD(&mo->oo_unlinked_linkage); - init_rwsem(&mo->oo_sem); + init_rwsem(&mo->oo_dt.dd_sem); init_rwsem(&mo->oo_guard); rwlock_init(&mo->oo_attr_lock); mo->oo_destroy = OSD_DESTROY_NONE; @@ -929,56 +929,6 @@ static int osd_object_print(const struct lu_env *env, void *cookie, return (*p)(env, cookie, LUSTRE_OSD_ZFS_NAME"-object@%p", o); } -static void osd_read_lock(const struct lu_env *env, struct dt_object *dt, - unsigned int role) -{ - struct osd_object *obj = osd_dt_obj(dt); - - LASSERT(osd_invariant(obj)); - - down_read_nested(&obj->oo_sem, role); -} - -static void osd_write_lock(const struct lu_env *env, struct dt_object *dt, - unsigned int role) -{ - struct osd_object *obj = osd_dt_obj(dt); - - LASSERT(osd_invariant(obj)); - - down_write_nested(&obj->oo_sem, role); -} - -static void osd_read_unlock(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - - LASSERT(osd_invariant(obj)); - up_read(&obj->oo_sem); -} - -static void osd_write_unlock(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - - LASSERT(osd_invariant(obj)); - up_write(&obj->oo_sem); -} - -static int osd_write_locked(const struct lu_env *env, struct dt_object *dt) -{ - struct osd_object *obj = osd_dt_obj(dt); - int rc = 1; - - LASSERT(osd_invariant(obj)); - - if (down_write_trylock(&obj->oo_sem)) { - rc = 0; - up_write(&obj->oo_sem); - } - return rc; -} - static int osd_attr_get(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr) { @@ -2179,11 +2129,6 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, } static const struct dt_object_operations osd_obj_ops = { - .do_read_lock = osd_read_lock, - .do_write_lock = osd_write_lock, - .do_read_unlock = osd_read_unlock, - .do_write_unlock = osd_write_unlock, - .do_write_locked = osd_write_locked, .do_attr_get = osd_attr_get, .do_declare_attr_set = osd_declare_attr_set, .do_attr_set = osd_attr_set, -- 1.8.3.1