From f46782b4c7dcaacd0046ebad3e3d84c2bb0367d4 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Wed, 22 Dec 2021 07:06:16 -0500 Subject: [PATCH] LU-15393 lod: use killable semaphore for creation path The lod_ost_alloc_qos() function sleeps during OST failover, even though object allocation could use different OSTs. The patch changes the down_write() call to down_write_killable() and adds a timer for a wakeup. The main idea of this fix is as follows: when an OST is lost during lod_ost_alloc_rr() and the MDT does not have precreated objects for it, lod_ost_alloc_rr()->..->lod_qos_declare_object_on() would sleep while holding lq_rw_sem for read. Any creation thread would get stuck at lod_ost_alloc_qos() waiting for lq_rw_sem for write, after a statfs update. With the fix, the sleep is limited and allocation goes through lod_ost_alloc_rr(). For read, lq_rw_sem is shared and stripe allocation would skip an OST without objects. lod_ost_alloc_rr() refills the OST pool with lq_rw_sem held for write when lq_rr.lqr_flags has LQ_DIRTY. This should happen only when an OST is added/removed. There is no need to set LQ_DIRTY for lq_rr when statfs gets an error; this flag does not cause any change to the pool list in lod_qos_calc_rr(). Change the behaviour of lod_check_and_reserve_ost(): it would sleep during object allocation for speed 2 only. 
HPE-bug-id: LUS-10388 Signed-off-by: Alexander Boyko Change-Id: I4768c4cf7d2f9f02f0a9e0dfb6d15e02932cb5fe Reviewed-on: https://review.whamcloud.com/45921 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Alexey Lyashkov Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/autoconf/lustre-core.m4 | 92 ++++++++++++++++++++++++++++-------------- lustre/lod/lod_qos.c | 41 +++++++++++++++++-- 2 files changed, 99 insertions(+), 34 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index d9a83bb6a..7f984cb 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -446,36 +446,6 @@ AC_DEFUN([LC_HAVE_LIBAIO], [ AC_MSG_WARN([libaio is not installed in the system])) ]) # LC_HAVE_LIBAIO -AC_DEFUN([LC_HAVE_PROJECT_QUOTA], [ -LB_CHECK_COMPILE([if get_projid exists], -get_projid, [ - struct inode; - #include -],[ - struct dquot_operations ops = { }; - - ops.get_projid(NULL, NULL); -],[ - AC_DEFINE(HAVE_PROJECT_QUOTA, 1, - [get_projid function exists]) -]) -]) # LC_HAVE_PROJECT_QUOTA - -AC_DEFUN([LC_HAVE_GET_INODE_USAGE], [ -LB_CHECK_COMPILE([if get_inode_usage exists], -get_inode_usage, [ - struct inode; - #include -],[ - struct dquot_operations ops = { }; - - ops.get_inode_usage(NULL, NULL); -],[ - AC_DEFINE(HAVE_GET_INODE_USAGE, 1, - [get_inode_usage function exists]) -]) -]) # LC_HAVE_GET_INODE_USAGE - # # LC_INVALIDATE_RANGE # @@ -1131,6 +1101,26 @@ bdi_cap_map_copy, [ ]) # LC_HAVE_BDI_CAP_MAP_COPY # +# LC_HAVE_PROJECT_QUOTA +# +# Kernel version v4.0-rc1-197-g847aac644e92 +# +AC_DEFUN([LC_HAVE_PROJECT_QUOTA], [ +LB_CHECK_COMPILE([if get_projid exists], +get_projid, [ + struct inode; + #include +],[ + struct dquot_operations ops = { }; + + ops.get_projid(NULL, NULL); +],[ + AC_DEFINE(HAVE_PROJECT_QUOTA, 1, + [get_projid function exists]) +]) +]) # LC_HAVE_PROJECT_QUOTA + +# # LC_IOV_ITER_RW # # 4.1 kernel has iov_iter_rw @@ -1620,6 +1610,26 @@ lock_page_memcg, [ ]) # LC_LOCK_PAGE_MEMCG # +# 
LC_HAVE_DOWN_WRITE_KILLABLE +# +# Kernel version v4.6-rc3-28-g916633a40370 +# +AC_DEFUN([LC_HAVE_DOWN_WRITE_KILLABLE], [ +LB_CHECK_COMPILE([if down_write_killable exists], +down_write_killable, [ + struct rw_semaphore sem; + #include +],[ + int rc; + + rc = down_write_killable(&sem); +],[ + AC_DEFINE(HAVE_DOWN_WRITE_KILLABLE, 1, + [down_write_killable function exists]) +]) +]) # LC_HAVE_DOWN_WRITE_KILLABLE + +# # LC_D_INIT # # Kernel version 4.7-rc5 commit 285b102d3b745f3c2c110c9c327741d87e64aacc @@ -2007,6 +2017,27 @@ current_time, [ ]) # LIBCFS_CURRENT_TIME # +# LC_HAVE_GET_INODE_USAGE +# +# Kernel version v4.12-rc2-43-g7a9ca53aea10 +# +AC_DEFUN([LC_HAVE_GET_INODE_USAGE], [ +LB_CHECK_COMPILE([if get_inode_usage exists], +get_inode_usage, [ + struct inode; + #include +],[ + struct dquot_operations ops = { }; + + ops.get_inode_usage(NULL, NULL); +],[ + AC_DEFINE(HAVE_GET_INODE_USAGE, 1, + [get_inode_usage function exists]) +]) +]) # LC_HAVE_GET_INODE_USAGE + + +# # Kernel version 4.12-rc3 85787090a21eb749d8b347eaf9ff1a455637473c # changed struct super_block s_uuid into a proper uuid_t # @@ -2682,6 +2713,7 @@ AC_DEFUN([LC_PROG_LINUX], [ LC_HAVE_IN_COMPAT_SYSCALL LC_HAVE_XATTR_HANDLER_INODE_PARAM LC_LOCK_PAGE_MEMCG + LC_HAVE_DOWN_WRITE_KILLABLE # 4.7 LC_D_IN_LOOKUP diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 47911e9..d07f631 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -137,7 +137,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, LASSERT(desc->ld_active_tgt_count > 0); desc->ld_active_tgt_count--; set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); - set_bit(LQ_DIRTY, <d->ltd_qos.lq_rr.lqr_flags); CDEBUG(D_CONFIG, "%s: turns inactive\n", tgt->ltd_exp->exp_obd->obd_name); } @@ -153,7 +152,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, tgt->ltd_connecting = 0; desc->ld_active_tgt_count++; set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); - set_bit(LQ_DIRTY, 
<d->ltd_qos.lq_rr.lqr_flags); CDEBUG(D_CONFIG, "%s: turns active\n", tgt->ltd_exp->exp_obd->obd_name); } @@ -689,7 +687,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env, RETURN(rc); } - o = lod_qos_declare_object_on(env, lod, ost_idx, true, th); + o = lod_qos_declare_object_on(env, lod, ost_idx, (speed > 1), th); if (IS_ERR(o)) { CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n", ost_idx, (int) PTR_ERR(o)); @@ -1374,6 +1372,20 @@ out: RETURN(rc); } +#ifdef HAVE_DOWN_WRITE_KILLABLE +struct semaphore_timer { + struct timer_list timer; + struct task_struct *task; +}; + +static void process_semaphore_timer(struct timer_list *t) +{ + struct semaphore_timer *timeout = cfs_from_timer(timeout, t, timer); + + send_sig(SIGKILL, timeout->task, 1); +} +#endif + /** * Calculate penalties per-ost in a pool * @@ -1550,9 +1562,30 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo, stripes_per_ost = (lod_comp->llc_stripe_count - 1)/osts->op_count + 1; +#ifdef HAVE_DOWN_WRITE_KILLABLE + if (!down_write_trylock(&lod->lod_ost_descs.ltd_qos.lq_rw_sem)) { + struct semaphore_timer timer; + + kernel_sigaction(SIGKILL, SIG_DFL); + timer.task = current; + cfs_timer_setup(&timer.timer, process_semaphore_timer, 0, 0); + mod_timer(&timer.timer, jiffies + cfs_time_seconds(2)); + /* Do actual allocation, use write lock here. */ + rc = down_write_killable(&lod->lod_ost_descs.ltd_qos.lq_rw_sem); + + del_singleshot_timer_sync(&timer.timer); + kernel_sigaction(SIGKILL, SIG_IGN); + if (rc) { + flush_signals(current); + QOS_DEBUG("%s: wakeup semaphore on timeout rc = %d\n", + lod2obd(lod)->obd_name, rc); + GOTO(out_nolock, rc = -EAGAIN); + } + } +#else /* Do actual allocation, use write lock here. */ down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem); - +#endif /* * Check again, while we were sleeping on @lq_rw_sem things could * change. -- 1.8.3.1