Whamcloud - gitweb
LU-15393 lod: use killable semaphore for creation path 21/45921/7
authorAlexander Boyko <alexander.boyko@hpe.com>
Wed, 22 Dec 2021 12:06:16 +0000 (07:06 -0500)
committerOleg Drokin <green@whamcloud.com>
Sat, 11 Jun 2022 05:32:55 +0000 (05:32 +0000)
The lod_ost_alloc_qos() function sleeps during OST failover, even though
object allocation could use different OSTs. The patch changes the
down_write() call to down_write_killable() and adds a timer for the
wakeup.

The main idea of this fix is as follows: when an OST is lost during
lod_ost_alloc_rr() and the MDT does not have precreated objects for it,
lod_ost_alloc_rr()->..->lod_qos_declare_object_on() would sleep while
holding lq_rw_sem for read. Any creation thread would then get stuck in
lod_ost_alloc_qos() waiting for lq_rw_sem for write, after a statfs update.
With the fix, the sleep is limited and allocation goes through
lod_ost_alloc_rr(). For read, lq_rw_sem is shared and stripe allocation
would skip an OST without objects.

lod_ost_alloc_rr() refills the OST pool while holding lq_rw_sem for write
when lq_rr.lqr_flags has LQ_DIRTY set. This should happen only when an OST
is added/removed. There is no need to set LQ_DIRTY for lq_rr when statfs
gets an error; in that case the flag does not lead to any change of the
pool list in lod_qos_calc_rr().

Change the behaviour of lod_check_and_reserve_ost(): it now sleeps
during object allocation for speed 2 only.

HPE-bug-id: LUS-10388
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I4768c4cf7d2f9f02f0a9e0dfb6d15e02932cb5fe
Reviewed-on: https://review.whamcloud.com/45921
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/lod/lod_qos.c

index d9a83bb..7f984cb 100644 (file)
@@ -446,36 +446,6 @@ AC_DEFUN([LC_HAVE_LIBAIO], [
                AC_MSG_WARN([libaio is not installed in the system]))
 ]) # LC_HAVE_LIBAIO
 
-AC_DEFUN([LC_HAVE_PROJECT_QUOTA], [
-LB_CHECK_COMPILE([if get_projid exists],
-get_projid, [
-       struct inode;
-       #include <linux/quota.h>
-],[
-       struct dquot_operations ops = { };
-
-       ops.get_projid(NULL, NULL);
-],[
-       AC_DEFINE(HAVE_PROJECT_QUOTA, 1,
-               [get_projid function exists])
-])
-]) # LC_HAVE_PROJECT_QUOTA
-
-AC_DEFUN([LC_HAVE_GET_INODE_USAGE], [
-LB_CHECK_COMPILE([if get_inode_usage exists],
-get_inode_usage, [
-       struct inode;
-       #include <linux/quota.h>
-],[
-       struct dquot_operations ops = { };
-
-       ops.get_inode_usage(NULL, NULL);
-],[
-       AC_DEFINE(HAVE_GET_INODE_USAGE, 1,
-               [get_inode_usage function exists])
-])
-]) # LC_HAVE_GET_INODE_USAGE
-
 #
 # LC_INVALIDATE_RANGE
 #
@@ -1131,6 +1101,26 @@ bdi_cap_map_copy, [
 ]) # LC_HAVE_BDI_CAP_MAP_COPY
 
 #
+# LC_HAVE_PROJECT_QUOTA
+#
+# Kernel version v4.0-rc1-197-g847aac644e92
+#
+AC_DEFUN([LC_HAVE_PROJECT_QUOTA], [
+LB_CHECK_COMPILE([if get_projid exists],
+get_projid, [
+       struct inode;
+       #include <linux/quota.h>
+],[
+       struct dquot_operations ops = { };
+
+       ops.get_projid(NULL, NULL);
+],[
+       AC_DEFINE(HAVE_PROJECT_QUOTA, 1,
+               [get_projid function exists])
+])
+]) # LC_HAVE_PROJECT_QUOTA
+
+#
 # LC_IOV_ITER_RW
 #
 # 4.1 kernel has iov_iter_rw
@@ -1620,6 +1610,26 @@ lock_page_memcg, [
 ]) # LC_LOCK_PAGE_MEMCG
 
 #
+# LC_HAVE_DOWN_WRITE_KILLABLE
+#
+# Kernel version v4.6-rc3-28-g916633a40370
+#
+AC_DEFUN([LC_HAVE_DOWN_WRITE_KILLABLE], [
+LB_CHECK_COMPILE([if down_write_killable exists],
+down_write_killable, [
+       struct rw_semaphore sem;
+       #include <linux/rwsem.h>
+],[
+       int rc;
+
+       rc = down_write_killable(&sem);
+],[
+       AC_DEFINE(HAVE_DOWN_WRITE_KILLABLE, 1,
+               [down_write_killable function exists])
+])
+]) # LC_HAVE_DOWN_WRITE_KILLABLE
+
+#
 # LC_D_INIT
 #
 # Kernel version 4.7-rc5 commit 285b102d3b745f3c2c110c9c327741d87e64aacc
@@ -2007,6 +2017,27 @@ current_time, [
 ]) # LIBCFS_CURRENT_TIME
 
 #
+# LC_HAVE_GET_INODE_USAGE
+#
+# Kernel version v4.12-rc2-43-g7a9ca53aea10
+#
+AC_DEFUN([LC_HAVE_GET_INODE_USAGE], [
+LB_CHECK_COMPILE([if get_inode_usage exists],
+get_inode_usage, [
+       struct inode;
+       #include <linux/quota.h>
+],[
+       struct dquot_operations ops = { };
+
+       ops.get_inode_usage(NULL, NULL);
+],[
+       AC_DEFINE(HAVE_GET_INODE_USAGE, 1,
+               [get_inode_usage function exists])
+])
+]) # LC_HAVE_GET_INODE_USAGE
+
+
+#
 # Kernel version 4.12-rc3 85787090a21eb749d8b347eaf9ff1a455637473c
 # changed struct super_block s_uuid into a proper uuid_t
 #
@@ -2682,6 +2713,7 @@ AC_DEFUN([LC_PROG_LINUX], [
        LC_HAVE_IN_COMPAT_SYSCALL
        LC_HAVE_XATTR_HANDLER_INODE_PARAM
        LC_LOCK_PAGE_MEMCG
+       LC_HAVE_DOWN_WRITE_KILLABLE
 
        # 4.7
        LC_D_IN_LOOKUP
index 47911e9..d07f631 100644 (file)
@@ -137,7 +137,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
                        LASSERT(desc->ld_active_tgt_count > 0);
                        desc->ld_active_tgt_count--;
                        set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
-                       set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_rr.lqr_flags);
                        CDEBUG(D_CONFIG, "%s: turns inactive\n",
                               tgt->ltd_exp->exp_obd->obd_name);
                }
@@ -153,7 +152,6 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
                        tgt->ltd_connecting = 0;
                        desc->ld_active_tgt_count++;
                        set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
-                       set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_rr.lqr_flags);
                        CDEBUG(D_CONFIG, "%s: turns active\n",
                               tgt->ltd_exp->exp_obd->obd_name);
                }
@@ -689,7 +687,7 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
                        RETURN(rc);
        }
 
-       o = lod_qos_declare_object_on(env, lod, ost_idx, true, th);
+       o = lod_qos_declare_object_on(env, lod, ost_idx, (speed > 1), th);
        if (IS_ERR(o)) {
                CDEBUG(D_OTHER, "can't declare new object on #%u: %d\n",
                       ost_idx, (int) PTR_ERR(o));
@@ -1374,6 +1372,20 @@ out:
        RETURN(rc);
 }
 
+#ifdef HAVE_DOWN_WRITE_KILLABLE
+struct semaphore_timer {
+       struct timer_list timer;
+       struct task_struct *task;
+};
+
+static void process_semaphore_timer(struct timer_list *t)
+{
+       struct semaphore_timer *timeout = cfs_from_timer(timeout, t, timer);
+
+       send_sig(SIGKILL, timeout->task, 1);
+}
+#endif
+
 /**
  * Calculate penalties per-ost in a pool
  *
@@ -1550,9 +1562,30 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                stripes_per_ost =
                        (lod_comp->llc_stripe_count - 1)/osts->op_count + 1;
 
+#ifdef HAVE_DOWN_WRITE_KILLABLE
+       if (!down_write_trylock(&lod->lod_ost_descs.ltd_qos.lq_rw_sem)) {
+               struct semaphore_timer timer;
+
+               kernel_sigaction(SIGKILL, SIG_DFL);
+               timer.task = current;
+               cfs_timer_setup(&timer.timer, process_semaphore_timer, 0, 0);
+               mod_timer(&timer.timer, jiffies + cfs_time_seconds(2));
+               /* Do actual allocation, use write lock here. */
+               rc = down_write_killable(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
+
+               del_singleshot_timer_sync(&timer.timer);
+               kernel_sigaction(SIGKILL, SIG_IGN);
+               if (rc) {
+                       flush_signals(current);
+                       QOS_DEBUG("%s: wakeup semaphore on timeout rc = %d\n",
+                                 lod2obd(lod)->obd_name, rc);
+                       GOTO(out_nolock, rc = -EAGAIN);
+               }
+       }
+#else
        /* Do actual allocation, use write lock here. */
        down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem);
-
+#endif
        /*
         * Check again, while we were sleeping on @lq_rw_sem things could
         * change.