Whamcloud - gitweb
LU-11518 ldlm: pool recalc forceful call 64/39564/6
author: Vitaly Fertman <c17818@cray.com>
Tue, 4 Aug 2020 17:45:12 +0000 (20:45 +0300)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 19 Sep 2020 14:13:19 +0000 (14:13 +0000)
Allow the pool recalc to be called forcefully, independently of
the last recalc time;

Call the pool recalc forcefully on the lock decref instead of LRU
cancel to take into account the fresh SLV obtained from the server.

Call LRU recalc from after_reply if a significant SLV change occurs.
Add a sysfs attribute to control what 'a significant SLV change' is.

Signed-off-by: Vitaly Fertman <c17818@cray.com>
Change-Id: Iffeb8d73effdfc494f412422f285921aa4eb9811
HPE-bug-id: LUS-8678
Reviewed-on: https://es-gerrit.dev.cray.com/157134
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Tested-by: Jenkins Build User <nssreleng@cray.com>
Reviewed-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-on: https://review.whamcloud.com/39564
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_pool.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c

index 5cc31e9..65e91fe 100644 (file)
@@ -69,6 +69,7 @@ extern struct kset *ldlm_svc_kset;
 #define LDLM_DIRTY_AGE_LIMIT (10)
 #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
 #define LDLM_DEFAULT_LRU_SHRINK_BATCH (16)
 #define LDLM_DIRTY_AGE_LIMIT (10)
 #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
 #define LDLM_DEFAULT_LRU_SHRINK_BATCH (16)
+#define LDLM_DEFAULT_SLV_RECALC_PCT (10)
 
 /**
  * LDLM non-error return states
 
 /**
  * LDLM non-error return states
@@ -197,6 +198,17 @@ static inline int lockmode_compat(enum ldlm_mode exist_mode,
  *
  */
 
  *
  */
 
+/* Cancel lru flag, it indicates we cancel aged locks. */
+enum ldlm_lru_flags {
+       LDLM_LRU_FLAG_NO_WAIT   = 0x1, /* Cancel locks w/o blocking (neither
+                                       * sending nor waiting for any RPCs) */
+       LDLM_LRU_FLAG_CLEANUP   = 0x2, /* Used when clearing lru, tells
+                                       * prepare_lru_list to set discard flag
+                                       * on PR extent locks so we don't waste
+                                       * time saving pages that will be
+                                       * discarded momentarily */
+};
+
 struct ldlm_pool;
 struct ldlm_lock;
 struct ldlm_resource;
 struct ldlm_pool;
 struct ldlm_lock;
 struct ldlm_resource;
@@ -212,7 +224,7 @@ struct ldlm_namespace;
  */
 struct ldlm_pool_ops {
        /** Recalculate pool \a pl usage */
  */
 struct ldlm_pool_ops {
        /** Recalculate pool \a pl usage */
-       int (*po_recalc)(struct ldlm_pool *pl);
+       int (*po_recalc)(struct ldlm_pool *pl, bool force);
        /** Cancel at least \a nr locks from pool \a pl */
        int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
        int (*po_setup)(struct ldlm_pool *pl, int limit);
        /** Cancel at least \a nr locks from pool \a pl */
        int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
        int (*po_setup)(struct ldlm_pool *pl, int limit);
@@ -451,6 +463,11 @@ struct ldlm_namespace {
         */
        unsigned int            ns_cancel_batch;
 
         */
        unsigned int            ns_cancel_batch;
 
+       /**
+        * How much the SLV should decrease in %% to trigger LRU cancel urgently.
+        */
+       unsigned int            ns_recalc_pct;
+
        /** Maximum allowed age (last used time) for locks in the LRU.  Set in
         * seconds from userspace, but stored in ns to avoid repeat conversions.
         */
        /** Maximum allowed age (last used time) for locks in the LRU.  Set in
         * seconds from userspace, but stored in ns to avoid repeat conversions.
         */
@@ -545,7 +562,13 @@ struct ldlm_namespace {
         * Flag to indicate namespace is being freed. Used to determine if
         * recalculation of LDLM pool statistics should be skipped.
         */
         * Flag to indicate namespace is being freed. Used to determine if
         * recalculation of LDLM pool statistics should be skipped.
         */
-       unsigned                ns_stopping:1;
+       unsigned                ns_stopping:1,
+
+       /**
+        * Flag to indicate the LRU recalc on RPC reply is in progress.
+        * Used to limit the process by 1 thread only.
+        */
+                               ns_rpc_recalc:1;
 
        /**
         * Which bucket should we start with the lock reclaim.
 
        /**
         * Which bucket should we start with the lock reclaim.
@@ -1811,7 +1834,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
 int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
 void ldlm_pool_fini(struct ldlm_pool *pl);
 int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
 int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
 void ldlm_pool_fini(struct ldlm_pool *pl);
 int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl);
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force);
 __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
 __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
index 44b944c..58b2db6 100644 (file)
@@ -94,17 +94,6 @@ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *,
 struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side);
 
 /* ldlm_request.c */
 struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side);
 
 /* ldlm_request.c */
-/* Cancel lru flag, it indicates we cancel aged locks. */
-enum ldlm_lru_flags {
-       LDLM_LRU_FLAG_NO_WAIT   = 0x1, /* Cancel locks w/o blocking (neither
-                                       * sending nor waiting for any RPCs) */
-       LDLM_LRU_FLAG_CLEANUP   = 0x2, /* Used when clearing lru, tells
-                                       * prepare_lru_list to set discard flag
-                                       * on PR extent locks so we don't waste
-                                       * time saving pages that will be
-                                       * discarded momentarily */
-};
-
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int min,
                    enum ldlm_cancel_flags cancel_flags,
                    enum ldlm_lru_flags lru_flags);
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int min,
                    enum ldlm_cancel_flags cancel_flags,
                    enum ldlm_lru_flags lru_flags);
@@ -185,6 +174,7 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns,
                           struct ldlm_lock_desc *ld,
                           struct list_head *cancels, int count,
                           enum ldlm_cancel_flags cancel_flags);
                           struct ldlm_lock_desc *ld,
                           struct list_head *cancels, int count,
                           enum ldlm_cancel_flags cancel_flags);
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns);
 int ldlm_bl_thread_wakeup(void);
 
 void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
 int ldlm_bl_thread_wakeup(void);
 
 void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
index ded6c7c..9b4937b 100644 (file)
@@ -905,7 +905,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
                if (ldlm_is_fail_loc(lock))
                        OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
                if (ldlm_is_fail_loc(lock))
                        OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
-               ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
+               ldlm_pool_recalc(&ns->ns_pool, true);
        } else {
                LDLM_DEBUG(lock, "do not add lock into lru list");
                unlock_res_and_lock(lock);
        } else {
                LDLM_DEBUG(lock, "do not add lock into lru list");
                unlock_res_and_lock(lock);
index 9b16cb6..ca0729c 100644 (file)
@@ -2214,6 +2214,11 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
        return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
 }
 
        return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
 }
 
+/*
+ * Queue a namespace-only work item for a blocking thread, to run an
+ * urgent pool recalc for \a ns.  Passing no lock, no desc and no cancel
+ * list makes ldlm_bl_thread_blwi() take the pool-recalc branch
+ * (ldlm_pool_recalc(..., true)) for this item.
+ */
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns)
+{
+       return ldlm_bl_to_thread(ns, NULL, NULL, NULL, 0, LCF_ASYNC);
+}
+
 int ldlm_bl_thread_wakeup(void)
 {
        wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
 int ldlm_bl_thread_wakeup(void)
 {
        wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
@@ -2832,10 +2837,17 @@ static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp,
                                                   LCF_BL_AST);
                ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
                                     blwi->blwi_flags);
                                                   LCF_BL_AST);
                ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
                                     blwi->blwi_flags);
-       } else {
+       } else if (blwi->blwi_lock) {
                ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
                                        blwi->blwi_lock);
                ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
                                        blwi->blwi_lock);
+       } else {
+               ldlm_pool_recalc(&blwi->blwi_ns->ns_pool, true);
+               spin_lock(&blwi->blwi_ns->ns_lock);
+               blwi->blwi_ns->ns_rpc_recalc = 0;
+               spin_unlock(&blwi->blwi_ns->ns_lock);
+               ldlm_namespace_put(blwi->blwi_ns);
        }
        }
+
        if (blwi->blwi_mem_pressure)
                memalloc_noreclaim_restore(mpflags);
 
        if (blwi->blwi_mem_pressure)
                memalloc_noreclaim_restore(mpflags);
 
index 8eb29c4..bdba328 100644 (file)
@@ -332,19 +332,19 @@ static void ldlm_srv_pool_push_slv(struct ldlm_pool *pl)
  *
  * \pre ->pl_lock is not locked.
  */
  *
  * \pre ->pl_lock is not locked.
  */
-static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_srv_pool_recalc(struct ldlm_pool *pl, bool force)
 {
        timeout_t recalc_interval_sec;
 
        ENTRY;
 
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
 {
        timeout_t recalc_interval_sec;
 
        ENTRY;
 
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-       if (recalc_interval_sec < pl->pl_recalc_period)
+       if (!force && recalc_interval_sec < pl->pl_recalc_period)
                RETURN(0);
 
        spin_lock(&pl->pl_lock);
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
                RETURN(0);
 
        spin_lock(&pl->pl_lock);
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-       if (recalc_interval_sec < pl->pl_recalc_period) {
+       if (!force && recalc_interval_sec < pl->pl_recalc_period) {
                spin_unlock(&pl->pl_lock);
                RETURN(0);
        }
                spin_unlock(&pl->pl_lock);
                RETURN(0);
        }
@@ -471,7 +471,7 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
 /**
  * Recalculates client size pool \a pl according to current SLV and Limit.
  */
 /**
  * Recalculates client size pool \a pl according to current SLV and Limit.
  */
-static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_cli_pool_recalc(struct ldlm_pool *pl, bool force)
 {
        timeout_t recalc_interval_sec;
        int ret;
 {
        timeout_t recalc_interval_sec;
        int ret;
@@ -479,7 +479,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
        ENTRY;
 
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        ENTRY;
 
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-       if (recalc_interval_sec < pl->pl_recalc_period)
+       if (!force && recalc_interval_sec < pl->pl_recalc_period)
                RETURN(0);
 
        spin_lock(&pl->pl_lock);
                RETURN(0);
 
        spin_lock(&pl->pl_lock);
@@ -487,7 +487,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
         * Check if we need to recalc lists now.
         */
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         * Check if we need to recalc lists now.
         */
        recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-       if (recalc_interval_sec < pl->pl_recalc_period) {
+       if (!force && recalc_interval_sec < pl->pl_recalc_period) {
                spin_unlock(&pl->pl_lock);
                RETURN(0);
        }
                spin_unlock(&pl->pl_lock);
                RETURN(0);
        }
@@ -571,7 +571,7 @@ static struct ldlm_pool_ops ldlm_cli_pool_ops = {
  *
  * \retval             time in seconds for the next recalc of this pool
  */
  *
  * \retval             time in seconds for the next recalc of this pool
  */
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
 {
        timeout_t recalc_interval_sec;
        int count;
 {
        timeout_t recalc_interval_sec;
        int count;
@@ -597,7 +597,7 @@ time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
        }
 
        if (pl->pl_ops->po_recalc != NULL) {
        }
 
        if (pl->pl_ops->po_recalc != NULL) {
-               count = pl->pl_ops->po_recalc(pl);
+               count = pl->pl_ops->po_recalc(pl, force);
                lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
                                    count);
        }
                lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
                                    count);
        }
@@ -991,7 +991,7 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
         * with too long call paths.
         */
        if (ns_is_server(ldlm_pl2ns(pl)))
         * with too long call paths.
         */
        if (ns_is_server(ldlm_pl2ns(pl)))
-               ldlm_pool_recalc(pl);
+               ldlm_pool_recalc(pl, false);
 }
 
 /**
 }
 
 /**
@@ -1016,7 +1016,7 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
        lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
 
        if (ns_is_server(ldlm_pl2ns(pl)))
        lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
 
        if (ns_is_server(ldlm_pl2ns(pl)))
-               ldlm_pool_recalc(pl);
+               ldlm_pool_recalc(pl, false);
 }
 
 /**
 }
 
 /**
@@ -1333,7 +1333,8 @@ static time64_t ldlm_pools_recalc_delay(enum ldlm_side side)
                 * After setup is done - recalc the pool.
                 */
                if (!skip) {
                 * After setup is done - recalc the pool.
                 */
                if (!skip) {
-                       delay = min(delay, ldlm_pool_recalc(&ns->ns_pool));
+                       delay = min(delay,
+                                   ldlm_pool_recalc(&ns->ns_pool, false));
                        ldlm_namespace_put(ns);
                }
        }
                        ldlm_namespace_put(ns);
                }
        }
@@ -1472,7 +1473,7 @@ int ldlm_pool_setup(struct ldlm_pool *pl, int limit)
        return 0;
 }
 
        return 0;
 }
 
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
 {
        return 0;
 }
 {
        return 0;
 }
index c322f15..b53a858 100644 (file)
@@ -1418,8 +1418,9 @@ static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp)
  */
 int ldlm_cli_update_pool(struct ptlrpc_request *req)
 {
  */
 int ldlm_cli_update_pool(struct ptlrpc_request *req)
 {
+       struct ldlm_namespace *ns;
        struct obd_device *obd;
        struct obd_device *obd;
-       __u64 new_slv;
+       __u64 new_slv, ratio;
        __u32 new_limit;
 
        ENTRY;
        __u32 new_limit;
 
        ENTRY;
@@ -1457,17 +1458,39 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req)
        read_unlock(&obd->obd_pool_lock);
 
        /*
        read_unlock(&obd->obd_pool_lock);
 
        /*
-        * Set new SLV and limit in OBD fields to make them accessible
-        * to the pool thread. We do not access obd_namespace and pool
-        * directly here as there is no reliable way to make sure that
-        * they are still alive at cleanup time. Evil races are possible
-        * which may cause Oops at that time.
+        * OBD device keeps the new pool attributes before they are handled by
+        * the pool.
         */
        write_lock(&obd->obd_pool_lock);
        obd->obd_pool_slv = new_slv;
        obd->obd_pool_limit = new_limit;
        write_unlock(&obd->obd_pool_lock);
 
         */
        write_lock(&obd->obd_pool_lock);
        obd->obd_pool_slv = new_slv;
        obd->obd_pool_limit = new_limit;
        write_unlock(&obd->obd_pool_lock);
 
+       /*
+        * Check if an urgent pool recalc is needed, let it to be a change of
+        * SLV on 10%. It is applicable to LRU resize enabled case only.
+        */
+       ns = obd->obd_namespace;
+       if (!ns_connect_lru_resize(ns) ||
+           ldlm_pool_get_slv(&ns->ns_pool) < new_slv)
+               RETURN(0);
+
+       ratio = 100 * new_slv / ldlm_pool_get_slv(&ns->ns_pool);
+       if (100 - ratio >= ns->ns_recalc_pct &&
+           !ns->ns_stopping && !ns->ns_rpc_recalc) {
+               bool recalc = false;
+
+               spin_lock(&ns->ns_lock);
+               if (!ns->ns_stopping && !ns->ns_rpc_recalc) {
+                       ldlm_namespace_get(ns);
+                       recalc = true;
+                       ns->ns_rpc_recalc = 1;
+               }
+               spin_unlock(&ns->ns_lock);
+               if (recalc)
+                       ldlm_bl_to_thread_ns(ns);
+       }
+
        RETURN(0);
 }
 
        RETURN(0);
 }
 
index e262e7f..a18daa8 100644 (file)
@@ -398,6 +398,35 @@ static ssize_t lru_cancel_batch_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(lru_cancel_batch);
 
 }
 LUSTRE_RW_ATTR(lru_cancel_batch);
 
+/*
+ * Show ns_recalc_pct: the SLV decrease (in percent) that triggers an
+ * urgent LRU recalc on RPC reply.
+ *
+ * Note: \a buf is a pointer parameter, so sizeof(buf) is the size of the
+ * pointer, not of the sysfs buffer; sysfs show buffers are PAGE_SIZE
+ * bytes, and scnprintf() returns the number of characters written.
+ */
+static ssize_t ns_recalc_pct_show(struct kobject *kobj,
+                                 struct attribute *attr, char *buf)
+{
+       struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+                                                ns_kobj);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n", ns->ns_recalc_pct);
+}
+
+/*
+ * Store ns_recalc_pct: set the SLV decrease percentage that triggers an
+ * urgent LRU recalc (default LDLM_DEFAULT_SLV_RECALC_PCT).
+ *
+ * Accepts a decimal value in [0, 100]; returns -EINVAL if the input does
+ * not parse as an unsigned decimal number and -ERANGE for values > 100,
+ * otherwise \a count to signal the whole input was consumed.
+ */
+static ssize_t ns_recalc_pct_store(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  const char *buffer, size_t count)
+{
+       struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+                                                ns_kobj);
+       unsigned long tmp;
+
+       if (kstrtoul(buffer, 10, &tmp))
+               return -EINVAL;
+
+       if (tmp > 100)
+               return -ERANGE;
+
+       /* NOTE(review): stored without ns_lock — assumes a plain unsigned
+        * int update is acceptable to concurrent readers; confirm. */
+       ns->ns_recalc_pct = (unsigned int)tmp;
+
+       return count;
+}
+LUSTRE_RW_ATTR(ns_recalc_pct);
+
 static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr,
                                char *buf)
 {
 static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr,
                                char *buf)
 {
@@ -649,6 +678,7 @@ static struct attribute *ldlm_ns_attrs[] = {
        &lustre_attr_resource_count.attr,
        &lustre_attr_lock_count.attr,
        &lustre_attr_lock_unused_count.attr,
        &lustre_attr_resource_count.attr,
        &lustre_attr_lock_count.attr,
        &lustre_attr_lock_unused_count.attr,
+       &lustre_attr_ns_recalc_pct.attr,
        &lustre_attr_lru_size.attr,
        &lustre_attr_lru_cancel_batch.attr,
        &lustre_attr_lru_max_age.attr,
        &lustre_attr_lru_size.attr,
        &lustre_attr_lru_cancel_batch.attr,
        &lustre_attr_lru_max_age.attr,
@@ -932,6 +962,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
        ns->ns_nr_unused          = 0;
        ns->ns_max_unused         = LDLM_DEFAULT_LRU_SIZE;
        ns->ns_cancel_batch       = LDLM_DEFAULT_LRU_SHRINK_BATCH;
        ns->ns_nr_unused          = 0;
        ns->ns_max_unused         = LDLM_DEFAULT_LRU_SIZE;
        ns->ns_cancel_batch       = LDLM_DEFAULT_LRU_SHRINK_BATCH;
+       ns->ns_recalc_pct         = LDLM_DEFAULT_SLV_RECALC_PCT;
        ns->ns_max_age            = ktime_set(LDLM_DEFAULT_MAX_ALIVE, 0);
        ns->ns_ctime_age_limit    = LDLM_CTIME_AGE_LIMIT;
        ns->ns_dirty_age_limit    = ktime_set(LDLM_DIRTY_AGE_LIMIT, 0);
        ns->ns_max_age            = ktime_set(LDLM_DEFAULT_MAX_ALIVE, 0);
        ns->ns_ctime_age_limit    = LDLM_CTIME_AGE_LIMIT;
        ns->ns_dirty_age_limit    = ktime_set(LDLM_DIRTY_AGE_LIMIT, 0);