Whamcloud - gitweb
LU-11518 ldlm: pool fixes 63/39563/6
author Vitaly Fertman <c17818@cray.com>
Fri, 31 Jul 2020 18:30:09 +0000 (21:30 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 19 Sep 2020 14:13:12 +0000 (14:13 +0000)
Since the client-side recalc period was increased to 10 seconds, the
grant & cancel rates have been showing the speed not per second but
per ten seconds.

At pool initialization time, the server-side recalc job should not
be delayed by the client's recalc period.

It may happen that the recalc time of an NS is significant and comparable
to (or even greater than) the recalc period of the next NS (and of all the
following NS's) in the list. If that time has already been spent on the
next NS, it does not mean we want to double the delay for the original NS
and recalc it only after the next N secs.

Make the lock volume factor more fine-grained (the default is now 100 vs
the original 1): cancelling locks on clients twice as fast as the server
requested is likely too fast.

Protect the previously unprotected pl_server_lock_volume update with the pool lock.

Replace ktime_get_real_seconds with ktime_get_seconds for the recalc
interval.

Signed-off-by: Vitaly Fertman <c17818@cray.com>
Change-Id: Icba73209682a1b1d0d20c087581fad4f73ee3389
HPE-bug-id: LUS-8678
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexey Lyashkov <c17817@cray.com>
Tested-by: Alexander Lezhoev <c17454@cray.com>
Reviewed-on: https://review.whamcloud.com/39563
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_pool.c
lustre/ldlm/ldlm_request.c
lustre/tests/sanity.sh

index f1071bd..5cc31e9 100644 (file)
@@ -254,8 +254,9 @@ struct ldlm_pool {
        __u64                   pl_server_lock_volume;
        /** Current biggest client lock volume. Protected by pl_lock. */
        __u64                   pl_client_lock_volume;
-       /** Lock volume factor. SLV on client is calculated as following:
-        *  server_slv * lock_volume_factor. */
+       /** Lock volume factor, shown in percents in procfs, but internally
+        *  Client SLV calculated as: server_slv * lock_volume_factor >> 8.
+        */
        atomic_t                pl_lock_volume_factor;
        /** Time when last SLV from server was obtained. */
        time64_t                pl_recalc_time;
index b6d64f0..8eb29c4 100644 (file)
@@ -286,13 +286,13 @@ static void ldlm_pool_recalc_slv(struct ldlm_pool *pl)
  *
  * \pre ->pl_lock is locked.
  */
-static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
+static void ldlm_pool_recalc_stats(struct ldlm_pool *pl, timeout_t period)
 {
        int grant_plan = pl->pl_grant_plan;
        __u64 slv = pl->pl_server_lock_volume;
        int granted = ldlm_pool_granted(pl);
-       int grant_rate = atomic_read(&pl->pl_grant_rate);
-       int cancel_rate = atomic_read(&pl->pl_cancel_rate);
+       int grant_rate = atomic_read(&pl->pl_grant_rate) / period;
+       int cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;
 
        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
                            slv);
@@ -334,16 +334,16 @@ static void ldlm_srv_pool_push_slv(struct ldlm_pool *pl)
  */
 static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
 {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
 
        ENTRY;
 
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        if (recalc_interval_sec < pl->pl_recalc_period)
                RETURN(0);
 
        spin_lock(&pl->pl_lock);
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        if (recalc_interval_sec < pl->pl_recalc_period) {
                spin_unlock(&pl->pl_lock);
                RETURN(0);
@@ -364,7 +364,7 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
         */
        ldlm_pool_recalc_grant_plan(pl);
 
-       pl->pl_recalc_time = ktime_get_real_seconds();
+       pl->pl_recalc_time = ktime_get_seconds();
        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
                            recalc_interval_sec);
        spin_unlock(&pl->pl_lock);
@@ -473,12 +473,12 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
  */
 static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
 {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
        int ret;
 
        ENTRY;
 
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        if (recalc_interval_sec < pl->pl_recalc_period)
                RETURN(0);
 
@@ -486,7 +486,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
        /*
         * Check if we need to recalc lists now.
         */
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        if (recalc_interval_sec < pl->pl_recalc_period) {
                spin_unlock(&pl->pl_lock);
                RETURN(0);
@@ -511,7 +511,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
         * Time of LRU resizing might be longer than period,
         * so update after LRU resizing rather than before it.
         */
-       pl->pl_recalc_time = ktime_get_real_seconds();
+       pl->pl_recalc_time = ktime_get_seconds();
        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
                            recalc_interval_sec);
        spin_unlock(&pl->pl_lock);
@@ -540,7 +540,9 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
        /*
         * Make sure that pool knows last SLV and Limit from obd.
         */
+       spin_lock(&pl->pl_lock);
        ldlm_cli_pool_pop_slv(pl);
+       spin_unlock(&pl->pl_lock);
 
        spin_lock(&ns->ns_lock);
        unused = ns->ns_nr_unused;
@@ -566,23 +568,24 @@ static struct ldlm_pool_ops ldlm_cli_pool_ops = {
 /**
  * Pool recalc wrapper. Will call either client or server pool recalc callback
  * depending what pool \a pl is used.
+ *
+ * \retval             time in seconds for the next recalc of this pool
  */
 time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
 {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
        int count;
 
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
        if (recalc_interval_sec > 0) {
                spin_lock(&pl->pl_lock);
-               recalc_interval_sec = ktime_get_real_seconds() -
-                       pl->pl_recalc_time;
+               recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
 
                if (recalc_interval_sec > 0) {
                        /*
-                        * Update pool statistics every 1s.
+                        * Update pool statistics every recalc interval.
                         */
-                       ldlm_pool_recalc_stats(pl);
+                       ldlm_pool_recalc_stats(pl, recalc_interval_sec);
 
                        /*
                         * Zero out all rates and speed for the last period.
@@ -599,19 +602,7 @@ time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
                                    count);
        }
 
-       recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() +
-                             pl->pl_recalc_period;
-       if (recalc_interval_sec <= 0) {
-               /* DEBUG: should be re-removed after LU-4536 is fixed */
-               CDEBUG(D_DLMTRACE, "%s: Negative interval(%lld), too short period(%lld)\n",
-                      pl->pl_name, recalc_interval_sec,
-                      (s64)pl->pl_recalc_period);
-
-               /* Prevent too frequent recalculation. */
-               recalc_interval_sec = 1;
-       }
-
-       return recalc_interval_sec;
+       return pl->pl_recalc_time + pl->pl_recalc_period;
 }
 
 /**
@@ -657,6 +648,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
        int granted, grant_rate, cancel_rate, grant_step;
        int grant_speed, grant_plan, lvf;
        struct ldlm_pool *pl = m->private;
+       timeout_t period;
        __u64 slv, clv;
        __u32 limit;
 
@@ -666,8 +658,11 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
        limit = ldlm_pool_get_limit(pl);
        grant_plan = pl->pl_grant_plan;
        granted = ldlm_pool_granted(pl);
-       grant_rate = atomic_read(&pl->pl_grant_rate);
-       cancel_rate = atomic_read(&pl->pl_cancel_rate);
+       period = ktime_get_seconds() - pl->pl_recalc_time;
+       if (period <= 0)
+               period = 1;
+       grant_rate = atomic_read(&pl->pl_grant_rate) / period;
+       cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;
        grant_speed = grant_rate - cancel_rate;
        lvf = atomic_read(&pl->pl_lock_volume_factor);
        grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
@@ -677,7 +672,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
                   "  SLV: %llu\n"
                   "  CLV: %llu\n"
                   "  LVF: %d\n",
-                  pl->pl_name, slv, clv, lvf);
+                  pl->pl_name, slv, clv, (lvf * 100) >> 8);
 
        if (ns_is_server(ldlm_pl2ns(pl))) {
                seq_printf(m, "  GSP: %d%%\n", grant_step);
@@ -698,11 +693,15 @@ static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr,
        struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
                                            pl_kobj);
        int grant_speed;
+       timeout_t period;
 
        spin_lock(&pl->pl_lock);
        /* serialize with ldlm_pool_recalc */
-       grant_speed = atomic_read(&pl->pl_grant_rate) -
-                       atomic_read(&pl->pl_cancel_rate);
+       period = ktime_get_seconds() - pl->pl_recalc_time;
+       if (period <= 0)
+               period = 1;
+       grant_speed = (atomic_read(&pl->pl_grant_rate) -
+                      atomic_read(&pl->pl_cancel_rate)) / period;
        spin_unlock(&pl->pl_lock);
        return sprintf(buf, "%d\n", grant_speed);
 }
@@ -718,6 +717,9 @@ LUSTRE_RW_ATTR(recalc_period);
 LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64);
 LUSTRE_RO_ATTR(server_lock_volume);
 
+LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(client_lock_volume, u64);
+LUSTRE_RO_ATTR(client_lock_volume);
+
 LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic);
 LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic);
 LUSTRE_RW_ATTR(limit);
@@ -731,16 +733,58 @@ LUSTRE_RO_ATTR(cancel_rate);
 LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic);
 LUSTRE_RO_ATTR(grant_rate);
 
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic);
-LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic);
+static ssize_t lock_volume_factor_show(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      char *buf)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+       unsigned long tmp;
+
+       tmp = (atomic_read(&pl->pl_lock_volume_factor) * 100) >> 8;
+       return sprintf(buf, "%lu\n", tmp);
+}
+
+static ssize_t lock_volume_factor_store(struct kobject *kobj,
+                                       struct attribute *attr,
+                                       const char *buffer,
+                                       size_t count)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+       unsigned long tmp;
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &tmp);
+       if (rc < 0) {
+               return rc;
+       }
+
+       tmp = (tmp << 8) / 100;
+       atomic_set(&pl->pl_lock_volume_factor, tmp);
+
+       return count;
+
+}
 LUSTRE_RW_ATTR(lock_volume_factor);
 
+static ssize_t recalc_time_show(struct kobject *kobj,
+                               struct attribute *attr,
+                               char *buf)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       ktime_get_seconds() - pl->pl_recalc_time);
+}
+LUSTRE_RO_ATTR(recalc_time);
+
 /* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */
 static struct attribute *ldlm_pl_attrs[] = {
        &lustre_attr_grant_speed.attr,
        &lustre_attr_grant_plan.attr,
        &lustre_attr_recalc_period.attr,
        &lustre_attr_server_lock_volume.attr,
+       &lustre_attr_client_lock_volume.attr,
+       &lustre_attr_recalc_time.attr,
        &lustre_attr_limit.attr,
        &lustre_attr_granted.attr,
        &lustre_attr_cancel_rate.attr,
@@ -867,8 +911,8 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
 
        spin_lock_init(&pl->pl_lock);
        atomic_set(&pl->pl_granted, 0);
-       pl->pl_recalc_time = ktime_get_real_seconds();
-       atomic_set(&pl->pl_lock_volume_factor, 1);
+       pl->pl_recalc_time = ktime_get_seconds();
+       atomic_set(&pl->pl_lock_volume_factor, 1 << 8);
 
        atomic_set(&pl->pl_grant_rate, 0);
        atomic_set(&pl->pl_cancel_rate, 0);
@@ -1222,9 +1266,10 @@ static time64_t ldlm_pools_recalc_delay(enum ldlm_side side)
        struct ldlm_namespace *ns;
        struct ldlm_namespace *ns_old = NULL;
        /* seconds of sleep if no active namespaces */
-       time64_t delay = side == LDLM_NAMESPACE_SERVER ?
-                                LDLM_POOL_SRV_DEF_RECALC_PERIOD :
-                                LDLM_POOL_CLI_DEF_RECALC_PERIOD;
+       time64_t delay = ktime_get_seconds() +
+                        (side == LDLM_NAMESPACE_SERVER ?
+                         LDLM_POOL_SRV_DEF_RECALC_PERIOD :
+                         LDLM_POOL_CLI_DEF_RECALC_PERIOD);
        int nr;
 
        /* Recalc at least ldlm_namespace_nr(side) namespaces. */
@@ -1375,18 +1420,33 @@ static void ldlm_pools_recalc_task(struct work_struct *ws)
        /* Wake up the blocking threads from time to time. */
        ldlm_bl_thread_wakeup();
 
+       delay -= ktime_get_seconds();
+       if (delay <= 0) {
+               /* Prevent too frequent recalculation. */
+               CDEBUG(D_DLMTRACE, "Negative interval(%lld)\n", delay);
+               delay = 1;
+       }
+
        schedule_delayed_work(&ldlm_pools_recalc_work, cfs_time_seconds(delay));
 }
 
 int ldlm_pools_init(void)
 {
+       time64_t delay;
+
        DEF_SHRINKER_VAR(shsvar, ldlm_pools_srv_shrink,
                         ldlm_pools_srv_count, ldlm_pools_srv_scan);
        DEF_SHRINKER_VAR(shcvar, ldlm_pools_cli_shrink,
                         ldlm_pools_cli_count, ldlm_pools_cli_scan);
 
-       schedule_delayed_work(&ldlm_pools_recalc_work,
-                             LDLM_POOL_CLI_DEF_RECALC_PERIOD);
+#ifdef HAVE_SERVER_SUPPORT
+       delay = min(LDLM_POOL_SRV_DEF_RECALC_PERIOD,
+                   LDLM_POOL_CLI_DEF_RECALC_PERIOD);
+#else
+       delay = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
+#endif
+
+       schedule_delayed_work(&ldlm_pools_recalc_work, delay);
        ldlm_pools_srv_shrinker = set_shrinker(DEFAULT_SEEKS, &shsvar);
        ldlm_pools_cli_shrinker = set_shrinker(DEFAULT_SEEKS, &shcvar);
 
index ff5ae53..c322f15 100644 (file)
@@ -1679,7 +1679,7 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
        lvf = ldlm_pool_get_lvf(pl);
        la = div_u64(ktime_to_ns(ktime_sub(cur, lock->l_last_used)),
                     NSEC_PER_SEC);
-       lv = lvf * la * ns->ns_nr_unused;
+       lv = lvf * la * ns->ns_nr_unused >> 8;
 
        /* Inform pool about current CLV to see it via debugfs. */
        ldlm_pool_set_clv(pl, lv);
index b6363ae..b4e016d 100755 (executable)
@@ -11814,18 +11814,18 @@ test_124a() {
                skip "Limit is too small $LIMIT"
        fi
 
-        # Make LVF so higher that sleeping for $SLEEP is enough to _start_
-        # killing locks. Some time was spent for creating locks. This means
-        # that up to the moment of sleep finish we must have killed some of
-        # them (10-100 locks). This depends on how fast ther were created.
-        # Many of them were touched in almost the same moment and thus will
-        # be killed in groups.
-        local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE))
-
-        # Use $LRU_SIZE_B here to take into account real number of locks
-        # created in the case of CMD, LRU_SIZE_B != $NR in most of cases
-        local LRU_SIZE_B=$LRU_SIZE
-        log "LVF=$LVF"
+       # Make LVF so higher that sleeping for $SLEEP is enough to _start_
+       # killing locks. Some time was spent for creating locks. This means
+       # that up to the moment of sleep finish we must have killed some of
+       # them (10-100 locks). This depends on how fast ther were created.
+       # Many of them were touched in almost the same moment and thus will
+       # be killed in groups.
+       local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE * 100))
+
+       # Use $LRU_SIZE_B here to take into account real number of locks
+       # created in the case of CMD, LRU_SIZE_B != $NR in most of cases
+       local LRU_SIZE_B=$LRU_SIZE
+       log "LVF=$LVF"
        local OLD_LVF=$($LCTL get_param -n $NSDIR.pool.lock_volume_factor)
        log "OLD_LVF=$OLD_LVF"
        $LCTL set_param -n $NSDIR.pool.lock_volume_factor $LVF