LU-11518 ldlm: pool fixes

author Vitaly Fertman <c17818@cray.com>

Fri, 31 Jul 2020 18:30:09 +0000 (21:30 +0300)

committer Oleg Drokin <green@whamcloud.com>

Sat, 19 Sep 2020 14:13:12 +0000 (14:13 +0000)
author Vitaly Fertman <c17818@cray.com>
Fri, 31 Jul 2020 18:30:09 +0000 (21:30 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 19 Sep 2020 14:13:12 +0000 (14:13 +0000)
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h

index f1071bd..5cc31e9 100644 (file)
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -254,8 +254,9 @@ struct ldlm_pool {
         __u64                   pl_server_lock_volume;
         /** Current biggest client lock volume. Protected by pl_lock. */
         __u64                   pl_client_lock_volume;
-       /** Lock volume factor. SLV on client is calculated as following:
-        *  server_slv * lock_volume_factor. */
+       /** Lock volume factor, shown in percents in procfs, but internally
+        *  Client SLV calculated as: server_slv * lock_volume_factor >> 8.
+        */
         atomic_t                pl_lock_volume_factor;
         /** Time when last SLV from server was obtained. */
         time64_t                pl_recalc_time;
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c

index b6d64f0..8eb29c4 100644 (file)
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -286,13 +286,13 @@ static void ldlm_pool_recalc_slv(struct ldlm_pool *pl)
   *
   * \pre ->pl_lock is locked.
   */
-static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
+static void ldlm_pool_recalc_stats(struct ldlm_pool *pl, timeout_t period)
  {
         int grant_plan = pl->pl_grant_plan;
         __u64 slv = pl->pl_server_lock_volume;
         int granted = ldlm_pool_granted(pl);
-       int grant_rate = atomic_read(&pl->pl_grant_rate);
-       int cancel_rate = atomic_read(&pl->pl_cancel_rate);
+       int grant_rate = atomic_read(&pl->pl_grant_rate) / period;
+       int cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;
  
         lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
                             slv);
@@ -334,16 +334,16 @@ static void ldlm_srv_pool_push_slv(struct ldlm_pool *pl)
   */
  static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
  {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
  
         ENTRY;
  
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         if (recalc_interval_sec < pl->pl_recalc_period)
                 RETURN(0);
  
         spin_lock(&pl->pl_lock);
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         if (recalc_interval_sec < pl->pl_recalc_period) {
                 spin_unlock(&pl->pl_lock);
                 RETURN(0);
@@ -364,7 +364,7 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
          */
         ldlm_pool_recalc_grant_plan(pl);
  
-       pl->pl_recalc_time = ktime_get_real_seconds();
+       pl->pl_recalc_time = ktime_get_seconds();
         lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
                             recalc_interval_sec);
         spin_unlock(&pl->pl_lock);
@@ -473,12 +473,12 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
   */
  static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
  {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
         int ret;
  
         ENTRY;
  
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         if (recalc_interval_sec < pl->pl_recalc_period)
                 RETURN(0);
  
@@ -486,7 +486,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
         /*
          * Check if we need to recalc lists now.
          */
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         if (recalc_interval_sec < pl->pl_recalc_period) {
                 spin_unlock(&pl->pl_lock);
                 RETURN(0);
@@ -511,7 +511,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
          * Time of LRU resizing might be longer than period,
          * so update after LRU resizing rather than before it.
          */
-       pl->pl_recalc_time = ktime_get_real_seconds();
+       pl->pl_recalc_time = ktime_get_seconds();
         lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
                             recalc_interval_sec);
         spin_unlock(&pl->pl_lock);
@@ -540,7 +540,9 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
         /*
          * Make sure that pool knows last SLV and Limit from obd.
          */
+       spin_lock(&pl->pl_lock);
         ldlm_cli_pool_pop_slv(pl);
+       spin_unlock(&pl->pl_lock);
  
         spin_lock(&ns->ns_lock);
         unused = ns->ns_nr_unused;
@@ -566,23 +568,24 @@ static struct ldlm_pool_ops ldlm_cli_pool_ops = {
  /**
   * Pool recalc wrapper. Will call either client or server pool recalc callback
   * depending what pool \a pl is used.
+ *
+ * \retval             time in seconds for the next recalc of this pool
   */
  time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
  {
-       time64_t recalc_interval_sec;
+       timeout_t recalc_interval_sec;
         int count;
  
-       recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
+       recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
         if (recalc_interval_sec > 0) {
                 spin_lock(&pl->pl_lock);
-               recalc_interval_sec = ktime_get_real_seconds() -
-                       pl->pl_recalc_time;
+               recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
  
                 if (recalc_interval_sec > 0) {
                         /*
-                        * Update pool statistics every 1s.
+                        * Update pool statistics every recalc interval.
                          */
-                       ldlm_pool_recalc_stats(pl);
+                       ldlm_pool_recalc_stats(pl, recalc_interval_sec);
  
                         /*
                          * Zero out all rates and speed for the last period.
@@ -599,19 +602,7 @@ time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
                                     count);
         }
  
-       recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() +
-                             pl->pl_recalc_period;
-       if (recalc_interval_sec <= 0) {
-               /* DEBUG: should be re-removed after LU-4536 is fixed */
-               CDEBUG(D_DLMTRACE, "%s: Negative interval(%lld), too short period(%lld)\n",
-                      pl->pl_name, recalc_interval_sec,
-                      (s64)pl->pl_recalc_period);
-
-               /* Prevent too frequent recalculation. */
-               recalc_interval_sec = 1;
-       }
-
-       return recalc_interval_sec;
+       return pl->pl_recalc_time + pl->pl_recalc_period;
  }
  
  /**
@@ -657,6 +648,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
         int granted, grant_rate, cancel_rate, grant_step;
         int grant_speed, grant_plan, lvf;
         struct ldlm_pool *pl = m->private;
+       timeout_t period;
         __u64 slv, clv;
         __u32 limit;
  
@@ -666,8 +658,11 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
         limit = ldlm_pool_get_limit(pl);
         grant_plan = pl->pl_grant_plan;
         granted = ldlm_pool_granted(pl);
-       grant_rate = atomic_read(&pl->pl_grant_rate);
-       cancel_rate = atomic_read(&pl->pl_cancel_rate);
+       period = ktime_get_seconds() - pl->pl_recalc_time;
+       if (period <= 0)
+               period = 1;
+       grant_rate = atomic_read(&pl->pl_grant_rate) / period;
+       cancel_rate = atomic_read(&pl->pl_cancel_rate) / period;
         grant_speed = grant_rate - cancel_rate;
         lvf = atomic_read(&pl->pl_lock_volume_factor);
         grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
@@ -677,7 +672,7 @@ static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused)
                    "  SLV: %llu\n"
                    "  CLV: %llu\n"
                    "  LVF: %d\n",
-                  pl->pl_name, slv, clv, lvf);
+                  pl->pl_name, slv, clv, (lvf * 100) >> 8);
  
         if (ns_is_server(ldlm_pl2ns(pl))) {
                 seq_printf(m, "  GSP: %d%%\n", grant_step);
@@ -698,11 +693,15 @@ static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr,
         struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool,
                                             pl_kobj);
         int grant_speed;
+       timeout_t period;
  
         spin_lock(&pl->pl_lock);
         /* serialize with ldlm_pool_recalc */
-       grant_speed = atomic_read(&pl->pl_grant_rate) -
-                       atomic_read(&pl->pl_cancel_rate);
+       period = ktime_get_seconds() - pl->pl_recalc_time;
+       if (period <= 0)
+               period = 1;
+       grant_speed = (atomic_read(&pl->pl_grant_rate) -
+                      atomic_read(&pl->pl_cancel_rate)) / period;
         spin_unlock(&pl->pl_lock);
         return sprintf(buf, "%d\n", grant_speed);
  }
@@ -718,6 +717,9 @@ LUSTRE_RW_ATTR(recalc_period);
  LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64);
  LUSTRE_RO_ATTR(server_lock_volume);
  
+LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(client_lock_volume, u64);
+LUSTRE_RO_ATTR(client_lock_volume);
+
  LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic);
  LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic);
  LUSTRE_RW_ATTR(limit);
@@ -731,16 +733,58 @@ LUSTRE_RO_ATTR(cancel_rate);
  LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic);
  LUSTRE_RO_ATTR(grant_rate);
  
-LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic);
-LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic);
+static ssize_t lock_volume_factor_show(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      char *buf)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+       unsigned long tmp;
+
+       tmp = (atomic_read(&pl->pl_lock_volume_factor) * 100) >> 8;
+       return sprintf(buf, "%lu\n", tmp);
+}
+
+static ssize_t lock_volume_factor_store(struct kobject *kobj,
+                                       struct attribute *attr,
+                                       const char *buffer,
+                                       size_t count)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+       unsigned long tmp;
+       int rc;
+
+       rc = kstrtoul(buffer, 10, &tmp);
+       if (rc < 0) {
+               return rc;
+       }
+
+       tmp = (tmp << 8) / 100;
+       atomic_set(&pl->pl_lock_volume_factor, tmp);
+
+       return count;
+
+}
  LUSTRE_RW_ATTR(lock_volume_factor);
  
+static ssize_t recalc_time_show(struct kobject *kobj,
+                               struct attribute *attr,
+                               char *buf)
+{
+       struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, pl_kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       ktime_get_seconds() - pl->pl_recalc_time);
+}
+LUSTRE_RO_ATTR(recalc_time);
+
  /* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */
  static struct attribute *ldlm_pl_attrs[] = {
         &lustre_attr_grant_speed.attr,
         &lustre_attr_grant_plan.attr,
         &lustre_attr_recalc_period.attr,
         &lustre_attr_server_lock_volume.attr,
+       &lustre_attr_client_lock_volume.attr,
+       &lustre_attr_recalc_time.attr,
         &lustre_attr_limit.attr,
         &lustre_attr_granted.attr,
         &lustre_attr_cancel_rate.attr,
@@ -867,8 +911,8 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
  
         spin_lock_init(&pl->pl_lock);
         atomic_set(&pl->pl_granted, 0);
-       pl->pl_recalc_time = ktime_get_real_seconds();
-       atomic_set(&pl->pl_lock_volume_factor, 1);
+       pl->pl_recalc_time = ktime_get_seconds();
+       atomic_set(&pl->pl_lock_volume_factor, 1 << 8);
  
         atomic_set(&pl->pl_grant_rate, 0);
         atomic_set(&pl->pl_cancel_rate, 0);
@@ -1222,9 +1266,10 @@ static time64_t ldlm_pools_recalc_delay(enum ldlm_side side)
         struct ldlm_namespace *ns;
         struct ldlm_namespace *ns_old = NULL;
         /* seconds of sleep if no active namespaces */
-       time64_t delay = side == LDLM_NAMESPACE_SERVER ?
-                                LDLM_POOL_SRV_DEF_RECALC_PERIOD :
-                                LDLM_POOL_CLI_DEF_RECALC_PERIOD;
+       time64_t delay = ktime_get_seconds() +
+                        (side == LDLM_NAMESPACE_SERVER ?
+                         LDLM_POOL_SRV_DEF_RECALC_PERIOD :
+                         LDLM_POOL_CLI_DEF_RECALC_PERIOD);
         int nr;
  
         /* Recalc at least ldlm_namespace_nr(side) namespaces. */
@@ -1375,18 +1420,33 @@ static void ldlm_pools_recalc_task(struct work_struct *ws)
         /* Wake up the blocking threads from time to time. */
         ldlm_bl_thread_wakeup();
  
+       delay -= ktime_get_seconds();
+       if (delay <= 0) {
+               /* Prevent too frequent recalculation. */
+               CDEBUG(D_DLMTRACE, "Negative interval(%lld)\n", delay);
+               delay = 1;
+       }
+
         schedule_delayed_work(&ldlm_pools_recalc_work, cfs_time_seconds(delay));
  }
  
  int ldlm_pools_init(void)
  {
+       time64_t delay;
+
         DEF_SHRINKER_VAR(shsvar, ldlm_pools_srv_shrink,
                          ldlm_pools_srv_count, ldlm_pools_srv_scan);
         DEF_SHRINKER_VAR(shcvar, ldlm_pools_cli_shrink,
                          ldlm_pools_cli_count, ldlm_pools_cli_scan);
  
-       schedule_delayed_work(&ldlm_pools_recalc_work,
-                             LDLM_POOL_CLI_DEF_RECALC_PERIOD);
+#ifdef HAVE_SERVER_SUPPORT
+       delay = min(LDLM_POOL_SRV_DEF_RECALC_PERIOD,
+                   LDLM_POOL_CLI_DEF_RECALC_PERIOD);
+#else
+       delay = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
+#endif
+
+       schedule_delayed_work(&ldlm_pools_recalc_work, delay);
         ldlm_pools_srv_shrinker = set_shrinker(DEFAULT_SEEKS, &shsvar);
         ldlm_pools_cli_shrinker = set_shrinker(DEFAULT_SEEKS, &shcvar);
  
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index ff5ae53..c322f15 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -1679,7 +1679,7 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
         lvf = ldlm_pool_get_lvf(pl);
         la = div_u64(ktime_to_ns(ktime_sub(cur, lock->l_last_used)),
                      NSEC_PER_SEC);
-       lv = lvf * la * ns->ns_nr_unused;
+       lv = lvf * la * ns->ns_nr_unused >> 8;
  
         /* Inform pool about current CLV to see it via debugfs. */
         ldlm_pool_set_clv(pl, lv);
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index b6363ae..b4e016d 100755 (executable)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -11814,18 +11814,18 @@ test_124a() {
                 skip "Limit is too small $LIMIT"
         fi
  
-        # Make LVF so higher that sleeping for $SLEEP is enough to _start_
-        # killing locks. Some time was spent for creating locks. This means
-        # that up to the moment of sleep finish we must have killed some of
-        # them (10-100 locks). This depends on how fast ther were created.
-        # Many of them were touched in almost the same moment and thus will
-        # be killed in groups.
-        local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE))
-
-        # Use $LRU_SIZE_B here to take into account real number of locks
-        # created in the case of CMD, LRU_SIZE_B != $NR in most of cases
-        local LRU_SIZE_B=$LRU_SIZE
-        log "LVF=$LVF"
+       # Make LVF so higher that sleeping for $SLEEP is enough to _start_
+       # killing locks. Some time was spent for creating locks. This means
+       # that up to the moment of sleep finish we must have killed some of
+       # them (10-100 locks). This depends on how fast ther were created.
+       # Many of them were touched in almost the same moment and thus will
+       # be killed in groups.
+       local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE * 100))
+
+       # Use $LRU_SIZE_B here to take into account real number of locks
+       # created in the case of CMD, LRU_SIZE_B != $NR in most of cases
+       local LRU_SIZE_B=$LRU_SIZE
+       log "LVF=$LVF"
         local OLD_LVF=$($LCTL get_param -n $NSDIR.pool.lock_volume_factor)
         log "OLD_LVF=$OLD_LVF"
         $LCTL set_param -n $NSDIR.pool.lock_volume_factor $LVF
author	Vitaly Fertman <c17818@cray.com>
	Fri, 31 Jul 2020 18:30:09 +0000 (21:30 +0300)
committer	Oleg Drokin <green@whamcloud.com>
	Sat, 19 Sep 2020 14:13:12 +0000 (14:13 +0000)
lustre/include/lustre_dlm.h		patch \| blob \| history
lustre/ldlm/ldlm_pool.c		patch \| blob \| history
lustre/ldlm/ldlm_request.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history