LU-6529 ldlm: improve proc interface of lock reclaim

author Niu Yawei <yawei.niu@intel.com>

Fri, 28 Aug 2015 07:52:32 +0000 (03:52 -0400)

committer Oleg Drokin <oleg.drokin@intel.com>

Wed, 16 Sep 2015 01:06:25 +0000 (01:06 +0000)
author Niu Yawei <yawei.niu@intel.com>
Fri, 28 Aug 2015 07:52:32 +0000 (03:52 -0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 16 Sep 2015 01:06:25 +0000 (01:06 +0000)
diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h

index 5e26a40..e79dca3 100644 (file)
--- a/lustre/ldlm/ldlm_internal.h
+++ b/lustre/ldlm/ldlm_internal.h
@@ -344,8 +344,13 @@ void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
                                      union ldlm_wire_policy_data *wpolicy);
  
  /* ldlm_reclaim.c */
                                      union ldlm_wire_policy_data *wpolicy);
  
  /* ldlm_reclaim.c */
-extern __u64 ldlm_watermark_low;
-extern __u64 ldlm_watermark_high;
+#ifdef HAVE_SERVER_SUPPORT
+extern __u64 ldlm_reclaim_threshold;
+extern __u64 ldlm_lock_limit;
+extern __u64 ldlm_reclaim_threshold_mb;
+extern __u64 ldlm_lock_limit_mb;
+extern struct percpu_counter ldlm_granted_total;
+#endif
  int ldlm_reclaim_setup(void);
  void ldlm_reclaim_cleanup(void);
  void ldlm_reclaim_add(struct ldlm_lock *lock);
  int ldlm_reclaim_setup(void);
  void ldlm_reclaim_cleanup(void);
  void ldlm_reclaim_add(struct ldlm_lock *lock);
diff --git a/lustre/ldlm/ldlm_reclaim.c b/lustre/ldlm/ldlm_reclaim.c

index 722dd9f..d256c64 100644 (file)
--- a/lustre/ldlm/ldlm_reclaim.c
+++ b/lustre/ldlm/ldlm_reclaim.c
@@ -35,42 +35,33 @@
  
  /*
   * To avoid ldlm lock exhausting server memory, two global parameters:
  
  /*
   * To avoid ldlm lock exhausting server memory, two global parameters:
- * ldlm_watermark_low & ldlm_watermark_high are used for reclaiming
+ * ldlm_reclaim_threshold & ldlm_lock_limit are used for reclaiming
   * granted locks and rejecting incoming enqueue requests defensively.
   *
   * granted locks and rejecting incoming enqueue requests defensively.
   *
- * ldlm_watermark_low: When the amount of granted locks reaching this
+ * ldlm_reclaim_threshold: When the amount of granted locks reaching this
   * threshold, server start to revoke locks gradually.
   *
   * threshold, server start to revoke locks gradually.
   *
- * ldlm_watermark_high: When the amount of granted locks reaching this
+ * ldlm_lock_limit: When the amount of granted locks reaching this
   * threshold, server will return -EINPROGRESS to any incoming enqueue
   * request until the lock count is shrunk below the threshold again.
   *
   * threshold, server will return -EINPROGRESS to any incoming enqueue
   * request until the lock count is shrunk below the threshold again.
   *
- * ldlm_watermark_low & ldlm_watermark_high is set to 20% & 30% of the
+ * ldlm_reclaim_threshold & ldlm_lock_limit is set to 20% & 30% of the
   * total memory by default. It is tunable via proc entry, when it's set
   * to 0, the feature is disabled.
   */
  
   * total memory by default. It is tunable via proc entry, when it's set
   * to 0, the feature is disabled.
   */
  
-/*
- * FIXME:
- *
- * In current implementation, server identifies which locks should be
- * revoked by choosing locks from namespace/resource in a roundrobin
- * manner, which isn't optimal. The ideal way should be server notifies
- * clients to cancel locks voluntarily, because only client knows exactly
- * when the lock is last used.
- *
- * However how to notify client immediately is a problem, one idea
- * is to leverage the glimplse callbacks on some artificial global
- * lock (like quota global lock does), but that requires protocol
- * changes, let's fix it in future long-term solution.
- */
+#ifdef HAVE_SERVER_SUPPORT
  
  
-__u64 ldlm_watermark_low;
-__u64 ldlm_watermark_high;
+/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
+__u64 ldlm_reclaim_threshold;
+__u64 ldlm_lock_limit;
  
  
-#ifdef HAVE_SERVER_SUPPORT
+/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
+ * proc interface. */
+__u64 ldlm_reclaim_threshold_mb;
+__u64 ldlm_lock_limit_mb;
  
  
-static struct percpu_counter   ldlm_granted_total;
+struct percpu_counter          ldlm_granted_total;
  static atomic_t                        ldlm_nr_reclaimer;
  static cfs_duration_t          ldlm_last_reclaim_age;
  static cfs_time_t              ldlm_last_reclaim_time;
  static atomic_t                        ldlm_nr_reclaimer;
  static cfs_duration_t          ldlm_last_reclaim_age;
  static cfs_time_t              ldlm_last_reclaim_time;
@@ -101,6 +92,17 @@ static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
         return false;
  }
  
         return false;
  }
  
+/**
+ * Callback function for revoking locks from certain resource.
+ *
+ * \param [in] hs      ns_rs_hash
+ * \param [in] bd      current bucket of ns_rs_hash
+ * \param [in] hnode   hnode of the resource
+ * \param [in] arg     opaque data
+ *
+ * \retval 0           continue the scan
+ * \retval 1           stop the iteration
+ */
  static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                 struct hlist_node *hnode, void *arg)
  
  static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                 struct hlist_node *hnode, void *arg)
  
@@ -162,6 +164,18 @@ static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
         return rc;
  }
  
         return rc;
  }
  
+/**
+ * Revoke locks from the resources of a namespace in a roundrobin
+ * manner.
+ *
+ * \param[in] ns       namespace to do the lock revoke on
+ * \param[in,out] count on input, count of locks to be revoked; on
+ *                     output, count of locks still to be revoked
+ * \param[in] age      only revoke locks older than the 'age'
+ * \param[in] skip     scan from the first lock on resource if the
+ *                     'skip' is false, otherwise, continue scan
+ *                     from the last scanned position
+ */
  static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
                              cfs_duration_t age, bool skip)
  {
  static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
                              cfs_duration_t age, bool skip)
  {
@@ -224,6 +238,11 @@ static inline cfs_duration_t ldlm_reclaim_age(void)
         return age;
  }
  
         return age;
  }
  
+/**
+ * Revoke certain amount of locks from all the server namespaces
+ * in a roundrobin manner. Lock age is used to avoid reclaim on
+ * the non-aged locks.
+ */
  static void ldlm_reclaim_ns(void)
  {
         struct ldlm_namespace   *ns;
  static void ldlm_reclaim_ns(void)
  {
         struct ldlm_namespace   *ns;
@@ -290,23 +309,32 @@ void ldlm_reclaim_del(struct ldlm_lock *lock)
         percpu_counter_sub(&ldlm_granted_total, 1);
  }
  
         percpu_counter_sub(&ldlm_granted_total, 1);
  }
  
+/**
+ * Check on the total granted locks: return true if it reaches the
+ * high watermark (ldlm_lock_limit), otherwise return false; It also
+ * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
+ * is reached.
+ *
+ * \retval true                high watermark reached.
+ * \retval false       high watermark not reached.
+ */
  bool ldlm_reclaim_full(void)
  {
  bool ldlm_reclaim_full(void)
  {
-       __u64 high = ldlm_watermark_high;
-       __u64 low = ldlm_watermark_low;
+       __u64 high = ldlm_lock_limit;
+       __u64 low = ldlm_reclaim_threshold;
  
         if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
                 low = cfs_fail_val;
  
         if (low != 0 &&
  
         if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
                 low = cfs_fail_val;
  
         if (low != 0 &&
-           percpu_counter_read_positive(&ldlm_granted_total) > low)
+           percpu_counter_sum_positive(&ldlm_granted_total) > low)
                 ldlm_reclaim_ns();
  
         if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
                 high = cfs_fail_val;
  
         if (high != 0 &&
                 ldlm_reclaim_ns();
  
         if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
                 high = cfs_fail_val;
  
         if (high != 0 &&
-           percpu_counter_read_positive(&ldlm_granted_total) > high)
+           percpu_counter_sum_positive(&ldlm_granted_total) > high)
                 return true;
  
         return false;
                 return true;
  
         return false;
@@ -322,14 +350,23 @@ static inline __u64 ldlm_ratio2locknr(int ratio)
         return locknr;
  }
  
         return locknr;
  }
  
+static inline __u64 ldlm_locknr2mb(__u64 locknr)
+{
+       return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
+}
+
  #define LDLM_WM_RATIO_LOW_DEFAULT      20
  #define LDLM_WM_RATIO_HIGH_DEFAULT     30
  
  int ldlm_reclaim_setup(void)
  {
         atomic_set(&ldlm_nr_reclaimer, 0);
  #define LDLM_WM_RATIO_LOW_DEFAULT      20
  #define LDLM_WM_RATIO_HIGH_DEFAULT     30
  
  int ldlm_reclaim_setup(void)
  {
         atomic_set(&ldlm_nr_reclaimer, 0);
-       ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
-       ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+
+       ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
+       ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
+       ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+       ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);
+
         ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
         ldlm_last_reclaim_time = cfs_time_current();
  
         ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
         ldlm_last_reclaim_time = cfs_time_current();
  
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c

index bcc299e..0a81a87 100644 (file)
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -84,13 +84,11 @@ LPROC_SEQ_FOPS_WO_TYPE(ldlm, dump_ns);
  LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
  LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
  
  LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
  LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
  
-/* Lock count is stored in the watermark, and it's display as number of MB
- * memory consumed by the locks */
+#ifdef HAVE_SERVER_SUPPORT
+
  static int seq_watermark_show(struct seq_file *m, void *data)
  {
  static int seq_watermark_show(struct seq_file *m, void *data)
  {
-       __u64 locknr = *(__u64 *)m->private;
-       return seq_printf(m, LPU64"\n",
-                         (locknr * sizeof(struct ldlm_lock)) >> 20);
+       return seq_printf(m, LPU64"\n", *(__u64 *)m->private);
  }
  
  static ssize_t seq_watermark_write(struct file *file,
  }
  
  static ssize_t seq_watermark_write(struct file *file,
@@ -99,23 +97,51 @@ static ssize_t seq_watermark_write(struct file *file,
  {
         __u64 watermark;
         __u64 *data = ((struct seq_file *)file->private_data)->private;
  {
         __u64 watermark;
         __u64 *data = ((struct seq_file *)file->private_data)->private;
+       bool wm_low = (data == &ldlm_reclaim_threshold_mb) ? true : false;
         int rc;
  
         rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20);
         if (rc) {
         int rc;
  
         rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20);
         if (rc) {
-               CERROR("Failed to set LDLM watermark, rc = %d.\n", rc);
+               CERROR("Failed to set %s, rc = %d.\n",
+                      wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb",
+                      rc);
                 return rc;
         } else if (watermark != 0 && watermark < (1 << 20)) {
                 return rc;
         } else if (watermark != 0 && watermark < (1 << 20)) {
-               CERROR("Watermark should be greater than 1MB.\n");
+               CERROR("%s should be greater than 1MB.\n",
+                      wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb");
                 return -EINVAL;
         }
                 return -EINVAL;
         }
+       watermark >>= 20;
+
+       if (wm_low) {
+               if (ldlm_lock_limit_mb != 0 && watermark > ldlm_lock_limit_mb) {
+                       CERROR("lock_reclaim_threshold_mb must be smaller than "
+                              "lock_limit_mb.\n");
+                       return -EINVAL;
+               }
  
  
-       do_div(watermark, sizeof(struct ldlm_lock));
-       *data = watermark;
+               *data = watermark;
+               if (watermark != 0) {
+                       watermark <<= 20;
+                       do_div(watermark, sizeof(struct ldlm_lock));
+               }
+               ldlm_reclaim_threshold = watermark;
+       } else {
+               if (ldlm_reclaim_threshold_mb != 0 &&
+                   watermark < ldlm_reclaim_threshold_mb) {
+                       CERROR("lock_limit_mb must be greater than "
+                              "lock_reclaim_threshold_mb.\n");
+                       return -EINVAL;
+               }
+
+               *data = watermark;
+               if (watermark != 0) {
+                       watermark <<= 20;
+                       do_div(watermark, sizeof(struct ldlm_lock));
+               }
+               ldlm_lock_limit = watermark;
+       }
  
  
-       if (ldlm_watermark_low != 0 && ldlm_watermark_high != 0 &&
-           ldlm_watermark_low > ldlm_watermark_high)
-               ldlm_watermark_low = ldlm_watermark_high;
         return count;
  }
  
         return count;
  }
  
@@ -133,6 +159,27 @@ static const struct file_operations ldlm_watermark_fops = {
         .release        = lprocfs_single_release,
  };
  
         .release        = lprocfs_single_release,
  };
  
+static int seq_granted_show(struct seq_file *m, void *data)
+{
+       return seq_printf(m, LPU64"\n", percpu_counter_sum_positive(
+                               (struct percpu_counter *)m->private));
+}
+
+static int seq_granted_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, seq_granted_show, PDE_DATA(inode));
+}
+
+static const struct file_operations ldlm_granted_fops = {
+       .owner  = THIS_MODULE,
+       .open   = seq_granted_open,
+       .read   = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+#endif /* HAVE_SERVER_SUPPORT */
+
  int ldlm_proc_setup(void)
  {
         int rc;
  int ldlm_proc_setup(void)
  {
         int rc;
@@ -146,12 +193,17 @@ int ldlm_proc_setup(void)
                 { .name =       "cancel_unused_locks_before_replay",
                   .fops =       &ldlm_rw_uint_fops,
                   .data =       &ldlm_cancel_unused_locks_before_replay },
                 { .name =       "cancel_unused_locks_before_replay",
                   .fops =       &ldlm_rw_uint_fops,
                   .data =       &ldlm_cancel_unused_locks_before_replay },
-               { .name =       "watermark_mb_low",
+#ifdef HAVE_SERVER_SUPPORT
+               { .name =       "lock_reclaim_threshold_mb",
                   .fops =       &ldlm_watermark_fops,
                   .fops =       &ldlm_watermark_fops,
-                 .data =       &ldlm_watermark_low },
-               { .name =       "watermark_mb_high",
+                 .data =       &ldlm_reclaim_threshold_mb },
+               { .name =       "lock_limit_mb",
                   .fops =       &ldlm_watermark_fops,
                   .fops =       &ldlm_watermark_fops,
-                 .data =       &ldlm_watermark_high },
+                 .data =       &ldlm_lock_limit_mb },
+               { .name =       "lock_granted_count",
+                 .fops =       &ldlm_granted_fops,
+                 .data =       &ldlm_granted_total },
+#endif
                 { NULL }};
         ENTRY;
         LASSERT(ldlm_ns_proc_dir == NULL);
                 { NULL }};
         ENTRY;
         LASSERT(ldlm_ns_proc_dir == NULL);
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index e5a0168..e546fa3 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -9492,7 +9492,7 @@ test_134a() {
         rm $DIR/$tdir/m
         unlinkmany $DIR/$tdir/f $nr
  }
         rm $DIR/$tdir/m
         unlinkmany $DIR/$tdir/f $nr
  }
-run_test 134a "Server reclaims locks when reaching low watermark"
+run_test 134a "Server reclaims locks when reaching lock_reclaim_threshold"
  
  test_134b() {
         [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
  
  test_134b() {
         [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
@@ -9501,9 +9501,10 @@ test_134b() {
         mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
         cancel_lru_locks mdc
  
         mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
         cancel_lru_locks mdc
  
-       local low_wm=$(do_facet mds1 $LCTL get_param -n ldlm.watermark_mb_low)
+       local low_wm=$(do_facet mds1 $LCTL get_param -n \
+                       ldlm.lock_reclaim_threshold_mb)
         # disable reclaim temporarily
         # disable reclaim temporarily
-       do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=0
+       do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=0
  
         #define OBD_FAIL_LDLM_WATERMARK_HIGH     0x328
         do_facet mds1 $LCTL set_param fail_loc=0x328
  
         #define OBD_FAIL_LDLM_WATERMARK_HIGH     0x328
         do_facet mds1 $LCTL set_param fail_loc=0x328
@@ -9520,17 +9521,18 @@ test_134b() {
         if ! ps -p $create_pid  > /dev/null 2>&1; then
                 do_facet mds1 $LCTL set_param fail_loc=0
                 do_facet mds1 $LCTL set_param fail_val=0
         if ! ps -p $create_pid  > /dev/null 2>&1; then
                 do_facet mds1 $LCTL set_param fail_loc=0
                 do_facet mds1 $LCTL set_param fail_val=0
-               do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+               do_facet mds1 $LCTL set_param \
+                       ldlm.lock_reclaim_threshold_mb=${low_wm}m
                 error "createmany finished incorrectly!"
         fi
         do_facet mds1 $LCTL set_param fail_loc=0
         do_facet mds1 $LCTL set_param fail_val=0
                 error "createmany finished incorrectly!"
         fi
         do_facet mds1 $LCTL set_param fail_loc=0
         do_facet mds1 $LCTL set_param fail_val=0
-       do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+       do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=${low_wm}m
         wait $create_pid || return 1
  
         unlinkmany $DIR/$tdir/f $nr
  }
         wait $create_pid || return 1
  
         unlinkmany $DIR/$tdir/f $nr
  }
-run_test 134b "Server rejects lock request when reaching high watermark"
+run_test 134b "Server rejects lock request when reaching lock_limit_mb"
  
  test_140() { #bug-17379
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
  
  test_140() { #bug-17379
         [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
author	Niu Yawei <yawei.niu@intel.com>
	Fri, 28 Aug 2015 07:52:32 +0000 (03:52 -0400)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Wed, 16 Sep 2015 01:06:25 +0000 (01:06 +0000)
lustre/ldlm/ldlm_internal.h		patch \| blob \| history
lustre/ldlm/ldlm_reclaim.c		patch \| blob \| history
lustre/ldlm/ldlm_resource.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history