X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fldlm%2Fldlm_reclaim.c;h=b551ea2cd419126da64d6301f2bb454f2ef26bb1;hb=refs%2Fchanges%2F64%2F13564%2F102;hp=722dd9f3336c3085b64234e6777fb9990e25ed4c;hpb=930dca7253bc2531bffa15dc763db1081cdf32d8;p=fs%2Flustre-release.git

diff --git a/lustre/ldlm/ldlm_reclaim.c b/lustre/ldlm/ldlm_reclaim.c
index 722dd9f..b551ea2 100644
--- a/lustre/ldlm/ldlm_reclaim.c
+++ b/lustre/ldlm/ldlm_reclaim.c
@@ -35,42 +35,33 @@
 
 /*
  * To avoid ldlm lock exhausting server memory, two global parameters:
- * ldlm_watermark_low & ldlm_watermark_high are used for reclaiming
+ * ldlm_reclaim_threshold & ldlm_lock_limit are used for reclaiming
  * granted locks and rejecting incoming enqueue requests defensively.
  *
- * ldlm_watermark_low: When the amount of granted locks reaching this
+ * ldlm_reclaim_threshold: When the amount of granted locks reaching this
  * threshold, server start to revoke locks gradually.
  *
- * ldlm_watermark_high: When the amount of granted locks reaching this
+ * ldlm_lock_limit: When the amount of granted locks reaching this
  * threshold, server will return -EINPROGRESS to any incoming enqueue
  * request until the lock count is shrunk below the threshold again.
  *
- * ldlm_watermark_low & ldlm_watermark_high is set to 20% & 30% of the
+ * ldlm_reclaim_threshold & ldlm_lock_limit are set to 20% & 30% of the
  * total memory by default. It is tunable via proc entry, when it's set
  * to 0, the feature is disabled.
  */
 
-/*
- * FIXME:
- *
- * In current implementation, server identifies which locks should be
- * revoked by choosing locks from namespace/resource in a roundrobin
- * manner, which isn't optimal. The ideal way should be server notifies
- * clients to cancel locks voluntarily, because only client knows exactly
- * when the lock is last used.
- *
- * However how to notify client immediately is a problem, one idea
- * is to leverage the glimplse callbacks on some artificial global
- * lock (like quota global lock does), but that requires protocol
- * changes, let's fix it in future long-term solution.
- */
+#ifdef HAVE_SERVER_SUPPORT
 
-__u64 ldlm_watermark_low;
-__u64 ldlm_watermark_high;
+/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
+__u64 ldlm_reclaim_threshold;
+__u64 ldlm_lock_limit;
 
-#ifdef HAVE_SERVER_SUPPORT
+/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
+ * proc interface. */
+__u64 ldlm_reclaim_threshold_mb;
+__u64 ldlm_lock_limit_mb;
 
-static struct percpu_counter	ldlm_granted_total;
+struct percpu_counter		ldlm_granted_total;
 static atomic_t			ldlm_nr_reclaimer;
 static cfs_duration_t		ldlm_last_reclaim_age;
 static cfs_time_t		ldlm_last_reclaim_time;
@@ -101,6 +92,17 @@ static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
 	return false;
 }
 
+/**
+ * Callback function for revoking locks from a certain resource.
+ *
+ * \param [in] hs	ns_rs_hash
+ * \param [in] bd	current bucket of ns_rs_hash
+ * \param [in] hnode	hnode of the resource
+ * \param [in] arg	opaque data
+ *
+ * \retval 0		continue the scan
+ * \retval 1		stop the iteration
+ */
 static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 				struct hlist_node *hnode, void *arg)
 
@@ -162,6 +164,18 @@ static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 	return rc;
 }
 
+/**
+ * Revoke locks from the resources of a namespace in a roundrobin
+ * manner.
+ *
+ * \param[in] ns	namespace to do the lock revoke on
+ * \param[in] count	count of locks to be revoked
+ * \param[in] age	only revoke locks older than the 'age'
+ * \param[in] skip	scan from the first lock on resource if the
+ *			'skip' is false, otherwise, continue scan
+ *			from the last scanned position
+ * \param[out] count	count of locks still to be revoked
+ */
 static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
 			     cfs_duration_t age, bool skip)
 {
@@ -224,6 +238,11 @@ static inline cfs_duration_t ldlm_reclaim_age(void)
 	return age;
 }
 
+/**
+ * Revoke a certain number of locks from all the server namespaces
+ * in a roundrobin manner. Lock age is used to avoid reclaim on
+ * the non-aged locks.
+ */
 static void ldlm_reclaim_ns(void)
 {
 	struct ldlm_namespace	*ns;
@@ -290,23 +309,32 @@ void ldlm_reclaim_del(struct ldlm_lock *lock)
 	percpu_counter_sub(&ldlm_granted_total, 1);
 }
 
+/**
+ * Check on the total granted locks: return true if it exceeds the
+ * high watermark (ldlm_lock_limit), otherwise return false. It also
+ * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
+ * is exceeded.
+ *
+ * \retval true		high watermark exceeded.
+ * \retval false	high watermark not exceeded.
+ */
 bool ldlm_reclaim_full(void)
 {
-	__u64 high = ldlm_watermark_high;
-	__u64 low = ldlm_watermark_low;
+	__u64 high = ldlm_lock_limit;
+	__u64 low = ldlm_reclaim_threshold;
 
 	if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
 		low = cfs_fail_val;
 
 	if (low != 0 &&
-	    percpu_counter_read_positive(&ldlm_granted_total) > low)
+	    percpu_counter_sum_positive(&ldlm_granted_total) > low)
 		ldlm_reclaim_ns();
 
 	if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
 		high = cfs_fail_val;
 
 	if (high != 0 &&
-	    percpu_counter_read_positive(&ldlm_granted_total) > high)
+	    percpu_counter_sum_positive(&ldlm_granted_total) > high)
 		return true;
 
 	return false;
@@ -316,24 +344,37 @@ static inline __u64 ldlm_ratio2locknr(int ratio)
 {
 	__u64 locknr;
 
-	locknr = ((__u64)NUM_CACHEPAGES << PAGE_CACHE_SHIFT) * ratio;
+	locknr = ((__u64)NUM_CACHEPAGES << PAGE_SHIFT) * ratio;
 	do_div(locknr, 100 * sizeof(struct ldlm_lock));
 
 	return locknr;
 }
 
+static inline __u64 ldlm_locknr2mb(__u64 locknr)
+{
+	return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
+}
+
 #define LDLM_WM_RATIO_LOW_DEFAULT	20
 #define LDLM_WM_RATIO_HIGH_DEFAULT	30
 
 int ldlm_reclaim_setup(void)
 {
 	atomic_set(&ldlm_nr_reclaimer, 0);
-	ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
-	ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+
+	ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
+	ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
+	ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+	ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);
+
 	ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
 	ldlm_last_reclaim_time = cfs_time_current();
 
+#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
+	return percpu_counter_init(&ldlm_granted_total, 0, GFP_KERNEL);
+#else
 	return percpu_counter_init(&ldlm_granted_total, 0);
+#endif
 }
 
 void ldlm_reclaim_cleanup(void)