From fb2a30015bf11df804785fea16cfad1ea797bde0 Mon Sep 17 00:00:00 2001
From: yury
Date: Tue, 27 Jan 2009 13:17:48 +0000
Subject: [PATCH] b=17282 r=johann,shadow - greatly simplifies LRU resize memory pressure handling.

No estimation pass is needed right now, as it was too CPU-consuming.
---
 lustre/include/lustre_dlm.h |  8 ----
 lustre/ldlm/ldlm_internal.h |  2 -
 lustre/ldlm/ldlm_lock.c     |  2 +-
 lustre/ldlm/ldlm_pool.c     | 41 +++++++++++---------
 lustre/ldlm/ldlm_request.c  | 94 ++-------------------------------------------
 lustre/ldlm/ldlm_resource.c |  8 ----
 6 files changed, 28 insertions(+), 127 deletions(-)

diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index c20ba28..ce3d569 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -366,12 +366,6 @@ typedef enum {
 #define NS_DEFAULT_CONTENTION_SECONDS 2
 #define NS_DEFAULT_CONTENDED_LOCKS 32
 
-/* Default value for ->ns_shrink_thumb. If lock is not extent one its cost
- * is one page. Here we have 256 pages which is 1M on i386. Thus by default
- * all extent locks which have more than 1M long extent will be kept in lru,
- * others (including ibits locks) will be canceled on memory pressure event. */
-#define LDLM_LOCK_SHRINK_THUMB 256
-
 struct ldlm_namespace {
         char                  *ns_name;
         ldlm_side_t            ns_client; /* is this a client-side lock tree? */
@@ -394,8 +388,6 @@ struct ldlm_namespace {
         unsigned int           ns_max_age;
         unsigned int           ns_timeouts;
-        /* Lower limit to number of pages in lock to keep it in cache */
-        unsigned int           ns_shrink_thumb;
         cfs_time_t             ns_next_dump;   /* next debug dump, jiffies */
 
         atomic_t               ns_locks;
diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h
index fee4b24..f6b4d30 100644
--- a/lustre/ldlm/ldlm_internal.h
+++ b/lustre/ldlm/ldlm_internal.h
@@ -79,8 +79,6 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync,
                     int flags);
 int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                           int count, int max, int cancel_flags, int flags);
-int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns, int count, int max,
-                             int flags);
 
 extern int ldlm_enqueue_min;
 int ldlm_get_enq_timeout(struct ldlm_lock *lock);
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index df6422a..d2face5 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -184,8 +184,8 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
                 LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
                 list_del_init(&lock->l_lru);
+                LASSERT(ns->ns_nr_unused > 0);
                 ns->ns_nr_unused--;
-                LASSERT(ns->ns_nr_unused >= 0);
                 rc = 1;
         }
         return rc;
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c
index b7c0013..6159907 100644
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -381,13 +381,12 @@ static int ldlm_srv_pool_shrink(struct ldlm_pool *pl,
                                 int nr, unsigned int gfp_mask)
 {
         __u32 limit;
-        ENTRY;
 
         /*
          * VM is asking how many entries may be potentially freed.
          */
         if (nr == 0)
-                RETURN(atomic_read(&pl->pl_granted));
+                return atomic_read(&pl->pl_granted);
 
         /*
          * Client already canceled locks but server is already in shrinker
@@ -427,7 +426,7 @@ static int ldlm_srv_pool_shrink(struct ldlm_pool *pl,
          * We did not really free any memory here so far, it only will be
          * freed later may be, so that we return 0 to not confuse VM.
          */
-        RETURN(0);
+        return 0;
 }
 
 /**
@@ -508,7 +507,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
          * It may be called when SLV has changed much, this is why we do not
          * take into account pl->pl_recalc_time here.
          */
-        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC,
+        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_SYNC,
                                LDLM_CANCEL_LRUR));
 }
 
@@ -520,12 +519,15 @@
 static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
                                 int nr, unsigned int gfp_mask)
 {
-        ENTRY;
+        struct ldlm_namespace *ns;
+        int canceled = 0, unused;
+
+        ns = ldlm_pl2ns(pl);
 
         /*
          * Do not cancel locks in case lru resize is disabled for this ns.
          */
-        if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
+        if (!ns_connect_lru_resize(ns))
                 RETURN(0);
 
         /*
@@ -533,19 +535,22 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
          */
         ldlm_cli_pool_pop_slv(pl);
 
-        /*
-         * Find out how many locks may be released according to shrink
-         * policy.
-         */
-        if (nr == 0)
-                RETURN(ldlm_cancel_lru_estimate(ldlm_pl2ns(pl), 0, 0,
-                                                LDLM_CANCEL_SHRINK));
-
-        /*
-         * Cancel @nr locks accoding to shrink policy.
+        spin_lock(&ns->ns_unused_lock);
+        unused = ns->ns_nr_unused;
+        spin_unlock(&ns->ns_unused_lock);
+
+        if (nr) {
+                canceled = ldlm_cancel_lru(ns, nr, LDLM_SYNC,
+                                           LDLM_CANCEL_SHRINK);
+        }
+#ifdef __KERNEL__
+        /*
+         * Return the number of potentially reclaimable locks.
          */
-        RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC,
-                               LDLM_CANCEL_SHRINK));
+        return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+#else
+        return unused - canceled;
+#endif
 }
 
 struct ldlm_pool_ops ldlm_srv_pool_ops = {
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index dea6ce6..5e77ff9 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -1182,54 +1182,6 @@ static int ldlm_cancel_list(struct list_head *cancels, int count, int flags)
 
 /* Return 1 to stop lru processing and keep current lock cached. Return zero
  * otherwise. */
-static ldlm_policy_res_t ldlm_cancel_shrink_policy(struct ldlm_namespace *ns,
-                                                   struct ldlm_lock *lock,
-                                                   int unused, int added,
-                                                   int count)
-{
-        int lock_cost;
-        __u64 page_nr;
-
-        /* Stop lru processing when we reached passed @count or checked all
-         * locks in lru. */
-        if (count && added >= count)
-                return LDLM_POLICY_KEEP_LOCK;
-
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {
-                struct ldlm_extent *l_extent;
-
-                /* For all extent locks cost is 1 + number of pages in
-                 * their extent. */
-                l_extent = &lock->l_policy_data.l_extent;
-                page_nr = (l_extent->end - l_extent->start);
-                do_div(page_nr, CFS_PAGE_SIZE);
-
-#ifdef __KERNEL__
-                /* XXX: In fact this is evil hack, we can't access inode
-                 * here. For doing it right we need somehow to have number
-                 * of covered by lock. This should be fixed later when 10718
-                 * is landed. */
-                if (lock->l_ast_data != NULL) {
-                        struct inode *inode = lock->l_ast_data;
-                        if (page_nr > inode->i_mapping->nrpages)
-                                page_nr = inode->i_mapping->nrpages;
-                }
-#endif
-                lock_cost = 1 + page_nr;
-        } else {
-                /* For all locks which are not extent ones cost is 1 */
-                lock_cost = 1;
-        }
-
-        /* Keep all expensive locks in lru for the memory pressure time
-         * cancel policy. They anyways may be canceled by lru resize
-         * pplicy if they have not small enough CLV. */
-        return lock_cost > ns->ns_shrink_thumb ?
-                LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
-}
-
-/* Return 1 to stop lru processing and keep current lock cached. Return zero
- * otherwise. */
 static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
                                                  struct ldlm_lock *lock,
                                                  int unused, int added,
                                                  int count)
@@ -1312,7 +1264,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
 {
         if (ns_connect_lru_resize(ns)) {
                 if (flags & LDLM_CANCEL_SHRINK)
-                        return ldlm_cancel_shrink_policy;
+                        /* We kill passed number of old locks. */
+                        return ldlm_cancel_passed_policy;
                 else if (flags & LDLM_CANCEL_LRUR)
                         return ldlm_cancel_lrur_policy;
                 else if (flags & LDLM_CANCEL_PASSED)
@@ -1454,45 +1407,6 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
         RETURN(ldlm_cancel_list(cancels, added, cancel_flags));
 }
 
-/* Returns number of locks which could be canceled next time when
- * ldlm_cancel_lru() is called. Used from locks pool shrinker. */
-int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns,
-                             int count, int max, int flags)
-{
-        ldlm_cancel_lru_policy_t pf;
-        struct ldlm_lock *lock;
-        int added = 0, unused;
-        ENTRY;
-
-        pf = ldlm_cancel_lru_policy(ns, flags);
-        LASSERT(pf != NULL);
-        spin_lock(&ns->ns_unused_lock);
-        unused = ns->ns_nr_unused;
-
-        list_for_each_entry(lock, &ns->ns_unused_list, l_lru) {
-                /* For any flags, stop scanning if @max is reached. */
-                if (max && added >= max)
-                        break;
-
-                /* Somebody is already doing CANCEL or there is a
-                 * blocking request will send cancel. Let's not count
-                 * this lock. */
-                if ((lock->l_flags & LDLM_FL_CANCELING) ||
-                    (lock->l_flags & LDLM_FL_BL_AST))
-                        continue;
-
-                /* Pass the lock through the policy filter and see if it
-                 * should stay in lru. */
-                if (pf(ns, lock, unused, added, count) == LDLM_POLICY_KEEP_LOCK)
-                        break;
-
-                added++;
-                unused--;
-        }
-        spin_unlock(&ns->ns_unused_lock);
-        RETURN(added);
-}
-
 /* when called with LDLM_ASYNC the blocking callback will be handled
  * in a thread and this function will return after the thread has been
  * asked to call the callback. when called with LDLM_SYNC the blocking
@@ -1514,8 +1428,8 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync,
                 RETURN(count);
         }
 
-        /* If an error occured in ASYNC mode, or
-         * this is SYNC mode, cancel the list. */
+        /* If an error occurred in ASYNC mode, or this is SYNC mode,
+         * cancel the list. */
         ldlm_cli_cancel_list(&cancels, count, NULL, 0);
         RETURN(count);
 }
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index cf3207b..7dff082 100644
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -269,13 +269,6 @@ void ldlm_proc_namespace(struct ldlm_namespace *ns)
         lock_vars[0].write_fptr = lprocfs_wr_lru_size;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
-        snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb",
-                 ns->ns_name);
-        lock_vars[0].data = ns;
-        lock_vars[0].read_fptr = lprocfs_rd_uint;
-        lock_vars[0].write_fptr = lprocfs_wr_uint;
-        lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
-
         snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age",
                  ns->ns_name);
         lock_vars[0].data = &ns->ns_max_age;
@@ -344,7 +337,6 @@ ldlm_namespace_new(struct obd_device *obd, char *name,
         if (!ns->ns_name)
                 GOTO(out_hash, NULL);
 
-        ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB;
         ns->ns_appetite = apt;
 
         LASSERT(obd != NULL);
-- 
1.8.3.1
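
Note (not part of the patch): the new ldlm_cli_pool_shrink() return value follows the usual Linux shrinker convention of reporting a reclaimable-object count scaled by sysctl_vfs_cache_pressure (a real kernel tunable, default 100), instead of walking the LRU in a separate estimation pass. A minimal user-space sketch of that arithmetic; the helper name and sample numbers below are illustrative only:

#include <stdio.h>

/* Stand-in for the kernel's sysctl_vfs_cache_pressure knob; the default
 * of 100 means "reclaim cached objects at the normal rate". */
static unsigned int sysctl_vfs_cache_pressure = 100;

/* Same arithmetic as the patched ldlm_cli_pool_shrink(): report how many
 * unused LRU locks remain after this shrink call, scaled by cache pressure. */
static int reclaimable_locks(int unused, int canceled)
{
        return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
}

int main(void)
{
        /* e.g. 1200 unused locks on the LRU, 200 canceled by this call */
        printf("%d\n", reclaimable_locks(1200, 200));   /* prints 1000 */
        return 0;
}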