From: yury Date: Mon, 10 Dec 2007 16:53:39 +0000 (+0000) Subject: b=13766 X-Git-Tag: v1_7_0_51~411 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=2f83d64b5485e139e626580b1fa18630acbbb850 b=13766 r=nikita,adilger,shadow - lots of fixes and cleanups in ldlm pools code. --- diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 7e021b2..8eec429 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -217,10 +217,12 @@ struct ldlm_lock; struct ldlm_resource; struct ldlm_namespace; -typedef int (*ldlm_pool_recalc_t)(struct ldlm_pool *pl); - -typedef int (*ldlm_pool_shrink_t)(struct ldlm_pool *pl, - int nr, unsigned int gfp_mask); +struct ldlm_pool_ops { + int (*po_recalc)(struct ldlm_pool *pl); + int (*po_shrink)(struct ldlm_pool *pl, int nr, + unsigned int gfp_mask); + int (*po_setup)(struct ldlm_pool *pl, int limit); +}; enum { LDLM_POOL_CTL_RECALC = 1 << 0, /* Pool recalc is enabled */ @@ -235,39 +237,39 @@ enum { #define LDLM_POOLS_MODEST_MARGIN (5) /* A change to SLV in % after which we want to wake up pools thread asap. */ -#define LDLM_POOLS_FAST_SLV_CHANGE (5) +#define LDLM_POOLS_FAST_SLV_CHANGE (50) struct ldlm_pool { /* Common pool fields */ - cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */ - char pl_name[100]; /* Pool name, should be long - * enough to contain complex - * proc entry name. */ - spinlock_t pl_lock; /* Lock for protecting slv/clv - * updates. */ - atomic_t pl_limit; /* Number of allowed locks in - * in pool, both, client and - * server side. */ - atomic_t pl_granted; /* Number of granted locks. */ - atomic_t pl_grant_rate; /* Grant rate per T. */ - atomic_t pl_cancel_rate; /* Cancel rate per T. */ - atomic_t pl_grant_speed; /* Grant speed (GR - CR) per T. */ - __u64 pl_server_lock_volume; /* Server lock volume. Protected - * by pl_lock. */ - cfs_time_t pl_update_time; /* Time when last slv from server - * was obtained. */ - ldlm_pool_recalc_t pl_recalc; /* Recalc callback func pointer. */ - ldlm_pool_shrink_t pl_shrink; /* Shrink callback func pointer. */ - int pl_control; /* Pool features mask */ + cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */ + char pl_name[100]; /* Pool name, should be long + * enough to contain complex + * proc entry name. */ + spinlock_t pl_lock; /* Lock for protecting slv/clv + * updates. */ + atomic_t pl_limit; /* Number of allowed locks in + * in pool, both, client and + * server side. */ + atomic_t pl_granted; /* Number of granted locks. */ + atomic_t pl_grant_rate; /* Grant rate per T. */ + atomic_t pl_cancel_rate; /* Cancel rate per T. */ + atomic_t pl_grant_speed; /* Grant speed (GR-CR) per T. */ + __u64 pl_server_lock_volume; /* Server lock volume. + * Protected by pl_lock */ + atomic_t pl_lock_volume_factor; /* Lock volume factor. */ + + time_t pl_recalc_time; /* Time when last slv from + * server was obtained. */ + struct ldlm_pool_ops *pl_ops; /* Recalc and shrink ops. */ + + int pl_control; /* Pool features mask */ - /* Server side pool fields */ - atomic_t pl_grant_plan; /* Planned number of granted - * locks for next T. */ - atomic_t pl_grant_step; /* Grant plan step for next T. */ + atomic_t pl_grant_plan; /* Planned number of granted + * locks for next T. */ + atomic_t pl_grant_step; /* Grant plan step for next + * T. */ - /* Client side pool related fields */ - atomic_t pl_lock_volume_factor; /* Lock volume factor. */ - struct lprocfs_stats *pl_stats; /* Pool statistics. 
*/ + struct lprocfs_stats *pl_stats; /* Pool statistics. */ }; static inline int pool_recalc_enabled(struct ldlm_pool *pl) @@ -295,6 +297,12 @@ typedef enum { LDLM_NAMESPACE_MODEST = 1 << 1 } ldlm_appetite_t; +/* Default value for ->ns_shrink_thumb. If lock is not extent one its cost + * is one page. Here we have 256 pages which is 1M on i386. Thus by default + * all extent locks which have more than 1M long extent will be kept in lru, + * others (including ibits locks) will be canceled on memory pressure event. */ +#define LDLM_LOCK_SHRINK_THUMB 256 + struct ldlm_namespace { char *ns_name; ldlm_side_t ns_client; /* is this a client-side lock tree? */ @@ -315,6 +323,9 @@ struct ldlm_namespace { unsigned int ns_max_unused; unsigned int ns_max_age; + + /* Lower limit to number of pages in lock to keep it in cache */ + unsigned int ns_shrink_thumb; cfs_time_t ns_next_dump; /* next debug dump, jiffies */ atomic_t ns_locks; @@ -805,7 +816,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask); void ldlm_pool_fini(struct ldlm_pool *pl); -int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit); +int ldlm_pool_setup(struct ldlm_pool *pl, int limit); int ldlm_pool_recalc(struct ldlm_pool *pl); __u64 ldlm_pool_get_slv(struct ldlm_pool *pl); __u32 ldlm_pool_get_limit(struct ldlm_pool *pl); diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 25ac511..73adcb6 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -36,9 +36,15 @@ typedef enum { } ldlm_sync_t; /* Cancel lru flag, it indicates we cancel aged locks. */ -#define LDLM_CANCEL_AGED 0x00000001 +enum { + LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */ + LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */ + LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */ + LDLM_CANCEL_LRUR = 1 << 3 /* Cancel locks from lru resize. */ +}; -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync); +int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync, + int flags); int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int flags); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 41d241f..abf719f 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1794,18 +1794,16 @@ int target_pack_pool_reply(struct ptlrpc_request *req) struct ldlm_pool *pl; ENTRY; - if (req->rq_export == NULL) { + if (!req->rq_export || !exp_connect_lru_resize(req->rq_export)) { lustre_msg_set_slv(req->rq_repmsg, 0); lustre_msg_set_limit(req->rq_repmsg, 0); RETURN(0); } - if (!exp_connect_lru_resize(req->rq_export)) - RETURN(0); - pl = ldlm_exp2pl(req->rq_export); spin_lock(&pl->pl_lock); + LASSERT(ldlm_pool_get_slv(pl) != 0 && ldlm_pool_get_limit(pl) != 0); lustre_msg_set_slv(req->rq_repmsg, ldlm_pool_get_slv(pl)); lustre_msg_set_limit(req->rq_repmsg, ldlm_pool_get_limit(pl)); spin_unlock(&pl->pl_lock); @@ -1829,7 +1827,6 @@ int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id) DEBUG_REQ(D_NET, req, "sending reply"); } - target_pack_pool_reply(req); return (ptlrpc_send_reply(req, 1)); } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index b42d6a5..59c6e72 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -635,7 +635,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) * enqueue. 
*/ if (!exp_connect_cancelset(lock->l_conn_export) && !ns_connect_lru_resize(ns)) - ldlm_cancel_lru(ns, 0, LDLM_ASYNC); + ldlm_cancel_lru(ns, 0, LDLM_ASYNC, 0); } else { unlock_res_and_lock(lock); } diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index 704b4cd..979de47 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -138,11 +138,17 @@ static inline __u64 ldlm_pool_slv_min(__u32 L) } enum { - LDLM_POOL_GRANTED_STAT = 0, + LDLM_POOL_FIRST_STAT = 0, + LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT, + LDLM_POOL_GRANT_STAT, + LDLM_POOL_CANCEL_STAT, LDLM_POOL_GRANT_RATE_STAT, LDLM_POOL_CANCEL_RATE_STAT, LDLM_POOL_GRANT_PLAN_STAT, LDLM_POOL_SLV_STAT, + LDLM_POOL_SHRINK_REQTD_STAT, + LDLM_POOL_SHRINK_FREED_STAT, + LDLM_POOL_RECALC_STAT, LDLM_POOL_LAST_STAT }; @@ -218,8 +224,7 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) ENTRY; spin_lock(&pl->pl_lock); - recalc_interval_sec = cfs_duration_sec(cfs_time_current() - - pl->pl_update_time); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics */ ldlm_pool_recalc_stats(pl); @@ -230,12 +235,12 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) /* Update grant_plan for new period. */ ldlm_pool_recalc_grant_plan(pl); - pl->pl_update_time = cfs_time_current(); /* Zero out all rates and speed for the last period. */ atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); + pl->pl_recalc_time = cfs_time_current_sec(); } spin_unlock(&pl->pl_lock); RETURN(0); @@ -246,30 +251,36 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) static int ldlm_srv_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { - __u32 granted, limit; - __u64 slv_delta; + __u32 limit; ENTRY; - /* Client already canceled locks but server is already in shrinker and - * can't cancel anything. Let's catch this race. */ - if ((granted = atomic_read(&pl->pl_granted)) == 0) + /* VM is asking how many entries may be potentially freed. */ + if (nr == 0) + RETURN(atomic_read(&pl->pl_granted)); + + /* Client already canceled locks but server is already in shrinker + * and can't cancel anything. Let's catch this race. */ + if (atomic_read(&pl->pl_granted) == 0) RETURN(0); spin_lock(&pl->pl_lock); - /* Simple proportion but it gives impression on how much should be - * SLV changed for request @nr of locks to be canceled.*/ - slv_delta = nr * ldlm_pool_get_slv(pl); - limit = ldlm_pool_get_limit(pl); - do_div(slv_delta, granted); - - /* As SLV has some dependence on historical data, that is new value - * is based on old one, this decreasing will make clients get some - * locks back to the server and after some time it will stabilize.*/ - if (slv_delta < ldlm_pool_get_slv(pl)) - ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - slv_delta); - else + /* We want shrinker to possibly cause cancelation of @nr locks from + * clients or grant approximately @nr locks smaller next intervals. + * + * This is why we decresed SLV by @nr. This effect will only be as + * long as one re-calc interval (1s these days) and this should be + * enough to pass this decreased SLV to all clients. On next recalc + * interval pool will either increase SLV if locks load is not high + * or will keep on same level or even decrease again, thus, shrinker + * decreased SLV will affect next recalc intervals and this way will + * make locking load lower. 
*/ + if (nr < ldlm_pool_get_slv(pl)) { + ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - nr); + } else { + limit = ldlm_pool_get_limit(pl); ldlm_pool_set_slv(pl, ldlm_pool_slv_min(limit)); + } spin_unlock(&pl->pl_lock); /* We did not really free any memory here so far, it only will be @@ -277,6 +288,13 @@ static int ldlm_srv_pool_shrink(struct ldlm_pool *pl, RETURN(0); } +static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit) +{ + ENTRY; + ldlm_pool_set_limit(pl, limit); + RETURN(0); +} + static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) { time_t recalc_interval_sec; @@ -284,8 +302,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) spin_lock(&pl->pl_lock); - recalc_interval_sec = cfs_duration_sec(cfs_time_current() - - pl->pl_update_time); + recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time; if (recalc_interval_sec > 0) { /* Update statistics only every T */ ldlm_pool_recalc_stats(pl); @@ -294,28 +311,63 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); + pl->pl_recalc_time = cfs_time_current_sec(); } spin_unlock(&pl->pl_lock); - /* Recalc client pool is done without taking into account pl_update_time - * as this may be called voluntary in the case of emergency. Client - * recalc does not calculate anything, we do not risk to have skew - * of some pool param. */ - ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC); - RETURN(0); + /* Do not cancel locks in case lru resize is disabled for this ns */ + if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) + RETURN(0); + + /* In the time of canceling locks on client we do not need to maintain + * sharp timing, we only want to cancel locks asap according to new SLV. + * This may be called when SLV has changed much, this is why we do not + * take into account pl->pl_recalc_time here. */ + RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC, + LDLM_CANCEL_LRUR)); } static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { ENTRY; - RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC)); + + /* Do not cancel locks in case lru resize is disabled for this ns */ + if (!ns_connect_lru_resize(ldlm_pl2ns(pl))) + RETURN(0); + + /* Find out how many locks may be released according to shrink + * policy. 
*/ + if (nr == 0) + RETURN(ldlm_cancel_lru_local(ldlm_pl2ns(pl), NULL, 0, + 0, LDLM_CANCEL_SHRINK)); + + /* Cancel @nr locks accoding to shrink policy */ + RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC, + LDLM_CANCEL_SHRINK)); } +struct ldlm_pool_ops ldlm_srv_pool_ops = { + .po_recalc = ldlm_srv_pool_recalc, + .po_shrink = ldlm_srv_pool_shrink, + .po_setup = ldlm_srv_pool_setup +}; + +struct ldlm_pool_ops ldlm_cli_pool_ops = { + .po_recalc = ldlm_cli_pool_recalc, + .po_shrink = ldlm_cli_pool_shrink +}; + int ldlm_pool_recalc(struct ldlm_pool *pl) { - if (pl->pl_recalc != NULL && pool_recalc_enabled(pl)) - return pl->pl_recalc(pl); + int count; + + if (pl->pl_ops->po_recalc != NULL && pool_recalc_enabled(pl)) { + count = pl->pl_ops->po_recalc(pl); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, + count); + return count; + } return 0; } EXPORT_SYMBOL(ldlm_pool_recalc); @@ -323,22 +375,32 @@ EXPORT_SYMBOL(ldlm_pool_recalc); int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { - if (pl->pl_shrink != NULL && pool_shrink_enabled(pl)) { - CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks\n", - pl->pl_name, nr); - return pl->pl_shrink(pl, nr, gfp_mask); + int cancel = 0; + + if (pl->pl_ops->po_shrink != NULL && pool_shrink_enabled(pl)) { + cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask); + if (nr > 0) { + lprocfs_counter_add(pl->pl_stats, + LDLM_POOL_SHRINK_REQTD_STAT, + nr); + lprocfs_counter_add(pl->pl_stats, + LDLM_POOL_SHRINK_FREED_STAT, + cancel); + CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks, " + "shrunk %d\n", pl->pl_name, nr, cancel); + } } - return 0; + return cancel; } EXPORT_SYMBOL(ldlm_pool_shrink); /* The purpose of this function is to re-setup limit and maximal allowed * slv according to the passed limit. 
*/ -int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit) +int ldlm_pool_setup(struct ldlm_pool *pl, int limit) { ENTRY; - if (ns_is_server(ldlm_pl2ns(pl))) - ldlm_pool_set_limit(pl, limit); + if (pl->pl_ops->po_setup != NULL) + RETURN(pl->pl_ops->po_setup(pl, limit)); RETURN(0); } EXPORT_SYMBOL(ldlm_pool_setup); @@ -368,10 +430,9 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, pl->pl_name); nr += snprintf(page + nr, count - nr, " SLV: "LPU64"\n", slv); - if (ns_is_client(ldlm_pl2ns(pl))) { - nr += snprintf(page + nr, count - nr, " LVF: %d\n", - atomic_read(&pl->pl_lock_volume_factor)); - } + nr += snprintf(page + nr, count - nr, " LVF: %d\n", + atomic_read(&pl->pl_lock_volume_factor)); + nr += snprintf(page + nr, count - nr, " GSP: %d%%\n", grant_step); nr += snprintf(page + nr, count - nr, " GP: %d\n", @@ -469,13 +530,11 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) pool_vars[0].write_fptr = lprocfs_wr_atomic; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - if (ns_is_client(ns)) { - snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); - pool_vars[0].data = &pl->pl_lock_volume_factor; - pool_vars[0].read_fptr = lprocfs_rd_uint; - pool_vars[0].write_fptr = lprocfs_wr_uint; - lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - } + snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); + pool_vars[0].data = &pl->pl_lock_volume_factor; + pool_vars[0].read_fptr = lprocfs_rd_uint; + pool_vars[0].write_fptr = lprocfs_wr_uint; + lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "state"); pool_vars[0].data = pl; @@ -483,13 +542,17 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT - - LDLM_POOL_GRANTED_STAT, 0); + LDLM_POOL_FIRST_STAT, 0); if (!pl->pl_stats) GOTO(out_free_name, rc = -ENOMEM); lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "granted", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, 0, + "grant", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, 0, + "cancel", "locks"); lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "grant_rate", "locks/s"); @@ -502,6 +565,15 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "slv", "slv"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "shrink_request", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "shrink_freed", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "recalc_freed", "locks"); lprocfs_register_stats(pl->pl_proc_dir, "stats", pl->pl_stats); EXIT; @@ -534,7 +606,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, spin_lock_init(&pl->pl_lock); atomic_set(&pl->pl_granted, 0); - pl->pl_update_time = cfs_time_current(); + pl->pl_recalc_time = cfs_time_current_sec(); atomic_set(&pl->pl_lock_volume_factor, 1); atomic_set(&pl->pl_grant_rate, 0); @@ -548,15 +620,13 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, ns->ns_name, idx); if (client == LDLM_NAMESPACE_SERVER) { - pl->pl_recalc = ldlm_srv_pool_recalc; - pl->pl_shrink = 
ldlm_srv_pool_shrink; + pl->pl_ops = &ldlm_srv_pool_ops; ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L); ldlm_pool_set_slv(pl, ldlm_pool_slv_max(LDLM_POOL_HOST_L)); } else { ldlm_pool_set_slv(pl, 1); ldlm_pool_set_limit(pl, 1); - pl->pl_recalc = ldlm_cli_pool_recalc; - pl->pl_shrink = ldlm_cli_pool_shrink; + pl->pl_ops = &ldlm_cli_pool_ops; } rc = ldlm_pool_proc_init(pl); @@ -573,8 +643,7 @@ void ldlm_pool_fini(struct ldlm_pool *pl) { ENTRY; ldlm_pool_proc_fini(pl); - pl->pl_recalc = NULL; - pl->pl_shrink = NULL; + pl->pl_ops = NULL; EXIT; } EXPORT_SYMBOL(ldlm_pool_fini); @@ -586,9 +655,12 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) atomic_inc(&pl->pl_grant_rate); atomic_inc(&pl->pl_grant_speed); - /* No need to recalc client pools here as this is already done - * on enqueue/cancel and locks to cancel already packed to the - * rpc. */ + lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT); + + /* Do not do pool recalc for client side as all locks which + * potentially may be canceled has already been packed into + * enqueue/cancel rpc. Also we do not want to run out of stack + * with too long call paths. */ if (ns_is_server(ldlm_pl2ns(pl))) ldlm_pool_recalc(pl); EXIT; @@ -603,7 +675,8 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) atomic_inc(&pl->pl_cancel_rate); atomic_dec(&pl->pl_grant_speed); - /* Same as in ldlm_pool_add() */ + lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT); + if (ns_is_server(ldlm_pl2ns(pl))) ldlm_pool_recalc(pl); EXIT; @@ -675,11 +748,22 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); /* Find out how many resources we may release. */ - mutex_down(ldlm_namespace_lock(client)); - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) - total += ldlm_pool_granted(&ns->ns_pool); - mutex_up(ldlm_namespace_lock(client)); - + for (nr_ns = atomic_read(ldlm_namespace_nr(client)); + nr_ns > 0; nr_ns--) + { + mutex_down(ldlm_namespace_lock(client)); + if (list_empty(ldlm_namespace_list(client))) { + mutex_up(ldlm_namespace_lock(client)); + return 0; + } + ns = ldlm_namespace_first(client); + ldlm_namespace_get(ns); + ldlm_namespace_move(ns, client); + mutex_up(ldlm_namespace_lock(client)); + total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask); + ldlm_namespace_put(ns, 1); + } + if (nr == 0 || total == 0) return total; @@ -727,15 +811,18 @@ void ldlm_pools_recalc(ldlm_side_t client) { __u32 nr_l = 0, nr_p = 0, l; struct ldlm_namespace *ns; - int rc, nr, equal = 0; + int nr, equal = 0; - /* Check all modest namespaces. */ - mutex_down(ldlm_namespace_lock(client)); - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) { - if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) - continue; + /* No need to setup pool limit for client pools. */ + if (client == LDLM_NAMESPACE_SERVER) { + /* Check all modest namespaces first. */ + mutex_down(ldlm_namespace_lock(client)); + list_for_each_entry(ns, ldlm_namespace_list(client), + ns_list_chain) + { + if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) + continue; - if (client == LDLM_NAMESPACE_SERVER) { l = ldlm_pool_granted(&ns->ns_pool); if (l == 0) l = 1; @@ -747,21 +834,24 @@ void ldlm_pools_recalc(ldlm_side_t client) nr_l += l; nr_p++; } - } - /* Make sure that modest namespaces did not eat more that 2/3 of limit */ - if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { - CWARN("Modest pools eat out 2/3 of locks limit. %d of %lu. 
" - "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); - equal = 1; - } + /* Make sure that modest namespaces did not eat more that 2/3 + * of limit */ + if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { + CWARN("\"Modest\" pools eat out 2/3 of server locks " + "limit (%d of %lu). This means that you have too " + "many clients for this amount of server RAM. " + "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); + equal = 1; + } - /* The rest is given to greedy namespaces. */ - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) { - if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) - continue; + /* The rest is given to greedy namespaces. */ + list_for_each_entry(ns, ldlm_namespace_list(client), + ns_list_chain) + { + if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) + continue; - if (client == LDLM_NAMESPACE_SERVER) { if (equal) { /* In the case 2/3 locks are eaten out by * modest pools, we re-setup equal limit @@ -777,8 +867,8 @@ void ldlm_pools_recalc(ldlm_side_t client) } ldlm_pool_setup(&ns->ns_pool, l); } + mutex_up(ldlm_namespace_lock(client)); } - mutex_up(ldlm_namespace_lock(client)); /* Recalc at least ldlm_namespace_nr(client) namespaces. */ for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) { @@ -798,11 +888,7 @@ void ldlm_pools_recalc(ldlm_side_t client) mutex_up(ldlm_namespace_lock(client)); /* After setup is done - recalc the pool. */ - rc = ldlm_pool_recalc(&ns->ns_pool); - if (rc) - CERROR("%s: pool recalculation error " - "%d\n", ns->ns_pool.pl_name, rc); - + ldlm_pool_recalc(&ns->ns_pool); ldlm_namespace_put(ns, 1); } } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 3e2bce2..2b34473 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -515,15 +515,20 @@ struct ptlrpc_request *ldlm_prep_enqueue_req(struct obd_export *exp, /* Estimate the amount of available space in the request. */ int avail = ldlm_req_handles_avail(exp, size, bufcount, LDLM_ENQUEUE_CANCEL_OFF); + int flags, cancel; + LASSERT(avail >= count); + flags = ns_connect_lru_resize(ns) ? + LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + cancel = ns_connect_lru_resize(ns) ? 0 : 1; + /* Cancel lru locks here _only_ if the server supports * EARLY_CANCEL. Otherwise we have to send extra CANCEL * rpc right on enqueue, what will make it slower, vs. * asynchronous rpc in blocking thread. */ - count += ldlm_cancel_lru_local(ns, cancels, - ns_connect_lru_resize(ns) ? 0 : 1, - avail - count, LDLM_CANCEL_AGED); + count += ldlm_cancel_lru_local(ns, cancels, cancel, + avail - count, flags); size[DLM_LOCKREQ_OFF] = ldlm_request_bufsize(count, LDLM_ENQUEUE); } @@ -964,27 +969,42 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) pl = ldlm_imp2pl(req->rq_import); spin_lock(&pl->pl_lock); -#ifdef __KERNEL__ + + /* Check if we need to wakeup pools thread for fast SLV change. + * This is only done when threads period is noticably long like + * 10s or more. 
*/ +#if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10) { - __u64 old_slv, fast_slv_change; + __u64 old_slv, new_slv, fast_change; old_slv = ldlm_pool_get_slv(pl); - fast_slv_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; - do_div(fast_slv_change, 100); -#endif - pl->pl_update_time = cfs_time_current(); - ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg)); - ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg)); -#ifdef __KERNEL__ + new_slv = lustre_msg_get_slv(req->rq_repmsg); + fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; + do_div(fast_change, 100); + /* Wake up pools thread only if SLV has changed more than - * 5% since last update. In this case we want to react asap. + * 50% since last update. In this case we want to react asap. * Otherwise it is no sense to wake up pools as they are - * re-calculated every 1s anyways. */ - if (old_slv > ldlm_pool_get_slv(pl) && - old_slv - ldlm_pool_get_slv(pl) > fast_slv_change) + * re-calculated every LDLM_POOLS_THREAD_PERIOD anyways. */ + if (old_slv > new_slv && old_slv - new_slv > fast_change) ldlm_pools_wakeup(); } #endif + /* In some cases RPC may contain slv and limit zeroed out. This is + * the case when server does not support lru resize feature. This is + * also possible in some recovery cases when server side reqs have no + * ref to obd export and thus access to server side namespace is no + * possible. */ + if (lustre_msg_get_slv(req->rq_repmsg) != 0 && + lustre_msg_get_limit(req->rq_repmsg) != 0) { + ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg)); + ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg)); + } else { + DEBUG_REQ(D_HA, req, "zero SLV or Limit found " + "(SLV: "LPU64", Limit: %u)", + lustre_msg_get_slv(req->rq_repmsg), + lustre_msg_get_limit(req->rq_repmsg)); + } spin_unlock(&pl->pl_lock); RETURN(0); @@ -1011,13 +1031,18 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) if (rc == LDLM_FL_BL_AST) { rc = ldlm_cli_cancel_req(lock->l_conn_export, &cancels, 1, 0); } else if (rc == LDLM_FL_CANCELING) { + struct ldlm_namespace *ns = lock->l_resource->lr_namespace; int avail = ldlm_cancel_handles_avail(lock->l_conn_export); - int count = 1; + int flags, cancel; LASSERT(avail > 0); - count += ldlm_cancel_lru_local(lock->l_resource->lr_namespace, - &cancels, 0, avail - 1, - LDLM_CANCEL_AGED); - ldlm_cli_cancel_list(&cancels, count, NULL, 0, 0); + + flags = ns_connect_lru_resize(ns) ? + LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + cancel = ns_connect_lru_resize(ns) ? 0 : 1; + + cancel += ldlm_cancel_lru_local(ns, &cancels, 0, + avail - cancel, flags); + ldlm_cli_cancel_list(&cancels, cancel, NULL, 0, 0); } if (rc != LDLM_FL_CANCELING) LDLM_LOCK_PUT(lock); @@ -1081,6 +1106,123 @@ static int ldlm_cancel_list_local(struct list_head *cancels, int count) RETURN(count); } +/* Return 1 if @lock should be canceled according to shrinker policy. + * Return zero otherwise. */ +static int ldlm_cancel_shrink_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + int lock_cost; + __u64 page_nr; + + if (lock->l_resource->lr_type == LDLM_EXTENT) { + struct ldlm_extent *l_extent; + + /* For all extent locks cost is 1 + number of pages in + * their extent. */ + l_extent = &lock->l_policy_data.l_extent; + page_nr = (l_extent->end - l_extent->start); + do_div(page_nr, CFS_PAGE_SIZE); + +#ifdef __KERNEL__ + /* XXX: In fact this is evil hack, we can't access inode + * here. For doing it right we need somehow to have number + * of covered by lock. 
This should be fixed later when 10718 + * is landed. */ + if (lock->l_ast_data != NULL) { + struct inode *inode = lock->l_ast_data; + if (page_nr > inode->i_mapping->nrpages) + page_nr = inode->i_mapping->nrpages; + } +#endif + lock_cost = 1 + page_nr; + } else { + /* For all locks which are not extent ones cost is 1 */ + lock_cost = 1; + } + + /* Keep all expensive locks in lru for the memory pressure time + * cancel policy. They anyways may be canceled by lru resize + * pplicy if they have not small enough CLV. */ + return (lock_cost <= ns->ns_shrink_thumb); +} + +/* Return 1 if @lock should be canceled according to lru resize policy. + * Return zero otherwise. */ +static int ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + cfs_time_t cur = cfs_time_current(); + struct ldlm_pool *pl = &ns->ns_pool; + __u64 slv, lvf, lv; + cfs_time_t la; + + spin_lock(&pl->pl_lock); + slv = ldlm_pool_get_slv(pl); + lvf = atomic_read(&pl->pl_lock_volume_factor); + spin_unlock(&pl->pl_lock); + + la = cfs_duration_sec(cfs_time_sub(cur, + lock->l_last_used)); + + /* Stop when slv is not yet come from server or + * lv is smaller than it is. */ + lv = lvf * la * unused; + return (slv > 1 && lv >= slv); +} + +/* Return 1 if @lock should be canceled according to passed policy. + * Return zero otherwise. */ +static int ldlm_cancel_passed_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + /* Do nothing here, we allow canceling all locks which + * are passed here from upper layer logic. So that locks + * number to be canceled will be limited by @count and + * @max in ldlm_cancel_lru_local(). */ + return 1; +} + +/* Return 1 if @lock should be canceled according to aged policy. + * Return zero otherwise. */ +static int ldlm_cancel_aged_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int asked) +{ + /* Cancel old locks if reached asked limit. */ + return !((added >= asked) && + cfs_time_before_64(cfs_time_current(), + cfs_time_add(lock->l_last_used, + ns->ns_max_age))); +} + +typedef int (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, + struct ldlm_lock *, int, + int, int); + +static ldlm_cancel_lru_policy_t +ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) +{ + if (ns_connect_lru_resize(ns)) { + if (flags & LDLM_CANCEL_SHRINK) + return ldlm_cancel_shrink_policy; + else if (flags & LDLM_CANCEL_LRUR) + return ldlm_cancel_lrur_policy; + else if (flags & LDLM_CANCEL_PASSED) + return ldlm_cancel_passed_policy; + } else { + if (flags & LDLM_CANCEL_AGED) + return ldlm_cancel_aged_policy; + } + return NULL; +} + /* - Free space in lru for @count new locks, * redundant unused locks are canceled locally; * - also cancel locally unused aged locks; @@ -1092,14 +1234,25 @@ static int ldlm_cancel_list_local(struct list_head *cancels, int count) * There are the following use cases: ldlm_cancel_resource_local(), * ldlm_cancel_lru_local() and ldlm_cli_cancel(), which check&set this * flag properly. As any attempt to cancel a lock rely on this flag, - * l_bl_ast list is accessed later without any special locking. */ + * l_bl_ast list is accessed later without any special locking. 
+ * + * Calling policies for enabled lru resize: + * ---------------------------------------- + * flags & LDLM_CANCEL_LRUR - use lru resize policy (SLV from server) to + * cancel not more than @count locks; + * + * flags & LDLM_CANCEL_PASSED - cancel @count number of old locks (located at + * the beginning of lru list); + * + * flags & LDLM_CANCEL_SHRINK - cancel not more than @count locks according to + * memory pressre policy function. + */ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int flags) { - cfs_time_t cur = cfs_time_current(); - int added = 0, unused; - struct ldlm_lock *lock; - __u64 slv, lvf, lv; + ldlm_cancel_lru_policy_t cancel_lru_policy_func; + int added = 0, unused, cancel; + struct ldlm_lock *lock, *next; ENTRY; spin_lock(&ns->ns_unused_lock); @@ -1108,103 +1261,85 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, if (!ns_connect_lru_resize(ns)) count += unused - ns->ns_max_unused; - while (!list_empty(&ns->ns_unused_list)) { - struct ldlm_pool *pl = &ns->ns_pool; - - LASSERT(unused >= 0); + cancel_lru_policy_func = ldlm_cancel_lru_policy(ns, flags); + + list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { + /* Make sure that we skip locks being already in cancel. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST)) + continue; - if (max && added >= max) + /* For any flags, stop scanning if @max or passed @count is + * reached. */ + if ((max && added >= max) || (count && added >= count)) break; - list_for_each_entry(lock, &ns->ns_unused_list, l_lru) { - /* somebody is already doing CANCEL or there is a - * blocking request will send cancel. */ - if (!(lock->l_flags & LDLM_FL_CANCELING) && - !(lock->l_flags & LDLM_FL_BL_AST)) + /* Pass the lock through the policy filter and see if it + * should stay in lru. */ + if (cancel_lru_policy_func != NULL) { + cancel = cancel_lru_policy_func(ns, lock, unused, + added, count); + + /* Take next lock for shrink policy, we need to check + * whole list. Stop scanning for other policies. */ + if ((flags & LDLM_CANCEL_SHRINK) && !cancel) + continue; + else if (!cancel) break; } - if (&lock->l_lru == &ns->ns_unused_list) - break; - if (ns_connect_lru_resize(ns)) { - cfs_time_t la; - - /* Take into account SLV only if cpount == 0. */ - if (count == 0) { - /* Calculate lv for every lock. */ - spin_lock(&pl->pl_lock); - slv = ldlm_pool_get_slv(pl); - lvf = atomic_read(&pl->pl_lock_volume_factor); - spin_unlock(&pl->pl_lock); - - la = cfs_duration_sec(cfs_time_sub(cur, - lock->l_last_used)); - if (la == 0) - la = 1; - - /* Stop when slv is not yet come from server - * or lv is smaller than it is. */ - lv = lvf * la * unused; - if (slv == 1 || lv < slv) - break; - } else { - if (added >= count) - break; + if (cancels != NULL) { + LDLM_LOCK_GET(lock); /* dropped by bl thread */ + spin_unlock(&ns->ns_unused_lock); + + lock_res_and_lock(lock); + /* Check flags again under the lock. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST) || + (ldlm_lock_remove_from_lru(lock) == 0)) { + /* other thread is removing lock from lru or + * somebody is already doing CANCEL or + * there is a blocking request which will send + * cancel by itseft. 
*/ + unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); + spin_lock(&ns->ns_unused_lock); + continue; } - } else { - if ((added >= count) && - (!(flags & LDLM_CANCEL_AGED) || - cfs_time_before_64(cur, ns->ns_max_age + - lock->l_last_used))) - break; - } - - LDLM_LOCK_GET(lock); /* dropped by bl thread */ - spin_unlock(&ns->ns_unused_lock); - - lock_res_and_lock(lock); - /* Check flags again under the lock. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (lock->l_flags & LDLM_FL_BL_AST) || - (ldlm_lock_remove_from_lru(lock) == 0)) { - /* other thread is removing lock from lru or - * somebody is already doing CANCEL or - * there is a blocking request which will send - * cancel by itseft. */ + LASSERT(!lock->l_readers && !lock->l_writers); + + /* If we have chosen to cancel this lock voluntarily, we + * better send cancel notification to server, so that it + * frees appropriate state. This might lead to a race + * where while we are doing cancel here, server is also + * silently cancelling this lock. */ + lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + + /* Setting the CBPENDING flag is a little misleading, but + * prevents an important race; namely, once CBPENDING is + * set, the lock can accumulate no more readers/writers. + * Since readers and writers are already zero here, + * ldlm_lock_decref() won't see this flag and call + * l_blocking_ast */ + lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; + + /* We can't re-add to l_lru as it confuses the refcounting + * in ldlm_lock_remove_from_lru() if an AST arrives after + * we drop ns_lock below. We use l_bl_ast and can't use + * l_pending_chain as it is used both on server and client + * nevertheless bug 5666 says it is used only on server */ + LASSERT(list_empty(&lock->l_bl_ast)); + list_add(&lock->l_bl_ast, cancels); unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); spin_lock(&ns->ns_unused_lock); - continue; } - LASSERT(!lock->l_readers && !lock->l_writers); - - /* If we have chosen to canecl this lock voluntarily, we better - send cancel notification to server, so that it frees - appropriate state. This might lead to a race where while - we are doing cancel here, server is also silently - cancelling this lock. */ - lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; - - /* Setting the CBPENDING flag is a little misleading, but - * prevents an important race; namely, once CBPENDING is set, - * the lock can accumulate no more readers/writers. Since - * readers and writers are already zero here, ldlm_lock_decref - * won't see this flag and call l_blocking_ast */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; - /* We can't re-add to l_lru as it confuses the refcounting in - * ldlm_lock_remove_from_lru() if an AST arrives after we drop - * ns_lock below. We use l_bl_ast and can't use l_pending_chain - * as it is used both on server and client nevertheles bug 5666 - * says it is used only on server. --umka */ - - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - unlock_res_and_lock(lock); - spin_lock(&ns->ns_unused_lock); added++; unused--; } spin_unlock(&ns->ns_unused_lock); + + if (cancels == NULL) + RETURN(added); RETURN(ldlm_cancel_list(cancels, added)); } @@ -1213,7 +1348,8 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, * in a thread and this function will return after the thread has been * asked to call the callback. when called with LDLM_SYNC the blocking * callback will be performed in this function. 
*/ -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync) +int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync, + int flags) { CFS_LIST_HEAD(cancels); int count, rc; @@ -1222,7 +1358,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync) #ifndef __KERNEL__ sync = LDLM_SYNC; /* force to be sync in user space */ #endif - count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, 0); + count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, flags); if (sync == LDLM_ASYNC) { rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count); if (rc == 0) diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 1a2e3de..16d831f 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -152,7 +152,8 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, int canceled, unused = ns->ns_nr_unused; /* Try to cancel all @ns_nr_unused locks. */ - canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC); + canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC, + LDLM_CANCEL_PASSED); if (canceled < unused) { CERROR("not all requested locks are canceled, " "requested: %d, canceled: %d\n", unused, @@ -162,7 +163,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, } else { tmp = ns->ns_max_unused; ns->ns_max_unused = 0; - ldlm_cancel_lru(ns, 0, LDLM_SYNC); + ldlm_cancel_lru(ns, 0, LDLM_SYNC, LDLM_CANCEL_PASSED); ns->ns_max_unused = tmp; } return count; @@ -185,7 +186,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, CDEBUG(D_DLMTRACE, "changing namespace %s unused locks from %u to %u\n", ns->ns_name, ns->ns_nr_unused, (unsigned int)tmp); - ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC); + ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC, LDLM_CANCEL_PASSED); if (!lru_resize) { CDEBUG(D_DLMTRACE, "disable lru_resize for namespace %s\n", @@ -196,7 +197,7 @@ static int lprocfs_wr_lru_size(struct file *file, const char *buffer, CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n", ns->ns_name, ns->ns_max_unused, (unsigned int)tmp); ns->ns_max_unused = (unsigned int)tmp; - ldlm_cancel_lru(ns, 0, LDLM_ASYNC); + ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED); /* Make sure that originally lru resize was supported before * turning it on here. 
*/ @@ -248,13 +249,19 @@ void ldlm_proc_namespace(struct ldlm_namespace *ns) lock_vars[0].write_fptr = lprocfs_wr_lru_size; lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); + snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb", + ns->ns_name); + lock_vars[0].data = ns; + lock_vars[0].read_fptr = lprocfs_rd_uint; + lock_vars[0].write_fptr = lprocfs_wr_uint; + lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); + snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age", ns->ns_name); lock_vars[0].data = &ns->ns_max_age; lock_vars[0].read_fptr = lprocfs_rd_uint; lock_vars[0].write_fptr = lprocfs_wr_uint; lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); - } } #undef MAX_STRING_SIZE @@ -284,6 +291,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, ldlm_side_t client, if (!ns->ns_hash) GOTO(out_ns, NULL); + ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB; ns->ns_appetite = apt; namelen = strlen(name); OBD_ALLOC(ns->ns_name, namelen + 1); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index fb1bb1d..5da190c 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -342,6 +342,9 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) lustre_msg_set_opc(req->rq_repmsg, req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0); + if (req->rq_export && req->rq_export->exp_obd) + target_pack_pool_reply(req); + if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); else diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 6e0de6e..486f185 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4672,12 +4672,21 @@ test_121() { #bug #10589 } run_test 121 "read cancel race =========" +cmd_cancel_lru_locks() { + NS=$1 + test "x$NS" = "x" && NS="mdc" + for d in `find $LPROC/ldlm/namespaces | grep $NS`; do + if test -f $d/lru_size; then + cancel_lru_locks $d + fi + done +} + test_124a() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 - cancel_lru_locks mdc + cmd_cancel_lru_locks "mdc" lru_resize_enable - NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` # we want to test main pool functionality, that is cancel based on SLV # this is why shrinkers are disabled @@ -4687,20 +4696,33 @@ test_124a() { NR=2000 mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" - LRU_SIZE=`cat $NSDIR/lru_size` - # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done + + NSDIR="" + LRU_SIZE=0 + for d in `find $LPROC/ldlm/namespaces | grep mdc-`; do + if test -f $d/lru_size; then + LRU_SIZE=`cat $d/lru_size` + if test $LRU_SIZE -gt 0; then + log "using $d namespace" + NSDIR=$d + break + fi + fi + done - LRU_SIZE_B=`cat $NSDIR/lru_size` - if test $LRU_SIZE -ge $LRU_SIZE_B; then + if test -z $NSDIR; then skip "No cached locks created!" - cat $NSDIR/pool/state return 0 fi - LRU_SIZE_B=$((LRU_SIZE_B-LRU_SIZE)) - log "created $LRU_SIZE_B lock(s)" + + if test $LRU_SIZE -lt 100; then + skip "Not enough cached locks created!" 
+ return 0 + fi + log "created $LRU_SIZE lock(s)" # we want to sleep 30s to not make test too long SLEEP=30 @@ -4718,6 +4740,7 @@ test_124a() { # Use $LRU_SIZE_B here to take into account real number of locks created # in the case of CMD, LRU_SIZE_B != $NR in most of cases LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) + LRU_SIZE_B=$LRU_SIZE log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)" OLD_LVF=`cat $NSDIR/pool/lock_volume_factor` echo "$LVF" > $NSDIR/pool/lock_volume_factor @@ -4740,39 +4763,82 @@ test_124a() { } run_test 124a "lru resize =======================================" +set_lru_size() { + NS=$1 + SIZE=$2 + test "x$NS" = "x" && NS="mdc" + test "x$SIZE" = "x" && SIZE="0" + test $SIZE -lt 0 && SIZE="0" + test $SIZE -gt 0 && ACTION="disabled" || ACTION="enabled" + for d in `find $LPROC/ldlm/namespaces | grep $NS`; do + if test -f $d/lru_size; then + log "$(basename $d):" + log " lru resize $ACTION" + log " lru_size=$SIZE" + echo $SIZE > $d/lru_size + fi + done +} + +get_lru_size() { + NS=$1 + test "x$NS" = "x" && NS="mdc" + for d in `find $LPROC/ldlm/namespaces | grep $NS`; do + if test -f $d/lru_size; then + log "$(basename $d):" + log " lru_size=$(cat $d/lru_size)" + fi + done +} + test_124b() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 - cleanup -f || error "failed to unmount" - MOUNTOPT="$MOUNTOPT,nolruresize" - setup - NR=2000 - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` + LIMIT=`cat $NSDIR/pool/limit` + + NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l) + # 100 locks here is default value for non-shrinkable lru as well + # as the order to switch to static lru managing policy + # define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) + LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU)) + + NR=$((LIMIT-(LIMIT/3))) + log "starting lru resize disable cycle" + set_lru_size "mdc-" $LDLM_DEFAULT_LRU_SIZE - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize disabled)" + mkdir -p $DIR/$tdir/disable_lru_resize || + error "failed to create $DIR/$tdir/disable_lru_resize" + + createmany -o $DIR/$tdir/disable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null + ls -la $DIR/$tdir/disable_lru_resize > /dev/null etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" + get_lru_size "mdc-" + + log "starting lru resize enable cycle" + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" - cleanup -f || error "failed to unmount" - MOUNTOPT=`echo $MOUNTOPT | sed "s/nolruresize/lruresize/"` - setup + # 0 locks means here flush lru and switch to lru resize policy + set_lru_size "mdc-" 0 - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize enabled)" + createmany -o $DIR/$tdir/enable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null + ls -la $DIR/$tdir/enable_lru_resize > /dev/null etime=`date +%s` 
lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" + get_lru_size "mdc-" if test $lruresize_delta -gt $nolruresize_delta; then log "ls -la is $((lruresize_delta - $nolruresize_delta))s slower with lru resize enabled" @@ -4781,8 +4847,6 @@ test_124b() { else log "lru resize performs the same with no lru resize" fi - - unlinkmany $DIR/$tdir/f $NR } run_test 124b "lru resize (performance test) ======================="
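
The client-side cancel policy added above (ldlm_cancel_lrur_policy in lustre/ldlm/ldlm_request.c) reduces to one comparison: a cached lock's volume, computed as lock_volume_factor * idle-time-in-seconds * number-of-unused-locks, is weighed against the server lock volume (SLV) carried back in each reply, and the lock becomes a cancel candidate once its volume reaches the SLV. The following is a minimal user-space sketch of that decision rule only; the struct and helper names are illustrative and do not exist in the Lustre tree.

/*
 * Hedged sketch of the lru-resize cancel rule. Only the formula
 * lv = lvf * age * unused and the comparison against SLV mirror the
 * patch; everything else here is hypothetical scaffolding.
 */
#include <stdint.h>
#include <stdio.h>

struct pool_sample {
        uint64_t slv;   /* server lock volume from the last reply */
        uint32_t lvf;   /* lock_volume_factor (procfs tunable) */
};

/* Return 1 if a lock idle for @age_sec should be cancelled while
 * @unused locks sit on the namespace lru, 0 otherwise. */
static int lrur_should_cancel(const struct pool_sample *p,
                              uint64_t age_sec, unsigned int unused)
{
        uint64_t lv = (uint64_t)p->lvf * age_sec * unused;

        /* slv <= 1 means no SLV has been obtained from the server yet. */
        return p->slv > 1 && lv >= p->slv;
}

int main(void)
{
        struct pool_sample p = { .slv = 1000000, .lvf = 1 };

        printf("young lock: %d\n", lrur_should_cancel(&p, 5, 100));
        printf("old lock:   %d\n", lrur_should_cancel(&p, 3600, 500));
        return 0;
}

Raising lock_volume_factor (as test_124a does via pool/lock_volume_factor) simply scales lv, making clients shed cached locks proportionally sooner for the same SLV.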
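
The shrinker policy (ldlm_cancel_shrink_policy) instead ranks locks by cost: an extent lock costs one unit plus one unit per covered page, any other lock (including ibits) costs one unit, and only locks whose cost does not exceed ns_shrink_thumb (LDLM_LOCK_SHRINK_THUMB, 256 pages, i.e. 1M with 4K pages) are cancelled on memory pressure, so expensive extents stay in the lru. Below is a hedged stand-alone sketch of that cost rule under a fixed 4K page size; the helper names are illustrative, not part of the patch.

/*
 * Sketch of the shrinker cost rule: extent locks cost 1 + pages covered,
 * other locks cost 1, and only costs at or below the shrink thumb are
 * cancelled under memory pressure.
 */
#include <stdint.h>
#include <stdio.h>

#define SHRINK_THUMB_PAGES 256U    /* LDLM_LOCK_SHRINK_THUMB default */
#define PAGE_SIZE_BYTES    4096ULL /* CFS_PAGE_SIZE on i386 */

/* Cost of an extent lock covering [start, end) in bytes. */
static uint64_t extent_lock_cost(uint64_t start, uint64_t end)
{
        return 1 + (end - start) / PAGE_SIZE_BYTES;
}

/* Non-extent (e.g. inodebits) locks always cost one unit. */
static uint64_t plain_lock_cost(void)
{
        return 1;
}

/* 1 - cheap enough to cancel on memory pressure, 0 - keep in the lru. */
static int shrink_should_cancel(uint64_t cost, unsigned int shrink_thumb)
{
        return cost <= shrink_thumb;
}

int main(void)
{
        /* A 512K extent (128 pages) is cancelled, a 4M extent is kept. */
        printf("small extent: %d\n",
               shrink_should_cancel(extent_lock_cost(0, 512 * 1024),
                                    SHRINK_THUMB_PAGES));
        printf("large extent: %d\n",
               shrink_should_cancel(extent_lock_cost(0, 4 * 1024 * 1024),
                                    SHRINK_THUMB_PAGES));
        printf("ibits lock:   %d\n",
               shrink_should_cancel(plain_lock_cost(), SHRINK_THUMB_PAGES));
        return 0;
}

The per-namespace threshold is exported as the new shrink_thumb proc entry, so the 256-page default can be tuned without rebuilding.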