From c995eec54b4031ae78574a3080a5b51ecd121287 Mon Sep 17 00:00:00 2001
From: shadow
Date: Mon, 19 Nov 2007 14:57:20 +0000
Subject: [PATCH] If the client receives a lock with the CBPENDING flag set,
 ldlm needs to send the cancel as a separate RPC, to avoid a situation where
 the cancel request cannot be processed because all I/O threads are blocked
 waiting on the lock.

b=13917
i=green
i=vitaly
---
 lustre/ChangeLog           |   9 +++
 lustre/ldlm/ldlm_request.c | 149 ++++++++++++++++++++++++---------------------
 2 files changed, 88 insertions(+), 70 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index c267c0b..6546c5b 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -14,6 +14,7 @@ tbd Cluster File Systems, Inc.
        * Recommended e2fsprogs version: 1.40.2-cfs4
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+
 Severity   : normal
 Bugzilla   : 3462
 Description: Fix a replay issue
@@ -108,6 +109,14 @@ Details    : Change the structure of stats under obdfilter and mds to
        * Recommended e2fsprogs version: 1.40.2-cfs1
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+Severity   : major
+Frequency  : occasional
+Bugzilla   : 13917
+Description: MDS hangs or stays stuck waiting on a lock
+Details    : If the client receives a lock with the CBPENDING flag set, ldlm
+             must send the lock cancel as a separate RPC, to avoid the cancel
+             request stalling because all I/O threads are waiting on a lock.
+
 Severity   : normal
 Frequency  : occasional
 Bugzilla   : 13730
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 1bf69f7..8db7f89 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -450,7 +450,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
              * bug 7311). */
             (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
                 lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING;
+                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
                 unlock_res_and_lock(lock);
                 LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
         }
@@ -804,7 +804,7 @@ static int ldlm_cli_cancel_local(struct ldlm_lock *lock)
 {
         int rc = LDLM_FL_LOCAL_ONLY;
         ENTRY;
-        
+
         if (lock->l_conn_export) {
                 int local_only;
 
@@ -853,7 +853,7 @@ static void ldlm_cancel_pack(struct ptlrpc_request *req, int off,
         LASSERT(dlm != NULL);
 
         /* Check the room in the request buffer. */
-        max = lustre_msg_buflen(req->rq_reqmsg, off) - 
+        max = lustre_msg_buflen(req->rq_reqmsg, off) -
                 sizeof(struct ldlm_request);
         max /= sizeof(struct lustre_handle);
         max += LDLM_LOCKREQ_HANDLES;
@@ -900,7 +900,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp,
         free = ldlm_req_handles_avail(exp, size, 2, 0);
         if (count > free)
                 count = free;
-        
+
         size[DLM_LOCKREQ_OFF] = ldlm_request_bufsize(count, LDLM_CANCEL);
         while (1) {
                 imp = class_exp2cliimp(exp);
@@ -946,7 +946,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp,
                 sent = count;
                 break;
         }
-        
+
         ptlrpc_req_finished(req);
         EXIT;
 out:
@@ -1025,7 +1025,63 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 }
 
-/* - Free space in lru for @count new locks,
+/* XXX: until we have compound requests and can cut cancels out of a generic
+ * RPC, cancels for locks with LDLM_FL_BL_AST set must go as separate RPCs. */
+static int ldlm_cancel_list(struct list_head *cancels, int count)
+{
+        CFS_LIST_HEAD(head);
+        struct ldlm_lock *lock, *next;
+        int left = 0, bl_ast = 0, rc;
+
+        left = count;
+        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
+                if (left-- == 0)
+                        break;
+
+                rc = ldlm_cli_cancel_local(lock);
+                if (rc == LDLM_FL_BL_AST) {
+                        LDLM_DEBUG(lock, "Cancel lock separately");
+                        list_del_init(&lock->l_bl_ast);
+                        list_add(&lock->l_bl_ast, &head);
+                        bl_ast++;
+                        continue;
+                }
+                if (rc == LDLM_FL_LOCAL_ONLY) {
+                        /* CANCEL RPC should not be sent to server. */
+                        list_del_init(&lock->l_bl_ast);
+                        LDLM_LOCK_PUT(lock);
+                        count--;
+                }
+
+        }
+        if (bl_ast > 0) {
+                count -= bl_ast;
+                ldlm_cli_cancel_list(&head, bl_ast, NULL, 0);
+        }
+
+        RETURN(count);
+}
+
+/* Cancel a lock list without sending any RPC to the server. */
+static int ldlm_cancel_list_local(struct list_head *cancels, int count)
+{
+        struct ldlm_lock *lock, *next;
+        int left = 0;
+
+        left = count;
+        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
+                if (left-- == 0)
+                        break;
+                ldlm_lock_cancel(lock);
+                /* CANCEL RPC should not be sent to server. */
+                list_del_init(&lock->l_bl_ast);
+                LDLM_LOCK_PUT(lock);
+                count--;
+        }
+        RETURN(count);
+}
+
+/* - Free space in lru for @count new locks,
  * redundant unused locks are canceled locally;
  * - also cancel locally unused aged locks;
  * - do not cancel more than @max locks;
@@ -1040,17 +1096,17 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                           int count, int max, int flags)
 {
-        int rc, added = 0, left, unused;
+        int added = 0, unused;
         cfs_time_t cur = cfs_time_current();
-        struct ldlm_lock *lock, *next;
+        struct ldlm_lock *lock;
         ENTRY;
 
         spin_lock(&ns->ns_unused_lock);
         unused = ns->ns_nr_unused;
-        
+
         if (!ns_connect_lru_resize(ns))
                 count += unused - ns->ns_max_unused;
-        
+
         while (!list_empty(&ns->ns_unused_list)) {
                 struct ldlm_pool *pl;
                 __u64 slv, lvf, lv;
@@ -1069,7 +1125,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                         break;
 
                 pl = &ns->ns_pool;
-                
+
                 if (ns_connect_lru_resize(ns)) {
                         cfs_time_t la;
 
@@ -1085,7 +1141,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                                               lock->l_last_used));
                         if (la == 0)
                                 la = 1;
-                        
+
                         /* Stop when slv is not yet come from server or
                          * lv is smaller than it is. */
                         lv = lvf * la * unused;
@@ -1102,7 +1158,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                                               lock->l_last_used)))
                                 break;
                 }
-                
+
                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
                 spin_unlock(&ns->ns_unused_lock);
 
@@ -1150,30 +1206,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
         }
         spin_unlock(&ns->ns_unused_lock);
 
-        /* Handle only @added inserted locks. */
-        left = added;
-        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
-                if (left-- == 0)
-                        break;
-
-                rc = ldlm_cli_cancel_local(lock);
-                if (rc == LDLM_FL_BL_AST) {
-                        CFS_LIST_HEAD(head);
-
-                        LDLM_DEBUG(lock, "Cancel lock separately");
-                        list_del_init(&lock->l_bl_ast);
-                        list_add(&lock->l_bl_ast, &head);
-                        ldlm_cli_cancel_req(lock->l_conn_export, &head, 1);
-                        rc = LDLM_FL_LOCAL_ONLY;
-                }
-                if (rc == LDLM_FL_LOCAL_ONLY) {
-                        /* CANCEL RPC should not be sent to server. */
-                        list_del_init(&lock->l_bl_ast);
-                        LDLM_LOCK_PUT(lock);
-                        added--;
-                }
-        }
-        RETURN(added);
+        RETURN(ldlm_cancel_list(cancels, added));
 }
 
 /* when called with LDLM_ASYNC the blocking callback will be handled
@@ -1218,8 +1251,8 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
                                ldlm_mode_t mode, int lock_flags, int flags,
                                void *opaque)
 {
-        struct ldlm_lock *lock, *next;
-        int count = 0, left;
+        struct ldlm_lock *lock;
+        int count = 0;
         ENTRY;
 
         lock_res(res);
@@ -1267,40 +1300,16 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
         unlock_res(res);
 
         /* Handle only @count inserted locks. */
-        left = count;
-        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
-                int rc = LDLM_FL_LOCAL_ONLY;
+        if ((flags & LDLM_FL_LOCAL_ONLY))
+                RETURN(ldlm_cancel_list_local(cancels, count));
 
-                if (left-- == 0)
-                        break;
-                if (flags & LDLM_FL_LOCAL_ONLY)
-                        ldlm_lock_cancel(lock);
-                else
-                        rc = ldlm_cli_cancel_local(lock);
-
-                if (rc == LDLM_FL_BL_AST) {
-                        CFS_LIST_HEAD(head);
-
-                        LDLM_DEBUG(lock, "Cancel lock separately");
-                        list_del_init(&lock->l_bl_ast);
-                        list_add(&lock->l_bl_ast, &head);
-                        ldlm_cli_cancel_req(lock->l_conn_export, &head, 1);
-                        rc = LDLM_FL_LOCAL_ONLY;
-                }
-                if (rc == LDLM_FL_LOCAL_ONLY) {
-                        /* CANCEL RPC should not be sent to server. */
-                        list_del_init(&lock->l_bl_ast);
-                        LDLM_LOCK_PUT(lock);
-                        count--;
-                }
-        }
-        RETURN(count);
+        RETURN(ldlm_cancel_list(cancels, count));
 }
 
-/* If @req is NULL, send CANCEL request to server with handles of locks 
- * in the @cancels. If EARLY_CANCEL is not supported, send CANCEL requests 
+/* If @req is NULL, send CANCEL request to server with handles of locks
+ * in the @cancels. If EARLY_CANCEL is not supported, send CANCEL requests
  * separately per lock.
- * If @req is not NULL, put handles of locks in @cancels into the request 
+ * If @req is not NULL, put handles of locks in @cancels into the request
  * buffer at the offset @off.
  * Destroy @cancels at the end. */
 int ldlm_cli_cancel_list(struct list_head *cancels, int count,
@@ -1312,7 +1321,7 @@ int ldlm_cli_cancel_list(struct list_head *cancels, int count,
 
         if (list_empty(cancels) || count == 0)
                 RETURN(0);
-        
+
         while (count) {
                 LASSERT(!list_empty(cancels));
                 lock = list_entry(cancels->next, struct ldlm_lock, l_bl_ast);
-- 
1.8.3.1
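
For readers who want the shape of the new ldlm_cancel_list() without the surrounding ldlm machinery, the sketch below shows the same pattern in plain userspace C. It is illustrative only and is not Lustre code: demo_lock, local_cancel() and send_cancel_rpc() are invented stand-ins for ldlm_lock, ldlm_cli_cancel_local() and ldlm_cli_cancel_req(), and the flag values are arbitrary.

/* Illustrative sketch only -- NOT part of the patch.  Invented stand-ins:
 * demo_lock ~ ldlm_lock, local_cancel() ~ ldlm_cli_cancel_local(),
 * send_cancel_rpc() ~ ldlm_cli_cancel_req(). */
#include <stdio.h>

#define DEMO_FL_BL_AST     0x1  /* stands in for LDLM_FL_BL_AST */
#define DEMO_FL_LOCAL_ONLY 0x2  /* stands in for LDLM_FL_LOCAL_ONLY */

struct demo_lock {
        int id;
        int flags;
};

/* Pretend local cancel: reports how the lock must be finished. */
static int local_cancel(const struct demo_lock *lk)
{
        return (lk->flags & DEMO_FL_BL_AST) ? DEMO_FL_BL_AST
                                            : DEMO_FL_LOCAL_ONLY;
}

/* Pretend network cancel: in the real code this is a dedicated CANCEL RPC,
 * so it never waits behind the server's busy I/O threads. */
static void send_cancel_rpc(const struct demo_lock *batch[], int n)
{
        printf("separate CANCEL RPC for %d lock(s):", n);
        for (int i = 0; i < n; i++)
                printf(" #%d", batch[i]->id);
        printf("\n");
}

int main(void)
{
        struct demo_lock locks[] = {
                { 1, 0 },                 /* plain unused lock             */
                { 2, DEMO_FL_BL_AST },    /* server already wants it back  */
                { 3, 0 },
                { 4, DEMO_FL_BL_AST },
        };
        const struct demo_lock *deferred[4];
        int n_deferred = 0, n_local = 0;

        /* Mirror ldlm_cancel_list(): locks whose local cancel reports
         * BL_AST are collected and cancelled via their own RPC; the rest
         * are finished locally with no RPC at all. */
        for (unsigned i = 0; i < sizeof(locks) / sizeof(locks[0]); i++) {
                if (local_cancel(&locks[i]) == DEMO_FL_BL_AST)
                        deferred[n_deferred++] = &locks[i];
                else
                        n_local++;
        }

        printf("%d lock(s) cancelled locally, no RPC needed\n", n_local);
        if (n_deferred > 0)
                send_cancel_rpc(deferred, n_deferred);
        return 0;
}

The design point of the patch is visible here: a lock whose cancel comes back flagged BL_AST is set aside and cancelled through its own dedicated CANCEL RPC, so the cancel can never queue behind server I/O threads that are themselves blocked waiting for that lock to be released.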