If a client receives a lock with the CBPENDING flag set, LDLM needs to send the
author    shadow <shadow>
Mon, 19 Nov 2007 14:57:20 +0000 (14:57 +0000)
committer shadow <shadow>
Mon, 19 Nov 2007 14:57:20 +0000 (14:57 +0000)
cancel as a separate RPC, to avoid the situation where the cancel request
cannot be processed because all I/O threads are stuck waiting on the lock.

b=13917
i=green
i=vitaly
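
In outline: the new ldlm_cancel_list() walks the candidate list, cancels each
lock locally, and moves any lock whose local cancel returns LDLM_FL_BL_AST
onto a private list that is then sent as its own cancel RPC. A condensed,
lightly annotated sketch of that logic (it mirrors the hunk added to
ldlm_request.c below and assumes Lustre's CFS list primitives and LDLM
helpers; it is not additional code):

    static int ldlm_cancel_list(struct list_head *cancels, int count)
    {
            CFS_LIST_HEAD(head);    /* locks that need their own cancel RPC */
            struct ldlm_lock *lock, *next;
            int left = count, bl_ast = 0, rc;

            list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
                    if (left-- == 0)
                            break;
                    rc = ldlm_cli_cancel_local(lock);
                    if (rc == LDLM_FL_BL_AST) {
                            /* A blocking AST is pending: move the lock to a
                             * private list so its cancel goes out as a
                             * separate RPC and cannot be stuck behind busy
                             * I/O threads. */
                            list_del_init(&lock->l_bl_ast);
                            list_add(&lock->l_bl_ast, &head);
                            bl_ast++;
                    } else if (rc == LDLM_FL_LOCAL_ONLY) {
                            /* No CANCEL RPC needs to reach the server. */
                            list_del_init(&lock->l_bl_ast);
                            LDLM_LOCK_PUT(lock);
                            count--;
                    }
            }
            if (bl_ast > 0) {
                    count -= bl_ast;
                    /* @req == NULL: ship the BL_AST locks in a dedicated RPC. */
                    ldlm_cli_cancel_list(&head, bl_ast, NULL, 0);
            }
            RETURN(count);  /* locks left on @cancels for normal batching */
    }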

lustre/ChangeLog
lustre/ldlm/ldlm_request.c

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index c267c0b..6546c5b 100644
@@ -14,6 +14,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * Recommended e2fsprogs version: 1.40.2-cfs4
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+
 Severity   : normal
 Bugzilla   : 3462
 Description: Fix a replay issue
@@ -108,6 +109,14 @@ Details    : Change the structure of stats under obdfilter and mds to
        * Recommended e2fsprogs version: 1.40.2-cfs1
        * Note that reiserfs quotas are disabled on SLES 10 in this kernel.
 
+Severity   : major
+Frequency  : occasional
+Bugzilla   : 13917
+Description: MDS hangs with all I/O threads stuck waiting on a lock
+Details    : If a client receives a lock with the CBPENDING flag set, LDLM
+            needs to send the cancel as a separate RPC, so the cancel is
+            not stuck behind I/O threads all waiting on the same lock.
+
 Severity   : normal
 Frequency  : occasional
 Bugzilla   : 13730
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 1bf69f7..8db7f89 100644
@@ -450,7 +450,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
              * bug 7311). */
             (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
                 lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING;
+                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
                 unlock_res_and_lock(lock);
                 LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
         }
@@ -804,7 +804,7 @@ static int ldlm_cli_cancel_local(struct ldlm_lock *lock)
 {
         int rc = LDLM_FL_LOCAL_ONLY;
         ENTRY;
-        
+
         if (lock->l_conn_export) {
                 int local_only;
 
@@ -853,7 +853,7 @@ static void ldlm_cancel_pack(struct ptlrpc_request *req, int off,
         LASSERT(dlm != NULL);
 
         /* Check the room in the request buffer. */
-        max = lustre_msg_buflen(req->rq_reqmsg, off) - 
+        max = lustre_msg_buflen(req->rq_reqmsg, off) -
                 sizeof(struct ldlm_request);
         max /= sizeof(struct lustre_handle);
         max += LDLM_LOCKREQ_HANDLES;
@@ -900,7 +900,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp,
         free = ldlm_req_handles_avail(exp, size, 2, 0);
         if (count > free)
                 count = free;
-        
+
         size[DLM_LOCKREQ_OFF] = ldlm_request_bufsize(count, LDLM_CANCEL);
         while (1) {
                 imp = class_exp2cliimp(exp);
@@ -946,7 +946,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp,
                 sent = count;
                 break;
         }
-        
+
         ptlrpc_req_finished(req);
         EXIT;
 out:
@@ -1025,7 +1025,63 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 
 }
 
-/* - Free space in lru for @count new locks, 
+/* XXX Until we have compound requests and can cut cancels out of generic
+ * RPCs, we need to send cancels with LDLM_FL_BL_AST set as separate RPCs. */
+static int ldlm_cancel_list(struct list_head *cancels, int count)
+{
+        CFS_LIST_HEAD(head);
+        struct ldlm_lock *lock, *next;
+        int left = 0, bl_ast = 0, rc;
+
+        left = count;
+        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
+                if (left-- == 0)
+                        break;
+
+                rc = ldlm_cli_cancel_local(lock);
+                if (rc == LDLM_FL_BL_AST) {
+                        LDLM_DEBUG(lock, "Cancel lock separately");
+                        list_del_init(&lock->l_bl_ast);
+                        list_add(&lock->l_bl_ast, &head);
+                        bl_ast++;
+                        continue;
+                }
+                if (rc == LDLM_FL_LOCAL_ONLY) {
+                        /* CANCEL RPC should not be sent to server. */
+                        list_del_init(&lock->l_bl_ast);
+                        LDLM_LOCK_PUT(lock);
+                        count--;
+                }
+
+        }
+        if (bl_ast > 0) {
+                count -= bl_ast;
+                ldlm_cli_cancel_list(&head, bl_ast, NULL, 0);
+        }
+
+        RETURN(count);
+}
+
+/* Cancel a list of locks without sending any RPC to the server. */
+static int ldlm_cancel_list_local(struct list_head *cancels, int count)
+{
+        struct ldlm_lock *lock, *next;
+        int left = 0;
+
+        left = count;
+        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
+                if (left-- == 0)
+                        break;
+                ldlm_lock_cancel(lock);
+                /* CANCEL RPC should not be sent to server. */
+                list_del_init(&lock->l_bl_ast);
+                LDLM_LOCK_PUT(lock);
+                count--;
+        }
+        RETURN(count);
+}
+
+/* - Free space in lru for @count new locks,
  *   redundant unused locks are canceled locally;
  * - also cancel locally unused aged locks;
  * - do not cancel more than @max locks;
@@ -1040,17 +1096,17 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                           int count, int max, int flags)
 {
-        int rc, added = 0, left, unused;
+        int added = 0, unused;
         cfs_time_t cur = cfs_time_current();
-        struct ldlm_lock *lock, *next;
+        struct ldlm_lock *lock;
         ENTRY;
 
         spin_lock(&ns->ns_unused_lock);
         unused = ns->ns_nr_unused;
-        
+
         if (!ns_connect_lru_resize(ns))
                 count += unused - ns->ns_max_unused;
-                
+
         while (!list_empty(&ns->ns_unused_list)) {
                 struct ldlm_pool *pl;
                 __u64 slv, lvf, lv;
@@ -1069,7 +1125,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                         break;
 
                 pl = &ns->ns_pool;
-                
+
                 if (ns_connect_lru_resize(ns)) {
                         cfs_time_t la;
 
@@ -1085,7 +1141,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                                                       lock->l_last_used));
                                 if (la == 0)
                                         la = 1;
-                                
+
                                 /* Stop when slv is not yet come from server or 
                                  * lv is smaller than it is. */
                                 lv = lvf * la * unused;
@@ -1102,7 +1158,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
                                                 lock->l_last_used)))
                                 break;
                 }
-                
+
                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
                 spin_unlock(&ns->ns_unused_lock);
 
@@ -1150,30 +1206,7 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
         }
         spin_unlock(&ns->ns_unused_lock);
 
-        /* Handle only @added inserted locks. */
-        left = added;
-        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
-                if (left-- == 0)
-                        break;
-
-                rc = ldlm_cli_cancel_local(lock);
-                if (rc == LDLM_FL_BL_AST) {
-                        CFS_LIST_HEAD(head);
-
-                        LDLM_DEBUG(lock, "Cancel lock separately");
-                        list_del_init(&lock->l_bl_ast);
-                        list_add(&lock->l_bl_ast, &head);
-                        ldlm_cli_cancel_req(lock->l_conn_export, &head, 1);
-                        rc = LDLM_FL_LOCAL_ONLY;
-                }
-                if (rc == LDLM_FL_LOCAL_ONLY) {
-                        /* CANCEL RPC should not be sent to server. */
-                        list_del_init(&lock->l_bl_ast);
-                        LDLM_LOCK_PUT(lock);
-                        added--;
-                }
-        } 
-        RETURN(added);
+        RETURN(ldlm_cancel_list(cancels, added));
 }
 
 /* when called with LDLM_ASYNC the blocking callback will be handled
@@ -1218,8 +1251,8 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
                                ldlm_mode_t mode, int lock_flags,
                                int flags, void *opaque)
 {
-        struct ldlm_lock *lock, *next;
-        int count = 0, left;
+        struct ldlm_lock *lock;
+        int count = 0;
         ENTRY;
 
         lock_res(res);
@@ -1267,40 +1300,16 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
         unlock_res(res);
 
         /* Handle only @count inserted locks. */
-        left = count;
-        list_for_each_entry_safe(lock, next, cancels, l_bl_ast) {
-                int rc = LDLM_FL_LOCAL_ONLY;
+        if (flags & LDLM_FL_LOCAL_ONLY)
+                RETURN(ldlm_cancel_list_local(cancels, count));
 
-                if (left-- == 0)
-                        break;
-                if (flags & LDLM_FL_LOCAL_ONLY)
-                        ldlm_lock_cancel(lock);
-                else
-                        rc = ldlm_cli_cancel_local(lock);
-
-                if (rc == LDLM_FL_BL_AST) {
-                        CFS_LIST_HEAD(head);
-
-                        LDLM_DEBUG(lock, "Cancel lock separately");
-                        list_del_init(&lock->l_bl_ast);
-                        list_add(&lock->l_bl_ast, &head);
-                        ldlm_cli_cancel_req(lock->l_conn_export, &head, 1);
-                        rc = LDLM_FL_LOCAL_ONLY;
-                }
-                if (rc == LDLM_FL_LOCAL_ONLY) {
-                        /* CANCEL RPC should not be sent to server. */
-                        list_del_init(&lock->l_bl_ast);
-                        LDLM_LOCK_PUT(lock);
-                        count--;
-                }
-        }
-        RETURN(count);
+        RETURN(ldlm_cancel_list(cancels, count));
 }
 
-/* If @req is NULL, send CANCEL request to server with handles of locks 
- * in the @cancels. If EARLY_CANCEL is not supported, send CANCEL requests 
+/* If @req is NULL, send a CANCEL request to the server with the handles of
+ * the locks in @cancels. If EARLY_CANCEL is not supported, send CANCEL requests
  * separately per lock.
- * If @req is not NULL, put handles of locks in @cancels into the request 
+ * If @req is not NULL, pack the handles of the locks in @cancels into the request
  * buffer at the offset @off.
  * Destroy @cancels at the end. */
 int ldlm_cli_cancel_list(struct list_head *cancels, int count,
@@ -1312,7 +1321,7 @@ int ldlm_cli_cancel_list(struct list_head *cancels, int count,
 
         if (list_empty(cancels) || count == 0)
                 RETURN(0);
-        
+
         while (count) {
                 LASSERT(!list_empty(cancels));
                 lock = list_entry(cancels->next, struct ldlm_lock, l_bl_ast);
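
To make the ldlm_cli_cancel_list() contract described above concrete, here is
a minimal hypothetical caller; cancel_unused_example(), the batch size of 32,
and the zero flags are illustration only, not part of this patch:

    static int cancel_unused_example(struct ldlm_namespace *ns)
    {
            CFS_LIST_HEAD(cancels);
            int count;
            ENTRY;

            /* Collect up to 32 unused locks off the LRU; inside,
             * ldlm_cancel_list() has already sent any LDLM_FL_BL_AST
             * locks to the server as separate cancel RPCs. */
            count = ldlm_cancel_lru_local(ns, &cancels, 0, 32, 0);

            /* @req == NULL: send CANCEL RPC(s) for the remaining handles.
             * Passing a request and an offset instead would pack the
             * handles into that request buffer (early cancel) without
             * sending a separate RPC. */
            RETURN(ldlm_cli_cancel_list(&cancels, count, NULL, 0));
    }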