branch: HEAD

[fs/lustre-release.git] / lustre / ldlm / ldlm_extent.c
diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c

index f1f88ce..03172d6 100644 (file)
--- a/lustre/ldlm/ldlm_extent.c
+++ b/lustre/ldlm/ldlm_extent.c
@@ -1,36 +1,55 @@
  /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   * vim:expandtab:shiftwidth=8:tabstop=8:
   *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
- *   Author: Peter Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
+ * GPL HEADER START
   *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
   *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
   *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ldlm/ldlm_extent.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
   */
  
  #define DEBUG_SUBSYSTEM S_LDLM
  #ifndef __KERNEL__
  # include <liblustre.h>
+#else
+# include <libcfs/libcfs.h>
  #endif
  
  #include <lustre_dlm.h>
  #include <obd_support.h>
+#include <obd.h>
+#include <obd_class.h>
  #include <lustre_lib.h>
  
  #include "ldlm_internal.h"
@@ -46,7 +65,7 @@ static void ldlm_extent_internal_policy_fixup(struct ldlm_lock *req,
          __u64 req_start = req->l_req_extent.start;
          __u64 req_end = req->l_req_extent.end;
          __u64 req_align, mask;
- 
+
          if (conflicting > 32 && (req_mode == LCK_PW || req_mode == LCK_CW)) {
                  if (req_end < req_start + LDLM_MAX_GROWN_EXTENT)
                          new_ex->end = min(req_start + LDLM_MAX_GROWN_EXTENT,
@@ -116,7 +135,9 @@ static void ldlm_extent_internal_policy_granted(struct ldlm_lock *req,
                          limiter.start = req_start;
  
                  if (interval_is_overlapped(tree->lit_root, &ext))
-                        printk("req_mode = %d, tree->lit_mode = %d, tree->lit_size = %d\n",
+                        CDEBUG(D_INFO, 
+                               "req_mode = %d, tree->lit_mode = %d, "
+                               "tree->lit_size = %d\n",
                                 req_mode, tree->lit_mode, tree->lit_size);
                  interval_expand(tree->lit_root, &ext, &limiter);
                  limiter.start = max(limiter.start, ext.start);
@@ -196,7 +217,7 @@ ldlm_extent_internal_policy_waiting(struct ldlm_lock *req,
                          continue;
  
                  /* We grow extents downwards only as far as they don't overlap
-                 * with already-granted locks, on the assumtion that clients
+                 * with already-granted locks, on the assumption that clients
                   * will be writing beyond the initial requested end and would
                   * then need to enqueue a new lock beyond previous request.
                   * l_req_extent->end strictly < req_start, checked above. */
@@ -259,10 +280,26 @@ static void ldlm_extent_policy(struct ldlm_resource *res,
          }
  }
  
+static int ldlm_check_contention(struct ldlm_lock *lock, int contended_locks)
+{
+        struct ldlm_resource *res = lock->l_resource;
+        cfs_time_t now = cfs_time_current();
+
+        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_SET_CONTENTION))
+                return 1;
+
+        CDEBUG(D_DLMTRACE, "contended locks = %d\n", contended_locks);
+        if (contended_locks > res->lr_namespace->ns_contended_locks)
+                res->lr_contention_time = now;
+        return cfs_time_before(now, cfs_time_add(res->lr_contention_time,
+                cfs_time_seconds(res->lr_namespace->ns_contention_time)));
+}
+
  struct ldlm_extent_compat_args {
          struct list_head *work_list;
          struct ldlm_lock *lock;
          ldlm_mode_t mode;
+        int *locks;
          int *compat;
  };
  
@@ -271,9 +308,11 @@ static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
  {
          struct ldlm_extent_compat_args *priv = data;
          struct ldlm_interval *node = to_ldlm_interval(n);
+        struct ldlm_extent *extent;
          struct list_head *work_list = priv->work_list;
          struct ldlm_lock *lock, *enq = priv->lock;
          ldlm_mode_t mode = priv->mode;
+        int count = 0;
          ENTRY;
  
          LASSERT(!list_empty(&node->li_group));
@@ -284,11 +323,17 @@ static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
                           "mode = %s, lock->l_granted_mode = %s\n",
                           ldlm_lockname[mode],
                           ldlm_lockname[lock->l_granted_mode]);
-
+                count++;
                  if (lock->l_blocking_ast)
                          ldlm_add_ast_work_item(lock, enq, work_list);
          }
  
+        /* don't count conflicting glimpse locks */
+        extent = ldlm_interval_extent(node);
+        if (!(mode == LCK_PR &&
+            extent->start == 0 && extent->end == OBD_OBJECT_EOF))
+                *priv->locks += count;
+
          if (priv->compat)
                  *priv->compat = 0;
  
@@ -307,7 +352,7 @@ static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
  static int
  ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                           int *flags, ldlm_error_t *err,
-                         struct list_head *work_list)
+                         struct list_head *work_list, int *contended_locks)
  {
          struct list_head *tmp;
          struct ldlm_lock *lock;
@@ -317,6 +362,7 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
          __u64 req_end = req->l_req_extent.end;
          int compat = 1;
          int scan = 0;
+        int check_contention;
          ENTRY;
  
          lockmode_verify(req_mode);
@@ -326,6 +372,7 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                  struct ldlm_interval_tree *tree;
                  struct ldlm_extent_compat_args data = {.work_list = work_list,
                                                 .lock = req,
+                                               .locks = contended_locks,
                                                 .compat = &compat };
                  struct interval_node_extent ex = { .start = req_start,
                                                     .end = req_end };
@@ -382,157 +429,179 @@ ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                                          compat = 0;
                          }
                  }
-                RETURN(compat);
-        }
+        } else { /* for waiting queue */
+                list_for_each(tmp, queue) {
+                        check_contention = 1;
+
+                        lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+
+                        if (req == lock)
+                                break;
+
+                        if (unlikely(scan)) {
+                                /* We only get here if we are queuing GROUP lock
+                                   and met some incompatible one. The main idea of this
+                                   code is to insert GROUP lock past compatible GROUP
+                                   lock in the waiting queue or if there is not any,
+                                   then in front of first non-GROUP lock */
+                                if (lock->l_req_mode != LCK_GROUP) {
+                                        /* Ok, we hit non-GROUP lock, there should
+                                         * be no more GROUP locks later on, queue in
+                                         * front of first non-GROUP lock */
+
+                                        ldlm_resource_insert_lock_after(lock, req);
+                                        list_del_init(&lock->l_res_link);
+                                        ldlm_resource_insert_lock_after(req, lock);
+                                        compat = 0;
+                                        break;
+                                }
+                                if (req->l_policy_data.l_extent.gid ==
+                                    lock->l_policy_data.l_extent.gid) {
+                                        /* found it */
+                                        ldlm_resource_insert_lock_after(lock, req);
+                                        compat = 0;
+                                        break;
+                                }
+                                continue;
+                        }
  
-        /* for waiting queue */
-        list_for_each(tmp, queue) {
-                lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+                        /* locks are compatible, overlap doesn't matter */
+                        if (lockmode_compat(lock->l_req_mode, req_mode)) {
+                                if (req_mode == LCK_PR &&
+                                    ((lock->l_policy_data.l_extent.start <=
+                                      req->l_policy_data.l_extent.start) &&
+                                     (lock->l_policy_data.l_extent.end >=
+                                      req->l_policy_data.l_extent.end))) {
+                                        /* If we met a PR lock just like us or wider,
+                                           and nobody down the list conflicted with
+                                           it, that means we can skip processing of
+                                           the rest of the list and safely place
+                                           ourselves at the end of the list, or grant
+                                           (depending on whether we met conflicting locks
+                                           before in the list).
+                                           In case of 1st enqueue only we continue
+                                           traversing if there is something conflicting
+                                           down the list because we need to make sure
+                                           that something is marked as AST_SENT as well,
+                                           in case of empty worklist we would exit on
+                                           first conflict met. */
+                                        /* There IS a case where such flag is
+                                           not set for a lock, yet it blocks
+                                           something. Luckily for us this is
+                                           only during destroy, so lock is
+                                           exclusive. So here we are safe */
+                                        if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
+                                                RETURN(compat);
+                                        }
+                                }
  
-                if (req == lock)
-                        RETURN(compat);
-
-                if (unlikely(scan)) {
-                        /* We only get here if we are queuing GROUP lock
-                           and met some incompatible one. The main idea of this
-                           code is to insert GROUP lock past compatible GROUP
-                           lock in the waiting queue or if there is not any,
-                           then in front of first non-GROUP lock */
-                        if (lock->l_req_mode != LCK_GROUP) {
-                                /* Ok, we hit non-GROUP lock, there should
-                                 * be no more GROUP locks later on, queue in
-                                 * front of first non-GROUP lock */
-
-                                ldlm_resource_insert_lock_after(lock, req);
-                                list_del_init(&lock->l_res_link);
-                                ldlm_resource_insert_lock_after(req, lock);
-                                RETURN(0);
-                        }
-                        if (req->l_policy_data.l_extent.gid ==
-                             lock->l_policy_data.l_extent.gid) {
-                                /* found it */
-                                ldlm_resource_insert_lock_after(lock, req);
-                                RETURN(0);
-                        }
-                        continue;
-                }
+                                /* non-group locks are compatible, overlap doesn't
+                                   matter */
+                                if (likely(req_mode != LCK_GROUP))
+                                        continue;
  
-                /* locks are compatible, overlap doesn't matter */
-                if (lockmode_compat(lock->l_req_mode, req_mode)) {
-                        if (req_mode == LCK_PR &&
-                            ((lock->l_policy_data.l_extent.start <=
-                             req->l_policy_data.l_extent.start) &&
-                             (lock->l_policy_data.l_extent.end >=
-                              req->l_policy_data.l_extent.end))) {
-                                /* If we met a PR lock just like us or wider,
-                                   and nobody down the list conflicted with
-                                   it, that means we can skip processing of
-                                   the rest of the list and safely place
-                                   ourselves at the end of the list, or grant
-                                   (dependent if we met an conflicting locks
-                                   before in the list).
-                                   In case of 1st enqueue only we continue
-                                   traversing if there is something conflicting
-                                   down the list because we need to make sure
-                                   that something is marked as AST_SENT as well,
-                                   in cse of empy worklist we would exit on
-                                   first conflict met. */
-                                /* There IS a case where such flag is
-                                   not set for a lock, yet it blocks
-                                   something. Luckily for us this is
-                                   only during destroy, so lock is
-                                   exclusive. So here we are safe */
-                                if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
-                                        RETURN(compat);
+                                /* If we are trying to get a GROUP lock and there is
+                                   another one of this kind, we need to compare gid */
+                                if (req->l_policy_data.l_extent.gid ==
+                                    lock->l_policy_data.l_extent.gid) {
+                                        /* If existing lock with matched gid is granted,
+                                           we grant new one too. */
+                                        if (lock->l_req_mode == lock->l_granted_mode)
+                                                RETURN(2);
+
+                                        /* Otherwise we are scanning queue of waiting
+                                         * locks and it means current request would
+                                         * block along with existing lock (that is
+                                         * already blocked).
+                                         * If we are in nonblocking mode - return
+                                         * immediately */
+                                        if (*flags & LDLM_FL_BLOCK_NOWAIT) {
+                                                compat = -EWOULDBLOCK;
+                                                goto destroylock;
+                                        }
+                                        /* If this group lock is compatible with another
+                                         * group lock on the waiting list, they must be
+                                         * together in the list, so they can be granted
+                                         * at the same time.  Otherwise the later lock
+                                         * can get stuck behind another, incompatible,
+                                         * lock. */
+                                        ldlm_resource_insert_lock_after(lock, req);
+                                        /* Because 'lock' is not granted, we can stop
+                                         * processing this queue and return immediately.
+                                         * There is no need to check the rest of the
+                                         * list. */
+                                        RETURN(0);
                                  }
                          }
  
-                        /* non-group locks are compatible, overlap doesn't
-                           matter */
-                        if (likely(req_mode != LCK_GROUP))
+                        if (unlikely(req_mode == LCK_GROUP &&
+                                     (lock->l_req_mode != lock->l_granted_mode))) {
+                                scan = 1;
+                                compat = 0;
+                                if (lock->l_req_mode != LCK_GROUP) {
+                                        /* Ok, we hit non-GROUP lock, there should be no
+                                           more GROUP locks later on, queue in front of
+                                           first non-GROUP lock */
+
+                                        ldlm_resource_insert_lock_after(lock, req);
+                                        list_del_init(&lock->l_res_link);
+                                        ldlm_resource_insert_lock_after(req, lock);
+                                        break;
+                                }
+                                if (req->l_policy_data.l_extent.gid ==
+                                    lock->l_policy_data.l_extent.gid) {
+                                        /* found it */
+                                        ldlm_resource_insert_lock_after(lock, req);
+                                        break;
+                                }
                                  continue;
+                        }
  
-                        /* If we are trying to get a GROUP lock and there is
-                           another one of this kind, we need to compare gid */
-                        if (req->l_policy_data.l_extent.gid ==
-                            lock->l_policy_data.l_extent.gid) {
-                                /* If existing lock with matched gid is granted,
-                                   we grant new one too. */
-                                if (lock->l_req_mode == lock->l_granted_mode)
-                                        RETURN(2);
-
-                                /* Otherwise we are scanning queue of waiting
-                                 * locks and it means current request would
-                                 * block along with existing lock (that is
-                                 * already blocked.
-                                 * If we are in nonblocking mode - return
-                                 * immediately */
+                        if (unlikely(lock->l_req_mode == LCK_GROUP)) {
+                                /* If compared lock is GROUP, then requested is PR/PW/
+                                 * so this is not compatible; extent range does not
+                                 * matter */
                                  if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                          compat = -EWOULDBLOCK;
                                          goto destroylock;
+                                } else {
+                                        *flags |= LDLM_FL_NO_TIMEOUT;
                                  }
-                                /* If this group lock is compatible with another
-                                 * group lock on the waiting list, they must be
-                                 * together in the list, so they can be granted
-                                 * at the same time.  Otherwise the later lock
-                                 * can get stuck behind another, incompatible,
-                                 * lock. */
-                                ldlm_resource_insert_lock_after(lock, req);
-                                /* Because 'lock' is not granted, we can stop
-                                 * processing this queue and return immediately.
-                                 * There is no need to check the rest of the
-                                 * list. */
-                                RETURN(0);
+                        } else if (lock->l_policy_data.l_extent.end < req_start ||
+                                   lock->l_policy_data.l_extent.start > req_end) {
+                                /* if a non group lock doesn't overlap skip it */
+                                continue;
+                        } else if (lock->l_req_extent.end < req_start ||
+                                   lock->l_req_extent.start > req_end) {
+                                /* false contention, the requests don't really overlap */
+                                check_contention = 0;
                          }
-                }
  
-                if (unlikely(req_mode == LCK_GROUP &&
-                    (lock->l_req_mode != lock->l_granted_mode))) {
-                        scan = 1;
-                        compat = 0;
-                        if (lock->l_req_mode != LCK_GROUP) {
-                        /* Ok, we hit non-GROUP lock, there should be no
-                           more GROUP locks later on, queue in front of
-                           first non-GROUP lock */
-
-                                ldlm_resource_insert_lock_after(lock, req);
-                                list_del_init(&lock->l_res_link);
-                                ldlm_resource_insert_lock_after(req, lock);
+                        if (!work_list)
                                  RETURN(0);
-                        }
-                        if (req->l_policy_data.l_extent.gid ==
-                             lock->l_policy_data.l_extent.gid) {
-                                /* found it */
-                                ldlm_resource_insert_lock_after(lock, req);
-                                RETURN(0);
-                        }
-                        continue;
-                }
  
-                if (unlikely(lock->l_req_mode == LCK_GROUP)) {
-                        /* If compared lock is GROUP, then requested is PR/PW/
-                         * so this is not compatible; extent range does not
-                         * matter */
-                        if (*flags & LDLM_FL_BLOCK_NOWAIT) {
-                                compat = -EWOULDBLOCK;
-                                goto destroylock;
-                        } else {
-                                *flags |= LDLM_FL_NO_TIMEOUT;
-                        }
-                } else if (lock->l_policy_data.l_extent.end < req_start ||
-                           lock->l_policy_data.l_extent.start > req_end) {
-                        /* if a non group lock doesn't overlap skip it */
-                        continue;
-                }
+                        /* don't count conflicting glimpse locks */
+                        if (lock->l_req_mode == LCK_PR &&
+                            lock->l_policy_data.l_extent.start == 0 &&
+                            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
+                                check_contention = 0;
  
-                if (!work_list)
-                        RETURN(0);
+                        *contended_locks += check_contention;
  
-                compat = 0;
-                if (lock->l_blocking_ast)
-                        ldlm_add_ast_work_item(lock, req, work_list);
+                        compat = 0;
+                        if (lock->l_blocking_ast)
+                                ldlm_add_ast_work_item(lock, req, work_list);
+                }
          }
  
+        if (ldlm_check_contention(req, *contended_locks) &&
+            compat == 0 &&
+            (*flags & LDLM_FL_DENY_ON_CONTENTION) &&
+            req->l_req_mode != LCK_GROUP &&
+            req_end - req_start <=
+            req->l_resource->lr_namespace->ns_max_nolock_size)
+                GOTO(destroylock, compat = -EUSERS);
+
          RETURN(compat);
  destroylock:
          list_del_init(&req->l_res_link);
@@ -541,6 +610,27 @@ destroylock:
          RETURN(compat);
  }
  
+static void discard_bl_list(struct list_head *bl_list)
+{
+        struct list_head *tmp, *pos;
+        ENTRY;
+
+        list_for_each_safe(pos, tmp, bl_list) {
+                struct ldlm_lock *lock =
+                        list_entry(pos, struct ldlm_lock, l_bl_ast);
+
+                list_del_init(&lock->l_bl_ast);
+                LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+                lock->l_flags &= ~LDLM_FL_AST_SENT;
+                LASSERT(lock->l_bl_ast_run == 0);
+                LASSERT(lock->l_blocking_lock);
+                LDLM_LOCK_RELEASE(lock->l_blocking_lock);
+                lock->l_blocking_lock = NULL;
+                LDLM_LOCK_RELEASE(lock);
+        }
+        EXIT;
+}
+
  /* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
    *   - blocking ASTs have already been sent
    *   - must call this function with the ns lock held
@@ -552,11 +642,14 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                               ldlm_error_t *err, struct list_head *work_list)
  {
          struct ldlm_resource *res = lock->l_resource;
-        struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list);
+        CFS_LIST_HEAD(rpc_list);
          int rc, rc2;
+        int contended_locks = 0;
          ENTRY;
  
          LASSERT(list_empty(&res->lr_converting));
+        LASSERT(!(*flags & LDLM_FL_DENY_ON_CONTENTION) ||
+                !(lock->l_flags & LDLM_AST_DISCARD_DATA));
          check_res_locked(res);
          *err = ELDLM_OK;
  
@@ -568,10 +661,11 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                   * being true, we want to find out. */
                  LASSERT(*flags == 0);
                  rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags,
-                                              err, NULL);
+                                              err, NULL, &contended_locks);
                  if (rc == 1) {
                          rc = ldlm_extent_compat_queue(&res->lr_waiting, lock,
-                                                      flags, err, NULL);
+                                                      flags, err, NULL,
+                                                      &contended_locks);
                  }
                  if (rc == 0)
                          RETURN(LDLM_ITER_STOP);
@@ -585,13 +679,16 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
          }
  
   restart:
-        rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags, err, &rpc_list);
+        contended_locks = 0;
+        rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags, err,
+                                      &rpc_list, &contended_locks);
          if (rc < 0)
                  GOTO(out, rc); /* lock was destroyed */
          if (rc == 2)
                  goto grant;
  
-        rc2 = ldlm_extent_compat_queue(&res->lr_waiting, lock, flags, err, &rpc_list);
+        rc2 = ldlm_extent_compat_queue(&res->lr_waiting, lock, flags, err,
+                                       &rpc_list, &contended_locks);
          if (rc2 < 0)
                  GOTO(out, rc = rc2); /* lock was destroyed */
  
@@ -611,16 +708,31 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                          ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                  unlock_res(res);
                  rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_BL_AST);
-                lock_res(res);
  
+                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_FAIL_RACE) &&
+                    !ns_is_client(res->lr_namespace))
+                        class_fail_export(lock->l_export);
+ 
+                lock_res(res);
                  if (rc == -ERESTART) {
+
+                        /* 15715: The lock was granted and destroyed after
+                         * resource lock was dropped. Interval node was freed
+                         * in ldlm_lock_destroy. Anyway, this always happens
+                         * when a client is being evicted. So it would be
+                         * ok to return an error. -jay */
+                        if (lock->l_destroyed) {
+                                *err = -EAGAIN;
+                                GOTO(out, rc = -EAGAIN);
+                        }
+
                          /* lock was granted while resource was unlocked. */
                          if (lock->l_granted_mode == lock->l_req_mode) {
                                  /* bug 11300: if the lock has been granted,
                                   * break earlier because otherwise, we will go
                                   * to restart and ldlm_resource_unlink will be
                                   * called and it causes the interval node to be
-                                 * freed. Then we will fail at 
+                                 * freed. Then we will fail at
                                   * ldlm_extent_add_lock() */
                                  *flags &= ~(LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV |
                                              LDLM_FL_BLOCK_WAIT);
@@ -636,8 +748,12 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                  *flags |= LDLM_FL_NO_TIMEOUT;
  
          }
-        rc = 0;
+        RETURN(0);
  out:
+        if (!list_empty(&rpc_list)) {
+                LASSERT(!(lock->l_flags & LDLM_AST_DISCARD_DATA));
+                discard_bl_list(&rpc_list);
+        }
          RETURN(rc);
  }
  
@@ -685,7 +801,7 @@ struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
          ENTRY;
  
          LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
-        OBD_SLAB_ALLOC(node, ldlm_interval_slab, CFS_ALLOC_IO, sizeof(*node));
+        OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, CFS_ALLOC_IO);
          if (node == NULL)
                  RETURN(NULL);
  
@@ -698,6 +814,7 @@ void ldlm_interval_free(struct ldlm_interval *node)
  {
          if (node) {
                  LASSERT(list_empty(&node->li_group));
+                LASSERT(!interval_is_intree(&node->li_node));
                  OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
          }
  }
@@ -750,6 +867,7 @@ void ldlm_extent_add_lock(struct ldlm_resource *res,
  
          node = lock->l_tree_node;
          LASSERT(node != NULL);
+        LASSERT(!interval_is_intree(&node->li_node));
  
          idx = lock_mode_to_index(lock->l_granted_mode);
          LASSERT(lock->l_granted_mode == 1 << idx);
@@ -777,14 +895,13 @@ void ldlm_extent_add_lock(struct ldlm_resource *res,
  void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
  {
          struct ldlm_resource *res = lock->l_resource;
-        struct ldlm_interval *node;
+        struct ldlm_interval *node = lock->l_tree_node;
          struct ldlm_interval_tree *tree;
          int idx;
  
-        if (lock->l_granted_mode != lock->l_req_mode)
+        if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */
                  return;
  
-        LASSERT(lock->l_tree_node != NULL);
          idx = lock_mode_to_index(lock->l_granted_mode);
          LASSERT(lock->l_granted_mode == 1 << idx);
          tree = &res->lr_itree[idx];