LU-3027 clio: Do not shrink sublock at cancel
[fs/lustre-release.git] / lustre/lov/lovsub_lock.c
index f02a2ce..8da2366 100644
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -42,7 +42,9 @@
 
 #include "lov_cl_internal.h"
 
-/** \addtogroup lov lov @{ */
+/** \addtogroup lov
+ *  @{
+ */
 
 /*****************************************************************************
  *
@@ -57,7 +59,7 @@ static void lovsub_lock_fini(const struct lu_env *env,
 
         ENTRY;
         lsl = cl2lovsub_lock(slice);
-        LASSERT(list_empty(&lsl->lss_parents));
+        LASSERT(cfs_list_empty(&lsl->lss_parents));
         OBD_SLAB_FREE_PTR(lsl, lovsub_lock_kmem);
         EXIT;
 }
@@ -86,28 +88,6 @@ static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
         EXIT;
 }
 
-static void lovsub_lock_state_one(const struct lu_env *env,
-                                  const struct lovsub_lock *lovsub,
-                                  struct lov_lock *lov)
-{
-        struct cl_lock       *parent;
-        const struct cl_lock *child;
-
-        ENTRY;
-        parent = lov->lls_cl.cls_lock;
-        child  = lovsub->lss_cl.cls_lock;
-
-        if (lovsub->lss_active != parent) {
-                lovsub_parent_lock(env, lov);
-                if (child->cll_error != 0)
-                        cl_lock_error(env, parent, child->cll_error);
-                else
-                        cl_lock_signal(env, parent);
-                lovsub_parent_unlock(env, lov);
-        }
-        EXIT;
-}
-
 /**
  * Implements cl_lock_operations::clo_state() method for lovsub layer, which
  * method is called whenever sub-lock state changes. Propagates state change
@@ -119,23 +99,20 @@ static void lovsub_lock_state(const struct lu_env *env,
 {
         struct lovsub_lock   *sub = cl2lovsub_lock(slice);
         struct lov_lock_link *scan;
-        struct lov_lock_link *temp;
 
         LASSERT(cl_lock_is_mutexed(slice->cls_lock));
         ENTRY;
 
-        /*
-         * Use _safe() version, because
-         *
-         *     lovsub_lock_state_one()
-         *       ->cl_lock_error()
-         *         ->cl_lock_delete()
-         *           ->lov_lock_delete()
-         *
-         * can unlink parent from the parent list.
-         */
-        list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list)
-                lovsub_lock_state_one(env, sub, scan->lll_super);
+        cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+                struct lov_lock *lov    = scan->lll_super;
+                struct cl_lock  *parent = lov->lls_cl.cls_lock;
+
+                if (sub->lss_active != parent) {
+                        lovsub_parent_lock(env, lov);
+                        cl_lock_signal(env, parent);
+                        lovsub_parent_unlock(env, lov);
+                }
+        }
         EXIT;
 }
 
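A minimal user-space analogue of the propagation rule in the hunk above, assuming pthreads and invented type names: when a child lock changes state, every interested parent except the one actively driving the child (lss_active above) is woken under its own mutex, which is what the lovsub_parent_lock()/cl_lock_signal() pairing amounts to.

#include <pthread.h>

struct parent {
        pthread_mutex_t mutex;
        pthread_cond_t  cond;
};

struct link {
        struct link   *next;
        struct parent *parent;
};

/* wake every parent except the one driving the state change,
 * taking each parent's own mutex around the wakeup */
static void child_state_changed(struct link *parents, struct parent *active)
{
        struct link *scan;

        for (scan = parents; scan != NULL; scan = scan->next) {
                struct parent *p = scan->parent;

                if (p == active)
                        continue;
                pthread_mutex_lock(&p->mutex);
                pthread_cond_broadcast(&p->cond);
                pthread_mutex_unlock(&p->mutex);
        }
}

int main(void)
{
        struct parent p = { PTHREAD_MUTEX_INITIALIZER,
                            PTHREAD_COND_INITIALIZER };
        struct link   l = { NULL, &p };

        child_state_changed(&l, NULL);  /* no active parent: wake all */
        return 0;
}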
@@ -154,7 +131,7 @@ static unsigned long lovsub_lock_weigh(const struct lu_env *env,
 
         LASSERT(cl_lock_is_mutexed(slice->cls_lock));
 
-        if (!list_empty(&lock->lss_parents)) {
+        if (!cfs_list_empty(&lock->lss_parents)) {
                 /*
                  * It is not clear whether all parents have to be asked and
                  * their estimations summed, or it is enough to ask one. For
@@ -176,10 +153,9 @@ static unsigned long lovsub_lock_weigh(const struct lu_env *env,
  * Maps start/end offsets within a stripe, to offsets within a file.
  */
 static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
-                                  struct lov_object *obj,
-                                  int stripe, struct cl_lock_descr *out)
+                                 struct lov_object *lov,
+                                 int stripe, struct cl_lock_descr *out)
 {
-        struct lov_stripe_md *lsm = lov_r0(obj)->lo_lsm;
         pgoff_t size; /* stripe size in pages */
         pgoff_t skip; /* how many pages in every stripe are occupied by
                        * "other" stripes */
@@ -190,12 +166,9 @@ static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
         start = in->cld_start;
         end   = in->cld_end;
 
-        /*
-         * XXX join file support.
-         */
-        if (lsm->lsm_stripe_count > 1) {
-                size = cl_index(lov2cl(obj), lsm->lsm_stripe_size);
-                skip = (lsm->lsm_stripe_count - 1) * size;
+        if (lov->lo_lsm->lsm_stripe_count > 1) {
+                size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
+                skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
 
                 /* XXX overflow check here? */
                 start += start/size * skip + stripe * size;
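The mapping above is plain RAID0 arithmetic: every stripe sees its own pages as contiguous, while in the file they recur once per stripe round. A standalone sketch of the same computation (the function and variable names are invented for illustration, not part of the patch):

#include <stdio.h>

typedef unsigned long pgoff_t;

/* Map a page offset within one stripe to the matching page offset in
 * the file, for a file striped over "count" stripes of "size" pages
 * each: the same computation as the hunk above. */
static pgoff_t stripe_to_file(pgoff_t start, pgoff_t size,
                              unsigned int count, unsigned int stripe)
{
        /* pages occupied by the "other" stripes in every round */
        pgoff_t skip = (pgoff_t)(count - 1) * size;

        return start + start / size * skip + (pgoff_t)stripe * size;
}

int main(void)
{
        /* 3 stripes of 256 pages: stripe 1 owns file pages 256..511,
         * 1024..1279, and so on */
        printf("%lu\n", stripe_to_file(0, 256, 3, 1));   /* 256  */
        printf("%lu\n", stripe_to_file(256, 256, 3, 1)); /* 1024 */
        return 0;
}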
@@ -231,7 +204,6 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
                        const struct cl_lock_descr *d, int idx)
 {
         struct cl_lock       *parent;
-        struct cl_lock       *child;
         struct lovsub_object *subobj;
         struct cl_lock_descr *pd;
         struct cl_lock_descr *parent_descr;
@@ -241,12 +213,12 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
         parent_descr = &parent->cll_descr;
         LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
 
-        child  = sublock->lss_cl.cls_lock;
         subobj = cl2lovsub(sublock->lss_cl.cls_obj);
         pd     = &lov_env_info(env)->lti_ldescr;
 
         pd->cld_obj  = parent_descr->cld_obj;
         pd->cld_mode = parent_descr->cld_mode;
+        pd->cld_gid  = parent_descr->cld_gid;
         lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
         lov->lls_sub[idx].sub_got = *d;
         /*
@@ -273,7 +245,7 @@ static int lovsub_lock_modify(const struct lu_env *env,
 
         LASSERT(cl_lock_mode_match(d->cld_mode,
                                    s->cls_lock->cll_descr.cld_mode));
-        list_for_each_entry(scan, &lock->lss_parents, lll_list) {
+        cfs_list_for_each_entry(scan, &lock->lss_parents, lll_list) {
                 int rc;
 
                 lov = scan->lll_super;
@@ -300,7 +272,7 @@ static int lovsub_lock_closure(const struct lu_env *env,
         sub    = cl2lovsub_lock(slice);
         result = 0;
 
-        list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+        cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
                 parent = scan->lll_super->lls_cl.cls_lock;
                 result = cl_lock_closure_build(env, parent, closure);
                 if (result != 0)
@@ -310,75 +282,155 @@ static int lovsub_lock_closure(const struct lu_env *env,
 }
 
 /**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
+ * A helper function for lovsub_lock_delete() that deals with a given parent
+ * top-lock.
  */
-static void lovsub_lock_delete(const struct lu_env *env,
-                               const struct cl_lock_slice *slice)
+static int lovsub_lock_delete_one(const struct lu_env *env,
+                                  struct cl_lock *child, struct lov_lock *lov)
 {
-        struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-        struct lov_lock      *lov;
-        struct cl_lock       *parent;
-        struct lov_lock_link *scan;
-        struct lov_lock_link *temp;
-        struct lov_lock_sub  *subdata;
-
-        LASSERT(cl_lock_is_mutexed(slice->cls_lock));
+        struct cl_lock *parent;
+        int             result;
         ENTRY;
 
-        list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list) {
-                lov     = scan->lll_super;
-                subdata = &lov->lls_sub[scan->lll_idx];
-                parent  = lov->lls_cl.cls_lock;
-                lovsub_parent_lock(env, lov);
-                subdata->sub_got = subdata->sub_descr;
-                lov_lock_unlink(env, scan, sub);
-                CDEBUG(D_DLMTRACE, "%p %p %i %i\n", parent, sub,
-                       lov->lls_nr_filled, parent->cll_state);
-                switch (parent->cll_state) {
-                case CLS_NEW:
-                case CLS_QUEUING:
-                case CLS_ENQUEUED:
-                case CLS_FREEING:
-                        cl_lock_signal(env, parent);
-                        break;
-                case CLS_UNLOCKING:
-                        /*
-                         * Here lies a problem: a sub-lock is canceled while
-                         * top-lock is being unlocked. Top-lock cannot be
-                         * moved into CLS_NEW state, because unlocking has to
-                         * succeed eventually by placing lock into CLS_CACHED
-                         * (or failing it), see cl_unuse_try(). Nor can
-                         * top-lock be left in CLS_CACHED state, because lov
-                         * maintains an invariant that all sub-locks exist in
-                         * CLS_CACHED (this allows cached top-lock to be
-                         * reused immediately). Nor can we wait for top-lock
-                         * state to change, because this can be synchronous to
-                         * the current thread.
+        parent = lov->lls_cl.cls_lock;
+        if (parent->cll_error)
+                RETURN(0);
+
+        result = 0;
+        switch (parent->cll_state) {
+        case CLS_ENQUEUED:
+                /* See LU-1355 for the case where a glimpse lock is
+                 * interrupted by a signal */
+                LASSERT(parent->cll_flags & CLF_CANCELLED);
+                break;
+        case CLS_QUEUING:
+        case CLS_FREEING:
+                cl_lock_signal(env, parent);
+                break;
+        case CLS_INTRANSIT:
+                /*
+                 * Here lies a problem: a sub-lock is canceled while top-lock
+                 * is being unlocked. Top-lock cannot be moved into CLS_NEW
+                 * state, because unlocking has to succeed eventually by
+                 * placing lock into CLS_CACHED (or failing it), see
+                 * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
+                 * state, because lov maintains an invariant that all
+                 * sub-locks exist in CLS_CACHED (this allows cached top-lock
+                 * to be reused immediately). Nor can we wait for top-lock
+                 * state to change, because this can be synchronous to the
+                 * current thread.
+                 *
+                 * We know for sure that lov_lock_unuse() will be called at
+                 * least one more time to finish un-using, so leave a mark on
+                 * the top-lock, that will be seen by the next call to
+                 * the top-lock that will be seen by the next call to
+                 */
+                if (cl_lock_is_intransit(parent))
+                        lov->lls_cancel_race = 1;
+                break;
+        case CLS_CACHED:
+                /*
+                 * if a sub-lock is canceled, move its top-lock into CLS_NEW
+                 * state to preserve an invariant that a top-lock in
+                 * CLS_CACHED is immediately ready for re-use (i.e., has all
+                 * sub-locks), and so that next attempt to re-use the top-lock
+                 * enqueues missing sub-lock.
+                 */
+                cl_lock_state_set(env, parent, CLS_NEW);
+                /* fall through */
+        case CLS_NEW:
+                /*
+                 * if last sub-lock is canceled, destroy the top-lock (which
+                 * is now `empty') proactively.
+                 */
+                if (lov->lls_nr_filled == 0) {
+                        /* ... but unfortunately, this cannot be done easily,
+                         * as cancellation of a top-lock might acquire mutexes
+                         * of its other sub-locks, violating lock ordering,
+                         * see cl_lock_{cancel,delete}() preconditions.
+                         *
+                         * To work around this, the mutex of this sub-lock is
+                         * released, top-lock is destroyed, and sub-lock mutex
+                         * acquired again. The list of parents has to be
+                         * re-scanned from the beginning after this.
+                         *
+                         * Only do this if no mutexes other than on @child and
+                         * @parent are held by the current thread.
                          *
-                         * We know for sure that lov_lock_unuse() will be
-                         * called at least one more time to finish un-using,
-                         * so leave a mark on the top-lock, that will be seen
-                         * by the next call to lov_lock_unuse().
+                         * TODO: The locking model here is too complex, as the
+                         * lock may be canceled and deleted voluntarily:
+                         *    cl_lock_request
+                         *      -> osc_lock_enqueue_wait
+                         *        -> osc_lock_cancel_wait
+                         *          -> cl_lock_delete
+                         *            -> lovsub_lock_delete
+                         *              -> cl_lock_cancel/delete
+                         *                -> ...
+                         *
+                         * The better choice is to spawn a kernel thread for
+                         * this purpose. -jay
                          */
-                        lov->lls_unuse_race = 1;
-                        break;
-                case CLS_CACHED:
-                        cl_lock_state_set(env, parent, CLS_NEW);
-                        if (lov->lls_nr_filled == 0) {
+                        if (cl_lock_nr_mutexed(env) == 2) {
+                                cl_lock_mutex_put(env, child);
                                 cl_lock_cancel(env, parent);
                                 cl_lock_delete(env, parent);
-                                cl_lock_signal(env, parent);
+                                result = 1;
                         }
-                        break;
-                case CLS_HELD:
-                default:
-                        CERROR("Impossible state: %i\n", parent->cll_state);
-                        LBUG();
                 }
-                lovsub_parent_unlock(env, lov);
+                break;
+        case CLS_HELD:
+                CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
+        default:
+                CERROR("Impossible state: %d\n", parent->cll_state);
+                LBUG();
+                break;
         }
+
+        RETURN(result);
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_delete() method. This is
+ * invoked in "bottom-to-top" delete, when lock destruction starts from the
+ * sub-lock (e.g., as a result of ldlm lock LRU policy).
+ */
+static void lovsub_lock_delete(const struct lu_env *env,
+                               const struct cl_lock_slice *slice)
+{
+        struct cl_lock     *child = slice->cls_lock;
+        struct lovsub_lock *sub   = cl2lovsub_lock(slice);
+        int restart;
+
+        LASSERT(cl_lock_is_mutexed(child));
+
+        ENTRY;
+        /*
+         * Destruction of a sub-lock might take multiple iterations, because
+         * when the last sub-lock of a given top-lock is deleted, the top-lock
+         * is canceled proactively, and this requires the sub-lock mutex to be
+         * released. Once the sub-lock mutex has been released, the list of its
+         * parents has to be re-scanned from the beginning.
+         */
+        do {
+                struct lov_lock      *lov;
+                struct lov_lock_link *scan;
+                struct lov_lock_link *temp;
+
+                restart = 0;
+                cfs_list_for_each_entry_safe(scan, temp,
+                                             &sub->lss_parents, lll_list) {
+                        lov     = scan->lll_super;
+                        lovsub_parent_lock(env, lov);
+                        lov_lock_unlink(env, scan, sub);
+                        restart = lovsub_lock_delete_one(env, child, lov);
+                        lovsub_parent_unlock(env, lov);
+
+                        if (restart) {
+                                cl_lock_mutex_get(env, child);
+                                break;
+                        }
+                }
+        } while (restart);
         EXIT;
 }
 
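The do/while loop above implements a "drop the mutex, act, re-take it, rescan from the head" pattern: once the sub-lock mutex has been released to cancel an emptied top-lock, any saved cursor into lss_parents is stale, so the walk restarts from the beginning. A minimal user-space sketch of the same pattern, assuming pthreads and invented names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct parent_link {
        struct parent_link *next;
        int                 id;
};

struct child {
        pthread_mutex_t     mutex;
        struct parent_link *parents;
};

/* stands in for cancelling a top-lock, which must not run under the
 * child mutex because it takes other mutexes of its own */
static void delete_parent_unlocked(int id)
{
        printf("deleting parent %d with the child mutex dropped\n", id);
}

static void child_delete(struct child *c)
{
        struct parent_link *scan;

        pthread_mutex_lock(&c->mutex);
        /* re-reading the list head on every pass is the "rescan from
         * the beginning" step: after the mutex has been dropped, any
         * previously saved cursor into the list would be unsafe */
        while ((scan = c->parents) != NULL) {
                c->parents = scan->next;        /* unlink under the mutex */

                pthread_mutex_unlock(&c->mutex);
                delete_parent_unlocked(scan->id);
                free(scan);
                pthread_mutex_lock(&c->mutex);
        }
        pthread_mutex_unlock(&c->mutex);
}

int main(void)
{
        struct child c = { PTHREAD_MUTEX_INITIALIZER, NULL };
        int i;

        for (i = 0; i < 3; i++) {
                struct parent_link *l = malloc(sizeof(*l));

                l->id = i;
                l->next = c.parents;
                c.parents = l;
        }
        child_delete(&c);
        return 0;
}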
@@ -389,7 +441,7 @@ static int lovsub_lock_print(const struct lu_env *env, void *cookie,
         struct lov_lock      *lov;
         struct lov_lock_link *scan;
 
-        list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+        cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
                 lov = scan->lll_super;
                 (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
                 if (lov != NULL)
@@ -417,7 +469,7 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
         int result;
 
         ENTRY;
-        OBD_SLAB_ALLOC_PTR(lsk, lovsub_lock_kmem);
+        OBD_SLAB_ALLOC_PTR_GFP(lsk, lovsub_lock_kmem, __GFP_IO);
         if (lsk != NULL) {
                 CFS_INIT_LIST_HEAD(&lsk->lss_parents);
                 cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
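One note on the last hunk: switching from OBD_SLAB_ALLOC_PTR to OBD_SLAB_ALLOC_PTR_GFP with __GFP_IO constrains what the allocator may do while satisfying the request, since lock slices can be allocated on an I/O path; the mask appears intended to behave like GFP_NOFS, i.e. the allocation may block and start block I/O but must not recurse into filesystem reclaim. A hedged plain-kernel sketch of the same idea (the extern declaration is added only to make the fragment self-contained):

#include <linux/slab.h>

struct lovsub_lock;
extern struct kmem_cache *lovsub_lock_kmem;

static struct lovsub_lock *lovsub_lock_alloc(void)
{
        /* GFP_NOFS (historically __GFP_WAIT | __GFP_IO) may block and
         * start block I/O, but will not re-enter the filesystem */
        return kmem_cache_zalloc(lovsub_lock_kmem, GFP_NOFS);
}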