-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include "lov_cl_internal.h"
-/** \addtogroup lov lov @{ */
+/** \addtogroup lov
+ * @{
+ */
/*****************************************************************************
*
ENTRY;
lsl = cl2lovsub_lock(slice);
- LASSERT(list_empty(&lsl->lss_parents));
+ LASSERT(cfs_list_empty(&lsl->lss_parents));
OBD_SLAB_FREE_PTR(lsl, lovsub_lock_kmem);
EXIT;
}
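+/**
+ * Pins the parent top-lock of \a lov (with cl_lock_get() and a lu_ref
+ * reference) and takes its mutex.
+ */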
static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
{
- struct cl_lock *parent;
-
- ENTRY;
- parent = lov->lls_cl.cls_lock;
- cl_lock_get(parent);
- lu_ref_add(&parent->cll_reference, "lovsub-parent", cfs_current());
- cl_lock_mutex_get(env, parent);
- EXIT;
+ struct cl_lock *parent;
+
+ ENTRY;
+ parent = lov->lls_cl.cls_lock;
+ cl_lock_get(parent);
+ lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
+ cl_lock_mutex_get(env, parent);
+ EXIT;
}
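+/**
+ * Releases the mutex of the parent top-lock of \a lov and drops the
+ * reference taken by lovsub_parent_lock().
+ */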
static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
{
- struct cl_lock *parent;
-
- ENTRY;
- parent = lov->lls_cl.cls_lock;
- cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
- lu_ref_del(&parent->cll_reference, "lovsub-parent", cfs_current());
- cl_lock_put(env, parent);
- EXIT;
-}
-
-static void lovsub_lock_state_one(const struct lu_env *env,
- const struct lovsub_lock *lovsub,
- struct lov_lock *lov)
-{
- struct cl_lock *parent;
- const struct cl_lock *child;
-
- ENTRY;
- parent = lov->lls_cl.cls_lock;
- child = lovsub->lss_cl.cls_lock;
-
- if (lovsub->lss_active != parent) {
- lovsub_parent_lock(env, lov);
- if (child->cll_error != 0)
- cl_lock_error(env, parent, child->cll_error);
- else
- cl_lock_signal(env, parent);
- lovsub_parent_unlock(env, lov);
- }
- EXIT;
+ struct cl_lock *parent;
+
+ ENTRY;
+ parent = lov->lls_cl.cls_lock;
+ cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
+ lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
+ cl_lock_put(env, parent);
+ EXIT;
}
/**
{
struct lovsub_lock *sub = cl2lovsub_lock(slice);
struct lov_lock_link *scan;
- struct lov_lock_link *temp;
LASSERT(cl_lock_is_mutexed(slice->cls_lock));
ENTRY;
- /*
- * Use _safe() version, because
- *
- * lovsub_lock_state_one()
- * ->cl_lock_error()
- * ->cl_lock_delete()
- * ->lov_lock_delete()
- *
- * can unlink parent from the parent list.
- */
- list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list)
- lovsub_lock_state_one(env, sub, scan->lll_super);
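+	/*
+	 * Plain (non-_safe) iteration is sufficient here, because the
+	 * cl_lock_error() path that could unlink a parent from this list
+	 * is no longer taken.
+	 */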
+ cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ struct lov_lock *lov = scan->lll_super;
+ struct cl_lock *parent = lov->lls_cl.cls_lock;
+
+ if (sub->lss_active != parent) {
+ lovsub_parent_lock(env, lov);
+ cl_lock_signal(env, parent);
+ lovsub_parent_unlock(env, lov);
+ }
+ }
EXIT;
}
LASSERT(cl_lock_is_mutexed(slice->cls_lock));
- if (!list_empty(&lock->lss_parents)) {
+ if (!cfs_list_empty(&lock->lss_parents)) {
/*
* It is not clear whether all parents have to be asked and
* their estimations summed, or it is enough to ask one. For
* Maps start/end offsets within a stripe, to offsets within a file.
*/
static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
- struct lov_object *obj,
- int stripe, struct cl_lock_descr *out)
+ struct lov_object *lov,
+ int stripe, struct cl_lock_descr *out)
{
- struct lov_stripe_md *lsm = lov_r0(obj)->lo_lsm;
pgoff_t size; /* stripe size in pages */
pgoff_t skip; /* how many pages in every stripe are occupied by
* "other" stripes */
start = in->cld_start;
end = in->cld_end;
- /*
- * XXX join file support.
- */
- if (lsm->lsm_stripe_count > 1) {
- size = cl_index(lov2cl(obj), lsm->lsm_stripe_size);
- skip = (lsm->lsm_stripe_count - 1) * size;
+ if (lov->lo_lsm->lsm_stripe_count > 1) {
+ size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
+ skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
/* XXX overflow check here? */
start += start/size * skip + stripe * size;
const struct cl_lock_descr *d, int idx)
{
struct cl_lock *parent;
- struct cl_lock *child;
struct lovsub_object *subobj;
struct cl_lock_descr *pd;
struct cl_lock_descr *parent_descr;
parent_descr = &parent->cll_descr;
LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
- child = sublock->lss_cl.cls_lock;
subobj = cl2lovsub(sublock->lss_cl.cls_obj);
pd = &lov_env_info(env)->lti_ldescr;
pd->cld_obj = parent_descr->cld_obj;
pd->cld_mode = parent_descr->cld_mode;
+ pd->cld_gid = parent_descr->cld_gid;
lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
- lov->lls_sub[idx].sub_got = *d;
+
+	/* LU-3027: Only the lock extent is updated here. Combined with the
+	 * change in lovsub_lock_delete() (the lock extent is only modified
+	 * after a sub-lock has been canceled), this ensures that the lock
+	 * extent won't be updated any more. Therefore, lov_lock_fits_into()
+	 * will always find feasible locks */
+ lov->lls_sub[idx].sub_got.cld_start = d->cld_start;
+ lov->lls_sub[idx].sub_got.cld_end = d->cld_end;
/*
* Notify top-lock about modification, if lock description changes
* materially.
LASSERT(cl_lock_mode_match(d->cld_mode,
s->cls_lock->cll_descr.cld_mode));
- list_for_each_entry(scan, &lock->lss_parents, lll_list) {
+ cfs_list_for_each_entry(scan, &lock->lss_parents, lll_list) {
int rc;
lov = scan->lll_super;
sub = cl2lovsub_lock(slice);
result = 0;
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
parent = scan->lll_super->lls_cl.cls_lock;
result = cl_lock_closure_build(env, parent, closure);
if (result != 0)
}
/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
+ * A helper function for lovsub_lock_delete() that deals with a given parent
+ * top-lock.
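+ *
+ * \retval 1 if the mutex of \a child had to be released in order to cancel
+ * and delete \a parent; the caller must then re-acquire the mutex and
+ * re-scan the list of parents of the sub-lock.
+ * \retval 0 otherwise.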
*/
-static void lovsub_lock_delete(const struct lu_env *env,
- const struct cl_lock_slice *slice)
+static int lovsub_lock_delete_one(const struct lu_env *env,
+ struct cl_lock *child, struct lov_lock *lov)
{
- struct lovsub_lock *sub = cl2lovsub_lock(slice);
- struct lov_lock *lov;
- struct cl_lock *parent;
- struct lov_lock_link *scan;
- struct lov_lock_link *temp;
- struct lov_lock_sub *subdata;
-
- LASSERT(cl_lock_is_mutexed(slice->cls_lock));
+ struct cl_lock *parent;
+ int result;
ENTRY;
- list_for_each_entry_safe(scan, temp, &sub->lss_parents, lll_list) {
- lov = scan->lll_super;
- subdata = &lov->lls_sub[scan->lll_idx];
- parent = lov->lls_cl.cls_lock;
- lovsub_parent_lock(env, lov);
- subdata->sub_got = subdata->sub_descr;
- lov_lock_unlink(env, scan, sub);
- CDEBUG(D_DLMTRACE, "%p %p %i %i\n", parent, sub,
- lov->lls_nr_filled, parent->cll_state);
- switch (parent->cll_state) {
- case CLS_NEW:
- case CLS_QUEUING:
- case CLS_ENQUEUED:
- case CLS_FREEING:
- cl_lock_signal(env, parent);
- break;
- case CLS_UNLOCKING:
- /*
- * Here lies a problem: a sub-lock is canceled while
- * top-lock is being unlocked. Top-lock cannot be
- * moved into CLS_NEW state, because unlocking has to
- * succeed eventually by placing lock into CLS_CACHED
- * (or failing it), see cl_unuse_try(). Nor can
- * top-lock be left in CLS_CACHED state, because lov
- * maintains an invariant that all sub-locks exist in
- * CLS_CACHED (this allows cached top-lock to be
- * reused immediately). Nor can we wait for top-lock
- * state to change, because this can be synchronous to
- * the current thread.
+ parent = lov->lls_cl.cls_lock;
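+	/* nothing to do if an error has already been propagated to the
+	 * top-lock */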
+ if (parent->cll_error)
+ RETURN(0);
+
+ result = 0;
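+	/* record that at least one sub-lock of this top-lock has been
+	 * canceled */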
+ lov->lls_ever_canceled = 1;
+ switch (parent->cll_state) {
+ case CLS_ENQUEUED:
+		/* See LU-1355 for the case where a glimpse lock is
+		 * interrupted by a signal */
+ LASSERT(parent->cll_flags & CLF_CANCELLED);
+ break;
+ case CLS_QUEUING:
+ case CLS_FREEING:
+ cl_lock_signal(env, parent);
+ break;
+ case CLS_INTRANSIT:
+ /*
+ * Here lies a problem: a sub-lock is canceled while top-lock
+ * is being unlocked. Top-lock cannot be moved into CLS_NEW
+ * state, because unlocking has to succeed eventually by
+ * placing lock into CLS_CACHED (or failing it), see
+ * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
+ * state, because lov maintains an invariant that all
+ * sub-locks exist in CLS_CACHED (this allows cached top-lock
+ * to be reused immediately). Nor can we wait for top-lock
+ * state to change, because this can be synchronous to the
+ * current thread.
+ *
+ * We know for sure that lov_lock_unuse() will be called at
+ * least one more time to finish un-using, so leave a mark on
+ * the top-lock, that will be seen by the next call to
+ * lov_lock_unuse().
+ */
+ if (cl_lock_is_intransit(parent))
+ lov->lls_cancel_race = 1;
+ break;
+ case CLS_CACHED:
+ /*
+		 * if a sub-lock is canceled, move its top-lock into CLS_NEW
+		 * state to preserve the invariant that a top-lock in
+		 * CLS_CACHED is immediately ready for re-use (i.e., has all
+		 * sub-locks), and so that the next attempt to re-use the
+		 * top-lock enqueues the missing sub-lock.
+ */
+ cl_lock_state_set(env, parent, CLS_NEW);
+ /* fall through */
+ case CLS_NEW:
+ /*
+		 * if the last sub-lock is canceled, destroy the top-lock
+		 * (which is now `empty') proactively.
+ */
+ if (lov->lls_nr_filled == 0) {
+			/* ... but unfortunately, this cannot be done easily,
+			 * as cancellation of a top-lock might acquire mutexes
+			 * of its other sub-locks, violating lock ordering,
+			 * see cl_lock_{cancel,delete}() preconditions.
+			 *
+			 * To work around this, the mutex of this sub-lock is
+			 * released, the top-lock is destroyed, and the
+			 * sub-lock mutex is acquired again. The list of
+			 * parents has to be re-scanned from the beginning
+			 * after this.
+			 *
+			 * Only do this if no mutexes other than those of
+			 * @child and @parent are held by the current thread.
*
- * We know for sure that lov_lock_unuse() will be
- * called at least one more time to finish un-using,
- * so leave a mark on the top-lock, that will be seen
- * by the next call to lov_lock_unuse().
+			 * TODO: The locking model here is too complex,
+			 * because the lock may be canceled and deleted
+			 * voluntarily:
+			 *   cl_lock_request
+			 *    -> osc_lock_enqueue_wait
+			 *     -> osc_lock_cancel_wait
+			 *      -> cl_lock_delete
+			 *       -> lovsub_lock_delete
+			 *        -> cl_lock_cancel/delete
+			 *         -> ...
+			 *
+			 * A better choice would be to spawn a kernel thread
+			 * for this purpose. -jay
+			 */
- lov->lls_unuse_race = 1;
- break;
- case CLS_CACHED:
- cl_lock_state_set(env, parent, CLS_NEW);
- if (lov->lls_nr_filled == 0) {
+ if (cl_lock_nr_mutexed(env) == 2) {
+ cl_lock_mutex_put(env, child);
cl_lock_cancel(env, parent);
cl_lock_delete(env, parent);
- cl_lock_signal(env, parent);
+ result = 1;
}
- break;
- case CLS_HELD:
- default:
- CERROR("Impossible state: %i\n", parent->cll_state);
- LBUG();
}
- lovsub_parent_unlock(env, lov);
+ break;
+ case CLS_HELD:
+ CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
+		/* fall through */
+ default:
+ CERROR("Impossible state: %d\n", parent->cll_state);
+ LBUG();
+ break;
}
+
+ RETURN(result);
+}
+
+/**
+ * An implementation of cl_lock_operations::clo_delete() method. This is
+ * invoked in "bottom-to-top" delete, when lock destruction starts from the
+ * sub-lock (e.g., as a result of ldlm lock LRU policy).
+ */
+static void lovsub_lock_delete(const struct lu_env *env,
+ const struct cl_lock_slice *slice)
+{
+ struct cl_lock *child = slice->cls_lock;
+ struct lovsub_lock *sub = cl2lovsub_lock(slice);
+ int restart;
+
+ LASSERT(cl_lock_is_mutexed(child));
+
+ ENTRY;
+	/*
+	 * Destruction of a sub-lock might take multiple iterations, because
+	 * when the last sub-lock of a given top-lock is deleted, the top-lock
+	 * is canceled proactively, and this requires releasing the sub-lock
+	 * mutex. Once the sub-lock mutex has been released, the list of its
+	 * parents has to be re-scanned from the beginning.
+	 */
+ do {
+ struct lov_lock *lov;
+ struct lov_lock_link *scan;
+ struct lov_lock_link *temp;
+
+ restart = 0;
+ cfs_list_for_each_entry_safe(scan, temp,
+ &sub->lss_parents, lll_list) {
+ lov = scan->lll_super;
+ lovsub_parent_lock(env, lov);
+ lov_lock_unlink(env, scan, sub);
+ restart = lovsub_lock_delete_one(env, child, lov);
+ lovsub_parent_unlock(env, lov);
+
+ if (restart) {
+ cl_lock_mutex_get(env, child);
+ break;
+ }
+ }
+ } while (restart);
EXIT;
}
struct lov_lock *lov;
struct lov_lock_link *scan;
- list_for_each_entry(scan, &sub->lss_parents, lll_list) {
+ cfs_list_for_each_entry(scan, &sub->lss_parents, lll_list) {
lov = scan->lll_super;
(*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
if (lov != NULL)
};
int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io)
+ struct cl_lock *lock, const struct cl_io *io)
{
- struct lovsub_lock *lsk;
- int result;
-
- ENTRY;
- OBD_SLAB_ALLOC_PTR_GFP(lsk, lovsub_lock_kmem, CFS_ALLOC_IO);
- if (lsk != NULL) {
- CFS_INIT_LIST_HEAD(&lsk->lss_parents);
- cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
- result = 0;
- } else
- result = -ENOMEM;
- RETURN(result);
+ struct lovsub_lock *lsk;
+ int result;
+
+ ENTRY;
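+	/* GFP_NOFS: lock slices can be allocated on the I/O path, so the
+	 * allocation must not re-enter the filesystem via memory reclaim */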
+ OBD_SLAB_ALLOC_PTR_GFP(lsk, lovsub_lock_kmem, GFP_NOFS);
+ if (lsk != NULL) {
+ CFS_INIT_LIST_HEAD(&lsk->lss_parents);
+ cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
+ result = 0;
+ } else
+ result = -ENOMEM;
+ RETURN(result);
}
/** @} lov */