X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fldlm%2Fldlm_flock.c;h=4090ae09b4351fac783eb7a194970c815f4701b2;hb=71d68757c76cb01ba4d9afa09c766fe48dce369c;hp=b3b2cb1cff545429cb8f07bcf5d162ed3e56bc75;hpb=b15f3875f46eec3c5186fe6b84cf2cda7ceb8518;p=fs%2Flustre-release.git diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index b3b2cb1..4090ae0 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,12 +24,14 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved - * Use is subject to license terms. - * * Copyright (c) 2003 Hewlett-Packard Development Company LP. * Developed under the sponsorship of the US Government under * Subcontract No. B514193 + * + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2010, 2011, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -55,16 +55,6 @@ #define l_flock_waitq l_lru -/** - * Wait queue for Posix lock deadlock detection, added with - * ldlm_lock::l_flock_waitq. - */ -static CFS_LIST_HEAD(ldlm_flock_waitq); -/** - * Lock protecting access to ldlm_flock_waitq. - */ -spinlock_t ldlm_flock_waitq_lock = SPIN_LOCK_UNLOCKED; - int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); @@ -82,8 +72,8 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, static inline int ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new) { - return((new->l_policy_data.l_flock.pid == - lock->l_policy_data.l_flock.pid) && + return((new->l_policy_data.l_flock.owner == + lock->l_policy_data.l_flock.owner) && (new->l_export == lock->l_export)); } @@ -96,6 +86,43 @@ ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new) lock->l_policy_data.l_flock.start)); } +static inline void ldlm_flock_blocking_link(struct ldlm_lock *req, + struct ldlm_lock *lock) +{ + /* For server only */ + if (req->l_export == NULL) + return; + + LASSERT(cfs_list_empty(&req->l_flock_waitq)); + cfs_write_lock(&req->l_export->exp_flock_wait_lock); + + req->l_policy_data.l_flock.blocking_owner = + lock->l_policy_data.l_flock.owner; + req->l_policy_data.l_flock.blocking_export = + class_export_get(lock->l_export); + + cfs_list_add_tail(&req->l_flock_waitq, + &req->l_export->exp_flock_wait_list); + cfs_write_unlock(&req->l_export->exp_flock_wait_lock); +} + +static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req) +{ + /* For server only */ + if (req->l_export == NULL) + return; + + cfs_write_lock(&req->l_export->exp_flock_wait_lock); + if (!cfs_list_empty(&req->l_flock_waitq)) { + cfs_list_del_init(&req->l_flock_waitq); + + class_export_put(req->l_policy_data.l_flock.blocking_export); + req->l_policy_data.l_flock.blocking_owner = 0; + req->l_policy_data.l_flock.blocking_export = NULL; + } + cfs_write_unlock(&req->l_export->exp_flock_wait_lock); +} + static inline void ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) { @@ -105,9 +132,9 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) mode, flags); /* Safe to not lock here, since it should be empty anyway */ - LASSERT(list_empty(&lock->l_flock_waitq)); + LASSERT(cfs_list_empty(&lock->l_flock_waitq)); - list_del_init(&lock->l_res_link); + cfs_list_del_init(&lock->l_res_link); if (flags == LDLM_FL_WAIT_NOREPROC && !(lock->l_flags & LDLM_FL_FAILED)) { /* client side - set a flag to prevent sending a CANCEL */ @@ -123,44 +150,57 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) } static int -ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *blocking_lock) +ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock) { - struct obd_export *req_export = req->l_export; - struct obd_export *blocking_export = blocking_lock->l_export; - pid_t req_pid = req->l_policy_data.l_flock.pid; - pid_t blocking_pid = blocking_lock->l_policy_data.l_flock.pid; + struct obd_export *req_exp = req->l_export; + struct obd_export *bl_exp = bl_lock->l_export; + struct obd_export *bl_exp_new; + __u64 req_owner = req->l_policy_data.l_flock.owner; + __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner; struct ldlm_lock *lock; - spin_lock(&ldlm_flock_waitq_lock); + /* For server only */ + if (req_exp == NULL) + return 0; + + class_export_get(bl_exp); restart: - list_for_each_entry(lock, &ldlm_flock_waitq, l_flock_waitq) { - if ((lock->l_policy_data.l_flock.pid != blocking_pid) || - (lock->l_export != blocking_export)) + cfs_read_lock(&bl_exp->exp_flock_wait_lock); + cfs_list_for_each_entry(lock, &bl_exp->exp_flock_wait_list, + l_flock_waitq) { + struct ldlm_flock *flock = &lock->l_policy_data.l_flock; + + /* want to find something from same client and same process */ + if (flock->owner != bl_owner) continue; - blocking_pid = lock->l_policy_data.l_flock.blocking_pid; - blocking_export = (struct obd_export *)(long) - lock->l_policy_data.l_flock.blocking_export; - if (blocking_pid == req_pid && blocking_export == req_export) { - spin_unlock(&ldlm_flock_waitq_lock); + bl_owner = flock->blocking_owner; + bl_exp_new = class_export_get(flock->blocking_export); + cfs_read_unlock(&bl_exp->exp_flock_wait_lock); + class_export_put(bl_exp); + bl_exp = bl_exp_new; + + if (bl_owner == req_owner && bl_exp == req_exp) { + class_export_put(bl_exp); return 1; } goto restart; } - spin_unlock(&ldlm_flock_waitq_lock); + cfs_read_unlock(&bl_exp->exp_flock_wait_lock); + class_export_put(bl_exp); return 0; } int ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, - ldlm_error_t *err, struct list_head *work_list) + ldlm_error_t *err, cfs_list_t *work_list) { struct ldlm_resource *res = req->l_resource; - struct ldlm_namespace *ns = res->lr_namespace; - struct list_head *tmp; - struct list_head *ownlocks = NULL; + struct ldlm_namespace *ns = ldlm_res_to_ns(res); + cfs_list_t *tmp; + cfs_list_t *ownlocks = NULL; struct ldlm_lock *lock = NULL; struct ldlm_lock *new = req; struct ldlm_lock *new2 = NULL; @@ -172,8 +212,9 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, const struct ldlm_callback_suite null_cbs = { NULL }; ENTRY; - CDEBUG(D_DLMTRACE, "flags %#x pid %u mode %u start "LPU64" end "LPU64 - "\n", *flags, new->l_policy_data.l_flock.pid, mode, + CDEBUG(D_DLMTRACE, "flags %#x owner "LPU64" pid %u mode %u start "LPU64 + " end "LPU64"\n", *flags, new->l_policy_data.l_flock.owner, + new->l_policy_data.l_flock.pid, mode, req->l_policy_data.l_flock.start, req->l_policy_data.l_flock.end); @@ -192,8 +233,9 @@ reprocess: if ((*flags == LDLM_FL_WAIT_NOREPROC) || (mode == LCK_NL)) { /* This loop determines where this processes locks start * in the resource lr_granted list. */ - list_for_each(tmp, &res->lr_granted) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); + cfs_list_for_each(tmp, &res->lr_granted) { + lock = cfs_list_entry(tmp, struct ldlm_lock, + l_res_link); if (ldlm_same_flock_owner(lock, req)) { ownlocks = tmp; break; @@ -204,8 +246,9 @@ reprocess: /* This loop determines if there are existing locks * that conflict with the new lock request. */ - list_for_each(tmp, &res->lr_granted) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); + cfs_list_for_each(tmp, &res->lr_granted) { + lock = cfs_list_entry(tmp, struct ldlm_lock, + l_res_link); if (ldlm_same_flock_owner(lock, req)) { if (!ownlocks) @@ -248,16 +291,8 @@ reprocess: RETURN(LDLM_ITER_STOP); } - req->l_policy_data.l_flock.blocking_pid = - lock->l_policy_data.l_flock.pid; - req->l_policy_data.l_flock.blocking_export = - (long)(void *)lock->l_export; - - LASSERT(list_empty(&req->l_flock_waitq)); - spin_lock(&ldlm_flock_waitq_lock); - list_add_tail(&req->l_flock_waitq, &ldlm_flock_waitq); - spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_link(req, lock); ldlm_resource_add_lock(res, &res->lr_waiting, req); *flags |= LDLM_FL_BLOCK_GRANTED; RETURN(LDLM_ITER_STOP); @@ -273,9 +308,7 @@ reprocess: /* In case we had slept on this lock request take it off of the * deadlock detection waitq. */ - spin_lock(&ldlm_flock_waitq_lock); - list_del_init(&req->l_flock_waitq); - spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(req); /* Scan the locks owned by this process that overlap this request. * We may have to merge or split existing locks. */ @@ -284,7 +317,7 @@ reprocess: ownlocks = &res->lr_granted; list_for_remaining_safe(ownlocks, tmp, &res->lr_granted) { - lock = list_entry(ownlocks, struct ldlm_lock, l_res_link); + lock = cfs_list_entry(ownlocks, struct ldlm_lock, l_res_link); if (!ldlm_same_flock_owner(lock, new)) break; @@ -370,7 +403,7 @@ reprocess: * it must see the original lock data in the reply. */ /* XXX - if ldlm_lock_new() can sleep we should - * release the ns_lock, allocate the new lock, + * release the lr_lock, allocate the new lock, * and restart processing this lock. */ if (!new2) { unlock_res_and_lock(req); @@ -392,6 +425,8 @@ reprocess: new2->l_granted_mode = lock->l_granted_mode; new2->l_policy_data.l_flock.pid = new->l_policy_data.l_flock.pid; + new2->l_policy_data.l_flock.owner = + new->l_policy_data.l_flock.owner; new2->l_policy_data.l_flock.start = lock->l_policy_data.l_flock.start; new2->l_policy_data.l_flock.end = @@ -400,12 +435,12 @@ reprocess: new->l_policy_data.l_flock.end + 1; new2->l_conn_export = lock->l_conn_export; if (lock->l_export != NULL) { - new2->l_export = class_export_lock_get(lock->l_export); + new2->l_export = class_export_lock_get(lock->l_export, new2); if (new2->l_export->exp_lock_hash && - hlist_unhashed(&new2->l_exp_hash)) - lustre_hash_add(new2->l_export->exp_lock_hash, - &new2->l_remote_handle, - &new2->l_exp_hash); + cfs_hlist_unhashed(&new2->l_exp_hash)) + cfs_hash_add(new2->l_export->exp_lock_hash, + &new2->l_remote_handle, + &new2->l_exp_hash); } if (*flags == LDLM_FL_WAIT_NOREPROC) ldlm_lock_addref_internal_nolock(new2, @@ -426,12 +461,13 @@ reprocess: /* Add req to the granted queue before calling ldlm_reprocess_all(). */ if (!added) { - list_del_init(&req->l_res_link); + cfs_list_del_init(&req->l_res_link); /* insert new lock before ownlocks in list. */ ldlm_resource_add_lock(res, ownlocks, req); } if (*flags != LDLM_FL_WAIT_NOREPROC) { +#ifdef HAVE_SERVER_SUPPORT if (first_enq) { /* If this is an unlock, reprocess the waitq and * send completions ASTs for locks that can now be @@ -450,7 +486,7 @@ restart: &rpc_list); unlock_res_and_lock(req); - rc = ldlm_run_ast_work(&rpc_list, + rc = ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_CP_AST); lock_res_and_lock(req); if (rc == -ERESTART) @@ -460,6 +496,13 @@ restart: LASSERT(req->l_completion_ast); ldlm_add_ast_work_item(req, NULL, work_list); } +#else /* !HAVE_SERVER_SUPPORT */ + /* The only one possible case for client-side calls flock + * policy function is ldlm_flock_completion_ast inside which + * carries LDLM_FL_WAIT_NOREPROC flag. */ + CERROR("Illegal parameter for client-side-only module.\n"); + LBUG(); +#endif /* HAVE_SERVER_SUPPORT */ } /* In case we're reprocessing the requested lock we can't destroy @@ -482,25 +525,17 @@ static void ldlm_flock_interrupted_wait(void *data) { struct ldlm_lock *lock; - struct lustre_handle lockh; - int rc; ENTRY; lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; /* take lock off the deadlock detection waitq. */ - spin_lock(&ldlm_flock_waitq_lock); - list_del_init(&lock->l_flock_waitq); - spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); /* client side - set flag to prevent lock from being put on lru list */ + lock_res_and_lock(lock); lock->l_flags |= LDLM_FL_CBPENDING; - - ldlm_lock_decref_internal(lock, lock->l_req_mode); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc != ELDLM_OK) - CERROR("ldlm_cli_cancel: %d\n", rc); + unlock_res_and_lock(lock); EXIT; } @@ -534,17 +569,17 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) * references being held, so that it can go away. No point in * holding the lock even if app still believes it has it, since * server already dropped it anyway. Only for granted locks too. */ - lock_res_and_lock(lock); if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) == (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) { - unlock_res_and_lock(lock); if (lock->l_req_mode == lock->l_granted_mode && lock->l_granted_mode != LCK_NL && NULL == data) ldlm_lock_decref_internal(lock, lock->l_req_mode); + + /* Need to wake up the waiter if we were evicted */ + cfs_waitq_signal(&lock->l_waitq); RETURN(0); } - unlock_res_and_lock(lock); LASSERT(flags != LDLM_FL_WAIT_NOREPROC); @@ -568,9 +603,9 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) imp = obd->u.cli.cl_import; if (NULL != imp) { - spin_lock(&imp->imp_lock); + cfs_spin_lock(&imp->imp_lock); fwd.fwd_generation = imp->imp_generation; - spin_unlock(&imp->imp_lock); + cfs_spin_unlock(&imp->imp_lock); } lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd); @@ -587,28 +622,30 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) granted: OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - lock_res_and_lock(lock); - if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) { + if (lock->l_destroyed) { LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - unlock_res(lock->l_resource); + RETURN(0); + } + + if (lock->l_flags & LDLM_FL_FAILED) { + LDLM_DEBUG(lock, "client-side enqueue waking up: failed"); RETURN(-EIO); } + if (rc) { LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", rc); - unlock_res_and_lock(lock); RETURN(rc); } LDLM_DEBUG(lock, "client-side enqueue granted"); /* take lock off the deadlock detection waitq. */ - spin_lock(&ldlm_flock_waitq_lock); - list_del_init(&lock->l_flock_waitq); - spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); + lock_res_and_lock(lock); /* ldlm_lock_enqueue() has already placed lock on the granted list. */ - list_del_init(&lock->l_res_link); + cfs_list_del_init(&lock->l_res_link); if (flags & LDLM_FL_TEST_LOCK) { /* fcntl(F_GETLK) request */ @@ -647,17 +684,46 @@ EXPORT_SYMBOL(ldlm_flock_completion_ast); int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) { - struct ldlm_namespace *ns; ENTRY; LASSERT(lock); LASSERT(flag == LDLM_CB_CANCELING); - ns = lock->l_resource->lr_namespace; - /* take lock off the deadlock detection waitq. */ - spin_lock(&ldlm_flock_waitq_lock); - list_del_init(&lock->l_flock_waitq); - spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); RETURN(0); } + +void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy, + ldlm_policy_data_t *lpolicy) +{ + memset(lpolicy, 0, sizeof(*lpolicy)); + lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; + lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; + lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; + /* Compat code, old clients had no idea about owner field and + * relied solely on pid for ownership. Introduced in LU-104, 2.1, + * April 2011 */ + lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid; +} + + +void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy, + ldlm_policy_data_t *lpolicy) +{ + memset(lpolicy, 0, sizeof(*lpolicy)); + lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; + lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; + lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; + lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner; +} + +void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, + ldlm_wire_policy_data_t *wpolicy) +{ + memset(wpolicy, 0, sizeof(*wpolicy)); + wpolicy->l_flock.lfw_start = lpolicy->l_flock.start; + wpolicy->l_flock.lfw_end = lpolicy->l_flock.end; + wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid; + wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner; +}