From: bobijam
Date: Wed, 18 Jun 2008 01:48:38 +0000 (+0000)
Subject: Branch HEAD
X-Git-Tag: v1_9_50~366
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0c03efd50800fb0d5fbda3ef9c5788b51a3804e1

Branch HEAD
b=14480
o=green
i=shadow, bobijam

Description: LBUG during stress test
Details    : Need to properly lock accesses to the flock deadlock
             detection list.
---

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index 2ad66ba..569da49 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -12,6 +12,11 @@ tbd Sun Microsystems, Inc.
        * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a
          removed cwd "./" (refer to Bugzilla 14399).
 
+Severity   : normal
+Bugzilla   : 14480
+Description: LBUG during stress test
+Details    : Need to properly lock accesses to the flock deadlock detection list.
+
 Severity   : minor
 Bugzilla   : 15837
 Description: oops in page fault handler
diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c
index 477819e..273c50c 100644
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -42,6 +42,7 @@
 #define l_flock_waitq   l_lru
 
 static CFS_LIST_HEAD(ldlm_flock_waitq);
+spinlock_t ldlm_flock_waitq_lock = SPIN_LOCK_UNLOCKED;
 
 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                             void *data, int flag);
@@ -82,6 +83,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
         LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%x)",
                    mode, flags);
 
+        /* Safe to not lock here, since it should be empty anyway */
         LASSERT(list_empty(&lock->l_flock_waitq));
 
         list_del_init(&lock->l_res_link);
@@ -104,6 +106,7 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *blocking_lock)
         pid_t blocking_pid = blocking_lock->l_policy_data.l_flock.pid;
         struct ldlm_lock *lock;
 
+        spin_lock(&ldlm_flock_waitq_lock);
 restart:
         list_for_each_entry(lock, &ldlm_flock_waitq, l_flock_waitq) {
                 if ((lock->l_policy_data.l_flock.pid != blocking_pid) ||
@@ -113,11 +116,14 @@ restart:
                 blocking_pid = lock->l_policy_data.l_flock.blocking_pid;
                 blocking_export = (struct obd_export *)(long)
                         lock->l_policy_data.l_flock.blocking_export;
-                if (blocking_pid == req_pid && blocking_export == req_export)
+                if (blocking_pid == req_pid && blocking_export == req_export) {
+                        spin_unlock(&ldlm_flock_waitq_lock);
                         return 1;
+                }
 
                 goto restart;
         }
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         return 0;
 }
@@ -220,7 +226,9 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq,
                                 (long)(void *)lock->l_export;
 
                         LASSERT(list_empty(&req->l_flock_waitq));
+                        spin_lock(&ldlm_flock_waitq_lock);
                         list_add_tail(&req->l_flock_waitq, &ldlm_flock_waitq);
+                        spin_unlock(&ldlm_flock_waitq_lock);
 
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
@@ -237,7 +245,9 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq,
 
         /* In case we had slept on this lock request take it off of the
          * deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&req->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* Scan the locks owned by this process that overlap this request.
          * We may have to merge or split existing locks. */
@@ -436,7 +446,9 @@ ldlm_flock_interrupted_wait(void *data)
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
 
         /* take lock off the deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
@@ -466,6 +478,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         CDEBUG(D_DLMTRACE, "flags: 0x%x data: %p getlk: %p\n",
                flags, data, getlk);
 
+        /* Import invalidation. We need to actually release the lock
+         * references being held, so that it can go away. No point in
+         * holding the lock even if app still believes it has it, since
+         * server already dropped it anyway. Only for granted locks too. */
+        lock_res_and_lock(lock);
+        if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
+            (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
+                unlock_res_and_lock(lock);
+                if (lock->l_req_mode == lock->l_granted_mode &&
+                    lock->l_granted_mode != LCK_NL)
+                        ldlm_lock_decref_internal(lock, lock->l_req_mode);
+                RETURN(0);
+        }
+        unlock_res_and_lock(lock);
+
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
         if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
@@ -505,7 +532,9 @@ granted:
         lock_res(lock->l_resource);
 
         /* take lock off the deadlock detection waitq. */
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
+        spin_unlock(&ldlm_flock_waitq_lock);
 
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         list_del_init(&lock->l_res_link);
@@ -554,8 +583,8 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         ns = lock->l_resource->lr_namespace;
 
         /* take lock off the deadlock detection waitq. */
-        lock_res_and_lock(lock);
+        spin_lock(&ldlm_flock_waitq_lock);
         list_del_init(&lock->l_flock_waitq);
-        unlock_res_and_lock(lock);
+        spin_unlock(&ldlm_flock_waitq_lock);
         RETURN(0);
 }
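
For readers who want the pattern in isolation: the patch replaces per-resource
locking (lock_res_and_lock()) with one dedicated spinlock for the global flock
deadlock-detection list, because that list is shared across all resources and
the walk in ldlm_flock_deadlock() traverses it end to end. Below is a minimal,
self-contained userspace sketch of that pattern, not Lustre code: pthread_mutex_t
stands in for the kernel spinlock_t, and struct waiter, waitq_add(), waitq_del()
and waitq_deadlock() are hypothetical names chosen for illustration.

/*
 * Sketch of the locking pattern the patch applies: a single global lock
 * serializes every traversal, insert, and delete of a shared wait list.
 * pthread_mutex_t stands in for the kernel spinlock; names are illustrative.
 */
#include <pthread.h>
#include <stddef.h>

struct waiter {
        int pid;                /* owner of the blocked request */
        int blocking_pid;       /* owner of the lock it waits on */
        struct waiter *next;
};

static struct waiter *flock_waitq;      /* shared list head */
static pthread_mutex_t flock_waitq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Add a blocked request to the deadlock-detection list. */
static void waitq_add(struct waiter *w)
{
        pthread_mutex_lock(&flock_waitq_lock);
        w->next = flock_waitq;
        flock_waitq = w;
        pthread_mutex_unlock(&flock_waitq_lock);
}

/* Remove a request; called when it is granted, cancelled, or interrupted. */
static void waitq_del(struct waiter *w)
{
        struct waiter **p;

        pthread_mutex_lock(&flock_waitq_lock);
        for (p = &flock_waitq; *p != NULL; p = &(*p)->next) {
                if (*p == w) {
                        *p = w->next;
                        break;
                }
        }
        pthread_mutex_unlock(&flock_waitq_lock);
}

/*
 * Follow the wait-for chain starting at blocking_pid; return 1 if it leads
 * back to req_pid (a deadlock). The whole walk holds the list lock, mirroring
 * how the patch brackets the restart loop in ldlm_flock_deadlock().
 */
static int waitq_deadlock(int req_pid, int blocking_pid)
{
        struct waiter *w;

        pthread_mutex_lock(&flock_waitq_lock);
restart:
        for (w = flock_waitq; w != NULL; w = w->next) {
                if (w->pid != blocking_pid)
                        continue;
                blocking_pid = w->blocking_pid;
                if (blocking_pid == req_pid) {
                        pthread_mutex_unlock(&flock_waitq_lock);
                        return 1;
                }
                goto restart;   /* chase the next edge, as the original does */
        }
        pthread_mutex_unlock(&flock_waitq_lock);
        return 0;
}

As in the patch, the detector holds the list lock across the entire restart
loop so no entry can be unlinked mid-walk, and every deletion path (grant,
cancel, interrupted wait, blocking AST) takes the same lock first. That is why
the old lock_res_and_lock() in ldlm_flock_blocking_ast() was insufficient: it
protected only one lock's resource, not the list shared by all of them.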