From: dzogin Date: Sun, 2 Aug 2009 15:54:54 +0000 (+0000) Subject: b=20321 X-Git-Tag: v1_8_1_1~74 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=966d557d99510b4bc12d6f6fcc5af3018bd17224;p=fs%2Flustre-release.git b=20321 i=dmitry.zogin i=adilger Severity : normal Bugzilla : 20321 Description: Deadlock between filter_destroy() and filter_commitrw_write(). Details : filter_destroy() does not hold the DLM lock over the whole operation. If the DLM lock is dropped, filter_commitrw() can go through, causing the deadlock between page lock and i_mutex. --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index fb6be26..f641c6e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -15,6 +15,13 @@ * ext4 support for RHEL5 is experimental and thus should not be used in production. +Severity : normal +Bugzilla : 20321 +Description: Deadlock between filter_destroy() and filter_commitrw_write(). +Details : filter_destroy() does not hold the DLM lock over the whole + operation. If the DLM lock is dropped, filter_commitrw() can go + through, causing the deadlock between page lock and i_mutex. + Severity : enhancement Bugzilla : 19847 Description: Update kernel to SLES10 SP2 2.6.16.60-0.39.3. diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 936adef..ed7b2f9 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1435,9 +1435,9 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, RETURN(dchild); } -static int filter_prepare_destroy(struct obd_device *obd, obd_id objid) +static int filter_prepare_destroy(struct obd_device *obd, obd_id objid, + struct lustre_handle *lockh) { - struct lustre_handle lockh; int flags = LDLM_AST_DISCARD_DATA, rc; struct ldlm_res_id res_id = { .name = { objid } }; ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; @@ -1448,15 +1448,23 @@ static int filter_prepare_destroy(struct obd_device *obd, obd_id objid) rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_EXTENT, &policy, LCK_PW, &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, NULL, 0, NULL, - &lockh); - - /* We only care about the side-effects, just drop the lock. */ - if (rc == ELDLM_OK) - ldlm_lock_decref(&lockh, LCK_PW); + lockh); + if (rc != ELDLM_OK) { + lockh->cookie = 0; + CERROR("%s: failed to get lock to destroy objid "LPU64" (%d)\n", + obd->obd_name, objid, rc); + } RETURN(rc); } +static void filter_fini_destroy(struct obd_device *obd, + struct lustre_handle *lockh) +{ + if (lustre_handle_is_used(lockh)) + ldlm_lock_decref(lockh, LCK_PW); +} + /* This is vfs_unlink() without down(i_sem). If we call regular vfs_unlink() * we have 2.6 lock ordering issues with filter_commitrw_write() as it takes * i_sem before starting a handle, while filter_destroy() + vfs_unlink do the @@ -3504,6 +3512,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, struct obd_device *obd; struct filter_obd *filter; struct dentry *dchild = NULL, *dparent = NULL; + struct lustre_handle lockh = { 0 }; struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; @@ -3543,7 +3552,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc = -ENOENT); } - filter_prepare_destroy(obd, oa->o_id); + rc = filter_prepare_destroy(obd, oa->o_id, &lockh); + if (rc) + GOTO(cleanup, rc); /* Our MDC connection is established by the MDS to us */ if (oa->o_valid & OBD_MD_FLCOOKIE) { @@ -3639,6 +3650,8 @@ cleanup: case 3: filter_parent_unlock(dparent); case 2: + filter_fini_destroy(obd, &lockh); + f_dput(dchild); if (fcc != NULL) OBD_FREE(fcc, sizeof(*fcc));