From 94ad88b8083907d14d4069d0e7bb10456c0d2cb4 Mon Sep 17 00:00:00 2001 From: green Date: Tue, 22 Jan 2008 20:20:45 +0000 Subject: [PATCH] r=adilger,vitaly b=14360 Do not enqueue locks under och_sem. --- lustre/ChangeLog | 7 +++++++ lustre/llite/file.c | 25 ++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 88cd228..216b287 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -623,6 +623,13 @@ Details : ldlm_completion_ast() assumes that a lock is granted when the req mode is equal to the granted mode. However, it should also check that LDLM_FL_CP_REQD is not set. +Severity : normal +Bugzilla : 14360 +Description: Heavy NFS access might result in deadlocks +Details : After ELC code landed, it is now improper to enqueue any MDS + locks under och_sem, because enqueue might decide to + cancel open locks for the same inode we are holding och_sem for. + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 6e83792..7a83bce 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -513,6 +513,7 @@ int ll_file_open(struct inode *inode, struct file *file) it = &oit; } +restart: /* Let's see if we have file open on MDS already. 
*/ if (it->it_flags & FMODE_WRITE) { och_p = &lli->lli_mds_write_och; @@ -549,19 +550,19 @@ int ll_file_open(struct inode *inode, struct file *file) } } else { LASSERT(*och_usecount == 0); - OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); - if (!*och_p) { - ll_file_data_put(fd); - GOTO(out_och_free, rc = -ENOMEM); - } - (*och_usecount)++; if (!it->d.lustre.it_disposition) { + /* We cannot just request lock handle now, new ELC code + means that one of the other OPEN locks for this file + could be cancelled, and since blocking ast handler + would attempt to grab och_sem as well, that would + result in a deadlock */ + up(&lli->lli_och_sem); it->it_flags |= O_CHECK_STALE; rc = ll_intent_file_open(file, NULL, 0, it); it->it_flags &= ~O_CHECK_STALE; if (rc) { ll_file_data_put(fd); - GOTO(out_och_free, rc); + GOTO(out_openerr, rc); } /* Got some error? Release the request */ @@ -572,7 +573,14 @@ int ll_file_open(struct inode *inode, struct file *file) md_set_lock_data(ll_i2sbi(inode)->ll_md_exp, &it->d.lustre.it_lock_handle, file->f_dentry->d_inode); + goto restart; } + OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); + if (!*och_p) { + ll_file_data_put(fd); + GOTO(out_och_free, rc = -ENOMEM); + } + (*och_usecount)++; req = it->d.lustre.it_data; /* md_intent_lock() didn't get a request ref if there was an @@ -626,6 +634,9 @@ out_och_free: (*och_usecount)--; } up(&lli->lli_och_sem); +out_openerr: ;/* Looks weird, eh? Just wait for statahead code to insert + a statement here <-- remove this comment after statahead + landing */ } return rc; -- 1.8.3.1