From: panda Date: Fri, 16 Oct 2009 12:19:32 +0000 (+0000) Subject: b=20201 X-Git-Tag: v1_8_2_01~1^2~16 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=715be97578f241c8919718dfb7e149e91d5af125;p=fs%2Flustre-release.git b=20201 o=Oleg Drokin i=Andrew Perepechko i=Alexey Lyashkov The problem is in how write_sem is taken. Before this patch, a deadlock was possible along the following lines: process 1: get group lock; process 2: write syscall, get write_sem, attempt to get write lock, block on group lock; process 1: write syscall, block on write_sem. Tests 3 and 4 in parallel group lock exercise this scenario. The patch avoids taking write_sem when we do not do any dlm locking, since it is pointless in that case: the main reason for write_sem was to avoid cascading timeouts with multiple ldlm locks held (introduced in bug 9332, comment #139). --- https://bugzilla.lustre.org/show_bug.cgi?id=20201#c7 --- diff --git a/lustre/llite/file.c b/lustre/llite/file.c index c10bace..eddc914 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1766,6 +1766,8 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, struct iovec *iov_copy = NULL; unsigned long nrsegs_copy, nrsegs_orig = 0; size_t count, iov_offset = 0; + int got_write_sem = 0; + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); ENTRY; count = ll_file_get_iov_count(iov, &nr_segs); @@ -1788,8 +1790,11 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov, LASSERT(ll_i2info(inode)->lli_smd != NULL); /* signal(7) specifies that write(2) and writev(2) should be restarted */ - if (down_interruptible(&ll_i2info(inode)->lli_write_sem)) - RETURN(-ERESTARTSYS); + if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK)) { + got_write_sem = 1; + if ( down_interruptible(&ll_i2info(inode)->lli_write_sem)) + RETURN(-ERESTARTSYS); + } ltd.ltd_magic = LTD_MAGIC; ll_td_set(&ltd); @@ -1952,7 +1957,8 @@ out: goto repeat; } - up(&ll_i2info(inode)->lli_write_sem); + if (got_write_sem) + up(&ll_i2info(inode)->lli_write_sem); 
ll_td_set(NULL); if (iov_copy && iov_copy != iov)