From 161c7ffe93f5985ade87c44a786029c0d308a386 Mon Sep 17 00:00:00 2001 From: phil Date: Thu, 25 Sep 2003 04:54:47 +0000 Subject: [PATCH] re-committing on b_devel: b=1997 r=alex My previous fix for truncate/write lock inversion was close, but badly flawed. I failed to remember that truncate will internally restart the transaction -- start a new one, for all intents and purposes -- so the ordering was backwards. "i_sem before transaction" is the cardinal rule. I tried to avoid that because I didn't want to hold the i_sem across the entire disk I/O in filter_commitrw_write. After some discussion with Alex, however, we decided that the i_sem need only be held for the block allocation, and that the BKL suffices for updating i_size. So the ordering in the write path is now: take i_sem, start transaction, call filter_direct_io, f_d_io does block alloc, drops i_sem, does I/O. --- lustre/obdfilter/filter_io_24.c | 43 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 41647f0..0cb17ee 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -55,21 +55,22 @@ void inode_update_time(struct inode *inode, int ctime_too) int ext3_map_inode_page(struct inode *inode, struct page *page, unsigned long *blocks, int *created, int create); -int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf) +/* Must be called with i_sem taken; this will drop it */ +static int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf) { struct page *page; unsigned long *b = iobuf->blocks; int rc, i, create = (rw == OBD_BRW_WRITE), blocks_per_page, *created; - int *cr, cleanup_phase; + int *cr, cleanup_phase = 0; ENTRY; blocks_per_page = PAGE_SIZE >> inode->i_blkbits; if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS) - RETURN(-EINVAL); + GOTO(cleanup, rc = -EINVAL); OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page); if (created == NULL) - RETURN(-ENOMEM); + GOTO(cleanup, rc = -ENOMEM); cleanup_phase = 1; rc = lock_kiovec(1, &iobuf, 1); @@ -77,8 +78,6 @@ int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf) GOTO(cleanup, rc); cleanup_phase = 2; - down(&inode->i_sem); - cleanup_phase = 3; for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){ page = iobuf->maplist[i]; @@ -90,7 +89,7 @@ int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf) cr += blocks_per_page; } up(&inode->i_sem); - cleanup_phase = 2; + cleanup_phase = 3; rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks, 1 << inode->i_blkbits); @@ -106,18 +105,21 @@ int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf) EXIT; cleanup: switch(cleanup_phase) { - case 3: - up(&inode->i_sem); - case 2: - unlock_kiovec(1, &iobuf); - case 1: - OBD_FREE(created, sizeof(*created) * - iobuf->nr_pages*blocks_per_page); - break; - default: - CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase); - LBUG(); + case 3: + case 2: + unlock_kiovec(1, &iobuf); + case 1: + OBD_FREE(created, sizeof(*created) * + iobuf->nr_pages*blocks_per_page); + case 0: + if (cleanup_phase == 3) break; + up(&inode->i_sem); + break; + default: + CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase); + LBUG(); + break; } return rc; } @@ -175,6 +177,7 @@ int filter_commitrw_write(struct obd_export *exp, int objcount, push_ctxt(&saved, &obd->obd_ctxt, NULL); cleanup_phase = 2; + down(&inode->i_sem); oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti); if (IS_ERR(oti->oti_handle)) { rc = PTR_ERR(oti->oti_handle); @@ -189,7 +192,7 @@ int filter_commitrw_write(struct obd_export *exp, int objcount, rc = filter_direct_io(OBD_BRW_WRITE, inode, iobuf); if (rc == 0) { - down(&inode->i_sem); + lock_kernel(); inode_update_time(inode, 1); if (iattr.ia_size > inode->i_size) { CDEBUG(D_INFO, "setting i_size to "LPU64"\n", @@ -197,7 +200,7 @@ int filter_commitrw_write(struct obd_export *exp, int objcount, fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0); } - up(&inode->i_sem); + unlock_kernel(); } if (time_after(jiffies, now + 15 * HZ)) -- 1.8.3.1