From c139827b023c4ced7db36060832550ff9eeaec03 Mon Sep 17 00:00:00 2001 From: Andrew Perepechko Date: Mon, 1 Feb 2010 11:27:18 -0800 Subject: [PATCH] b=21489 fix several write+utimes race conditions Disable mtime updates on write and serialize fmd_mactime_xid checks in order to avoid certain write(2)+utimes(2) race conditions on OSS i=Johann Lombardi i=Vitaly Fertman --- lustre/include/obd_support.h | 1 + lustre/obdfilter/filter.c | 9 +++++++++ lustre/obdfilter/filter_io.c | 22 +++++++++++++--------- lustre/ost/ost_handler.c | 7 +++++++ lustre/tests/sanity.sh | 19 +++++++++++++++---- 5 files changed, 45 insertions(+), 13 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 0d90654..c93a200 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -273,6 +273,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224 #define OBD_FAIL_OST_CONNECT_NET2 0x225 #define OBD_FAIL_OST_NOMEM 0x226 +#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 13c2530..711746a 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -3437,12 +3437,21 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, filter = &exp->exp_obd->u.filter; push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); + /* + * We need to be atomic against a concurrent write + * (which takes the semaphore for reading). fmd_mactime_xid + * checks will have no effect if a write request with lower + * xid starts just before a setattr and finishes later than + * the setattr (see bug 21489, comment 27). + */ if (oa->o_valid & (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) { + down_write(&dentry->d_inode->i_alloc_sem); fmd = filter_fmd_get(exp, oa->o_id, oa->o_gr); if (fmd && fmd->fmd_mactime_xid < oti->oti_xid) fmd->fmd_mactime_xid = oti->oti_xid; filter_fmd_put(exp, fmd); + up_write(&dentry->d_inode->i_alloc_sem); } /* setting objects attributes (including owner/group) */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 7a2700f..3e7fee20 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -702,6 +702,14 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, fsfilt_check_slow(obd, now, "preprw_write setup"); + /* Filter truncate first locks i_mutex then partially truncated + * page, filter write code first locks pages then take + * i_mutex. To avoid a deadlock in case of concurrent + * punch/write requests from one client, filter writes and + * filter truncates are serialized by i_alloc_sem, allowing + * multiple writes or single truncate. */ + down_read(&dentry->d_inode->i_alloc_sem); + /* Don't update inode timestamps if this write is older than a * setattr which modifies the timestamps. b=10150 */ /* XXX when we start having persistent reservations this needs to @@ -737,18 +745,12 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, cfs_spin_unlock(&obd->obd_osfs_lock); filter_fmd_put(exp, fmd); + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK2, (obd_timeout + 1) / 4); + if (rc) GOTO(cleanup, rc); cleanup_phase = 4; - /* Filter truncate first locks i_mutex then partally truncated - * page, filter write code first locks pages then take - * i_mutex. To avoid a deadlock in case of concurrent - * punch/write requests from one client, filter writes and - * filter truncates are serialized by i_alloc_sem, allowing - * multiple writes or single truncate. */ - down_read(&dentry->d_inode->i_alloc_sem); - cfs_gettimeofday(&start); for (i = 0, lnb = res; i < *npages; i++, lnb++) { @@ -836,9 +838,11 @@ cleanup: lnb->page = NULL; } } - up_read(&dentry->d_inode->i_alloc_sem); } case 3: + if (rc) + up_read(&dentry->d_inode->i_alloc_sem); + filter_iobuf_put(&obd->u.filter, iobuf, oti); case 2: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 9c20008..28f3deb 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1129,6 +1129,13 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) repbody->oa.o_gid = o_gid; } + /* + * Disable sending mtime back to the client. If the client locked the + * whole object, then it has already updated the mtime on its side, + * otherwise it will have to glimpse anyway (see bug 21489, comment 32) + */ + repbody->oa.o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLATIME); + if (unlikely(client_cksum != server_cksum && rc == 0)) { int new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type); char *msg; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index be74422..7541e1e 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -74,7 +74,7 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} init_logging -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24v 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24v 27m 36f 36g 36h 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false @@ -1830,15 +1830,15 @@ test_36e() { } run_test 36e "utime on non-owned file (should return error) ====" -test_36f() { +subr_36fh() { + local fl="$1" local LANG_SAVE=$LANG local LC_LANG_SAVE=$LC_LANG export LANG=C LC_LANG=C # for date language DATESTR="Dec 20 2000" mkdir -p $DIR/$tdir - #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - lctl set_param fail_loc=0x80000214 + lctl set_param fail_loc=$fl date; date +%s cp /etc/hosts $DIR/$tdir/$tfile sync & # write RPC generated with "current" inode timestamp, but delayed @@ -1856,6 +1856,11 @@ test_36f() { export LANG=$LANG_SAVE LC_LANG=$LC_LANG_SAVE } + +test_36f() { + #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 + subr_36fh "0x80000214" +} run_test 36f "utime on file racing with OST BRW write ==========" test_36g() { @@ -1873,6 +1878,12 @@ test_36g() { } run_test 36g "filter mod data cache expiry =====================" +test_36h() { + #define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 + subr_36fh "0x80000227" +} +run_test 36h "utime on file racing with OST BRW write ==========" + test_37() { mkdir -p $DIR/$tdir echo f > $DIR/$tdir/fbugfile -- 1.8.3.1