From cb648ab36e84bdcedcb59f70d5561f966631dd7e Mon Sep 17 00:00:00 2001 From: Andrew Perepechko Date: Sat, 19 Dec 2009 01:09:00 +0300 Subject: [PATCH] b=21489 make xid checks atomic i=Johann Lombardi i=Vitaly Fertman lock when checking xid value during setattr and write --- lustre/include/obd_support.h | 1 + lustre/obdfilter/filter.c | 9 +++++++++ lustre/obdfilter/filter_io.c | 22 +++++++++++++--------- lustre/tests/sanity.sh | 19 +++++++++++++++---- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index f9c82b1..b7898cd 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -232,6 +232,7 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224 #define OBD_FAIL_OST_CONNECT_NET2 0x225 #define OBD_FAIL_OST_NOMEM 0x226 +#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 722bc7a..24f370c 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2950,12 +2950,21 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, filter = &exp->exp_obd->u.filter; push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); + /* + * We need to be atomic against a concurrent write + * (which takes the semaphore for reading). fmd_mactime_xid + * checks will have no effect if a write request with lower + * xid starts just before a setattr and finishes later than + * the setattr (see bug 21489, comment 27). + */ if (oinfo->oi_oa->o_valid & (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) { + down_write(&dentry->d_inode->i_alloc_sem); fmd = filter_fmd_get(exp,oinfo->oi_oa->o_id,oinfo->oi_oa->o_gr); if (fmd && fmd->fmd_mactime_xid < oti->oti_xid) fmd->fmd_mactime_xid = oti->oti_xid; filter_fmd_put(exp, fmd); + up_write(&dentry->d_inode->i_alloc_sem); } /* setting objects attributes (including owner/group) */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index a42568f..863d607 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -710,6 +710,14 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, fsfilt_check_slow(obd, now, "preprw_write setup"); + /* Filter truncate first locks i_mutex then partially truncated + * page, filter write code first locks pages then take + * i_mutex. To avoid a deadlock in case of concurrent + * punch/write requests from one client, filter writes and + * filter truncates are serialized by i_alloc_sem, allowing + * multiple writes or single truncate. */ + down_read(&dentry->d_inode->i_alloc_sem); + /* Don't update inode timestamps if this write is older than a * setattr which modifies the timestamps. b=10150 */ /* XXX when we start having persistent reservations this needs to @@ -745,18 +753,12 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, spin_unlock(&obd->obd_osfs_lock); filter_fmd_put(exp, fmd); + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK2, (obd_timeout + 1) / 4); + if (rc) GOTO(cleanup, rc); cleanup_phase = 4; - /* Filter truncate first locks i_mutex then partally truncated - * page, filter write code first locks pages then take - * i_mutex. To avoid a deadlock in case of concurrent - * punch/write requests from one client, filter writes and - * filter truncates are serialized by i_alloc_sem, allowing - * multiple writes or single truncate. */ - down_read(&dentry->d_inode->i_alloc_sem); - do_gettimeofday(&start); for (i = 0, lnb = res; i < *pages; i++, lnb++) { @@ -847,9 +849,11 @@ cleanup: } } filter_grant_commit(exp, *pages, res); - up_read(&dentry->d_inode->i_alloc_sem); } case 3: + if (rc) + up_read(&dentry->d_inode->i_alloc_sem); + filter_iobuf_put(&obd->u.filter, iobuf, oti); case 2: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 17d1adc..16f858f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -67,7 +67,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 36h 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=${FAIL_ON_ERROR:-false} @@ -1677,13 +1677,13 @@ test_36e() { } run_test 36e "utime on non-owned file (should return error) ====" -test_36f() { +subr_36fh() { + local fl="$1" export LANG=C LC_LANG=C # for date language DATESTR="Dec 20 2000" mkdir -p $DIR/$tdir - #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - lctl set_param fail_loc=0x80000214 + lctl set_param fail_loc=$fl date; date +%s cp /etc/hosts $DIR/$tdir/$tfile sync & # write RPC generated with "current" inode timestamp, but delayed @@ -1699,6 +1699,11 @@ test_36f() { echo "WANT : $DATESTR" && \ error "$DIR/$tdir/$tfile timestamps changed" || true } + +test_36f() { + #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 + subr_36fh "0x80000214" +} run_test 36f "utime on file racing with OST BRW write ==========" test_36g() { @@ -1716,6 +1721,12 @@ test_36g() { } run_test 36g "filter mod data cache expiry =====================" +test_36h() { + #define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 + subr_36fh "0x80000227" +} +run_test 36h "utime on file racing with OST BRW write ==========" + test_37() { mkdir -p $DIR/$tdir echo f > $DIR/$tdir/fbugfile -- 1.8.3.1