From 4b6f6519c86355f8ac9899a06e2510348a0f9413 Mon Sep 17 00:00:00 2001 From: grev Date: Wed, 12 Mar 2008 23:10:24 +0000 Subject: [PATCH] b=13665 (Christopher Morrone) i=Scjody i=Adilger multiop race fix --- lustre/tests/Makefile.am | 4 +- lustre/tests/conf-sanity.sh | 3 +- lustre/tests/multiop.c | 11 +++- lustre/tests/recovery-small.sh | 28 +++++----- lustre/tests/replay-dual.sh | 16 ++---- lustre/tests/replay-single.sh | 108 +++++++++++---------------------------- lustre/tests/runmultiop_bg_pause | 11 ++++ lustre/tests/sanity.sh | 38 +++++--------- lustre/tests/sanityN.sh | 44 +++++++--------- lustre/tests/test-framework.sh | 29 +++++++++++ 10 files changed, 134 insertions(+), 158 deletions(-) create mode 100644 lustre/tests/runmultiop_bg_pause diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 7a565f2..9485b64 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -11,8 +11,8 @@ noinst_SCRIPTS += sanity.sh rundbench acceptance-small.sh compile.sh noinst_SCRIPTS += conf-sanity.sh insanity.sh lfscktest.sh oos.sh oos2.sh noinst_SCRIPTS += llog-test.sh recovery-small.sh replay-dual.sh sanity-quota.sh noinst_SCRIPTS += replay-ost-single.sh replay-single.sh run-llog.sh sanityN.sh -noinst_SCRIPTS += lockorder.sh socketclient socketserver sanity-sec.sh -noinst_SCRIPTS += sanity-gss.sh krb5_login.sh +noinst_SCRIPTS += lockorder.sh socketclient socketserver runmultiop_bg_pause +noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh nobase_noinst_SCRIPTS = cfg/insanity-local.sh nobase_noinst_SCRIPTS += cfg/local.sh acl/make-tree acl/run nobase_noinst_SCRIPTS += rmtacl/make-tree rmtacl/run diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 77c3ed1..5b9ee54 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1280,8 +1280,7 @@ run_test 33b "Drop cancel during umount" test_34a() { setup - do_facet client multiop $DIR/file O_c & - sleep 0.500s + do_facet client "sh runmultiop_bg_pause 
$DIR/file O_c" manual_umount_client rc=$? do_facet client killall -USR1 multiop diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 765c00b..3bd2128 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -145,6 +145,7 @@ int main(int argc, char **argv) int rc, len, fd = -1; int flags; int save_errno; + int verbose = 0; if (argc < 3) { fprintf(stderr, usage, argv[0]); @@ -158,8 +159,13 @@ int main(int argc, char **argv) for (commands = argv[2]; *commands; commands++) { switch (*commands) { case '_': - if (usr1_received == 0) + if (usr1_received == 0) { + if (verbose) { + printf("PAUSING\n"); + fflush(stdout); + } pause(); + } usr1_received = 0; signal(SIGUSR1, usr1_handler); break; @@ -337,6 +343,9 @@ int main(int argc, char **argv) exit(save_errno); } break; + case 'v': + verbose = 1; + break; case 'w': len = atoi(commands+1); if (len <= 0) diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 054782a..dc797be 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -125,10 +125,9 @@ test_12(){ $LCTL mark multiop $MOUNT/$tfile OS_c do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x115" clear_failloc $SINGLEMDS $((TIMEOUT * 2)) & - multiop $MOUNT/$tfile OS_c & + multiop_bg_pause $MOUNT/$tfile OS_c || return 1 PID=$! #define OBD_FAIL_MDS_CLOSE_NET 0x115 - sleep 2 kill -USR1 $PID echo "waiting for multiop $PID" wait $PID || return 2 @@ -322,9 +321,8 @@ run_test 19b "test expired_lock_main on ost (2867)" test_20a() { # bug 2983 - ldlm_handle_enqueue cleanup mkdir -p $DIR/$tdir - multiop $DIR/$tdir/${tfile} O_wc & + multiop_bg_pause $DIR/$tdir/${tfile} O_wc || return 1 MULTI_PID=$! 
- sleep 1 cancel_lru_locks osc #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 do_facet ost1 sysctl -w lustre.fail_loc=0x80000308 @@ -349,7 +347,7 @@ run_test 20b "ldlm_handle_enqueue error (should return error)" test_21a() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 close_pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129" @@ -375,7 +373,7 @@ run_test 21a "drop close request while close and open are both in flight" test_21b() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 close_pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" @@ -398,7 +396,7 @@ run_test 21b "drop open request while close and open are both in flight" test_21c() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 close_pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" @@ -424,7 +422,7 @@ run_test 21c "drop both request while close and open are both in flight" test_21d() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000129" @@ -448,7 +446,7 @@ run_test 21d "drop close reply while close and open are both in flight" test_21e() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" @@ -470,7 +468,7 @@ run_test 21e "drop open reply while close and open are both in flight" test_21f() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 pid=$! 
do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" @@ -493,7 +491,7 @@ run_test 21f "drop both reply while close and open are both in flight" test_21g() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000119" @@ -516,7 +514,7 @@ run_test 21g "drop open reply and close request while close and open are both in test_21h() { mkdir -p $DIR/$tdir-1 mkdir -p $DIR/$tdir-2 - multiop $DIR/$tdir-1/f O_c & + multiop_bg_pause $DIR/$tdir-1/f O_c || return 1 pid=$! do_facet $SINGLEMDS "sysctl -w lustre.fail_loc=0x80000107" @@ -560,7 +558,7 @@ test_22() { run_test 22 "drop close request and do mknod" test_23() { #b=4561 - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 1 pid=$! # give a chance for open sleep 5 @@ -577,11 +575,9 @@ run_test 23 "client hang when close a file after mds crash" test_24() { # bug 2248 - eviction fails writeback but app doesn't see it mkdir -p $DIR/$tdir cancel_lru_locks osc - multiop $DIR/$tdir/$tfile Owy_wyc & + multiop_bg_pause $DIR/$tdir/$tfile Owy_wyc || return 1 MULTI_PID=$! - usleep 500 ost_evict_client - usleep 500 kill -USR1 $MULTI_PID wait $MULTI_PID rc=$? diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index c4f12d1..bc55eaf 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -92,10 +92,8 @@ run_test 4 "|X| mkdir adir (-EEXIST), mkdir adir/bdir " test_5() { # multiclient version of replay_single.sh/test_8 mcreate $MOUNT1/a - multiop $MOUNT2/a o_tSc & + multiop_bg_pause $MOUNT2/a o_tSc || return 1 pid=$! - # give multiop a chance to open - sleep 1 rm -f $MOUNT1/a replay_barrier $SINGLEMDS kill -USR1 $pid @@ -110,12 +108,10 @@ run_test 5 "open, unlink |X| close" test_6() { mcreate $MOUNT1/a - multiop $MOUNT2/a o_c & + multiop_bg_pause $MOUNT2/a o_c || return 1 pid1=$! 
- multiop $MOUNT1/a o_c & + multiop_bg_pause $MOUNT1/a o_c || return 1 pid2=$! - # give multiop a chance to open - sleep 1 rm -f $MOUNT1/a replay_barrier $SINGLEMDS kill -USR1 $pid1 @@ -197,9 +193,8 @@ run_test 11 "both clients timeout during replay" test_12() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile mo_c & + multiop_bg_pause $DIR/$tfile mo_c || return 1 MULTIPID=$! - sleep 5 #define OBD_FAIL_LDLM_ENQUEUE 0x302 do_facet $SINGLEMDS sysctl -w lustre.fail_loc=0x80000302 @@ -218,9 +213,8 @@ test_12() { run_test 12 "open resend timeout" test_13() { - multiop $DIR/$tfile mo_c & + multiop_bg_pause $DIR/$tfile mo_c || return 1 MULTIPID=$! - sleep 5 replay_barrier $SINGLEMDS diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 4587024..8335246 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -253,9 +253,8 @@ test_8() { # make sure no side-effect from previous test. rm -f $DIR/$tfile replay_barrier $SINGLEMDS - multiop $DIR/$tfile mo_c & + multiop_bg_pause $DIR/$tfile mo_c || return 4 MULTIPID=$! - sleep 1 fail $SINGLEMDS ls $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || return 1 @@ -313,10 +312,8 @@ run_test 11 "create open write rename |X| create-old-name read" test_12() { mcreate $DIR/$tfile - multiop $DIR/$tfile o_tSc & + multiop_bg_pause $DIR/$tfile o_tSc || return 3 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile replay_barrier $SINGLEMDS kill -USR1 $pid @@ -333,10 +330,8 @@ run_test 12 "open, unlink |X| close" # a regular open a failure test_13() { mcreate $DIR/$tfile - multiop $DIR/$tfile O_wc & + multiop_bg_pause $DIR/$tfile O_wc || return 3 pid=$! - # give multiop a chance to open - sleep 1 chmod 0 $DIR/$tfile $CHECKSTAT -p 0 $DIR/$tfile replay_barrier $SINGLEMDS @@ -350,10 +345,8 @@ test_13() { run_test 13 "open chmod 0 |x| write close" test_14() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 4 pid=$! 
- # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile replay_barrier $SINGLEMDS kill -USR1 $pid || return 1 @@ -366,10 +359,8 @@ test_14() { run_test 14 "open(O_CREAT), unlink |X| close" test_15() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 5 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile replay_barrier $SINGLEMDS touch $DIR/g11 || return 1 @@ -398,10 +389,8 @@ run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new" test_17() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 4 pid=$! - # give multiop a chance to open - sleep 1 fail $SINGLEMDS kill -USR1 $pid || return 1 wait $pid || return 2 @@ -412,10 +401,8 @@ run_test 17 "|X| open(O_CREAT), |replay| close" test_18() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 8 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile touch $DIR/$tfile-2 || return 1 echo "pid: $pid will close" @@ -447,10 +434,8 @@ run_test 19 "|X| mcreate, open, write, rename " test_20() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 3 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile fail $SINGLEMDS @@ -491,10 +476,8 @@ test_20b() { # bug 10480 run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)" test_20c() { # bug 10480 - multiop $DIR/$tfile Ow_c & + multiop_bg_pause $DIR/$tfile Ow_c || return 1 pid=$! - # give multiop a chance to open - sleep 1 ls -la $DIR/$tfile @@ -511,10 +494,8 @@ run_test 20c "check that client eviction does not affect file content" test_21() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 5 pid=$! 
- # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile touch $DIR/g11 || return 1 @@ -528,10 +509,8 @@ test_21() { run_test 21 "|X| open(O_CREAT), unlink touch new, replay, close (test mds_cleanup_orphans)" test_22() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 3 pid=$! - # give multiop a chance to open - sleep 1 replay_barrier $SINGLEMDS rm -f $DIR/$tfile @@ -545,10 +524,8 @@ test_22() { run_test 22 "open(O_CREAT), |X| unlink, replay, close (test mds_cleanup_orphans)" test_23() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 5 pid=$! - # give multiop a chance to open - sleep 1 replay_barrier $SINGLEMDS rm -f $DIR/$tfile @@ -564,10 +541,8 @@ test_23() { run_test 23 "open(O_CREAT), |X| unlink touch new, replay, close (test mds_cleanup_orphans)" test_24() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 3 pid=$! - # give multiop a chance to open - sleep 1 replay_barrier $SINGLEMDS fail $SINGLEMDS @@ -580,10 +555,8 @@ test_24() { run_test 24 "open(O_CREAT), replay, unlink, close (test mds_cleanup_orphans)" test_25() { - multiop $DIR/$tfile O_tSc & + multiop_bg_pause $DIR/$tfile O_tSc || return 3 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile replay_barrier $SINGLEMDS @@ -597,12 +570,10 @@ run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)" test_26() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! - multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 kill -USR1 $pid2 @@ -619,12 +590,10 @@ run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test m test_27() { replay_barrier $SINGLEMDS - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! 
- multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 @@ -640,12 +609,10 @@ test_27() { run_test 27 "|X| open(O_CREAT), unlink two, replay, close two (test mds_cleanup_orphans)" test_28() { - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! - multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! - # give multiop a chance to open - sleep 1 replay_barrier $SINGLEMDS rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 @@ -662,12 +629,10 @@ test_28() { run_test 28 "open(O_CREAT), |X| unlink two, close one, replay, close one (test mds_cleanup_orphans)" test_29() { - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! - multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! - # give multiop a chance to open - sleep 1 replay_barrier $SINGLEMDS rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 @@ -684,12 +649,10 @@ test_29() { run_test 29 "open(O_CREAT), |X| unlink two, replay, close two (test mds_cleanup_orphans)" test_30() { - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! - multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile-1 rm -f $DIR/$tfile-2 @@ -706,12 +669,10 @@ test_30() { run_test 30 "open(O_CREAT) two, unlink two, replay, close two (test mds_cleanup_orphans)" test_31() { - multiop $DIR/$tfile-1 O_tSc & + multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5 pid1=$! - multiop $DIR/$tfile-2 O_tSc & + multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6 pid2=$! 
- # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile-1 replay_barrier $SINGLEMDS @@ -730,12 +691,10 @@ run_test 31 "open(O_CREAT) two, unlink one, |X| unlink one, close two (test mds_ # tests for bug 2104; completion without crashing is success. The close is # stale, but we always return 0 for close, so the app never sees it. test_32() { - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 2 pid1=$! - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 3 pid2=$! - # give multiop a chance to open - sleep 1 mds_evict_client df $MOUNT || sleep 1 && df $MOUNT || return 1 kill -USR1 $pid1 @@ -771,10 +730,8 @@ test_33a() { run_test 33a "fid shouldn't be reused after abort recovery" test_34() { - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 2 pid=$! - # give multiop a chance to open - sleep 1 rm -f $DIR/$tfile replay_barrier $SINGLEMDS @@ -821,10 +778,8 @@ run_test 36 "don't resend cancel" # directory orphans can't be unlinked from PENDING directory test_37() { rmdir $DIR/$tfile 2>/dev/null - multiop $DIR/$tfile dD_c & + multiop_bg_pause $DIR/$tfile dD_c || return 2 pid=$! - # give multiop a chance to open - sleep 1 rmdir $DIR/$tfile replay_barrier $SINGLEMDS @@ -996,9 +951,8 @@ test_45() { [ "$mdcdev" ] || exit 2 $LCTL --device $mdcdev recover - multiop $DIR/$tfile O_c & + multiop_bg_pause $DIR/$tfile O_c || return 1 pid=$! - sleep 1 # This will cause the CLOSE to fail before even # allocating a reply buffer diff --git a/lustre/tests/runmultiop_bg_pause b/lustre/tests/runmultiop_bg_pause new file mode 100644 index 0000000..823ebdd --- /dev/null +++ b/lustre/tests/runmultiop_bg_pause @@ -0,0 +1,11 @@ +#!/bin/bash + +# Run multiop in the background, but wait for it to print +# "PAUSING" to its stdout before returning from this function. + +PTLDEBUG=${PTLDEBUG:--1} +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh + +multiop_bg_pause $* +exit $? 
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index cdcf445..6a730e6 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -685,9 +685,8 @@ run_test 24p "mkdir .../R12{a,b}; rename .../R12a .../R12b" test_24q() { mkdir $DIR/R13{a,b} DIRINO=`ls -lid $DIR/R13a | awk '{ print $1 }'` - multiop $DIR/R13b D_c & + multiop_bg_pause $DIR/R13b D_c || return 1 MULTIPID=$! - usleep 500 mrename $DIR/R13a $DIR/R13b $CHECKSTAT -a $DIR/R13a || error @@ -1173,10 +1172,9 @@ run_test 31b "unlink file with multiple links while open =======" test_31c() { touch $DIR/f31 || error ln $DIR/f31 $DIR/f31c || error - multiop $DIR/f31 O_uc & + multiop_bg_pause $DIR/f31 O_uc || return 1 MULTIPID=$! multiop $DIR/f31c Ouc - usleep 500 kill -USR1 $MULTIPID wait $MULTIPID } @@ -1201,22 +1199,18 @@ test_31f() { # bug 4554 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f $GETSTRIPE $DIR/d31f/hosts - multiop $DIR/d31f D_c & + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID=$! - sleep 1 - rm -rv $DIR/d31f || error "first of $DIR/d31f" mkdir $DIR/d31f $SETSTRIPE $DIR/d31f -s 1048576 -c 1 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f - $DIR/d31f/hosts - multiop $DIR/d31f D_c & + $DIR/d31f/hosts + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID2=$! - sleep 6 - kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running" wait $MULTIPID || error "first opendir $MULTIPID failed" @@ -1895,12 +1889,11 @@ run_test 43 "execution of file opened for write should return -ETXTBSY" test_43a() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! 
multiop $DIR/d43/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" @@ -1908,12 +1901,11 @@ run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" test_43b() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! truncate $DIR/d43/multiop 0 && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43b "truncate of file being executed should return -ETXTBSY" @@ -3047,10 +3039,8 @@ run_test 72 "Test that remove suid works properly (bug5695) ====" test_73() { mkdir $DIR/d73-1 mkdir $DIR/d73-2 - multiop $DIR/d73-1/f73-1 O_c & + multiop_bg_pause $DIR/d73-1/f73-1 O_c || return 1 pid1=$! - #give multiop a chance to open - usleep 500 echo 0x80000129 > /proc/sys/lustre/fail_loc multiop $DIR/d73-1/f73-2 Oc & diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 9a55449..8370a8b 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -221,9 +221,8 @@ run_test 10b "write of file with sub-page size on multiple mounts " test_11() { mkdir $DIR1/d11 - multiop $DIR1/d11/f O_c & + multiop_bg_pause $DIR1/d11/f O_c || return 1 MULTIPID=$! - usleep 200 cp -p /bin/ls $DIR1/d11/f $DIR2/d11/f RC=$? @@ -267,12 +266,11 @@ run_test 14 "execution of file open for write returns -ETXTBSY =" test_14a() { mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! 
- sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! multiop $DIR2/d14/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" @@ -280,13 +278,12 @@ run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" test_14b() { # bug 3192, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 - truncate $DIR2/d14/multiop 0 && kill -9 $MULTIPID && \ + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! + truncate $DIR2/d14/multiop 0 && kill -9 $MULTIOP_PID && \ error "expected truncate error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -295,12 +292,11 @@ run_test 14b "truncate of executing file returns -ETXTBSY ======" test_14c() { # bug 3430, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! 
cp /etc/hosts $DIR2/d14/multiop && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -309,13 +305,12 @@ run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" test_14d() { # bug 10921 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! log chmod chmod 600 $DIR1/d14/multiop || error "chmod failed" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -445,10 +440,9 @@ test_23() { # Bug 5972 time1=`date +%s` sleep 2 - multiop $DIR1/f23 or20_c & + multiop_bg_pause $DIR1/f23 or20_c || return 1 MULTIPID=$! - sleep 2 time2=`stat -c "%X" $DIR2/f23` if (( $time2 <= $time1 )); then diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 91856a4..f36169c 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1581,3 +1581,32 @@ check_runas_id() { add user $myRUNAS_ID:$myRUNAS_ID on these nodes." rm -rf $DIR/d0_runas_test } + +# Run multiop ($MULTIOP_PROG, default "multiop") on file $1 with command string $2 in the background, but wait for it to print +# "PAUSING" to its stdout before returning from this function. Returns 0 once PAUSING is read (the multiop pid is then in $! for the caller); returns 1 if read fails within its 60s limit, or if the line read is not PAUSING, in which case the multiop started here is killed via $! (no PID variable is set in this function). +multiop_bg_pause() { + MULTIOP_PROG=${MULTIOP_PROG:-multiop} + FILE=$1 + ARGS=$2 + + TMPPIPE=/tmp/multiop_open_wait_pipe.$$ + mkfifo $TMPPIPE + + echo "$MULTIOP_PROG $FILE v$ARGS" + $MULTIOP_PROG $FILE v$ARGS > $TMPPIPE & + + echo "TMPPIPE=${TMPPIPE}" + read -t 60 multiop_output < $TMPPIPE + if [ $? 
-ne 0 ]; then + rm -f $TMPPIPE + return 1 + fi + rm -f $TMPPIPE + if [ "$multiop_output" != "PAUSING" ]; then + echo "Incorrect multiop output: $multiop_output" + kill -9 $! + return 1 + fi + + return 0 +} -- 1.8.3.1