+
+# Test 108: call ost_evict_client while a large background write is running,
+# then require that client_up succeeds afterwards.
+test_108() {
+ mkdir -p $DIR/$tdir
+ # single stripe, placed on OST index 0
+ $LFS setstripe -c 1 -i 0 $DIR/$tdir
+
+ dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=256 &
+ local dd_pid=$!
+ # brief pause before evicting; presumably so dd has started its I/O
+ sleep 0.1
+
+ ost_evict_client
+
+ # dd's exit status is not checked; only the reconnect below is asserted
+ wait $dd_pid
+
+ client_up || error "reconnect failed"
+ rm -f $DIR/$tdir/$tfile
+}
+run_test 108 "client eviction don't crash"
+
+# Test 110a: create a 2-stripe directory on MDT index 1 while the mkdir is
+# run through drop_request (test title: "drop client req"), then check the
+# directory was placed on the requested MDT.
+test_110a () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local mdtidx=1
+ local num
+
+ # Prepare for the 110 tests, which need striped dirs on a remote MDT:
+ # set enable_remote_dir=1 on every MDS (set_param errors are discarded).
+ for num in $(seq $MDSCOUNT); do
+ do_facet mds$num \
+ lctl set_param -n mdt.$FSNAME*.enable_remote_dir=1 \
+ 2>/dev/null
+ done
+
+ mkdir -p $DIR/$tdir
+ drop_request "$LFS mkdir -i $mdtidx -c2 $remote_dir" ||
+ error "lfs mkdir failed"
+ # the MDT index reported for the new directory must match the request
+ local diridx=$($LFS getstripe -m $remote_dir)
+ [ $diridx -eq $mdtidx ] || error "$diridx != $mdtidx"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110a "create remote directory: drop client req"
+
+test_110b () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local mdtidx=1
+
+ mkdir -p $DIR/$tdir
+ drop_reint_reply "$LFS mkdir -i $mdtidx -c2 $remote_dir" ||
+ error "lfs mkdir failed"
+
+ diridx=$($LFS getstripe -m $remote_dir)
+ [ $diridx -eq $mdtidx ] || error "$diridx != $mdtidx"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110b "create remote directory: drop Master rep"
+
+test_110c () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local mdtidx=1
+
+ mkdir -p $DIR/$tdir
+ drop_update_reply $mdtidx "$LFS mkdir -i $mdtidx -c2 $remote_dir" ||
+ error "lfs mkdir failed"
+
+ diridx=$($LFS getstripe -m $remote_dir)
+ [ $diridx -eq $mdtidx ] || error "$diridx != $mdtidx"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110c "create remote directory: drop update rep on slave MDT"
+
+test_110d () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local MDTIDX=1
+
+ mkdir -p $DIR/$tdir
+ $LFS mkdir -i $MDTIDX -c2 $remote_dir || error "lfs mkdir failed"
+
+ drop_request "rm -rf $remote_dir" || error "rm remote dir failed"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 110d "remove remote directory: drop client req"
+
+test_110e () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local MDTIDX=1
+
+ mkdir -p $DIR/$tdir
+ $LFS mkdir -i $MDTIDX -c2 $remote_dir || error "lfs mkdir failed"
+ drop_reint_reply "rm -rf $remote_dir" || error "rm remote dir failed"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 110e "remove remote directory: drop master rep"
+
+test_110f () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local remote_dir=$DIR/$tdir/remote_dir
+ local MDTIDX=1
+
+ mkdir -p $DIR/$tdir
+ $LFS mkdir -i $MDTIDX -c2 $remote_dir || error "lfs mkdir failed"
+ drop_update_reply $MDTIDX "rm -rf $remote_dir" ||
+ error "rm remote dir failed"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110f "remove remote directory: drop slave rep"
+
+test_110g () {
+ [[ "$MDS1_VERSION" -ge $(version_code 2.11.0) ]] ||
+ skip "Need MDS version at least 2.11.0"
+
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+
+ mkdir -p $DIR/$tdir
+ touch $DIR/$tdir/$tfile
+
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds1 $LCTL set_param fail_loc=0x119
+ $LFS migrate -m 1 $DIR/$tdir &
+ migrate_pid=$!
+ sleep 5
+ do_facet mds1 $LCTL set_param fail_loc=0
+ wait $migrate_pid
+
+ local mdt_index
+ mdt_index=$($LFS getstripe -m $DIR/$tdir)
+ [ $mdt_index == 1 ] || error "$tdir is not on MDT1"
+ mdt_index=$($LFS getstripe -m $DIR/$tdir/$tfile)
+ [ $mdt_index == 1 ] || error "$tfile is not on MDT1"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 110g "drop reply during migration"
+
+# Test 110h: rename a file onto an existing file in a directory created on
+# MDT index 1, with the rename run through drop_update_reply. Afterwards the
+# source must be gone and the target must hold the source's data.
+test_110h () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] ||
+ skip "Need MDS version at least 2.7.56"
+
+ local src_dir=$DIR/$tdir/source_dir
+ local tgt_dir=$DIR/$tdir/target_dir
+ local MDTIDX=1
+
+ mkdir -p $src_dir
+ $LFS mkdir -i $MDTIDX $tgt_dir
+
+ # source gets a copy of /etc/hosts; target starts as an empty file
+ dd if=/etc/hosts of=$src_dir/src_file
+ touch $tgt_dir/tgt_file
+ drop_update_reply $MDTIDX \
+ "mrename $src_dir/src_file $tgt_dir/tgt_file" ||
+ error "mrename failed"
+
+ # it is an error if the source still exists as a regular file
+ $CHECKSTAT -t file $src_dir/src_file &&
+ error "src_file present after rename"
+
+ # the renamed file must still match the data written to the source
+ diff /etc/hosts $tgt_dir/tgt_file ||
+ error "file changed after rename"
+
+}
+run_test 110h "drop update reply during cross-MDT file rename"
+
+# Test 110i: rename a non-empty directory onto an existing empty directory
+# that lives under a directory created on MDT index 1, with the rename run
+# through drop_update_reply. Afterwards the source must be gone and the
+# target must contain the source's file.
+test_110i () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] ||
+ skip "Need MDS version at least 2.7.56"
+
+ local src_dir=$DIR/$tdir/source_dir
+ local tgt_dir=$DIR/$tdir/target_dir
+ local MDTIDX=1
+
+ mkdir -p $src_dir
+ $LFS mkdir -i $MDTIDX $tgt_dir
+
+ # source directory holds one file "a"; target directory starts empty
+ mkdir $src_dir/src_dir
+ touch $src_dir/src_dir/a
+ mkdir $tgt_dir/tgt_dir
+ drop_update_reply $MDTIDX \
+ "mrename $src_dir/src_dir $tgt_dir/tgt_dir" ||
+ error "mrename failed"
+
+ # it is an error if the source directory still exists
+ $CHECKSTAT -t dir $src_dir/src_dir &&
+ error "src_dir present after rename"
+
+ $CHECKSTAT -t dir $tgt_dir/tgt_dir ||
+ error "tgt_dir not present after rename"
+
+ # the file created in the source must now be reachable via the target
+ $CHECKSTAT -t file $tgt_dir/tgt_dir/a ||
+ error "a not present after rename"
+}
+run_test 110i "drop update reply during cross-MDT dir rename"
+
+# Test 110j: hard-link a file from a plain directory into a directory
+# created on MDT index 1, with the ln run through drop_update_reply, and
+# check that the new link exists as a regular file.
+test_110j () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] ||
+ skip "Need MDS version at least 2.7.56"
+
+ local remote_dir=$DIR/$tdir/remote_dir
+ local local_dir=$DIR/$tdir/local_dir
+ local MDTIDX=1
+
+ mkdir -p $DIR/$tdir
+ # same path as $local_dir
+ mkdir $DIR/$tdir/local_dir
+ $LFS mkdir -i $MDTIDX $remote_dir
+
+ touch $local_dir/local_file
+ drop_update_reply $MDTIDX \
+ "ln $local_dir/local_file $remote_dir/remote_file" ||
+ error "ln failed"
+
+ $CHECKSTAT -t file $remote_dir/remote_file ||
+ error "remote not present after ln"
+}
+run_test 110j "drop update reply during cross-MDT ln"
+
+# Test 110k: with the client unmounted, restart mds2 using -o abort_recovery
+# while fail_loc 0x1103 (OBD_FAIL_FLD_QUERY_REQ) is set on it; the start must
+# succeed. Cleanup restarts mds2 normally and remounts the client.
+test_110k() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTS"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.12.55) ]] ||
+ skip "Need MDS version at least 2.12.55"
+
+ stop mds2 || error "stop mds2 failed"
+ # the result of this umount is not checked
+ umount $MOUNT
+
+#define OBD_FAIL_FLD_QUERY_REQ 0x1103
+ do_facet mds2 lctl set_param fail_loc=0x1103
+ start mds2 $(mdsdevname 2) -o abort_recovery ||
+ error "start MDS with abort_recovery should succeed"
+ do_facet mds2 lctl set_param fail_loc=0
+
+ # cleanup
+ stop mds2 || error "cleanup: stop mds2 failed"
+ start mds2 $(mdsdevname 2) || error "cleanup: start mds2 failed"
+ zconf_mount $(hostname) $MOUNT || error "cleanup: mount failed"
+ client_up || error "post-failover df failed"
+}
+run_test 110k "FID_QUERY failed during recovery"
+
+# LU-2844 mdt prepare fail should not cause umount oops
+#
+# Test 111: set fail_loc 0x151 (OBD_FAIL_MDS_CHANGELOG_INIT) on $SINGLEMDS,
+# stop it, and require that the next start fails; then clear fail_loc and
+# require that a second start succeeds.
+test_111 ()
+{
+ [[ "$MDS1_VERSION" -ge $(version_code 2.3.62) ]] ||
+ skip "Need MDS version at least 2.3.62"
+
+#define OBD_FAIL_MDS_CHANGELOG_INIT 0x151
+ do_facet $SINGLEMDS lctl set_param fail_loc=0x151
+ stop $SINGLEMDS || error "stop MDS failed"
+ # ${SINGLEMDS//mds/} strips "mds" from the facet name to get its number
+ start $SINGLEMDS $(mdsdevname ${SINGLEMDS//mds/}) &&
+ error "start MDS should fail"
+ do_facet $SINGLEMDS lctl set_param fail_loc=0
+ start $SINGLEMDS $(mdsdevname ${SINGLEMDS//mds/}) ||
+ error "start MDS failed"
+}
+run_test 111 "mdd setup fail should not cause umount oops"
+
+# LU-793
+#
+# Test 112a: copy a 100K random file into Lustre through pause_bulk (given
+# $TIMEOUT as its second argument) and verify the copied data matches the
+# source. All failures are reported with error_noexit, so the remaining
+# steps, including the temp file removal, are not skipped by an error call.
+test_112a() {
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ do_facet_random_file client $TMP/$tfile 100K ||
+ error_noexit "Create random file $TMP/$tfile"
+
+ pause_bulk "cp $TMP/$tfile $DIR/$tfile" $TIMEOUT ||
+ error_noexit "Can't pause_bulk copy"
+
+ df $DIR
+ # expect cmp to succeed, client resent bulk
+ cmp $TMP/$tfile $DIR/$tfile ||
+ error_noexit "Wrong data has been written"
+ rm $DIR/$tfile ||
+ error_noexit "Can't remove file"
+ rm $TMP/$tfile
+}
+run_test 112a "bulk resend while orignal request is in progress"
+
+test_115_read() {
+ local fail1=$1
+ local fail2=$2
+
+ df $DIR
+ dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1
+ cancel_lru_locks osc
+
+ # OST_READ = 3,
+ $LCTL set_param fail_loc=$fail1 fail_val=3
+ dd of=/dev/null if=$DIR/$tfile bs=4096 count=1 &
+ pid=$!
+ sleep 1
+
+ set_nodes_failloc "$(osts_nodes)" $fail2
+
+ wait $pid || error "dd failed"
+ return 0
+}
+
+test_115_write() {
+ local fail1=$1
+ local fail2=$2
+ local error=$3
+ local fail_val2=${4:-0}
+
+ df $DIR
+ touch $DIR/$tfile
+
+ # OST_WRITE = 4,
+ $LCTL set_param fail_loc=$fail1 fail_val=4
+ dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 oflag=dsync &
+ pid=$!
+ sleep 1
+
+ df $MOUNT
+ set_nodes_failloc "$(osts_nodes)" $fail2 $fail_val2
+
+ wait $pid
+ rc=$?
+ [ $error -eq 0 ] && [ $rc -ne 0 ] && error "dd error ($rc)"
+ [ $error -ne 0 ] && [ $rc -eq 0 ] && error "dd success"
+ return 0
+}
+
+test_115a() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
+ #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
+ test_115_read 0x8000051b 0x8000051a
+}
+run_test 115a "read: late REQ MDunlink and no bulk"
+
+test_115b() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
+ #define OBD_FAIL_OST_ENOSPC 0x215
+
+ # pass $OSTCOUNT for the fail_loc to be caught
+ # appropriately by the IO thread
+ test_115_write 0x8000051b 0x80000215 1 $OSTCOUNT
+}
+run_test 115b "write: late REQ MDunlink and no bulk"
+
+test_115c() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+ #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
+ test_115_read 0x8000050f 0x8000051a
+}
+run_test 115c "read: late Reply MDunlink and no bulk"
+
+test_115d() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+ #define OBD_FAIL_OST_ENOSPC 0x215
+ test_115_write 0x8000050f 0x80000215 0
+}
+run_test 115d "write: late Reply MDunlink and no bulk"
+
+test_115e() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
+ #define OBD_FAIL_OST_ALL_REPLY_NET 0x211
+ test_115_read 0x80000510 0x80000211
+}
+run_test 115e "read: late Bulk MDunlink and no reply"
+
+test_115f() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
+ #define OBD_FAIL_OST_ALL_REPLY_NET 0x211
+ test_115_read 0x8000051b 0x80000211
+}
+run_test 115f "read: late REQ MDunlink and no reply"
+
+test_115g() {
+ [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] &&
+ skip "need at least 2.8.50 on OST"
+
+ #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
+ test_115_read 0x8000051c 0
+}
+run_test 115g "read: late REQ MDunlink and Reply MDunlink"
+
+# parameters: fail_loc CMD RC
+#
+# Helper for test 120: run one flocks_test lock command with a fail_loc set,
+# then evict and reconnect the client while it is running.
+#   $1 - selects the fail_loc: "CLEANUP", "REPLY", "DEADLOCK CLEANUP" or
+#        "DEADLOCK REPLY" (any other value leaves $fail unset)
+#   $2 - lock command passed to flocks_test ("get" also takes a conflicting
+#        read lock first)
+#   $3 - exit status of flocks_test that counts as success
+# Returns 0 when flocks_test exited with $3, otherwise its exit status.
+test_120_reply() {
+ local PID
+ local PID2
+ local rc=5
+ local fail
+
+ #define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320
+ #define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321
+ #define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
+ #define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
+
+ echo
+ # banner line; each branch below finishes it and picks the fail_loc
+ echo -n "** FLOCK REPLY vs. EVICTION race, lock $2"
+ [ "$1" = "CLEANUP" ] &&
+ fail=0x80000320 && echo ", $1 cp first"
+ [ "$1" = "REPLY" ] &&
+ fail=0x80000321 && echo ", $1 cp first"
+ [ "$1" = "DEADLOCK CLEANUP" ] &&
+ fail=0x80000322 && echo " DEADLOCK, CLEANUP cp first"
+ [ "$1" = "DEADLOCK REPLY" ] &&
+ fail=0x80000323 && echo " DEADLOCK, REPLY cp first"
+
+ if [ x"$2" = x"get" ]; then
+ #for TEST lock, take a conflict in advance
+ # sleep longer than evictor to not confuse fail_loc: 2+2+4
+ echo "** Taking conflict **"
+ flocks_test 5 set read sleep 10 $DIR/$tfile &
+ PID2=$!
+
+ sleep 2
+ fi
+
+ $LCTL set_param fail_loc=$fail
+
+ # the lock command under test runs in the background during eviction
+ flocks_test 5 $2 write $DIR/$tfile &
+ PID=$!
+
+ sleep 2
+ echo "** Evicting and re-connecting client **"
+ mds_evict_client
+
+ client_reconnect
+
+ # the conflicting lock holder's exit status is not checked
+ if [ x"$2" = x"get" ]; then
+ wait $PID2
+ fi
+
+ wait $PID
+ rc=$?
+
+ # check if the return value is allowed
+ [ $rc -eq $3 ] && rc=0
+
+ $LCTL set_param fail_loc=0
+ return $rc
+}
+
+# a lock is taken, unlock vs. cleanup_resource() race for destroying
+# the ORIGINAL lock.
+#
+# Arguments: $1 - fail_loc to set on the local node
+# Returns:   the exit status of the background flocks_test
+test_120_destroy()
+{
+ local PID
+ # declared local so the value does not leak into later tests
+ local rc
+
+ flocks_test 5 set write sleep 4 $DIR/$tfile &
+ PID=$!
+ sleep 2
+
+ # let the unlock sleep in CP CB
+ $LCTL set_param fail_loc=$1
+ sleep 4
+
+ # let the cleanup sleep in CP CB
+ mds_evict_client
+
+ client_reconnect
+
+ wait $PID
+ rc=$?
+
+ $LCTL set_param fail_loc=0
+ return $rc
+}
+
+test_120() {
+ flock_is_enabled || { skip "mount w/o flock enabled" && return; }
+ touch $DIR/$tfile
+
+ test_120_reply "CLEANUP" set 5 || error "SET race failed"
+ test_120_reply "CLEANUP" get 5 || error "GET race failed"
+ test_120_reply "CLEANUP" unlock 5 || error "UNLOCK race failed"
+
+ test_120_reply "REPLY" set 5 || error "SET race failed"
+ test_120_reply "REPLY" get 5 || error "GET race failed"
+ test_120_reply "REPLY" unlock 5 || error "UNLOCK race failed"
+
+ # DEADLOCK tests
+ test_120_reply "DEADLOCK CLEANUP" set 5 || error "DEADLOCK race failed"
+ test_120_reply "DEADLOCK REPLY" set 35 || error "DEADLOCK race failed"
+
+ test_120_destroy 0x320 || error "unlock-cleanup race failed"
+}
+run_test 120 "flock race: completion vs. evict"
+
+# Test 113: drop one MDT ldlm reply for a stat, then chmod the file with
+# early lock cancel disabled and fail_loc 0x31f set, and check that no
+# MDC import reports an EVICTED state newer than the start of the test.
+test_113() {
+ # start time in seconds since the epoch, compared with EVICT below
+ local BEFORE=$(date +%s)
+ local EVICT
+
+ # modify dir so that next revalidate would not obtain UPDATE lock
+ touch $DIR
+
+ # drop 1 reply with UPDATE lock,
+ # resend should not create 2nd lock on server
+ mcreate $DIR/$tfile || error "mcreate failed: $?"
+ drop_mdt_ldlm_reply_once "stat $DIR/$tfile" || error "stat failed: $?"
+
+ # 2 BL AST will be sent to client, both must find the same lock,
+ # race them to not get EINVAL for 2nd BL AST
+ #define OBD_FAIL_LDLM_PAUSE_CANCEL2 0x31f
+ $LCTL set_param fail_loc=0x8000031f
+
+ # early lock cancel is switched off only around the chmod
+ $LCTL set_param ldlm.namespaces.*.early_lock_cancel=0 > /dev/null
+ chmod 0777 $DIR/$tfile || error "chmod failed: $?"
+ $LCTL set_param ldlm.namespaces.*.early_lock_cancel=1 > /dev/null
+
+ # let the client reconnect
+ client_reconnect
+ # largest 5th field over all state lines ending in "EVICTED ]";
+ # presumably the eviction timestamp, given the comparison with BEFORE
+ EVICT=$($LCTL get_param mdc.$FSNAME-MDT*.state |
+ awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+
+ # pass when there was no eviction or it is not newer than BEFORE
+ [ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened"
+}
+run_test 113 "ldlm enqueue dropped reply should not cause deadlocks"
+
+# PID of the background command started by the 130 tests; set by
+# test_130_base (and test_130c) and waited for by the calling test.
+T130_PID=0
+# Common setup for tests 130a/130b: start a delayed stat of $tdir in the
+# background (PID left in T130_PID), remove $tdir while the stat is delayed,
+# then set fail_loc 0x157 on the MDT nodes.
+test_130_base() {
+ test_mkdir -p $DIR/$tdir
+
+ # Prevent interference from layout intent RPCs due to
+ # asynchronous writeback. These will be tested in 130c below.
+ do_nodes ${CLIENTS:-$HOSTNAME} sync
+
+ # get only LOOKUP lock on $tdir
+ cancel_lru_locks mdc
+ ls $DIR/$tdir/$tfile 2>/dev/null
+
+ # get getattr by fid on $tdir
+ #
+ # we need to race with unlink, unlink must complete before we will
+ # take a DLM lock, otherwise unlink will wait until getattr will
+ # complete; but later than getattr starts so that getattr found
+ # the object
+#define OBD_FAIL_MDS_INTENT_DELAY 0x160
+ set_nodes_failloc "$(mdts_nodes)" 0x80000160
+ stat $DIR/$tdir &
+ T130_PID=$!
+ sleep 2
+
+ rm -rf $DIR/$tdir
+
+ # drop the reply so that resend happens on an unlinked file.
+#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ set_nodes_failloc "$(mdts_nodes)" 0x80000157
+}
+
+test_130a() {
+ remote_mds_nodsh && skip "remote MDS with nodsh"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.7.2) ]] ||
+ skip "Need server version newer than 2.7.1"
+
+ test_130_base
+
+ wait $T130_PID || [ $? -eq 0 ] && error "stat should fail"
+ return 0
+}
+run_test 130a "enqueue resend on not existing file"
+
+test_130b() {
+ remote_mds_nodsh && skip "remote MDS with nodsh"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.7.2) ]] ||
+ skip "Need server version newer than 2.7.1"
+
+ test_130_base
+ # let the reply to be dropped
+ sleep 10
+
+#define OBD_FAIL_SRV_ENOENT 0x217
+ set_nodes_failloc "$(mdts_nodes)" 0x80000217
+
+ wait $T130_PID || [ $? -eq 0 ] && error "stat should fail"
+ return 0
+}
+run_test 130b "enqueue resend on a stale inode"
+
+# Test 130c: write a small file, start a delayed background sync (PID in
+# T130_PID), remove the file while the sync is delayed, then set fail_loc
+# 0x157 and, 10 seconds later, 0x217 on the MDT nodes. The sync's exit
+# status is not checked; the test always returns 0.
+test_130c() {
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
+ do_nodes ${CLIENTS:-$HOSTNAME} sync
+ echo XXX > $DIR/$tfile
+
+ cancel_lru_locks mdc
+
+ # Trigger writeback on $tfile.
+ #
+ # we need to race with unlink, unlink must complete before we will
+ # take a DLM lock, otherwise unlink will wait until intent will
+ # complete; but later than intent starts so that intent found
+ # the object
+#define OBD_FAIL_MDS_INTENT_DELAY 0x160
+ set_nodes_failloc "$(mdts_nodes)" 0x80000160
+ sync &
+ T130_PID=$!
+ sleep 2
+
+ rm $DIR/$tfile
+
+ # drop the reply so that resend happens on an unlinked file.
+#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ set_nodes_failloc "$(mdts_nodes)" 0x80000157
+
+ # let the reply to be dropped
+ sleep 10
+
+#define OBD_FAIL_SRV_ENOENT 0x217
+ set_nodes_failloc "$(mdts_nodes)" 0x80000217
+
+ wait $T130_PID
+
+ return 0
+}
+run_test 130c "layout intent resend on a stale inode"
+
+test_132() {
+ local before=$(date +%s)
+ local evict
+
+ mount_client $MOUNT2 || error "mount filed"
+
+ rm -f $DIR/$tfile
+ # get a lock on client so that export would reach the stale list
+ $LFS setstripe -i 0 $DIR/$tfile || error "setstripe failed"
+ dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 conv=fsync ||
+ error "dd failed"
+
+ #define OBD_FAIL_OST_PAUSE_PUNCH 0x236
+ do_facet ost1 $LCTL set_param fail_val=120 fail_loc=0x80000236
+
+ $TRUNCATE $DIR/$tfile 100 &
+
+ sleep 1
+ dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1 conv=notrunc ||
+ error "dd failed"
+
+ wait
+ umount_client $MOUNT2
+
+ evict=$(do_facet client $LCTL get_param \
+ osc.$FSNAME-OST0000-osc-*/state |
+ awk -F"[ [,]" '/EVICTED ]$/ { if (t<$5) {t=$5;} } END { print t }')
+
+ [ -z "$evict" ] || [[ $evict -le $before ]] ||
+ (do_facet client $LCTL get_param \
+ osc.$FSNAME-OST0000-osc-*/state;
+ error "eviction happened: $evict before:$before")
+}
+run_test 132 "long punch"
+
+# Test 131: start a write under fail_loc 0x414 (OBD_FAIL_OSC_DELAY_IO,
+# fail_val=4), then set fail_loc 0x31e on the OST nodes, evict the client
+# from the OST and reconnect. The background write is expected to fail.
+test_131() {
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ rm -f $DIR/$tfile
+ # get a lock on client so that export would reach the stale list
+ $LFS setstripe -i 0 $DIR/$tfile || error "setstripe failed"
+ dd if=/dev/zero of=$DIR/$tfile count=1 || error "dd failed"
+
+ # another IO under the same lock
+ #define OBD_FAIL_OSC_DELAY_IO 0x414
+ # NOTE(review): the first set_param looks redundant, since the second
+ # one sets the same fail_loc together with fail_val - confirm
+ $LCTL set_param fail_loc=0x80000414
+ $LCTL set_param fail_val=4 fail_loc=0x80000414
+ dd if=/dev/zero of=$DIR/$tfile count=1 conv=notrunc oflag=dsync &
+ local pid=$!
+ sleep 1
+
+ #define OBD_FAIL_LDLM_BL_EVICT 0x31e
+ set_nodes_failloc "$(osts_nodes)" 0x8000031e
+ ost_evict_client
+ client_reconnect
+
+ # a successful background dd is the failure case
+ wait $pid && error "dd succeeded"
+ return 0
+}
+run_test 131 "IO vs evict results to IO under staled lock"
+
+# Test 133: while a background flock holds the file for 2 * TIMEOUT seconds,
+# start a paused multiop on the same file, set fail_loc 0x157 on the MDT
+# nodes, and resume the multiop with SIGUSR1.
+# Returns 1 if multiop_bg_pause fails, 2 if the multiop exits non-zero,
+# 0 otherwise.
+test_133() {
+ local list=$(comma_list $(mdts_nodes))
+ # declared local so the value does not leak into later tests
+ local PID
+
+ local t=$((TIMEOUT * 2))
+ touch $DIR/$tfile
+
+ # this background job is never waited for explicitly
+ flock $DIR/$tfile -c "echo bl lock;sleep $t;echo bl flock unlocked" &
+ sleep 1
+ multiop_bg_pause $DIR/$tfile O_jc || return 1
+ # $! is taken from the job multiop_bg_pause started most recently
+ PID=$!
+
+ #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ do_nodes $list $LCTL set_param fail_loc=0x80000157
+ kill -USR1 $PID
+ echo "waiting for multiop $PID"
+ wait $PID || return 2
+
+ rm -f $DIR/$tfile
+
+ return 0
+}
+run_test 133 "don't fail on flock resend"
+
+# Test 134: with fail_loc 0x722 (OBD_FAIL_TGT_REPLY_DATA_RACE, fail_val=5)
+# set on mds1, run an rm on one client and an mv on another in the
+# background, fail over mds1, and require both commands to succeed.
+test_134() {
+ [ -z "$CLIENTS" ] && skip "Need two or more clients" && return
+ [ $CLIENTCOUNT -lt 2 ] &&
+ { skip "Need 2+ clients, have $CLIENTCOUNT" && return; }
+
+ # declared local so the values do not leak into later tests
+ local rmpid
+ local mvpid
+
+ mkdir -p $MOUNT/$tdir/1 $MOUNT/$tdir/2 || error "mkdir failed"
+ touch $MOUNT/$tdir/1/$tfile $MOUNT/$tdir/2/$tfile ||
+ error "touch failed"
+ # remount all clients before the race
+ zconf_umount_clients $CLIENTS $MOUNT
+ zconf_mount_clients $CLIENTS $MOUNT
+
+#define OBD_FAIL_TGT_REPLY_DATA_RACE 0x722
+ # assume commit interval is 5
+ do_facet mds1 "$LCTL set_param fail_loc=0x722 fail_val=5"
+
+ # $CLIENTS is a comma-separated list; use its first two entries
+ local -a clients=(${CLIENTS//,/ })
+ local client1=${clients[0]}
+ local client2=${clients[1]}
+
+ do_node $client1 rm $MOUNT/$tdir/1/$tfile &
+ rmpid=$!
+ do_node $client2 mv $MOUNT/$tdir/2/$tfile $MOUNT/$tdir/2/${tfile}_2 &
+ mvpid=$!
+ fail mds1
+ wait $rmpid || error "rm failed"
+ wait $mvpid || error "mv failed"
+ return 0
+}
+run_test 134 "race between failover and search for reply data free slot"
+
+# Test 135: in a directory whose layout is set with "-E 1M -L mdt", open an
+# already existing file with O_RDWR|O_CREAT while fail_loc 0x157
+# (OBD_FAIL_MDS_LDLM_REPLY_NET) is set on the MDT nodes; the open must
+# succeed.
+test_135() {
+ [ "$MDS1_VERSION" -lt $(version_code 2.12.51) ] &&
+ skip "Need MDS version at least 2.12.51"
+
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -E 1M -L mdt $DIR/$tdir
+ # to have parent dir write lock before open/resend
+ touch $DIR/$tdir/$tfile
+ #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x80000157
+ openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tdir/$tfile ||
+ error "Failed to open DOM file"
+}
+run_test 135 "DOM: open/create resend to return size"
+
+test_136() {
+ remote_mds_nodsh && skip "remote MDS with nodsh"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.12.52) ]] ||
+ skip "Need MDS version at least 2.12.52"
+
+ local mdts=$(comma_list $(mdts_nodes))
+ local MDT0=$(facet_svc $SINGLEMDS)
+
+ local clog=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n)
+ [ -n "$clog" ] || error "changelog_register failed"
+ cl_mask=$(do_facet mds1 $LCTL get_param \
+ mdd.$MDT0.changelog_mask -n)
+ changelog_chmask "ALL"
+
+ # generate some changelog records to accumulate
+ test_mkdir -i 0 -c 0 $DIR/$tdir || error "mkdir $tdir failed"
+ createmany -m $DIR/$tdir/$tfile 10000 ||
+ error "create $DIR/$tdir/$tfile failed"
+
+ local size1=$(do_facet $SINGLEMDS \
+ $LCTL get_param -n mdd.$MDT0.changelog_size)
+ echo "Changelog size $size1"
+
+ #define OBD_FAIL_LLOG_PURGE_DELAY 0x1318
+ do_nodes $mdts $LCTL set_param fail_loc=0x1318 fail_val=30
+
+ # launch changelog_deregister in background on MDS
+ do_facet mds1 "nohup $LCTL --device $MDT0 changelog_deregister $clog \
+ > foo.out 2> foo.err < /dev/null &"
+ # give time to reach fail_loc
+ sleep 15
+
+ # fail_loc will make MDS sleep in the middle of changelog_deregister
+ # take this opportunity to abruptly kill MDS
+ FAILURE_MODE_save=$FAILURE_MODE
+ FAILURE_MODE=HARD
+ fail mds1
+ FAILURE_MODE=$FAILURE_MODE_save
+
+ do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0
+
+ local size2=$(do_facet $SINGLEMDS \
+ $LCTL get_param -n mdd.$MDT0.changelog_size)
+ echo "Changelog size $size2"
+ local clog2=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+ mdd.$MDT0.changelog_users | grep $clog")
+ echo "After crash, changelog user $clog2"
+
+ [ -n "$clog2" -o $size2 -lt $size1 ] ||
+ error "changelog record count unchanged"
+
+ do_facet mds1 $LCTL set_param mdd.$MDT0.changelog_mask=\'$cl_mask\' -n
+}
+run_test 136 "changelog_deregister leaving pending records"
+
+# Test 137: with fail_loc 0x525 (OBD_FAIL_PTLRPC_RESEND_RACE) set on
+# $SINGLEMDS, start a background ln, then chmod the same file to 666 and to
+# 777. After 5 seconds the file mode must still be 777.
+test_137() {
+ df $DIR
+ mkdir -p $DIR/d1
+ mkdir -p $DIR/d2
+ dd if=/dev/zero of=$DIR/d1/$tfile bs=4096 count=1
+ dd if=/dev/zero of=$DIR/d2/$tfile bs=4096 count=1
+ cancel_lru_locks osc
+
+ #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000525"
+
+ # RPC1: any reply is to be delayed to disable last_xid logic
+ # (this background job is never waited for)
+ ln $DIR/d1/$tfile $DIR/d1/f2 &
+ sleep 1
+
+ # RPC2: setattr1 reply is delayed & resent
+ # original reply comes to client; the resend get asleep
+ chmod 666 $DIR/d2/$tfile
+
+ # RPC3: setattr2 on the same file; run ahead of RPC2 resend
+ chmod 777 $DIR/d2/$tfile
+
+ # RPC2 resend wakes up
+ sleep 5
+ # a mode other than 777 means the late 666 resend was applied
+ [ $(stat -c "%a" $DIR/d2/$tfile) == 777 ] || error "resend got applied"
+}
+run_test 137 "late resend must be skipped if already applied"
+