rm -f $OOS
sync; sleep 1; sync
+wait_delete_completed 300
+
if [ $SUCCESS -eq 1 ]; then
echo "Success!"
rm -f $LOG
run_test 14a "timeouts waiting for lost client during replay"
test_14b() {
+ wait_mds_ost_sync
+ wait_delete_completed
BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
#lfs setstripe --index=0 --count=1 $MOUNT1
mkdir -p $MOUNT1/$tdir
zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
wait_mds_ost_sync || return 5
- wait_destroy_complete || return 6
+ wait_delete_completed || return 6
AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
log "before $BEFOREUSED, after $AFTERUSED"
sync
# let the delete happen
wait_mds_ost_sync || return 4
- wait_destroy_complete || return 5
+ wait_delete_completed || return 5
after=`kbytesfree`
log "before: $before after: $after"
(( $before <= $after + 40 )) || return 3 # take OST logs into account
sync
# let the delete happen
wait_mds_ost_sync || return 4
- wait_destroy_complete || return 5
+ wait_delete_completed || return 5
after=`kbytesfree`
log "before: $before after: $after"
(( $before <= $after + 40 )) || return 3 # take OST logs into account
[ -z "$(lctl get_param -n mdc.${FSNAME}-*.connect_flags|grep einprogress)" \
] && skip_env "MDS doesn't support EINPROGRESS" && return
#define OBD_FAIL_MDS_DQACQ_NET 0x187
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x187"
+ do_facet mds "lctl set_param fail_loc=0x187"
# test the non-intent create path
mcreate $TDIR/$tfile &
cpid=$!
error "mknod finished incorrectly"
return 1
fi
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ do_facet mds "lctl set_param fail_loc=0"
wait $cpid || return 2
stat $TDIR/$tfile || error "mknod failed"
rm $TDIR/$tfile
#define OBD_FAIL_MDS_DQACQ_NET 0x187
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x187"
+ do_facet mds "lctl set_param fail_loc=0x187"
# test the intent create path
openfile -f O_RDWR:O_CREAT $TDIR/$tfile &
cpid=$!
error "open finished incorrectly"
return 3
fi
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ do_facet mds "lctl set_param fail_loc=0"
wait $cpid || return 4
stat $TDIR/$tfile || error "open failed"
}
[ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1)))
do_nodes $list lctl set_param fail_loc=0
+ sync # initiate all OST_DESTROYs from MDS to OST
sleep_maxage
}
set_read_ahead $old_READAHEAD
rm -f $file
+ wait_delete_completed
[ $time_ra_ON -lt $time_ra_OFF ] ||
error "read-ahead enabled time read (${time_ra_ON}s) is more than
$LFS setstripe -c 1 -o 0 ${testdir}/${tfile}
sync
cancel_lru_locks osc
+ wait_delete_completed
# clear stats.
local dev=$(get_mds_mdt_device_proc_path)
check_stats_facet ost1 "punch" 1
rm -f ${testdir}/${tfile} || error "file remove failed"
+ wait_delete_completed
check_stats_facet ost1 "destroy" 1
rm -rf $DIR/${tdir}
test_15() { # bug 974 - ENOSPC
echo "PATH=$PATH"
sh oos2.sh $MOUNT1 $MOUNT2
+ wait_delete_completed
grant_error=`dmesg | grep "> available"`
[ -z "$grant_error" ] || error "$grant_error"
}
test_31b() {
remote_ost || { skip "local OST" && return 0; }
remote_ost_nodsh && skip "remote OST w/o dsh" && return 0
+
+ # make sure there is no local locks due to destroy
+ wait_mds_ost_sync || error "wait_mds_ost_sync()"
+ wait_delete_completed || error "wait_delete_completed()"
+
mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir"
lfs setstripe $DIR/$tdir/$tfile -i 0 -c 1
cp /etc/hosts $DIR/$tdir/$tfile
rm -f $DIR1/$tdir/file000
kill -USR1 $read_pid
wait $read_pid
- sleep 1
+ wait_delete_completed
local after=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }')
echo "*** cycle($i) *** before($before):after_dd($after_dd):after($after)"
# this free space! not used
wait_update $(facet_active_host $facet) "$@"
}
-wait_delete_completed () {
- local TOTALPREV=`lctl get_param -n osc.*.kbytesavail | \
- awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
-
- local WAIT=0
- local MAX_WAIT=20
- while [ "$WAIT" -ne "$MAX_WAIT" ]; do
- sleep 1
- TOTAL=`lctl get_param -n osc.*.kbytesavail | \
- awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
- [ "$TOTAL" -eq "$TOTALPREV" ] && return 0
- echo "Waiting delete completed ... prev: $TOTALPREV current: $TOTAL "
- TOTALPREV=$TOTAL
- WAIT=$(( WAIT + 1))
- done
- echo "Delete is not completed in $MAX_WAIT sec"
- return 1
+wait_delete_completed_mds() {
+ [[ $(lustre_version_code mds) -lt $(version_code 2.2.58) ]] &&
+ return 0
+
+ local MAX_WAIT=${1:-20}
+ local mds2sync=""
+ local stime=`date +%s`
+ local etime
+ local node
+ local changes
+
+ # find MDS with pending deletions
+ for node in $(mdts_nodes); do
+ changes=$(do_node $node "lctl get_param -n osc.*MDT*.sync_*" \
+ 2>/dev/null | calc_sum)
+ if [ -z "$changes" ] || [ $changes -eq 0 ]; then
+ continue
+ fi
+ mds2sync="$mds2sync $node"
+ done
+ if [ "$mds2sync" == "" ]; then
+ return
+ fi
+ mds2sync=$(comma_list $mds2sync)
+
+ # sync MDS transactions
+ do_nodes $mds2sync "lctl set_param -n osd*.*MD*.force_sync 1"
+
+ # wait till all changes are sent and commmitted by OSTs
+ # for ldiskfs space is released upon execution, but DMU
+ # do this upon commit
+
+ local WAIT=0
+ while [ "$WAIT" -ne "$MAX_WAIT" ]; do
+ changes=$(do_nodes $mds2sync "lctl get_param -n osc.*MDT*.sync_*" \
+ | calc_sum)
+ #echo "$node: $changes changes on all"
+ if [ "$changes" -eq "0" ]; then
+ etime=`date +%s`
+ #echo "delete took $((etime - stime)) seconds"
+ return
+ fi
+ sleep 1
+ WAIT=$(( WAIT + 1))
+ done
+
+ etime=`date +%s`
+ echo "Delete is not completed in $((etime - stime)) seconds"
+ do_nodes $mds2sync "lctl get_param osc.*MDT*.sync_*"
}
wait_for_host() {
}
wait_destroy_complete () {
- echo "Waiting for destroy to be done..."
- # MAX value shouldn't be big as this mean server responsiveness
- # never increase this just to make test pass but investigate
- # why it takes so long time
- local MAX=5
- local WAIT=0
- while [ $WAIT -lt $MAX ]; do
- local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
- local con=1
- for ((i=0; i<${#RPCs[@]}; i++)); do
- [ ${RPCs[$i]} -eq 0 ] && continue
- # there are still some destroy RPCs in flight
- con=0
- break;
- done
- sleep 1
- [ ${con} -eq 1 ] && return 0 # done waiting
- echo "Waiting $WAIT secs for destroys to be done."
- WAIT=$((WAIT + 1))
- done
- echo "Destroys weren't done in $MAX sec."
- return 1
+ echo "Waiting for local destroys to complete"
+ # MAX value shouldn't be big as this mean server responsiveness
+ # never increase this just to make test pass but investigate
+ # why it takes so long time
+ local MAX=5
+ local WAIT=0
+ while [ $WAIT -lt $MAX ]; do
+ local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
+ local con=1
+ local i
+
+ for ((i=0; i<${#RPCs[@]}; i++)); do
+ [ ${RPCs[$i]} -eq 0 ] && continue
+ # there are still some destroy RPCs in flight
+ con=0
+ break;
+ done
+ sleep 1
+ [ ${con} -eq 1 ] && return 0 # done waiting
+ echo "Waiting ${WAIT}s for local destroys to complete"
+ WAIT=$((WAIT + 1))
+ done
+ echo "Local destroys weren't done in $MAX sec."
+ return 1
+}
+
+wait_delete_completed() {
+ wait_delete_completed_mds $1 || return $?
+ wait_destroy_complete
}
wait_exit_ST () {