ONLY=${ONLY:-"$*"}
# bug number for skipped test: 3815 3939
ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36 40"
+# bug number for skipped test: 4178 4176
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
-if [[ $MDSCOUNT -ge 2 ]]; then
- skip_env "Only run with single MDT for now" && exit
-fi
+# MDT_PREFIX only leaves room for a single-digit MDT index, so refuse to run
+# with more than 9 MDTs
+[ $MDSCOUNT -gt 9 ] &&
+ error "script cannot handle more than 9 MDTs, please fix" && exit
check_and_setup_lustre
HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
HSM_ARCHIVE_NUMBER=2
- MDT_PARAM="mdt.$FSNAME-MDT0000"
- HSM_PARAM="$MDT_PARAM.hsm"
+ # MDT_PREFIX only leaves room for a single-digit MDT index
+ MDT_PREFIX="mdt.$FSNAME-MDT000"
+ HSM_PARAM="${MDT_PREFIX}0.hsm"
# archive is purged at copytool setup
HSM_ARCHIVE_PURGE=true
cdt_set_sanity_policy
}
+# Fill the global MDT array with one device name per MDT.
+#
+# For every MDT index from 0 to MDSCOUNT-1, the mds_server_uuid of the
+# matching MDC is read through $LCTL, "_UUID" is stripped from it and the
+# first word of the first line returned is stored in MDT[index].
+#
+# Globals: MDSCOUNT, FSNAME, LCTL (read), MDT (written)
+get_mdt_devices() {
+	local mdtno
+	local idx
+	local uuid_param
+
+	for mdtno in $(seq 1 $MDSCOUNT); do
+		idx=$((mdtno - 1))
+		# the wildcard matches whatever follows "-mdc-" in the MDC name
+		uuid_param="mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid"
+		MDT[$idx]=$($LCTL get_param -n $uuid_param |
+			awk '{gsub(/_UUID/,""); print $1}' | head -1)
+	done
+}
+
search_and_kill_copytool() {
local agents=${1:-$(facet_active_host $SINGLEAGT)}
copytool_cleanup() {
trap - EXIT
local agents=${1:-$(facet_active_host $SINGLEAGT)}
+ local mdtno
+ local idx
+ local oldstate
+ local mdt_hsmctrl
do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
sleep 1
echo "Copytool is stopped on $agents"
+
+ # clean up all orphan CDT requests from previous tests
+ # that would otherwise have to time out before being cleared.
+ for mdtno in $(seq 1 $MDSCOUNT); do
+ idx=$(($mdtno - 1))
+ mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control"
+ oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \
+ "$mdt_hsmctrl")
+ # skip CDTs that are already stopped or stopping
+ echo $oldstate | grep stop && continue
+
+ do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown"
+ wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+ "stopped" 20 ||
+ error "mds${mdtno} cdt state is not stopped"
+ do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate"
+ wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \
+ "$oldstate" 20 ||
+ error "mds${mdtno} cdt state is not $oldstate"
+ done
}
copytool_suspend() {
copytool_remove_backend() {
local fid=$1
- local be=$(find $HSM_ARCHIVE -name $fid)
+ local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid)
echo "Remove from backend: $fid = $be"
do_facet $SINGLEAGT rm -f $be
}
do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file"
}
+# Run "lctl set_param" on mdt.<device>.<key> for every MDT.
+#
+# $1 - extra options passed to set_param (may be empty; left unquoted on
+#      purpose so that several options are split into separate words)
+# $2 - parameter name, appended to "mdt.<device>."
+# $3 - value to assign; when empty, no "=value" part is appended
+#
+# When the options contain "-P", the command is issued on the mgs facet
+# (once per MDT); otherwise it is issued on each mdsN facet.
+# Returns 0 when every set_param succeeded, 1 when at least one failed.
+mdts_set_param() {
+	local arg=$1
+	local key=$2
+	local value=$3
+	local mdtno
+	local idx
+	local facet
+	local assign=""
+	local rc=0
+
+	[[ -z "$value" ]] || assign="=$value"
+
+	for mdtno in $(seq 1 $MDSCOUNT); do
+		idx=$((mdtno - 1))
+		case "$arg" in
+		*-P*)	facet=mgs ;;
+		*)	facet=mds${mdtno} ;;
+		esac
+		# remember any failure but keep going through the other MDTs
+		do_facet $facet $LCTL set_param $arg \
+			mdt.${MDT[$idx]}.$key$assign || rc=1
+	done
+	return $rc
+}
+
+# Wait until "$MDT_PREFIX<index>.<key>" reports the expected value on every
+# MDT.
+#
+# $1 - parameter name, appended to "$MDT_PREFIX<index>."
+# $2 - value that get_param is expected to return
+# $3 - timeout handed over to wait_result
+#
+# error() is called for each MDT on which wait_result fails.
+mdts_check_param() {
+	local key="$1"
+	local target="$2"
+	local timeout="$3"
+	local mdtno
+	local idx
+	local getter
+
+	for mdtno in $(seq 1 $MDSCOUNT); do
+		idx=$((mdtno - 1))
+		getter="$LCTL get_param -n $MDT_PREFIX${idx}.$key"
+		if ! wait_result mds${mdtno} "$getter" "$target" $timeout; then
+			error "$key state is not '$target' on mds${mdtno}"
+		fi
+	done
+}
+
changelog_setup() {
- CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0\
- changelog_register -n)
- do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog_mask="+hsm"
- $LFS changelog_clear $MDT0 $CL_USER 0
+ CL_USERS=()
+ local mdtno
+ for mdtno in $(seq 1 $MDSCOUNT); do
+ local idx=$(($mdtno - 1))
+ local cl_user=$(do_facet mds${mdtno} $LCTL \
+ --device ${MDT[$idx]} \
+ changelog_register -n)
+ CL_USERS+=($cl_user)
+ do_facet mds${mdtno} lctl set_param \
+ mdd.${MDT[$idx]}.changelog_mask="+hsm"
+ $LFS changelog_clear ${MDT[$idx]} $cl_user 0
+ done
}
changelog_cleanup() {
-# $LFS changelog $MDT0
- [[ -n "$CL_USER" ]] || return 0
-
- $LFS changelog_clear $MDT0 $CL_USER 0
- do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER
- CL_USER=
+ local mdtno
+ for mdtno in $(seq 1 $MDSCOUNT); do
+ local idx=$(($mdtno - 1))
+ [[ -z ${CL_USERS[$idx]} ]] && continue
+ $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0
+ do_facet mds${mdtno} lctl --device ${MDT[$idx]} \
+ changelog_deregister ${CL_USERS[$idx]}
+ done
+ CL_USERS=()
}
changelog_get_flags() {
local param=$1
local value=$2
local opt=$3
- if [[ "$value" != "" ]]; then
- value="=$value"
- fi
- do_facet $SINGLEMDS $LCTL set_param $opt -n $HSM_PARAM.$param$value
+ mdts_set_param "$opt -n" "hsm.$param" "$value"
return $?
}
set_test_state() {
local cmd=$1
local target=$2
- do_facet $SINGLEMDS $LCTL set_param $MDT_PARAM.hsm_control=$cmd
- wait_result $SINGLEMDS "$LCTL get_param -n $MDT_PARAM.hsm_control"\
- $target 10 || error "cdt state is not $target"
+ mdts_set_param "" hsm_control "$cmd"
+ mdts_check_param hsm_control "$target" 10
}
cdt_set_sanity_policy() {
if [[ "$CDT_POLICY_HAD_CHANGED" ]]
then
# clear all
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+NRA
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-NBR
+ mdts_set_param "" hsm.policy "+NRA"
+ mdts_set_param "" hsm.policy "-NBR"
CDT_POLICY_HAD_CHANGED=
fi
}
cdt_set_no_retry() {
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+NRA
+ mdts_set_param "" hsm.policy "+NRA"
CDT_POLICY_HAD_CHANGED=true
}
cdt_clear_no_retry() {
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-NRA
+ mdts_set_param "" hsm.policy "-NRA"
CDT_POLICY_HAD_CHANGED=true
}
cdt_set_non_blocking_restore() {
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+NBR
+ mdts_set_param "" hsm.policy "+NBR"
CDT_POLICY_HAD_CHANGED=true
}
cdt_clear_non_blocking_restore() {
- do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-NBR
+ mdts_set_param "" hsm.policy "-NBR"
CDT_POLICY_HAD_CHANGED=true
}
cdt_clear_mount_state() {
- do_facet $SINGLEMDS $LCTL set_param -d -P $MDT_PARAM.hsm_control
+ mdts_set_param "-P -d" hsm_control ""
}
cdt_set_mount_state() {
- do_facet $SINGLEMDS $LCTL set_param -P $MDT_PARAM.hsm_control=$1
+ mdts_set_param "-P" hsm_control "$1"
}
cdt_check_state() {
- local target=$1
- wait_result $SINGLEMDS\
- "$LCTL get_param -n $MDT_PARAM.hsm_control" "$target" 20 ||
- error "cdt state is not $target"
+ mdts_check_param hsm_control "$1" 20
}
cdt_disable() {
}
cleanup_large_files() {
- local ratio=$(df $MOUNT |awk '{print $5}' |sed 's/%//g' |grep -v Use)
+ local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' |
+ sed 's/%//g')
[ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \;
}
sleep $val
}
-MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -1)
+# populate MDT device array
+get_mdt_devices
# initiate variables
init_agt_vars
CURR_FILE="$CURR_DIR/$tfile.$f"
# write file-specific data
do_facet $SINGLEAGT \
- echo "d=$d, f=$f, dir=$CURR_DIR, "\
- "file=$CURR_FILE" > $CURR_FILE
+ "echo d=$d, f=$f, dir=$CURR_DIR, "\
+ "file=$CURR_FILE > $CURR_FILE"
done
done
# import to Lustre
local fid=$(make_small $f)
check_hsm_flags $f "0x00000000"
+ local orig_size=$(stat -c "%s" $f)
+ local orig_blocks=$(stat -c "%b" $f)
+
+ start_full_debug_logging
+
$LFS hsm_archive $f || error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
- [ $(stat -c "%b" $f) -ne "1" ] || error "wrong block number"
- local sz=$(stat -c "%s" $f)
- [ $sz -ne "0" ] || error "file size should not be zero"
+ local blocks=$(stat -c "%b" $f)
+ [ $blocks -eq $orig_blocks ] ||
+ error "$f: wrong block number after archive: " \
+ "$blocks != $orig_blocks"
+ local size=$(stat -c "%s" $f)
+ [ $size -eq $orig_size ] ||
+ error "$f: wrong size after archive: $size != $orig_size"
# Release and check states
$LFS hsm_release $f || error "could not release file"
check_hsm_flags $f "0x0000000d"
- [ $(stat -c "%b" $f) -eq "1" ] || error "wrong block number"
- [ $(stat -c "%s" $f) -eq $sz ] || error "wrong file size"
+ blocks=$(stat -c "%b" $f)
+ [ $blocks -gt 5 ] &&
+ error "$f: too many blocks after release: $blocks > 5"
+ size=$(stat -c "%s" $f)
+ [ $size -ne $orig_size ] &&
+ error "$f: wrong size after release: $size != $orig_size"
# Check we can release an file without stripe info
f=$f.nolov
$LFS hsm_release $f || fail "second release should succeed"
check_hsm_flags $f "0x0000000d"
+ stop_full_debug_logging
+
copytool_cleanup
}
run_test 21 "Simple release tests"
}
run_test 30b "Restore at exec (release case)"
+# Test 30c: writing to a released file while it is being executed on a
+# second client must fail, and the binary must be left unchanged.
+test_30c() {
+	needclients 2 || return 0
+
+	# the test relies on a running copytool
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+	local f=$DIR/$tdir/SLEEP
+	local fid=$(copy_file /bin/sleep $f)
+	chmod 755 $f
+
+	# archive the binary, then release it
+	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+	wait_request_state $fid ARCHIVE SUCCEED
+	$LFS hsm_release $f
+	check_hsm_flags $f "0x0000000d"
+
+	# enable the "no retry action" policy
+	cdt_set_no_retry
+
+	# execute the released binary in the background on the second client
+	do_node $CLIENT2 "$f 10" &
+	local pid=$!
+	sleep 3
+
+	# a write attempted while the binary is running has to be refused
+	echo 'Hi!' > $f &&
+		error "Update during exec of released file must fail"
+	wait $pid ||
+		error "Execution failed during run"
+	cmp /bin/sleep $f ||
+		error "Binary overwritten during exec"
+
+	# cleanup: disable the "no retry action" policy again
+	cdt_clear_no_retry
+	check_hsm_flags $f "0x00000009"
+
+	copytool_cleanup
+}
+run_test 30c "Update during exec of released file must fail"
+
restore_and_check_size() {
local f=$1
local fid=$2
cpt=$((cpt + 1))
done
if [[ $cpt -lt 10 ]]; then
- echo " restore is too long"
- else
echo " "done
+ else
+ echo " restore is too long"
+ wait_request_state $fid RESTORE SUCCEED
fi
- wait_request_state $fid RESTORE SUCCEED
return $err
}
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
changelog_cleanup
local target=0x0
wait_request_state $fid ARCHIVE CANCELED
wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
local target=0x7d
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
$LFS hsm_restore $f
wait_request_state $fid RESTORE SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
local target=0x80
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
wait_request_state $fid RESTORE SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
local target=0x80
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
wait_request_state $fid RESTORE CANCELED
wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
local target=0xfd
[[ $flags == $target ]] ||
wait_request_state $fid RESTORE CANCELED
wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
local target=0xfd
[[ $flags == $target ]] ||
$LFS hsm_remove $f
wait_request_state $fid REMOVE SUCCEED
- local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
+ local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1)
local target=0x200
[[ $flags == $target ]] ||
wait_request_state $fid REMOVE CANCELED
wait_request_state $fid CANCEL SUCCEED
- flags=$(changelog_get_flags $MDT0 RENME $fid2)
- local flags=$($LFS changelog $MDT0 | grep HSM | grep $fid | tail -1 |
- awk '{print $5}')
+ flags=$(changelog_get_flags ${MDT[0]} RENME $fid2)
+ local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid |
+ tail -n 1 | awk '{print $5}')
local target=0x27d
[[ $flags == $target ]] ||
rm $f1 || error "rm $f1 failed"
- local flags=$(changelog_get_flags $MDT0 UNLNK $fid1)
+ local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1)
local target=0x3
[[ $flags == $target ]] ||
mv $f3 $f2 || error "mv $f3 $f2 failed"
- flags=$(changelog_get_flags $MDT0 RENME $fid2)
+ flags=$(changelog_get_flags ${MDT[0]} RENME $fid2)
target=0x3
[[ $flags == $target ]] ||
local target=0x280
$LFS hsm_set --$hsm_flag $f ||
error "Cannot set $hsm_flag on $f"
- local flags=($(changelog_get_flags $MDT0 HSM $fid))
+ local flags=($(changelog_get_flags ${MDT[0]} HSM $fid))
local seen=${#flags[*]}
cnt=$((fst + cnt))
[[ $seen == $cnt ]] ||
$LFS hsm_clear --$hsm_flag $f ||
error "Cannot clear $hsm_flag on $f"
- flags=($(changelog_get_flags $MDT0 HSM $fid))
+ flags=($(changelog_get_flags ${MDT[0]} HSM $fid))
seen=${#flags[*]}
cnt=$(($cnt + 1))
[[ $cnt == $seen ]] ||