3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
11 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
14 # bug number for skipped test:
15 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
16 # skip test cases that failed before landing - Jinshan
17 ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 12a 12b 12n 13 24 30a 31a 34 35 36 58 59"
18 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 110a 200 201 221 222a 223a 223b 225"
20 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
22 . $LUSTRE/tests/test-framework.sh
24 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
27 MULTIOP=${MULTIOP:-multiop}
28 OPENFILE=${OPENFILE:-openfile}
29 MCREATE=${MCREATE:-mcreate}
30 MOUNT_2=${MOUNT_2:-"yes"}
33 if [ $MDSCOUNT -ge 2 ]; then
34 skip_env "Only run with single MDT for now" && exit
37 check_and_setup_lustre
39 if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then
40 skip_env "Need MDS version at least 2.4.53" && exit
43 # $RUNAS_ID may get set incorrectly somewhere else
44 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
45 error "\$RUNAS_ID set to 0, but \$UID is also 0!"
47 check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
51 # the standard state when starting a test is
54 # as some tests change the default, we need to re-make it
57 if ! is_mounted $MOUNT2
64 export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
65 export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
66 export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
67 HSM_ARCHIVE=${HSM_ARCHIVE:-$TMP/arc}
70 MDT_PARAM="mdt.$FSNAME-MDT0000"
71 HSM_PARAM="$MDT_PARAM.hsm"
73 # archive is purged at copytool setup
74 HSM_ARCHIVE_PURGE=true
76 search_and_kill_copytool() {
77 echo "Killing existing copy tools"
78 killall -q $HSMTOOL_BASE || true
82 if pkill -CONT -x $HSMTOOL_BASE; then
83 echo "Wakeup copytool"
87 if $HSM_ARCHIVE_PURGE; then
88 echo "Purging archive"
92 echo "Starting copytool"
94 # bandwidth is limited to 1MB/s so the copy time is known and
95 # independent of hardware
96 local CMD="$HSMTOOL $HSMTOOL_VERBOSE --hsm-root $HSM_ARCHIVE"
97 CMD=$CMD" --daemon --bandwidth 1 $MOUNT"
98 [[ -z "$1" ]] || CMD+=" --archive $1"
107 pkill -INT -x $HSMTOOL_BASE || return 0
109 echo "Copytool is stopped"
113 pkill -STOP -x $HSMTOOL_BASE || return 0
114 echo "Copytool is suspended"
117 copytool_remove_backend() {
119 local be=$(find $HSM_ARCHIVE -name $fid)
120 echo "Remove from backend: $fid = $be"
125 $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE \
126 --import $1 $2 $MOUNT || error "import of $1 to $2 failed"
130 local file=$HSM_ARCHIVE/$1
131 mkdir -p $(dirname $file)
132 dd if=/dev/urandom of=$file count=32 bs=1000000 ||
133 error "cannot create $file"
137 CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0\
138 changelog_register -n)
139 do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog_mask="+hsm"
140 $LFS changelog_clear $MDT0 $CL_USER 0
143 changelog_cleanup() {
144 # $LFS changelog $MDT0
145 [[ -n "$CL_USER" ]] || return 0
147 $LFS changelog_clear $MDT0 $CL_USER 0
148 do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER
152 changelog_get_flags() {
157 $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}"
162 local val=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.$param)
169 do_facet $SINGLEMDS $LCTL set_param -n $HSM_PARAM.$param=$value
176 do_facet $SINGLEMDS $LCTL set_param $MDT_PARAM.hsm_control=$cmd
177 wait_result $SINGLEMDS "$LCTL get_param -n $MDT_PARAM.hsm_control"\
178 $target 10 || error "cdt state is not $target"
181 cdt_set_sanity_policy() {
183 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nra
184 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nbr
185 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-gc
189 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+nra
192 cdt_clear_no_retry() {
193 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nra
196 cdt_set_no_blocking_restore() {
197 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+nbr
200 cdt_clear_no_blocking_restore() {
201 do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nbr
204 cdt_clear_mount_state() {
205 # /!\ conf_param and set_param syntax differ => we cannot use
207 do_facet $SINGLEMDS $LCTL conf_param -d $FSNAME-MDT0000.mdt.hsm_control
210 cdt_set_mount_state() {
211 # /!\ conf_param and set_param syntax differ => we cannot use
213 do_facet $SINGLEMDS $LCTL conf_param $FSNAME-MDT0000.mdt.hsm_control=$1
218 wait_result $SINGLEMDS\
219 "$LCTL get_param -n $MDT_PARAM.hsm_control" "$target" 20 ||
220 error "cdt state is not $target"
224 set_test_state disabled disabled
228 set_test_state enabled enabled
232 set_test_state shutdown stopped
236 set_test_state purge enabled
242 cdt_set_sanity_policy
246 if [[ $CLIENTCOUNT -lt 2 ]]; then
247 skip "Need two or more clients, have $CLIENTCOUNT"
254 $LFS path2fid $1 | tr -d '[]'
261 if [[ $u == "user" ]]
263 local st=$($RUNAS $LFS hsm_state $f)
265 local st=$($LFS hsm_state $f)
269 [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)"
271 st=$(echo $st | cut -f 2 -d" " | tr -d "()," )
275 get_hsm_archive_id() {
277 local st=$($LFS hsm_state $f)
278 [[ $? == 0 ]] || error "$LFS hsm_state $f failed"
280 local ar=$(echo $st | grep "archive_id" | cut -f5 -d" " |
289 local st=$(get_hsm_flags $f)
290 [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl"
293 check_hsm_flags_user() {
297 local st=$(get_hsm_flags $f user)
298 [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl"
316 cp $1 $f || error "cannot copy $1 to $f"
317 path2fid $f || error "cannot get fid on $f"
321 local file2=${1/$DIR/$DIR2}
322 dd if=/dev/urandom of=$file2 count=2 bs=1M conv=fsync ||
323 error "cannot create $file2"
324 path2fid $1 || error "cannot get fid on $1"
327 make_large_for_striping() {
328 local file2=${1/$DIR/$DIR2}
329 local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -1)
330 dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync ||
331 error "cannot create $file2"
332 path2fid $1 || error "cannot get fid on $1"
335 make_large_for_progress() {
336 local file2=${1/$DIR/$DIR2}
337 # big file is large enough, so copy time is > 30s
338 # so the copytool makes 1 progress
339 # size is not a multiple of 1M to avoid stripe
341 dd if=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync ||
342 error "cannot create $file2"
343 path2fid $1 || error "cannot get fid on $1"
346 make_large_for_progress_aligned() {
347 local file2=${1/$DIR/$DIR2}
348 # big file is large enough, so copy time is > 30s
349 # so the copytool makes 1 progress
350 # size is a multiple of 1M to have stripe
352 dd if=/dev/urandom of=$file2 count=33 bs=1M conv=fsync ||
353 error "cannot create $file2"
354 path2fid $1 || error "cannot get fid on $1"
357 make_large_for_cancel() {
358 local file2=${1/$DIR/$DIR2}
359 # Copy timeout is 100s. 103MB at 1MB/s => 103s
360 dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync ||
361 error "cannot create $file2"
362 path2fid $1 || error "cannot get fid on $1"
368 wait_update --verbose $(facet_active_host $facet) "$@"
376 wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
377 grep $fid | grep action=$request |\
378 cut -f 13 -d ' ' | cut -f 2 -d =" $state 100 ||
379 error "request on $fid is not $state"
386 do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
387 grep $fid | grep action=$request |\
388 cut -f 13 -d ' ' | cut -f 2 -d ="
395 do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
396 grep $fid | grep action=$request | wc -l"
402 wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
403 egrep 'WAITING|STARTED' " "" $timeout ||
404 error "requests did not complete"
407 wait_for_grace_delay()
409 local val=$(get_hsm_param grace_delay)
414 $LCTL get_param -n llite.$FSNAME-*.uuid
417 MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid |
418 awk '{gsub(/_UUID/,""); print $1}' | head -1)
420 # cleanup from previous bad setup
421 search_and_kill_copytool
423 # for recovery tests, coordinator needs to be started at mount
425 # the lustre conf must be without hsm on (like for sanity.sh)
426 echo "Set HSM on and start"
427 cdt_set_mount_state enabled
428 cdt_check_state enabled
430 echo "Start copytool"
433 # finished requests are quickly removed from list
434 set_hsm_param grace_delay 10
440 local f=$DIR/$tdir/$tfile
444 check_hsm_flags_user $f "0x00000000"
446 $RUNAS $LFS hsm_set --norelease $f ||
447 error "user could not change hsm flags"
448 check_hsm_flags_user $f "0x00000010"
450 $RUNAS $LFS hsm_clear --norelease $f ||
451 error "user could not clear hsm flags"
452 check_hsm_flags_user $f "0x00000000"
454 # User must not be able to change those flags...
455 $RUNAS $LFS hsm_set --exists $f &&
456 error "user should not set this flag"
457 check_hsm_flags_user $f "0x00000000"
460 $LFS hsm_set --exists $f ||
461 error "root could not change hsm flags"
462 check_hsm_flags_user $f "0x00000001"
464 $LFS hsm_clear --exists $f ||
465 error "root could not clear hsm state"
466 check_hsm_flags_user $f "0x00000000"
469 run_test 1 "lfs hsm flags root/non-root access"
473 local f=$DIR/$tdir/$tfile
475 # New files are not dirty
476 check_hsm_flags $f "0x00000000"
478 # For test, we simulate an archived file.
479 $LFS hsm_set --exists $f || error "user could not change hsm flags"
480 check_hsm_flags $f "0x00000001"
482 # chmod does not make the file dirty
483 chmod 600 $f || error "could not chmod test file"
484 check_hsm_flags $f "0x00000001"
486 # chown does not make the file dirty
487 chown $RUNAS_ID $f || error "could not chown test file"
488 check_hsm_flags $f "0x00000001"
490 # truncate makes the file dirty
491 $TRUNCATE $f 1 || error "could not truncate test file"
492 check_hsm_flags $f "0x00000003"
494 $LFS hsm_clear --dirty $f || error "could not clear hsm flags"
495 check_hsm_flags $f "0x00000001"
497 run_test 2 "Check file dirtyness when doing setattr"
503 # New files are not dirty
505 check_hsm_flags $f "0x00000000"
507 # For test, we simulate an archived file.
508 $LFS hsm_set --exists $f ||
509 error "user could not change hsm flags"
510 check_hsm_flags $f "0x00000001"
512 # Reading a file, does not set dirty
513 cat $f > /dev/null || error "could not read file"
514 check_hsm_flags $f "0x00000001"
516 # Open for write without modifying data, does not set dirty
517 openfile -f O_WRONLY $f || error "could not open test file"
518 check_hsm_flags $f "0x00000001"
520 # Append to a file sets it dirty
521 cp -p /etc/passwd $f.append || error "could not create file"
522 $LFS hsm_set --exists $f.append ||
523 error "user could not change hsm flags"
524 dd if=/etc/passwd of=$f.append bs=1 count=3\
525 conv=notrunc oflag=append status=noxfer ||
526 error "could not append to test file"
527 check_hsm_flags $f.append "0x00000003"
529 # Modifying a file sets it dirty
530 cp -p /etc/passwd $f.modify || error "could not create file"
531 $LFS hsm_set --exists $f.modify ||
532 error "user could not change hsm flags"
533 dd if=/dev/zero of=$f.modify bs=1 count=3\
534 conv=notrunc status=noxfer ||
535 error "could not modify test file"
536 check_hsm_flags $f.modify "0x00000003"
538 # Open O_TRUNC sets dirty
539 cp -p /etc/passwd $f.trunc || error "could not create file"
540 $LFS hsm_set --exists $f.trunc ||
541 error "user could not change hsm flags"
542 cp /etc/group $f.trunc || error "could not override a file"
543 check_hsm_flags $f.trunc "0x00000003"
545 # Mmapping a file sets dirty
546 cp -p /etc/passwd $f.mmap || error "could not create file"
547 $LFS hsm_set --exists $f.mmap ||
548 error "user could not change hsm flags"
549 multiop $f.mmap OSMWUc || error "could not mmap a file"
550 check_hsm_flags $f.mmap "0x00000003"
552 run_test 3 "Check file dirtyness when opening for write"
556 local f=$DIR/$tdir/$tfile
557 local fid=$(make_small $f)
560 local st=$(get_request_state $fid CANCEL)
561 [[ -z "$st" ]] || error "hsm_cancel must not be registered (state=$st)"
563 run_test 4 "Useless cancel must not be registered"
566 # test needs a running copytool
570 local f=$DIR/$tdir/$tfile
571 local fid=$(copy_file /etc/passwd $f)
573 wait_request_state $fid ARCHIVE SUCCEED
575 check_hsm_flags $f "0x00000009"
579 run_test 8 "Test default archive number"
583 local f=$DIR/$tdir/$tfile
584 local fid=$(copy_file /etc/passwd $f)
585 # we do not use the default one to be sure
586 local new_an=$((HSM_ARCHIVE_NUMBER+ 1))
588 copytool_setup $new_an
589 $LFS hsm_archive --archive $new_an $f
590 wait_request_state $fid ARCHIVE SUCCEED
592 check_hsm_flags $f "0x00000009"
596 run_test 9 "Use of explict archive number, with dedicated copytool"
599 # test needs a running copytool
602 mkdir -p $DIR/$tdir/d1
603 local f=$DIR/$tdir/$tfile
604 local fid=$(copy_file /etc/hosts $f)
605 $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f ||
606 error "hsm_archive failed"
607 wait_request_state $fid ARCHIVE SUCCEED
609 local AFILE=$(ls $HSM_ARCHIVE/*/*/*/*/*/*/$fid) ||
610 error "fid $fid not in archive $HSM_ARCHIVE"
611 echo "Verifying content"
612 diff $f $AFILE || error "archived file differs"
613 echo "Verifying hsm state "
614 check_hsm_flags $f "0x00000009"
616 echo "Verifying archive number is $HSM_ARCHIVE_NUMBER"
617 local st=$(get_hsm_archive_id $f)
618 [[ $st == $HSM_ARCHIVE_NUMBER ]] ||
619 error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER"
624 run_test 10a "Archive a file"
627 # test needs a running copytool
630 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
631 local f=$DIR/$tdir/$tfile
632 local fid=$(copy_file /etc/hosts $f)
633 $LFS hsm_archive $f || error "archive request failed"
634 wait_request_state $fid ARCHIVE SUCCEED
636 $LFS hsm_archive $f || error "archive of non dirty file failed"
637 local cnt=$(get_request_count $fid ARCHIVE)
638 [[ "$cnt" == "1" ]] ||
639 error "archive of non dirty file must not make a request"
643 run_test 10b "Archive of non dirty file must work without doing request"
646 # test needs a running copytool
649 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
650 local f=$DIR/$tdir/$tfile
651 local fid=$(copy_file /etc/hosts $f)
652 $LFS hsm_set --noarchive $f
653 $LFS hsm_archive $f && error "archive a noarchive file must fail"
657 run_test 10c "Check forbidden archive"
660 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
661 cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
662 local f=$DIR/$tdir/$tfile
664 import_file $tdir/$tfile $f
665 echo -n "Verifying released state: "
666 check_hsm_flags $f "0x0000000d"
668 local LSZ=$(stat -c "%s" $f)
669 local ASZ=$(stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile)
671 echo "Verifying imported size $LSZ=$ASZ"
672 [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ"
673 echo -n "Verifying released pattern: "
674 local PTRN=$($GETSTRIPE -L $f)
676 [[ $PTRN == 80000001 ]] || error "Is not released"
677 local fid=$(path2fid $f)
678 echo "Verifying new fid $fid in archive"
680 local AFILE=$(ls $HSM_ARCHIVE/*/*/*/*/*/*/$fid) ||
681 error "fid $fid not in archive $HSM_ARCHIVE"
683 run_test 11 "Import a file"
686 # test needs a running copytool
689 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
690 cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
691 local f=$DIR/$tdir/$tfile
692 import_file $tdir/$tfile $f
693 local f=$DIR2/$tdir/$tfile
694 echo "Verifying released state: "
695 check_hsm_flags $f "0x0000000d"
697 local fid=$(path2fid $f)
699 wait_request_state $fid RESTORE SUCCEED
701 echo "Verifying file state: "
702 check_hsm_flags $f "0x00000009"
704 diff -q $HSM_ARCHIVE/$tdir/$tfile $f
706 [[ $? -eq 0 ]] || error "Restored file differs"
710 run_test 12a "Restore an imported file explicitly"
713 # test needs a running copytool
716 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
717 cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
718 local f=$DIR/$tdir/$tfile
719 import_file $tdir/$tfile $f
720 echo "Verifying released state: "
721 check_hsm_flags $f "0x0000000d"
723 cat $f > /dev/null || error "File read failed"
725 echo "Verifying file state after restore: "
726 check_hsm_flags $f "0x00000009"
728 diff -q $HSM_ARCHIVE/$tdir/$tfile $f
730 [[ $? -eq 0 ]] || error "Restored file differs"
734 run_test 12b "Restore an imported file implicitly"
737 [ "$OSTCOUNT" -lt "2" ] && skip_env "skipping 2-stripe test" && return
739 # test needs a running copytool
743 local f=$DIR/$tdir/$tfile
744 $LFS setstripe -c 2 $f
745 local fid=$(make_large_for_striping $f)
746 local FILE_CRC=$(md5sum $f)
748 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
749 wait_request_state $fid ARCHIVE SUCCEED
750 $LFS hsm_release $f || error "release $f failed"
752 echo "$FILE_CRC" | md5sum -c
754 [[ $? -eq 0 ]] || error "Restored file differs"
758 run_test 12c "Restore a file with stripe of 2"
761 # test needs a running copytool
764 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
765 local f=$DIR/$tdir/$tfile
766 local fid=$(copy_file /etc/hosts $f)
767 $LFS hsm_restore $f || error "restore of non archived file failed"
768 local cnt=$(get_request_count $fid RESTORE)
769 [[ "$cnt" == "0" ]] ||
770 error "restore non archived must not make a request"
771 $LFS hsm_archive $f ||
772 error "archive request failed"
773 wait_request_state $fid ARCHIVE SUCCEED
774 $LFS hsm_restore $f ||
775 error "restore of non released file failed"
776 local cnt=$(get_request_count $fid RESTORE)
777 [[ "$cnt" == "0" ]] ||
778 error "restore a non dirty file must not make a request"
782 run_test 12d "Restore of a non archived, non released file must work"\
783 " without doing request"
786 # test needs a running copytool
789 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
790 local f=$DIR/$tdir/$tfile
791 local fid=$(copy_file /etc/hosts $f)
792 $LFS hsm_archive $f || error "archive request failed"
793 wait_request_state $fid ARCHIVE SUCCEED
800 $LFS hsm_restore $f && error "restore a dirty file must fail"
804 run_test 12e "Check forbidden restore"
807 # test needs a running copytool
811 local f=$DIR/$tdir/$tfile
812 local fid=$(copy_file /etc/hosts $f)
814 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
815 wait_request_state $fid ARCHIVE SUCCEED
816 $LFS hsm_release $f || error "release of $f failed"
818 wait_request_state $fid RESTORE SUCCEED
820 echo -n "Verifying file state: "
821 check_hsm_flags $f "0x00000009"
823 diff -q /etc/hosts $f
825 [[ $? -eq 0 ]] || error "Restored file differs"
829 run_test 12f "Restore a released file explicitly"
832 # test needs a running copytool
836 local f=$DIR/$tdir/$tfile
837 local fid=$(copy_file /etc/hosts $f)
839 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
840 wait_request_state $fid ARCHIVE SUCCEED
841 $LFS hsm_release $f || error "release of $f failed"
843 diff -q /etc/hosts $f
846 # we check we had a restore done
847 wait_request_state $fid RESTORE SUCCEED
849 [[ $st -eq 0 ]] || error "Restored file differs"
853 run_test 12g "Restore a released file implicitly"
856 need2clients || return 0
858 # test needs a running copytool
862 local f=$DIR/$tdir/$tfile
863 local fid=$(copy_file /etc/hosts $f)
865 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
866 wait_request_state $fid ARCHIVE SUCCEED
867 $LFS hsm_release $f || error "release of $f failed"
869 do_node $CLIENT2 diff -q /etc/hosts $f
872 # we check we had a restore done
873 wait_request_state $fid RESTORE SUCCEED
875 [[ $st -eq 0 ]] || error "Restored file differs"
879 run_test 12h "Restore a released file implicitly from a second node"
882 # test needs a running copytool
886 local f=$DIR/$tdir/$tfile
887 local fid=$(copy_file /etc/passwd $f)
888 $LFS hsm_archive $f || error "archive of $f failed"
889 wait_request_state $fid ARCHIVE SUCCEED
891 $LFS hsm_release $f || error "release of $f failed"
895 [[ $? -eq 0 ]] || error "Restored file differs"
899 run_test 12m "Archive/release/implicit restore"
902 # test needs a running copytool
905 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
906 cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
907 local f=$DIR/$tdir/$tfile
908 import_file $tdir/$tfile $f
910 cmp /etc/hosts $f || error "Restored file differs"
912 $LFS hsm_release $f || error "release of $f failed"
916 run_test 12n "Import/implicit restore/release"
919 # test needs a running copytool
922 local ARC_SUBDIR="import.orig"
926 # populate directory to be imported
927 for d in $(seq 1 10); do
928 local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d"
930 for f in $(seq 1 10); do
931 CURR_FILE="$CURR_DIR/$tfile.$f"
932 # write file-specific data
933 echo "d=$d, f=$f, dir=$CURR_DIR, file=$CURR_FILE"\
938 import_file "$ARC_SUBDIR" $DIR/$tdir
939 # diff lustre content and origin (triggers file restoration)
940 # there must be 10x10 identical files, and no difference
941 local cnt_ok=$(diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \
942 $DIR/$tdir/$ARC_SUBDIR |
943 grep identical | wc -l)
944 local cnt_diff=$(diff -r $HSM_ARCHIVE/$ARC_SUBDIR \
945 $DIR/$tdir/$ARC_SUBDIR |
948 [ $cnt_diff -eq 0 ] ||
949 error "$cnt_diff imported files differ from read data"
950 [ $cnt_ok -eq 100 ] ||
951 error "not enough identical files ($cnt_ok != 100)"
955 run_test 13 "Recursively import and restore a directory"
958 # test needs a running copytool
963 local f=$DIR/$tdir/$tfile
964 local fid=$(make_small $f)
965 local sum=$(md5sum $f | awk '{print $1}')
966 $LFS hsm_archive $f || error "could not archive file"
967 wait_request_state $fid ARCHIVE SUCCEED
971 # create released file (simulate llapi_hsm_import call)
973 local fid2=$(path2fid $f)
974 $LFS hsm_set --archived --exists $f || error "could not force hsm flags"
975 $LFS hsm_release $f || error "could not release file"
977 # rebind the archive to the newly created file
978 echo "rebind $fid to $fid2"
979 $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\
980 --rebind $fid $fid2 $DIR ||
981 error "could not rebind file"
983 # restore file and compare md5sum
984 local sum2=$(md5sum $f | awk '{print $1}')
986 [[ $sum == $sum2 ]] || error "md5sum mismatch after restore"
990 run_test 14 "Rebind archived file to a new fid"
993 # test needs a running copytool
998 local f=$DIR/$tdir/$tfile
1000 local tmpfile=$TMP/tmp.$$
1004 for i in $(seq 1 $count); do
1005 fids[$i]=$(make_small $f.$i)
1006 sums[$i]=$(md5sum $f.$i | awk '{print $1}')
1007 $LFS hsm_archive $f.$i || error "could not archive file"
1009 wait_all_done $(($count*60))
1013 for i in $(seq 1 $count); do
1016 local fid2=$(path2fid $f.$i)
1017 # add the rebind operation to the list
1018 echo ${fids[$i]} $fid2 >> $tmpfile
1020 # set it released (simulate llapi_hsm_import call)
1021 $LFS hsm_set --archived --exists $f.$i ||
1022 error "could not force hsm flags"
1023 $LFS hsm_release $f.$i || error "could not release file"
1025 nl=$(wc -l < $tmpfile)
1026 [[ $nl == $count ]] || error "$nl files in list, $count expected"
1028 echo "rebind list of files"
1029 $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\
1030 --rebind $tmpfile $DIR ||
1031 error "could not rebind file list"
1033 # restore files and compare md5sum
1034 for i in $(seq 1 $count); do
1035 local sum2=$(md5sum $f.$i | awk '{print $1}')
1036 [[ $sum2 == ${sums[$i]} ]] ||
1037 error "md5sum mismatch after restore ($sum2 != ${sums[$i]})"
1043 run_test 15 "Rebind a list of files"
1046 # test needs a running copytool
1050 # create a known size file so we can verify transfer speed
1053 dd if=/dev/zero of=$ref bs=1M count=20
1056 local f=$DIR/$tdir/$tfile
1057 local fid=$(copy_file $ref $f)
1059 local start=$(date +%s)
1061 wait_request_state $fid ARCHIVE SUCCEED
1062 local end=$(date +%s)
1063 local duration=$((end - start))
1065 [[ $duration -ge $goal ]] ||
1066 error "Transfer is too fast $duration < $goal"
1070 run_test 16 "Test CT bandwith control option"
1075 local f=$DIR/$tdir/$tfile
1076 touch $f || error "touch $f failed"
1078 # Cannot release a non-archived file
1079 $LFS hsm_release $f && error "release should not succeed"
1081 # For following tests, we must test them with HS_ARCHIVED set
1082 $LFS hsm_set --exists --archived $f || error "could not add flag"
1084 # Cannot release a file if no-release is set
1085 $LFS hsm_set --norelease $f || error "could not add flag"
1086 $LFS hsm_release $f && error "release should not succeed"
1087 $LFS hsm_clear --norelease $f || error "could not remove flag"
1089 # Cannot release a file if lost
1090 $LFS hsm_set --lost $f || error "could not add flag"
1091 $LFS hsm_release $f && error "release should not succeed"
1092 $LFS hsm_clear --lost $f || error "could not remove flag"
1094 # Cannot release a file if dirty
1095 $LFS hsm_set --dirty $f || error "could not add flag"
1096 $LFS hsm_release $f && error "release should not succeed"
1097 $LFS hsm_clear --dirty $f || error "could not remove flag"
1099 run_test 20 "Release is not permitted"
1102 # test needs a running copytool
1106 local f=$DIR/$tdir/test_release
1108 # Create a file and check its states
1109 local fid=$(make_small $f)
1110 check_hsm_flags $f "0x00000000"
1112 $LFS hsm_archive $f || error "could not archive file"
1113 wait_request_state $fid ARCHIVE SUCCEED
1115 [ $(stat -c "%b" $f) -ne "0" ] || error "wrong block number"
1116 local sz=$(stat -c "%s" $f)
1117 [ $sz -ne "0" ] || error "file size should not be zero"
1119 # Release and check states
1120 $LFS hsm_release $f || error "could not release file"
1121 check_hsm_flags $f "0x0000000d"
1123 [ $(stat -c "%b" $f) -eq "0" ] || error "wrong block number"
1124 [ $(stat -c "%s" $f) -eq $sz ] || error "wrong file size"
1126 # Check we can release a file without stripe info
1130 check_hsm_flags $f "0x00000000"
1131 $LFS hsm_archive $f || error "could not archive file"
1132 wait_request_state $fid ARCHIVE SUCCEED
1134 # Release and check states
1135 $LFS hsm_release $f || error "could not release file"
1136 check_hsm_flags $f "0x0000000d"
1138 # Releasing a file that is already released is OK
1139 $LFS hsm_release $f || fail "second release should succeed"
1140 check_hsm_flags $f "0x0000000d"
1144 run_test 21 "Simple release tests"
1147 # test needs a running copytool
1152 local f=$DIR/$tdir/test_release
1153 local swap=$DIR/$tdir/test_swap
1155 # Create a file and check its states
1156 local fid=$(make_small $f)
1157 check_hsm_flags $f "0x00000000"
1159 $LFS hsm_archive $f || error "could not archive file"
1160 wait_request_state $fid ARCHIVE SUCCEED
1162 # Release and check states
1163 $LFS hsm_release $f || error "could not release file"
1164 check_hsm_flags $f "0x0000000d"
1167 $LFS swap_layouts $swap $f && error "swap_layouts should failed"
1172 run_test 22 "Could not swap a release file"
1175 # test needs a running copytool
1180 local f=$DIR/$tdir/test_mtime
1182 # Create a file and check its states
1183 local fid=$(make_small $f)
1184 check_hsm_flags $f "0x00000000"
1186 $LFS hsm_archive $f || error "could not archive file"
1187 wait_request_state $fid ARCHIVE SUCCEED
1189 # Set modification time in the past
1190 touch -m -a -d @978261179 $f
1192 # Release and check states
1193 $LFS hsm_release $f || error "could not release file"
1194 check_hsm_flags $f "0x0000000d"
1196 local MTIME=$(stat -c "%Y" $f)
1197 local ATIME=$(stat -c "%X" $f)
1198 [ $MTIME -eq "978261179" ] || fail "bad mtime: $MTIME"
1199 [ $ATIME -eq "978261179" ] || fail "bad atime: $ATIME"
1203 run_test 23 "Release does not change a/mtime (utime)"
1206 # test needs a running copytool
1211 local f=$DIR/$tdir/test_mtime
1213 # Create a file and check its states
1214 local fid=$(make_small $f)
1215 check_hsm_flags $f "0x00000000"
1217 # make mtime different
1220 local MTIME=$(stat -c "%Y" $f)
1221 local ATIME=$(stat -c "%X" $f)
1223 $LFS hsm_archive $f || error "could not archive file"
1224 wait_request_state $fid ARCHIVE SUCCEED
1226 # Release and check states
1227 $LFS hsm_release $f || error "could not release file"
1228 check_hsm_flags $f "0x0000000d"
1230 [ "$(stat -c "%Y" $f)" -eq "$MTIME" ] ||
1231 error "mtime should be $MTIME"
1233 [ "$(stat -c "%X" $f)" -eq "$ATIME" ] ||
1234 error "atime should be $ATIME"
1238 run_test 24 "Release does not change a/mtime (i/o)"
1241 # test needs a running copytool
1244 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
1245 cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile
1246 local f=$DIR/$tdir/$tfile
1248 import_file $tdir/$tfile $f
1250 $LFS hsm_set --lost $f
1255 [[ $st == 1 ]] || error "lost file access should failed (returns $st)"
1259 run_test 25a "Restore lost file (HS_LOST flag) from import"\
1260 " (Operation not permitted)"
1263 # test needs a running copytool
1268 local f=$DIR/$tdir/$tfile
1269 local fid=$(copy_file /etc/passwd $f)
1271 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1272 wait_request_state $fid ARCHIVE SUCCEED
1275 $LFS hsm_set --lost $f
1279 [[ $st == 1 ]] || error "lost file access should failed (returns $st)"
1283 run_test 25b "Restore lost file (HS_LOST flag) after release"\
1284 " (Operation not permitted)"
1287 # test needs a running copytool
1291 local f=$DIR/$tdir/$tfile
1292 local fid=$(make_large_for_progress $f)
1293 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1294 wait_request_state $fid ARCHIVE SUCCEED
1297 wait_request_state $fid REMOVE SUCCEED
1299 check_hsm_flags $f "0x00000000"
1303 run_test 26 "Remove the archive of a valid file"
1306 # test needs a running copytool
1310 make_archive $tdir/$tfile
1311 local f=$DIR/$tdir/$tfile
1312 import_file $tdir/$tfile $f
1313 local fid=$(path2fid $f)
1317 [[ $? != 0 ]] || error "Remove of a released file should fail"
1321 run_test 27a "Remove the archive of an imported file (Operation not permitted)"
1324 # test needs a running copytool
1328 local f=$DIR/$tdir/$tfile
1329 local fid=$(make_large_for_progress $f)
1330 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1331 wait_request_state $fid ARCHIVE SUCCEED
1336 [[ $? != 0 ]] || error "Remove of a released file should fail"
1340 run_test 27b "Remove the archive of a relased file (Operation not permitted)"
1343 # test needs a running copytool
1347 local f=$DIR/$tdir/$tfile
1348 local fid=$(make_large_for_progress $f)
1349 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1350 wait_request_state $fid ARCHIVE SUCCEED
1359 wait_request_state $fid REMOVE SUCCEED
1363 run_test 28 "Concurrent archive/file remove"
1366 # restore at exec cannot work on agent node (because of Linux kernel
1367 # protection of executables)
1368 need2clients || return 0
1370 # test needs a running copytool
1373 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
1374 cp -p /bin/true $HSM_ARCHIVE/$tdir/$tfile
1375 local f=$DIR/$tdir/true
1376 import_file $tdir/$tfile $f
1378 local fid=$(path2fid $f)
1380 # set no retry action mode
1386 # remove no retry action mode
1390 [[ $st == 0 ]] || error "Failed to exec a released file"
1394 run_test 30a "Restore at exec (import case)"
1397 # restore at exec cannot work on agent node (because of Linux kernel
1398 # protection of executables)
1399 need2clients || return 0
1401 # test needs a running copytool
1405 local f=$DIR/$tdir/true
1406 local fid=$(copy_file /bin/true $f)
1408 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1409 wait_request_state $fid ARCHIVE SUCCEED
1412 # set no retry action mode
1418 # remove no retry action mode
1422 [[ $st == 0 ]] || error "Failed to exec a released file"
1426 run_test 30b "Restore at exec (release case)"
1428 restore_and_check_size()
1432 local s=$(stat -c "%s" $f)
1434 local st=$(get_hsm_flags $f)
1438 while [[ "$st" != "0x00000009" && $cpt -le 10 ]]
1440 n=$(stat -c "%s" $f)
1441 # we echo in both cases to show stat is not
1445 echo "size seen is $n != $s"
1448 echo "size seen is right: $n == $s"
1450 st=$(get_hsm_flags $f)
1454 if [[ $cpt -lt 10 ]]
1456 echo " restore is too long"
1460 wait_request_state $fid RESTORE SUCCEED
1465 # test needs a running copytool
1470 make_archive $tdir/$tfile
1471 local f=$DIR/$tdir/$tfile
1472 import_file $tdir/$tfile $f
1473 local fid=$($LFS path2fid $f)
1474 HSM_ARCHIVE_PURGE=false copytool_setup
1476 restore_and_check_size $f $fid
1479 [[ $err -eq 0 ]] || error "File size changed during restore"
1483 run_test 31a "Import a large file and check size during restore"
1487 # test needs a running copytool
1492 local f=$DIR/$tdir/$tfile
1493 local fid=$(make_large_for_progress $f)
1494 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1495 wait_request_state $fid ARCHIVE SUCCEED
1498 restore_and_check_size $f $fid
1501 [[ $err -eq 0 ]] || error "File size changed during restore"
1505 run_test 31b "Restore a large unaligned file and check size during restore"
1508 # test needs a running copytool
1513 local f=$DIR/$tdir/$tfile
1514 local fid=$(make_large_for_progress_aligned $f)
1515 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1516 wait_request_state $fid ARCHIVE SUCCEED
1519 restore_and_check_size $f $fid
1522 [[ $err -eq 0 ]] || error "File size changed during restore"
1526 run_test 31c "Restore a large aligned file and check size during restore"
1529 # test needs a running copytool
1534 local f=$DIR/$tdir/$tfile
1535 local fid=$(make_large_for_progress $f)
1536 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1537 wait_request_state $fid ARCHIVE SUCCEED
1540 md5sum $f >/dev/null &
1542 wait_request_state $fid RESTORE STARTED
1547 # Check restore trigger process was killed: keep the ps output, a non-empty value means $pid is still listed
1548 local killed=$(ps -o pid,comm hp $pid)
1552 wait_request_state $fid RESTORE CANCELED
1553 wait_request_state $fid CANCEL SUCCEED
1556 error "Cannot kill process waiting for restore ($killed)"
1560 run_test 33 "Kill a restore waiting process"
1563 # test needs a running copytool
1568 local f=$DIR/$tdir/$tfile
1569 local fid=$(make_large_for_progress $f)
1570 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1571 wait_request_state $fid ARCHIVE SUCCEED
1574 md5sum $f >/dev/null &
1576 wait_request_state $fid RESTORE STARTED
1578 rm $f || error "rm $f failed"
1579 # rm must not block during restore
1580 wait_request_state $fid RESTORE STARTED
1582 wait_request_state $fid RESTORE SUCCEED
1583 # check md5sum pgm finished: ps prints nothing once $pid is gone
1584 local there=$(ps -o pid,comm hp $pid)
1585 [[ -z $there ]] || error "Restore initiator does not exit"
1587 wait $pid; local rc=$? # wait in this shell; a $(wait) subshell does not own $pid and yields no status
1588 [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
1592 run_test 34 "Remove file during restore"
1595 # test needs a running copytool
1600 local f=$DIR/$tdir/$tfile
1601 local f1=$DIR/$tdir/$tfile-1
1602 local fid=$(make_large_for_progress $f)
1603 local fid1=$(copy_file /etc/passwd $f1)
1604 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1605 wait_request_state $fid ARCHIVE SUCCEED
1608 md5sum $f >/dev/null &
1610 wait_request_state $fid RESTORE STARTED
1612 mv $f1 $f || error "mv $f1 $f failed"
1613 # mv must not block during restore
1614 wait_request_state $fid RESTORE STARTED
1616 wait_request_state $fid RESTORE SUCCEED
1617 # check md5sum pgm finished: ps prints nothing once $pid is gone
1618 local there=$(ps -o pid,comm hp $pid)
1619 [[ -z $there ]] || error "Restore initiator does not exit"
1621 wait $pid; local rc=$? # wait in this shell; a $(wait) subshell does not own $pid and yields no status
1622 [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
1625 [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1"
1629 run_test 35 "Overwrite file during restore"
1632 # test needs a running copytool
1637 local f=$DIR/$tdir/$tfile
1638 local fid=$(make_large_for_progress $f)
1639 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
1640 wait_request_state $fid ARCHIVE SUCCEED
1643 md5sum $f >/dev/null &
1645 wait_request_state $fid RESTORE STARTED
1648 # mv must not block during restore
1649 wait_request_state $fid RESTORE STARTED
1651 wait_request_state $fid RESTORE SUCCEED
1652 # check md5sum pgm finished: ps prints nothing once $pid is gone
1653 local there=$(ps -o pid,comm hp $pid)
1655 error "Restore initiator does not exit"
1657 wait $pid; local rc=$? # wait in this shell; a $(wait) subshell does not own $pid and yields no status
1659 error "Restore initiator failed with $rc"
1663 run_test 36 "Move file during restore"
1670 for n in $(seq 1 $count); do
1671 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $prefix.$n
1673 echo "$count archive requests submitted"
1677 local stream_count=4
1678 local file_count=100
1680 local f=$DIR/$tdir/$tfile
1685 for i in $(seq 1 $file_count); do
1686 for p in $(seq 1 $stream_count); do
1687 fid=$(copy_file /etc/hosts $f.$p.$i)
1691 # to be sure wait_all_done will not be misled by previous tests
1693 wait_for_grace_delay
1695 # start archive streams in background (archive files in parallel)
1696 for p in $(seq 1 $stream_count); do
1697 multi_archive $f.$p $file_count &
1700 echo -n "Wait for all requests being enqueued..."
1706 run_test 40 "Parallel archive requests"
1709 # test needs a running copytool
1712 # Test behaves badly if 2 mount points are present
1713 umount_client $MOUNT2
1716 local f=$DIR/$tdir/$tfile
1717 local fid=$(copy_file /etc/motd $f 1)
1719 $LFS hsm_archive $f || error "could not archive file"
1720 wait_request_state $fid ARCHIVE SUCCEED
1721 check_hsm_flags $f "0x00000009"
1723 multiop_bg_pause $f O_c || error "multiop failed"
1727 client_up || client_up || true
1729 kill -USR1 $MULTIPID
1730 wait $MULTIPID || error "multiop close failed"
1732 check_hsm_flags $f "0x0000000b"
1734 # Restore test environment
1735 mount_client $MOUNT2
1739 run_test 52 "Opened for write file on an evicted client should be set dirty"
1742 # test needs a running copytool
1745 # Checks are wrong with 2 mount points
1746 umount_client $MOUNT2
1749 local f=$DIR/$tdir/$tfile
1750 local fid=$(copy_file /etc/motd $f 1)
1752 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1753 error "could not archive file"
1754 wait_request_state $fid ARCHIVE SUCCEED
1755 check_hsm_flags $f "0x00000009"
1757 multiop_bg_pause $f o_c || error "multiop failed"
1761 client_up || client_up || true
1763 kill -USR1 $MULTIPID
1764 wait $MULTIPID || error "multiop close failed"
1766 check_hsm_flags $f "0x00000009"
1768 mount_client $MOUNT2
1772 run_test 53 "Opened for read file on an evicted client should not be set dirty"
1775 # test needs a running copytool
1779 local f=$DIR/$tdir/$tfile
1780 local fid=$(make_small $f)
1782 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1783 error "could not archive file"
1784 wait_request_state $fid ARCHIVE STARTED
1786 check_hsm_flags $f "0x00000001"
1788 # Avoid coordinator resending this request as soon as it has failed.
1793 wait_request_state $fid ARCHIVE FAILED
1795 check_hsm_flags $f "0x00000003"
1800 run_test 54 "Write during an archive cancels it"
1803 # test needs a running copytool
1807 local f=$DIR/$tdir/$tfile
1808 local fid=$(make_small $f)
1810 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1811 error "could not archive file"
1812 wait_request_state $fid ARCHIVE STARTED
1814 check_hsm_flags $f "0x00000001"
1816 # Avoid coordinator resending this request as soon as it has failed.
1819 $TRUNCATE $f 1024 || error "truncate failed"
1821 wait_request_state $fid ARCHIVE FAILED
1823 check_hsm_flags $f "0x00000003"
1828 run_test 55 "Truncate during an archive cancels it"
1831 # test needs a running copytool
1835 local f=$DIR/$tdir/$tfile
1836 local fid=$(make_large_for_progress $f)
1838 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1839 error "could not archive file"
1840 wait_request_state $fid ARCHIVE STARTED
1842 check_hsm_flags $f "0x00000001"
1844 # Change metadata and sync to be sure we are not changing only
1849 wait_request_state $fid ARCHIVE SUCCEED
1851 check_hsm_flags $f "0x00000009"
1855 run_test 56 "Setattr during an archive is ok"
1858 # Need one client for I/O, one for request
1859 need2clients || return 0
1861 # test needs a running copytool
1865 local f=$DIR/$tdir/test_archive_remote
1866 # Create a file on a remote node
1867 do_node $CLIENT2 "dd if=/dev/urandom of=$f bs=1M "\
1868 "count=2 conv=fsync"
1871 do_node $CLIENT2 "$LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f" ||
1872 error "hsm_archive failed"
1873 local fid=$(path2fid $f)
1874 wait_request_state $fid ARCHIVE SUCCEED
1876 # Release and implicit restore it
1877 do_node $CLIENT2 "$LFS hsm_release $f" ||
1878 error "hsm_release failed"
1879 do_node $CLIENT2 "md5sum $f" ||
1880 error "hsm_restore failed"
1882 wait_request_state $fid RESTORE SUCCEED
1886 run_test 57 "Archive a file with dirty cache on another node"
1889 # test needs a running copytool
1893 local f=$DIR/$tdir/$tfile
1894 local fid=$(make_small $f)
1896 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1897 error "could not archive file"
1898 wait_request_state $fid ARCHIVE SUCCEED
1900 $LFS hsm_release $f || error "could not release file"
1902 $TRUNCATE $f 0 || error "truncate failed"
1905 local sz=$(stat -c %s $f)
1906 [[ $sz == 0 ]] || error "size after truncate is $sz != 0"
1910 check_hsm_flags $f "0x0000000b"
1912 local state=$(get_request_state $fid RESTORE)
1913 [[ "$state" == "" ]] ||
1914 error "truncate 0 trigs a restore, state = $state"
1918 run_test 58 "Truncate 0 on a released file must not trigger restore"
1921 # test needs a running copytool
1925 local f=$DIR/$tdir/$tfile
1926 local fid=$(copy_file /etc/passwd $f)
1929 local sz=$(stat -c %s $ref)
1933 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
1934 error "could not archive file"
1935 wait_request_state $fid ARCHIVE SUCCEED
1937 $LFS hsm_release $f || error "could not release file"
1939 $TRUNCATE $f $sz || error "truncate failed"
1942 local sz1=$(stat -c %s $f)
1943 [[ $sz1 == $sz ]] || error "size after truncate is $sz1 != $sz"
1947 check_hsm_flags $f "0x0000000b"
1949 local state=$(get_request_state $fid RESTORE)
1950 [[ "$state" == "SUCCEED" ]] ||
1951 error "truncate $sz does not trig a successfull restore,"\
1954 cmp $ref $f || error "file data wrong after truncate"
1958 run_test 59 "Truncate != 0 on a released file"
1963 local f=$DIR/$tdir/$tfile
1964 local FILELIST=/tmp/filelist.txt
1968 for i in $(seq 1 $file_count); do
1969 fid=$(copy_file /etc/hosts $f.$i)
1970 echo $f.$i >> $FILELIST
1973 # to be sure wait_all_done will not be misled by previous tests
1975 wait_for_grace_delay
1976 $LFS hsm_archive --filelist $FILELIST ||
1977 error "cannot archive a file list"
1979 $LFS hsm_release --filelist $FILELIST ||
1980 error "cannot release a file list"
1981 $LFS hsm_restore --filelist $FILELIST ||
1982 error "cannot restore a file list"
1986 run_test 90 "Archive/restore a file list"
1988 double_verify_reset_ham_param() {
1990 echo "Testing $HSM_PARAM.$p"
1991 local val=$(get_hsm_param $p)
1993 local val2=$(($val * 2))
1994 set_hsm_param $p $val2
1995 val=$(get_hsm_param $p)
1996 [[ $val == $val2 ]] ||
1997 error "$HSM_PARAM.$p: $val != $val2 should be (2 * $save)"
1998 echo "Set $p to 0 must failed"
2002 set_hsm_param $p $save
2006 error "we must not be able to set $HSM_PARAM.$p to 0"
2011 double_verify_reset_ham_param loop_period
2012 double_verify_reset_ham_param grace_delay
2013 double_verify_reset_ham_param request_timeout
2014 double_verify_reset_ham_param max_requests
2016 run_test 100 "Set coordinator /proc tunables"
2023 run_test 102 "Verify coordinator control"
2026 # test needs a running copytool
2033 for i in $(seq 1 20); do
2034 fid=$(copy_file /etc/passwd $DIR/$tdir/$i)
2036 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/*
2040 echo "Current requests"
2041 local res=$(do_facet $SINGLEMDS "$LCTL get_param -n\
2042 $HSM_PARAM.agent_actions |\
2043 grep -v CANCELED | grep -v SUCCEED | grep -v FAILED")
2045 [[ -z "$res" ]] || error "Some request have not been canceled"
2049 run_test 103 "Purge all requests"
2054 # test needs a running copytool
2058 local f=$DIR/$tdir/$tfile
2059 local fid=$(make_large_for_progress $f)
2060 # if cdt is on, it can serve the request too quickly
2062 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
2063 local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
2064 $HSM_PARAM.agent_actions |\
2065 grep $fid | cut -f16 -d=")
2068 [[ "$data1" == "$DATAHEX" ]] ||
2069 error "Data field in records is ($data1) and not ($DATAHEX)"
2073 run_test 104 "Copy tool data field"
2080 for i in $(seq -w 1 10); do
2081 cp /etc/passwd $DIR/$tdir/$i
2082 $LFS hsm_archive $DIR/$tdir/$i
2084 local reqcnt1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
2085 $HSM_PARAM.agent_actions |\
2086 grep WAITING | wc -l")
2089 local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\
2090 $HSM_PARAM.agent_actions |\
2091 grep WAITING | wc -l")
2094 [[ "$reqcnt1" == "$reqcnt2" ]] ||
2095 error "Requests count after shutdown $reqcnt2 != "\
2096 "before shutdown $reqcnt1"
2098 run_test 105 "Restart of coordinator"
2101 # Test behaves badly if 2 mount points are present
2102 umount_client $MOUNT2
2104 # test needs a running copytool
2107 local uuid=$(my_uuid)
2108 local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
2111 [[ ! -z "$agent" ]] || error "My uuid $uuid not found in agent list"
2112 local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
2114 [[ -z "$agent" ]] ||
2115 error "My uuid $uuid still found in agent list,"\
2116 " after copytool shutdown"
2118 local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
2121 [[ ! -z "$agent" ]] ||
2122 error "My uuid $uuid not found in agent list after"\
2125 # Restore test environment
2126 mount_client $MOUNT2
2128 run_test 106 "Copytool register/unregister"
2131 # test needs a running copytool
2133 # create and archive file
2135 local f1=$DIR/$tdir/$tfile
2136 local fid=$(copy_file /etc/passwd $f1)
2137 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1
2138 wait_request_state $fid ARCHIVE SUCCEED
2139 # shutdown and restart MDS
2141 # check the copytool still gets messages from MDT
2142 local f2=$DIR/$tdir/2
2143 local fid=$(copy_file /etc/passwd $f2)
2144 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2
2145 # main check of this sanity: this request MUST succeed
2146 wait_request_state $fid ARCHIVE SUCCEED
2149 run_test 107 "Copytool re-register after MDS restart"
2152 # test needs a running copytool
2155 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
2156 cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile
2157 local f=$DIR/$tdir/$tfile
2158 import_file $tdir/$tfile $f
2159 local fid=$(path2fid $f)
2161 cdt_set_no_blocking_restore
2166 wait_request_state $fid RESTORE SUCCEED
2167 cdt_clear_no_blocking_restore
2171 error "md5sum returns $st != 1, "\
2172 "should also perror ENODATA (No data available)"
2176 run_test 110a "Non blocking restore policy (import case)"
2179 # test needs a running copytool
2183 local f=$DIR/$tdir/$tfile
2184 local fid=$(copy_file /etc/passwd $f)
2185 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2186 wait_request_state $fid ARCHIVE SUCCEED
2189 cdt_set_no_blocking_restore
2194 wait_request_state $fid RESTORE SUCCEED
2195 cdt_clear_no_blocking_restore
2199 error "md5sum returns $st != 1, "\
2200 "should also perror ENODATA (No data available)"
2204 run_test 110b "Non blocking restore policy (release case)"
2207 # test needs a running copytool
2210 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
2211 local f=$DIR/$tdir/$tfile
2212 cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile
2213 import_file $tdir/$tfile $f
2214 local fid=$(path2fid $f)
2218 copytool_remove_backend $fid
2221 wait_request_state $fid RESTORE FAILED
2228 [[ $st == 0 ]] || error "Restore does not failed"
2232 run_test 111a "No retry policy (import case), restore will error"\
2233 " (No such file or directory)"
2236 # test needs a running copytool
2240 local f=$DIR/$tdir/$tfile
2241 local fid=$(copy_file /etc/passwd $f)
2243 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2244 wait_request_state $fid ARCHIVE SUCCEED
2247 copytool_remove_backend $fid
2250 wait_request_state $fid RESTORE FAILED
2257 [[ $st == 0 ]] || error "Restore does not failed"
2261 run_test 111b "No retry policy (release case), restore will error"\
2262 " (No such file or directory)"
2265 # test needs a running copytool
2269 local f=$DIR/$tdir/$tfile
2270 local fid=$(copy_file /etc/passwd $f)
2272 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2273 local l=$($LFS hsm_action $f)
2275 local res=$(echo $l | cut -f 2- -d" " | grep ARCHIVE)
2279 wait_request_state $fid ARCHIVE SUCCEED
2282 [[ ! -z "$res" ]] || error "action is $l which is not an ARCHIVE"
2286 run_test 112 "State of recorded request"
2289 # test needs a running copytool
2293 local f=$DIR/$tdir/$tfile
2294 local fid=$(make_large_for_cancel $f)
2295 # test with cdt on is made in test_221
2297 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2300 wait_request_state $fid ARCHIVE CANCELED
2301 wait_request_state $fid CANCEL SUCCEED
2305 run_test 200 "Register/Cancel archive"
2308 # test needs a running copytool
2312 local f=$DIR/$tdir/$tfile
2313 make_archive $tdir/$tfile
2314 import_file $tdir/$tfile $f
2315 local fid=$(path2fid $f)
2317 # test with cdt on is made in test_222
2322 wait_request_state $fid RESTORE CANCELED
2323 wait_request_state $fid CANCEL SUCCEED
2327 run_test 201 "Register/Cancel restore"
2330 # test needs a running copytool
2334 local f=$DIR/$tdir/$tfile
2335 local fid=$(make_large_for_progress $f)
2336 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2337 wait_request_state $fid ARCHIVE SUCCEED
2343 wait_request_state $fid REMOVE CANCELED
2347 run_test 202 "Register/Cancel remove"
2350 # test needs a running copytool
2355 local f=$DIR/$tdir/$tfile
2356 local fid=$(copy_file /etc/passwd $f)
2360 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2361 wait_request_state $fid ARCHIVE SUCCEED
2363 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2367 [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
2371 run_test 220 "Changelog for archive"
2374 # test needs a running copytool
2379 local f=$DIR/$tdir/$tfile
2380 local fid=$(make_large_for_cancel $f)
2384 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2385 wait_request_state $fid ARCHIVE STARTED
2387 wait_request_state $fid ARCHIVE CANCELED
2388 wait_request_state $fid CANCEL SUCCEED
2390 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2393 [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
2398 run_test 221 "Changelog for archive canceled"
2401 # test needs a running copytool
2404 mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
2405 local f=$DIR/$tdir/$tfile
2406 cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile
2407 import_file $tdir/$tfile $f
2408 local fid=$(path2fid $f)
2413 wait_request_state $fid RESTORE SUCCEED
2415 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2418 [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
2423 run_test 222a "Changelog for explicit restore"
2426 # test needs a running copytool
2430 local f=$DIR/$tdir/$tfile
2431 local fid=$(copy_file /etc/passwd $f)
2434 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2435 wait_request_state $fid ARCHIVE SUCCEED
2440 wait_request_state $fid RESTORE SUCCEED
2442 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2445 [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
2450 run_test 222b "Changelog for implicit restore"
2453 # test needs a running copytool
2458 local f=$DIR/$tdir/$tfile
2459 make_archive $tdir/$tfile
2463 import_file $tdir/$tfile $f
2464 local fid=$(path2fid $f)
2467 wait_request_state $fid RESTORE STARTED
2469 wait_request_state $fid RESTORE CANCELED
2470 wait_request_state $fid CANCEL SUCCEED
2472 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2475 [[ $flags == $target ]] ||
2476 error "Changelog flag is $flags not $target"
2481 run_test 223a "Changelog for restore canceled (import case)"
2484 # test needs a running copytool
2489 local f=$DIR/$tdir/$tfile
2490 local fid=$(make_large_for_progress $f)
2493 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2494 wait_request_state $fid ARCHIVE SUCCEED
2497 wait_request_state $fid RESTORE STARTED
2499 wait_request_state $fid RESTORE CANCELED
2500 wait_request_state $fid CANCEL SUCCEED
2502 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2505 [[ $flags == $target ]] ||
2506 error "Changelog flag is $flags not $target"
2511 run_test 223b "Changelog for restore canceled (release case)"
2514 # test needs a running copytool
2519 local f=$DIR/$tdir/$tfile
2520 local fid=$(copy_file /etc/passwd $f)
2523 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2524 wait_request_state $fid ARCHIVE SUCCEED
2527 wait_request_state $fid REMOVE SUCCEED
2529 local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1)
2532 [[ $flags == $target ]] ||
2533 error "Changelog flag is $flags not $target"
2538 run_test 224 "Changelog for remove"
2541 # test needs a running copytool
2544 # test is not usable because remove request is too fast
2545 # so it is always finished before cancel can be done ...
2546 echo "Test disabled"
2551 local f=$DIR/$tdir/$tfile
2552 local fid=$(make_large_for_progress $f)
2555 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2556 wait_request_state $fid ARCHIVE SUCCEED
2558 # if cdt is on, it can serve the request too quickly
2563 wait_request_state $fid REMOVE CANCELED
2564 wait_request_state $fid CANCEL SUCCEED
2566 flags=$(changelog_get_flags $MDT0 RENME $fid2)
2567 local flags=$($LFS changelog $MDT0 | grep HSM | grep $fid | tail -1 |
2571 [[ $flags == $target ]] ||
2572 error "Changelog flag is $flags not $target"
2577 run_test 225 "Changelog for remove canceled"
2580 # test needs a running copytool
2585 local f1=$DIR/$tdir/$tfile-1
2586 local f2=$DIR/$tdir/$tfile-2
2587 local f3=$DIR/$tdir/$tfile-3
2588 local fid1=$(copy_file /etc/passwd $f1)
2589 local fid2=$(copy_file /etc/passwd $f2)
2590 copy_file /etc/passwd $f3
2593 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1
2594 wait_request_state $fid1 ARCHIVE SUCCEED
2596 $LFS hsm_archive $f2
2597 wait_request_state $fid2 ARCHIVE SUCCEED
2599 rm $f1 || error "rm $f1 failed"
2601 local flags=$(changelog_get_flags $MDT0 UNLNK $fid1)
2604 [[ $flags == $target ]] ||
2605 error "Changelog flag is $flags not $target"
2607 mv $f3 $f2 || error "mv $f3 $f2 failed"
2609 flags=$(changelog_get_flags $MDT0 RENME $fid2)
2612 [[ $flags == $target ]] ||
2613 error "Changelog flag is $flags not $target"
2618 run_test 226 "changelog for last rm/mv with exiting archive"
2620 check_flags_changes() {
2628 $LFS hsm_set --$hsm_flag $f ||
2629 error "Cannot set $hsm_flag on $f"
2630 local flags=($(changelog_get_flags $MDT0 HSM $fid))
2631 local seen=${#flags[*]}
2633 [[ $seen == $cnt ]] ||
2634 error "set $hsm_flag: Changelog events $seen != $cnt"
2635 [[ ${flags[$((cnt - 1))]} == $target ]] ||
2636 error "set $hsm_flag: Changelog flags are "\
2637 "${flags[$((cnt - 1))]} not $target"
2639 $LFS hsm_clear --$hsm_flag $f ||
2640 error "Cannot clear $hsm_flag on $f"
2641 flags=($(changelog_get_flags $MDT0 HSM $fid))
2644 [[ $cnt == $seen ]] ||
2645 error "clear $hsm_flag: Changelog events $seen != $cnt"
2647 [[ ${flags[$((cnt - 1))]} == $target ]] ||
2648 error "clear $hsm_flag: Changelog flag is "\
2649 "${flags[$((cnt - 1))]} not $target"
2653 # test needs a running copytool
2660 for i in norelease noarchive exists archived
2662 local f=$DIR/$tdir/$tfile-$i
2663 local fid=$(copy_file /etc/passwd $f)
2664 check_flags_changes $f $fid $i 0 1
2667 f=$DIR/$tdir/$tfile---lost
2668 fid=$(copy_file /etc/passwd $f)
2669 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2670 wait_request_state $fid ARCHIVE SUCCEED
2671 check_flags_changes $f $fid lost 3 1
2676 run_test 227 "changelog when explicit setting of HSM flags"
2679 # test needs a running copytool
2683 local maxrequest=$(get_hsm_param max_requests)
2684 local rqcnt=$(($maxrequest * 3))
2688 for i in $(seq -w 1 $rqcnt); do
2690 dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync
2692 # we do it in 2 steps, so all requests arrive at the same time
2693 for i in $(seq -w 1 $rqcnt); do
2694 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i
2699 while [[ $cnt != 0 || $wt != 0 ]]; do
2701 cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\
2702 $HSM_PARAM.agent_actions |\
2703 grep STARTED | grep -v CANCEL | wc -l")
2704 [[ $cnt -le $maxrequest ]] ||
2705 error "$cnt > $maxrequest too many started requests"
2706 wt=$(do_facet $SINGLEMDS "$LCTL get_param\
2707 $HSM_PARAM.agent_actions |\
2708 grep WAITING | wc -l")
2709 echo "max=$maxrequest started=$cnt waiting=$wt"
2714 run_test 250 "Coordinator max request"
2717 # test needs a running copytool
2721 local f=$DIR/$tdir/$tfile
2722 local fid=$(make_large_for_cancel $f)
2725 # to have a short test
2726 local old_to=$(get_hsm_param request_timeout)
2727 set_hsm_param request_timeout 4
2728 # to be sure the cdt will wake up frequently so
2729 # it will be able to cancel the "old" request
2730 local old_loop=$(get_hsm_param loop_period)
2731 set_hsm_param loop_period 2
2734 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
2735 wait_request_state $fid ARCHIVE STARTED
2737 wait_request_state $fid ARCHIVE CANCELED
2739 set_hsm_param request_timeout $old_to
2740 set_hsm_param loop_period $old_loop
2744 run_test 251 "Coordinator request timeout"
2747 # the only way to test ondisk conf is to restart MDS ...
2748 echo "Stop coordinator and remove coordinator state at mount"
2751 # clean on disk conf set by default
2752 cdt_clear_mount_state
2753 cdt_check_state stopped
2755 # check cdt still off after umount/remount
2757 cdt_check_state stopped
2759 echo "Set coordinator start at mount, and start coordinator"
2760 cdt_set_mount_state enabled
2763 cdt_check_state enabled
2765 # check cdt still on after umount/remount
2767 cdt_check_state enabled
2769 # we are back to original state (cdt started at mount)
2771 run_test 300 "On disk coordinator state kept between MDT umount/mount"
2776 check_and_cleanup_lustre