X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=661be1a7b6d5a86a6b708ade0848bed4bb030bf2;hp=cb248474004b08ee2c18c31b2a7438e6a244007e;hb=cd5660a4c5732e0ac81bb0be4dee0c75163a8e38;hpb=0136a91b6d629556ef091f5ca210c13772207df9;ds=sidebyside diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index cb24847..661be1a 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -16,6 +16,24 @@ ALWAYS_EXCEPT=" 42a 42b 42c 42d 45 51d 68b $SANITY_EXCE # bug number for skipped tests: LU-2036 ALWAYS_EXCEPT=" 76 $ALWAYS_EXCEPT" +is_sles11() # LU-4341 +{ + if [ -r /etc/SuSE-release ] + then + local vers=$(grep VERSION /etc/SuSE-release | awk '{print $3}') + local patchlev=$(grep PATCHLEVEL /etc/SuSE-release | + awk '{ print $3 }') + if [ $vers -eq 11 ] && [ $patchlev -ge 3 ]; then + return 0 + fi + fi + return 1 +} + +if is_sles11; then # LU-4341 + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 170" +fi + SRCDIR=$(cd $(dirname $0); echo $PWD) export PATH=$PATH:/sbin @@ -38,7 +56,6 @@ SOCKETCLIENT=${SOCKETCLIENT:-socketclient} MEMHOG=${MEMHOG:-memhog} DIRECTIO=${DIRECTIO:-directio} ACCEPTOR_PORT=${ACCEPTOR_PORT:-988} -UMOUNT=${UMOUNT:-"umount -d"} STRIPES_PER_OBJ=-1 CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} @@ -58,7 +75,7 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} init_logging -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24D 27m 64b 68 71 77f 78 115 124b" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24D 27m 64b 68 71 77f 78 115 124b 300o" if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then # bug number for skipped test: LU-4536 LU-1957 LU-2805 @@ -88,17 +105,11 @@ check_swap_layouts_support() } check_and_setup_lustre - DIR=${DIR:-$MOUNT} assert_DIR MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | awk '{ gsub(/_UUID/,""); print $1 }' | head -n1) -LOVNAME=$($LCTL get_param -n llite.*.lov.common_name | tail -n 1) -OSTCOUNT=$($LCTL get_param -n lov.$LOVNAME.numobd) -STRIPECOUNT=$($LCTL get_param -n lov.$LOVNAME.stripecount) -STRIPESIZE=$($LCTL get_param -n lov.$LOVNAME.stripesize) -ORIGFREE=$($LCTL get_param -n lov.$LOVNAME.kbytesavail) MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo @@ -539,11 +550,7 @@ run_test 17l "Ensure lgetxattr's returned xattr size is consistent ========" test_17m() { local short_sym="0123456789" local WDIR=$DIR/${tdir}m - local mds_index - local devname - local cmd local i - local rc=0 remote_mds_nodsh && skip "remote MDS with nodsh" && return [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.2.0) ] && @@ -576,49 +583,42 @@ test_17m() { echo "recreate the 512 symlink files with a shorter string" for ((i = 0; i < 512; ++i)); do # rewrite the symlink file with a shorter string - ln -sf ${long_sym} $WDIR/long-$i - ln -sf ${short_sym} $WDIR/short-$i + ln -sf ${long_sym} $WDIR/long-$i || error "long_sym failed" + ln -sf ${short_sym} $WDIR/short-$i || error "short_sym failed" done - mds_index=$($LFS getstripe -M $WDIR) - mds_index=$((mds_index+1)) - devname=$(mdsdevname $mds_index) - cmd="$E2FSCK -fnvd $devname" + local mds_index=$(($($LFS getstripe -M $WDIR) + 1)) + local devname=$(mdsdevname $mds_index) - echo "stop and checking mds${mds_index}: $cmd" + echo "stop and checking mds${mds_index}:" # e2fsck should not return error stop mds${mds_index} - do_facet mds${mds_index} $cmd || rc=$? + run_e2fsck $(facet_active_host mds${mds_index}) $devname -n + rc=$? start mds${mds_index} $devname $MDS_MOUNT_OPTS || error "start failed" df $MOUNT > /dev/null 2>&1 - [ $rc -ne 0 ] && error "e2fsck should not report error upon "\ - "short/long symlink MDT: rc=$rc" - return $rc + [ $rc -eq 0 ] || + error "e2fsck detected error for short/long symlink: rc=$rc" } run_test 17m "run e2fsck against MDT which contains short/long symlink" check_fs_consistency_17n() { local mdt_index - local devname - local cmd local rc=0 # create/unlink in 17n only change 2 MDTs(MDT1/MDT2), # so it only check MDT1/MDT2 instead of all of MDTs. - for mdt_index in $(seq 1 2); do - devname=$(mdsdevname $mdt_index) - cmd="$E2FSCK -fnvd $devname" - - echo "stop and checking mds${mdt_index}: $cmd" + for mdt_index in 1 2; do + local devname=$(mdsdevname $mdt_index) # e2fsck should not return error stop mds${mdt_index} - do_facet mds${mdt_index} $cmd || rc=$? + run_e2fsck $(facet_active_host mds$mdt_index) $devname -n || + rc=$((rc + $?)) start mds${mdt_index} $devname $MDS_MOUNT_OPTS || - error "mount mds${mdt_index} failed" + error "mount mds$mdt_index failed" df $MOUNT > /dev/null 2>&1 - [ $rc -ne 0 ] && break done return $rc } @@ -664,7 +664,7 @@ test_17n() { mkdir -p $DIR/$tdir/remote_dir_${i} createmany -o $DIR/$tdir/remote_dir_${i}/f 10 || error "create files under remote dir failed $i" - $LFS mv --mdt-index 1 $DIR/$tdir/remote_dir_${i} || + $LFS migrate --mdt-index 1 $DIR/$tdir/remote_dir_${i} || error "migrate remote dir error $i" done check_fs_consistency_17n || error "e2fsck report error after migration" @@ -1080,6 +1080,10 @@ run_test 24w "Reading a file larger than 4Gb" test_24x() { [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return + + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.56) ]] && + skip "Need MDS version at least 2.7.56" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return local MDTIDX=1 local remote_dir=$DIR/$tdir/remote_dir @@ -1357,6 +1361,9 @@ test_27d() { run_test 27d "create file with default settings ================" test_27e() { + # LU-5839 adds check for existed layout before setting it + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.56) ]] && + skip "Need MDS version at least 2.7.56" && return test_mkdir -p $DIR/d27 $SETSTRIPE -c 2 $DIR/d27/f12 || error "setstripe failed" $SETSTRIPE -c 2 $DIR/d27/f12 && error "setstripe succeeded twice" @@ -1422,6 +1429,9 @@ run_test 27l "check setstripe permissions (should return error)" test_27m() { [[ $OSTCOUNT -lt 2 ]] && skip_env "$OSTCOUNT < 2 OSTs -- skipping" && return + + ORIGFREE=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) if [[ $ORIGFREE -gt $MAXFREE ]]; then skip "$ORIGFREE > $MAXFREE skipping out-of-space test on OST0" return @@ -1841,7 +1851,7 @@ check_seq_oid() log "want: stripe:$stripe_nr ost:$obdidx oid:$oid/$hex seq:$seq" seq=$(echo $seq | sed -e "s/^0x//g") - if [ $seq == 0 ]; then + if [ $seq == 0 ] || [ $(facet_fstype ost$ost) == zfs ]; then oid_hex=$(echo $oid) else oid_hex=$(echo $hex | sed -e "s/^0x//g") @@ -1865,6 +1875,7 @@ check_seq_oid() $(facet_mntpt ost$ost)/$obj_file) unmount_fstype ost$ost start ost$ost $dev $OST_MOUNT_OPTS + clients_up fi [ -z "$ff" ] && error "$obj_file: no filter_fid info" @@ -2008,6 +2019,7 @@ run_test 27C "check full striping across all OSTs" test_27D() { [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return local POOL=${POOL:-testpool} local first_ost=0 local last_ost=$(($OSTCOUNT - 1)) @@ -2345,7 +2357,7 @@ run_test 31p "remove of open striped directory" cleanup_test32_mount() { trap 0 - $UMOUNT -d $DIR/$tdir/ext2-mountpoint + $UMOUNT $DIR/$tdir/ext2-mountpoint } test_32a() { @@ -2552,7 +2564,7 @@ run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/$tfile" cleanup_testdir_mount() { trap 0 - $UMOUNT -d $DIR/$tdir + $UMOUNT $DIR/$tdir } test_32q() { @@ -2695,8 +2707,7 @@ test_33d() { error "create" || true $RUNAS $OPENFILE -f O_RDWR:O_CREAT -m 0444 $remote_dir/f33 && error "open RDWR" || true - $RUNAS $OPENFILE -f 1286739555 $remote_dir/f33 && - error "create" || true + $RUNAS $OPENFILE -f 1286739555 $remote_dir/f33 || true } run_test 33d "openfile with 444 modes and malformed flags under remote dir" @@ -4294,7 +4305,7 @@ cleanup_54c() { loopdev="$DIR/loop54c" trap 0 - $UMOUNT -d $DIR/$tdir || rc=$? + $UMOUNT $DIR/$tdir || rc=$? losetup -d $loopdev || true losetup -d $LOOPDEV || true rm -rf $loopdev $DIR/$tfile $DIR/$tdir @@ -4839,7 +4850,9 @@ test_56w() { local file_size=$((stripe_size * OSTCOUNT)) local file_num=$((NUMDIRS * NUMFILES + NUMFILES)) local required_space=$((file_num * file_size)) - local free_space=$($LCTL get_param -n lov.$LOVNAME.kbytesavail) + + local free_space=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) [[ $free_space -le $((required_space / 1024)) ]] && skip_env "need at least $required_space bytes free space," \ "have $free_space kbytes" && return @@ -4947,6 +4960,29 @@ test_56x() { } run_test 56x "lfs migration support" +test_56xa() { + check_swap_layouts_support && return 0 + [[ $OSTCOUNT -lt 2 ]] && + skip_env "need 2 OST, skipping test" && return + + local dir0=$DIR/$tdir/$testnum + test_mkdir -p $dir0 || error "creating dir $dir0" + + local ref1=/etc/passwd + local file1=$dir0/file1 + + $SETSTRIPE -c 2 $file1 + cp $ref1 $file1 + $LFS migrate --block -c 1 $file1 || error "migrate failed rc = $?" + local stripe=$($GETSTRIPE -c $file1) + [[ $stripe == 1 ]] || error "stripe of $file1 is $stripe != 1" + cmp $file1 $ref1 || error "content mismatch $file1 differs from $ref1" + + # clean up + rm -f $file1 +} +run_test 56xa "lfs migration --block support" + test_56y() { [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" && @@ -5123,28 +5159,106 @@ test_60a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_mgs_nodsh && skip "remote MGS with nodsh" && return do_facet mgs "! which run-llog.sh &> /dev/null" && - skip_env "missing subtest run-llog.sh" && return + do_facet mgs "! ls run-llog.sh &> /dev/null" && + skip_env "missing subtest run-llog.sh" && return + log "$TEST60_HEAD - from kernel mode" + do_facet mgs "$LCTL set_param debug=warning; $LCTL dk > /dev/null" do_facet mgs sh run-llog.sh + do_facet mgs $LCTL dk > $TMP/$tfile + + # LU-6388: test llog_reader + local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null") + llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader} + [ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] && + skip_env "missing llog_reader" && return + local fstype=$(facet_fstype mgs) + [ $fstype != ldiskfs -a $fstype != zfs ] && + skip_env "Only for ldiskfs or zfs type mgs" && return + + local mntpt=$(facet_mntpt mgs) + local mgsdev=$(mgsdevname 1) + local fid_list + local fid + local rec_list + local rec + local rec_type + local obj_file + local path + local seq + local oid + local pass=true + + #get fid and record list + fid_list=($(awk '/9_sub.*record/ { print $NF }' /$TMP/$tfile | + tail -n 4)) + rec_list=($(awk '/9_sub.*record/ { print $((NF-3)) }' /$TMP/$tfile | + tail -n 4)) + #remount mgs as ldiskfs or zfs type + stop mgs || error "stop mgs failed" + mount_fstype mgs || error "remount mgs failed" + for ((i = 0; i < ${#fid_list[@]}; i++)); do + fid=${fid_list[i]} + rec=${rec_list[i]} + seq=$(echo $fid | awk -F ':' '{ print $1 }' | sed -e "s/^0x//g") + oid=$(echo $fid | awk -F ':' '{ print $2 }' | sed -e "s/^0x//g") + oid=$((16#$oid)) + + case $fstype in + ldiskfs ) + obj_file=$mntpt/O/$seq/d$((oid%32))/$oid ;; + zfs ) + obj_file=$mntpt/oi.$(($((16#$seq))&127))/$fid ;; + esac + echo "obj_file is $obj_file" + do_facet mgs $llog_reader $obj_file + + rec_type=$(do_facet mgs $llog_reader $obj_file | grep "type=" | + awk '{ print $3 }' | sed -e "s/^type=//g") + if [ $rec_type != $rec ]; then + echo "FAILED test_60a wrong record type $rec_type," \ + "should be $rec" + pass=false + break + fi + + #check obj path if record type is LLOG_LOGID_MAGIC + if [ "$rec" == "1064553b" ]; then + path=$(do_facet mgs $llog_reader $obj_file | + grep "path=" | awk '{ print $NF }' | + sed -e "s/^path=//g") + if [ $obj_file != $mntpt/$path ]; then + echo "FAILED test_60a wrong obj path" \ + "$montpt/$path, should be $obj_file" + pass=false + break + fi + fi + done + rm -f $TMP/$tfile + #restart mgs before "error", otherwise it will block the next test + stop mgs || error "stop mgs failed" + start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "start mgs failed" + $pass || error "test failed, see FAILED test_60a messages for specifics" } -run_test 60a "llog sanity tests run from kernel module ==========" +run_test 60a "llog_test run from kernel module and test llog_reader ==========" test_60b() { # bug 6411 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return dmesg > $DIR/$tfile - LLOG_COUNT=`dmesg | awk "/$TEST60_HEAD/{marker = 1; from_marker = 0;} - /llog.test/ { - if (marker) - from_marker++ - from_begin++ - } - END { - if (marker) - print from_marker - else - print from_begin - }"` - [[ $LLOG_COUNT -gt 50 ]] && + LLOG_COUNT=$(dmesg | awk "/$TEST60_HEAD/ { marker = 1; from_marker = 0; } + /llog.test/ { + if (marker) + from_marker++ + from_begin++ + } + END { + if (marker) + print from_marker + else + print from_begin + }") + [[ $LLOG_COUNT -gt 100 ]] && error "CDEBUG_LIMIT not limiting messages ($LLOG_COUNT)" || true } run_test 60b "limit repeated messages from CERROR/CWARN ========" @@ -5180,6 +5294,15 @@ test_60d() { } run_test 60d "test printk console message masking" +test_60e() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + touch $DIR/$tfile +#define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b + do_facet mds1 lctl set_param fail_loc=0x15b + rm $DIR/$tfile +} +run_test 60e "no space while new llog is being created" + test_61() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return f="$DIR/f61" @@ -5275,28 +5398,35 @@ run_test 65a "directory with no stripe info ====================" test_65b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir + local STRIPESIZE=$($GETSTRIPE -S $DIR/$tdir) + $SETSTRIPE -S $((STRIPESIZE * 2)) -i 0 -c 1 $DIR/$tdir || error "setstripe" touch $DIR/$tdir/f2 $LVERIFY $DIR/$tdir $DIR/$tdir/f2 || error "lverify failed" } -run_test 65b "directory setstripe -S $((STRIPESIZE * 2)) -i 0 -c 1" +run_test 65b "directory setstripe -S stripe_size*2 -i 0 -c 1" test_65c() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return if [[ $OSTCOUNT -gt 1 ]]; then test_mkdir -p $DIR/$tdir + local STRIPESIZE=$($GETSTRIPE -S $DIR/$tdir) + $SETSTRIPE -S $(($STRIPESIZE * 4)) -i 1 \ -c $(($OSTCOUNT - 1)) $DIR/$tdir || error "setstripe" touch $DIR/$tdir/f3 $LVERIFY $DIR/$tdir $DIR/$tdir/f3 || error "lverify failed" fi } -run_test 65c "directory setstripe -S $((STRIPESIZE*4)) -i 1 -c $((OSTCOUNT-1))" +run_test 65c "directory setstripe -S stripe_size*4 -i 1 -c $((OSTCOUNT-1))" test_65d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir + local STRIPECOUNT=$($GETSTRIPE -c $DIR/$tdir) + local STRIPESIZE=$($GETSTRIPE -S $DIR/$tdir) + if [[ $STRIPECOUNT -le 0 ]]; then sc=1 elif [[ $STRIPECOUNT -gt 2000 ]]; then @@ -5310,7 +5440,7 @@ test_65d() { $LVERIFY $DIR/$tdir $DIR/$tdir/f4 $DIR/$tdir/f5 || error "lverify failed" } -run_test 65d "directory setstripe -S $STRIPESIZE -c stripe_count" +run_test 65d "directory setstripe -S stripe_size -c stripe_count" test_65e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return @@ -5334,6 +5464,8 @@ run_test 65f "dir setstripe permission (should return error) ===" test_65g() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir + local STRIPESIZE=$($GETSTRIPE -S $DIR/$tdir) + $SETSTRIPE -S $((STRIPESIZE * 2)) -i 0 -c 1 $DIR/$tdir || error "setstripe" $SETSTRIPE -d $DIR/$tdir || error "setstripe" @@ -5345,6 +5477,8 @@ run_test 65g "directory setstripe -d ===========================" test_65h() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir + local STRIPESIZE=$($GETSTRIPE -S $DIR/$tdir) + $SETSTRIPE -S $((STRIPESIZE * 2)) -i 0 -c 1 $DIR/$tdir || error "setstripe" test_mkdir -p $DIR/$tdir/dd1 @@ -5543,7 +5677,7 @@ test_68b() { # was test_68 run_test 68b "support swapping to Lustre ========================" # bug5265, obdfilter oa2dentry return -ENOENT -# #define OBD_FAIL_OST_ENOENT 0x217 +# #define OBD_FAIL_SRV_ENOENT 0x217 test_69() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_ost_nodsh && skip "remote OST with nodsh" && return @@ -6105,7 +6239,7 @@ test_99b() { # some versions of cvs import exit(1) when asked to import links or # files they can't read. ignore those files. TOIGNORE=$(find . -type l -printf '-I %f\n' -o \ - ! -perm +4 -printf '-I %f\n') + ! -perm /4 -printf '-I %f\n') $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" $TOIGNORE \ d99reposname vtag rtag } @@ -6442,41 +6576,29 @@ test_101e() { } run_test 101e "check read-ahead for small read(1k) for small files(500k)" -cleanup_test101f() { - trap 0 - $LCTL set_param -n llite.*.max_read_ahead_whole_mb $MAX_WHOLE_MB - rm -rf $DIR/$tfile 2>/dev/null -} - test_101f() { - [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - local file=$DIR/$tfile - local nreads=1000 + which iozone || { skip "no iozone installed" && return; } - MAX_WHOLE_MB=$($LCTL get_param -n llite.*.max_read_ahead_whole_mb) - $LCTL set_param -n llite.*.max_read_ahead_whole_mb 2 - dd if=/dev/zero of=${file} bs=2097152 count=1 2>/dev/null - trap cleanup_test101f EXIT + # create a test file + iozone -i 0 -+n -r 1m -s 128m -w -f $DIR/$tfile > /dev/null 2>&1 - echo Cancel LRU locks on lustre client to flush the client cache - cancel_lru_locks osc + echo Cancel LRU locks on lustre client to flush the client cache + cancel_lru_locks osc + + echo Reset readahead stats + $LCTL set_param -n llite.*.read_ahead_stats 0 - echo Reset readahead stats - $LCTL set_param -n llite.*.read_ahead_stats 0 - # Random read in a 2M file, because max_read_ahead_whole_mb = 2M, - # readahead should read in 2M file on second read, so only miss - # 2 pages. - echo Random 4K reads on 2M file for 1000 times - $READS -f $file -s 2097152 -b 4096 -n $nreads + echo mmap read the file with small block size + iozone -i 1 -+n -r 32k -s 128m -B -f $DIR/$tfile > /dev/null 2>&1 - echo checking missing pages - local miss=$($LCTL get_param -n llite.*.read_ahead_stats | - get_named_value 'misses' | cut -d" " -f1 | calc_total) + echo checking missing pages + local miss=$($LCTL get_param -n llite.*.read_ahead_stats | + get_named_value 'misses' | cut -d" " -f1 | calc_total) - [ $miss -lt 3 ] || error "misses too much pages!" - cleanup_test101f + [ $miss -lt 3 ] || error "misses too much pages!" + rm -f $DIR/$tfile } -run_test 101f "check read-ahead for max_read_ahead_whole_mb" +run_test 101f "check mmap read performance" setup_test102() { test_mkdir -p $DIR/$tdir @@ -6696,7 +6818,7 @@ grow_xattr() { local xsize=${1:-1024} # in bytes local file=$DIR/$tfile - [ -z $(lctl get_param -n mdc.*.connect_flags | grep xattr) ] && + [ -z "$(lctl get_param -n mdc.*.connect_flags | grep xattr)" ] && skip "must have user_xattr" && return 0 [ -z "$(which setfattr 2>/dev/null)" ] && skip_env "could not find setfattr" && return 0 @@ -7292,6 +7414,8 @@ free_min_max () { test_116a() { # was previously test_116() [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $OSTCOUNT -lt 2 ]] && skip_env "$OSTCOUNT < 2 OSTs" && return echo -n "Free space priority " @@ -7411,6 +7535,8 @@ run_test 116a "stripe QOS: free space balance ===================" test_116b() { # LU-2093 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + #define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147 local old_rr=$(do_facet $SINGLEMDS lctl get_param -n \ lo*.$FSNAME-MDT0000-mdtlov.qos_threshold_rr | head -1) @@ -7487,7 +7613,7 @@ test_118b() reset_async - #define OBD_FAIL_OST_ENOENT 0x217 + #define OBD_FAIL_SRV_ENOENT 0x217 set_nodes_failloc "$(osts_nodes)" 0x217 $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? @@ -8022,16 +8148,20 @@ run_test 120d "Early Lock Cancel: setattr test" test_120e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - test_mkdir -p -c1 $DIR/$tdir - [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ - skip "no early lock cancel on server" && return 0 - lru_resize_disable mdc - lru_resize_disable osc - dd if=/dev/zero of=$DIR/$tdir/f1 count=1 - cancel_lru_locks mdc - cancel_lru_locks osc - dd if=$DIR/$tdir/f1 of=/dev/null - stat $DIR/$tdir $DIR/$tdir/f1 > /dev/null + ! $($LCTL get_param -n mdc.*.connect_flags | grep -q early_lock_can) && + skip "no early lock cancel on server" && return 0 + local dlmtrace_set=false + + test_mkdir -p -c1 $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc + ! $LCTL get_param debug | grep -q dlmtrace && + $LCTL set_param debug=+dlmtrace && dlmtrace_set=true + dd if=/dev/zero of=$DIR/$tdir/f1 count=1 + cancel_lru_locks mdc + cancel_lru_locks osc + dd if=$DIR/$tdir/f1 of=/dev/null + stat $DIR/$tdir $DIR/$tdir/f1 > /dev/null # XXX client can not do early lock cancel of OST lock # during unlink (LU-4206), so cancel osc lock now. cancel_lru_locks osc @@ -8047,8 +8177,11 @@ test_120e() { awk '/ldlm_cancel/ {print $2}') blk2=$($LCTL get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}') - [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." - [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." + [ $can1 -ne $can2 ] && error "$((can2 - can1)) cancel RPC occured" && + $LCTL dk $TMP/cancel.debug.txt + [ $blk1 -ne $blk2 ] && error "$((blk2 - blk1)) blocking RPC occured" && + $LCTL dk $TMP/blocking.debug.txt + $dlmtrace_set && $LCTL set_param debug=-dlmtrace lru_resize_enable mdc lru_resize_enable osc } @@ -8588,7 +8721,18 @@ set_dir_limits () { do_facet $facet "test -e $LDPROC/$canondev/max_dir_size" || LDPROC=/sys/fs/ldiskfs do_facet $facet "echo $1 >$LDPROC/$canondev/max_dir_size" + do_facet $facet "test -e $LDPROC/$canondev/warning_dir_size" || + LDPROC=/sys/fs/ldiskfs + do_facet $facet "echo $2 >$LDPROC/$canondev/warning_dir_size" + done +} + +check_mds_dmesg() { + local facets=$(get_facets MDS) + for facet in ${facets//,/ }; do + do_facet $facet "dmesg | tail -3 | grep -q $1" && return 0 done + return 1 } test_129() { @@ -8600,31 +8744,40 @@ test_129() { remote_mds_nodsh && skip "remote MDS with nodsh" && return ENOSPC=28 EFBIG=27 + has_warning=0 rm -rf $DIR/$tdir - test_mkdir -p $DIR/$tdir + mkdir -p $DIR/$tdir # block size of mds1 local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/}) local MDSBLOCKSIZE=$($LCTL get_param -n mdc.*MDT0000*.blocksize) - local MAX=$((MDSBLOCKSIZE * 3)) - set_dir_limits $MAX + local MAX=$((MDSBLOCKSIZE * 5)) + set_dir_limits $MAX $MAX local I=$(stat -c%s "$DIR/$tdir") local J=0 - local STRIPE_COUNT=1 - [[ $MDSCOUNT -ge 2 ]] && STRIPE_COUNT=$($LFS getdirstripe -c $DIR/$tdir) - MAX=$((MAX*STRIPE_COUNT)) while [[ $I -le $MAX ]]; do $MULTIOP $DIR/$tdir/$J Oc rc=$? + if [ $has_warning -eq 0 ]; then + check_mds_dmesg '"is approaching"' && + has_warning=1 + fi #check two errors ENOSPC for new version of ext4 max_dir_size patch #mainline kernel commit df981d03eeff7971ac7e6ff37000bfa702327ef1 #and EFBIG for previous versions if [ $rc -eq $EFBIG -o $rc -eq $ENOSPC ]; then - set_dir_limits 0 + set_dir_limits 0 0 echo "return code $rc received as expected" - multiop $DIR/$tdir/$J Oc || - error_exit "multiop failed w/o dir size limit" + + createmany -o $DIR/$tdir/$J_file_ 1000 || + error_exit "create failed w/o dir size limit" + + check_mds_dmesg '"has reached"' || + error_exit "has reached message should be output" + + [ $has_warning ] || + error_exit "warning message should be output" I=$(stat -c%s "$DIR/$tdir") @@ -8637,7 +8790,7 @@ test_129() { fi error_exit "current dir size $I, previous limit $MAX" elif [ $rc -ne 0 ]; then - set_dir_limits 0 + set_dir_limits 0 0 error_exit "return code $rc received instead of expected " \ "$EFBIG or $ENOSPC, files in dir $I" fi @@ -8645,7 +8798,7 @@ test_129() { I=$(stat -c%s "$DIR/$tdir") done - set_dir_limits 0 + set_dir_limits 0 0 error "exceeded dir size limit $MAX($MDSCOUNT) : $I bytes" } run_test 129 "test directory size limit ========================" @@ -8675,8 +8828,8 @@ test_130a() { skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return [ $RC != 0 ] && error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | grep -A 100 "ext:" | - grep -v "ext:" | grep -v "found") + filefrag_op=$(filefrag -ve $fm_file | + sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') lun=$($GETSTRIPE -i $fm_file) start_blk=`echo $filefrag_op | cut -d: -f2 | cut -d. -f1` @@ -8708,10 +8861,7 @@ run_test 130a "FIEMAP (1-stripe file)" test_130b() { [ "$OSTCOUNT" -lt "2" ] && - skip_env "skipping FIEMAP on 2-stripe file test" && return - - [ "$OSTCOUNT" -ge "10" ] && - skip_env "skipping FIEMAP with >= 10 OSTs" && return + skip_env "skipping FIEMAP on $OSTCOUNT-stripe file" && return local filefrag_op=$(filefrag -e 2>&1 | grep "invalid option") [ -n "$filefrag_op" ] && skip_env "filefrag does not support FIEMAP" && @@ -8720,30 +8870,34 @@ test_130b() { trap cleanup_130 EXIT RETURN local fm_file=$DIR/$tfile - $SETSTRIPE -S 65536 -c 2 $fm_file || error "setstripe on $fm_file" + $SETSTRIPE -S 65536 -c $OSTCOUNT $fm_file || + error "setstripe on $fm_file" [ "$(facet_fstype ost$(($($GETSTRIPE -i $fm_file) + 1)))" = "zfs" ] && skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return - dd if=/dev/zero of=$fm_file bs=1M count=2 || + dd if=/dev/zero of=$fm_file bs=1M count=$OSTCOUNT || error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | grep -A 100 "ext:" | - grep -v "ext:" | grep -v "found") + filefrag_op=$(filefrag -ve $fm_file | + sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') - last_lun=$(echo $filefrag_op | cut -d: -f5) + last_lun=$(echo $filefrag_op | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') IFS=$'\n' tot_len=0 num_luns=1 for line in $filefrag_op do - frag_lun=`echo $line | cut -d: -f5` - ext_len=`echo $line | cut -d: -f4` + frag_lun=$(echo $line | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') + ext_len=$(echo $line | cut -d: -f4) if (( $frag_lun != $last_lun )); then if (( tot_len != 1024 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 256" + error "FIEMAP on $fm_file failed; returned " \ + "len $tot_len for OST $last_lun instead of 1024" return else (( num_luns += 1 )) @@ -8753,25 +8907,23 @@ test_130b() { (( tot_len += ext_len )) last_lun=$frag_lun done - if (( num_luns != 2 || tot_len != 1024 )); then + if (( num_luns != $OSTCOUNT || tot_len != 1024 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun" + error "FIEMAP on $fm_file failed; returned wrong number of " \ + "luns or wrong len for OST $last_lun" return fi cleanup_130 - echo "FIEMAP on 2-stripe file succeeded" + echo "FIEMAP on $OSTCOUNT-stripe file succeeded" } -run_test 130b "FIEMAP (2-stripe file)" +run_test 130b "FIEMAP ($OSTCOUNT-stripe file)" test_130c() { [ "$OSTCOUNT" -lt "2" ] && skip_env "skipping FIEMAP on 2-stripe file" && return - [ "$OSTCOUNT" -ge "10" ] && - skip_env "skipping FIEMAP with >= 10 OSTs" && return - filefrag_op=$(filefrag -e 2>&1 | grep "invalid option") [ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return @@ -8783,30 +8935,36 @@ test_130c() { [ "$(facet_fstype ost$(($($GETSTRIPE -i $fm_file) + 1)))" = "zfs" ] && skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return - dd if=/dev/zero of=$fm_file seek=1 bs=1M count=1 || error "dd failed on $fm_file" + dd if=/dev/zero of=$fm_file seek=1 bs=1M count=1 || + error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"` + filefrag_op=$(filefrag -ve $fm_file | + sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') - last_lun=`echo $filefrag_op | cut -d: -f5` + last_lun=$(echo $filefrag_op | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') IFS=$'\n' tot_len=0 num_luns=1 for line in $filefrag_op do - frag_lun=`echo $line | cut -d: -f5` - ext_len=`echo $line | cut -d: -f4` + frag_lun=$(echo $line | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') + ext_len=$(echo $line | cut -d: -f4) if (( $frag_lun != $last_lun )); then logical=`echo $line | cut -d: -f2 | cut -d. -f1` if (( logical != 512 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned logical start for lun $logical instead of 512" + error "FIEMAP on $fm_file failed; returned " \ + "logical start for lun $logical instead of 512" return fi if (( tot_len != 512 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024" + error "FIEMAP on $fm_file failed; returned " \ + "len $tot_len for OST $last_lun instead of 1024" return else (( num_luns += 1 )) @@ -8818,7 +8976,8 @@ test_130c() { done if (( num_luns != 2 || tot_len != 512 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun" + error "FIEMAP on $fm_file failed; returned wrong number of " \ + "luns or wrong len for OST $last_lun" return fi @@ -8829,18 +8988,18 @@ test_130c() { run_test 130c "FIEMAP (2-stripe file with hole)" test_130d() { - [ "$OSTCOUNT" -lt "3" ] && skip_env "skipping FIEMAP on N-stripe file test" && return - - [ "$OSTCOUNT" -ge "10" ] && - skip_env "skipping FIEMAP with >= 10 OSTs" && return + [ "$OSTCOUNT" -lt "3" ] && + skip_env "skipping FIEMAP on N-stripe file test" && return filefrag_op=$(filefrag -e 2>&1 | grep "invalid option") - [ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return + [ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && + return trap cleanup_130 EXIT RETURN local fm_file=$DIR/$tfile - $SETSTRIPE -S 65536 -c $OSTCOUNT $fm_file||error "setstripe on $fm_file" + $SETSTRIPE -S 65536 -c $OSTCOUNT $fm_file || + error "setstripe on $fm_file" [ "$(facet_fstype ost$(($($GETSTRIPE -i $fm_file) + 1)))" = "zfs" ] && skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return @@ -8849,22 +9008,25 @@ test_130d() { error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | - grep -v "ext:" | grep -v "found"` + filefrag_op=$(filefrag -ve $fm_file | + sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') - last_lun=`echo $filefrag_op | cut -d: -f5` + last_lun=$(echo $filefrag_op | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') IFS=$'\n' tot_len=0 num_luns=1 for line in $filefrag_op do - frag_lun=`echo $line | cut -d: -f5` - ext_len=`echo $line | cut -d: -f4` + frag_lun=$(echo $line | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') + ext_len=$(echo $line | cut -d: -f4) if (( $frag_lun != $last_lun )); then if (( tot_len != 1024 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024" + error "FIEMAP on $fm_file failed; returned " \ + "len $tot_len for OST $last_lun instead of 1024" return else (( num_luns += 1 )) @@ -8876,7 +9038,8 @@ test_130d() { done if (( num_luns != actual_stripecnt || tot_len != 1024 )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun" + error "FIEMAP on $fm_file failed; returned wrong number of " \ + "luns or wrong len for OST $last_lun" return fi @@ -8887,10 +9050,8 @@ test_130d() { run_test 130d "FIEMAP (N-stripe file)" test_130e() { - [ "$OSTCOUNT" -lt "2" ] && skip_env "skipping continuation FIEMAP test" && return - - [ "$OSTCOUNT" -ge "10" ] && - skip_env "skipping FIEMAP with >= 10 OSTs" && return + [ "$OSTCOUNT" -lt "2" ] && + skip_env "skipping continuation FIEMAP test" && return filefrag_op=$(filefrag -e 2>&1 | grep "invalid option") [ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return @@ -8910,21 +9071,26 @@ test_130e() { done filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=`filefrag -ve $fm_file | grep -A 12000 "ext:" | grep -v "ext:" | grep -v "found"` + filefrag_op=$(filefrag -ve $fm_file | + sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') - last_lun=`echo $filefrag_op | cut -d: -f5` + last_lun=$(echo $filefrag_op | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') IFS=$'\n' tot_len=0 num_luns=1 for line in $filefrag_op do - frag_lun=`echo $line | cut -d: -f5` - ext_len=`echo $line | cut -d: -f4` + frag_lun=$(echo $line | cut -d: -f5 | + sed -e 's/^[ \t]*/0x/' | sed -e 's/0x0x/0x/') + ext_len=$(echo $line | cut -d: -f4) if (( $frag_lun != $last_lun )); then if (( tot_len != $EXPECTED_LEN )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of $EXPECTED_LEN" + error "FIEMAP on $fm_file failed; returned " \ + "len $tot_len for OST $last_lun instead " \ + "of $EXPECTED_LEN" return else (( num_luns += 1 )) @@ -8936,7 +9102,8 @@ test_130e() { done if (( num_luns != 2 || tot_len != $EXPECTED_LEN )); then cleanup_130 - error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun" + error "FIEMAP on $fm_file failed; returned wrong number " \ + "of luns or wrong len for OST $last_lun" return fi @@ -9290,31 +9457,25 @@ test_133e() { } run_test 133e "Verifying OST {read,write}_bytes nid stats =================" -test_133f() { - local proc_dirs - - local dirs="/proc/fs/lustre/ /proc/sys/lnet/ /proc/sys/lustre/ \ -/sys/fs/lustre/ /sys/fs/lnet/" - local dir - for dir in $dirs; do - if [ -d $dir ]; then - proc_dirs="$proc_dirs $dir" - fi - done - - local facet +proc_dirs="" +for dir in /proc/fs/lustre/ /proc/sys/lnet/ /proc/sys/lustre/ \ + /sys/fs/lustre/ /sys/fs/lnet/ /sys/kernel/debug/lnet/ \ + /sys/kernel/debug/lustre/; do + [[ -d $dir ]] && proc_dirs+=" $dir" +done +test_133f() { remote_mds_nodsh && skip "remote MDS with nodsh" && return remote_ost_nodsh && skip "remote OST with nodsh" && return # First without trusting modes. find $proc_dirs -exec cat '{}' \; &> /dev/null # Second verifying readability. - find $proc_dirs \ - -type f \ - -exec cat '{}' \; &> /dev/null || - error "proc file read failed" + $LCTL get_param -R '*' &> /dev/null || error "proc file read failed" + # eventually, this can also be replaced with "lctl get_param -R", + # but not until that option is always available on the server + local facet for facet in $SINGLEMDS ost1; do do_facet $facet find $proc_dirs \ ! -name req_history \ @@ -9330,20 +9491,7 @@ test_133f() { run_test 133f "Check for LBUGs/Oopses/unreadable files in /proc" test_133g() { - local proc_dirs - - local dirs="/proc/fs/lustre/ /proc/sys/lnet/ /proc/sys/lustre/ \ -/sys/fs/lustre/ /sys/fs/lnet/" - local dir - for dir in $dirs; do - if [ -d $dir ]; then - proc_dirs="$proc_dirs $dir" - fi - done - - local facet - - # Second verifying readability. + # Second verifying writability. find $proc_dirs \ -type f \ -not -name force_lbug \ @@ -9357,6 +9505,7 @@ test_133g() { [ $(lustre_version_code ost1) -le $(version_code 2.5.54) ] && skip "Too old lustre on ost1" && return + local facet for facet in $SINGLEMDS ost1; do do_facet $facet find $proc_dirs \ -type f \ @@ -9374,6 +9523,81 @@ test_133g() { } run_test 133g "Check for Oopses on bad io area writes/reads in /proc" +test_134a() { + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] && + skip "Need MDS version at least 2.7.54" && return + + mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + cancel_lru_locks mdc + + local nsdir="ldlm.namespaces.*-MDT0000-mdc-*" + local unused=$($LCTL get_param -n $nsdir.lock_unused_count) + [ $unused -eq 0 ] || "$unused locks are not cleared" + + local nr=1000 + createmany -o $DIR/$tdir/f $nr || + error "failed to create $nr files in $DIR/$tdir" + unused=$($LCTL get_param -n $nsdir.lock_unused_count) + + #define OBD_FAIL_LDLM_WATERMARK_LOW 0x327 + do_facet mds1 $LCTL set_param fail_loc=0x327 + do_facet mds1 $LCTL set_param fail_val=500 + touch $DIR/$tdir/m + + echo "sleep 10 seconds ..." + sleep 10 + local lck_cnt=$($LCTL get_param -n $nsdir.lock_unused_count) + + do_facet mds1 $LCTL set_param fail_loc=0 + do_facet mds1 $LCTL set_param fail_val=0 + [ $lck_cnt -lt $unused ] || + error "No locks reclaimed, before:$unused, after:$lck_cnt" + + rm $DIR/$tdir/m + unlinkmany $DIR/$tdir/f $nr +} +run_test 134a "Server reclaims locks when reaching lock_reclaim_threshold" + +test_134b() { + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] && + skip "Need MDS version at least 2.7.54" && return + + mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + cancel_lru_locks mdc + + local low_wm=$(do_facet mds1 $LCTL get_param -n \ + ldlm.lock_reclaim_threshold_mb) + # disable reclaim temporarily + do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=0 + + #define OBD_FAIL_LDLM_WATERMARK_HIGH 0x328 + do_facet mds1 $LCTL set_param fail_loc=0x328 + do_facet mds1 $LCTL set_param fail_val=500 + + $LCTL set_param debug=+trace + + local nr=600 + createmany -o $DIR/$tdir/f $nr & + local create_pid=$! + + echo "Sleep $TIMEOUT seconds ..." + sleep $TIMEOUT + if ! ps -p $create_pid > /dev/null 2>&1; then + do_facet mds1 $LCTL set_param fail_loc=0 + do_facet mds1 $LCTL set_param fail_val=0 + do_facet mds1 $LCTL set_param \ + ldlm.lock_reclaim_threshold_mb=${low_wm}m + error "createmany finished incorrectly!" + fi + do_facet mds1 $LCTL set_param fail_loc=0 + do_facet mds1 $LCTL set_param fail_val=0 + do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=${low_wm}m + wait $create_pid || return 1 + + unlinkmany $DIR/$tdir/f $nr +} +run_test 134b "Server rejects lock request when reaching lock_limit_mb" + test_140() { #bug-17379 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir" @@ -9745,6 +9969,7 @@ test_154a() { [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.2.51) ]] || { skip "Need MDS version at least 2.2.51"; return 0; } [ -z "$(which setfacl)" ] && skip "must have setfacl tool" && return + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return cp /etc/hosts $DIR/$tfile @@ -9771,6 +9996,7 @@ test_154b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.2.51) ]] || { skip "Need MDS version at least 2.2.51"; return 0; } + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return @@ -9867,6 +10093,7 @@ test_154e() run_test 154e ".lustre is not returned by readdir" test_154f() { + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return # create parent directory on a single MDT to avoid cross-MDT hardlinks test_mkdir -p -c1 $DIR/$tdir/d # test dirs inherit from its stripe @@ -9946,6 +10173,7 @@ test_154g() { [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.92) ]] || { skip "Need MDS version at least 2.6.92"; return 0; } + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return mkdir -p $DIR/$tdir llapi_fid_test -d $DIR/$tdir @@ -10080,142 +10308,141 @@ test_156() { local AFTER local file="$DIR/$tfile" - [ "$(facet_fstype ost1)" = "zfs" ] && - skip "LU-1956/LU-2261: stats unimplemented on OSD ZFS" && + [ "$(facet_fstype ost1)" = "zfs" -a \ + $(lustre_version_code ost1 -lt $(version_code 2.6.93)) ] && + skip "LU-1956/LU-2261: stats not implemented on OSD ZFS" && return roc_hit_init - log "Turn on read and write cache" - set_cache read on - set_cache writethrough on - - log "Write data and read it back." - log "Read should be satisfied from the cache." - dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi - - log "Read again; it should be satisfied from the cache." - BEFORE=$AFTER - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Turn on read and write cache" + set_cache read on + set_cache writethrough on + log "Write data and read it back." + log "Read should be satisfied from the cache." + dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" + BEFORE=$(roc_hit) + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - log "Turn off the read cache and turn on the write cache" - set_cache read off - set_cache writethrough on + log "Read again; it should be satisfied from the cache." + BEFORE=$AFTER + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - log "Read again; it should be satisfied from the cache." - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Turn off the read cache and turn on the write cache" + set_cache read off + set_cache writethrough on - log "Read again; it should not be satisfied from the cache." - BEFORE=$AFTER - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == 0"; then - error "IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Read again; it should be satisfied from the cache." + BEFORE=$(roc_hit) + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - log "Write data and read it back." - log "Read should be satisfied from the cache." - dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Read again; it should not be satisfied from the cache." + BEFORE=$AFTER + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == 0"; then + error "IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - log "Read again; it should not be satisfied from the cache." - BEFORE=$AFTER - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == 0"; then - error "IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Write data and read it back." + log "Read should be satisfied from the cache." + dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" + BEFORE=$(roc_hit) + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi + log "Read again; it should not be satisfied from the cache." + BEFORE=$AFTER + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == 0"; then + error "IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - log "Turn off read and write cache" - set_cache read off - set_cache writethrough off + log "Turn off read and write cache" + set_cache read off + set_cache writethrough off - log "Write data and read it back" - log "It should not be satisfied from the cache." - rm -f $file - dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" - cancel_lru_locks osc - BEFORE=`roc_hit` - cat $file >/dev/null - AFTER=`roc_hit` + log "Write data and read it back" + log "It should not be satisfied from the cache." + rm -f $file + dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" + cancel_lru_locks osc + BEFORE=$(roc_hit) + cat $file >/dev/null + AFTER=$(roc_hit) if ! let "AFTER - BEFORE == 0"; then error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi - log "Turn on the read cache and turn off the write cache" - set_cache read on - set_cache writethrough off + log "Turn on the read cache and turn off the write cache" + set_cache read on + set_cache writethrough off - log "Write data and read it back" - log "It should not be satisfied from the cache." - rm -f $file - dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` + log "Write data and read it back" + log "It should not be satisfied from the cache." + rm -f $file + dd if=/dev/urandom of=$file bs=4k count=$CPAGES || error "dd failed" + BEFORE=$(roc_hit) + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) if ! let "AFTER - BEFORE == 0"; then error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi - log "Read again; it should be satisfied from the cache." - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $file >/dev/null - AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" - fi + log "Read again; it should be satisfied from the cache." + BEFORE=$(roc_hit) + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi - rm -f $file + rm -f $file } -run_test 156 "Verification of tunables ============================" +run_test 156 "Verification of tunables" #Changelogs err17935 () { @@ -10380,6 +10607,8 @@ test_160b() { # LU-3587 run_test 160b "Verify that very long rename doesn't crash in changelog" test_160c() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + local rc=0 local server_version=$(lustre_version_code $SINGLEMDS) @@ -10411,9 +10640,39 @@ test_160c() { } run_test 160c "verify that changelog log catch the truncate event" -test_161a() { - [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - test_mkdir -p -c1 $DIR/$tdir +test_160d() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + local server_version=$(lustre_version_code mds1) + local CL_MASK_PARAM="mdd.$MDT0.changelog_mask" + + [[ $server_version -ge $(version_code 2.7.60) ]] || + { skip "Need MDS version at least 2.7.60+"; return; } + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + + # Registration step + local USER=$(do_facet mds1 $LCTL --device $MDT0 \ + changelog_register -n) + + mkdir -p $DIR/$tdir/migrate_dir + $LFS changelog_clear $MDT0 $USER 0 + + $LFS migrate -m 1 $DIR/$tdir/migrate_dir || error "migrate fails" + $LFS changelog $MDT0 + MIGRATES=$($LFS changelog $MDT0 | tail -5 | grep -c "MIGRT") + $LFS changelog_clear $MDT0 $USER 0 + [ $MIGRATES -eq 1 ] || + error "MIGRATE changelog mask count $MIGRATES != 1" + + # Deregistration step + do_facet mds1 $LCTL --device $MDT0 changelog_deregister $USER +} +run_test 160d "verify that changelog log catch the migrate event" + +test_161a() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + test_mkdir -p -c1 $DIR/$tdir cp /etc/hosts $DIR/$tdir/$tfile test_mkdir -c1 $DIR/$tdir/foo1 test_mkdir -c1 $DIR/$tdir/foo2 @@ -10615,7 +10874,7 @@ check_path() { echo "fid $fid resolves to path $path (expected $expected)" } -test_162() { +test_162a() { # was test_162 # Make changes to filesystem [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p -c1 $DIR/$tdir/d2 @@ -10663,7 +10922,7 @@ test_162() { return 0 } -run_test 162 "path lookup sanity" +run_test 162a "path lookup sanity" test_162b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return @@ -10699,6 +10958,8 @@ run_test 162b "striped directory path lookup sanity" # LU-4239: Verify fid2path works with paths 100 or more directories deep test_162c() { + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.51) ]] && + skip "Need MDS version at least 2.7.51" && return test_mkdir $DIR/$tdir.local test_mkdir $DIR/$tdir.remote local lpath=$tdir.local @@ -10963,31 +11224,32 @@ test_181() { # bug 22177 run_test 181 "Test open-unlinked dir ========================" test_182() { - [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - # disable MDC RPC lock wouldn't crash client local fcount=1000 - local tcount=4 + local tcount=10 mkdir -p $DIR/$tdir || error "creating dir $DIR/$tdir" -#define OBD_FAIL_MDC_RPCS_SEM 0x804 - $LCTL set_param fail_loc=0x804 - for (( i=0; i < $tcount; i++ )) ; do + $LCTL set_param mdc.*.rpc_stats=clear + + for (( i = 0; i < $tcount; i++ )) ; do mkdir $DIR/$tdir/$i + done + + for (( i = 0; i < $tcount; i++ )) ; do createmany -o $DIR/$tdir/$i/f- $fcount & done wait - for (( i=0; i < $tcount; i++ )) ; do + for (( i = 0; i < $tcount; i++ )) ; do unlinkmany $DIR/$tdir/$i/f- $fcount & done wait - rm -rf $DIR/$tdir + $LCTL get_param mdc.*.rpc_stats - $LCTL set_param fail_loc=0 + rm -rf $DIR/$tdir } -run_test 182 "Disable MDC RPCs semaphore wouldn't crash client ================" +run_test 182 "Test parallel modify metadata operations ================" test_183() { # LU-2275 remote_mds_nodsh && skip "remote MDS with nodsh" && return @@ -11200,6 +11462,7 @@ test_185() { # LU-2441 local mtime1=$(stat -c "%Y" $DIR/$tdir) local fid=$($MULTIOP $DIR/$tdir VFw4096c) || error "cannot create/write a volatile file" + [ "$FILESET" == "" ] && $CHECKSTAT -t file $MOUNT/.lustre/fid/$fid 2>/dev/null && error "FID is still valid after close" @@ -11214,8 +11477,10 @@ test_185() { # LU-2441 # is unfortunately eaten by multiop_bg_pause local n=$((${fidv[1]} + 1)) local next_fid="${fidv[0]}:$(printf "0x%x" $n):${fidv[2]}" - $CHECKSTAT -t file $MOUNT/.lustre/fid/$next_fid || - error "FID is missing before close" + if [ "$FILESET" == "" ]; then + $CHECKSTAT -t file $MOUNT/.lustre/fid/$next_fid || + error "FID is missing before close" + fi kill -USR1 $multi_pid # 1 second delay, so if mtime change we will see it sleep 1 @@ -11225,6 +11490,10 @@ test_185() { # LU-2441 run_test 185 "Volatile file support" test_187a() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.0) ] && + skip "Need MDS version at least 2.3.0" && return + local dir0=$DIR/$tdir/$testnum mkdir -p $dir0 || error "creating dir $dir0" @@ -11242,6 +11511,10 @@ test_187a() { run_test 187a "Test data version change" test_187b() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.0) ] && + skip "Need MDS version at least 2.3.0" && return + local dir0=$DIR/$tdir/$testnum mkdir -p $dir0 || error "creating dir $dir0" @@ -11258,6 +11531,7 @@ run_test 187b "Test data version change on volatile file" test_200() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_mgs_nodsh && skip "remote MGS with nodsh" && return + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return local POOL=${POOL:-cea1} local POOL_ROOT=${POOL_ROOT:-$DIR/d200.pools} @@ -11435,40 +11709,57 @@ elif [ -n "$LOADL_STEPID" ]; then # LoadLeveller elif [ -n "$JOB_ID" ]; then # Sun Grid Engine JOBENV=JOB_ID else - JOBENV=FAKE_JOBID + $LCTL list_param jobid_name > /dev/null 2>&1 + if [ $? -eq 0 ]; then + JOBENV=nodelocal + else + JOBENV=FAKE_JOBID + fi fi verify_jobstats() { - local cmd=$1 - local target=$2 + local cmd=($1) + shift + local facets="$@" + +# we don't really need to clear the stats for this test to work, since each +# command has a unique jobid, but it makes debugging easier if needed. +# for facet in $facets; do +# local dev=$(convert_facet2label $facet) +# # clear old jobstats +# do_facet $facet lctl set_param *.$dev.job_stats="clear" +# done + + # use a new JobID for each test, or we might see an old one + [ "$JOBENV" = "FAKE_JOBID" ] && + FAKE_JOBID=id.$testnum.$(basename ${cmd[0]}).$RANDOM - # clear old jobstats - do_facet $SINGLEMDS lctl set_param mdt.*.job_stats="clear" - do_facet ost1 lctl set_param obdfilter.*.job_stats="clear" + JOBVAL=${!JOBENV} - # use a new JobID for this test, or we might see an old one - [ "$JOBENV" = "FAKE_JOBID" ] && FAKE_JOBID=test_id.$testnum.$RANDOM + [ "$JOBENV" = "nodelocal" ] && { + FAKE_JOBID=id.$testnum.$(basename ${cmd[0]}).$RANDOM + $LCTL set_param jobid_name=$FAKE_JOBID + JOBVAL=$FAKE_JOBID + } - JOBVAL=${!JOBENV} - log "Test: $cmd" + log "Test: ${cmd[*]}" log "Using JobID environment variable $JOBENV=$JOBVAL" if [ $JOBENV = "FAKE_JOBID" ]; then - FAKE_JOBID=$JOBVAL $cmd + FAKE_JOBID=$JOBVAL ${cmd[*]} else - $cmd + ${cmd[*]} fi - if [ "$target" = "mdt" -o "$target" = "both" ]; then - FACET="$SINGLEMDS" # will need to get MDS number for DNE - do_facet $FACET lctl get_param mdt.*.job_stats | - grep $JOBVAL || error "No job stats found on MDT $FACET" - fi - if [ "$target" = "ost" -o "$target" = "both" ]; then - FACET=ost1 - do_facet $FACET lctl get_param obdfilter.*.job_stats | - grep $JOBVAL || error "No job stats found on OST $FACET" - fi + # all files are created on OST0000 + for facet in $facets; do + local stats="*.$(convert_facet2label $facet).job_stats" + if [ $(do_facet $facet lctl get_param $stats | + grep -c $JOBVAL) -ne 1 ]; then + do_facet $facet lctl get_param $stats + error "No jobstats for $JOBVAL found on $facet::$stats" + fi + done } jobstats_set() { @@ -11478,54 +11769,84 @@ jobstats_set() { wait_update $HOSTNAME "$LCTL get_param -n jobid_var" $NEW_JOBENV } +cleanup_205() { + do_facet $SINGLEMDS \ + $LCTL set_param mdt.*.job_cleanup_interval=$OLD_INTERVAL + [ $OLD_JOBENV != $JOBENV ] && jobstats_set $OLD_JOBENV + do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER +} + test_205() { # Job stats [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_mgs_nodsh && skip "remote MGS with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return + [ -z "$(lctl get_param -n mdc.*.connect_flags | grep jobstats)" ] && skip "Server doesn't support jobstats" && return 0 [[ $JOBID_VAR = disable ]] && skip "jobstats is disabled" && return - local cmd OLD_JOBENV=$($LCTL get_param -n jobid_var) if [ $OLD_JOBENV != $JOBENV ]; then jobstats_set $JOBENV - trap jobstats_set EXIT + trap cleanup_205 EXIT fi - local user=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ - changelog_register -n) - echo "Registered as changelog user $user" + CL_USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) + echo "Registered as changelog user $CL_USER" + OLD_INTERVAL=$(do_facet $SINGLEMDS \ + lctl get_param -n mdt.*.job_cleanup_interval) + local interval_new=5 + do_facet $SINGLEMDS \ + $LCTL set_param mdt.*.job_cleanup_interval=$interval_new + local start=$SECONDS + + local cmd # mkdir - cmd="mkdir $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + cmd="mkdir $DIR/$tdir" + verify_jobstats "$cmd" "$SINGLEMDS" # rmdir - cmd="rm -fr $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + cmd="rmdir $DIR/$tdir" + verify_jobstats "$cmd" "$SINGLEMDS" + # mkdir on secondary MDT + if [ $MDSCOUNT -gt 1 ]; then + cmd="lfs mkdir -i 1 $DIR/$tdir.remote" + verify_jobstats "$cmd" "mds2" + fi # mknod cmd="mknod $DIR/$tfile c 1 3" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" # unlink cmd="rm -f $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" + # create all files on OST0000 so verify_jobstats can find OST stats # open & close cmd="$SETSTRIPE -i 0 -c 1 $DIR/$tfile" - verify_jobstats "$cmd" "mdt" + verify_jobstats "$cmd" "$SINGLEMDS" # setattr cmd="touch $DIR/$tfile" - verify_jobstats "$cmd" "both" + verify_jobstats "$cmd" "$SINGLEMDS ost1" # write cmd="dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=sync" - verify_jobstats "$cmd" "ost" + verify_jobstats "$cmd" "ost1" # read + cancel_lru_locks osc cmd="dd if=$DIR/$tfile of=/dev/null bs=1M count=1 iflag=direct" - verify_jobstats "$cmd" "ost" + verify_jobstats "$cmd" "ost1" # truncate cmd="$TRUNCATE $DIR/$tfile 0" - verify_jobstats "$cmd" "both" + verify_jobstats "$cmd" "$SINGLEMDS ost1" # rename - cmd="mv -f $DIR/$tfile $DIR/jobstats_test_rename" - verify_jobstats "$cmd" "mdt" + cmd="mv -f $DIR/$tfile $DIR/$tdir.rename" + verify_jobstats "$cmd" "$SINGLEMDS" + # jobstats expiry - sleep until old stats should be expired + local left=$((interval_new + 2 - (SECONDS - start))) + [ $left -ge 0 ] && echo "sleep $left for expiry" && sleep $((left + 1)) + cmd="mkdir $DIR/$tdir.expire" + verify_jobstats "$cmd" "$SINGLEMDS" + [ $(do_facet $SINGLEMDS lctl get_param *.*.job_stats | + grep -c "job_id.*mkdir") -gt 1 ] && error "old jobstats not expired" # Ensure that jobid are present in changelog (if supported by MDS) if [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.52) ] @@ -11545,10 +11866,7 @@ test_205() { # Job stats error "Unexpected jobids when jobid_var=$JOBENV" fi - # cleanup - rm -f $DIR/jobstats_test_rename - - [ $OLD_JOBENV != $JOBENV ] && jobstats_set $OLD_JOBENV + cleanup_205 } run_test 205 "Verify job stats" @@ -11603,9 +11921,9 @@ test_208() { # for now as only exclusive open is supported. After generic lease # is done, this test suite should be revised. - Jinshan + remote_mds_nodsh && skip "remote MDS with nodsh" && return [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.52) ]] || { skip "Need MDS version at least 2.4.52"; return 0; } - remote_mds_nodsh && skip "remote MDS with nodsh" && return echo "==== test 1: verify get lease work" $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eRE+eU || error "get lease error" @@ -11933,11 +12251,14 @@ run_test 219 "LU-394: Write partial won't cause uncontiguous pages vec at LND" test_220() { #LU-325 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_ost_nodsh && skip "remote OST with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_mgs_nodsh && skip "remote MGS with nodsh" && return local OSTIDX=0 - test_mkdir -p $DIR/$tdir - local OST=$($LFS osts | grep ${OSTIDX}": " | \ - awk '{print $2}' | sed -e 's/_UUID$//') + # create on MDT0000 so the last_id and next_id are correct + mkdir $DIR/$tdir + local OST=$($LFS df $DIR | awk '/OST:'$OSTIDX'/ { print $1 }') + OST=${OST%_UUID} # on the mdt's osc local mdtosc_proc1=$(get_mdtosc_proc_path $SINGLEMDS $OST) @@ -12103,6 +12424,7 @@ run_test 224c "Don't hang if one of md lost during large bulk RPC" MDSSURVEY=${MDSSURVEY:-$(which mds-survey 2>/dev/null || true)} test_225a () { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return if [ -z ${MDSSURVEY} ]; then skip_env "mds-survey not found" && return fi @@ -12132,7 +12454,7 @@ run_test 225a "Metadata survey sanity with zero-stripe" test_225b () { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - + remote_mds_nodsh && skip "remote MDS with nodsh" && return if [ -z ${MDSSURVEY} ]; then skip_env "mds-survey not found" && return fi @@ -12481,8 +12803,8 @@ test_230b() { ln -s $migrate_dir/$tfile $migrate_dir/${tfile}_ln ln -s $other_dir/$tfile $migrate_dir/${tfile}_ln_other - $LFS mv -v -M $MDTIDX $migrate_dir || - error "migrate remote dir error" + $LFS migrate -m $MDTIDX $migrate_dir || + error "fails on migrating remote dir to MDT1" echo "migratate to MDT1, then checking.." for ((i = 0; i < 10; i++)); do @@ -12545,8 +12867,9 @@ test_230b() { #migrate back to MDT0 MDTIDX=0 - $LFS mv -v -M $MDTIDX $migrate_dir || - error "migrate remote dir error" + + $LFS migrate -m $MDTIDX $migrate_dir || + error "fails on migrating remote dir to MDT0" echo "migrate back to MDT0, checking.." for file in $(find $migrate_dir); do @@ -12603,6 +12926,7 @@ run_test 230b "migrate directory" test_230c() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local MDTIDX=1 local mdt_index @@ -12622,8 +12946,14 @@ test_230c() { do_facet mds1 lctl set_param fail_loc=0x20001801 do_facet mds1 lctl set_param fail_val=5 local t=$(ls $migrate_dir | wc -l) - $LFS mv --mdt-index $MDTIDX $migrate_dir && + $LFS migrate --mdt-index $MDTIDX $migrate_dir && error "migrate should fail after 5 entries" + + mkdir $migrate_dir/dir && + error "mkdir succeeds under migrating directory" + touch $migrate_dir/file && + error "touch file succeeds under migrating directory" + local u=$(ls $migrate_dir | wc -l) [ "$u" == "$t" ] || error "$u != $t during migration" @@ -12634,7 +12964,7 @@ test_230c() { do_facet mds1 lctl set_param fail_loc=0 do_facet mds1 lctl set_param fail_val=0 - $LFS mv -M $MDTIDX $migrate_dir || + $LFS migrate -m $MDTIDX $migrate_dir || error "migrate open files should failed with open files" echo "Finish migration, then checking.." @@ -12666,7 +12996,8 @@ test_230d() { error "create files under remote dir failed $i" done - $LFS mv -M $MDTIDX -v $migrate_dir || error "migrate remote dir error" + $LFS migrate -m $MDTIDX $migrate_dir || + error "migrate remote dir error" echo "Finish migration, then checking.." for file in $(find $migrate_dir); do @@ -12679,6 +13010,152 @@ test_230d() { } run_test 230d "check migrate big directory" +test_230e() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local i + local j + local a_fid + local b_fid + + mkdir -p $DIR/$tdir + mkdir $DIR/$tdir/migrate_dir + mkdir $DIR/$tdir/other_dir + touch $DIR/$tdir/migrate_dir/a + ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/b + ls $DIR/$tdir/other_dir + + $LFS migrate -m 1 $DIR/$tdir/migrate_dir || + error "migrate dir fails" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir) + [ $mdt_index == 1 ] || error "migrate_dir is not on MDT1" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a) + [ $mdt_index == 0 ] || error "a is not on MDT0" + + $LFS migrate -m 1 $DIR/$tdir/other_dir || + error "migrate dir fails" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/other_dir) + [ $mdt_index == 1 ] || error "other_dir is not on MDT1" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a) + [ $mdt_index == 1 ] || error "a is not on MDT1" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/other_dir/b) + [ $mdt_index == 1 ] || error "b is not on MDT1" + + a_fid=$($LFS path2fid $DIR/$tdir/migrate_dir/a) + b_fid=$($LFS path2fid $DIR/$tdir/other_dir/b) + + [ "$a_fid" = "$b_fid" ] || error "different fid after migration" + + rm -rf $DIR/$tdir || error "rm dir failed after migration" +} +run_test 230e "migrate mulitple local link files" + +test_230f() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local a_fid + local ln_fid + + mkdir -p $DIR/$tdir + mkdir $DIR/$tdir/migrate_dir + $LFS mkdir -i1 $DIR/$tdir/other_dir + touch $DIR/$tdir/migrate_dir/a + ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/ln1 + ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/ln2 + ls $DIR/$tdir/other_dir + + # a should be migrated to MDT1, since no other links on MDT0 + $LFS migrate -m 1 $DIR/$tdir/migrate_dir || + error "migrate dir fails" + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir) + [ $mdt_index == 1 ] || error "migrate_dir is not on MDT1" + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a) + [ $mdt_index == 1 ] || error "a is not on MDT1" + + # a should stay on MDT1, because it is a mulitple link file + $LFS migrate -m 0 $DIR/$tdir/migrate_dir || + error "migrate dir fails" + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a) + [ $mdt_index == 1 ] || error "a is not on MDT1" + + $LFS migrate -m 1 $DIR/$tdir/migrate_dir || + error "migrate dir fails" + + a_fid=$($LFS path2fid $DIR/$tdir/migrate_dir/a) + ln_fid=$($LFS path2fid $DIR/$tdir/other_dir/ln1) + [ "$a_fid" = "$ln_fid" ] || error "different fid after migrate to MDT1" + + rm -rf $DIR/$tdir/other_dir/ln1 || error "unlink ln1 fails" + rm -rf $DIR/$tdir/other_dir/ln2 || error "unlink ln2 fails" + + # a should be migrated to MDT0, since no other links on MDT1 + $LFS migrate -m 0 $DIR/$tdir/migrate_dir || + error "migrate dir fails" + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a) + [ $mdt_index == 0 ] || error "a is not on MDT0" + + rm -rf $DIR/$tdir || error "rm dir failed after migration" +} +run_test 230f "migrate mulitple remote link files" + +test_230g() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + mkdir -p $DIR/$tdir/migrate_dir + + $LFS migrate -m 1000 $DIR/$tdir/migrate_dir && + error "migrating dir to non-exist MDT succeeds" + true +} +run_test 230g "migrate dir to non-exist MDT" + +test_230h() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && + skip "Need MDS version at least 2.7.64" && return + local mdt_index + + mkdir -p $DIR/$tdir/migrate_dir + + $LFS migrate -m1 $DIR && + error "migrating mountpoint1 should fail" + + $LFS migrate -m1 $DIR/$tdir/.. && + error "migrating mountpoint2 should fail" + + $LFS migrate -m1 $DIR/$tdir/migrate_dir/.. || + error "migrating $tdir fail" + + mdt_index=$($LFS getstripe -M $DIR/$tdir) + [ $mdt_index == 1 ] || error "$mdt_index != 1 after migration" + + mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir) + [ $mdt_index == 1 ] || error "$mdt_index != 1 after migration" + +} +run_test 230h "migrate .. and root" + +test_230i() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + mkdir -p $DIR/$tdir/migrate_dir + + $LFS migrate -m 1 $DIR/$tdir/migrate_dir/ || + error "migration fails with a tailing slash" + + $LFS migrate -m 0 $DIR/$tdir/migrate_dir// || + error "migration fails with two tailing slashes" +} +run_test 230i "lfs migrate -m tolerates trailing slashes" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc @@ -12690,16 +13167,20 @@ test_231a() # clear the OSC stats $LCTL set_param osc.*.stats=0 &>/dev/null + stop_writeback # Client writes $bulk_size - there must be 1 rpc for $max_pages. dd if=/dev/zero of=$DIR/$tdir/$tfile bs=$bulk_size count=1 \ oflag=direct &>/dev/null || error "dd failed" + sync; sleep 1; sync # just to be safe local nrpcs=$($LCTL get_param osc.*.stats |awk '/ost_write/ {print $2}') if [ x$nrpcs != "x1" ]; then - error "found $nrpc ost_write RPCs, not 1 as expected" + $LCTL get_param osc.*.stats + error "found $nrpcs ost_write RPCs, not 1 as expected" fi + start_writeback # Drop the OSC cache, otherwise we will read from it cancel_lru_locks osc @@ -12712,7 +13193,8 @@ test_231a() nrpcs=$($LCTL get_param osc.*.stats | awk '/ost_read/ { print $2 }') if [ x$nrpcs != "x1" ]; then - error "found $nrpc ost_read RPCs, not 1 as expected" + $LCTL get_param osc.*.stats + error "found $nrpcs ost_read RPCs, not 1 as expected" fi } run_test 231a "checking that reading/writing of BRW RPC size results in one RPC" @@ -12746,6 +13228,7 @@ run_test 232 "failed lock should not block umount" test_233a() { [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.64) ] || { skip "Need MDS version at least 2.3.64"; return; } + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return local fid=$($LFS path2fid $MOUNT) stat $MOUNT/.lustre/fid/$fid > /dev/null || @@ -12756,6 +13239,7 @@ run_test 233a "checking that OBF of the FS root succeeds" test_233b() { [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.90) ] || { skip "Need MDS version at least 2.5.90"; return; } + [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return local fid=$($LFS path2fid $MOUNT/.lustre) stat $MOUNT/.lustre/fid/$fid > /dev/null || @@ -12892,6 +13376,28 @@ test_239() { } run_test 239 "osp_sync test" +test_239a() { #LU-5297 + touch $DIR/$tfile + #define OBD_FAIL_OSP_CHECK_INVALID_REC 0x2100 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x2100 + chgrp $RUNAS_GID $DIR/$tfile + wait_delete_completed +} +run_test 239a "process invalid osp sync record correctly" + +test_239b() { #LU-5297 + touch $DIR/$tfile1 + #define OBD_FAIL_OSP_CHECK_ENOMEM 0x2101 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x2101 + chgrp $RUNAS_GID $DIR/$tfile1 + wait_delete_completed + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + touch $DIR/$tfile2 + chgrp $RUNAS_GID $DIR/$tfile2 + wait_delete_completed +} +run_test 239b "process osp sync record with ENOMEM error correctly" + test_240() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return @@ -12915,7 +13421,7 @@ run_test 240 "race between ldlm enqueue and the connection RPC (no ASSERT)" test_241_bio() { for LOOP in $(seq $1); do dd if=$DIR/$tfile of=/dev/null bs=40960 count=1 2>/dev/null - cancel_lru_locks osc + cancel_lru_locks osc || true done } @@ -12977,6 +13483,128 @@ test_244() } run_test 244 "sendfile with group lock tests" +test_245() { + local flagname="multi_mod_rpcs" + local connect_data_name="max_mod_rpcs" + local out + + # check if multiple modify RPCs flag is set + out=$($LCTL get_param mdc.$FSNAME-MDT0000-*.import | + grep "connect_flags:") + echo "$out" + + echo "$out" | grep -qw $flagname + if [ $? -ne 0 ]; then + echo "connect flag $flagname is not set" + return + fi + + # check if multiple modify RPCs data is set + out=$($LCTL get_param mdc.$FSNAME-MDT0000-*.import) + echo "$out" + + echo "$out" | grep -qw $connect_data_name || + error "import should have connect data $connect_data_name" +} +run_test 245 "check mdc connection flag/data: multiple modify RPCs" + +test_246() { # LU-7371 + [ $(lustre_version_code ost1) -lt $(version_code 2.7.62) ] && + skip "Need OST version >= 2.7.62" && return 0 + do_facet ost1 $LCTL set_param fail_val=4095 +#define OBD_FAIL_OST_READ_SIZE 0x234 + do_facet ost1 $LCTL set_param fail_loc=0x234 + $LFS setstripe $DIR/$tfile -i 0 -c 1 + dd if=/dev/zero of=$DIR/$tfile bs=4095 count=1 > /dev/null 2>&1 + cancel_lru_locks $FSNAME-OST0000 + dd if=$DIR/$tfile of=/dev/null bs=1048576 || error "Read failed" +} +run_test 246 "Read file of size 4095 should return right length" + +test_247a() { + lctl get_param -n mdc.$FSNAME-MDT0000*.import | + grep -q subtree || + { skip "Fileset feature is not supported"; return; } + + local submount=${MOUNT}_$tdir + + mkdir $MOUNT/$tdir + mkdir -p $submount || error "mkdir $submount failed" + FILESET="$FILESET/$tdir" mount_client $submount || + error "mount $submount failed" + echo foo > $submount/$tfile || error "write $submount/$tfile failed" + [ $(cat $MOUNT/$tdir/$tfile) = "foo" ] || + error "read $MOUNT/$tdir/$tfile failed" + umount_client $submount || error "umount $submount failed" + rmdir $submount +} +run_test 247a "mount subdir as fileset" + +test_247b() { + lctl get_param -n mdc.$FSNAME-MDT0000*.import | grep -q subtree || + { skip "Fileset feature is not supported"; return; } + + local submount=${MOUNT}_$tdir + + rm -rf $MOUNT/$tdir + mkdir -p $submount || error "mkdir $submount failed" + SKIP_FILESET=1 + FILESET="$FILESET/$tdir" mount_client $submount && + error "mount $submount should fail" + rmdir $submount +} +run_test 247b "mount subdir that dose not exist" + +test_247c() { + lctl get_param -n mdc.$FSNAME-MDT0000*.import | grep -q subtree || + { skip "Fileset feature is not supported"; return; } + + local submount=${MOUNT}_$tdir + + mkdir -p $MOUNT/$tdir/dir1 + mkdir -p $submount || error "mkdir $submount failed" + FILESET="$FILESET/$tdir" mount_client $submount || + error "mount $submount failed" + local fid=$($LFS path2fid $MOUNT/) + $LFS fid2path $submount $fid && error "fid2path should fail" + umount_client $submount || error "umount $submount failed" + rmdir $submount +} +run_test 247c "running fid2path outside root" + +test_247d() { + lctl get_param -n mdc.$FSNAME-MDT0000*.import | grep -q subtree || + { skip "Fileset feature is not supported"; return; } + + local submount=${MOUNT}_$tdir + + mkdir -p $MOUNT/$tdir/dir1 + mkdir -p $submount || error "mkdir $submount failed" + FILESET="$FILESET/$tdir" mount_client $submount || + error "mount $submount failed" + local fid=$($LFS path2fid $submount/dir1) + $LFS fid2path $submount $fid || error "fid2path should succeed" + umount_client $submount || error "umount $submount failed" + rmdir $submount +} +run_test 247d "running fid2path inside root" + +# LU-8037 +test_247e() { + lctl get_param -n mdc.$FSNAME-MDT0000*.import | + grep -q subtree || + { skip "Fileset feature is not supported"; return; } + + local submount=${MOUNT}_$tdir + + mkdir $MOUNT/$tdir + mkdir -p $submount || error "mkdir $submount failed" + FILESET="$FILESET/.." mount_client $submount && + error "mount $submount should fail" + rmdir $submount +} +run_test 247e "mount .. as fileset" + test_250() { [ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR/$tfile) + 1)))" = "zfs" ] \ && skip "no 16TB file size limit on ZFS" && return @@ -13008,6 +13636,162 @@ test_251() { } run_test 251 "Handling short read and write correctly" +test_252() { + local tgt + local dev + local out + local uuid + local num + local gen + + if [ "$(facet_fstype ost1)" != "ldiskfs" -o \ + "$(facet_fstype mds1)" != "ldiskfs" ]; then + skip "can only run lr_reader on ldiskfs target" + return + fi + + # check lr_reader on OST0000 + tgt=ost1 + dev=$(facet_device $tgt) + out=$(do_facet $tgt $LR_READER $dev) + [ $? -eq 0 ] || error "$LR_READER failed on target $tgt device $dev" + echo "$out" + uuid=$(echo "$out" | grep -i uuid | awk '{ print $2 }') + [ "$uuid" == "$(ostuuid_from_index 0)" ] || + error "Invalid uuid returned by $LR_READER on target $tgt" + echo -e "uuid returned by $LR_READER is '$uuid'\n" + + # check lr_reader -c on MDT0000 + tgt=mds1 + dev=$(facet_device $tgt) + if ! do_facet $tgt $LR_READER -h | grep -q OPTIONS; then + echo "$LR_READER does not support additional options" + return 0 + fi + out=$(do_facet $tgt $LR_READER -c $dev) + [ $? -eq 0 ] || error "$LR_READER failed on target $tgt device $dev" + echo "$out" + num=$(echo "$out" | grep -c "mdtlov") + [ "$num" -eq $((MDSCOUNT - 1)) ] || + error "Invalid number of mdtlov clients returned by $LR_READER" + echo -e "Number of mdtlov clients returned by $LR_READER is '$num'\n" + + # check lr_reader -cr on MDT0000 + out=$(do_facet $tgt $LR_READER -cr $dev) + [ $? -eq 0 ] || error "$LR_READER failed on target $tgt device $dev" + echo "$out" + echo "$out" | grep -q "^reply_data:$" || + error "$LR_READER should have returned 'reply_data' section" + num=$(echo "$out" | grep -c "client_generation") + echo -e "Number of reply data returned by $LR_READER is '$num'\n" +} +run_test 252 "check lr_reader tool" + +test_254() { + local cl_user + + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + do_facet mds1 $LCTL get_param -n mdd.$MDT0.changelog_size || + { skip "MDS does not support changelog_size" && return; } + + cl_user=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n) + echo "Registered as changelog user $cl_user" + + $LFS changelog_clear $MDT0 $cl_user 0 + + local size1=$(do_facet mds1 \ + $LCTL get_param -n mdd.$MDT0.changelog_size) + echo "Changelog size $size1" + + rm -rf $DIR/$tdir + $LFS mkdir -i 0 $DIR/$tdir + # change something + mkdir -p $DIR/$tdir/pics/2008/zachy + touch $DIR/$tdir/pics/2008/zachy/timestamp + cp /etc/hosts $DIR/$tdir/pics/2008/zachy/pic1.jpg + mv $DIR/$tdir/pics/2008/zachy $DIR/$tdir/pics/zach + ln $DIR/$tdir/pics/zach/pic1.jpg $DIR/$tdir/pics/2008/portland.jpg + ln -s $DIR/$tdir/pics/2008/portland.jpg $DIR/$tdir/pics/desktop.jpg + rm $DIR/$tdir/pics/desktop.jpg + + local size2=$(do_facet mds1 \ + $LCTL get_param -n mdd.$MDT0.changelog_size) + echo "Changelog size after work $size2" + + do_facet mds1 $LCTL --device $MDT0 changelog_deregister $cl_user + + if (( size2 <= size1 )); then + error "Changelog size after work should be greater than original" + fi + return 0 +} +run_test 254 "Check changelog size" + +test_256() { + local cl_user + local cat_sl + local mdt_dev + + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [ "$(facet_fstype mds1)" != "ldiskfs" ] && + skip "non-ldiskfs backend" && return + + mdt_dev=$(mdsdevname 1) + echo $mdt_dev + cl_user=$(do_facet mds1 \ + "$LCTL get_param -n mdd.$MDT0.changelog_users | grep cl") + if [[ -n $cl_user ]]; then + skip "active changelog user" + return + fi + + cl_user=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n) + echo "Registered as changelog user $cl_user" + + rm -rf $DIR/$tdir + mkdir -p $DIR/$tdir + + $LFS changelog_clear $MDT0 $cl_user 0 + + # change something + touch $DIR/$tdir/{1..10} + + # stop the MDT + stop mds1 || error "Fail to stop MDT." + + # remount the MDT + start mds1 $mdt_dev $MDS_MOUNT_OPTS || error "Fail to start MDT." + + #after mount new plainllog is used + touch $DIR/$tdir/{11..19} + cat_sl=$(do_facet mds1 \ + "$DEBUGFS -R \\\"dump changelog_catalog cat.dmp\\\" $mdt_dev; \ + llog_reader cat.dmp | grep \\\"type=1064553b\\\" | wc -l") + + if (( cat_sl != 2 )); then + do_facet mds1 $LCTL --device $MDT0 changelog_deregister $cl_user + error "Changelog catalog has wrong number of slots $cat_sl" + fi + + $LFS changelog_clear $MDT0 $cl_user 0 + + cat_sl=$(do_facet mds1 \ + "$DEBUGFS -R \\\"dump changelog_catalog cat.dmp\\\" $mdt_dev; \ + llog_reader cat.dmp | grep \\\"type=1064553b\\\" | wc -l") + + do_facet mds1 $LCTL --device $MDT0 changelog_deregister $cl_user + + if (( cat_sl == 2 )); then + error "Empty plain llog was not deleted from changelog catalog" + fi + if (( cat_sl != 1 )); then + error "Active plain llog shouldn\`t be deleted from catalog" + fi +} +run_test 256 "Check llog delete for empty and not full state" + cleanup_test_300() { trap 0 umask $SAVE_UMASK @@ -13029,6 +13813,8 @@ test_striped_dir() { local mode=$(stat -c%a $DIR/$tdir/striped_dir) [ "$mode" = "755" ] || error "expect 755 got $mode" + $LFS getdirstripe $DIR/$tdir/striped_dir > /dev/null 2>&1 || + error "getdirstripe failed" stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir) if [ "$stripe_count" != "2" ]; then error "stripe_count is $stripe_count, expect 2" @@ -13168,6 +13954,8 @@ test_300d() { run_test 300d "check default stripe under striped directory" test_300e() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local stripe_count @@ -13212,6 +14000,8 @@ test_300e() { run_test 300e "check rename under striped directory" test_300f() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local stripe_count @@ -13279,6 +14069,7 @@ test_300_check_default_striped_dir() for dir in $(find $DIR/$tdir/$dirname/*); do stripe_count=$($LFS getdirstripe -c $dir) [ $stripe_count -eq $default_count ] || + [ $stripe_count -eq 0 -o $default_count -eq 1 ] || error "stripe count $default_count != $stripe_count for $dir" stripe_index=$($LFS getdirstripe -i $dir) @@ -13298,6 +14089,8 @@ test_300_check_default_striped_dir() } test_300g() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local dir local stripe_count @@ -13329,6 +14122,8 @@ test_300g() { run_test 300g "check default striped directory for normal directory" test_300h() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local dir local stripe_count @@ -13357,6 +14152,8 @@ test_300h() { run_test 300h "check default striped directory for striped directory" test_300i() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return local stripe_count @@ -13396,6 +14193,239 @@ test_300i() { } run_test 300i "client handle unknown hash type striped directory" +test_300j() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local stripe_count + local file + + mkdir $DIR/$tdir + + #define OBD_FAIL_SPLIT_UPDATE_REC 0x1702 + $LCTL set_param fail_loc=0x1702 + $LFS setdirstripe -i 0 -c$MDSCOUNT -t all_char $DIR/$tdir/striped_dir || + error "set striped dir error" + + createmany -o $DIR/$tdir/striped_dir/f- 10 || + error "create files under striped dir failed" + + $LCTL set_param fail_loc=0 + + rm -rf $DIR/$tdir || error "unlink striped dir fails" + + return 0 +} +run_test 300j "test large update record" + +test_300k() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local stripe_count + local file + + mkdir $DIR/$tdir + + #define OBD_FAIL_LARGE_STRIPE 0x1703 + $LCTL set_param fail_loc=0x1703 + $LFS setdirstripe -i 0 -c512 $DIR/$tdir/striped_dir || + error "set striped dir error" + $LCTL set_param fail_loc=0 + + $LFS getdirstripe $DIR/$tdir/striped_dir || + error "getstripeddir fails" + rm -rf $DIR/$tdir/striped_dir || + error "unlink striped dir fails" + + return 0 +} +run_test 300k "test large striped directory" + +test_300l() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local stripe_index + + test_mkdir -p $DIR/$tdir/striped_dir + chown $RUNAS_ID $DIR/$tdir/striped_dir || + error "chown $RUNAS_ID failed" + $LFS setdirstripe -i 1 -D $DIR/$tdir/striped_dir || + error "set default striped dir failed" + + #define OBD_FAIL_MDS_STALE_DIR_LAYOUT 0x158 + $LCTL set_param fail_loc=0x80000158 + $RUNAS mkdir $DIR/$tdir/striped_dir/test_dir || error "create dir fails" + + stripe_index=$($LFS getdirstripe -i $DIR/$tdir/striped_dir/test_dir) + [ $stripe_index -eq 1 ] || + error "expect 1 get $stripe_index for $dir" +} +run_test 300l "non-root user to create dir under striped dir with stale layout" + +test_300m() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -ge 2 ] && skip "Only for single MDT" && return + + mkdir -p $DIR/$tdir/striped_dir + $LFS setdirstripe -D -c 1 $DIR/$tdir/striped_dir || + error "set default stripes dir error" + + mkdir $DIR/$tdir/striped_dir/a || error "mkdir a fails" + + stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir/a) + [ $stripe_count -eq 0 ] || + error "expect 0 get $stripe_count for a" + + $LFS setdirstripe -D -c 2 $DIR/$tdir/striped_dir || + error "set default stripes dir error" + + mkdir $DIR/$tdir/striped_dir/b || error "mkdir b fails" + + stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir/b) + [ $stripe_count -eq 0 ] || + error "expect 0 get $stripe_count for b" + + $LFS setdirstripe -D -c1 -i2 $DIR/$tdir/striped_dir || + error "set default stripes dir error" + + mkdir $DIR/$tdir/striped_dir/c && + error "default stripe_index is invalid, mkdir c should fails" + + rm -rf $DIR/$tdir || error "rmdir fails" +} +run_test 300m "setstriped directory on single MDT FS" + +cleanup_300n() { + local list=$(comma_list $(mdts_nodes)) + + trap 0 + do_nodes $list $LCTL set_param -n mdt.*.enable_remote_dir_gid=0 +} + +test_300n() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local stripe_index + local list=$(comma_list $(mdts_nodes)) + + trap cleanup_300n RETURN EXIT + mkdir -p $DIR/$tdir + chmod 777 $DIR/$tdir + $RUNAS $LFS setdirstripe -i0 -c$MDSCOUNT \ + $DIR/$tdir/striped_dir > /dev/null 2>&1 && + error "create striped dir succeeds with gid=0" + + do_nodes $list $LCTL set_param -n mdt.*.enable_remote_dir_gid=-1 + $RUNAS $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir || + error "create striped dir fails with gid=-1" + + do_nodes $list $LCTL set_param -n mdt.*.enable_remote_dir_gid=0 + $RUNAS $LFS setdirstripe -i 1 -c$MDSCOUNT -D \ + $DIR/$tdir/striped_dir > /dev/null 2>&1 && + error "set default striped dir succeeds with gid=0" + + + do_nodes $list $LCTL set_param -n mdt.*.enable_remote_dir_gid=-1 + $RUNAS $LFS setdirstripe -i 1 -c$MDSCOUNT -D $DIR/$tdir/striped_dir || + error "set default striped dir fails with gid=-1" + + + do_nodes $list $LCTL set_param -n mdt.*.enable_remote_dir_gid=0 + $RUNAS mkdir $DIR/$tdir/striped_dir/test_dir || + error "create test_dir fails" + $RUNAS mkdir $DIR/$tdir/striped_dir/test_dir1 || + error "create test_dir1 fails" + $RUNAS mkdir $DIR/$tdir/striped_dir/test_dir2 || + error "create test_dir2 fails" + cleanup_300n +} +run_test 300n "non-root user to create dir under striped dir with default EA" + +test_300o() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + local numfree1 + local numfree2 + + mkdir -p $DIR/$tdir + + numfree1=$(lctl get_param -n mdc.*MDT0000*.filesfree) + numfree2=$(lctl get_param -n mdc.*MDT0001*.filesfree) + if [ $numfree1 -lt 66000 -o $numfree2 -lt 66000 ]; then + skip "not enough free inodes $numfree1 $numfree2" + return + fi + + numfree1=$(lctl get_param -n mdc.*MDT0000-mdc-*.kbytesfree) + numfree2=$(lctl get_param -n mdc.*MDT0001-mdc-*.kbytesfree) + if [ $numfree1 -lt 300000 -o $numfree2 -lt 300000 ]; then + skip "not enough free space $numfree1 $numfree2" + return + fi + + $LFS setdirstripe -c2 $DIR/$tdir/striped_dir || + error "setdirstripe fails" + + createmany -d $DIR/$tdir/striped_dir/d 131000 || + error "create dirs fails" + + $LCTL set_param ldlm.namespaces.*mdc-*.lru_size=0 + ls $DIR/$tdir/striped_dir > /dev/null || + error "ls striped dir fails" + unlinkmany -d $DIR/$tdir/striped_dir/d 131000 || + error "unlink big striped dir fails" +} +run_test 300o "unlink big sub stripe(> 65000 subdirs)" + +test_300p() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + mkdir -p $DIR/$tdir + + #define OBD_FAIL_OUT_ENOSPC 0x1704 + do_facet mds2 lctl set_param fail_loc=0x80001704 + $LFS setdirstripe -c2 $DIR/$tdir/bad_striped_dir > /dev/null 2>&1 && + error "create striped directory should fail" + + [ -e $DIR/$tdir/bad_striped_dir ] && error "striped dir exists" + + $LFS setdirstripe -c2 $DIR/$tdir/bad_striped_dir + true +} +run_test 300p "create striped directory without space" + +test_300q() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + local fd=$(free_fd) + local cmd="exec $fd<$tdir" + cd $DIR + $LFS mkdir -c $MDSCOUNT $tdir || error "create $tdir fails" + eval $cmd + cmd="exec $fd<&-" + trap "eval $cmd" EXIT + cd $tdir || error "cd $tdir fails" + rmdir ../$tdir || error "rmdir $tdir fails" + mkdir local_dir && error "create dir succeeds" + $LFS setdirstripe -i1 remote_dir && error "create remote dir succeeds" + eval $cmd + return 0 +} +run_test 300q "create remote directory under orphan directory" + prepare_remote_file() { mkdir $DIR/$tdir/src_dir || error "create remote source failed" @@ -13527,6 +14557,159 @@ test_400b() { # LU-1606, LU-5011 } run_test 400b "packaged headers can be compiled" +test_401a() { #LU-7437 + #count the number of parameters by "list_param -R" + local params=$($LCTL list_param -R '*' 2>/dev/null | wc -l) + #count the number of parameters by listing proc files + local procs=$(find -L $proc_dirs -mindepth 1 -printf '%P\n' 2>/dev/null| + sort -u | wc -l) + + [ $params -eq $procs ] || + error "found $params parameters vs. $procs proc files" + + # test the list_param -D option only returns directories + params=$($LCTL list_param -R -D '*' 2>/dev/null | wc -l) + #count the number of parameters by listing proc directories + procs=$(find -L $proc_dirs -mindepth 1 -type d -printf '%P\n' 2>/dev/null | + sort -u | wc -l) + + [ $params -eq $procs ] || + error "found $params parameters vs. $procs proc files" +} +run_test 401a "Verify if 'lctl list_param -R' can list parameters recursively" + +test_401b() { + local save=$($LCTL get_param -n jobid_var) + local tmp=testing + + $LCTL set_param foo=bar jobid_var=$tmp bar=baz && + error "no error returned when setting bad parameters" + + local jobid_new=$($LCTL get_param -n foe jobid_var baz) + [[ "$jobid_new" == "$tmp" ]] || error "jobid tmp $jobid_new != $tmp" + + $LCTL set_param -n fog=bam jobid_var=$save bat=fog + local jobid_old=$($LCTL get_param -n foe jobid_var bag) + [[ "$jobid_old" == "$save" ]] || error "jobid new $jobid_old != $save" +} +run_test 401b "Verify 'lctl {get,set}_param' continue after error" + +test_401c() { + local jobid_var_old=$($LCTL get_param -n jobid_var) + local jobid_var_new + + $LCTL set_param jobid_var= && + error "no error returned for 'set_param a='" + + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_old" == "$jobid_var_new" ]] || + error "jobid_var was changed by setting without value" + + $LCTL set_param jobid_var && + error "no error returned for 'set_param a'" + + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_old" == "$jobid_var_new" ]] || + error "jobid_var was changed by setting without value" +} +run_test 401c "Verify 'lctl set_param' without value fails in either format." + +test_401d() { + local jobid_var_old=$($LCTL get_param -n jobid_var) + local jobid_var_new + local new_value="foo=bar" + + $LCTL set_param jobid_var=$new_value || + error "'set_param a=b' did not accept a value containing '='" + + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_new" == "$new_value" ]] || + error "'set_param a=b' failed on a value containing '='" + + # Reset the jobid_var to test the other format + $LCTL set_param jobid_var=$jobid_var_old + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_new" == "$jobid_var_old" ]] || + error "failed to reset jobid_var" + + $LCTL set_param jobid_var $new_value || + error "'set_param a b' did not accept a value containing '='" + + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_new" == "$new_value" ]] || + error "'set_param a b' failed on a value containing '='" + + $LCTL set_param jobid_var $jobid_var_old + jobid_var_new=$($LCTL get_param -n jobid_var) + [[ "$jobid_var_new" == "$jobid_var_old" ]] || + error "failed to reset jobid_var" +} +run_test 401d "Verify 'lctl set_param' accepts values containing '='" + +test_402() { + $LFS setdirstripe -i 0 $DIR/$tdir || error "setdirstripe -i 0 failed" +#define OBD_FAIL_MDS_FLD_LOOKUP 0x15c + do_facet mds1 "lctl set_param fail_loc=0x8000015c" + touch $DIR/$tdir/$tfile && error "touch should fail with ENOENT" || + echo "Touch failed - OK" +} +run_test 402 "Return ENOENT to lod_generate_and_set_lovea" + +test_403() { + local file1=$DIR/$tfile.1 + local file2=$DIR/$tfile.2 + local tfile=$TMP/$tfile + + rm -f $file1 $file2 $tfile + + touch $file1 + ln $file1 $file2 + + # 30 sec OBD_TIMEOUT in ll_getattr() + # right before populating st_nlink + $LCTL set_param fail_loc=0x80001409 + stat -c %h $file1 > $tfile & + + # create an alias, drop all locks and reclaim the dentry + < $file2 + cancel_lru_locks mdc + cancel_lru_locks osc + sysctl -w vm.drop_caches=2 + + wait + + [ `cat $tfile` -gt 0 ] || error "wrong nlink count: `cat $tfile`" + + rm -f $tfile $file1 $file2 +} +run_test 403 "i_nlink should not drop to zero due to aliasing" + +test_404() { # LU-6601 + local mosps=$(do_facet $SINGLEMDS $LCTL dl | + awk '/osp .*-osc-MDT/ { print $4}') + + local osp + for osp in $mosps; do + echo "Deactivate: " $osp + do_facet $SINGLEMDS $LCTL --device %$osp deactivate + local stat=$(do_facet $SINGLEMDS $LCTL dl | + awk -vp=$osp '$4 == p { print $2 }') + [ $stat = IN ] || { + do_facet $SINGLEMDS $LCTL dl | grep -w $osp + error "deactivate error" + } + echo "Activate: " $osp + do_facet $SINGLEMDS $LCTL --device %$osp activate + local stat=$(do_facet $SINGLEMDS $LCTL dl | + awk -vp=$osp '$4 == p { print $2 }') + [ $stat = UP ] || { + do_facet $SINGLEMDS $LCTL dl | grep -w $osp + error "activate error" + } + done +} +run_test 404 "validate manual {de}activated works properly for OSPs" + # # tests that do cleanup/setup should be run at the end #