X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=c352d8978649462917dba2ce02b6d0ca6b99fb8f;hp=c0aac29605ad3b06da4166abf5f38f6068d1627c;hb=a4d0b9612568d20baec2b53a25119bf1e697234e;hpb=72c1f7095203cc1badadf581c66f9546476438ab diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c0aac29..c352d89 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -77,8 +77,8 @@ if (( $LINUX_VERSION_CODE >= $(version_code 4.18.0) && ALWAYS_EXCEPT+=" 411" fi -# 5 12 8 12 (min)" -[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 60i 64b 68 71 115 135 136 300o" +# 5 12 8 12 15 (min)" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 60i 64b 68 71 115 135 136 230d 300o" if [ "$mds1_FSTYPE" = "zfs" ]; then # bug number for skipped test: @@ -5682,38 +5682,51 @@ run_test 51b "exceed 64k subdirectory nlink limit on create, verify unlink" test_51d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" [[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs" + local qos_old test_mkdir $DIR/$tdir + $LFS setstripe -c $OSTCOUNT $DIR/$tdir + + qos_old=$(do_facet mds1 \ + "$LCTL get_param -n lod.$FSNAME-*.qos_threshold_rr" | head -n 1) + do_nodes $(comma_list $(mdts_nodes)) \ + "$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=100" + stack_trap "do_nodes $(comma_list $(mdts_nodes)) \ + '$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=${qos_old%%%}'" + createmany -o $DIR/$tdir/t- 1000 $LFS getstripe $DIR/$tdir > $TMP/$tfile - for N in $(seq 0 $((OSTCOUNT - 1))); do - OBJS[$N]=$(awk -vobjs=0 '($1 == '$N') { objs += 1 } \ - END { printf("%0.0f", objs) }' $TMP/$tfile) - OBJS0[$N]=$(grep -A 1 idx $TMP/$tfile | awk -vobjs=0 \ - '($1 == '$N') { objs += 1 } \ - END { printf("%0.0f", objs) }') - log "OST$N has ${OBJS[$N]} objects, ${OBJS0[$N]} are index 0" + for ((n = 0; n < $OSTCOUNT; n++)); do + objs[$n]=$(awk -vobjs=0 '($1 == '$n') { objs += 1 } \ + END { printf("%0.0f", objs) }' $TMP/$tfile) + objs0[$n]=$(grep -A 1 idx $TMP/$tfile | awk 
-vobjs=0 \ + '($1 == '$n') { objs += 1 } \ + END { printf("%0.0f", objs) }') + log "OST$n has ${objs[$n]} objects, ${objs0[$n]} are index 0" done unlinkmany $DIR/$tdir/t- 1000 - NLAST=0 - for N in $(seq 1 $((OSTCOUNT - 1))); do - [[ ${OBJS[$N]} -lt $((${OBJS[$NLAST]} - 20)) ]] && - error "OST $N has less objects vs OST $NLAST" \ - " (${OBJS[$N]} < ${OBJS[$NLAST]}" - [[ ${OBJS[$N]} -gt $((${OBJS[$NLAST]} + 20)) ]] && - error "OST $N has less objects vs OST $NLAST" \ - " (${OBJS[$N]} < ${OBJS[$NLAST]}" - - [[ ${OBJS0[$N]} -lt $((${OBJS0[$NLAST]} - 20)) ]] && - error "OST $N has less #0 objects vs OST $NLAST" \ - " (${OBJS0[$N]} < ${OBJS0[$NLAST]}" - [[ ${OBJS0[$N]} -gt $((${OBJS0[$NLAST]} + 20)) ]] && - error "OST $N has less #0 objects vs OST $NLAST" \ - " (${OBJS0[$N]} < ${OBJS0[$NLAST]}" - NLAST=$N + nlast=0 + for ((n = 0; n < $OSTCOUNT; n++)); do + (( ${objs[$n]} > ${objs[$nlast]} * 4 / 5 )) || + { $LFS df; $LFS df -i; + error "OST $n has fewer objects vs. OST $nlast" \ + " (${objs[$n]} < ${objs[$nlast]})"; } + (( ${objs[$n]} < ${objs[$nlast]} * 5 / 4 )) || + { $LFS df; $LFS df -i; + error "OST $n has more objects vs. OST $nlast" \ + " (${objs[$n]} > ${objs[$nlast]})"; } + + (( ${objs0[$n]} > ${objs0[$nlast]} * 4 / 5 )) || + { $LFS df; $LFS df -i; + error "OST $n has fewer #0 objects vs. OST $nlast" \ + " (${objs0[$n]} < ${objs0[$nlast]})"; } + (( ${objs0[$n]} < ${objs0[$nlast]} * 5 / 4 )) || + { $LFS df; $LFS df -i; + error "OST $n has more #0 objects vs. OST $nlast" \ + " (${objs0[$n]} > ${objs0[$nlast]})"; } + nlast=$n done - rm -f $TMP/$tfile } run_test 51d "check object distribution" @@ -6131,6 +6144,47 @@ test_56d() { } run_test 56d "'lfs df -v' prints only configured devices" +test_56e() { + err_enoent=2 # No such file or directory + err_eopnotsupp=95 # Operation not supported + + enoent_mnt=/pmt1 # Invalid dentry. 
Path not present + notsup_mnt=/tmp # Valid dentry, but Not a lustreFS + + # Check for handling of path not exists + output=$($LFS df $enoent_mnt 2>&1) + ret=$? + + fs=$(echo $output | awk -F: '{print $2}' | awk '{print $3}' | tr -d \') + [[ $fs = $enoent_mnt && $ret -eq $err_enoent ]] || + error "expect failure $err_enoent, not $ret" + + # Check for handling of non-Lustre FS + output=$($LFS df $notsup_mnt) + ret=$? + + fs=$(echo $output | awk '{print $1}' | awk -F: '{print $2}') + [[ $fs = $notsup_mnt && $ret -eq $err_eopnotsupp ]] || + error "expect success $err_eopnotsupp, not $ret" + + # Check for multiple LustreFS argument + output=$($LFS df $MOUNT $MOUNT $MOUNT | grep -c "filesystem_summary:") + ret=$? + + [[ $output -eq 3 && $ret -eq 0 ]] || + error "expect success 3, not $output, rc = $ret" + + # Check for correct non-Lustre FS handling among multiple + # LustreFS argument + output=$($LFS df $MOUNT $notsup_mnt $MOUNT | + grep -c "filesystem_summary:"; exit ${PIPESTATUS[0]}) + ret=$? 
+ + [[ $output -eq 2 && $ret -eq $err_eopnotsupp ]] || + error "expect success 2, not $output, rc = $ret" +} +run_test 56e "'lfs df' Handle non LustreFS & multiple LustreFS" + NUMFILES=3 NUMDIRS=3 setup_56() { @@ -7644,6 +7698,85 @@ test_56xf() { } run_test 56xf "FID is not lost during migration of a composite layout file" +check_file_ost_range() { + local file="$1" + shift + local range="$*" + local -a file_range + local idx + + file_range=($($LFS getstripe -y "$file" | + awk '/l_ost_idx:/ { print $NF }')) + + if [[ "${#file_range[@]}" = 0 ]]; then + echo "No osts found for $file" + return 1 + fi + + for idx in "${file_range[@]}"; do + [[ " $range " =~ " $idx " ]] || + return 1 + done + + return 0 +} + +sub_test_56xg() { + local stripe_opt="$1" + local pool="$2" + shift 2 + local pool_ostidx="$(seq $* | tr '\n' ' ')" + + $LFS migrate $stripe_opt -p $pool $DIR/$tfile || + error "Fail to migrate $tfile on $pool" + [[ "$($LFS getstripe -p $DIR/$tfile)" = "$pool" ]] || + error "$tfile is not in pool $pool" + check_file_ost_range "$DIR/$tfile" $pool_ostidx || + error "$tfile osts mismatch with pool $pool (osts $pool_ostidx)" +} + +test_56xg() { + [[ $PARALLEL != "yes" ]] || skip "skip parallel run" + [[ $OSTCOUNT -ge 2 ]] || skip "needs >= 2 OSTs" + [[ $MDS1_VERSION -gt $(version_code 2.14.52) ]] || + skip "Need MDS version newer than 2.14.52" + + local -a pool_names=("${TESTNAME}_0" "${TESTNAME}_1" "${TESTNAME}_2") + local -a pool_ranges=("0 0" "1 1" "0 1") + + # init pools + for i in "${!pool_names[@]}"; do + pool_add ${pool_names[$i]} || + error "pool_add failed (pool: ${pool_names[$i]})" + pool_add_targets ${pool_names[$i]} ${pool_ranges[$i]} || + error "pool_add_targets failed (pool: ${pool_names[$i]})" + done + + # init the file to migrate + $LFS setstripe -c1 -i1 $DIR/$tfile || + error "Unable to create $tfile on OST1" + dd if=/dev/urandom of=$DIR/$tfile bs=1M count=4 status=none || + error "Unable to write on $tfile" + + echo "1. 
migrate $tfile on pool ${pool_names[0]}" + sub_test_56xg "-c-1" "${pool_names[0]}" ${pool_ranges[0]} + + echo "2. migrate $tfile on pool ${pool_names[2]}" + sub_test_56xg "-c-1 -S2M" "${pool_names[2]}" ${pool_ranges[2]} + + echo "3. migrate $tfile on pool ${pool_names[1]}" + sub_test_56xg "-n -c-1" "${pool_names[1]}" ${pool_ranges[1]} + + echo "4. migrate $tfile on pool ${pool_names[2]} with default stripe parameters" + sub_test_56xg "" "${pool_names[2]}" ${pool_ranges[2]} + echo + + # Clean pools + destroy_test_pools || + error "pool_destroy failed" +} +run_test 56xg "lfs migrate pool support" + test_56y() { [ $MDS1_VERSION -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" @@ -8696,6 +8829,104 @@ test_64f() { } run_test 64f "check grant consumption (with grant allocation)" +test_64g() { + #[ $MDS1_VERSION -lt $(version_code 2.14.54) ] && + # skip "Need MDS version at least 2.14.54" + + local mdts=$(comma_list $(mdts_nodes)) + + local old=$($LCTL get_param mdc.$FSNAME-*.grant_shrink_interval | + tr '\n' ' ') + stack_trap "$LCTL set_param $old" + + # generate dirty pages and increase dirty granted on MDT + stack_trap "rm -f $DIR/$tfile-*" + for (( i = 0; i < 10; i++)); do + $LFS setstripe -E 1M -L mdt $DIR/$tfile-$i || + error "can't set stripe" + dd if=/dev/zero of=$DIR/$tfile-$i bs=128k count=1 || + error "can't dd" + $LFS getstripe $DIR/$tfile-$i | grep -q pattern.*mdt || { + $LFS getstripe $DIR/$tfile-$i + error "not DoM file" + } + done + + # flush dirty pages + sync + + # wait until grant shrink reset grant dirty on MDTs + for ((i = 0; i < 120; i++)); do + grant_dirty=$(do_nodes $mdts $LCTL get_param -n mdt.*.tot_dirty | + awk '{sum=sum+$1} END {print sum}') + vm_dirty=$(awk '/Dirty:/{print $2}' /proc/meminfo) + echo "$grant_dirty grants, $vm_dirty pages" + (( grant_dirty + vm_dirty == 0 )) && break + (( i == 3 )) && sync && + $LCTL set_param mdc.$FSNAME-*.grant_shrink_interval=5 + sleep 1 + done + + 
grant_dirty=$(do_nodes $mdts $LCTL get_param -n mdt.*.tot_dirty | + awk '{sum=sum+$1} END {print sum}') + (( grant_dirty == 0 )) || error "$grant_dirty on MDT" +} +run_test 64g "grant shrink on MDT" + +test_64h() { + local instance=$($LFS getname -i $DIR) + local osc_tgt="$FSNAME-OST0000-osc-$instance" + local num_exps=$(do_facet ost1 \ + $LCTL get_param -n obdfilter.*OST0000*.num_exports) + local max_brw_size=$(import_param $osc_tgt max_brw_size) + local avail=$($LCTL get_param -n osc.*OST0000-osc-$instance.kbytesavail) + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + + # 10MiB is for file to be written, max_brw_size * 16 * + # num_exps is space reserve so that tgt_grant_shrink() decided + # to not shrink + local expect=$((max_brw_size * 16 * num_exps + 10 * 1048576)) + (( avail * 1024 < expect )) && + skip "need $expect bytes on ost1, have $(( avail * 1024 )) only" + + save_lustre_params client "osc.*OST0000*.grant_shrink" > $p + save_lustre_params client "osc.*OST0000*.grant_shrink_interval" >> $p + stack_trap "restore_lustre_params < $p; rm -f $p" EXIT + $LCTL set_param osc.*OST0000*.grant_shrink=1 + $LCTL set_param osc.*OST0000*.grant_shrink_interval=10 + + $LFS setstripe -c 1 -i 0 $DIR/$tfile + dd if=/dev/zero of=$DIR/$tfile bs=1M count=10 oflag=sync + + # drop cache so that coming read would do rpc + cancel_lru_locks osc + + # shrink interval is set to 10, pause for 7 seconds so that + # grant thread did not wake up yet but coming read entered + # shrink mode for rpc (osc_should_shrink_grant()) + sleep 7 + + declare -a cur_grant_bytes + declare -a tot_granted + cur_grant_bytes[0]=$($LCTL get_param -n osc.*OST0000*.cur_grant_bytes) + tot_granted[0]=$(do_facet ost1 \ + $LCTL get_param -n obdfilter.*OST0000*.tot_granted) + + dd if=$DIR/$tfile bs=4K count=1 of=/dev/null + + cur_grant_bytes[1]=$($LCTL get_param -n osc.*OST0000*.cur_grant_bytes) + tot_granted[1]=$(do_facet ost1 \ + $LCTL get_param -n obdfilter.*OST0000*.tot_granted) + + # grant change should 
be equal on both sides + (( cur_grant_bytes[0] - cur_grant_bytes[1] == + tot_granted[0] - tot_granted[1])) || + error "grant change mismatch, " \ + "server: ${tot_granted[0]} to ${tot_granted[1]}, " \ + "client: ${cur_grant_bytes[0]} to ${cur_grant_bytes[1]}" +} +run_test 64h "grant shrink on read" + # bug 1414 - set/get directories' stripe info test_65a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -9720,6 +9951,36 @@ test_77n() { } run_test 77n "Verify read from a hole inside contiguous blocks with T10PI" +test_77o() { + (( $CLIENT_VERSION >= $(version_code 2.14.54) )) || + skip "Need at least version 2.14.54" + local ofd=obdfilter + local mdt=mdt + + # print OST checksum_type + echo "$ofd.$FSNAME-*.checksum_type:" + do_nodes $(comma_list $(osts_nodes)) \ + $LCTL get_param -n $ofd.$FSNAME-*.checksum_type + + # print MDT checksum_type + echo "$mdt.$FSNAME-*.checksum_type:" + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL get_param -n $mdt.$FSNAME-*.checksum_type + + local o_count=$(do_nodes $(comma_list $(osts_nodes)) \ + $LCTL get_param -n $ofd.$FSNAME-*.checksum_type | wc -l) + + (( $o_count == $OSTCOUNT )) || + error "found $o_count checksums, not \$OSTCOUNT=$OSTCOUNT" + + local m_count=$(do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL get_param -n $mdt.$FSNAME-*.checksum_type | wc -l) + + (( $m_count == $MDSCOUNT )) || + error "found $m_count checksums, not \$MDSCOUNT=$MDSCOUNT" +} +run_test 77o "Verify checksum_type for server (mdt and ofd(obdfilter))" + cleanup_test_78() { trap 0 rm -f $DIR/$tfile @@ -16352,7 +16613,7 @@ test_160o() { changelog_register --user test_160o -m unlnk+close+open || error "changelog_register failed" - # drop server mask so it doesn't interfere + do_facet $SINGLEMDS $LCTL --device $mdt \ changelog_register -u "Tt3_-#" && error "bad symbols in name should fail" @@ -16443,6 +16704,28 @@ test_160p() { } run_test 160p "Changelog orphan cleanup with no users" +test_160q() { + local mdt="$(facet_svc $SINGLEMDS)" + local clu + + 
[[ $PARALLEL != "yes" ]] || skip "skip parallel run" + remote_mds_nodsh && skip "remote MDS with nodsh" + [ $MDS1_VERSION -ge $(version_code 2.14.54) ] || + skip "Need MDS version at least 2.14.54" + + # set server mask to minimal value like server init does + changelog_chmask "MARK" + clu=$(do_facet $SINGLEMDS $LCTL --device $mdt changelog_register -n) || + error "changelog_register failed" + # check effective mask again, should be treated as DEFMASK now + mask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + do_facet $SINGLEMDS $LCTL --device $mdt changelog_deregister $clu || + error "changelog_deregister failed" + [[ $mask == *"HLINK"* ]] || error "mask is not DEFMASK as expected" +} +run_test 160q "changelog effective mask is DEFMASK if not set" + test_161a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -18178,23 +18461,23 @@ test_208() { $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eRE+eU || error "get lease error" echo "==== test 2: verify lease can be broken by upcoming open" - $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc & + $MULTIOP $DIR/$tfile oO_RDWR:eR_E-eUc & local PID=$! sleep 1 - $MULTIOP $DIR/$tfile oO_RDONLY:c + $MULTIOP $DIR/$tfile oO_RDWR:c kill -USR1 $PID && wait $PID || error "break lease error" echo "==== test 3: verify lease can't be granted if an open already exists" - $MULTIOP $DIR/$tfile oO_RDONLY:_c & + $MULTIOP $DIR/$tfile oO_RDWR:_c & local PID=$! sleep 1 - $MULTIOP $DIR/$tfile oO_RDONLY:eReUc && error "apply lease should fail" + $MULTIOP $DIR/$tfile oO_RDWR:eReUc && error "apply lease should fail" kill -USR1 $PID && wait $PID || error "open file error" echo "==== test 4: lease can sustain over recovery" - $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc & + $MULTIOP $DIR/$tfile oO_RDWR:eR_E+eUc & PID=$! 
sleep 1 @@ -18203,7 +18486,7 @@ test_208() { kill -USR1 $PID && wait $PID || error "lease broken over recovery" echo "==== test 5: lease broken can't be regained by replay" - $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc & + $MULTIOP $DIR/$tfile oO_RDWR:eR_E-eUc & PID=$! sleep 1 @@ -18649,26 +18932,48 @@ run_test 223 "osc reenqueue if without AGL lock granted =======================" test_224a() { # LU-1039, MRP-303 [ $PARALLEL == "yes" ] && skip "skip parallel run" - #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508 $LCTL set_param fail_loc=0x508 - dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 conv=fsync + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 conv=fsync $LCTL set_param fail_loc=0 df $DIR } run_test 224a "Don't panic on bulk IO failure" -test_224b() { # LU-1039, MRP-303 +test_224bd_sub() { # LU-1039, MRP-303 [ $PARALLEL == "yes" ] && skip "skip parallel run" + local timeout=$1 - dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 + shift + dd if=/dev/urandom of=$TMP/$tfile bs=1M count=1 + + $LFS setstripe -c 1 -i 0 $DIR/$tfile + + dd if=$TMP/$tfile of=$DIR/$tfile bs=1M count=1 cancel_lru_locks osc + set_checksums 0 + stack_trap "set_checksums $ORIG_CSUM" EXIT + local at_max_saved=0 + + # adaptive timeouts may prevent seeing the issue + if at_is_enabled; then + at_max_saved=$(at_max_get mds) + at_max_set 0 mds client + stack_trap "at_max_set $at_max_saved mds client" EXIT + fi + #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515 - $LCTL set_param fail_loc=0x515 - dd of=/dev/null if=$DIR/$tfile bs=4096 count=1 - $LCTL set_param fail_loc=0 + do_facet ost1 $LCTL set_param fail_val=$timeout fail_loc=0x80000515 + dd of=$TMP/$tfile.new if=$DIR/$tfile bs=1M count=1 || "$@" + + do_facet ost1 $LCTL set_param fail_loc=0 + cmp $TMP/$tfile $TMP/$tfile.new || error "file contents wrong" df $DIR } + +test_224b() { + test_224bd_sub 3 error "dd failed" +} run_test 224b "Don't panic on bulk IO failure" test_224c() { # LU-6441 @@ -18709,6 +19014,11 @@ test_224c() { # LU-6441 } run_test 224c 
"Don't hang if one of md lost during large bulk RPC" +test_224d() { # LU-11169 + test_224bd_sub $((TIMEOUT + 2)) error "dd failed" +} +run_test 224d "Don't corrupt data on bulk IO timeout" + MDSSURVEY=${MDSSURVEY:-$(which mds-survey 2>/dev/null || true)} test_225a () { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -19388,7 +19698,7 @@ test_230d() { error "migrate remote dir error" echo "Finish migration, then checking.." - for file in $(find $migrate_dir); do + for file in $(find $migrate_dir -maxdepth 1); do mdt_index=$($LFS getstripe -m $file) if [ $mdt_index -lt $new_index ] || [ $mdt_index -gt $((new_index + new_count - 1)) ]; then @@ -19945,6 +20255,48 @@ test_230t() } run_test 230t "migrate directory with project ID set" +test_230u() +{ + (( MDSCOUNT > 3 )) || skip_env "needs >= 4 MDTs" + (( MDS1_VERSION >= $(version_code 2.14.53) )) || + skip "Need MDS version at least 2.14.53" + + local count + + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed" + mkdir $DIR/$tdir/sub{0..99} || error "mkdir sub failed" + $LFS migrate -m -1 $DIR/$tdir/sub{0..99} || error "migrate sub failed" + for i in $(seq 0 $((MDSCOUNT - 1))); do + count=$($LFS getstripe -m $DIR/$tdir/sub* | grep -c ^$i) + echo "$count dirs migrated to MDT$i" + done + count=$($LFS getstripe -m $DIR/$tdir/sub* | sort -u | wc -l) + (( count >= MDSCOUNT - 1 )) || error "dirs migrated to $count MDTs" +} +run_test 230u "migrate directory by QOS" + +test_230v() +{ + (( MDSCOUNT > 3 )) || skip_env "needs >= 4 MDTs" + (( MDS1_VERSION >= $(version_code 2.14.53) )) || + skip "Need MDS version at least 2.14.53" + + local count + + mkdir $DIR/$tdir || error "mkdir $tdir failed" + mkdir $DIR/$tdir/sub{0..99} || error "mkdir sub failed" + $LFS migrate -m 0,2,1 $DIR/$tdir || error "migrate $tdir failed" + for i in $(seq 0 $((MDSCOUNT - 1))); do + count=$($LFS getstripe -m $DIR/$tdir/sub* | grep -c ^$i) + echo "$count subdirs migrated to MDT$i" + (( i == 3 )) && (( count > 0 )) && + error "subdir shouldn't 
be migrated to MDT3" + done + count=$($LFS getstripe -m $DIR/$tdir/sub* | sort -u | wc -l) + (( count == 3 )) || error "dirs migrated to $count MDTs" +} +run_test 230v "subdir migrated to the MDT where its parent is located" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc @@ -20844,18 +21196,20 @@ run_test 253 "Check object allocation limit" test_254() { [ $PARALLEL == "yes" ] && skip "skip parallel run" remote_mds_nodsh && skip "remote MDS with nodsh" - do_facet $SINGLEMDS $LCTL get_param -n mdd.$MDT0.changelog_size || + + local mdt=$(facet_svc $SINGLEMDS) + + do_facet $SINGLEMDS $LCTL get_param -n mdd.$mdt.changelog_size || skip "MDS does not support changelog_size" local cl_user - local MDT0=$(facet_svc $SINGLEMDS) changelog_register || error "changelog_register failed" changelog_clear 0 || error "changelog_clear failed" local size1=$(do_facet $SINGLEMDS \ - $LCTL get_param -n mdd.$MDT0.changelog_size) + $LCTL get_param -n mdd.$mdt.changelog_size) echo "Changelog size $size1" rm -rf $DIR/$tdir @@ -20870,7 +21224,7 @@ test_254() { rm $DIR/$tdir/pics/desktop.jpg local size2=$(do_facet $SINGLEMDS \ - $LCTL get_param -n mdd.$MDT0.changelog_size) + $LCTL get_param -n mdd.$mdt.changelog_size) echo "Changelog size after work $size2" (( $size2 > $size1 )) || @@ -21799,6 +22153,16 @@ test_270h() { } run_test 270h "DoM: DoM stripe removal when disabled on server" +test_270i() { + (( $MDS1_VERSION >= $(version_code 2.14.54) )) || + skip "Need MDS version at least 2.14.54" + + mkdir $DIR/$tdir + $LFS setstripe -L mdt -S 128k -c -1 $DIR/$tdir && + error "setstripe should fail" || true +} +run_test 270i "DoM: setting invalid DoM striping should fail" + test_271a() { [ $MDS1_VERSION -lt $(version_code 2.10.55) ] && skip "Need MDS version at least 2.10.55" @@ -24627,6 +24991,8 @@ run_test 412 "mkdir on specific MDTs" generate_uneven_mdts() { local threshold=$1 + local lmv_qos_maxage + local lod_qos_maxage local ffree local bavail local max @@ -24636,6 
+25002,17 @@ generate_uneven_mdts() { local tmp local i + lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage) + $LCTL set_param lmv.*.qos_maxage=1 + stack_trap "$LCTL set_param \ + lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null" RETURN + lod_qos_maxage=$(do_facet mds1 $LCTL get_param -n \ + lod.$FSNAME-MDT0000-mdtlov.mdt_qos_maxage | awk '{ print $1 }') + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param \ + lod.*.mdt_qos_maxage=1 + stack_trap "do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param \ + lod.*.mdt_qos_maxage=$lod_qos_maxage > /dev/null" RETURN + echo echo "Check for uneven MDTs: " @@ -24659,9 +25036,15 @@ generate_uneven_mdts() { fi done + (( ${ffree[min_index]} > 0 )) || + skip "no free files in MDT$min_index" + (( ${ffree[min_index]} < 10000000 )) || + skip "too many free files in MDT$min_index" + # Check if we need to generate uneven MDTs local diff=$(((max - min) * 100 / min)) local testdir=$DIR/$tdir-fillmdt + local start mkdir -p $testdir @@ -24669,17 +25052,21 @@ generate_uneven_mdts() { while (( diff < threshold )); do # generate uneven MDTs, create till $threshold% diff echo -n "weight diff=$diff% must be > $threshold% ..." 
- echo "Fill MDT$min_index with 100 files: loop $i" + echo "Fill MDT$min_index with 1000 files: loop $i" testdir=$DIR/$tdir-fillmdt/$i [ -d $testdir ] || $LFS mkdir -i $min_index $testdir || error "mkdir $testdir failed" $LFS setstripe -E 1M -L mdt $testdir || error "setstripe $testdir failed" - for F in f.{0..99}; do - dd if=/dev/zero of=$testdir/$F bs=1M count=1 > \ + start=$SECONDS + for F in f.{0..999}; do + dd if=/dev/zero of=$testdir/$F bs=64K count=1 > \ /dev/null 2>&1 || error "dd $F failed" done + # wait for QOS to update + (( SECONDS < start + 1 )) && sleep $((start + 1 - SECONDS)) + ffree=($(lctl get_param -n mdc.*[mM][dD][cC]-*.filesfree)) bavail=($(lctl get_param -n mdc.*[mM][dD][cC]-*.kbytesavail)) max=$(((${ffree[max_index]} >> 8) * \ @@ -24746,7 +25133,6 @@ test_qos_mkdir() { local stripe_index=$($LFS getstripe -m $testdir) local test_mkdir_rr=true - echo "dirstripe: '$($LFS getdirstripe $testdir)'" getfattr -d -m dmv -e hex $testdir | grep dmv if (( $? == 0 && $MDS1_VERSION >= $(version_code 2.14.51) )); then echo "defstripe: '$($LFS getdirstripe -D $testdir)'" @@ -24823,7 +25209,7 @@ test_qos_mkdir() { (( ${ffree[min_index]} > 0 )) || skip "no free files in MDT$min_index" - (( ${ffree[min_index]} < 100000000 )) || + (( ${ffree[min_index]} < 10000000 )) || skip "too many free files in MDT$min_index" echo "MDT filesfree available: ${ffree[@]}" @@ -24851,32 +25237,33 @@ test_qos_mkdir() { error "$mkdir_cmd subdir$i failed" done + max=0 for (( i = 0; i < $MDSCOUNT; i++ )); do count=$($LFS getdirstripe -i $testdir/* | grep -c "^$i$") + (( count > max )) && max=$count echo "$count directories created on MDT$i" - - if [ $stripe_count -gt 1 ]; then - count=$($LFS getdirstripe $testdir/* | - grep -c -P "^\s+$i\t") - echo "$count stripes created on MDT$i" - fi done - max=$($LFS getdirstripe -i $testdir/* | grep -c "^$max_index$") min=$($LFS getdirstripe -i $testdir/* | grep -c "^$min_index$") # D-value should > 10% of averge - (( max - min >= num / 10 )) || 
+ (( max - min > num / 10 )) || error "subdirs shouldn't be evenly distributed: $max - $min < $((num / 10))" - # 5% for stripes + # ditto for stripes if (( stripe_count > 1 )); then - max=$($LFS getdirstripe $testdir/* | - grep -c -P "^\s+$max_index\t") + max=0 + for (( i = 0; i < $MDSCOUNT; i++ )); do + count=$($LFS getdirstripe $testdir/* | + grep -c -P "^\s+$i\t") + (( count > max )) && max=$count + echo "$count stripes created on MDT$i" + done + min=$($LFS getdirstripe $testdir/* | grep -c -P "^\s+$min_index\t") - (( max - min >= num * stripe_count / 20 )) || - error "stripes shouldn't be evenly distributed: $max - $min < $((num / 20)) * $stripe_count" + (( max - min > num * stripe_count / 10 )) || + error "stripes shouldn't be evenly distributed: $max - $min < $((num / 10)) * $stripe_count" fi } @@ -25020,7 +25407,7 @@ test_413z() { local pid for subdir in $(\ls -1 -d $DIR/d413*-fillmdt/*); do - unlinkmany $subdir/f. 100 & + unlinkmany $subdir/f. 1000 & pids="$pids $!" done @@ -25145,7 +25532,8 @@ check_lfs_df() { [ "$1" == "blocks" ] && inodes= || inodes="-i" for count in {1..100}; do - cancel_lru_locks + do_nodes "$CLIENTS" \ + $LCTL set_param ldlm.namespaces.*.lru_size=clear sync; sleep 0.2 # read the lines of interest @@ -25158,7 +25546,9 @@ check_lfs_df() { # ":/" for df, "filesystem_summary:" for lfs df # compare the two outputs passed=true - for i in {1..5}; do + # skip "available" on MDT until LU-13997 is fixed. 
+ #for i in {1..5}; do + for i in 1 2 4 5; do [ "${df_out[i]}" != "${lfs_df_out[i]}" ] && passed=false done $passed && break @@ -27183,6 +27573,36 @@ test_822() { } run_test 822 "test precreate failure" +test_823() { + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + local OST_MAX_PRECREATE=20000 + + save_lustre_params mds1 \ + "osp.$FSNAME-OST*-osc-MDT0000.max_create_count" > $p + do_facet $SINGLEMDS "$LCTL set_param -n \ + osp.$FSNAME-OST*MDT0000.max_create_count=0" + do_facet $SINGLEMDS "$LCTL set_param -n \ + osp.$FSNAME-OST0000*MDT0000.max_create_count=$OST_MAX_PRECREATE" + + stack_trap "restore_lustre_params < $p; rm $p" + + do_facet $SINGLEMDS "$LCTL set_param -n \ + osp.$FSNAME-OST*-osc-MDT*.create_count=100200" + + local count=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + osp.$FSNAME-OST0000*MDT0000.create_count") + local max=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + osp.$FSNAME-OST0000*MDT0000.max_create_count") + local expect_count=$(((($max/2)/256) * 256)) + + log "setting create_count to 100200:" + log " -result- count: $count with max: $max, expecting: $expect_count" + + [[ $count -eq expect_count ]] || + error "Create count not set to max precreate." 
+} +run_test 823 "Setting create_count > OST_MAX_PRECREATE is lowered to maximum" + # # tests that do cleanup/setup should be run at the end # @@ -27238,6 +27658,21 @@ test_902() { } run_test 902 "test short write doesn't hang lustre" +# LU-14711 +test_903() { + $LFS setstripe -i 0 -c 1 $DIR/$tfile $DIR/${tfile}-2 + echo "blah" > $DIR/${tfile}-2 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=6 conv=fsync + #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 + $LCTL set_param fail_loc=0x417 fail_val=20 + + mv $DIR/${tfile}-2 $DIR/$tfile # Destroys the big object + sleep 1 # To start the destroy + wait_destroy_complete 150 || error "Destroy taking too long" + cat $DIR/$tfile > /dev/null || error "Evicted" +} +run_test 903 "Test long page discard does not cause evictions" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre