From: Andreas Dilger Date: Thu, 28 Nov 2024 21:25:30 +0000 (-0700) Subject: LU-12597 tests: return comma-separated mdts_nodes() X-Git-Tag: 2.16.52~36 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=a2d7e81c01e913d7d4401ab317b2b71652753576;p=fs%2Flustre-release.git LU-12597 tests: return comma-separated mdts_nodes() Start to return comma-separated MDTs list from mdts_nodes(), to avoid the redundant calling of comma_list() for each user. Fix the few places that do not call comma_list() on mdts_nodes() output afterward, or treat the output as a space-separated list. Later patches will clean up all of the callers of mdts_nodes, but there are too many places it is used to do in a single patch. Signed-off-by: Andreas Dilger Change-Id: Iddd3278321e7552dc2afd5fbb9f48033af3ebbe5 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57194 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Arshad Hussain Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- diff --git a/contrib/scripts/spelling.txt b/contrib/scripts/spelling.txt index 8b28f51..ae752c5 100644 --- a/contrib/scripts/spelling.txt +++ b/contrib/scripts/spelling.txt @@ -34,6 +34,8 @@ cfs_time_current_64||ktime_get cfs_time_current_sec||ktime_get_real_seconds ci_nob||ci_bytes CLASSERT||BUILD_BUG_ON() +comma_list.*osts_nodes||osts_nodes +comma_list.*mdts_nodes||mdts_nodes crw_count||crw_bytes msecs_to_jiffies||cfs_time_seconds DEFINE_TIMER||CFS_DEFINE_TIMER @@ -130,7 +132,6 @@ OS_STATE_ENOINO||OS_STATFS_ENOINO OS_STATE_SUM||OS_STATFS_SUM OS_STATE_NONROT||OS_STATFS_NONROT OS_STATFS_NOPRECREATE||OS_STATFS_NOCREATE -comma_list.*osts_nodes||osts_nodes page_cache_get||get_page PAGE_CACHE_MASK||PAGE_MASK page_cache_release||put_page diff --git a/lustre/tests/mds-survey.sh b/lustre/tests/mds-survey.sh index e3ae854..6119534 100644 --- a/lustre/tests/mds-survey.sh +++ b/lustre/tests/mds-survey.sh @@ -80,9 +80,10 @@ get_target() { get_targets() { local targets local node + local mdts=$(mdts_nodes) - for node in $(mdts_nodes); do - targets+="${targets:+ }$(get_target $node)" + for mds in ${mdts//,/ }; do + targets+="${targets:+ }$(get_target $mds)" done echo -n $targets diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 9468be3..19bf817 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -19116,7 +19116,7 @@ test_160f() { [[ $MDS1_VERSION -ge $(version_code 2.10.56) ]] || skip "Need MDS version at least 2.10.56" - local mdts=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) # Create a user changelog_register || error "first changelog_register failed" @@ -19132,17 +19132,18 @@ test_160f() { # use all_char because created files should be evenly distributed test_mkdir -c $MDSCOUNT -H all_char $DIR/$tdir || error "test_mkdir $tdir failed" - log "$(date +%s): creating first files" - for ((i = 0; i < MDSCOUNT * 2; i++)); do - $LFS mkdir -i $((i%MDSCOUNT)) $DIR/$tdir/d$i.$((i/MDSCOUNT)) || - error "create $DIR/$tdir/d$i.$((i/MDSCOUNT)) failed" + log "$(date +%s): creating first dirs" + for ((idx = 0; idx < MDSCOUNT * 2; idx++)); do + local d=$DIR/$tdir/d$idx.$((idx/MDSCOUNT)) + $LFS mkdir -i $((idx%MDSCOUNT)) $d || + error "create $d on MDT$idx failed" done # check changelogs have been generated local start=$SECONDS local idle_time=$((MDSCOUNT * 5 + 5)) local nbcl=$(changelog_dump | wc -l) - [[ $nbcl -eq 0 ]] && error "no changelogs found" + (( $nbcl != 0 )) || error "no changelogs found" for param in "changelog_max_idle_time=$idle_time" \ "changelog_gc=1" \ @@ -19166,7 +19167,7 @@ test_160f() { #define OBD_FAIL_CAT_FREE_RECORDS 0x1313 do_nodes $mdts "$LCTL set_param fail_loc=0x1313 fail_val=3" - for i in $(seq $MDSCOUNT); do + for ((i = 1; i <= $MDSCOUNT; i++)); do cl_users=(${CL_USERS[mds$i]}) cl_user1[mds$i]="${cl_users[0]}" cl_user2[mds$i]="${cl_users[1]}" @@ -19203,16 +19204,18 @@ test_160f() { # Generate one more changelog to trigger GC at fail_loc for cl_user2. # cl_user1 should be OK because it recently processed records. - echo "$(date +%s): creating $((MDSCOUNT * 2)) files" - for ((i = 0; i < MDSCOUNT * 2; i++)); do - $LFS mkdir -i $((i%MDSCOUNT)) $DIR/$tdir/d$i.$((i/MDSCOUNT+2))|| - error "create $DIR/$tdir/d$i.$((i/MDSCOUNT+2)) failed" + echo "$(date +%s): creating $((MDSCOUNT * 2)) dirs" + for ((idx = 0; idx < MDSCOUNT * 2; idx++)); do + local d=$DIR/$tdir/d$idx.$((idx/MDSCOUNT+2)) + + $LFS mkdir -i $((idx%MDSCOUNT)) $d || + error "create dir $d on MDT$idx failed" done # ensure gc thread is done - for i in $(mdts_nodes); do - wait_update $i "ps -e -o comm= | grep chlg_gc_thread" "" 20 || - error "$i: GC-thread not done" + for mds in ${mdts//,/ }; do + wait_update $mds "ps -e -o comm= | grep chlg_gc_thread" "" 20 || + error "$mds: GC-thread not done" done local first_rec @@ -19243,7 +19246,7 @@ test_160g() { [[ $MDS1_VERSION -ge $(version_code 2.14.55) ]] || skip "Need MDS version at least 2.14.55" - local mdts=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) # Create a user changelog_register || error "first changelog_register failed" @@ -19259,9 +19262,9 @@ test_160g() { # use all_char because created files should be evenly distributed test_mkdir -c $MDSCOUNT -H all_char $DIR/$tdir || error "test_mkdir $tdir failed" - for ((i = 0; i < MDSCOUNT; i++)); do - $LFS mkdir -i $i $DIR/$tdir/d$i.1 $DIR/$tdir/d$i.2 || - error "create $DIR/$tdir/d$i.1 failed" + for ((idx = 0; idx < MDSCOUNT; idx++)); do + $LFS mkdir -i $idx $DIR/$tdir/d$idx.1 $DIR/$tdir/d$idx.2 || + error "create $DIR/$tdir/d$idx.1 failed" done # check changelogs have been generated @@ -19282,7 +19285,7 @@ test_160g() { done local start=$SECONDS - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do cl_users=(${CL_USERS[mds$i]}) cl_user1[mds$i]="${cl_users[0]}" cl_user2[mds$i]="${cl_users[1]}" @@ -19317,15 +19320,15 @@ test_160g() { (( sleep2 > 0 )) && echo "sleep $sleep2 for interval" && sleep $sleep2 # Generate one more changelog to trigger GC at fail_loc for cl_user2. # cl_user1 should be OK because it recently processed records. - for ((i = 0; i < MDSCOUNT; i++)); do - $LFS mkdir -i $i $DIR/$tdir/d$i.3 || - error "create $DIR/$tdir/d$i.3 failed" + for ((idx = 0; idx < MDSCOUNT; idx++)); do + $LFS mkdir -i $idx $DIR/$tdir/d$idx.3 || + error "create $DIR/$tdir/d$idx.3 failed" done # ensure gc thread is done - for i in $(mdts_nodes); do - wait_update $i "ps -e -o comm= | grep chlg_gc_thread" "" 20 || - error "$i: GC-thread not done" + for mds in ${mdts//,/ }; do + wait_update $mds "ps -e -o comm= | grep chlg_gc_thread" "" 20 || + error "$mds: GC-thread not done" done local first_rec @@ -19356,7 +19359,7 @@ test_160h() { [[ $MDS1_VERSION -ge $(version_code 2.10.56) ]] || skip "Need MDS version at least 2.10.56" - local mdts=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) # Create a user changelog_register || error "first changelog_register failed" @@ -19372,9 +19375,9 @@ test_160h() { # use all_char because created files should be evenly distributed test_mkdir -c $MDSCOUNT -H all_char $DIR/$tdir || error "test_mkdir $tdir failed" - for ((i = 0; i < MDSCOUNT; i++)); do - $LFS mkdir -i $i $DIR/$tdir/d$i.1 $DIR/$tdir/d$i.2 || - error "create $DIR/$tdir/d$i.1 failed" + for ((idx = 0; idx < MDSCOUNT; idx++)); do + $LFS mkdir -i $idx $DIR/$tdir/d$idx.1 $DIR/$tdir/d$idx.2 || + error "create $DIR/$tdir/d$idx.1 failed" done # check changelogs have been generated @@ -19395,7 +19398,7 @@ test_160h() { # force cl_user2 to be idle (1st part) sleep 9 - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do cl_users=(${CL_USERS[mds$i]}) cl_user1[mds$i]="${cl_users[0]}" cl_user2[mds$i]="${cl_users[1]}" @@ -19441,15 +19444,15 @@ test_160h() { # stop MDT to stop GC-thread, should be done in back-ground as it will # block waiting for the thread to be released and exit declare -A stop_pids - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do stop mds$i & stop_pids[mds$i]=$! done - for i in $(mdts_nodes); do + for mds in ${mdts//,/ }; do local facet local nb=0 - local facets=$(facets_up_on_host $i) + local facets=$(facets_up_on_host $mds) for facet in ${facets//,/ }; do if [[ $facet == mds* ]]; then @@ -19458,25 +19461,25 @@ test_160h() { done # ensure each MDS's gc threads are still present and all in "R" # state (OBD_FAIL_FORCE_GC_THREAD effect!) - [[ $(do_node $i pgrep chlg_gc_thread | wc -l) -eq $nb ]] || - error "$i: expected $nb GC-thread" - wait_update $i \ + [[ $(do_node $mds pgrep chlg_gc_thread | wc -l) -eq $nb ]] || + error "$mds: expected $nb GC-thread" + wait_update $mds \ "ps -C chlg_gc_thread -o state --no-headers | uniq" \ "R" 20 || - error "$i: GC-thread not found in R-state" + error "$mds: GC-thread not found in R-state" # check umounts of each MDT on MDS have reached kthread_stop() - [[ $(do_node $i pgrep umount | wc -l) -eq $nb ]] || - error "$i: expected $nb umount" - wait_update $i \ + [[ $(do_node $mds pgrep umount | wc -l) -eq $nb ]] || + error "$mds: expected $nb umount" + wait_update $mds \ "ps -C umount -o state --no-headers | uniq" "D" 20 || - error "$i: umount not found in D-state" + error "$mds: umount not found in D-state" done # release all GC-threads do_nodes $mdts $LCTL set_param fail_loc=0 # wait for MDT stop to complete - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do wait ${stop_pids[mds$i]} || error "mds$i: stop failed" done @@ -19485,13 +19488,13 @@ test_160h() { # via ldiskfs/zfs and llog_reader... # re-start/mount MDTs - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do start mds$i $(mdsdevname $i) $MDS_MOUNT_OPTS || error "Fail to start mds$i" done local first_rec - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do # check cl_user1 still registered changelog_users mds$i | grep -q "${cl_user1[mds$i]}" || error "mds$i: User ${cl_user1[mds$i]} not registered" @@ -19516,8 +19519,7 @@ run_test 160h "changelog gc thread stop upon umount, orphan records delete " \ "during mount" test_160i() { - - local mdts=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) changelog_register || error "first changelog_register failed" @@ -19555,7 +19557,7 @@ test_160i() { local i local last_rec declare -A LAST_REC - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do if changelog_users mds$i | grep "^cl"; then # make sure new records are added with one user present LAST_REC[mds$i]=$(changelog_users $SINGLEMDS | @@ -19569,7 +19571,7 @@ test_160i() { createmany -m $DIR/$tdir/${tfile}bis $((MDSCOUNT * 2)) || error "create $DIR/$tdir/${tfile}bis failed" - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do last_rec=$(changelog_users $SINGLEMDS | awk '/^current.index:/ { print $NF }') echo "verify changelogs are on: $last_rec != ${LAST_REC[mds$i]}" @@ -19939,7 +19941,7 @@ test_160s() { (( $MDS1_VERSION >= $(version_code 2.14.55) )) || skip "Need MDS version at least 2.14.55" - local mdts=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) #define OBD_FAIL_TIME_IN_CHLOG_USER 0x1314 do_nodes $mdts $LCTL set_param fail_loc=0x1314 \ @@ -19955,9 +19957,9 @@ test_160s() { # use all_char because created files should be evenly distributed test_mkdir -c $MDSCOUNT -H all_char $DIR/$tdir || error "test_mkdir $tdir failed" - for ((i = 0; i < MDSCOUNT; i++)); do - $LFS mkdir -i $i $DIR/$tdir/d$i.1 $DIR/$tdir/d$i.2 || - error "create $DIR/$tdir/d$i.1 failed" + for ((idx = 0; idx < MDSCOUNT; idx++)); do + $LFS mkdir -i $idx $DIR/$tdir/d$idx.1 $DIR/$tdir/d$idx.2 || + error "create $DIR/$tdir/d$idx.1 on MDT$idx failed" done # check changelogs have been generated @@ -19979,7 +19981,7 @@ test_160s() { done local start=$SECONDS - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do cl_users=(${CL_USERS[mds$i]}) cl_user1[mds$i]="${cl_users[0]}" @@ -20001,9 +20003,9 @@ test_160s() { done # ensure gc thread is done - for node in $(mdts_nodes); do - wait_update $node "pgrep chlg_gc_thread" "" 20 || - error "$node: GC-thread not done" + for mds in ${mdts//,/ }; do + wait_update $mds "pgrep chlg_gc_thread" "" 20 || + error "$mds: GC-thread not done" done do_nodes $mdts $LCTL set_param fail_loc=0 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 81f0faf..ac61d46 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1370,7 +1370,7 @@ start_gss_daemons() { return 0 fi - nodes=$(comma_list $(mdts_nodes)) + nodes=$(mdts_nodes) echo "Starting gss daemon on mds: $nodes" if $GSS_SK; then # Start all versions, in case of switching @@ -1400,7 +1400,7 @@ start_gss_daemons() { } stop_gss_daemons() { - local nodes=$(comma_list $(mdts_nodes)) + local nodes=$(mdts_nodes) send_sigint $nodes lsvcgssd lgssd @@ -2061,7 +2061,7 @@ set_params_clients() { } set_params_mdts() { - local mdts=${1:-$(comma_list $(mdts_nodes))} + local mdts=${1:-$(mdts_nodes)} shift || true local params="${@:-$MDS_LCTL_SETPARAM_PARAM}" @@ -3589,8 +3589,7 @@ wait_update_facet() { } sync_all_data_mdts() { - do_nodes $(comma_list $(mdts_nodes)) \ - "lctl set_param -n os[cd]*.*MDT*.force_sync=1" + do_nodes $(mdts_nodes) "lctl set_param -n os[cd]*.*MDT*.force_sync=1" } sync_all_data_osts() { @@ -3669,7 +3668,7 @@ ost_watermarks_set() { local ost_name=$(ostname_from_index $ost_idx) local facets=$(get_facets MDS) - do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \ + do_nodes $(mdts_nodes) $LCTL set_param -n \ osp.*$ost_name*.reserved_mb_low=$lwm \ osp.*$ost_name*.reserved_mb_high=$hwm > /dev/null @@ -3757,9 +3756,10 @@ wait_delete_completed_mds() { local etime local node local changes + local mdts=$(mdts_nodes) # find MDS with pending deletions - for node in $(mdts_nodes); do + for node in ${mdts//,/ }; do changes=$(do_node $node "$LCTL get_param -n osc.*MDT*.sync_*" \ 2>/dev/null | calc_sum) if [[ $changes -eq 0 ]]; then @@ -3879,7 +3879,7 @@ wait_mds_ost_sync () { local WAIT_TIMEOUT=${1:-$MAX} local WAIT=0 local new_wait=true - local list=$(comma_list $(mdts_nodes)) + local list=$(mdts_nodes) local cmd="$LCTL get_param -n osp.*osc*.old_sync_processed" if ! do_facet $SINGLEMDS \ "$LCTL list_param osp.*osc*.old_sync_processed 2> /dev/null" @@ -3938,9 +3938,9 @@ wait_destroy_complete () { # why it takes so long time local MAX=${1:-5} local WAIT=0 - local list=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) while [ $WAIT -lt $MAX ]; do - local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight)) + local -a RPCs=($(do_nodes $mdts $LCTL get_param -n osp.*.destroys_in_flight)) local con=1 local i @@ -4072,10 +4072,11 @@ all_mds_up() { [ -n "$delay" ] || error "fail to get maxage" sleep $delay - local nodes=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) + # initiate statfs RPC, all to all MDTs - do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null - do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $mdts $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $mdts $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null } client_up() { @@ -6537,14 +6538,10 @@ run_lfsck() { awk '/^status/ { print \\\$2 }'" "completed" 60 || error "MDS${k} namespace isn't the expected 'completed'" done - local rep_mdt=$(do_nodes $(comma_list $(mdts_nodes)) \ - $LCTL get_param -n mdd.$FSNAME-*.lfsck_* | - awk '/repaired/ { print $2 }' | calc_sum) - local rep_ost=$(do_nodes $(osts_nodes) \ - $LCTL get_param -n obdfilter.$FSNAME-*.lfsck_* | - awk '/repaired/ { print $2 }' | calc_sum) - local repaired=$((rep_mdt + rep_ost)) - [ $repaired -eq 0 ] || + local repaired=$(do_nodes $(tgts_nodes) \ + "$LCTL get_param -n *.$FSNAME-*.lfsck_*" | + awk '/repaired/ { print $2 }' | calc_sum) + (( repaired == 0 )) || error "lfsck repaired $rep_mdt MDT and $rep_ost OST errors" } @@ -6969,32 +6966,33 @@ drop_bl_callback() { drop_mdt_ldlm_reply() { #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 RC=0 - local list=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) - do_nodes $list lctl set_param fail_loc=0x157 + do_nodes $mdts lctl set_param fail_loc=0x157 do_facet client "$@" || RC=$? - do_nodes $list lctl set_param fail_loc=0 + do_nodes $mdts lctl set_param fail_loc=0 return $RC } drop_mdt_ldlm_reply_once() { #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 RC=0 - local list=$(comma_list $(mdts_nodes)) + local mdts=$(mdts_nodes) - do_nodes $list lctl set_param fail_loc=0x80000157 + do_nodes $mdts lctl set_param fail_loc=0x80000157 do_facet client "$@" || RC=$? - do_nodes $list lctl set_param fail_loc=0 + do_nodes $mdts lctl set_param fail_loc=0 return $RC } clear_failloc() { - facet=$1 - pause=$2 + local facet=$1 + local pause=$2 + sleep $pause echo "clearing fail_loc on $facet" do_facet $facet "lctl set_param fail_loc=0 2>/dev/null || true" @@ -7928,21 +7926,26 @@ local_node() { ${!is_local} } -remote_node () { +remote_node() { local node=$1 ! local_node $node } -remote_mds () +# return true if any MDT is on a remote node +remote_mds() { + local mdts=$(mdts_nodes) local node - for node in $(mdts_nodes); do + + for node in ${mdts//,/ }; do remote_node $node && return 0 done + return 1 } +# return true if any MDT is on a remote node and no remote shell is configured remote_mds_nodsh() { [ -n "$CLIENTONLY" ] && return 0 || true @@ -8019,12 +8022,12 @@ facets_nodes () { # Get name of the active MGS node. mgs_node () { - echo -n $(facets_nodes $(get_facets MGS)) - } + echo -n $(facets_nodes $(get_facets MGS)) +} # Get all of the active MDS nodes. -mdts_nodes () { - echo -n $(facets_nodes $(get_facets MDS)) +mdts_nodes() { + comma_list $(facets_nodes $(get_facets MDS)) } # Get all of the active OSS nodes in a comma-separated list. @@ -8064,18 +8067,15 @@ all_mdts_nodes () { local host local failover_host local nodes - local nodes_sort local i - for i in $(seq $MDSCOUNT); do + for ((i=1; i <= $MDSCOUNT; i++)); do host=mds${i}_HOST failover_host=mds${i}failover_HOST nodes="$nodes ${!host} ${!failover_host}" done - [ -n "$nodes" ] || nodes="${mds_HOST} ${mdsfailover_HOST}" - nodes_sort=$(for i in $nodes; do echo $i; done | sort -u) - echo -n $nodes_sort + comma_list $nodes } # Get all of the OSS nodes, including active and passive nodes. @@ -8637,8 +8637,7 @@ run_mdtest () { save_lustre_params $(get_facets MDS) \ mdt.*.enable_remote_dir_gid > $params_file - do_nodes $(comma_list $(mdts_nodes)) \ - $LCTL set_param mdt.*.enable_remote_dir_gid=-1 + do_nodes $(mdts_nodes) $LCTL set_param mdt.*.enable_remote_dir_gid=-1 stack_trap "restore_lustre_params < $params_file" EXIT @@ -11109,8 +11108,7 @@ changelog_user_rec() { changelog_chmask() { local mask=$1 - do_nodes $(comma_list $(mdts_nodes)) \ - $LCTL set_param mdd.*.changelog_mask="$mask" + do_nodes $(mdts_nodes) $LCTL set_param mdd.*.changelog_mask="$mask" } # usage: __changelog_clear FACET CL_USER [+]INDEX