Whamcloud - gitweb
LU-11634 tests: sanityn/test_77 improvements 07/33607/5
authorVladimir Saveliev <c17830@cray.com>
Mon, 9 Apr 2018 09:18:50 +0000 (12:18 +0300)
committerOleg Drokin <green@whamcloud.com>
Mon, 18 Feb 2019 06:38:10 +0000 (06:38 +0000)
sshd limits number of simultaneous unauthenticated connections via
MaxStartups configuration parameter. By default, 10 connections are
allowed. nrs_write_read() tries to run up to 32 do_nodes() in
parallel, causing sshd to drop some of the connections.

The fix is to have do_nodes() start the required number of dd's in
parallel on each client itself.

Minor changes which were probably meant at the development:
- Test filenames include $HOSTNAME, apparently so that each client
works with its own file. Add the missing escaping backslashes so that
$HOSTNAME is expanded on the client as intended.
- Add the conv=notrunc parameter for the dd's which write the Lustre
  file at different seek offsets.
- Have the reading dd's read the files which were created especially
  for that purpose.
- Use /dev/null instead of /dev/zero to throw the read data away.

Signed-off-by: Vladimir Saveliev <c17830@cray.com>
Change-Id: I496b0f6b50811351ac8e0e606cf5a20843fab5d4
Cray-bug-id: LUS-2493
Test-Parameters: testlist=sanityn envdefinitions=ONLY=77
Reviewed-on: https://review.whamcloud.com/33607
Reviewed-by: Elena Gryaznova <c17455@cray.com>
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/sanityn.sh

index 586fbce..a3d505d 100755 (executable)
@@ -3123,31 +3123,47 @@ nrs_write_read() {
        chmod 777 $dir
 
        do_nodes $CLIENTS $myRUNAS \
-               dd if=/dev/zero of="$dir/nrs_r_$HOSTNAME" bs=1M count=$n ||
+               dd if=/dev/zero of="$dir/nrs_r_\$HOSTNAME" bs=1M count=$n ||
                error "dd at 0 on client failed (1)"
 
-       for ((i = 0; i < $n; i++)); do
-               do_nodes $CLIENTS $myRUNAS dd if=/dev/zero \
-                       of="$dir/nrs_w_$HOSTNAME" bs=1M seek=$i count=1 ||
-                        error "dd at ${i}MB on client failed (2)" &
-               local pids_w[$i]=$!
-       done
+       do_nodes $CLIENTS $myRUNAS \
+               "declare -a pids_w;
+               for ((i = 0; i < $n; i++)); do
+                       dd if=/dev/zero of=$dir/nrs_w_\$HOSTNAME bs=1M \
+seek=\\\$i count=1 conv=notrunc &
+                       pids_w[\\\$i]=\\\$!;
+               done;
+               rc_w=0;
+               for ((i = 0; i < $n; i++)); do
+                       wait \\\${pids_w[\\\$i]};
+                       newrc=\\\$?;
+                       [ \\\$newrc -gt \\\$rc_w ] && rc_w=\\\$newrc;
+               done;
+               exit \\\$rc_w" &
+       local pid_w=$!
        do_nodes $CLIENTS sync;
        cancel_lru_locks osc
 
-       for ((i = 0; i < $n; i++)); do
-               do_nodes $CLIENTS $myRUNAS dd if="$dir/nrs_w_$HOSTNAME" \
-                       of=/dev/zero bs=1M seek=$i count=1 > /dev/null ||
-                       error "dd at ${i}MB on client failed (3)" &
-               local pids_r[$i]=$!
-       done
+       do_nodes $CLIENTS $myRUNAS \
+               "declare -a pids_r;
+               for ((i = 0; i < $n; i++)); do
+                       dd if=$dir/nrs_r_\$HOSTNAME bs=1M of=/dev/null \
+seek=\\\$i count=1 &
+                       pids_r[\\\$i]=\\\$!;
+               done;
+               rc_r=0;
+               for ((i = 0; i < $n; i++)); do
+                       wait \\\${pids_r[\\\$i]};
+                       newrc=\\\$?;
+                       [ \\\$newrc -gt \\\$rc_r ] && rc_r=\\\$newrc;
+               done;
+               exit \\\$rc_r" &
+       local pid_r=$!
        cancel_lru_locks osc
 
-       for ((i = 0; i < $n; i++)); do
-               wait ${pids_w[$i]}
-               wait ${pids_r[$i]}
-       done
-       rm -rf $dir || error "rm -rf $dir failed"
+       wait $pid_w || error "dd (write) failed (2)"
+       wait $pid_r || error "dd (read) failed (3)"
+       rm -rvf $dir || error "rm -rf $dir failed"
 }
 
 test_77a() { #LU-3266