Whamcloud - gitweb
LU-10973 lutf: Fix crash and other updates 26/44726/5
author Amir Shehata <ashehata@whamcloud.com>
Mon, 23 Aug 2021 18:45:18 +0000 (11:45 -0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 31 Aug 2021 05:20:24 +0000 (05:20 +0000)
Fix crash in wait_for_agents(), which was misusing
cYAML_get_next_seq_item(): the iterator pointer is now
initialized to NULL before each pass over the agent list.

Update the lustre_lnet_config_ni() call to pass the newly added
conns_per_peer parameter. Tests can be added later to explicitly
exercise setting conns_per_peer from the C API.

Remove the auth_timeout argument from the paramiko connect() calls
to stay backward compatible with older versions of the paramiko
Python API.

Only delete the progress file if this node is the LUTF master
node. This prevents nodes from trampling over each other when
they use the same directory to dump temporary files.

Test-parameters: trivial

Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ifb5ef0e16c6bc859c3893919a9242b64fd049ebe
Reviewed-on: https://review.whamcloud.com/44726
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/lutf/python/config/lutf_start.py
lustre/tests/lutf/python/infra/lutf.py
lustre/tests/lutf/python/infra/lutf_paramiko.py
lustre/tests/lutf/python/tests-infra/lnet_helpers.py
lustre/tests/lutf/src/liblutf_agent.c

index 5327296..f6132a9 100644 (file)
@@ -165,10 +165,6 @@ class LUTF:
                        cfg['lutf']['tmp-dir'] = os.environ['LUTF_TMP_DIR']
                except:
                        pass
-               try:
-                       cfg['lutf']['tmp-dir'] = os.environ['LUTF_TMP_DIR']
-               except:
-                       pass
 
                if len(agent_list) > 0:
                        cfg['lutf']['agent-list'] = agent_list
@@ -228,11 +224,11 @@ class LUTF:
        def __collect_lutf_logs(self, host):
                if host != os.environ['HOSTNAME']:
                        rfname = "lutf."+host+".tar.gz"
+                       tmp_dir = cfg['lutf']['tmp-dir']
                        rfpath = os.path.join(os.sep, 'tmp', rfname)
-                       rtardir = os.path.join('tmp', 'lutf')
-                       cmd = "tar -czf "+rfpath+" -C "+os.sep+" "+rtardir
+                       cmd = "tar -czf "+rfpath+" -C "+os.sep+" "+tmp_dir
                        lutf_exec_remote_cmd(cmd, host);
-                       lutf_get_file(host, rfpath, os.path.join(os.sep, 'tmp', 'lutf', rfname))
+                       lutf_get_file(host, rfpath, os.path.join(tmp_dir, rfname))
 
        def run(self):
                master = ''
index 0077b98..1d4b751 100644 (file)
@@ -770,7 +770,7 @@ class Myself:
                        self.__lustre_base_path = ''
                self.alias_list = self.provision_intfs(config_ifs_num)
                # delete any older test_progress files
-               if os.path.isfile(self.get_test_progress_path()):
+               if os.path.isfile(self.get_test_progress_path()) and self.__lutf_type == EN_LUTF_MASTER:
                        os.remove(self.get_test_progress_path())
 
        def import_env_vars(self, fpath):
index 5f59646..684773c 100644 (file)
@@ -4,7 +4,7 @@ def lutf_get_file(target, rfile, sfile):
        ssh = paramiko.SSHClient()
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-       ssh.connect(hostname=target, timeout=3, banner_timeout=3, auth_timeout=3, username='root')
+       ssh.connect(hostname=target, timeout=3, banner_timeout=3, username='root')
        sftp = ssh.open_sftp()
 
        logging.debug("Commencing get %s -> %s" % (rfile, sfile))
@@ -17,7 +17,7 @@ def lutf_put_file(target, sfile, rfile):
        ssh = paramiko.SSHClient()
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-       ssh.connect(hostname=target, timeout=3, banner_timeout=3, auth_timeout=3, username='root')
+       ssh.connect(hostname=target, timeout=3, banner_timeout=3, username='root')
        sftp = ssh.open_sftp()
 
        logging.debug("Commencing put %s -> %s" % (sfile, rfile))
@@ -30,7 +30,7 @@ def lutf_exec_remote_cmd(cmd, host, ignore_err=False):
        ssh = paramiko.SSHClient()
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-       ssh.connect(hostname=host, timeout=3, banner_timeout=3, auth_timeout=3, username='root')
+       ssh.connect(hostname=host, timeout=3, banner_timeout=3, username='root')
        stdin, stdout, stderr = ssh.exec_command(cmd)
 
        error = False
index 376c460..e113338 100644 (file)
@@ -133,7 +133,7 @@ class LNetHelpers(BaseTest):
 
        def api_config_ni(self, net, device_list=[], global_cpts=None, ip2nets=None,
                          peer_credits=128, peer_timeout=180, peer_buffer_credits=0,
-                         credits=256):
+                         credits=256, conns_per_peer = -1):
                tunables = lnetconfig.lnet_ioctl_config_lnd_tunables()
                tunables.lt_cmn.lct_peer_timeout = peer_timeout
                tunables.lt_cmn.lct_peer_tx_credits = peer_credits;
@@ -165,7 +165,7 @@ class LNetHelpers(BaseTest):
                                return False, [rc, net, device_list, global_cpts, ip2nets]
                else:
                        g_cpts = None
-               rc, yaml_err = lnetconfig.lustre_lnet_config_ni(nwd, g_cpts, ip2nets, tunables, -1)
+               rc, yaml_err = lnetconfig.lustre_lnet_config_ni(nwd, g_cpts, ip2nets, tunables, conns_per_peer, -1)
                #Freeing the g_cpts causes a segmentation fault
                #if g_cpts:
                #       lnetconfig.cfs_expr_list_free(g_cpts)
index ea35271..6597383 100644 (file)
@@ -442,7 +442,6 @@ lutf_rc_t wait_for_agents(struct cYAML *agents, int timeout)
 {
        struct timeval start;
        struct timeval now;
-       struct cYAML *a;
        bool found = false;
        lutf_agent_blk_t *agent;
 
@@ -457,6 +456,8 @@ lutf_rc_t wait_for_agents(struct cYAML *agents, int timeout)
        PDEBUG("Start waiting for Agents");
 
        while (now.tv_sec - start.tv_sec < timeout && !found) {
+               struct cYAML *a = NULL;
+
                found = true;
                PDEBUG("Waiting for Agents");
                while (cYAML_get_next_seq_item(agents, &a) != NULL) {