From 370044d25d92254ae83972be6934f5fccba76294 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Mon, 23 Aug 2021 11:45:18 -0700 Subject: [PATCH] LU-10973 lutf: Fix crash and other updates Fix crash in wait_for_agents. Was mis-using cYAML_get_next_seq_item(). Update the lustre_lnet_config_ni() with a newly added parameter for conns_per_peer. Later on tests can be added to explicitly test setting the conns_per_peer from the C API. Remove auth_timeout from the paramiko file to be backwards compatible with older versions of the paramiko python API. Only delete the progress file if this node is the LUTF master node. This is to avoid other nodes trampling over each other if they are using the same directory to dump temporary files. Test-parameters: trivial Signed-off-by: Amir Shehata Change-Id: Ifb5ef0e16c6bc859c3893919a9242b64fd049ebe Reviewed-on: https://review.whamcloud.com/44726 Reviewed-by: Chris Horn Reviewed-by: James Simmons Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/tests/lutf/python/config/lutf_start.py | 10 +++------- lustre/tests/lutf/python/infra/lutf.py | 2 +- lustre/tests/lutf/python/infra/lutf_paramiko.py | 6 +++--- lustre/tests/lutf/python/tests-infra/lnet_helpers.py | 4 ++-- lustre/tests/lutf/src/liblutf_agent.c | 3 ++- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/lustre/tests/lutf/python/config/lutf_start.py b/lustre/tests/lutf/python/config/lutf_start.py index 5327296..f6132a9 100644 --- a/lustre/tests/lutf/python/config/lutf_start.py +++ b/lustre/tests/lutf/python/config/lutf_start.py @@ -165,10 +165,6 @@ class LUTF: cfg['lutf']['tmp-dir'] = os.environ['LUTF_TMP_DIR'] except: pass - try: - cfg['lutf']['tmp-dir'] = os.environ['LUTF_TMP_DIR'] - except: - pass if len(agent_list) > 0: cfg['lutf']['agent-list'] = agent_list @@ -228,11 +224,11 @@ class LUTF: def __collect_lutf_logs(self, host): if host != os.environ['HOSTNAME']: rfname = "lutf."+host+".tar.gz" + tmp_dir = cfg['lutf']['tmp-dir'] rfpath = os.path.join(os.sep, 'tmp', rfname) - rtardir = os.path.join('tmp', 'lutf') - cmd = "tar -czf "+rfpath+" -C "+os.sep+" "+rtardir + cmd = "tar -czf "+rfpath+" -C "+os.sep+" "+tmp_dir lutf_exec_remote_cmd(cmd, host); - lutf_get_file(host, rfpath, os.path.join(os.sep, 'tmp', 'lutf', rfname)) + lutf_get_file(host, rfpath, os.path.join(tmp_dir, rfname)) def run(self): master = '' diff --git a/lustre/tests/lutf/python/infra/lutf.py b/lustre/tests/lutf/python/infra/lutf.py index 0077b98..1d4b751 100644 --- a/lustre/tests/lutf/python/infra/lutf.py +++ b/lustre/tests/lutf/python/infra/lutf.py @@ -770,7 +770,7 @@ class Myself: self.__lustre_base_path = '' self.alias_list = self.provision_intfs(config_ifs_num) # delete any older test_progress files - if os.path.isfile(self.get_test_progress_path()): + if os.path.isfile(self.get_test_progress_path()) and self.__lutf_type == EN_LUTF_MASTER: os.remove(self.get_test_progress_path()) def import_env_vars(self, fpath): diff --git a/lustre/tests/lutf/python/infra/lutf_paramiko.py b/lustre/tests/lutf/python/infra/lutf_paramiko.py index 5f59646..684773c 100644 --- a/lustre/tests/lutf/python/infra/lutf_paramiko.py +++ b/lustre/tests/lutf/python/infra/lutf_paramiko.py @@ -4,7 +4,7 @@ def lutf_get_file(target, rfile, sfile): ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - ssh.connect(hostname=target, timeout=3, banner_timeout=3, auth_timeout=3, username='root') + ssh.connect(hostname=target, timeout=3, banner_timeout=3, username='root') sftp = ssh.open_sftp() logging.debug("Commencing get %s -> %s" % (rfile, sfile)) @@ -17,7 +17,7 @@ def lutf_put_file(target, sfile, rfile): ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - ssh.connect(hostname=target, timeout=3, banner_timeout=3, auth_timeout=3, username='root') + ssh.connect(hostname=target, timeout=3, banner_timeout=3, username='root') sftp = ssh.open_sftp() logging.debug("Commencing put %s -> %s" % (sfile, rfile)) @@ -30,7 +30,7 @@ def lutf_exec_remote_cmd(cmd, host, ignore_err=False): ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - ssh.connect(hostname=host, timeout=3, banner_timeout=3, auth_timeout=3, username='root') + ssh.connect(hostname=host, timeout=3, banner_timeout=3, username='root') stdin, stdout, stderr = ssh.exec_command(cmd) error = False diff --git a/lustre/tests/lutf/python/tests-infra/lnet_helpers.py b/lustre/tests/lutf/python/tests-infra/lnet_helpers.py index 376c460..e113338 100644 --- a/lustre/tests/lutf/python/tests-infra/lnet_helpers.py +++ b/lustre/tests/lutf/python/tests-infra/lnet_helpers.py @@ -133,7 +133,7 @@ class LNetHelpers(BaseTest): def api_config_ni(self, net, device_list=[], global_cpts=None, ip2nets=None, peer_credits=128, peer_timeout=180, peer_buffer_credits=0, - credits=256): + credits=256, conns_per_peer = -1): tunables = lnetconfig.lnet_ioctl_config_lnd_tunables() tunables.lt_cmn.lct_peer_timeout = peer_timeout tunables.lt_cmn.lct_peer_tx_credits = peer_credits; @@ -165,7 +165,7 @@ class LNetHelpers(BaseTest): return False, [rc, net, device_list, global_cpts, ip2nets] else: g_cpts = None - rc, yaml_err = lnetconfig.lustre_lnet_config_ni(nwd, g_cpts, ip2nets, tunables, -1) + rc, yaml_err = lnetconfig.lustre_lnet_config_ni(nwd, g_cpts, ip2nets, tunables, conns_per_peer, -1) #Freeing the g_cpts causes a segmentation fault #if g_cpts: # lnetconfig.cfs_expr_list_free(g_cpts) diff --git a/lustre/tests/lutf/src/liblutf_agent.c b/lustre/tests/lutf/src/liblutf_agent.c index ea35271..6597383 100644 --- a/lustre/tests/lutf/src/liblutf_agent.c +++ b/lustre/tests/lutf/src/liblutf_agent.c @@ -442,7 +442,6 @@ lutf_rc_t wait_for_agents(struct cYAML *agents, int timeout) { struct timeval start; struct timeval now; - struct cYAML *a; bool found = false; lutf_agent_blk_t *agent; @@ -457,6 +456,8 @@ lutf_rc_t wait_for_agents(struct cYAML *agents, int timeout) PDEBUG("Start waiting for Agents"); while (now.tv_sec - start.tv_sec < timeout && !found) { + struct cYAML *a = NULL; + found = true; PDEBUG("Waiting for Agents"); while (cYAML_get_next_seq_item(agents, &a) != NULL) { -- 1.8.3.1