From 786479e3c897ccc19cf9aacddc7e7f821777685b Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Fri, 28 Jan 2022 12:38:53 -0600 Subject: [PATCH] EX-4683 lipe: remove lipe_test Remove the unused utilities lipe_test, lipe_test_launch, lipe_test_console, lipe_test_scheduler and lipe_virt. Remove supporting files. Remove the lipe-pyltest RPM. Test-Parameters: trivial testlist=hot-pools,sanity-lipe Signed-off-by: John L. Hammond Change-Id: I365c6fb6995a5029a397499b36deefc5996e4507 Reviewed-on: https://review.whamcloud.com/46363 Tested-by: jenkins Tested-by: Maloo --- lipe/.gitignore | 1 - lipe/Makefile.am | 22 +- lipe/autogen.sh | 2 +- .../clownfish/combined_mgs/lipe_virt.conf | 1 - .../clownfish/seperate_mgs/lipe_virt.conf | 193 -- lipe/example_configs/lipe/lipe_virt.conf | 1 - .../example_configs/ltest/lipe_test_scheduler.conf | 67 - lipe/gen_lipe_test | 11 - lipe/init.d/lipe_test_scheduler | 93 - lipe/lipe.spec.in | 70 +- lipe/lipe_test | 11 - lipe/lipe_test_console | 11 - lipe/lipe_test_launch | 11 - lipe/lipe_test_scheduler | 11 - lipe/lipe_virt | 11 - lipe/pybuild/__init__.py | 4 +- lipe/pybuild/gen_lipe_test.py | 84 - lipe/pybuild/pyltest_import_check.py | 43 - lipe/pylipe/__init__.py | 4 +- lipe/pylipe/lipe_hotpool_test.py | 1833 -------------- lipe/pylipe/lipe_test.py | 2665 -------------------- lipe/pyltest/__init__.py | 6 - lipe/pyltest/lipe_test_console.py | 309 --- lipe/pyltest/lipe_test_launch.py | 1717 ------------- lipe/pyltest/lipe_test_scheduler.py | 1572 ------------ lipe/pyltest_import_check | 11 - lipe/pylustre/__init__.py | 3 - lipe/pylustre/constants.py | 5 - lipe/pylustre/lipe_virt.py | 1814 ------------- lipe/pylustre/lustre_test.py | 158 -- lipe/pylustre/test_common.py | 321 --- lipe/systemd/lipe_test_scheduler.service | 10 - 32 files changed, 5 insertions(+), 11070 deletions(-) delete mode 120000 lipe/example_configs/clownfish/combined_mgs/lipe_virt.conf delete mode 100644 lipe/example_configs/clownfish/seperate_mgs/lipe_virt.conf delete 
mode 120000 lipe/example_configs/lipe/lipe_virt.conf delete mode 100644 lipe/example_configs/ltest/lipe_test_scheduler.conf delete mode 100755 lipe/gen_lipe_test delete mode 100755 lipe/init.d/lipe_test_scheduler delete mode 100755 lipe/lipe_test delete mode 100755 lipe/lipe_test_console delete mode 100755 lipe/lipe_test_launch delete mode 100755 lipe/lipe_test_scheduler delete mode 100755 lipe/lipe_virt delete mode 100644 lipe/pybuild/gen_lipe_test.py delete mode 100644 lipe/pybuild/pyltest_import_check.py delete mode 100644 lipe/pylipe/lipe_hotpool_test.py delete mode 100644 lipe/pylipe/lipe_test.py delete mode 100644 lipe/pyltest/__init__.py delete mode 100644 lipe/pyltest/lipe_test_console.py delete mode 100644 lipe/pyltest/lipe_test_launch.py delete mode 100644 lipe/pyltest/lipe_test_scheduler.py delete mode 100755 lipe/pyltest_import_check delete mode 100644 lipe/pylustre/lipe_virt.py delete mode 100644 lipe/pylustre/lustre_test.py delete mode 100644 lipe/pylustre/test_common.py delete mode 100644 lipe/systemd/lipe_test_scheduler.service diff --git a/lipe/.gitignore b/lipe/.gitignore index 307f2e7..d608af5 100644 --- a/lipe/.gitignore +++ b/lipe/.gitignore @@ -6,7 +6,6 @@ *~ /build /example_configs/clownfish/clownfish_test.conf -/example_configs/lipe/lipe_test.conf /lipe.spec /lipe-*.tar.bz2 /lipe-*.tar.gz diff --git a/lipe/Makefile.am b/lipe/Makefile.am index dfaf527..a6a3bd8 100644 --- a/lipe/Makefile.am +++ b/lipe/Makefile.am @@ -33,7 +33,6 @@ rpmbuild_opt += --without hotpool endif PYTHON_COMMANDS = \ - gen_lipe_test \ ldsync \ lipe_build \ lipe_expression_tests \ @@ -43,27 +42,19 @@ PYTHON_COMMANDS = \ lipe_install \ lipe_install_build_deps \ lipe_launch \ - lipe_test \ - lipe_test_console \ - lipe_test_launch \ - lipe_test_scheduler \ - lipe_virt \ loris_backup \ loris_crontab \ loris_test \ - lpcc \ - pyltest_import_check + lpcc EXTRA_DIST= \ $(PYTHON_COMMANDS) \ detect-distro.sh \ lipe-revision.sh \ 
example_configs/clownfish/seperate_mgs/clownfish.conf \ - example_configs/clownfish/seperate_mgs/lipe_virt.conf \ example_configs/lipe/lipe_install.conf \ example_configs/lipe/lipe_launch.json \ example_configs/loris/loris.conf \ - example_configs/ltest/lipe_test_scheduler.conf \ example_configs/hotpool/* \ init.d/* \ lipe.conf \ @@ -82,25 +73,16 @@ EXTRA_DIST= \ pylipe/*.py \ pyloris/*.py \ pylustre/*.py \ - pyltest/*.py \ scripts/*.sh \ systemd/* \ man/* \ .pylintrc -PYLTEST_FILES = $(wildcard pyltest/*.py) PYTHON_LIB_FILES = $(wildcard pyclownfish/*.py pylustre/*.py pyloris/*.py) -PYTHON_LIB_FILES += $(PYLTEST_FILES) PYTHON_FILES = $(PYTHON_LIB_FILES) $(PYTHON_COMMANDS) PYTHON_CHECKS = $(PYTHON_FILES:%=%.python_checked) -PYLTEST_CHECKS = $(PYLTEST_FILES:%=%.pyltest_import_checked) -PYTHON_CHECKS += $(PYLTEST_CHECKS) CHECKS = $(PYTHON_CHECKS) -%.pyltest_import_checked: % - python2 ./pyltest_import_check $< - touch $@ - %.python_checked: % pylipe/.pylintrc @if test $< != $(PYTHON_PROTOBUF); then \ PYLINTRC=pylipe/.pylintrc $(PYLINT) --disable=I $< || exit 1; \ @@ -121,7 +103,6 @@ mrproper: maintainer-clean rm -f compile depcomp install-sh missing PYLUSTRE_RPM = build/RPMS/x86_64/lipe-pylustre-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm -PYLTEST_RPM = build/RPMS/x86_64/lipe-pyltest-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm CLOWNFISH_RPM = build/RPMS/x86_64/lipe-clownfish-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm LIPE_RPM = build/RPMS/x86_64/lipe-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm LIPE_DEBUGINFO_RPM = build/RPMS/x86_64/lipe-debuginfo-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm @@ -167,7 +148,6 @@ lipe-$(PACKAGE_VERSION).x86_64.iso: rpms cp $(LIPE_RPM) $(PACKAGE_PATH) cp $(LIPE_DEBUGINFO_RPM) $(PACKAGE_PATH) cp $(PYLUSTRE_RPM) $(PACKAGE_PATH) - cp $(PYLTEST_RPM) $(PACKAGE_PATH) cp $(LORIS_RPM) $(PACKAGE_PATH) cp $(LIPE_CLIENT_RPM) 
$(PACKAGE_PATH) cp $(LIPE_SERVER_RPM) $(PACKAGE_PATH) diff --git a/lipe/autogen.sh b/lipe/autogen.sh index bbf2366..3823866 100644 --- a/lipe/autogen.sh +++ b/lipe/autogen.sh @@ -55,6 +55,6 @@ set -x touch configure.ac && autoheader \ && aclocal \ -&& $libtoolize --ltdl --copy --force \ +&& $libtoolize --copy --force \ && automake --add-missing --copy \ && autoconf diff --git a/lipe/example_configs/clownfish/combined_mgs/lipe_virt.conf b/lipe/example_configs/clownfish/combined_mgs/lipe_virt.conf deleted file mode 120000 index 383ca28..0000000 --- a/lipe/example_configs/clownfish/combined_mgs/lipe_virt.conf +++ /dev/null @@ -1 +0,0 @@ -../seperate_mgs/lipe_virt.conf \ No newline at end of file diff --git a/lipe/example_configs/clownfish/seperate_mgs/lipe_virt.conf b/lipe/example_configs/clownfish/seperate_mgs/lipe_virt.conf deleted file mode 100644 index 6659865..0000000 --- a/lipe/example_configs/clownfish/seperate_mgs/lipe_virt.conf +++ /dev/null @@ -1,193 +0,0 @@ -# Configuration file of installing virtual machines -# -# Configuration Guide: -# -# -ssh_hosts: # Array of hosts - - host_id: server17 # ID of this SSH host - hostname: server17 # The host name - ssh_identity_file: /root/.ssh/id_dsa # The SSH key to connect to the host -templates: # The templates to installed - - hostname: server17_rhel6_template # Template hostname name - internet: true # Whether to enable Internet access - dns: 10.0.0.253 # The DNS IP - ram_size: 2048 # Ram size in MB - disk_sizes: # Disks to attach to this VM - - 10 # Disk size in GB - iso: /work/ISOs/CentOS-6.9-x86_64-bin-DVD1.iso # The path of ISO - reinstall: false # Whether to reinstall - network_configs: # Configurations of network interfaces - - gateway: 10.0.0.253 - ip: 10.0.0.189 - netmask: 255.255.252.0 - virt_install_option: bridge=br0 - server_host_id: server17 # On which host this template exists - image_dir: /images2 # The path to save virtual machine images - distro: rhel6 # The distro version - - hostname: 
server17_rhel7_template - internet: true - dns: 10.0.0.253 - ram_size: 2048 - disk_sizes: - - 10 - iso: /work/ISOs/CentOS-7-x86_64-DVD-1611.iso - reinstall: false - network_configs: - - gateway: 10.0.0.253 - ip: 10.0.0.190 - netmask: 255.255.252.0 - virt_install_option: bridge=br0 - server_host_id: server17 - image_dir: /images2 - distro: rhel7 -shared_disks: # Array of disks shared by multiple hosts - - disk_id: lipe_mgs # ID of the disk - size: 1 # Size of the disk (GB) - server_host_id: server17 # On which host this disk exists - image_file: /images2/lipe_mgs.img # Path of the image file - - disk_id: lipe0_mdt0 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_mdt0.img - - disk_id: lipe0_mdt1 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_mdt1.img - - disk_id: lipe0_ost0 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_ost0.img - - disk_id: lipe0_ost1 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_ost1.img - - disk_id: lipe0_ost2 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_ost2.img - - disk_id: lipe0_ost3 - size: 5 - server_host_id: server17 - image_file: /images2/lipe0_ost3.img - - disk_id: lipe1_mdt0 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_mdt0.img - - disk_id: lipe1_mdt1 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_mdt1.img - - disk_id: lipe1_ost0 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_ost0.img - - disk_id: lipe1_ost1 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_ost1.img - - disk_id: lipe1_ost2 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_ost2.img - - disk_id: lipe1_ost3 - size: 5 - server_host_id: server17 - image_file: /images2/lipe1_ost3.img -vm_hosts: # Array of hosts - - hostname: server17-el7-vm1 # The host name - ips: # The host IPs - - 10.0.1.148 - reinstall: false # Whether to reinstall this vm - template_hostname: server17_rhel7_template # The 
hostname of template - shared_disk_ids: - - lipe_mgs - - lipe0_mdt0 - - lipe0_mdt1 - - hostname: server17-el7-vm2 - ips: - - 10.0.1.149 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe_mgs - - lipe0_mdt0 - - lipe0_mdt1 - - hostname: server17-el7-vm3 - ips: - - 10.0.1.251 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe0_ost0 - - lipe0_ost1 - - lipe0_ost2 - - lipe0_ost3 - - hostname: server17-el7-vm4 - ips: - - 10.0.1.252 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe0_ost0 - - lipe0_ost1 - - lipe0_ost2 - - lipe0_ost3 - - hostname: server17-el7-vm5 - ips: - - 10.0.1.253 - reinstall: false - template_hostname: server17_rhel7_template # The hostname of template - shared_disk_ids: - - lipe1_mdt0 - - lipe1_mdt1 - - hostname: server17-el7-vm6 - ips: - - 10.0.1.254 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe1_mdt0 - - lipe1_mdt1 - - hostname: server17-el7-vm7 - ips: - - 10.0.1.255 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe1_ost0 - - lipe1_ost1 - - lipe1_ost2 - - lipe1_ost3 - - hostname: server17-el7-vm8 - ips: - - 10.0.2.197 - reinstall: false - template_hostname: server17_rhel7_template - shared_disk_ids: - - lipe1_ost0 - - lipe1_ost1 - - lipe1_ost2 - - lipe1_ost3 - - hostname: server17-el7-vm9 - ips: - - 10.0.2.198 - reinstall: false - template_hostname: server17_rhel7_template - - hostname: server17-el7-vm10 - ips: - - 10.0.2.199 - reinstall: false - template_hostname: server17_rhel7_template - - hostname: server17-el7-vm11 - ips: - - 10.0.2.200 - reinstall: false - template_hostname: server17_rhel7_template - - hostname: server17-el7-vm12 - ips: - - 10.0.2.201 - reinstall: false - template_hostname: server17_rhel7_template -hosts: # Array of hosts to add into /etc/hosts of VMs - - hostname: server17 # Hostname of the host - ip: 10.0.0.37 # IP 
of the host diff --git a/lipe/example_configs/lipe/lipe_virt.conf b/lipe/example_configs/lipe/lipe_virt.conf deleted file mode 120000 index f276e3d..0000000 --- a/lipe/example_configs/lipe/lipe_virt.conf +++ /dev/null @@ -1 +0,0 @@ -../clownfish/seperate_mgs/lipe_virt.conf \ No newline at end of file diff --git a/lipe/example_configs/ltest/lipe_test_scheduler.conf b/lipe/example_configs/ltest/lipe_test_scheduler.conf deleted file mode 100644 index 46d06ac..0000000 --- a/lipe/example_configs/ltest/lipe_test_scheduler.conf +++ /dev/null @@ -1,67 +0,0 @@ -# Configuration file of LiPE test Scheduler Service -# -# Configuration Guide: -# port: -# network port used to connect to the scheduler service, defalut to 1234 -# -# log_dir: -# log diraectory to save latest scheduler log -# -# $test_hosts: -# Hosts used to build LiPE or to run LiPE tests. -# -# $templates: -# template used to recover the broken VMs -# -port: 1234 -test_hosts: # Array of hosts - - hostname: server17-el7-vm[1-9] # The host name - purpose: test # The purpose of these hosts, either test or build - distro: rhel7 # Distro, support rhel6,rhel7 now, debian serise is WIP - kvm: # KVM related config - kvm_server_hostname: server17 # KVM host server, that these nodes run on - kvm_template_ipv4_address: 10.0.0.190 # IP adress of the template, that used to recover the kvm VM - template_hostname: rhel7_template # The template hostname configured in $templates - concurrency: 1 - - hostname: server17 - purpose: build - distro: rhel7 - concurrency: 3 -templates: # The templates to installed - - hostname: rhel6_template # Template hostname name - internet: true # Whether to enable Internet access - dns: 10.0.0.253 # The DNS IP - ram_size: 2048 # Ram size in MB - bus_type: virtio # virt bus type, virtio, scsi, ide - disk_sizes: # Disks attached to this VM - - 10 - - 2 - iso: /work/ISOs/CentOS-6.9-x86_64-bin-DVD1.iso # The path of ISO - reinstall: false # Whether to reinstall - network_configs: # Configurations of 
network interfaces - - gateway: 10.0.0.253 - ip: 10.0.0.189 - netmask: 255.255.252.0 - virt_install_option: bridge=br0 - image_dir: /images/ # Where are the virt image stored. - distro: rhel6 # The distro version - - hostname: rhel7_template - internet: true - dns: 10.0.0.253 - ram_size: 2048 - bus_type: virtio - disk_sizes: - - 10 - - 2 - iso: /work/ISOs/CentOS-7-x86_64-Minimal-1804.iso - reinstall: false - network_configs: - - gateway: 10.0.0.253 - ip: 10.0.0.189 - netmask: 255.255.252.0 - virt_install_option: bridge=br0 - image_dir: /images/ - distro: rhel7 -ip_addresses: - - ip_address: 10.0.0.40 - bindnetaddr: 10.0.0.0 diff --git a/lipe/gen_lipe_test b/lipe/gen_lipe_test deleted file mode 100755 index f916980..0000000 --- a/lipe/gen_lipe_test +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Generate lipe_test.conf -""" -from pybuild import gen_lipe_test - -if __name__ == "__main__": - gen_lipe_test.main() diff --git a/lipe/init.d/lipe_test_scheduler b/lipe/init.d/lipe_test_scheduler deleted file mode 100755 index e5b13fa..0000000 --- a/lipe/init.d/lipe_test_scheduler +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/sh -# -# Starts/stop the lipe test scheduler service -# -# chkconfig: 345 95 5 -# description: Lipe Test Scheduler -# configs: /etc/lipe_test_scheduler.conf - -### BEGIN INIT INFO -# Required-Start: $local_fs -# Required-Stop: $local_fs -# Default-Start: 345 -# Default-Stop: 95 -# Short-Description: Lipe Test Scheduler -# Description:Lipe Test Scheduler, common scheduler framework, manages the usage of test hosts and services the test launchers. -### END INIT INFO - -. /etc/rc.d/init.d/functions - -exec=/usr/bin/lipe_test_scheduler -prog="lipe_test_scheduler" - -start() { - [ -x $exec ] || exit 5 - echo -n $"Starting $prog ..." - $exec > /dev/null 2> /dev/null & - echo -} - -stop() { - echo -n $"Stopping $prog ..." 
- if [ -n "`pidfileofproc $exec`" ] ; then - killproc $exec - else - killall $prog > /dev/null 2> /dev/null & - fi - echo -} - -restart() { - stop - start -} - -reload() { - restart -} - -force_reload() { - restart -} - -rh_status() { - # run checks to determine if the service is running or use generic status - status $prog -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - reload) - rh_status_q || exit 7 - $1 - ;; - force-reload) - force_reload - ;; - status) - rh_status - ;; - condrestart|try-restart) - rh_status_q || exit 0 - restart - ;; - *) - echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}" - exit 2 -esac -exit $? diff --git a/lipe/lipe.spec.in b/lipe/lipe.spec.in index 9a2734f..63d6728 100644 --- a/lipe/lipe.spec.in +++ b/lipe/lipe.spec.in @@ -106,44 +106,6 @@ Group: Applications/System %description clownfish Clownfish manages Lustre clusters for HA purposes. 
-%package pyltest -Summary: Python Library of LiPE common test framework -Requires: lipe-pylustre = %{version}-%{release} -Provides: lipe-pyltest = %{version}-%{release} -%if %{with systemd} -Requires(post): systemd -Requires(preun): systemd -Requires(postun): systemd -BuildRequires: systemd -%else -Requires(post): chkconfig -Requires(preun): chkconfig -%endif -Group: Applications/System - -%description pyltest -Pyltest is a common test framework for LiPE - -%post pyltest -%if %{with systemd} -%systemd_post lipe_test_scheduler.service -%endif - -%preun pyltest -%if %{with systemd} -%systemd_preun lipe_test_scheduler.service -%else -/sbin/service lipe_test_scheduler stop >/dev/null 2>&1 ||: -/sbin/chkconfig --del lipe_test_scheduler -%endif - -%postun pyltest -%if %{with systemd} -%systemd_postun_with_restart lipe_test_scheduler.service -%else -/sbin/service lipe_test_scheduler condrestart >/dev/null 2>&1 ||: -%endif - %package server Summary: Lipe Server Package Requires: lustre @@ -193,10 +155,9 @@ python2 -m py_compile pylustre/*.py python2 -m py_compile pyclownfish/*.py python2 -m py_compile pylipe/*.py python2 -m py_compile pyloris/*.py -python2 -m py_compile pyltest/*.py %endif -find pyclownfish pylustre pylipe pyloris pyltest -maxdepth 1 -type f -a -name "*.python_checked" -o -name "*.py" | xargs rm -f +find pyclownfish pylustre pylipe pyloris -maxdepth 1 -type f -a -name "*.python_checked" -o -name "*.py" | xargs rm -f %install rm -rf $RPM_BUILD_ROOT @@ -210,13 +171,11 @@ mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d cp \ lpcc \ - lipe_virt \ src/lpcc_purge \ $RPM_BUILD_ROOT%{_bindir} cp -a pylustre $RPM_BUILD_ROOT%{python_sitelib} cp -a \ - example_configs/clownfish/seperate_mgs/lipe_virt.conf \ lpcc.conf \ $RPM_BUILD_ROOT%{_sysconfdir} @@ -232,10 +191,6 @@ cp \ lipe_run_action \ lipe_install \ lipe_launch \ - lipe_test \ - lipe_test_console \ - lipe_test_launch \ - lipe_test_scheduler \ loris_backup \ 
loris_crontab \ loris_test \ @@ -263,14 +218,12 @@ install -m 0755 scripts/*.sh $RPM_BUILD_ROOT%{ddntoolsdir}/ cp -a pyclownfish $RPM_BUILD_ROOT%{python_sitelib} cp -a pylipe $RPM_BUILD_ROOT%{python_sitelib} cp -a pyloris $RPM_BUILD_ROOT%{python_sitelib} -cp -a pyltest $RPM_BUILD_ROOT%{python_sitelib} mkdir -p $RPM_BUILD_ROOT%{_sysconfdir} cp -a \ example_configs/clownfish/seperate_mgs/clownfish.conf \ example_configs/lipe/lipe_install.conf \ example_configs/lipe/lipe_launch.json \ example_configs/loris/loris.conf \ - example_configs/ltest/lipe_test_scheduler.conf \ lipe.conf \ $RPM_BUILD_ROOT%{_sysconfdir} @@ -288,8 +241,6 @@ cp -a example_configs/hotpool/* $RPM_BUILD_ROOT%{_sysconfdir}/ mkdir -p $RPM_BUILD_ROOT%{_unitdir}/ install -m 0644 -D systemd/lpcc.service $RPM_BUILD_ROOT%{_unitdir}/lpcc.service %if %{with server} - install -m 0644 -D systemd/lipe_test_scheduler.service \ - $RPM_BUILD_ROOT%{_unitdir}/lipe_test_scheduler.service %if %{with hotpool} install -m 0644 -D systemd/lpurge@.service \ $RPM_BUILD_ROOT%{_unitdir}/lpurge@.service @@ -301,10 +252,6 @@ cp -a example_configs/hotpool/* $RPM_BUILD_ROOT%{_sysconfdir}/ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d install -m 0744 -D init.d/lpcc \ $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/lpcc -%if %{with server} - install -m 0744 -D init.d/lipe_test_scheduler \ - $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/lipe_test_scheduler -%endif # end server %endif install -m 0644 man/lpcc.8 $RPM_BUILD_ROOT%{_mandir}/man8/ install -m 0644 man/lpcc-start.8 $RPM_BUILD_ROOT%{_mandir}/man8/ @@ -328,8 +275,6 @@ rm -rf $RPM_BUILD_ROOT %files pylustre %{python2_sitelib}/pylustre -%{_bindir}/lipe_virt -%config(noreplace) %{_sysconfdir}/lipe_virt.conf %files lpcc %defattr(-,root,root) @@ -360,18 +305,6 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/lcreatemany %config(noreplace) %{_sysconfdir}/clownfish.conf -%files pyltest -%{python2_sitelib}/pyltest -%{_bindir}/lipe_test_console -%{_bindir}/lipe_test_launch 
-%{_bindir}/lipe_test_scheduler -%config(noreplace) %{_sysconfdir}/lipe_test_scheduler.conf -%if %{with systemd} - %{_unitdir}/lipe_test_scheduler.service -%else - %{_sysconfdir}/rc.d/init.d/lipe_test_scheduler -%endif - %files server %defattr(-,root,root) %{_bindir}/ext4_inode2path @@ -405,7 +338,6 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/lipe_convert_expr %{_bindir}/lipe_install %{_bindir}/lipe_launch -%{_bindir}/lipe_test %{_bindir}/lfill %{_bindir}/lipe_scan %{_bindir}/lipe_scan2 diff --git a/lipe/lipe_test b/lipe/lipe_test deleted file mode 100755 index 671f4cd..0000000 --- a/lipe/lipe_test +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Test LIPE -""" -from pylipe import lipe_test - -if __name__ == "__main__": - lipe_test.main() diff --git a/lipe/lipe_test_console b/lipe/lipe_test_console deleted file mode 100755 index 560f917..0000000 --- a/lipe/lipe_test_console +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Start the scheduler -""" -from pyltest import lipe_test_console - -if __name__ == "__main__": - lipe_test_console.main() diff --git a/lipe/lipe_test_launch b/lipe/lipe_test_launch deleted file mode 100755 index 3cffd5c..0000000 --- a/lipe/lipe_test_launch +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Start the Lipe test process -""" -from pyltest import lipe_test_launch - -if __name__ == "__main__": - lipe_test_launch.main() diff --git a/lipe/lipe_test_scheduler b/lipe/lipe_test_scheduler deleted file mode 100755 index d83054c..0000000 --- a/lipe/lipe_test_scheduler +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. 
-# Author: lixi@ddn.com -""" -Start the scheduler -""" -from pyltest import lipe_test_scheduler - -if __name__ == "__main__": - lipe_test_scheduler.main() diff --git a/lipe/lipe_virt b/lipe/lipe_virt deleted file mode 100755 index 2dce6da..0000000 --- a/lipe/lipe_virt +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Install virtual machines -""" -from pylustre import lipe_virt - -if __name__ == "__main__": - lipe_virt.main() diff --git a/lipe/pybuild/__init__.py b/lipe/pybuild/__init__.py index bcb25c3..57e1d61 100644 --- a/lipe/pybuild/__init__.py +++ b/lipe/pybuild/__init__.py @@ -1,7 +1,5 @@ """ Python library for build """ -__all__ = ["gen_lipe_test", - "lipe_build", - "pyltest_import_check", +__all__ = ["lipe_build", "lipe_expression_tests"] diff --git a/lipe/pybuild/gen_lipe_test.py b/lipe/pybuild/gen_lipe_test.py deleted file mode 100644 index ad4d9f0..0000000 --- a/lipe/pybuild/gen_lipe_test.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2017 DataDirect Networks, Inc. -# All Rights Reserved. 
-# Author: lixi@ddn.com - -""" -Generate lipe_test.conf -""" -import os -import sys -import yaml - -from pylustre import utils -from pylustre import cstr -from pylustre import lipe_virt -from pylustre import clog -from pylustre import lyaml -from pylustre import constants -from pylipe import lipe_test - - -def usage(): - """ - Print usage string - """ - utils.oprint("Usage: %s config_file" % - sys.argv[0]) - - -def main(): - """ - Generate lipe_test.conf - """ - # pylint: disable=bare-except,not-callable - log = clog.get_log() - if sys.version[0] == '2': - reload(sys) - if hasattr(sys, "setdefaultencoding"): - set_encoding = getattr(sys, "setdefaultencoding", None) - set_encoding('UTF-8') - else: - os.environ["PYTHONIOENCODING"] = 'UTF-8' - - if len(sys.argv) != 2: - usage() - sys.exit(-1) - - config_fpath = sys.argv[1] - config = {} - config[cstr.CSTR_VIRT_CONFIG] = lipe_virt.LIPE_VIRT_CONFIG - config[cstr.CSTR_SKIP_VIRT] = False - config[cstr.CSTR_SKIP_INSTALL] = False - config[cstr.CSTR_LIPE_INSTALL_CONFIG] = constants.LIPE_INSTALL_CONFIG - install_server = {} - install_server[cstr.CSTR_HOSTNAME] = "installhost" - install_server[cstr.CSTR_SSH_IDENTITY_FILE] = "/root/.ssh/id_dsa" - config[cstr.CSTR_INSTALL_SERVER] = install_server - tests = [] - for test_funct in lipe_test.LIPE_TESTS: - tests.append(test_funct.__name__) - config[cstr.CSTR_ONLY_TESTS] = tests - config_string = ("""# -# Configuration file for testing LiPE from DDN -# -# Please comment the test names under "%s" if want to skip some tests -# -# Please set "%s" to true if LiPE and Clownfish is already installed and -# properly running. -# -# Please set "%s" to true if the virtual machines are already -# installed and properly running. 
-# -""" % (cstr.CSTR_ONLY_TESTS, cstr.CSTR_SKIP_INSTALL, cstr.CSTR_SKIP_VIRT)) - config_string += yaml.dump(config, Dumper=lyaml.YamlDumper, - default_flow_style=False) - try: - with open(config_fpath, 'w') as yaml_file: - yaml_file.write(config_string) - except: - log.cl_error("""Failed to save the config file. To avoid data lose, please save the -following config manually:""") - sys.stdout.write(config_string) - sys.exit(-1) - log.cl_info("Config file saved to file [%s]", config_fpath) - sys.exit(0) diff --git a/lipe/pybuild/pyltest_import_check.py b/lipe/pybuild/pyltest_import_check.py deleted file mode 100644 index aee9617..0000000 --- a/lipe/pybuild/pyltest_import_check.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2019 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Library for checking the import of pyltest source code -""" -import sys - -# Local libs -from pylustre import clog - - -def check_import(log, fpath): - """ - Check the import of file path - """ - with open(fpath, "r") as fd: - lines = fd.readlines() - - for line in lines: - if line.startswith("from pyclownfish import"): - log.cl_error("file [%s] imports library from pyclownfish, which " - "is not allowed", fpath) - return -1 - if line.startswith("from pylipe import"): - log.cl_error("file [%s] imports library from pylipe, which " - "is not allowed", fpath) - return -1 - return 0 - - -def main(): - """ - Check the source code files - """ - log = clog.get_log() - for arg in sys.argv[1:]: - log.cl_info("checking file [%s]", arg) - ret = check_import(log, arg) - if ret: - log.cl_error("file [%s] imported wrong library", arg) - sys.exit(-1) - sys.exit(0) diff --git a/lipe/pylipe/__init__.py b/lipe/pylipe/__init__.py index 6e858a1..674222f 100644 --- a/lipe/pylipe/__init__.py +++ b/lipe/pylipe/__init__.py @@ -10,6 +10,4 @@ __all__ = ["ldsync", "lipe_flist_handle", "lipe_install", "lipe_install_nodeps", - "lipe_launch", - "lipe_test", - "lipe_hotpool_test"] + 
"lipe_launch"] diff --git a/lipe/pylipe/lipe_hotpool_test.py b/lipe/pylipe/lipe_hotpool_test.py deleted file mode 100644 index 8428fe6..0000000 --- a/lipe/pylipe/lipe_hotpool_test.py +++ /dev/null @@ -1,1833 +0,0 @@ -# Copyright (c) 2020 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: Gu Zheng -""" -Regression test for lipe/hotpool (lamigo&lpurge) -""" -# pylint: disable=too-many-lines -import math -import os -import time -import traceback -import threading -import yaml -from pylustre import lustre -from pylustre import utils - - -# constant params -HOTPOOL_LAMIGO = "lamigo" -HOTPOOL_LPURGE = "lpurge" -HOTPOOL_CLIENT = "/mnt/hotpool_test" -HOTPOOL_POOL_FAST = "fast" -HOTPOOL_POOL_SLOW = "slow" -HOTPOOL_LAMIGO_DUMP_PREFIX = "lamigo.dump" -HOTPOOL_LAMIGO_DEBUG_PREFIX = "lamigo.debug" -HOTPOOL_LAMIGO_CONFIG_PREFIX = "lamigo.conf" -HOTPOOL_LPURGE_DUMP_PREFIX = "lpurge.dump" -HOTPOOL_LPURGE_DEBUG_PREFIX = "lpurge.debug" -HOTPOOL_LPURGE_CONFIG_PREFIX = "lpurge.conf" - -LPURGE_ETC_CONFIG_DIR = "/etc/lpurge/" - - -# lamigo option key words -LAMIGO_CONFIG_MDT = "mdt" -LAMIGO_CONFIG_DAEMONIZE = "daemonize" -LAMIGO_CONFIG_MOUNTPOINT = "mountpoint" -LAMIGO_CONFIG_CACHE_SIZE = "max-cache" -LAMIGO_CONFIG_CHANGELOG_USER = "user" -LAMIGO_CONFIG_MIN_AGE = "min-age" -LAMIGO_CONFIG_AGENT = "agent" -LAMIGO_CONFIG_SRC_POOL = "src" -LAMIGO_CONFIG_TGT_POOL = "tgt" -LAMIGO_CONFIG_DUMP_FILE = "dump" -LAMIGO_CONFIG_DEBUG_FILE = "debug" -LAMIGO_CONFIG_IML_RE_SOCKET = "iml-re-socket" -LAMIGO_CONFIG_IML_EX_SOCKET = "iml-ex-socket" -LAMIGO_CONFIG_NUM_THREADS = "num-threads" -LAMIGO_CONFIG_RESCAN = "rescan" -LAMIGO_CONFIG_POOL_REFRESH = "pool-refresh" -LAMIGO_CONFIG_PROGRESS_INTERVAL = "progress-interval" - -LAMIGO_DUMP_CONFIG_SECTION = "config" - -LAMIGO_DUMP_KEYWORDS_MAP = { - LAMIGO_CONFIG_CHANGELOG_USER: "chlg_user", - LAMIGO_CONFIG_MDT: "mdtname", - LAMIGO_CONFIG_MOUNTPOINT: "mountpoint", - LAMIGO_CONFIG_SRC_POOL: "source_pool", - LAMIGO_CONFIG_TGT_POOL: "target_pool", - 
LAMIGO_CONFIG_MIN_AGE: "min_age", - LAMIGO_CONFIG_CACHE_SIZE: "max_cache", - LAMIGO_CONFIG_RESCAN: "rescan", - LAMIGO_CONFIG_NUM_THREADS: "thread_count", - LAMIGO_CONFIG_POOL_REFRESH: "pool_refresh", - LAMIGO_CONFIG_PROGRESS_INTERVAL: "progress_interval", - LAMIGO_CONFIG_IML_RE_SOCKET: "iml_re_socket", - LAMIGO_CONFIG_IML_EX_SOCKET: "iml_ex_socket"} - -# lpurge option key words -LPURGE_CONFIG_DEVICE = "device" # lustre-OST0000 -LPURGE_CONFIG_FREELO = "freelo" # 50 -LPURGE_CONFIG_FREEHI = "freehi" # 80 -LPURGE_CONFIG_DEBUG = "debug" -LPURGE_CONFIG_DUMP = "dump" -LPURGE_CONFIG_MAX_JOBS = "max_jobs" # 8 -LPURGE_CONFIG_SCAN_THREADS = "scan_threads" # 1 -LPURGE_CONFIG_POOL = "pool" # fast_pool -LPURGE_CONFIG_MDS = "mds" # 0:host:/mnt/lustre -LPURGE_CONFIG_MOUNT = "mount" # /mnt/lustre -LPURGE_CONFIG_SCAN_RATE = "scan_rate" # 10000 -LPURGE_CONFIG_DRYRUN = "dryrun" # true -LPURGE_CONFIG_IML_SOCKET = "iml_socket" # /tmp/mylpurge.socket -LPURGE_CONFIG_SLOT_SIZE = "slot_size" # 1048576 -LPURGE_CONFIG_CHECK_INTERVAL = "interval" - -LPURGE_DUMP_CONFIG_SECTION = "config" - -LPURGE_DUMP_KEYWORDS_MAP = { - LPURGE_CONFIG_FREEHI: "free_high", - LPURGE_CONFIG_FREELO: "free_low", - LPURGE_CONFIG_DEVICE: "ostname", - LPURGE_CONFIG_MOUNT: "mountpoint", - LPURGE_CONFIG_POOL: "pool", - LPURGE_CONFIG_MDS: "mds", - LPURGE_CONFIG_MAX_JOBS: "max_jobs", - LPURGE_CONFIG_CHECK_INTERVAL: "check_interval", - LPURGE_CONFIG_SCAN_RATE: "scan_rate", - LPURGE_CONFIG_SCAN_THREADS: "scan_threads", - LPURGE_CONFIG_SLOT_SIZE: "slot_size", - LPURGE_CONFIG_IML_SOCKET: "iml_socket"} - -HOTPOOL_TEST_CASES = [] - - -class HotpoolTestContext(object): - """ - test context for hotpool - """ - # pylint: disable=too-many-instance-attributes - def __init__(self, log, workspace, clowfish_instance): - self.htc_log = log - self.htc_workspace = workspace - self.htc_clowfish_instance = clowfish_instance - self.htc_lustre_fs = None - self.htc_fsname = "" - self.htc_mgs_host = None - self.htc_mdt = None - 
self.htc_mdt_instance = None - self.htc_mdt_index = "" - self.htc_mds_host = None - self.htc_fast_osts = list() - self.htc_slow_osts = list() - self.htc_fast_oss_host = None - self.htc_changelog_user = "" - self.htc_ost_list = list() - self.htc_ost_pools = dict() - self.htc_additional_clients = list() - self.htc_client_mountpoint = "/mnt/hotpool_test" - - def prepare_lustre_fs(self): - """ - prepare lustre for following test cases - """ - # pylint: disable=too-many-branches,too-many-statements - # reformat & mount lustre instance to get a pure testing environment - rc = self.htc_clowfish_instance.ci_umount_all(self.htc_log) - if rc < 0: - self.htc_log.cl_error("failed to umount clownfish instance") - return rc - - rc = self.htc_clowfish_instance.ci_format_all(self.htc_log) - if rc < 0: - self.htc_log.cl_error("failed to reformat clownfish instance") - return rc - - rc = self.htc_clowfish_instance.ci_mount_all(self.htc_log) - if rc < 0: - self.htc_log.cl_error("failed to mount clownfish instance") - return rc - - for lustrefs in self.htc_clowfish_instance.ci_lustres.values(): - # hint: use lustre instance which has 6 osts (4ldiskfs + 2zfs) - if len(lustrefs.lf_osts) > 4: - self.htc_lustre_fs = lustrefs - break - if self.htc_lustre_fs is None: - self.htc_log.cl_error("can't find valid lustre fs for hotpool testing, exit") - return -1 - - self.htc_fsname = self.htc_lustre_fs.lf_fsname - - # get mgs - if self.htc_lustre_fs.lf_mgs is not None: - mgs = self.htc_lustre_fs.lf_mgs - elif self.htc_lustre_fs.lf_mgs_mdt is not None: - mgs = self.htc_lustre_fs.lf_mgs_mdt - else: - self.htc_log.cl_error("no mgs found from lustre %s", - self.htc_fsname) - return -1 - - mgsi = mgs.ls_mounted_instance(self.htc_log) - if mgsi is None: - self.htc_log.cl_error("failed to get active mgs service from lustre %s", - self.htc_fsname) - return -1 - self.htc_mgs_host = mgsi.lsi_host - - # get mdt0 - if len(self.htc_lustre_fs.lf_mdts) < 1: - self.htc_log.cl_error("no mdt found from lustre 
%s", - self.htc_fsname) - return -1 - - mdt0 = None - for mdt in self.htc_lustre_fs.lf_mdts.values(): - if mdt.ls_index == 0: - mdt0 = mdt - if mdt0 is None: - self.htc_log.cl_error("no valid MDT instance found from lustre %s", - self.htc_fsname) - return -1 - self.htc_mdt = mdt0 - - self.htc_mdt_index = "-".join([self.htc_fsname, self.htc_mdt.ls_index_string]) - mdti = mdt0.ls_mounted_instance(self.htc_log) - if mdti is None: - self.htc_log.cl_error("failed to get mdt service for mdt %s", - self.htc_mdt_index) - return -1 - self.htc_mdt_instance = mdti - self.htc_mds_host = mdti.lsi_host - - # get fast osts - for ost in self.htc_lustre_fs.lf_osts.values(): - if ost.ls_backfstype == lustre.BACKFSTYPE_LDISKFS: - self.htc_ost_list.append(ost) - if len(self.htc_ost_list) < 4: - self.htc_log.cl_error("invalid ost list, at least 4, but got %d", - len(self.htc_ost_list)) - return -1 - - oss_host_osts_mapping = dict() - for ost in self.htc_ost_list: - osti = ost.ls_mounted_instance(self.htc_log) - if osti is None: - self.htc_log.cl_error("failed to get ost service for ost %s", - ost.ls_index_string) - return -1 - if oss_host_osts_mapping.get(osti.lsi_host.sh_hostname) is None: - oss_host_osts_mapping[osti.lsi_host.sh_hostname] = list() - oss_host_osts_mapping[osti.lsi_host.sh_hostname].append(ost) - if len(oss_host_osts_mapping[osti.lsi_host.sh_hostname]) >= 2: - self.htc_fast_oss_host = osti.lsi_host - - if self.htc_fast_oss_host is None: - self.htc_log.cl_error("failed to get host with two active ost service") - return -1 - - self.htc_fast_osts = oss_host_osts_mapping[self.htc_fast_oss_host.sh_hostname][0:2] - self.htc_slow_osts = list(set(self.htc_ost_list).difference(set(self.htc_fast_osts)))[0:2] - - if (len(self.htc_slow_osts) < 2) or (len(self.htc_fast_osts) < 2): - self.htc_log.cl_error("failed to get enough(two or more) active osts for hotpool testing") - return -1 - return 0 - - def prepare_additional_clients(self): - """ - Setup additional client mountpoint on 
mds add fast oss - """ - for host in [self.htc_mds_host, self.htc_fast_oss_host]: - new_client = lustre.LustreClient(self.htc_log, - self.htc_lustre_fs, - host, - self.htc_client_mountpoint, - add_to_host=True) - self.htc_additional_clients.append(new_client) - rc = new_client.lc_mount(self.htc_log) - if rc < 0: - self.htc_log.cl_error("failed to mount client [%s] to host [%s]", - self.htc_client_mountpoint, - host.sh_hostname) - return rc - return 0 - - def prepare_changelog_user(self): - """ - Prepare changelog user for following tests - """ - self.htc_changelog_user = self.htc_mdt_instance.mdti_changelog_register(self.htc_log) - if self.htc_changelog_user is None: - self.htc_log.cl_error("failed to register changelog user to mdt %s", - self.htc_mdt_index) - return -1 - return 0 - - def prepare_ost_pools(self): - """ - Setup ost pool properly - """ - fast_pool = lustre.LustrePool(self.htc_lustre_fs, - HOTPOOL_POOL_FAST) - rc = fast_pool.lp_new(self.htc_log, self.htc_mgs_host) - if rc < 0: - self.htc_log.cl_error("failed to create pool [%s] from host [%s]", - fast_pool.lp_fullname, - self.htc_mgs_host.sh_hostname) - return rc - rc = fast_pool.lp_add(self.htc_log, self.htc_mgs_host, - self.htc_fast_osts) - if rc < 0: - self.htc_log.cl_error("failed to add osts [%s] into pool [%s] from host [%s]", - [ost.ls_index_string for ost in self.htc_fast_osts], - fast_pool.lp_fullname, self.htc_mgs_host.sh_hostname) - return rc - - self.htc_ost_pools[HOTPOOL_POOL_FAST] = fast_pool - - slow_pool = lustre.LustrePool(self.htc_lustre_fs, - HOTPOOL_POOL_SLOW) - rc = slow_pool.lp_new(self.htc_log, self.htc_mgs_host) - if rc < 0: - self.htc_log.cl_error("failed to create pool [%s] from host [%s]", - fast_pool.lp_fullname, - self.htc_mgs_host.sh_hostname) - return rc - rc = slow_pool.lp_add(self.htc_log, self.htc_mgs_host, - self.htc_slow_osts) - if rc < 0: - self.htc_log.cl_error("failed to add osts [%s] into pool [%s] from host [%s]", - [ost.ls_index_string for ost in 
self.htc_slow_osts], - fast_pool.lp_fullname, self.htc_mgs_host.sh_hostname) - return rc - self.htc_ost_pools[HOTPOOL_POOL_SLOW] = slow_pool - return 0 - - def cleanup(self): - """ - Cleanup the hotpool environment - """ - for client in self.htc_additional_clients: - client.lc_umount(self.htc_log) - idx = client.lc_client_name - if idx in self.htc_lustre_fs.lf_clients.keys(): - self.htc_lustre_fs.lf_clients.pop(idx) - for pool in self.htc_ost_pools.values(): - pool.lp_remove(self.htc_log, self.htc_mgs_host, pool.lp_osts) - pool.lp_destroy(self.htc_log, self.htc_mgs_host) - self.htc_clowfish_instance.ci_umount_all(self.htc_log) - - -def hotpool_test_genarate_tmpfile(workspace, prefix): - """ - generate a timestamped file with prefix - :return: - """ - time_stamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())) - raw_path = os.path.join(workspace, str(prefix)) - return ".".join([raw_path, time_stamp]) - - -def hotpool_test_generate_config(log, config_file, options_dict): - """ - Generate a config from an options dict - """ - - try: - with open(config_file, "w") as fd: - for k, v in options_dict.items(): - if isinstance(v, bool): - fd.write("%s=%s\n" % (k, str(v).lower())) - else: - fd.write("%s=%s\n" % (k, v)) - fd.write("debug\n") - except: - log.cl_error("failed to generate config [%s] for options:%s ", - config_file, options_dict) - return -1 - return 0 - - -def hotpool_test_setup(log, workspace, clowfish_instance): - """ - Initialize & setup test context - """ - hotpool_test_ctx = HotpoolTestContext(log, workspace, - clowfish_instance) - - rc = hotpool_test_ctx.prepare_lustre_fs() - if rc < 0: - log.cl_error("failed to prepare testing environment") - return None - - rc = hotpool_test_ctx.prepare_additional_clients() - if rc < 0: - log.cl_error("failed to mount additional clients") - return None - - rc = hotpool_test_ctx.prepare_changelog_user() - if rc < 0: - log.cl_error("failed to register changelog user") - return None - - rc = 
hotpool_test_ctx.prepare_ost_pools() - if rc < 0: - log.cl_error("failed to setup ost pools") - return None - return hotpool_test_ctx - - -def hotpool_test_run(test_context): - """ - Run hotpool test cases - """ - rc = 0 - for case in HOTPOOL_TEST_CASES: - try: - test_context.htc_log.cl_info("start test case [%s]", - case.__name__) - rc = case(test_context) - # break test routine only for fail - if rc < 0: - test_context.htc_log.cl_error("test case [%s] failed", - case.__name__) - break - elif rc > 0: - test_context.htc_log.cl_info("test case [%s] skipped", - case.__name__) - else: - test_context.htc_log.cl_info("test %s success", - case.__name__) - except: - test_context.htc_log.cl_error("%s", - traceback.format_exc()) - rc = -1 - break - test_context.cleanup() - return rc - - -def hotpool_test(log, workspace, clownfish_instance): - """ - Run hotpool test on target cluster instance - """ - test_ctx = hotpool_test_setup(log, workspace, clownfish_instance) - if test_ctx is None: - log.cl_error("failed to start hotpool test") - return -1 - log.cl_info("starting hotpool test") - - rc = hotpool_test_run(test_ctx) - if rc: - log.cl_error("hotpool test failed") - else: - log.cl_info("hotpool test done") - return rc - - -def hotpool_test_start_lamigo_with_options(log, host, params): - """ - Start lamigo with commandline options - :return: 0: success, -1: fail - """ - assert isinstance(params, dict) - - rescan_enabled = params.get(LAMIGO_CONFIG_RESCAN) - scan_thread_count = params.get(LAMIGO_CONFIG_NUM_THREADS) - if rescan_enabled and scan_thread_count: - assert isinstance(rescan_enabled, int) - assert isinstance(scan_thread_count, int) - rescan_options = "-r -n %d" % scan_thread_count - else: - rescan_options = "" - - command = "lamigo -u {user} -m {mdt} -a {min-age} -b{debug} -c {max-cache} " \ - "-w {dump} -g {agent} -s {src} -t {tgt} %s -M {mountpoint}" % rescan_options - command = command.format(**params) - - log.cl_info("starting lamigo with command [%s] on host 
[%s]", - command, host.sh_hostname) - ret_val = host.sh_run(log, command, wait=False) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - ret_val.cr_exit_status, - ret_val.cr_stdout, - ret_val.cr_stderr) - return -1 - return 0 - - -def hotpool_test_start_lamigo_with_config(test_ctx, host, config_file): - """ - Start lamigo with config_file - :return: 0: success, -1: fail - """ - log = test_ctx.htc_log - workspace = test_ctx.htc_workspace - # send config to host - rc = host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, host.sh_hostname) - return -1 - - rc = host.sh_send_file(log, config_file, workspace) - if rc < 0: - log.cl_error("failed to send config file [%s] to target host [%s]", - config_file, host.sh_hostname) - return -1 - - # start lamigo with config - command = "lamigo -f %s" % config_file - log.cl_info("starting lamigo with command [%s] on host [%s]", - command, host.sh_hostname) - retval = host.sh_run(log, command, wait=False) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -def hotpool_test_start_lpurge(test_ctx, host, config_file): - """ - Start lpurge - :return: 0: success, -1: fail - """ - log = test_ctx.htc_log - workspace = test_ctx.htc_workspace - rc = host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, host.sh_hostname) - return -1 - # send config to host - rc = host.sh_send_file(log, config_file, workspace) - if rc < 0: - test_ctx.cl_error("failed to send config file [%s] to target host [%s]", - config_file, host.sh_hostname) - return -1 - - # start lpurge with config - 
command = "lpurge -f %s" % config_file - log.cl_info("starting lpurge with command [%s] on host [%s]", - command, host.sh_hostname) - retval = host.sh_run(log, command, wait=False) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -def hotpool_test_start_lpurge_service(test_ctx, host, - params_dict, ost_index): - """ - Start lpurge service unit - """ - log = test_ctx.htc_log - ect_conf_dir = os.path.join(LPURGE_ETC_CONFIG_DIR, test_ctx.htc_fsname) - rc = host.sh_mkdir(log, ect_conf_dir) - if rc < 0: - return -1 - - config_file = hotpool_test_genarate_tmpfile(test_ctx.htc_workspace, - HOTPOOL_LPURGE_CONFIG_PREFIX) - rc = hotpool_test_generate_config(log, config_file, params_dict) - if rc < 0: - return -1 - - rc = host.sh_send_file(log, config_file, - os.path.join(ect_conf_dir, - ost_index + ".conf")) - if rc < 0: - return -1 - - command = "systemctl start lpurge@%s-%s.service" % (test_ctx.htc_fsname, ost_index) - log.cl_info("starting lpurge with command [%s] on host [%s]", - command, host.sh_hostname) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -def hotpool_test_trigger_dump(log, host, dump_file, pid): - """ - Trigger dump via sending signal SIGUSR1 - """ - if not pid: - log.cl_error("pid is NULL") - return -1 - - # pylint: disable=too-many-arguments - timeout = 10 - time_start = time.time() - while True: - # trigger SIGUSR1 to dump status - rc = host.sh_kill(log, pid, special_signal=10) - if rc != 0: - log.cl_error("failed to trigger dump stats on [%s]," - "ret = [%d]", - host.sh_hostname, rc) - return -1 - time.sleep(1) 
- command = """awk 'END { print NR }' %s""" % dump_file - retval = host.sh_run(log, command, timeout=10) - if retval.cr_exit_status == 0: - file_size = retval.cr_stdout.strip() - if file_size and int(file_size) > 0: - return 0 - - time_now = time.time() - elapsed = time_now - time_start - if elapsed >= timeout: - log.cl_error("dumping file [%s] not completed after 10 seconds\n", - dump_file) - break - return -1 - - -def hotpool_test_stop_process(log, host, process, pid_list): - """ - Stop lamigo/lpurge - :return: 0: success, 1: no lamigo process found, -1: fail - """ - if not pid_list: - log.cl_error("pid list is NULL") - return -1 - - for pid in pid_list: - rc = host.sh_kill(log, pid, 15) - if rc: - return -1 - return hotpool_test_wait_process_terminate(log, host, process) - - -def hotpool_test_wait_process_terminate(log, host, process): - """ - Wait for @process terminate - :return: 0: success, -1: failure - """ - def alive(retval, arg=0): - """ - Internal compare function - """ - if retval.cr_exit_status == 0: - instance_count = retval.cr_stdout.strip() - if instance_count and int(instance_count) > arg: - return True - return False - - still_exist = host.sh_wait_condition(log, "pkill --signal 0 -c -x %s" % process, - alive, 0, - timeout=10, sleep_interval=1) - if still_exist: - log.cl_error("process %s is still alive after 10 seconds\n", - process) - return -1 - return 0 - - -def hotpool_test_wait_process_launch(log, host, process, instance_count=1): - """ - Wait for @process launch - :return: 0: success, -1: failure - """ - def active(retval, arg=0): - """ - Internal compare function - """ - if retval.cr_exit_status == 0: - count = retval.cr_stdout.strip() - if count and int(count) == arg: - return 0 - return -1 - - not_active = host.sh_wait_condition(log, "pkill --signal 0 -c -x %s" % process, - active, instance_count, - timeout=5, sleep_interval=1) - if not_active: - log.cl_error("process %s is still not active after 10 seconds\n", - process) - return -1 - 
return 0 - - -def hotpool_test_lamigo_validate_options(log, dump_file, expected_dict): - """ - validate options in dump_file, compared with expected_dict - """ - assert isinstance(expected_dict, dict) - # parse dump file to get dumped config options - with open(dump_file) as fd: - dump_data = yaml.load(fd) - config_options = dump_data.get(LAMIGO_DUMP_CONFIG_SECTION) - if config_options is None: - # no *config* section found - log.cl_error("invalid dump file :%s, no [%s] section found\n", - dump_file, LAMIGO_DUMP_CONFIG_SECTION) - return -1 - - match = True - for k, v in expected_dict.items(): - dump_key = LAMIGO_DUMP_KEYWORDS_MAP.get(k) - if dump_key is None: - continue - get_v = config_options.get(dump_key) - if get_v is None: - # no such field found - log.cl_error("invalid dump file :%s, no [%s] field found\n", - dump_file, dump_key) - return -1 - - if k == LAMIGO_CONFIG_SRC_POOL: - pool_list = get_v.split(",") - if v not in pool_list: - match = False - log.cl_error("expected %s:%s\nfound %s:%s", - k, v, k, get_v) - break - elif get_v != v: - match = False - log.cl_info("expected %s:%s\nfound %s:%s", - k, v, k, get_v) - break - - if not match: - log.cl_info("expected options:%s\nfound options:%s", - expected_dict, config_options) - return -1 - return 0 - - -def hotpool_test_lpurge_validate_options(log, dump_file, expected_dict): - """ - validate options in dump_file, compared with expected_dict - """ - assert isinstance(expected_dict, dict) - # parse dump file to get config options - with open(dump_file) as fd: - dump_data = yaml.load(fd) - - config_options = dump_data.get(LPURGE_DUMP_CONFIG_SECTION) - if config_options is None: - # no *config* section found - log.cl_error("invalid dump file :%s, no [%s] section found\n", - dump_file, LPURGE_DUMP_CONFIG_SECTION) - return -1 - - match = True - for k, v in expected_dict.items(): - dump_key = LPURGE_DUMP_KEYWORDS_MAP.get(k) - if dump_key is None: - continue - get_v = config_options.get(dump_key) - if get_v is None: 
- # no such field found - log.cl_error("invalid dump file :%s, no [%s] field found\n", - dump_file, dump_key) - return -1 - - if k == LPURGE_CONFIG_MDS: - mds_list = get_v.split(",") - if v not in mds_list: - match = False - log.cl_error("expected %s:%s\nfound %s:%s", - k, v, k, get_v) - break - elif get_v != v: - match = False - log.cl_error("expected %s:%s\nfound %s:%s", - k, v, k, get_v) - break - - if not match: - log.cl_info("expected options:%s\nfound options:%s", - expected_dict, config_options) - return -1 - return 0 - - -def hotpool_test_generate_basic_lamigo_parameters(test_context): - """ - Generate basic parameters (dict format) to lamigo from @test_context - :return: parameters dict - """ - assert isinstance(test_context, HotpoolTestContext) - workspace = test_context.htc_workspace - params = {LAMIGO_CONFIG_CHANGELOG_USER: test_context.htc_changelog_user, - LAMIGO_CONFIG_MDT: test_context.htc_mdt_index, - LAMIGO_CONFIG_AGENT: "%s:%s:1" % (test_context.htc_mds_host.sh_hostname, - test_context.htc_client_mountpoint), - LAMIGO_CONFIG_MIN_AGE: 5, - LAMIGO_CONFIG_CACHE_SIZE: 2048576, - LAMIGO_CONFIG_DEBUG_FILE: hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LAMIGO_DEBUG_PREFIX), - LAMIGO_CONFIG_DUMP_FILE: hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LAMIGO_DUMP_PREFIX), - LAMIGO_CONFIG_SRC_POOL: test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name, - LAMIGO_CONFIG_TGT_POOL: test_context.htc_ost_pools[HOTPOOL_POOL_SLOW].lp_name, - LAMIGO_CONFIG_MOUNTPOINT: test_context.htc_client_mountpoint} - return params - - -def hotpool_test_generate_basic_lpurge_parameters(test_context): - """ - Generate basic parameters (dict format) to lpurge from @test_context - :param test_context: - :return: - """ - mds_host = test_context.htc_mds_host - fast_pool = test_context.htc_ost_pools[HOTPOOL_POOL_FAST] - ost_device = "-".join([test_context.htc_fsname, - test_context.htc_fast_osts[0].ls_index_string]) - - params = {LPURGE_CONFIG_DEVICE: ost_device, - 
LPURGE_CONFIG_FREELO: 30, - LPURGE_CONFIG_FREEHI: 50, - LPURGE_CONFIG_MAX_JOBS: 2, - LPURGE_CONFIG_SCAN_THREADS: 1, - LPURGE_CONFIG_POOL: fast_pool.lp_name, - LPURGE_CONFIG_MDS: "0:%s:%s" % (mds_host.sh_hostname, - test_context.htc_client_mountpoint), - LPURGE_CONFIG_MOUNT: test_context.htc_client_mountpoint, - LPURGE_CONFIG_SCAN_RATE: 10000, - LPURGE_CONFIG_SLOT_SIZE: 1048576} - return params - - -def hotpool_test_file_mirrored(log, host, file_path): - """ - Check whether file has more than one mirror - return 1 for True, 0 for False, -1 for error - stripe info is dumped to return values @stripe_info - """ - command = "lfs getstripe %s" % file_path - ret_val = host.sh_run(log, command) - if ret_val.cr_exit_status != 0: - log.cl_error("failed to run [%s] on host [%s], ret = [%d], " - "stdout = [%s], stderr = [%s]", command, - host.sh_hostname, ret_val.cr_exit_status, - ret_val.cr_stdout, ret_val.cr_stderr) - return -1, None - if not ret_val.cr_stdout: - log.cl_error("no stripe info of file [%s]", file_path) - return -1, None - - stripe_info = ret_val.cr_stdout.strip() - for line in stripe_info.splitlines(): - raw_line = line.strip() - if raw_line.startswith("lcm_mirror_count:"): - mirror_info = raw_line.split(':') - mirror_count = mirror_info[1].strip() - if int(mirror_count) >= 2: - return True, ret_val.cr_stdout - elif int(mirror_count) == 1: - return False, ret_val.cr_stdout - log.cl_error("no mirror info of file [%s]", file_path) - return -1, ret_val.cr_stdout - - -def hotpool_test_lamigo_wait_replicated(log, host, pid, stats_file, - replicated_count, timeout=10): - """ - Wait for the count of replicated item get @replicated_count - :return: 0: success, -1: failure - """ - # pylint: disable=too-many-arguments - time_start = time.time() - while True: - # trigger SIGUSR1 to dump lpurge status - rc = host.sh_kill(log, pid, special_signal=10) - if rc != 0: - log.cl_error("failed to trigger lamigo dump stats on [%s]," - "ret = [%d]", - host.sh_hostname, rc) - return 
-1 - time.sleep(1) - command = """grep replicated %s | awk -F ':' '{print $2}'""" % stats_file - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to distro name on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - host.sh_hostname, retval.cr_exit_status, - retval.cr_stdout, retval.cr_stderr) - return -1 - raw_out = retval.cr_stdout.strip() - count = int(raw_out) if raw_out else 0 - if count >= replicated_count: - return 0 - - time_now = time.time() - elapsed = time_now - time_start - if elapsed < timeout: - log.cl_info("replicated:%d, expected:%d, wait more time", - count, replicated_count) - continue - log.cl_error("timeout (%d s) on waiting for lamigo replicated [%d] " - "files on host [%s]", - timeout, replicated_count, host.sh_hostname) - return -1 - return -1 - - -def hotpool_test_lpurge_wait_purged(log, host, pid, stats_file, - purge_count, timeout=10): - """ - Wait for the count of purged items get @purge_count - :return: 0: success, -1: failure - """ - # pylint: disable=too-many-arguments - time_start = time.time() - while True: - # trigger SIGUSR1 to dump lpurge status - rc = host.sh_kill(log, pid, special_signal=10) - if rc != 0: - log.cl_error("failed to trigger lpurge dump stats on [%s]," - "ret = [%d]", host.sh_hostname, rc) - return -1 - time.sleep(1) - command = """grep purged %s | awk -F ':' '{print $2}'""" % stats_file - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to distro name on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - host.sh_hostname, retval.cr_exit_status, - retval.cr_stdout, retval.cr_stderr) - return -1 - raw_out = retval.cr_stdout.strip() - purged = int(raw_out) if raw_out else 0 - if purged >= purge_count: - return 0 - - time_now = time.time() - elapsed = time_now - time_start - if elapsed < timeout: - log.cl_info("pruged:%d, expected:%d, wait more time", - purged, purge_count) - continue - log.cl_error("timeout (%d s) 
on waiting for lpurge processed [%d] " - "files on host [%s]", - timeout, purge_count, host.sh_hostname) - - command = """journalctl -u 'lpurge*'""" - retval = host.sh_run(log, command) - log.cl_error("lpurge logs = [%s], [%s]", retval.cr_stdout, retval.cr_stderr) - return -1 - return -1 - - -def hotpool_test_lpurge_check_mirror_status(log, test_host, file_list): - """ - Check the mirror state of files in file_list after purged - """ - assert isinstance(file_list, list) - - for file_path in file_list: - mirrored, stripe_info = hotpool_test_file_mirrored(log, test_host, - file_path) - if mirrored < 0: - log.cl_error("failed to get stripe info of file [%s]", - file_path) - return -1 - elif mirrored: - log.cl_error("%s, still extended mirrors after purge\n%s", - file_path, stripe_info) - return -1 - return 0 - - -def hotpool_create_mirror_dir(log, host, dirname, pool, ost_index=None): - """ - Create dir, and set mirror info if specified - """ - # pylint: disable=too-many-arguments - rc = host.sh_mkdir(log, dirname) - if rc < 0: - return -1 - - if ost_index is not None: - ost_index_option = "-o %s" % ost_index - else: - ost_index_option = "" - - command = "lfs mirror create -N -p %s %s %s" % (pool, ost_index_option, dirname) - ret_val = host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - ret_val.cr_exit_status, - ret_val.cr_stdout, ret_val.cr_stderr) - return -1 - return 0 - - -def hotpool_test_lamigo_start(test_context): - """ - Test launch lamigo - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - rc = test_host.sh_mkdir(log, 
workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - - try: - lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options, - (log, test_host, params)) - except threading.ThreadError: - log.cl_error("failed to start lamigo with parameters: %s", - params) - return -1 - - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - if rc < 0: - log.cl_error("failed to stop lamigo") - lamigo_t.join() - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_start) - - -def hotpool_test_lamigo_start_with_config(test_context): - """ - Test run lamigo with config - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - - config_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LAMIGO_CONFIG_PREFIX) - rc = hotpool_test_generate_config(log, config_file, params) - if rc < 0: - log.cl_error("failed to generate lamigo config with parameters: %s", - params) - return rc - try: - lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_config, - (test_context, test_host, config_file)) - except threading.ThreadError: - log.cl_error("failed to start lamigo 
with parameters: %s", - params) - return -1 - - # check pid - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - if rc < 0: - log.cl_error("failed to stop lamigo") - lamigo_t.join() - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_start_with_config) - - -def hotpool_test_lamigo_comand_line_options(test_context): - """ - Test lamigo options (dumped to dumpfile) - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - dump_file = params[LAMIGO_CONFIG_DUMP_FILE] - - try: - lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options, - (log, test_host, params)) - except threading.ThreadError: - log.cl_error("failed to start lamigo with parameters: %s", - params) - return -1 - - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - - rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0]) - if rc_1: - log.cl_error("can't find file [%s] on host [%s]", - dump_file, test_host.sh_hostname) - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - if rc < 0: - log.cl_error("failed to stop lamigo") - else: - rc = rc_1 - - lamigo_t.join() - if rc: - 
return rc - - # get dumpfile from host - rc = test_host.sh_get_file(log, dump_file, test_context.htc_workspace) - if rc < 0: - log.cl_error("failed to get file [%s] from host [%s]", - dump_file, test_host.sh_hostname) - return rc - - return hotpool_test_lamigo_validate_options(log, dump_file, params) - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_comand_line_options) - - -def hotpool_test_lamigo_config_options(test_context): - """ - Test lamigo config options - :return: 0: success, others: fail - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - dump_file = params[LAMIGO_CONFIG_DUMP_FILE] - - config_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LAMIGO_CONFIG_PREFIX) - rc = hotpool_test_generate_config(log, config_file, params) - if rc < 0: - log.cl_error("failed to generate lamigo config with parameters: %s", - params) - return rc - - try: - lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_config, - (test_context, test_host, config_file)) - except threading.ThreadError: - log.cl_error("failed to start lamigo with parameters: %s", - params) - return -1 - - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - - rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0]) - if rc_1: - log.cl_error("can't find file [%s] on host [%s]", - dump_file, 
test_host.sh_hostname) - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - if rc < 0: - log.cl_error("failed to stop lamigo") - else: - rc = rc_1 - - lamigo_t.join() - if rc: - return rc - # get dumpfile from host - rc = test_host.sh_get_file(log, dump_file, workspace) - if rc < 0: - log.cl_error("failed to get file [%s] from host [%s]", - dump_file, test_host.sh_hostname) - return rc - - return hotpool_test_lamigo_validate_options(log, dump_file, params) - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_config_options) - - -def hotpool_test_lamigo_mirror_target(test_context): - """ - Test whether lamigo creates mirror on target/slow pool - """ - # pylint: disable=too-many-locals,too-many-branches,too-many-statements - log = test_context.htc_log - workspace = test_context.htc_workspace - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - dump_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LAMIGO_DUMP_PREFIX) - - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - params[LAMIGO_CONFIG_DUMP_FILE] = dump_file - fast_pool_name = test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name - min_age = params[LAMIGO_CONFIG_MIN_AGE] - - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - test_dir = os.path.join(test_context.htc_client_mountpoint, - "test_lamigo_mirror_target") - ret = test_host.sh_rmdir_if_exist(log, test_dir) - if ret: - # already there, maybe with files under it, create a new one - test_dir = ".".join([test_dir, - time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))]) - - try: - lamigo_t = 
utils.thread_start(hotpool_test_start_lamigo_with_options, - (log, test_host, params)) - except threading.ThreadError: - log.cl_error("failed to start lamigo with parameters: %s", - params) - return -1 - - # wait lamigo be active - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if not pid_list: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - elif len(pid_list) > 1: - log.cl_error("more than one lamigo instances found, PIDs:%s", - pid_list) - lamigo_t.join() - return -1 - - rc = hotpool_create_mirror_dir(log, test_host, test_dir, - fast_pool_name) - if rc: - log.cl_error("failed to create testdir [%s], exit", test_dir) - hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - lamigo_t.join() - return rc - - file_count = 10 - file_list = list() - flist = [os.path.join(test_dir, str(i)) for i in range(file_count)] - for item in flist: - rc = test_host.sh_fill_binary_file(log, item, 1048576) - if rc: - log.cl_error("failed to create file [%s], exit", item) - for f in file_list: - test_host.sh_remove_file(log, f) - test_host.sh_rmdir_if_exist(log, test_dir) - hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - lamigo_t.join() - return rc - file_list.append(item) - - test_host.sh_run(log, "sync", timeout=10) - - # sleep a while, and wait for replication done - time.sleep(min_age) - rc1 = hotpool_test_lamigo_wait_replicated(log, test_host, pid_list[0], - dump_file, file_count, - timeout=60) - if rc1 < 0: - log.cl_error("lamigo: replicate timeout") - - # stop lamigo - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - lamigo_t.join() - if rc < 0: - log.cl_error("failed to stop lamigo") - else: - rc = rc1 - - # check mirror - if rc == 0: - for item in file_list: - mirrored, stripe_info = hotpool_test_file_mirrored(log, - test_host, item) - if mirrored is True: - continue - elif mirrored is False: - log.cl_error("file [%s] not replicated\n%s", - item, stripe_info) - rc = -1 - break 
- else: - log.cl_error("failed to get stripeinfo on [%s]", item) - rc = -1 - break - - # cleanup test dir - for item in file_list: - test_host.sh_remove_file(log, item) - test_host.sh_rmdir_if_exist(log, test_dir) - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_mirror_target) - - -def hotpool_test_lamigo_no_mirror_on_continuous_write(test_context): - """ - Test lamigo shouldn't create mirror on continuous modify - """ - # pylint: disable=too-many-statements,too-many-locals - log = test_context.htc_log - test_host = test_context.htc_mds_host - test_host.sh_pkill(log, HOTPOOL_LAMIGO) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO) - if rc: - log.cl_error("there are other running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LAMIGO) - return -1 - params = hotpool_test_generate_basic_lamigo_parameters(test_context) - fast_pool_name = test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name - min_age = params[LAMIGO_CONFIG_MIN_AGE] - - try: - lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options, - (log, test_host, params)) - except threading.ThreadError: - log.cl_error("failed to start lamigo with parameters: %s", - params) - return -1 - - # wait lamigo be active - pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lamigo instance") - lamigo_t.join() - return -1 - - test_file = os.path.join(test_context.htc_client_mountpoint, - "test_lamigo_no_mirror_on_continuous_write") - if test_host.sh_path_exists(log, test_file) == 1: - test_file = ".".join([test_file, - time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))]) - - command = "lfs mirror create -N -p %s %s" % (fast_pool_name, test_file) - ret_val = test_host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - ret_val.cr_exit_status, - 
ret_val.cr_stdout, ret_val.cr_stderr) - hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - lamigo_t.join() - return -1 - - rc = test_host.sh_fill_binary_file(log, test_file, 1048576) - if rc < 0: - log.cl_error("failed to create file [%s], exit", test_file) - hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - lamigo_t.join() - return rc - - # modify - for _ in range(min_age * 2): - time.sleep(1) - - rc = test_host.sh_fill_binary_file(log, test_file, 4096, - random=True, - dd_options="conv=notrunc") - if rc: - log.cl_error("failed to mpdify file [%s], exit", test_file) - break - - mirrored, stripe_info = hotpool_test_file_mirrored(log, test_host, - test_file) - # should not be mirror extended - if mirrored is False: - continue - elif mirrored is True: - log.cl_error("file [%s] has been replicated\n%s", - test_file, stripe_info) - rc = -1 - break - else: - log.cl_error("failed to get stripeinfo on [%s]", - test_file) - rc = -1 - break - - rc1 = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list) - if rc1 < 0: - log.cl_error("failed to stop lamigo") - rc = rc1 - - lamigo_t.join() - test_host.sh_remove_file(log, test_file) - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_no_mirror_on_continuous_write) - - -def hotpool_test_lpurge_start(test_context): - """ - Test lpurge start - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - params = hotpool_test_generate_basic_lpurge_parameters(test_context) - test_host = test_context.htc_fast_oss_host - test_host.sh_pkill(log, HOTPOOL_LPURGE) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LPURGE) - return -1 - - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - - config_file = 
hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LPURGE_CONFIG_PREFIX) - rc = hotpool_test_generate_config(log, config_file, params) - if rc < 0: - log.cl_error("failed to generate lpurge config with parameters: %s", - params) - return rc - - try: - lpurge_t = utils.thread_start(hotpool_test_start_lpurge, - (test_context, test_host, config_file)) - except threading.ThreadError: - log.cl_error("failed to start lpurge with parameters: %s", - params) - return -1 - - # check pid - pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lpurge instance") - lpurge_t.join() - return -1 - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list) - if rc < 0: - log.cl_error("failed to stop lpurge") - lpurge_t.join() - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_start) - - -def hotpool_test_lpurge_service(test_context): - """ - Test lpurge service (systemd unit) - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - service_subname = "-".join([test_context.htc_fsname, - test_context.htc_fast_osts[0].ls_index_string]) - params = hotpool_test_generate_basic_lpurge_parameters(test_context) - test_host = test_context.htc_fast_oss_host - test_host.sh_pkill(log, HOTPOOL_LPURGE) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LPURGE) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - - rc = hotpool_test_start_lpurge_service(test_context, test_host, - params, - test_context.htc_fast_osts[0].ls_index_string) - if rc < 0: - return -1 - - rc = hotpool_test_wait_process_launch(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there's no running instance of [%s]\n", - 
HOTPOOL_LPURGE) - return -1 - - service_unitname = "lpurge@%s.service" % service_subname - # check service status - command = "systemctl is-active %s" % service_unitname - ret_val = test_host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - ret_val.cr_exit_status, ret_val.cr_stdout, - ret_val.cr_stderr) - return -1 - - if ret_val.cr_stdout.strip() != "active": - log.cl_error("unit %s status:[%s]", service_unitname, - ret_val.cr_stdout.strip()) - return -1 - - rc = test_host.sh_service_stop(log, service_unitname) - if rc < 0: - log.cl_error("failed to stop systemd unit [%s] on host [%s]", - service_unitname, test_host.sh_hostname) - return rc - return 0 - - -HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_service) - - -def hotpool_test_lpurge_multiple_services(test_context): - """ - Test start multiple lpurge service instance - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - service_units = list() - test_host = test_context.htc_fast_oss_host - test_host.sh_pkill(log, HOTPOOL_LPURGE) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LPURGE) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - params = hotpool_test_generate_basic_lpurge_parameters(test_context) - for ost in test_context.htc_fast_osts: - fast_ost_device = "-".join([test_context.htc_fsname, - ost.ls_index_string]) - - params[LPURGE_CONFIG_DEVICE] = fast_ost_device - service_unit = "lpurge@%s.service" % fast_ost_device - - rc = hotpool_test_start_lpurge_service(test_context, test_host, - params, ost.ls_index_string) - if rc < 0: - log.cl_error("failed to start systemd 
unit [%s] on host [%s]", - service_unit, test_host.sh_hostname) - break - service_units.append(service_unit) - - if rc < 0: - for unit in service_units: - test_host.sh_service_stop(log, unit) - return -1 - - rc = hotpool_test_wait_process_launch(log, test_host, - HOTPOOL_LPURGE, instance_count=2) - if rc: - log.cl_error("there's no running instance of [%s]\n", - HOTPOOL_LPURGE) - return -1 - - for unit in service_units: - command = "systemctl is-active %s" % unit - ret_val = test_host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - ret_val.cr_exit_status, - ret_val.cr_stdout, - ret_val.cr_stderr) - rc = -1 - break - - if ret_val.cr_stdout.strip() != "active": - log.cl_error("unit %s status:[%s]", unit, - ret_val.cr_stdout.strip()) - rc = -1 - break - - for unit in service_units: - rc_1 = test_host.sh_service_stop(log, unit) - if rc_1 < 0: - log.cl_error("failed to stop systemd unit [%s] on host [%s]", - unit, test_host.sh_hostname) - rc = -1 - - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_multiple_services) - - -def hotpool_test_lpurge_options(test_context): - """ - Test lpurge options - """ - log = test_context.htc_log - workspace = test_context.htc_workspace - params = hotpool_test_generate_basic_lpurge_parameters(test_context) - dump_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LPURGE_DUMP_PREFIX) - params[LPURGE_CONFIG_DUMP] = dump_file - test_host = test_context.htc_fast_oss_host - test_host.sh_pkill(log, HOTPOOL_LPURGE) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LPURGE) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - 
return -1 - config_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LPURGE_CONFIG_PREFIX) - rc = hotpool_test_generate_config(log, config_file, params) - if rc < 0: - log.cl_error("failed to generate lpurge config with parameters: %s", - params) - return rc - - try: - lpurge_t = utils.thread_start(hotpool_test_start_lpurge, - (test_context, test_host, config_file)) - except threading.ThreadError: - log.cl_error("failed to start lpurge with parameters: %s", - params) - return -1 - - # check pid - pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE) - if pid_list is None or len(pid_list) == 0: - log.cl_error("can't find lpurge instance") - lpurge_t.join() - return -1 - - rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0]) - if rc_1: - log.cl_error("can't find file [%s] on host [%s]", - dump_file, test_host.sh_hostname) - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list) - if rc < 0: - log.cl_error("failed to stop lpurge") - else: - rc = rc_1 - - lpurge_t.join() - if rc: - return rc - # get dumpfile from host - rc = test_host.sh_get_file(log, dump_file, workspace) - if rc < 0: - log.cl_error("failed to get file [%s] from host:%s", - dump_file, test_host.sh_hostname) - return rc - - return hotpool_test_lpurge_validate_options(log, dump_file, params) - - -HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_options) - - -def hotpool_test_lpurge_purge_effect(test_context): - """ - Test lpurge options - """ - # pylint: disable=too-many-locals,too-many-branches,too-many-statements - log = test_context.htc_log - workspace = test_context.htc_workspace - fast_pool = test_context.htc_ost_pools[HOTPOOL_POOL_FAST] - slow_pool = test_context.htc_ost_pools[HOTPOOL_POOL_SLOW] - - params = hotpool_test_generate_basic_lpurge_parameters(test_context) - params[LPURGE_CONFIG_FREELO] = 30 - params[LPURGE_CONFIG_FREEHI] = 99 - dump_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LPURGE_DUMP_PREFIX) - params[LPURGE_CONFIG_DUMP] = 
dump_file - test_host = test_context.htc_fast_oss_host - test_host.sh_pkill(log, HOTPOOL_LPURGE) - rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE) - if rc: - log.cl_error("there are another running instances of [%s], " - "and can't be stopped\n", HOTPOOL_LPURGE) - return -1 - rc = test_host.sh_mkdir(log, workspace) - if rc < 0: - log.cl_error("failed to create remote workspace [%s] to target host [%s]", - workspace, test_host.sh_hostname) - return -1 - - config_file = hotpool_test_genarate_tmpfile(workspace, - HOTPOOL_LPURGE_CONFIG_PREFIX) - - # create test dir, and set stripe to fast pool - test_dir = os.path.join(HOTPOOL_CLIENT, "test_scan_fids") - rc = hotpool_create_mirror_dir(log, test_host, test_dir, - fast_pool.lp_name, - test_context.htc_fast_osts[0].ls_index) - if rc < 0: - return rc - - # choose a suitable file size, a bit larger than 1/100 target ost size - command = "lctl get_param osd*.%s.kbytestotal -n | head -1" % params[LPURGE_CONFIG_DEVICE] - ret_val = test_host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - ret_val.cr_exit_status, - ret_val.cr_stdout, ret_val.cr_stderr) - return -1 - ost_size = float(ret_val.cr_stdout.strip()) - file_size_mb = int(math.ceil(ost_size / (100 * 1024))) - # create 10 mirrored files and resync, store the FID into the list - files_count = 10 - file_list = [os.path.join(test_dir, str(i)) for i in range(files_count)] - for item in file_list: - rc = test_host.sh_fill_binary_file(log, item, file_size_mb * 1048576) - if rc: - return -1 - - test_host.sh_run(log, "sync", timeout=10) - # extend mirror to slow pool - for item in file_list: - command = "lfs mirror extend -N -p %s -o %d %s" % (slow_pool.lp_name, - slow_pool.lp_osts[0].ls_index, - item) - ret_val = test_host.sh_run(log, command) - if ret_val.cr_exit_status: - log.cl_error("failed to run command [%s] 
on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - ret_val.cr_exit_status, - ret_val.cr_stdout, ret_val.cr_stderr) - return -1 - - test_host.sh_run(log, "sync", timeout=10) - - # start lpurge - rc = hotpool_test_generate_config(log, config_file, params) - if rc < 0: - log.cl_error("failed to generate config from parameters: %s", - params) - return -1 - - try: - lpurge_t = utils.thread_start(hotpool_test_start_lpurge, - (test_context, test_host, config_file)) - except threading.ThreadError: - log.cl_error("failed to start lpurge with parameters: %s", - params) - return -1 - - # check pid - pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE) - if not pid_list: - log.cl_error("can't find lpurge instance") - lpurge_t.join() - return -1 - elif len(pid_list) > 1: - log.cl_error("more than one lpurge instances found, PIDs: %s\n", - pid_list) - lpurge_t.join() - return -1 - - # trigger dump and check replicated count to determine where the purge progress - # is done or not - rc1 = hotpool_test_lpurge_wait_purged(log, test_host, pid_list[0], - dump_file, files_count, - timeout=30) - if rc1 < 0: - log.cl_error("failed to wait purge progress done") - - rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list) - lpurge_t.join() - if rc < 0: - log.cl_error("failed to stop lpurge") - - if rc < 0 or rc1 < 0: - return -1 - - rc = hotpool_test_lpurge_check_mirror_status(log, test_host, file_list) - - # cleanup test dir - for item in file_list: - test_host.sh_remove_file(log, item) - test_host.sh_rmdir_if_exist(log, test_dir) - return rc - - -HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_purge_effect) diff --git a/lipe/pylipe/lipe_test.py b/lipe/pylipe/lipe_test.py deleted file mode 100644 index 97ef7c3..0000000 --- a/lipe/pylipe/lipe_test.py +++ /dev/null @@ -1,2665 +0,0 @@ -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. 
-# Author: lixi@ddn.com -""" -Test Library for LiPE -LiPE is a policy engine for Lustre -""" -# pylint: disable=too-many-lines -import time -import traceback -import random -import shutil -import stat -import os -import json -import yaml -import prettytable - -from pylustre import cmd_general -from pylustre import test_common -from pylustre import ssh_host -from pylustre import watched_io -from pylustre import cstr -from pylustre import utils -from pylustre import time_util -from pylustre import lustre -from pylustre import constants -from pylustre import clog -from pylipe import lipe -from pylipe import lipe_find -from pylipe import lipe_install_nodeps -from pylipe import lipe_constant -from pylipe import lipe_hotpool_test -from pyclownfish import clownfish - -LIPE_TESTS = [] -LIPE_LAUNCH_CONFIG = None -LIPE_CLUSTER = None - - -LIPE_PYLUSTRE_RPM = "lipe-pylustre*" -LIPE_PYLTEST_RPM = "lipe-pyltest*" -LIPE_CLOWNFISH_RPM = "lipe-clownfish*" -LIPE_RPM = "lipe-1.*" -LIPE_LORIS_RPM = "lipe-loris*" -LIPE_CLIENT_RPM = "lipe-client*" -LIPE_SERVER_RPM = "lipe-server*" -LIPE_HSM_RPM = "lipe-hsm*" -LIPE_LPCC_RPM = "lipe-lpcc*" - -# We assumed all the other non-lipe deps are all installed first -# here we only test the deps of lipe internal rpms. 
-# key: target rpm, value: dep rpm (if the target rpm has) -LIPE_RPM_DEPENDENCY_DICT = { - LIPE_PYLUSTRE_RPM: [], - LIPE_SERVER_RPM: [], - LIPE_HSM_RPM: [LIPE_PYLUSTRE_RPM], - LIPE_LORIS_RPM: [LIPE_PYLUSTRE_RPM], - LIPE_LPCC_RPM: [LIPE_PYLUSTRE_RPM], - LIPE_CLOWNFISH_RPM: [LIPE_PYLUSTRE_RPM], - LIPE_RPM: [LIPE_PYLUSTRE_RPM], - LIPE_CLIENT_RPM: [LIPE_PYLUSTRE_RPM, LIPE_RPM] -} - -TEST_POOL_NAME = "pool0" -TEST_STRIPE_SIZE = 1048576 - - -def get_rpm_path(log, test_host, rpms_dir, rpm_pattern): - """ - Get the real-path of target rpm - """ - command = "ls %s/%s" % (rpms_dir, rpm_pattern) - retval = test_host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = %d, stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return None - - lines = retval.cr_stdout.strip().splitlines() - if len(lines) != 1: - log.cl_error("command [%s] on host [%s] has unexpected output " - "ret = %d, stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return None - - return lines[0] - - -def _test_rpms_dependency(args, mnt_path): - """ - Key function of rpm deps testing - """ - # pylint: disable=too-many-locals,too-many-arguments,too-many-statements - # pylint: disable=too-many-branches - # Make sure install server is not local host, since this will overwrite the - # local config files - log, test_host = args - uuid_install = test_host.sh_uuid(log) - if uuid_install is None: - log.cl_error("failed to get the UUID on host [%s]", - test_host.sh_hostname) - return -1 - - local_host = ssh_host.SSHHost("localhost", local=True) - uuid_local = local_host.sh_uuid(log) - if uuid_local is None: - log.cl_error("failed to get the UUID on localhost") - return -1 - - if uuid_local == uuid_install: - log.cl_error("please do NOT use host [%s] as the install server, " - "since it is the 
localhost, and installation test " - "would overwrite the local configuration files", - local_host.sh_hostname) - return -1 - - rpms_dir = mnt_path + "/" + cstr.CSTR_PACKAGES - - for target_rpm in LIPE_RPM_DEPENDENCY_DICT: - # python-crontab needed by lipe-loris is generated by EXAScaler. - if target_rpm == LIPE_LORIS_RPM: - continue - log.cl_info("testing dependency for RPM [%s]", target_rpm) - - ret = test_host.sh_rpm_find_and_uninstall(log, "grep lipe") - if ret: - log.cl_error("failed to uninstall LiPE rpms on host [%s]", - test_host.sh_hostname) - return -1 - - rpms = LIPE_RPM_DEPENDENCY_DICT[target_rpm] - rpms.append(target_rpm) - command = "rpm -ivh" - for rpm in rpms: - rpm_fpath = get_rpm_path(log, test_host, rpms_dir, rpm) - if rpm_fpath is None: - log.cl_error("failed to get RPM path [%s] in dir [%s]", - rpm, rpms_dir) - return -1 - command += " " + rpm_fpath - - retval = test_host.sh_run(log, command, - timeout=ssh_host.LONGEST_TIME_RPM_INSTALL) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = %d, stdout = [%s], stderr = [%s]", - command, test_host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -def test_rpms_dependency(log, workspace, host): - """ - Start do real install action - """ - # pylint: disable=too-many-locals,too-many-arguments - command = "mkdir -p %s" % workspace - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - local_host = ssh_host.SSHHost("localhost", local=True) - command = "ls lipe-*.iso" - retval = local_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - local_host.sh_hostname, - 
retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - current_dir = os.getcwd() - iso_names = retval.cr_stdout.split() - if len(iso_names) != 1: - log.cl_error("found unexpected ISOs [%s] under currect directory " - "[%s]", iso_names, current_dir) - return -1 - - iso_name = iso_names[0] - iso_path = current_dir + "/" + iso_name - - ret = host.sh_send_file(log, iso_path, workspace) - if ret: - log.cl_error("failed to send LiPE ISO [%s] on local host to " - "directory [%s] on host [%s]", - iso_path, workspace, - host.sh_hostname) - return -1 - - host_iso_path = workspace + "/" + iso_name - args = (log, host) - ret = test_common.mount_and_run(log, host, host_iso_path, - _test_rpms_dependency, args) - if ret: - log.cl_error("failed to mount and run RPM dependency") - return ret - return 0 - - -def umount_prepare_format_mount(log, workspace, clownfish_instance): - """ - prepare the hosts, mount file systems, umount file system - """ - # pylint: disable=invalid-name - ret = clownfish_instance.ci_umount_all(log) - if ret: - log.cl_error("failed to umount all") - return -1 - - ret = clownfish_instance.ci_prepare_all(log, workspace) - if ret: - log.cl_error("failed to prepare all") - return -1 - - ret = clownfish_instance.ci_format_all(log) - if ret: - log.cl_error("failed to format all") - return -1 - - ret = clownfish_instance.ci_mount_all(log) - if ret: - log.cl_error("failed to mount all") - return -1 - - return 0 - -LIPE_TESTS.append(umount_prepare_format_mount) - - -def get_device_id(host_id, device): - """ - Get the device ID - """ - common_string = "/dev/mapper/" - if device.startswith(common_string): - device = device[len(common_string):] - else: - common_string = "/dev/" - if device.startswith(common_string): - device = device[len(common_string):] - - device_name = "" - for char in device: - if char.isalnum(): - device_name += char - else: - device_name += "_" - - device_id = ("%s_%s" % (host_id, device_name)) - return device_id - - -def 
finish_lipe_launch_config(log, clownfish_instance, config): - """ - Generate the json config file of lipe_launch - - groups of config should be generated already - """ - # pylint: disable=too-many-locals - group_names = [] - groups = config[lipe.LIPE_CONFIG_GROUPS] - for group in groups: - group_name = group[lipe.LIPE_CONFIG_NAME] - group_names.append(group_name) - - device_ids = [] - device_configs = [] - ssh_host_configs = [] - ssh_hosts = {} - for lustrefs in clownfish_instance.ci_lustres.values(): - # Add all client hosts into ssh_hosts so as to enable fid2path - for client in lustrefs.lf_clients.values(): - host = client.lc_host - host_id = host.sh_host_id - - if host_id not in ssh_hosts: - ssh_hosts[host_id] = host - ssh_host_config = {} - ssh_host_config[cstr.CSTR_HOST_ID] = host_id - ssh_host_config[cstr.CSTR_HOSTNAME] = host.sh_hostname - ssh_host_config[cstr.CSTR_SSH_IDENTITY_FILE] = host.sh_identity_file - ssh_host_configs.append(ssh_host_config) - - for mdt in lustrefs.lf_mdts.values(): - for mdti in mdt.ls_instances.values(): - host = mdti.lsi_host - device = mdti.lsi_device - host_id = host.sh_host_id - - # Use the information to fill json file of lipe_launch - if host_id not in ssh_hosts: - ssh_hosts[host_id] = host - ssh_host_config = {} - ssh_host_config[cstr.CSTR_HOST_ID] = host_id - ssh_host_config[cstr.CSTR_HOSTNAME] = host.sh_hostname - ssh_host_config[cstr.CSTR_SSH_IDENTITY_FILE] = host.sh_identity_file - ssh_host_configs.append(ssh_host_config) - - device_id = get_device_id(host_id, device) - if device_id in device_ids: - log.cl_error("multiple devices with the same ID [%s]", - device_id) - return -1 - - device_config = {} - device_config[cstr.CSTR_HOST_ID] = host_id - device_config[lipe.LIPE_CONFIG_PATH] = device - device_config[lipe.LIPE_CONFIG_GROUPS] = group_names - device_config[lipe.LIPE_CONFIG_DEVICE_ID] = device_id - device_configs.append(device_config) - - config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs - 
config[lipe.LIPE_CONFIG_DEVICES] = device_configs - config[lipe.LIPE_CONFIG_ONLY_SCAN_ACTIVE] = True - return 0 - - -def get_lipe_launch_result(logdir, service_instance): - """ - Get the lipe_launch result for the test - """ - # pylint: disable=too-many-locals - host = service_instance.lsi_host - host_id = host.sh_host_id - device = service_instance.lsi_device - - # Should use the same device_id/group_name/action/argument/counter_name in launch_config - # This could be get from launch_config actually - device_id = get_device_id(host_id, device) - group_name = "group_name" - action = lipe.LAT_SHELL_CMD_FID - argument = "stat $LUSTRE_MNT/.lustre/fid/$LUSTRE_FID" - counter_name = "fname_reg_of_old_file" - - local_device_workspace = logdir + "/" + device_id - result = lipe.LipeCounterResult(local_device_workspace, group_name, - action, argument, counter_name) - return result - - -def lipe_launch_on_host(log, host, parent_dir, config): - """ - Run lipe_launch on a web host - """ - basename = "launch_host-" + host.sh_hostname - logdir = parent_dir + "/" + basename - - command = "mkdir -p %s" % logdir - retval = utils.run(command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on local host, " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return None - - command = "mkdir -p %s" % logdir - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return None - - config_fpath = logdir + "/lipe_launch.json" - with open(config_fpath, 'w') as outfile: - json.dump(config, outfile, indent=4) - - ret = host.sh_send_file(log, config_fpath, logdir) - if ret: - log.cl_error("failed to send config file [%s] of localhost to [%s] " - "directory [%s] of host [%s]", - config_fpath, 
logdir, config_fpath, - host.sh_hostname) - return None - - return_value = 0 - command = ("%s --logdir %s --config %s" % - (lipe.LIPE_LAUNCH, logdir, config_fpath)) - stdout_file = logdir + "/" + "lipe_launch.stdout.log" - stderr_file = logdir + "/" + "lipe_launch.stderr.log" - retval = host.sh_watched_run(log, command, stdout_file, stderr_file) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return None - - host_local_dir = logdir + "/" + host.sh_hostname - command = "mkdir -p %s" % host_local_dir - retval = utils.run(command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on local host, " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return None - - ret = host.sh_get_and_clean_dir(log, logdir, host_local_dir) - if ret: - log.cl_error("failed to get and clean dir [%s] on host [%s]", - logdir, host.sh_hostname) - if return_value is -1: - log.cl_error("please check [%s] on host [%s] and [%s] on " - "local host to debug why lipe_launch failed", - logdir, host.sh_hostname, host_local_dir) - else: - log.cl_error("please check why copying and removing directory " - "failed after success of lipe_launch test") - return None - - if return_value: - return None - - return host_local_dir + "/" + basename - - -def lipe_launch_with_example_config_ftype(log, clownfish_instance, logdir, flist_type): - """ - Run lipe_launch with example config, flist_type needs to be specified - """ - config = LIPE_LAUNCH_CONFIG.copy() - - config[lipe.LIPE_CONFIG_FLIST_TYPE] = flist_type - ret = finish_lipe_launch_config(log, clownfish_instance, config) - if ret: - log.cl_error("failed to finish the lipe_launch config") - return -1 - - for mdt_host in LIPE_CLUSTER.lc_mdt_hosts: - result_dir = lipe_launch_on_host(log, 
mdt_host, logdir, config) - if result_dir is None: - log.cl_error("failed to run lipe_launch on host [%s]", - mdt_host.sh_hostname) - return -1 - - return 0 - - -def lipe_launch_with_example_config(log, logdir, clownfish_instance): - """ - Run lipe_launch with example config - """ - # pylint: disable=unused-argument - for flist_type in lipe.LIPE_FLIST_TYPES: - # the directory will be created in lipe_launch_on_host() - # by "mkdir -p" - basename = "flist_type-" + flist_type - ret = lipe_launch_with_example_config_ftype(log, clownfish_instance, logdir + "/" + basename, - flist_type) - if ret: - log.cl_error("failed to run lipe launch with example config and " - "flist type [%s]", flist_type) - return ret - - return 0 - - -LIPE_TESTS.append(lipe_launch_with_example_config) - - -def run_lipe_launch_stat(log, logdir, host, service_instance, - launch_config): - """ - Run lipe_launch on a device - """ - # pylint: disable=too-many-locals - result_dir = lipe_launch_on_host(log, host, logdir, launch_config) - if result_dir is None: - log.cl_error("failed to run lipe_launch with config") - return None - - result = get_lipe_launch_result(result_dir, service_instance) - return result - - -def _check_fid_lipe_launch_found(log, parent_dir, host, mdti, - launch_config, fid, expect_found=True): - """ - Check whether the found number is expected - """ - # pylint: disable=too-many-arguments - workspace = (parent_dir + "/" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - ret = utils.mkdir(workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - workspace) - log.cl_abort = True - return -1 - - # Do not abort here, since lipe_launch might still be able to find the - # inode on MDT, but can not execute stat on the client side. In this case, - # lipe_launch will fail, but next round lipe_launch could be able to - # run successfully. 
- result = run_lipe_launch_stat(log, workspace, host, mdti, launch_config) - if result is None: - log.cl_error("failed to run lipe_launch on Lustre service instance [%s]", - mdti.lsi_service_instance_name) - return -1 - - ret = result.lcr_load_json(log) - if ret: - log.cl_error("failed to load json file of lipe_launch result on Lustre " - "service instance [%s]", mdti.lsi_service_instance_name) - log.cl_abort = True - return -1 - - found = result.lcr_fid_is_found(fid) - if found: - found_string = "" - else: - found_string = " not" - log.cl_debug("FID [%s] is%s found on Lustre service instance [%s]", fid, - found_string, mdti.lsi_service_instance_name) - - if found == expect_found: - return 0 - else: - return -1 - - -def check_fid_lipe_launch_found(log, parent_dir, host, mdti, - launch_config, fid): - """ - Check whether the fid is found - """ - # pylint: disable=too-many-arguments - return _check_fid_lipe_launch_found(log, parent_dir, host, mdti, - launch_config, fid, expect_found=True) - - -def check_fid_lipe_launch_missing(log, parent_dir, host, mdti, - launch_config, fid): - """ - Check whether the fid is not found - """ - # pylint: disable=too-many-arguments - return _check_fid_lipe_launch_found(log, parent_dir, host, mdti, - launch_config, fid, expect_found=False) - - -def generate_lipe_launch_stat_config(log, clownfish_instance, fname): - """ - Generate the config for stating each file - """ - # pylint: disable=unused-argument - config = {} - groups = [] - - group = {} - group_name = "group_name" - group[lipe.LIPE_CONFIG_NAME] = group_name - - rules = [] - rule = {} - rule[lipe.LIPE_CONFIG_ACTION] = lipe.LAT_SHELL_CMD_FID - funct = lipe_constant.LIPE_POLICY_FUNCTION_FNAME_REG - rule[lipe.LIPE_CONFIG_EXPRESSION] = ('%s("%s")' % (funct, fname)) - rule[lipe.LIPE_CONFIG_ARGUMENT] = "stat $LUSTRE_MNT/.lustre/fid/$LUSTRE_FID" - rule[lipe.LIPE_CONFIG_COUNTER_NAME] = "fname_reg_of_old_file" - rules.append(rule) - group[lipe.LIPE_CONFIG_RULES] = rules - 
groups.append(group) - config[lipe.LIPE_CONFIG_GROUPS] = groups - - ret = finish_lipe_launch_config(log, clownfish_instance, config) - if ret: - log.cl_error("failed to finish the lipe_launch config") - return None - - return config - - -def test_lipe_launch_type(log, workspace, clownfish_instance, mdti, host, - lustre_dir_path, inode_type=stat.S_IFREG, - major=None, minor=None, path=None): - """ - Create an remove file/dir with a special type and check whether lipe_launch - can find it - """ - # pylint: disable=too-many-arguments,too-many-branches,too-many-statements - # pylint: disable=too-many-locals - inode_fname = "new_inode" - inode_path = lustre_dir_path + "/" + inode_fname - type_string = utils.file_type2string(inode_type) - - ret = host.sh_create_inode(log, inode_path, inode_type=inode_type, - major=major, minor=minor, path=path) - if ret: - log.cl_error("failed create inode [%s] with type [%s] on host [%s]", - inode_path, type_string, host.sh_hostname) - return -1 - - fid = lustre.lfs_path2fid(log, host, inode_path) - if fid is None: - log.cl_error("failed to get FID of file [%s] on host [%s]", - inode_path, host.sh_hostname) - return -1 - - creation_workspace = workspace + "/after_creation" - ret = utils.mkdir(creation_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - creation_workspace) - return -1 - - config = generate_lipe_launch_stat_config(log, clownfish_instance, inode_fname) - if config is None: - log.cl_error("generate lipe_launch stat config", - creation_workspace) - return -1 - - waited = False - for mdt_host in LIPE_CLUSTER.lc_mdt_hosts: - if not waited: - ret = utils.wait_condition(log, check_fid_lipe_launch_found, - (creation_workspace, mdt_host, mdti, config, - fid)) - else: - ret = check_fid_lipe_launch_found(log, creation_workspace, - mdt_host, mdti, config, fid) - if ret: - log.cl_error("can not find FID [%s] on Lustre service instance " - "[%s] by running lipe_launch on host [%s] after " - "creation 
of [%s] with type [%s] on host [%s]", fid, - mdti.lsi_service_instance_name, host.sh_hostname, - inode_path, type_string, mdt_host.sh_hostname) - return -1 - waited = True - - ret = host.sh_remove_inode(log, inode_path, inode_type=inode_type) - if ret: - log.cl_error("failed to remove inode [%s] with type [%s] on host [%s]", - inode_path, type_string, host.sh_hostname) - return -1 - - waited = False - for mdt_host in LIPE_CLUSTER.lc_mdt_hosts: - if not waited: - ret = utils.wait_condition(log, check_fid_lipe_launch_missing, - (creation_workspace, mdt_host, mdti, config, - fid)) - else: - ret = check_fid_lipe_launch_missing(log, creation_workspace, - mdt_host, mdti, config, fid) - if ret: - log.cl_error("can not find FID [%s] on Lustre service instance " - "[%s] by running lipe_launch on host [%s] after " - "creation of [%s] with type [%s] on host [%s]", fid, - mdti.lsi_service_instance_name, host.sh_hostname, - inode_path, type_string, mdt_host.sh_hostname) - return -1 - waited = True - - return 0 - - -def test_lipe_launch_mdt(log, workspace, clownfish_instance, mdt): - """ - Test whether lipe_launch can find the newly created file/directory on a mdt - """ - # pylint: disable=too-many-locals - mdt_index = mdt.ls_index - lustrefs = mdt.ls_lustre_fs - clients = lustrefs.lf_clients - if len(clients) == 0: - log.cl_warning("no client for file system [%s] configured, skip test of " - "file creation", lustrefs.lf_fsname) - return 0 - - mdti = mdt.ls_mounted_instance(log) - if mdti is None: - log.cl_error("failed to get the active instance of Lustre service [%s]", - mdt.ls_service_name) - return -1 - - lustre_relative_dir_path = ("lipe_launch_" + mdt.ls_service_name + "_" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - - launch_workspace = workspace + "/" + "dir_path" - ret = utils.mkdir(launch_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - launch_workspace) - return -1 - - client = 
random.choice(clients.values()) - host = client.lc_host - mnt = client.lc_mnt - lustre_dir_path = mnt + "/" + lustre_relative_dir_path - - command = ("lfs mkdir -c 1 -i %d %s" % (mdt_index, lustre_dir_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - inode_types = [stat.S_IFDIR, stat.S_IFCHR, stat.S_IFBLK, stat.S_IFREG, - stat.S_IFIFO, stat.S_IFLNK, stat.S_IFSOCK] - for inode_type in inode_types: - major = None - minor = None - path = None - if inode_type == stat.S_IFCHR or inode_type == stat.S_IFBLK: - major = 7 - minor = 1 - elif inode_type == stat.S_IFLNK: - path = "unkown" - ret = test_lipe_launch_type(log, workspace, clownfish_instance, mdti, host, - lustre_dir_path, inode_type=inode_type, - major=major, minor=minor, path=path) - if ret: - log.cl_error("failed to test the lipe_launch for mdt " - "[%s] with inode type [%s]", - mdt.ls_service_name, inode_type) - return -1 - - command = ("rmdir %s" % (lustre_dir_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -def test_lipe_launch(log, workspace, clownfish_instance): - """ - Test whether lipe_launch can find and remove the newly created - file/directory - """ - for lustrefs in clownfish_instance.ci_lustres.values(): - for mdt in lustrefs.lf_mdts.values(): - mdt_workspace = workspace + "/" + mdt.ls_service_name - ret = utils.mkdir(mdt_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - mdt_workspace) - return -1 - - ret = test_lipe_launch_mdt(log, mdt_workspace, clownfish_instance, mdt) - if ret: - 
log.cl_error("failed to test lipe_launch on " - "Lustre service [%s]", mdt.ls_service_name) - return ret - return 0 - -LIPE_TESTS.append(test_lipe_launch) - - -def lipe_find_device(log, workspace, host, device, find_option, - action_option=None): - """ - Run lipe_find on a device - """ - # pylint: disable=too-many-locals, too-many-arguments - identity = time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f") - stdout_file = workspace + "/" + "lipe_find.stdout.log" - stderr_file = workspace + "/" + "lipe_find.stderr.log" - - args = {} - args[watched_io.WATCHEDIO_LOG] = log - args[watched_io.WATCHEDIO_HOSTNAME] = host.sh_hostname - stdout_fd = watched_io.watched_io_open(stdout_file, - watched_io.log_watcher_info, args) - stderr_fd = watched_io.watched_io_open(stderr_file, - watched_io.log_watcher_error, args) - if not action_option: - command = ("lipe_find --identity %s %s %s" % (identity, device, find_option)) - else: - command = ("lipe_find --identity %s %s %s %s" % (identity, action_option, - device, find_option)) - - log.cl_info("start to run command [%s] on host [%s]", command, - host.sh_hostname) - retval = host.sh_run(log, command, stdout_tee=stdout_fd, - stderr_tee=stderr_fd, timeout=None) - stdout_fd.close() - stderr_fd.close() - - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], " - "stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return None - - find_workspace = lipe_find.LIPE_FIND_LOG_DIR + "/" + identity - ret = host.sh_get_file(log, find_workspace, workspace) - if ret: - log.cl_error("failed to get file [%s] from on host [%s] to local " - "host directory [%s] ", - workspace, host.sh_hostname, workspace) - return None - - local_find_workspace = workspace + "/" + "lfs_find_log_" + identity - shutil.move(workspace + "/" + identity, local_find_workspace) - - device_id = lipe_find.generate_device_id(0, 
device) - local_device_workspace = local_find_workspace + "/" + device_id - result = lipe_find.LipeFindCounterResult(local_device_workspace) - return result - - -def lipe_find_service_instance(log, workspace, service_instance, find_option, - action_option=None): - """ - Run lipe_find on the instance of a Lustre service - """ - host = service_instance.lsi_host - device = service_instance.lsi_device - result = lipe_find_device(log, workspace, host, device, find_option, - action_option=action_option) - if result is None: - log.cl_error("failed to run lipe_find on device [%s] of host " - "[%s]", device, host.sh_hostname) - return None - return result - - -def check_found_number(log, parent_dir, mdti, find_option, expected_count, - action_option=None): - """ - Check whether the found number is expected - """ - # pylint: disable=too-many-arguments - workspace = (parent_dir + "/" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - ret = utils.mkdir(workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - workspace) - log.cl_abort = True - return -1, None - - result = lipe_find_service_instance(log, workspace, mdti, find_option, - action_option=action_option) - if result is None: - log.cl_error("failed to run lipe_find on Lustre service instance [%s]", - mdti.lsi_service_instance_name) - log.cl_abort = True - return -1, None - - ret = result.lcr_load_json(log) - if ret: - log.cl_error("failed to load json file of lipe_find result on Lustre " - "service instance [%s]", mdti.lsi_service_instance_name) - log.cl_abort = True - return -1, None - - if result.lcr_found_count != expected_count: - log.cl_debug("unexpected found inode number on Lustre service " - "instance [%s], expected [%s], got [%d]", - mdti.lsi_service_instance_name, - expected_count, result.lcr_found_count) - return -1, None - - return 0, result - - -def _check_fid_found(log, parent_dir, mdti, find_option, fid, - expect_found=True, expect_count=None, - 
action_option=None): - """ - Check whether the fid is found (or not) as expected - """ - # pylint: disable=too-many-arguments - workspace = (parent_dir + "/" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - ret = utils.mkdir(workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - workspace) - log.cl_abort = True - return -1 - - result = lipe_find_service_instance(log, workspace, mdti, find_option, - action_option=action_option) - if result is None: - log.cl_error("failed to run lipe_find on Lustre service instance [%s]", - mdti.lsi_service_instance_name) - log.cl_abort = True - return -1 - - ret = result.lcr_load_json(log) - if ret: - log.cl_error("failed to load json file of lipe_find result on Lustre " - "service instance [%s]", mdti.lsi_service_instance_name) - log.cl_abort = True - return -1 - - found = result.lcr_fid_is_found(fid) - if found: - found_string = "" - else: - found_string = " not" - log.cl_debug("FID [%s] is%s found on Lustre service instance [%s]", fid, - found_string, mdti.lsi_service_instance_name) - - found_count = result.lcr_items_count(log) - if found and expect_count is not None and found_count != expect_count: - log.cl_error("expect [%d] items, [%d] found.\n", - expect_count, found_count) - return -1 - - if found == expect_found: - return 0 - else: - return -1 - - -def check_fid_found(log, parent_dir, mdti, find_option, fid, - expect_count=None, action_option=None): - """ - Check whether the fid found - """ - # pylint: disable=too-many-arguments - return _check_fid_found(log, parent_dir, mdti, find_option, fid, - expect_found=True, expect_count=expect_count, - action_option=action_option) - - -def check_fid_missing(log, parent_dir, mdti, find_option, fid, action_option=None): - """ - Check whether the fid is not found - """ - # pylint: disable=too-many-arguments - return _check_fid_found(log, parent_dir, mdti, find_option, fid, - expect_found=False, action_option=action_option) - 
- -def lipe_find_posix_client(log, workspace, client, find_option, - action_option=None): - """ - Run lipe_find on a Lustre client - """ - host = client.lc_host - mnt = client.lc_mnt - result = lipe_find_device(log, workspace, host, mnt, find_option, - action_option=action_option) - if result is None: - log.cl_error("failed to run lipe_find on path [%s] of host " - "[%s]", mnt, host.sh_hostname) - return None - return result - - -def _check_fid_found_client(log, parent_dir, client, find_option, fid, - expect_found=True, expect_count=None, - action_option=None): - """ - Check whether the fid is found (or not) as expected from client - """ - # pylint: disable=too-many-arguments - workspace = (parent_dir + "/" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - ret = utils.mkdir(workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - workspace) - log.cl_abort = True - return -1 - - result = lipe_find_posix_client(log, workspace, client, find_option, action_option=action_option) - if result is None: - log.cl_error("failed to run lipe_find on Lustre client [%s]", - client.lc_client_name) - log.cl_abort = True - return -1 - - ret = result.lcr_load_json(log) - if ret: - log.cl_error("failed to load json file of lipe_find result on Lustre " - "client [%s]", client.lc_client_name) - log.cl_abort = True - return -1 - - found = result.lcr_fid_is_found(fid) - if found: - found_string = "" - else: - found_string = " not" - log.cl_debug("FID [%s] is%s found on Lustre client [%s]", fid, - found_string, client.lc_client_name) - - found_count = result.lcr_items_count(log) - if found and expect_count is not None and found_count != expect_count: - log.cl_error("expect [%d] items, [%d] found.\n", - expect_count, found_count) - return -1 - - if found == expect_found: - return 0 - else: - return -1 - - -def check_fid_found_client(log, parent_dir, client, find_option, fid, - expect_count=None, action_option=None): - """ - Check 
whether the fid found on a Lustre client - """ - # pylint: disable=too-many-arguments - return _check_fid_found_client(log, parent_dir, client, find_option, fid, - expect_found=True, expect_count=expect_count, - action_option=action_option) - - -def check_fid_missing_client(log, parent_dir, client, find_option, fid, action_option=None): - """ - Check whether the fid is not found on a Lustre client - """ - # pylint: disable=too-many-arguments - return _check_fid_found_client(log, parent_dir, client, find_option, fid, - expect_found=False, action_option=action_option) - - -def lipe_find_message(fid, mdti, host, inode_path, - inode_type=stat.S_IFREG, major=None, minor=None, - path=None, stripe_index=None, pool=None, - component_layout=False): - """ - Return the message for a failure of lipe_find - """ - # pylint: disable=too-many-arguments - if pool: - pool_name = pool.lp_name - else: - pool_name = "None" - return ("FID [%s], service [%s], client [%s], inode path: [%s], " - "type [%s], major [%s], minor [%s], symbol link path [%s], " - "stripe index [%s], pool [%s], component_layout [%s]" % - (fid, mdti.lsi_service_instance_name, - host.sh_hostname, inode_path, - utils.file_type2string(inode_type), - major, minor, path, stripe_index, pool_name, component_layout)) - - -class LipeFindTestCase(object): - """ - Each test case of lipe_find has @found_count object of this type - """ - # pylint: disable=too-few-public-methods,too-many-instance-attributes - def __init__(self, expression, found, expect_count=None, skip_server=False, - skip_client=False, option=None): - # pylint: disable=too-many-arguments - self.lftc_expression = expression - self.lftc_option = option - self.lftc_found = found - self.lftc_expect_count = expect_count - self.lftc_skip_server = skip_server - self.lftc_skip_client = skip_client - self.lftc_filename, self.lftc_lineno, self.lftc_func = clog.back_caller(2) - self.lftc_debug_info = ("%s:%s %s()" % - (os.path.basename(self.lftc_filename), - 
self.lftc_lineno, - self.lftc_func)) - - -def test_find_options(log, parent_dir, client, mdti, test_cases, fid, msg): - """ - Run lipe_find with options that should find and should not find a fid - """ - # pylint: disable=too-many-arguments,too-many-locals - for test_case in test_cases: - find_option = test_case.lftc_expression - action_option = test_case.lftc_option - skip_server = test_case.lftc_skip_server - skip_client = test_case.lftc_skip_client - debug_info = test_case.lftc_debug_info - expect_count = test_case.lftc_expect_count - action_msg = " action [%s]" % action_option if action_option else "" - if test_case.lftc_found: - if not skip_server: - ret = check_fid_found(log, parent_dir, mdti, find_option, fid, - expect_count=expect_count, action_option=action_option) - if ret: - log.cl_error("expected to find fid with option [%s]%s for test [%s], " - "but got failure, %s", find_option, action_msg, debug_info, msg) - return -1 - - if not skip_client: - ret = check_fid_found_client(log, parent_dir, client, find_option, fid, - expect_count=expect_count, action_option=action_option) - if ret: - log.cl_error("expected to find fid with option [%s]%s for test [%s] from " - "Lustre client, but got failure, %s", - find_option, action_msg, debug_info, msg) - return -1 - else: - if not skip_server: - ret = check_fid_missing(log, parent_dir, mdti, find_option, - fid, action_option=action_option) - if ret: - log.cl_error("expected to NOT find FID with option [%s]%s for test [%s], " - "but got failure, %s", find_option, action_msg, debug_info, msg) - return -1 - - if not skip_client: - ret = check_fid_missing_client(log, parent_dir, client, - find_option, fid, action_option=action_option) - if ret: - log.cl_error("expected to NOT find FID with option [%s]%s for test [%s] " - "from Lustre client, but got failure, %s", - find_option, action_msg, debug_info, msg) - return -1 - return 0 - - -def test_lipe_find_size(log, parent_dir, client, mdti, fid, host, - inode_path, 
inode_type, msg, size=True): - """ - Check whether the lipe_find with -size or -blocks is expected - """ - # pylint: disable=too-many-arguments - # The Lustre file should be empty file that has no DoM - if size: - attribute = "size" - else: - attribute = "blocks" - size = host.sh_get_file_size(log, inode_path, size=size) - if size < 0: - log.cl_error("failed to get file %s, %s", attribute, msg) - return -1 - - if inode_type != stat.S_IFREG: - have_size = True - elif not lipe_constant.HAVE_LUSTRE_PFL: - # If no PFL support, LiPE is not able get the size or LSoM from object - # on MDT. - have_size = False - else: - have_size = True - - cases = [] - if not have_size: - cases.append(LipeFindTestCase("-%s %sc" % (attribute, size), False)) - cases.append(LipeFindTestCase("-%s +%sc" % (attribute, size), False)) - cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size + 1)), False)) - cases.append(LipeFindTestCase("! -%s %sc" % (attribute, size), False)) - if size != 0: - cases.append(LipeFindTestCase("! -%s +%sc" % (attribute, (size - 1)), - False)) - return 0 - - cases.append(LipeFindTestCase("-%s %sc" % (attribute, size), True)) - cases.append(LipeFindTestCase("! -%s %sc" % (attribute, size), False)) - cases.append(LipeFindTestCase("-%s +%sc" % (attribute, size), False)) - cases.append(LipeFindTestCase("-%s -%sc" % (attribute, size), False)) - - cases.append(LipeFindTestCase("-%s %sc" % (attribute, (size + 1)), False)) - cases.append(LipeFindTestCase("! -%s %sc" % (attribute, (size + 1)), True)) - cases.append(LipeFindTestCase("-%s +%sc" % (attribute, (size + 1)), False)) - cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size + 1)), True)) - - if size != 0: - cases.append(LipeFindTestCase("-%s %sc" % (attribute, (size - 1)), False)) - cases.append(LipeFindTestCase("! 
-%s %sc" % (attribute, (size - 1)), True)) - cases.append(LipeFindTestCase("-%s +%sc" % (attribute, (size - 1)), True)) - cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size - 1)), False)) - - return test_find_options(log, parent_dir, client, mdti, cases, fid, msg) - - -def test_lipe_find_mdt_actions(log, workspace, mdti, client, - lustre_dir_path, pool): - """ - Create and remove a regular file and check whether lipe_find can find it - and perform action against. - """ - # pylint: disable=too-many-arguments,too-many-branches,too-many-statements - # pylint: disable=too-many-locals - inode_fname = "new_Inode" - inode_type = stat.S_IFREG - inode_path = lustre_dir_path + "/" + inode_fname - host = client.lc_host - - if mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS: - return 0 - - pool_name = None - if pool is not None: - pool_name = pool.lp_name - - ret = lustre.lustre_file_setstripe(log, host, inode_path, - stripe_size=TEST_STRIPE_SIZE) - if ret: - log.cl_error("failed to create inode [%s] with type [%s] on host [%s]", - inode_path, utils.file_type2string(inode_type), - host.sh_hostname) - return -1 - - ret = lustre.lustre_file_mirror_extend(log, host, inode_path, - pool_name=pool_name) - if ret: - log.cl_error("failed to extend the mirror of inode [%s] with type [%s] on host [%s]", - inode_path, utils.file_type2string(inode_type), - host.sh_hostname) - return -1 - - command = "cat %s" % (inode_path) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - fid = lustre.lfs_path2fid(log, host, inode_path) - if fid is None: - log.cl_error("failed get fid of file [%s] on host [%s]", - inode_path, host.sh_hostname) - return -1 - - stat_result = host.sh_stat(log, inode_path) - if stat_result is None: - log.cl_error("failed stat file 
[%s] on host [%s]", - inode_path, host.sh_hostname) - return -1 - - creation_workspace = workspace + "/after_creation" - ret = utils.mkdir(creation_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - creation_workspace) - return -1 - - msg = lipe_find_message(fid, mdti, host, inode_path, - inode_type=inode_type, pool=pool) - - ret = utils.wait_condition(log, check_fid_found, - (creation_workspace, mdti, "", fid)) - if ret: - log.cl_error("expected to find fid after inode creation," - " but got failure, %s", msg) - return -1 - - cases = list() - cases.append(LipeFindTestCase('-pool %s -fid %s' % (pool_name, fid), True)) - - ret = test_find_options(log, creation_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find with given options after inode " - "creation") - return -1 - - action_run_workspace = workspace + "/actions_run" - ret = utils.mkdir(action_run_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - action_run_workspace) - return -1 - - cases = list() - cases.append(LipeFindTestCase("-pool %s -fid %s" % (pool_name, fid), - True, skip_client=True, - option="--agents %s -purge %s" % (host.sh_hostname, pool_name))) - - ret = test_find_options(log, action_run_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find with given options") - return -1 - - action_post_workspace = workspace + "/actions_post" - ret = utils.mkdir(action_post_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - action_post_workspace) - return -1 - - cases = list() - cases.append(LipeFindTestCase("-pool %s -fid %s" % (pool_name, fid), False)) - - ret = test_find_options(log, action_post_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find with given options after inode " - "creation") - return -1 - - ret = host.sh_remove_inode(log, inode_path, inode_type=inode_type) - 
if ret: - log.cl_error("failed to remove inode [%s] with type [%s] on host [%s]", - inode_path, utils.file_type2string(inode_type), - host.sh_hostname) - return -1 - - return 0 - - -def test_lipe_find_skip_substripe_inodes(log, workspace, mdti, client, - lustre_dir_path): - """ - Create a striped directory, then check whether lipe_find can skip - dir substripe inodes. - """ - # pylint: disable=too-many-arguments,too-many-branches,too-many-statements - # pylint: disable=too-many-locals - # pylint: disable=too-many-locals,too-many-arguments,too-many-branches - mdt = mdti.lsi_service - lustrefs = mdt.ls_lustre_fs - client_host = client.lc_host - mgs = lustrefs.lf_mgs_mdt - if mgs is None: - mgs = lustrefs.lf_mgs - if mgs is None: - log.cl_error("Lustre file system [%s] doesn't have a mgs", - lustrefs.lf_fsname) - return -1 - - mgsi = mgs.ls_mounted_instance(log) - if mgsi is None: - log.cl_error("failed to get the active instance of Lustre service [%s]", - mgs.ls_service_name) - return -1 - - command = ("lfs mkdir -c 2 -i %d %s" % (mdt.ls_index, lustre_dir_path)) - retval = client_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return 1 - - fid = lustre.lfs_path2fid(log, client_host, lustre_dir_path) - cases = list() - cases.append(LipeFindTestCase("", True, expect_count=1, skip_client=True)) - msg = lipe_find_message(fid, mdti, client_host, lustre_dir_path) - test_workspace = workspace + "/scan_against_striped_dir" - ret = utils.mkdir(test_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - test_workspace) - return -1 - - ret = test_find_options(log, test_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find to get all inodes") - return -1 - return 0 - - -def 
test_lipe_find_skip_snapshot_inodes(log, workspace, mdti, client, - lustre_dir_path): - """ - Create regular files and creat snapshots, then check whether lipe_find can skip - snapshot inodes. - """ - # pylint: disable=too-many-arguments,too-many-branches,too-many-statements - # pylint: disable=too-many-locals - # pylint: disable=too-many-locals,too-many-arguments,too-many-branches - mdt = mdti.lsi_service - lustrefs = mdt.ls_lustre_fs - client_host = client.lc_host - mgs = lustrefs.lf_mgs_mdt - if mgs is None: - mgs = lustrefs.lf_mgs - if mgs is None: - log.cl_error("Lustre file system [%s] doesn't have a mgs", - lustrefs.lf_fsname) - return -1 - - mgsi = mgs.ls_mounted_instance(log) - if mgsi is None: - log.cl_error("failed to get the active instance of Lustre service [%s]", - mgs.ls_service_name) - return -1 - - for mdt in lustrefs.lf_mdts.values(): - mdti = mdt.ls_mounted_instance(log) - if mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS: - log.cl_debug("skip snapshot test for zfs backend") - return 1 - for ost in lustrefs.lf_osts.values(): - osti = ost.ls_mounted_instance(log) - if osti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS: - log.cl_debug("skip snapshot test for zfs backend") - return 1 - - command = ("mkdir %s" % lustre_dir_path) - retval = client_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - inode_path = os.path.join(lustre_dir_path, "test_inode") - ret = client_host.sh_create_inode(log, inode_path) - if ret: - log.cl_error("failed to create regular file [%s] on host [%s]", - inode_path, client_host.sh_hostname) - return -1 - client_host.sh_run(log, "sync") - - fid = lustre.lfs_path2fid(log, client_host, inode_path) - if fid is None: - log.cl_error("failed get fid of file [%s] on host [%s]", - inode_path, 
client_host.sh_hostname) - return -1 - - test_workspace = workspace + "/scan_before_snapshot" - ret = utils.mkdir(test_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - test_workspace) - return -1 - - cases = list() - cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True)) - msg = lipe_find_message(fid, mdti, client_host, inode_path) - - ret = test_find_options(log, test_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find to get all inodes") - return -1 - - ret = lustrefs.lf_create_snapshot(log, "snapshot0") - if ret: - log.cl_debug("failed to create snapshot, skip following test") - return 0 - - test_workspace = workspace + "/scan_after_snapshot" - ret = utils.mkdir(test_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - test_workspace) - return -1 - - cases = list() - cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True)) - msg = lipe_find_message(fid, mdti, client_host, inode_path) - - ret = test_find_options(log, test_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find to get all inodes after one snapshot created") - return -1 - - for i in range(10): - sname = "snapshot" + str(i) - ret = lustrefs.lf_create_snapshot(log, sname) - if ret: - log.cl_debug("failed to create snapshot with name [%s]", sname) - return -1 - - test_workspace = workspace + "/scan_after_10_more_snapshots" - ret = utils.mkdir(test_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - test_workspace) - return -1 - - cases = list() - cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True)) - msg = lipe_find_message(fid, mdti, client_host, inode_path) - - ret = test_find_options(log, test_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find to get all inodes after 10 more snapshots created") - return -1 - - 
ret = client_host.sh_remove_inode(log, inode_path) - if ret: - log.cl_error("failed to remove inode [%s] on host [%s]", - inode_path, client_host.sh_hostname) - return -1 - return 0 - - -def test_lipe_find_mdt_type(log, workspace, mdti, client, lustre_dir_path, - inode_type=stat.S_IFREG, major=None, minor=None, - path=None, stripe_index=None, pool=None, - component_layout=False): - """ - Create an remove file/dir with a special type and check whether lipe_find - can find it - """ - # pylint: disable=too-many-arguments,too-many-branches,too-many-statements - # pylint: disable=too-many-locals - inode_fname = "new_Inode" - upper_fname = inode_fname.upper() - lower_fname = inode_fname.lower() - assert(upper_fname != inode_fname and lower_fname != inode_fname) - inode_path = lustre_dir_path + "/" + inode_fname - inode_paths = [inode_path] - type_option = lipe_find.file_type2option(inode_type) - type_constant = lipe_find.file_option2lipe_constant(type_option) - host = client.lc_host - - pool_name = None - if pool is not None: - pool_name = pool.lp_name - assert inode_type == stat.S_IFREG - - if inode_type == stat.S_IFREG: - assert stripe_index is not None - ret = lustre.lustre_file_setstripe(log, host, inode_path, - stripe_index=stripe_index, - pool_name=pool_name, - stripe_size=TEST_STRIPE_SIZE) - else: - ret = host.sh_create_inode(log, inode_path, inode_type=inode_type, - major=major, minor=minor, path=path) - if ret: - log.cl_error("failed to create inode [%s] with type [%s] on host [%s]", - inode_path, utils.file_type2string(inode_type), - host.sh_hostname) - return -1 - - if component_layout: - assert stripe_index is not None - assert inode_type == stat.S_IFREG - ret = lustre.lustre_file_mirror_extend(log, host, inode_path, - stripe_index=stripe_index, - pool_name=pool_name) - if ret: - log.cl_error("failed to extend the mirror of inode [%s] with type [%s] on host [%s]", - inode_path, utils.file_type2string(inode_type), - host.sh_hostname) - return -1 - comp_count = 
2 - else: - comp_count = 1 - - # If dir has no stripe set, then it doesn't have LOV xattr. - # And find -stripe-count will not be able to find it. - # So set the stripe count here. - if inode_type == stat.S_IFDIR: - assert stripe_index is not None - command = ("lfs setstripe -c 1 -i %s -S %s %s " % - (stripe_index, TEST_STRIPE_SIZE, inode_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - fid = lustre.lfs_path2fid(log, host, inode_path) - if fid is None: - log.cl_error("failed get fid of file [%s] on host [%s]", - inode_path, host.sh_hostname) - return -1 - - if inode_type != stat.S_IFDIR: - nlink = 10 - for i in range(0, nlink - 1): - link_path = lustre_dir_path + "/" + "link" + str(i) - inode_paths.append(link_path) - command = "ln %s %s" % (inode_path, link_path) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - else: - nlink = 2 - - # If a regular file has never been written, LSoM won't be updated. Then - # block number in LSoM will be kept 0. But the new file's block number - # might not be zero if the file locates on ZFS OST. So, open the file - # here to sync the LSoM. 
- if inode_type == stat.S_IFREG: - command = "cat %s" % (inode_path) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - stat_result = host.sh_stat(log, inode_path) - if stat_result is None: - log.cl_error("failed stat file [%s] on host [%s]", - inode_path, host.sh_hostname) - return -1 - - if stat_result.st_nlink != nlink: - log.cl_error("failed wrong nlink of file [%s], expected [%d], got [%d]", - nlink, stat_result.st_nlink) - return -1 - - creation_workspace = workspace + "/after_creation" - ret = utils.mkdir(creation_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - creation_workspace) - return -1 - - msg = lipe_find_message(fid, mdti, host, inode_path, - inode_type=inode_type, major=major, - minor=minor, path=path, - stripe_index=stripe_index, pool=pool, - component_layout=component_layout) - - find_option = "" - ret = utils.wait_condition(log, check_fid_found, - (creation_workspace, mdti, find_option, fid)) - if ret: - log.cl_error("expected to find fid with option [%s] after inode " - "creation, but got failure, %s", find_option, msg) - return -1 - - # Do not need to use wait_condition since now, since the inode should have - # been be synced during last wait_condition - cases = [] - - cases.append(LipeFindTestCase("-fid %s" % fid, True)) - cases.append(LipeFindTestCase('-expr \'fid_match("%s")\'' % fid, True)) - cases.append(LipeFindTestCase('-fid "*"', True)) - cases.append(LipeFindTestCase('-fid "*%s"' % fid[3:], True)) - cases.append(LipeFindTestCase('-fid "%s*"' % fid[:-3], True)) - cases.append(LipeFindTestCase('-fid "*%s*"' % fid[3:-3], True)) - cases.append(LipeFindTestCase("-fid X%s" % fid, False)) - cases.append(LipeFindTestCase("-fid %sX" % fid, False)) - - 
cases.append(LipeFindTestCase("-name %s" % inode_fname, True)) - cases.append(LipeFindTestCase('-expr \'fname_match("%s")\'' % inode_fname, True)) - cases.append(LipeFindTestCase("-name %s" % upper_fname, False)) - cases.append(LipeFindTestCase("-name %s" % lower_fname, False)) - cases.append(LipeFindTestCase('-name "*"', True)) - cases.append(LipeFindTestCase("! -name %s" % inode_fname, False)) - cases.append(LipeFindTestCase("! -name %s" % upper_fname, True)) - cases.append(LipeFindTestCase("! -name %s" % lower_fname, True)) - cases.append(LipeFindTestCase('! -name "*"', False)) - - cases.append(LipeFindTestCase("-iname %s" % inode_fname, True)) - cases.append(LipeFindTestCase('-expr \'fname_imatch("%s")\'' % inode_fname, True)) - cases.append(LipeFindTestCase("-iname %s" % upper_fname, True)) - cases.append(LipeFindTestCase('-expr \'fname_imatch("%s")\'' % upper_fname, True)) - cases.append(LipeFindTestCase("-iname %s" % lower_fname, True)) - cases.append(LipeFindTestCase('-iname "*"', True)) - cases.append(LipeFindTestCase("! -iname %s" % inode_fname, False)) - cases.append(LipeFindTestCase("! -iname %s" % upper_fname, False)) - cases.append(LipeFindTestCase("! -iname %s" % lower_fname, False)) - cases.append(LipeFindTestCase('! -iname "*"', False)) - - cases.append(LipeFindTestCase("-inum %s" % stat_result.st_ino, - True, skip_server=True)) - cases.append(LipeFindTestCase('-expr "inum == %s"' % stat_result.st_ino, - True, skip_server=True)) - cases.append(LipeFindTestCase('-expr "== inum %s"' % stat_result.st_ino, - True, skip_server=True)) - cases.append(LipeFindTestCase("! 
-inum %s" % stat_result.st_ino, - False, skip_server=True)) - - cases.append(LipeFindTestCase("-type %s" % type_option, True)) - cases.append(LipeFindTestCase('-expr "== type %s"' % type_constant, True)) - cases.append(LipeFindTestCase('-expr "type == %s"' % type_constant, True)) - cases.append(LipeFindTestCase('-expr \'(type == %s) && fid_match("%s")\'' % (type_constant, fid), True)) - cases.append(LipeFindTestCase('-expr \'(type==%s)&& fid_match("%s")\'' % (type_constant, fid), True)) - cases.append(LipeFindTestCase("! -type %s" % type_option, False)) - cases.append(LipeFindTestCase('-expr \'(type != %s) || fid_match("%s")\'' % (type_constant, fid), True)) - - if pool_name is None: - cases.append(LipeFindTestCase('-pool "*"', False)) - cases.append(LipeFindTestCase('-pool-regex ".*"', False)) - else: - cases.append(LipeFindTestCase('-pool "*"', True)) - cases.append(LipeFindTestCase('-pool-regex ".*"', True)) - cases.append(LipeFindTestCase('-pool %s' % pool_name, True)) - cases.append(LipeFindTestCase('-pool-regex %s' % pool_name, True)) - cases.append(LipeFindTestCase('-pool %sx' % pool_name, False)) - cases.append(LipeFindTestCase('-pool-regex %sx' % pool_name, False)) - cases.append(LipeFindTestCase('-pool x%s' % pool_name, False)) - cases.append(LipeFindTestCase('-pool-regex x%s' % pool_name, False)) - cases.append(LipeFindTestCase('-stripe-count 0', False)) - cases.append(LipeFindTestCase('! -stripe-count 1', False)) - cases.append(LipeFindTestCase('-stripe-count 2', False)) - cases.append(LipeFindTestCase('-stripe-size %s' % (TEST_STRIPE_SIZE - 1), False)) - cases.append(LipeFindTestCase('-stripe-size %s' % (TEST_STRIPE_SIZE + 1), False)) - cases.append(LipeFindTestCase('-stripe-size +%s' % TEST_STRIPE_SIZE, False)) - - # Only regular files have objects on OSTs. 
- if inode_type == stat.S_IFREG: - cases.append(LipeFindTestCase("-ost %s" % stripe_index, True)) - cases.append(LipeFindTestCase('-expr "ost(%s)"' % stripe_index, True)) - cases.append(LipeFindTestCase("! -ost %s" % stripe_index, False)) - cases.append(LipeFindTestCase("-ost %s" % (stripe_index + 1), False)) - cases.append(LipeFindTestCase("! -ost %s" % (stripe_index + 1), True)) - - # -stripe-index is the same with -ost - cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, True)) - cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, False)) - cases.append(LipeFindTestCase("-stripe-index %s" % (stripe_index + 1), False)) - cases.append(LipeFindTestCase("! -stripe-index %s" % (stripe_index + 1), True)) - elif inode_type == stat.S_IFDIR: - # The directoy has been "lfs setstripe", it is considered to have no - # object, but the LOV EA is valid. - cases.append(LipeFindTestCase("-ost %s" % stripe_index, False)) - cases.append(LipeFindTestCase("! -ost %s" % stripe_index, True)) - cases.append(LipeFindTestCase("-ost %s" % (stripe_index + 1), False)) - cases.append(LipeFindTestCase("! -ost %s" % (stripe_index + 1), True)) - - # -stripe-index is the same with -ost - cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, False)) - cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, True)) - cases.append(LipeFindTestCase("-stripe-index %s" % (stripe_index + 1), False)) - cases.append(LipeFindTestCase("! -stripe-index %s" % (stripe_index + 1), True)) - else: - cases.append(LipeFindTestCase("-ost 0", False)) - cases.append(LipeFindTestCase("! -ost 0", False)) - cases.append(LipeFindTestCase("-ost 0", False)) - cases.append(LipeFindTestCase("! -ost 0", False)) - - # -stripe-index is the same with -ost - cases.append(LipeFindTestCase("-stripe-index 0", False)) - cases.append(LipeFindTestCase("! -stripe-index 0", False)) - cases.append(LipeFindTestCase("-stripe-index 0", False)) - cases.append(LipeFindTestCase("! 
-stripe-index 0", False)) - - cases.append(LipeFindTestCase('-stripe-size %s' % TEST_STRIPE_SIZE, False)) - cases.append(LipeFindTestCase('-stripe-size -%s' % (TEST_STRIPE_SIZE + 1), False)) - cases.append(LipeFindTestCase('-stripe-size +%s' % (TEST_STRIPE_SIZE - 1), False)) - - cases.append(LipeFindTestCase('-comp-count %s' % (comp_count - 1), False)) - cases.append(LipeFindTestCase('-component-count %s' % (comp_count - 1), False)) - cases.append(LipeFindTestCase('-comp-count %s' % (comp_count + 1), False)) - cases.append(LipeFindTestCase('-component-count %s' % (comp_count + 1), False)) - cases.append(LipeFindTestCase('-comp-count +%s' % comp_count, False)) - cases.append(LipeFindTestCase('-component-count +%s' % comp_count, False)) - cases.append(LipeFindTestCase('-comp-count -%s' % comp_count, False)) - cases.append(LipeFindTestCase('-component-count -%s' % comp_count, False)) - - # Only regular files and the directories with stripe configuration have - # LOV EA. For inode with LOV EA, any expression with - # pool_match/pool_reg will not be matched. - if inode_type == stat.S_IFREG or inode_type == stat.S_IFDIR: - if pool_name is None: - cases.append(LipeFindTestCase('! -pool "*"', True)) - cases.append(LipeFindTestCase('! -pool-regex ".*"', True)) - else: - cases.append(LipeFindTestCase('! -pool "*"', False)) - cases.append(LipeFindTestCase('! -pool-regex ".*"', False)) - cases.append(LipeFindTestCase('! -stripe-count 0', True)) - cases.append(LipeFindTestCase('-stripe-count 1', True)) - cases.append(LipeFindTestCase('! 
-stripe-count 2', True)) - - cases.append(LipeFindTestCase('-stripe-size %s' % TEST_STRIPE_SIZE, True)) - cases.append(LipeFindTestCase('-stripe-size -%s' % (TEST_STRIPE_SIZE + 1), True)) - cases.append(LipeFindTestCase('-stripe-size +%s' % (TEST_STRIPE_SIZE - 1), True)) - cases.append(LipeFindTestCase('-layout raid0', True)) - cases.append(LipeFindTestCase('-layout raid0,raid0', True)) - - cases.append(LipeFindTestCase('-comp-count %s' % comp_count, True)) - cases.append(LipeFindTestCase('-component-count %s' % comp_count, True)) - cases.append(LipeFindTestCase('-comp-count -%s' % (comp_count + 1), True)) - cases.append(LipeFindTestCase('-component-count -%s' % (comp_count + 1), True)) - cases.append(LipeFindTestCase('-comp-count +%s' % (comp_count - 1), True)) - cases.append(LipeFindTestCase('-component-count +%s' % (comp_count - 1), True)) - else: - cases.append(LipeFindTestCase('! -pool "*"', False)) - cases.append(LipeFindTestCase('! -pool-regex ".*"', False)) - cases.append(LipeFindTestCase('! -stripe-count 0', False)) - cases.append(LipeFindTestCase('-stripe-count 1', False)) - cases.append(LipeFindTestCase('! 
-stripe-count 2', False)) - - cases.append(LipeFindTestCase('-layout raid0', False)) - cases.append(LipeFindTestCase('-layout raid0,raid0', False)) - - cases.append(LipeFindTestCase('-comp-count %s' % comp_count, False)) - cases.append(LipeFindTestCase('-component-count %s' % comp_count, False)) - cases.append(LipeFindTestCase('-comp-count -%s' % (comp_count + 1), False)) - cases.append(LipeFindTestCase('-component-count -%s' % (comp_count + 1), False)) - cases.append(LipeFindTestCase('-comp-count +%s' % (comp_count - 1), False)) - cases.append(LipeFindTestCase('-component-count +%s' % (comp_count - 1), False)) - - cases.append(LipeFindTestCase('-layout released', False)) - cases.append(LipeFindTestCase('-layout mdt', False)) - cases.append(LipeFindTestCase('-layout raid0,released', False)) - cases.append(LipeFindTestCase('-layout raid0,mdt', False)) - cases.append(LipeFindTestCase('-layout mdt,released', False)) - cases.append(LipeFindTestCase('-layout raid0,mdt,released', False)) - - cases.append(LipeFindTestCase("-uid 0", True)) - cases.append(LipeFindTestCase("! ! -uid 0", True)) - cases.append(LipeFindTestCase("! -uid 0", False)) - cases.append(LipeFindTestCase("! -uid 1 ! -uid 0", False)) - cases.append(LipeFindTestCase("-user 0", True)) - cases.append(LipeFindTestCase("! -user 0", False)) - cases.append(LipeFindTestCase("! -user 1 ! -user 0", False)) - cases.append(LipeFindTestCase("-user root", True)) - cases.append(LipeFindTestCase("! -user root", False)) - cases.append(LipeFindTestCase("! -user bin ! -user root", False)) - cases.append(LipeFindTestCase("-nouser", False)) - - cases.append(LipeFindTestCase("-gid 0", True)) - cases.append(LipeFindTestCase("! -gid 0", False)) - cases.append(LipeFindTestCase("-group 0", True)) - cases.append(LipeFindTestCase("! -group 0", False)) - cases.append(LipeFindTestCase("-group root", True)) - cases.append(LipeFindTestCase("! 
-group root", False)) - cases.append(LipeFindTestCase("-nogroup", False)) - - for xtime in ["atime", "ctime", "mtime"]: - cases.append(LipeFindTestCase("-%s 0" % xtime, True)) - cases.append(LipeFindTestCase("! -%s 0" % xtime, False)) - cases.append(LipeFindTestCase("-%s +0" % xtime, False)) - cases.append(LipeFindTestCase("! -%s +0" % xtime, True)) - cases.append(LipeFindTestCase("-%s -0" % xtime, False)) - cases.append(LipeFindTestCase("! -%s -0" % xtime, True)) - cases.append(LipeFindTestCase("-%s -1" % xtime, True)) - cases.append(LipeFindTestCase("! -%s -1" % xtime, False)) - - cases.append(LipeFindTestCase('-expr "(%s > (sys_time - 1 * days) || uid == 100) && gid == 0"' % xtime, True)) - - # The time between file creation and test should never be longer - # than 10 hours - for xtime in ["amin", "cmin", "mmin"]: - cases.append(LipeFindTestCase("-%s +600" % xtime, False)) - cases.append(LipeFindTestCase("! -%s +600" % xtime, True)) - cases.append(LipeFindTestCase("-%s -0" % xtime, False)) - cases.append(LipeFindTestCase("! -%s -0" % xtime, True)) - cases.append(LipeFindTestCase("-%s -600" % xtime, True)) - cases.append(LipeFindTestCase("! -%s -600" % xtime, False)) - - # The ctime and atime might be slightly different, so don't expect that - # "-used 0" can find the file. And because of this, "-used -0" might find - # the file unexpctedly. - cases.append(LipeFindTestCase("-used +0", False)) - cases.append(LipeFindTestCase("-used 1", False)) - cases.append(LipeFindTestCase("-used -1", True)) - cases.append(LipeFindTestCase("-used +1", False)) - - cases.append(LipeFindTestCase("-name %s -type %s" % (inode_fname, type_option), - True)) - cases.append(LipeFindTestCase('-name "*" -type %s' % (type_option), - True)) - - if inode_type == stat.S_IFDIR or inode_type == stat.S_IFREG: - cases.append(LipeFindTestCase("-empty", True)) - cases.append(LipeFindTestCase("! 
-empty", False)) - else: - cases.append(LipeFindTestCase("-empty", False)) - cases.append(LipeFindTestCase("! -empty", True)) - - cases.append(LipeFindTestCase("-entries 0", True)) - cases.append(LipeFindTestCase("! -entries 0", False)) - cases.append(LipeFindTestCase("-entries +0", False)) - cases.append(LipeFindTestCase("! -entries +0", True)) - cases.append(LipeFindTestCase("-entries -0", False)) - cases.append(LipeFindTestCase("! -entries -0", True)) - - cases.append(LipeFindTestCase("-links %d" % nlink, True)) - cases.append(LipeFindTestCase("-links %d" % (nlink + 1), False)) - cases.append(LipeFindTestCase("-links %d" % (nlink - 1), False)) - - # Some file types support setfattr to user xattr - if inode_type in [stat.S_IFREG, stat.S_IFDIR]: - command = ("setfattr -h -n user.xattr_name -v xattr_value %s" % (inode_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - cases.append(LipeFindTestCase('-xattr "user.xattr_name=*"', True)) - cases.append(LipeFindTestCase('-xattr "*=xattr_value"', True)) - cases.append(LipeFindTestCase('-xattr "user.xattr_name=xattr_value"', True)) - cases.append(LipeFindTestCase('-xattr "user.xattr_name*=xattr_value"', True)) - cases.append(LipeFindTestCase('-xattr "user.*=xattr_value"', True)) - cases.append(LipeFindTestCase('-xattr "user.*=*_value"', True)) - - cases.append(LipeFindTestCase('-xattr "*=*"', True)) - cases.append(LipeFindTestCase('-xattr "trusted.lma=*"', True)) - cases.append(LipeFindTestCase('-xattr "invalid_name=*"', False)) - cases.append(LipeFindTestCase('-xattr "*=invalid_value"', False)) - - cases.append(LipeFindTestCase('-perm -0', True)) - cases.append(LipeFindTestCase('-perm /0', True)) - - perm = stat_result.st_mode & 07777 - perm_str = oct(perm) - 
cases.append(LipeFindTestCase('-perm %s' % perm_str, True)) - cases.append(LipeFindTestCase('-perm -%s' % perm_str, True)) - cases.append(LipeFindTestCase('-perm /%s' % perm_str, True)) - # for each bit up to 010000 - for i in range(12): - bit = 1 << i - if (perm & bit) == 0: - more_perm = perm | bit - more_perm_str = oct(more_perm) - cases.append(LipeFindTestCase('-perm %s' % more_perm_str, False)) - cases.append(LipeFindTestCase('-perm -%s' % more_perm_str, False)) - cases.append(LipeFindTestCase('-perm /%s' % more_perm_str, True)) - else: - less_perm = perm & (~bit) - less_perm_str = oct(less_perm) - cases.append(LipeFindTestCase('-perm %s' % less_perm_str, False)) - cases.append(LipeFindTestCase('-perm -%s' % less_perm_str, True)) - cases.append(LipeFindTestCase('-perm /%s' % less_perm_str, True)) - - if (mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS and - not lipe_constant.HAVE_ZFS_ZPL_PROJID): - # ZFS versions earlier than 0.8 don't provide proper interfaces to - # read the project IDs. 
- cases.append(LipeFindTestCase('-projid 0', False, skip_client=True)) - cases.append(LipeFindTestCase('-projid +0', False, skip_client=True)) - cases.append(LipeFindTestCase('-projid -1', False, skip_client=True)) - # Client can still get the projid - cases.append(LipeFindTestCase('-projid 0', True, skip_server=True)) - cases.append(LipeFindTestCase('-projid +0', False, skip_server=True)) - cases.append(LipeFindTestCase('-projid -1', True, skip_server=True)) - else: - cases.append(LipeFindTestCase('-projid 0', True)) - cases.append(LipeFindTestCase('-expr "projid == 0"', True)) - cases.append(LipeFindTestCase('-expr "== projid 0"', True)) - cases.append(LipeFindTestCase('-projid +0', False)) - cases.append(LipeFindTestCase('-projid -1', True)) - - ret = test_find_options(log, creation_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find with given options after inode " - "creation") - return -1 - - ret = test_lipe_find_size(log, creation_workspace, client, mdti, fid, host, - inode_path, inode_type, msg, size=True) - if ret: - log.cl_error("failed to run find -size test after creation, %s", msg) - return -1 - - ret = test_lipe_find_size(log, creation_workspace, client, mdti, fid, host, - inode_path, inode_type, msg, size=False) - if ret: - log.cl_error("failed to run find -blocks test after creation, %s", msg) - return -1 - - action_delete_workspace = workspace + "/action_delete" - ret = utils.mkdir(action_delete_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - action_delete_workspace) - return -1 - - cases = [] - cases.append(LipeFindTestCase("-fid %s" % fid, True, skip_client=True, - option="--agents %s -delete" % client.lc_host.sh_hostname)) - ret = test_find_options(log, action_delete_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to remove files with option '-delete' removal") - return -1 - - removal_workspace = workspace + "/after_removal" - ret = 
utils.mkdir(removal_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - removal_workspace) - return -1 - - find_option = "" - ret = utils.wait_condition(log, check_fid_missing, - (removal_workspace, mdti, find_option, fid)) - if ret: - log.cl_error("can find FID UNEXPECTEDLY after removal with option " - "[%s], %s", find_option, msg) - return -1 - - # Do not need to use wait_condition since now, since the inode should have - # been be synced during last wait_condition - cases = [] - cases.append(LipeFindTestCase("-name %s" % inode_fname, False)) - cases.append(LipeFindTestCase("! -name %s" % inode_fname, False)) - cases.append(LipeFindTestCase('-name "*"', False)) - cases.append(LipeFindTestCase('! -name "*"', False)) - cases.append(LipeFindTestCase("-type %s" % type_option, False)) - cases.append(LipeFindTestCase("! -type %s" % type_option, False)) - - if stripe_index is not None: - cases.append(LipeFindTestCase("-ost %s" % stripe_index, False)) - cases.append(LipeFindTestCase("! -ost %s" % stripe_index, False)) - cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, False)) - cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, False)) - - cases.append(LipeFindTestCase('-pool ".*"', False)) - cases.append(LipeFindTestCase('! -pool ".*"', False)) - - cases.append(LipeFindTestCase('-pool-regex ".*"', False)) - cases.append(LipeFindTestCase('! -pool-regex ".*"', False)) - - cases.append(LipeFindTestCase("-uid 0", False)) - cases.append(LipeFindTestCase("! -uid 0", False)) - cases.append(LipeFindTestCase("-user 0", False)) - cases.append(LipeFindTestCase("! -user 0", False)) - cases.append(LipeFindTestCase("-user root", False)) - cases.append(LipeFindTestCase("! -user root", False)) - - cases.append(LipeFindTestCase("-gid 0", False)) - cases.append(LipeFindTestCase("! -gid 0", False)) - cases.append(LipeFindTestCase("-group 0", False)) - cases.append(LipeFindTestCase("! 
-group 0", False)) - cases.append(LipeFindTestCase("-group root", False)) - cases.append(LipeFindTestCase("! -group root", False)) - - cases.append(LipeFindTestCase("-size 0", False)) - cases.append(LipeFindTestCase("! -size 0", False)) - cases.append(LipeFindTestCase("-blocks 0", False)) - cases.append(LipeFindTestCase("! -blocks 0", False)) - cases.append(LipeFindTestCase("-empty", False)) - cases.append(LipeFindTestCase("! -empty", False)) - - cases.append(LipeFindTestCase("-inum %s" % stat_result.st_ino, - False, skip_server=True)) - cases.append(LipeFindTestCase("! -inum %s" % stat_result.st_ino, - False, skip_server=True)) - - cases.append(LipeFindTestCase('-xattr "*=*"', False)) - - cases.append(LipeFindTestCase('-perm -0', False)) - cases.append(LipeFindTestCase('-perm /0', False)) - cases.append(LipeFindTestCase('-stripe-count 1', False)) - cases.append(LipeFindTestCase('! -stripe-count 1', False)) - cases.append(LipeFindTestCase('-projid 0', False)) - cases.append(LipeFindTestCase('-projid +0', False)) - cases.append(LipeFindTestCase('-projid -1', False)) - - cases.append(LipeFindTestCase('-layout raid0', False)) - cases.append(LipeFindTestCase('-layout released', False)) - cases.append(LipeFindTestCase('-layout mdt', False)) - - ret = test_find_options(log, removal_workspace, client, mdti, cases, - fid, msg) - if ret: - log.cl_error("failed to test lipe_find with given options after inode " - "removal") - return -1 - return 0 - - -def test_lipe_find_mdt_all_inode_types(log, workspace, mdti, client, - lustre_dir_path, pool): - """ - Test whether lipe_find can find the newly created file/directory on a mdt - """ - # pylint: disable=too-many-locals,too-many-arguments - # pylint: disable=too-many-branches,too-many-statements - mdt = mdti.lsi_service - lustrefs = mdt.ls_lustre_fs - mdt_index = mdt.ls_index - client_host = client.lc_host - - command = ("lfs mkdir -c 1 -i %d %s" % (mdt_index, lustre_dir_path)) - retval = client_host.sh_run(log, command) - if 
retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - inode_types = [stat.S_IFDIR, stat.S_IFCHR, stat.S_IFBLK, stat.S_IFREG, - stat.S_IFIFO, stat.S_IFLNK, stat.S_IFSOCK] - for inode_type in inode_types: - major = None - minor = None - path = None - if inode_type == stat.S_IFCHR or inode_type == stat.S_IFBLK: - major = 7 - minor = 1 - elif inode_type == stat.S_IFLNK: - path = "unknown" - - if inode_type == stat.S_IFREG: - # Test layout component - ret = test_lipe_find_mdt_type(log, workspace, mdti, client, - lustre_dir_path, inode_type=inode_type, - stripe_index=0, component_layout=True) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] on regular file type with component layout", - mdti.lsi_service_instance_name) - return -1 - - # Need to test different stripe - for ost in lustrefs.lf_osts.values(): - ost_index = ost.ls_index - ret = test_lipe_find_mdt_type(log, workspace, mdti, client, - lustre_dir_path, inode_type=inode_type, - stripe_index=ost_index) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] on regular file type", - mdti.lsi_service_instance_name) - return -1 - - # Test OST pool - ret = test_lipe_find_mdt_type(log, workspace, mdti, client, - lustre_dir_path, inode_type=inode_type, - stripe_index=0, pool=pool) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] with regular file type on OST pool [%s]", - mdti.lsi_service_instance_name, - pool.lp_name) - return -1 - else: - ret = test_lipe_find_mdt_type(log, workspace, mdti, client, - lustre_dir_path, inode_type=inode_type, - stripe_index=1, - major=major, minor=minor, path=path) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] with inode type [%s]", - mdti.lsi_service_instance_name, 
inode_type) - return -1 - - # test action options - ret = test_lipe_find_mdt_actions(log, workspace, mdti, client, - lustre_dir_path, pool) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] with actions", mdti.lsi_service_instance_name) - return -1 - - command = ("rmdir %s" % (lustre_dir_path)) - retval = client_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # test lipe skip snapshot inodes - ret = test_lipe_find_skip_snapshot_inodes(log, workspace, mdti, client, - lustre_dir_path) - if ret < 0: - log.cl_error("failed to test lipe_find skip snapshot inodes against " - "[%s]", mdti.lsi_service_instance_name) - return -1 - elif ret > 0: - log.cl_debug("skip test lipe_find against snapshot inodes") - return 0 - - command = ("rmdir %s" % (lustre_dir_path)) - retval = client_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # test lipe skip dir substripe inodes - ret = test_lipe_find_skip_substripe_inodes(log, workspace, mdti, client, - lustre_dir_path) - if ret < 0: - log.cl_error("failed to test lipe_find skip dir substripe inodes against " - "[%s]", mdti.lsi_service_instance_name) - return -1 - elif ret > 0: - log.cl_debug("skip test lipe_find against dir substripe inodes") - return 0 - - command = ("rmdir %s" % (lustre_dir_path)) - retval = client_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - client_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, 
- retval.cr_stderr) - return -1 - return 0 - - -def test_lipe_find_mdt(log, workspace, mdt, pool): - """ - Test whether lipe_find can find the newly created file/directory on a mdt - """ - lustrefs = mdt.ls_lustre_fs - clients = lustrefs.lf_clients - if len(clients) == 0: - log.cl_warning("no client for file system [%s] configured, skip test of " - "file creation", lustrefs.lf_fsname) - return 0 - - mdti = mdt.ls_mounted_instance(log) - if mdti is None: - log.cl_error("failed to get the active instance of Lustre service [%s]", - mdt.ls_service_name) - return -1 - - lustre_relative_dir_path = ("lipe_test_" + mdt.ls_service_name + "_" + - time_util.local_strftime(time_util.utcnow(), - "%Y-%m-%d-%H_%M_%S.%f")) - - find_workspace = workspace + "/" + "dir_path" - ret = utils.mkdir(find_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - find_workspace) - return -1 - - client = random.choice(clients.values()) - lustre_dir_path = client.lc_mnt + "/" + lustre_relative_dir_path - - ret = test_lipe_find_mdt_all_inode_types(log, workspace, mdti, client, - lustre_dir_path, pool) - if ret: - log.cl_error("failed to test the lipe_find for mdt instance " - "[%s] with all kinds of inode types", - mdti.lsi_service_instance_name) - return -1 - return 0 - - -def test_lipe_find_lustrefs(log, workspace, lustrefs): - """ - Run lipe_find tests on a Lustre file system - """ - # Create OST pool that is going to be used later - mgs = lustrefs.lf_mgs_mdt - if mgs is None: - mgs = lustrefs.lf_mgs - if mgs is None: - log.cl_error("Lustre file system [%s] doesn't have a mgs", - lustrefs.lf_fsname) - return -1 - - mgsi = mgs.ls_mounted_instance(log) - if mgsi is None: - log.cl_error("failed to get the active instance of Lustre service [%s]", - mgs.ls_service_name) - return -1 - - pool_name = TEST_POOL_NAME - pool = lustre.LustrePool(lustrefs, pool_name) - ret = pool.lp_new(log, mgsi.lsi_host) - if ret: - log.cl_error("failed to create pool [%s] on file system 
[%s]", - pool_name, lustrefs.lf_fsname) - return ret - - ret = pool.lp_add(log, mgsi.lsi_host, lustrefs.lf_osts.values()) - if ret: - log.cl_error("failed to add OSTs to pool [%s] on file system [%s]", - pool_name, lustrefs.lf_fsname) - return ret - - for mdt in lustrefs.lf_mdts.values(): - mdt_workspace = workspace + "/" + mdt.ls_service_name - ret = utils.mkdir(mdt_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - mdt_workspace) - return -1 - - ret = test_lipe_find_mdt(log, mdt_workspace, mdt, pool) - if ret: - log.cl_error("failed to test the inode number of lipe_find on " - "Lustre service [%s]", mdt.ls_service_name) - return ret - - ret = pool.lp_remove(log, mgsi.lsi_host, lustrefs.lf_osts.values()) - if ret: - log.cl_error("failed to remove OSTs from pool [%s] on file system [%s]", - pool_name, lustrefs.lf_fsname) - return ret - - ret = pool.lp_destroy(log, mgsi.lsi_host) - if ret: - log.cl_error("failed to destroy pool [%s] on file system [%s]", - pool_name, lustrefs.lf_fsname) - return ret - - return 0 - - -def test_lipe_find(log, workspace, clownfish_instance): - """ - Test whether lipe_find can find the newly created file/directory - """ - # To save time, ony test the lustre file system with maximum number of - # MDTs - biggest_lustrefs = None - for lustrefs in clownfish_instance.ci_lustres.values(): - if ((biggest_lustrefs is None) or - (len(biggest_lustrefs.lf_mdts) < len(lustrefs.lf_mdts))): - biggest_lustrefs = lustrefs - if biggest_lustrefs is None: - log.cl_error("no lustre file system to test lipe_find") - return -1 - return test_lipe_find_lustrefs(log, workspace, biggest_lustrefs) - - -LIPE_TESTS.append(test_lipe_find) - - -def lipe_test_prepare(log, workspace, lipe_install_config_fpath): - """ - Load the lipe.conf file in JSON format - """ - # pylint: disable=global-statement - lipe_install_config_fd = open(lipe_install_config_fpath) - ret = 0 - try: - lipe_install_config = yaml.load(lipe_install_config_fd) - 
except: - log.cl_error("not able to load [%s] as yaml file: %s", - lipe_install_config_fpath, traceback.format_exc()) - ret = -1 - lipe_install_config_fd.close() - if ret: - return -1 - - lipe_config_fpath = utils.config_value(lipe_install_config, - cstr.CSTR_CONFIG_FPATH) - if lipe_config_fpath is None: - log.cl_error("can NOT find [%s] in the installation config, " - "please correct file [%s]", - cstr.CSTR_CONFIG_FPATH, - lipe_install_config_fpath) - return -1 - - global LIPE_LAUNCH_CONFIG - with open(lipe_config_fpath) as json_file: - LIPE_LAUNCH_CONFIG = json.load(json_file) - - global LIPE_CLUSTER - # Pass None mnt_path, which should be fine - LIPE_CLUSTER = lipe_install_nodeps.lipe_init_cluster(log, workspace, None, - lipe_install_config, - lipe_install_config_fpath) - if LIPE_CLUSTER is None: - log.cl_error("failed to parse LiPE config, please correct file [%s]", - lipe_install_config_fpath) - return -1 - - return 0 - - -def do_test(log, workspace, clownfish_instance, test_config, test_config_fpath, - test_functs): - """ - Run test - """ - # pylint: disable=too-many-branches,too-many-locals,too-many-arguments - # pylint: disable=too-many-statements - test_dict = {} - for test_funct in test_functs: - test_dict[test_funct.__name__] = test_funct - - quit_on_error = True - only_test_configs = utils.config_value(test_config, - cstr.CSTR_ONLY_TESTS) - if only_test_configs is None: - log.cl_debug("no [%s] is configured, run all tests", - cstr.CSTR_ONLY_TESTS) - selected_tests = test_functs - else: - selected_tests = [] - for test_name in only_test_configs: - if test_name not in test_dict: - log.cl_error("test [%s] doenot exist, please correct file " - "[%s]", test_name, test_config_fpath) - return -1 - test_funct = test_dict[test_name] - selected_tests.append(test_funct) - - not_selected_tests = [] - for test_funct in test_functs: - if test_funct not in selected_tests: - not_selected_tests.append(test_funct) - - passed_tests = [] - failed_tests = [] - skipped_tests 
= [] - test_durations = {} - - for test_func in selected_tests: - test_name = test_func.__name__ - test_workspace = workspace + "/" + test_name - ret = utils.mkdir(test_workspace) - if ret: - log.cl_error("failed to create directory [%s] on local host", - test_workspace) - return -1 - - log.cl_info("starting test [%s]", test_name) - start_time = time.time() - ret = test_func(log, test_workspace, clownfish_instance) - duration_time = time.time() - start_time - test_durations[test_func.__name__] = duration_time - if ret < 0: - log.cl_error("test [%s] failed, duration %f seconds", test_name, - duration_time) - failed_tests.append(test_func) - if quit_on_error: - return -1 - elif ret == 1: - log.cl_warning("test [%s] skipped, duration %f seconds", test_name, - duration_time) - skipped_tests.append(test_func) - else: - log.cl_info("test [%s] passed, duration %f seconds", test_name, - duration_time) - passed_tests.append(test_func) - - table = prettytable.PrettyTable() - table.field_names = ["Test name", "Result", "Duration"] - for test_func in not_selected_tests: - test_name = test_func.__name__ - table.add_row([test_name, "Not selected", "0"]) - - for test_func in skipped_tests: - test_name = test_func.__name__ - table.add_row([test_name, "Skipped", "%f seconds" % test_durations[test_name]]) - - for test_func in failed_tests: - test_name = test_func.__name__ - table.add_row([test_name, "Failed", "%f seconds" % test_durations[test_name]]) - - for test_func in passed_tests: - test_name = test_func.__name__ - table.add_row([test_name, "Passed", "%f seconds" % test_durations[test_name]]) - - log.cl_stdout(table) - return ret - - -def _lipe_test(log, workspace, test_config, test_config_fpath): - """ - Run LiPE test - """ - # pylint: disable=too-many-statements,too-many-branches,too-many-locals - # pylint: disable=global-statement - log.cl_info("installing virtul machines") - ret = test_common.test_install_virt(log, workspace, test_config, - test_config_fpath) - if ret: - 
log.cl_error("failed to install virtual machines") - return -1 - - log.cl_info("installing LiPE") - lipe_install_config_fpath = utils.config_value(test_config, - cstr.CSTR_LIPE_INSTALL_CONFIG) - if lipe_install_config_fpath is None: - log.cl_error("can NOT find [%s] in the test config, " - "please correct file [%s]", - cstr.CSTR_LIPE_INSTALL_CONFIG, test_config_fpath) - return -1 - - skip_install = utils.config_value(test_config, - cstr.CSTR_SKIP_INSTALL) - if skip_install is None: - log.cl_debug("no [%s] is configured, do not skip install") - skip_install = False - - install_server_config = utils.config_value(test_config, - cstr.CSTR_INSTALL_SERVER) - if install_server_config is None: - log.cl_error("can NOT find [%s] in the config file [%s], " - "please correct it", cstr.CSTR_INSTALL_SERVER, - test_config_fpath) - return -1 - - install_server_hostname = utils.config_value(install_server_config, - cstr.CSTR_HOSTNAME) - if install_server_hostname is None: - log.cl_error("can NOT find [%s] in the config of installation host, " - "please correct file [%s]", - cstr.CSTR_HOSTNAME, test_config_fpath) - return None - - ssh_identity_file = utils.config_value(install_server_config, - cstr.CSTR_SSH_IDENTITY_FILE) - install_server = ssh_host.SSHHost(install_server_hostname, - identity_file=ssh_identity_file) - ret = test_common.test_install(log, workspace, lipe_install_config_fpath, - skip_install, install_server, "lipe", - constants.LIPE_INSTALL_CONFIG_FNAME) - if ret: - log.cl_error("failed to test installation of LiPE") - return -1 - - clownfish_config_fpath = utils.config_value(test_config, - cstr.CSTR_CLOWNFISH_CONFIG) - if clownfish_config_fpath is None: - log.cl_error("can NOT find [%s] in the test config, " - "please correct file [%s]", - cstr.CSTR_CLOWNFISH_CONFIG, - test_config_fpath) - return -1 - - clownfish_config_fd = open(clownfish_config_fpath) - ret = 0 - try: - clownfish_config = yaml.load(clownfish_config_fd) - except: - log.cl_error("not able to load [%s] 
as yaml file: %s", - clownfish_config_fpath, traceback.format_exc()) - ret = -1 - clownfish_config_fd.close() - if ret: - return -1 - - clownfish_instance = clownfish.init_instance(log, workspace, - clownfish_config, - clownfish_config_fpath) - if clownfish_instance is None: - log.cl_error("failed to init the Clownfish instance, please check " - "config file [%s]", clownfish_config_fpath) - return -1 - - ret = lipe_test_prepare(log, workspace, lipe_install_config_fpath) - if ret: - log.cl_error("failed to prepare to run LiPE tests") - return -1 - - log.cl_info("running LiPE tests") - ret = do_test(log, workspace, clownfish_instance, test_config, - test_config_fpath, LIPE_TESTS) - if ret: - log.cl_error("failed to run LiPE tests") - return ret - - # Run hotpool test - ret = lipe_hotpool_test.hotpool_test(log, workspace, clownfish_instance) - if ret: - log.cl_error("failed to test HotPool") - return ret - - # This test needs that all Lustre RPMs are installed and the test will - # remove the LiPE RPMs, so run this test after all tests - log.cl_info("testing LiPE rpms dependency") - ret = test_rpms_dependency(log, workspace, LIPE_CLUSTER.lc_mdt_hosts[0]) - if ret: - log.cl_error("failed to test LiPE rpms dependence") - return ret - return 0 - - -def lipe_test(log, workspace, config_fpath): - """ - Start to test LiPE holding the confiure lock - """ - # pylint: disable=bare-except - config_fd = open(config_fpath) - ret = 0 - try: - config = yaml.load(config_fd) - except: - log.cl_error("not able to load [%s] as yaml file: %s", config_fpath, - traceback.format_exc()) - ret = -1 - config_fd.close() - if ret: - return -1 - - ret = _lipe_test(log, workspace, config, config_fpath) - if ret: - log.cl_error("test of LiPE failed, please check [%s] for more " - "log", workspace) - else: - log.cl_info("test of LiPE passed, please check [%s] " - "for more log", workspace) - return ret - - -def main(): - """ - Start to test LiPE - """ - cmd_general.main(constants.LIPE_TEST_CONFIG, 
- constants.LIPE_TEST_LOG_DIR, - lipe_test) diff --git a/lipe/pyltest/__init__.py b/lipe/pyltest/__init__.py deleted file mode 100644 index ee5488c..0000000 --- a/lipe/pyltest/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Python library for LiPE test -""" -__all__ = ["lipe_test_console", - "lipe_test_launch", - "lipe_test_scheduler"] diff --git a/lipe/pyltest/lipe_test_console.py b/lipe/pyltest/lipe_test_console.py deleted file mode 100644 index 0d3d298..0000000 --- a/lipe/pyltest/lipe_test_console.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Console that manages the scheduler -""" -import xmlrpclib -import readline -import logging -import getopt -import sys -import traceback - -# local libs -from pylustre import clog -from pylustre import utils -from pylustre import time_util -from pyltest import lipe_test_scheduler - - -LIPE_TEST_CONSOLE_LOG_DIR = "/var/log/lipe_test_console" - - -class LipeTestConsoleCompleter(object): - """ - Completer of command - """ - # pylint: disable=too-few-public-methods - def __init__(self, options): - self.ltcc_options = options - self.ltcc_current_candidates = [] - return - - def ltcc_complete(self, text, state): - # pylint: disable=unused-argument,too-many-nested-blocks - """ - The complete function of the completer - """ - response = None - if state == 0: - # This is the first time for this text, - # so build a match list. 
- origline = readline.get_line_buffer() - begin = readline.get_begidx() - end = readline.get_endidx() - being_completed = origline[begin:end] - words = origline.split() - if not words: - self.ltcc_current_candidates = sorted(self.ltcc_options.keys()) - else: - try: - if begin == 0: - # first word - candidates = self.ltcc_options.keys() - else: - # later word - first = words[0] - candidates = self.ltcc_options[first] - if being_completed: - # match options with portion of input - # being completed - self.ltcc_current_candidates = [] - for candidate in candidates: - if not candidate.startswith(being_completed): - continue - self.ltcc_current_candidates.append(candidate) - else: - # matching empty string so use all candidates - self.ltcc_current_candidates = candidates - except (KeyError, IndexError): - self.ltcc_current_candidates = [] - try: - response = self.ltcc_current_candidates[state] - except IndexError: - response = None - return response - - -def tconsole_input_init(): - """ - Initialize the input completer - """ - readline.parse_and_bind("tab: complete") - readline.parse_and_bind("set editing-mode vi") - # Register our completer function - completer = LipeTestConsoleCompleter({"help": [], - "host_cleanup": [], - "host_list": [], - "ip_list": [], - "ip_cleanup": [], - "job_list": [], - "job_kill": []}) - readline.set_completer(completer.ltcc_complete) - - -def tconsole_input_fini(): - """ - Stop the input completer - """ - readline.set_completer(None) - - -def tconsole_command_help(proxy, arg_string): - # pylint: disable=unused-argument - """ - Print the help string - """ - logging.info("help: show help messages") - return 0 - - -def tconsole_command_host_list(proxy, arg_string): - # pylint: disable=unused-variable - """ - List the hosts - """ - error = False - args = arg_string.split() - options, remainder = getopt.getopt(args, - "he", - ["--error", - "--help"]) - for opt, arg in options: - if opt in ("-e", "--error"): - error = True - elif opt in ("-h", 
"--help"): - print """Usage: host_list [-e|--error] - -e: print hosts that have cleanup error - -h: print this string""" - sys.exit(0) - - output = proxy.ts_host_list(error) - print "%s" % output - return 0 - - -def tconsole_command_ip_list(proxy, arg_string): - # pylint: disable=unused-variable - """ - List the IP addreses - """ - error = False - args = arg_string.split() - options, remainder = getopt.getopt(args, - "he", - ["--error", - "--help"]) - for opt, arg in options: - if opt in ("-e", "--error"): - error = True - elif opt in ("-h", "--help"): - print """Usage: host_list [-e|--error] - -e: print hosts that have cleanup error - -h: print this string""" - sys.exit(0) - - output = proxy.ts_ip_address_list(error) - print "%s" % output - return 0 - - -def tconsole_command_job_list(proxy, arg_string): - """ - List all the active jobs on the scheduler - """ - # pylint: disable=unused-argument - jobs = proxy.ts_job_list() - print "%s" % jobs - return 0 - - -def tconsole_command_job_kill(proxy, arg_string): - """ - Kill a job - """ - jobid = arg_string - scheduler_id = proxy.ts_get_id() - ret = proxy.ts_job_stop(scheduler_id, jobid) - return ret - - -def tconsole_command_host_cleanup(proxy, arg_string): - """ - Fix the host - """ - arg_string = arg_string.strip() - args = arg_string.split() - if len(args) == 1: - hostname = args[0] - else: - logging.error("""Usage: host_cleanup """) - return -1 - ret = proxy.ts_host_cleanup(hostname) - return ret - - -def tconsole_command_ip_cleanup(proxy, arg_string): - """ - Cleanup the IP address - """ - arg_string = arg_string.strip() - args = arg_string.split() - if len(args) == 1: - ip_address = args[0] - else: - logging.error("""Usage: ip_cleanup """) - return -1 - ret = proxy.ts_ip_cleanup(ip_address) - return ret - - -def tconsole_command(proxy, line): - """ - Run a command in the console - """ - # pylint: disable=broad-except - functions = {"help": tconsole_command_help, - "host_cleanup": tconsole_command_host_cleanup, - 
"host_list": tconsole_command_host_list, - "ip_list": tconsole_command_ip_list, - "ip_cleanup": tconsole_command_ip_cleanup, - "job_list": tconsole_command_job_list, - "job_kill": tconsole_command_job_kill} - if " " in line: - command, arg_string = line.split(' ', 1) - else: - command = line - arg_string = "" - - try: - func = functions[command] - except (KeyError, IndexError), err: - func = None - - # Run system command - if func is not None: - try: - ret = func(proxy, arg_string) - except Exception, err: - logging.error("failed to run command [%s %s] %s, %s", - command, arg_string, err, - traceback.format_exc()) - return -1 - else: - logging.error("no command: %s\n", line) - ret = -1 - return ret - - -def tconsole_input_loop(proxy): - """ - Loop and excute the command - """ - while True: - line = raw_input('> ("q" to quit): ') - if line == 'q' or line == 'quit': - break - tconsole_command(proxy, line) - - -def usage(): - """ - Print the usage of the command - """ - utils.oprint("Usage: {cmd} \n" - " server: the server address\n\n" - "examples:\n" - "{cmd} --> use http://localhost:1234 as the server address\n" - "{cmd} -s localhost\n" - "{cmd} -s localhost\n" - "{cmd} -s http://localhost:1234\n" - "{cmd} -s http://10.0.0.10:1234".format(cmd=sys.argv[0])) - - -def main(): - """ - Run the console - """ - # pylint: disable=unused-variable - now = time_util.utcnow() - workspace = (LIPE_TEST_CONSOLE_LOG_DIR + "/" + - time_util.local_strftime(now, ('%Y-%m-%d-%H:%M:%S'))) - ret = utils.run("mkdir -p %s" % workspace) - if ret.cr_exit_status != 0: - utils.eprint("failed to create directory [%s]" % workspace) - sys.exit(1) - - log = clog.get_log(resultsdir=workspace) - - argc = len(sys.argv) - if argc == 1: - server = "http://localhost:1234" - elif argc == 2: - arg = sys.argv[1] - if arg == "-h" or arg == "--help": - usage() - sys.exit(0) - server = arg - if not server.startswith("http://"): - server = "http://" + server - if server.count(":") != 2: - server = server + ":" 
+ str(lipe_test_scheduler.TEST_SCHEDULER_PORT) - - log.cl_info("connecting to server [%s]", server) - proxy = xmlrpclib.ServerProxy(server, allow_none=True) - - tconsole_input_init() - tconsole_input_loop(proxy) - tconsole_input_fini() diff --git a/lipe/pyltest/lipe_test_launch.py b/lipe/pyltest/lipe_test_launch.py deleted file mode 100644 index ccd99c0..0000000 --- a/lipe/pyltest/lipe_test_launch.py +++ /dev/null @@ -1,1717 +0,0 @@ -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Console that manages the scheduler -""" -# pylint: disable=too-many-lines -import xmlrpclib -import getopt -import sys -import os -import time -import traceback -import yaml - -# local libs -from pylustre import clog -from pylustre import utils -from pylustre import time_util -from pylustre import cstr -from pylustre import ssh_host -from pylustre import lyaml -from pylustre import lipe_virt -from pylustre import lustre -from pylustre import constants -from pyltest import lipe_test_scheduler - - -LIPE_TEST_LAUNCH_LOG_DIR = "/var/log/lipe_test_launch" -CHECK_TIME = time_util.utcnow() -EXIT_REASON = "unkown reason" -SHUTTING_DOWN = False -DEFAULT_HOST_TIMEOUT = 86400 -HOST_ALLOCATION_INTERVAL = 3 -DEFAULT_LUSTRE_RPM_DIR = "/lustre_rpms" -DEFAULT_E2FSPROGS_RPM_DIR = "/e2fsprogs_rpms" -DEV_MAPPER_PREFIX = "/dev/mapper/" -LIPE_ISO_PATTERN = "lipe-*.x86_64.iso" -LIPE_MD5_PATTERN = "lipe-*.x86_64.md5" - - -def usage(): - """ - Print the usage of the command - """ - command = sys.argv[0] - utils.oprint("Usage: %s [--lustre|-l ] [--e2fsprogs|-e ]\n" - " [--server|-s ] [--source_path|-p ]\n" - " [--host_timeout ]\n" - "\n" - "lustre_dir:\n" - " The dir of Lustre RPMs, usually generated by lbuild.\n" - " By default '%s'\n" - "e2fsprogs_dir:\n" - " The dir of E2fsprogs RPMs.\n" - " By default '%s'.\n" - "server:\n" - " The server address.\n" - " By default localhost.\n" - "source_path:\n" - " The path to lipe source code.\n" - " By default current 
directory.\n" - "host_timeout:\n" - " The seconds to wait for host allocation.\n" - " By default %d seconds. 0 means wait for ever.\n" - "\n" - "examples:\n" - "%s\n" - "%s -s localhost\n" - "%s -s http://localhost -p /dir/to/lipe.git\n" - "%s -s http://localhost:1234 -p /dir/to/lipe.git\n" - "%s -s http://10.0.0.10:1234 -p /dir/to/lipe.git\n" - "%s -l /dir/to/lustre_rpms -e /dir/to/e2fsprogs_rpms -s http://10.0.0.10:1234 -p /dir/to/lipe.git" - % (command, DEFAULT_LUSTRE_RPM_DIR, DEFAULT_E2FSPROGS_RPM_DIR, - DEFAULT_HOST_TIMEOUT, command, command, command, - command, command, command)) - - -class LaunchArg(object): - """ - The arg of launch command - """ - # pylint: disable=too-few-public-methods,too-many-instance-attributes - def __init__(self): - self.la_server = "http://localhost:1234" - self.la_source_path = os.getcwd() - self.la_source_lipe_launch_config_file = (self.la_source_path + "/" + - constants.LIPE_LAUNCH_CONFIG_FNAME) - self.la_host_wait_time = DEFAULT_HOST_TIMEOUT - self.la_lustre_dir = DEFAULT_LUSTRE_RPM_DIR - self.la_e2fsprogs_dir = DEFAULT_E2FSPROGS_RPM_DIR - # Init when building - self.la_test_host_source_path = None - self.la_test_host_iso_fpath = None - self.la_test_host_iso_dir = None - self.la_test_host_md5_fpath = None - - def la_update_server(self, log, server): - """ - Update the server URL - """ - if not server.startswith("http://"): - server = "http://" + server - if server.count(":") != 2: - server = server + ":" + str(lipe_test_scheduler.TEST_SCHEDULER_PORT) - self.la_server = server - log.cl_debug("updated the url to [%s]", server) - return 0 - - def la_update_source_path(self, log, path): - """ - Update the source path - """ - if not os.path.isdir(path): - log.cl_error("source directory [%s] is not a directory", path) - return -1 - - lipe_launch_config_file = path + "/example_configs/lipe/" + constants.LIPE_LAUNCH_CONFIG_FNAME - if not os.path.isfile(lipe_launch_config_file): - log.cl_error("source directory [%s] doesn't have file 
[%s]", path, - constants.LIPE_LAUNCH_CONFIG_FNAME) - return -1 - self.la_source_lipe_launch_config_file = lipe_launch_config_file - self.la_source_path = path - log.cl_debug("updated the source path to [%s]", path) - return 0 - - def la_update_host_wait_time(self, log, second): - """ - Update the host wait time - """ - self.la_host_wait_time = second - log.cl_debug("the host wait time is [%d]", self.la_host_wait_time) - return 0 - - def la_update_lustre_dir(self, log, lustre_dir): - """ - Update the dir of Lustre RPMs - """ - if not os.path.isdir(lustre_dir): - log.cl_error("Lustre RPMs directory [%s] is not a directory", - lustre_dir) - return -1 - self.la_lustre_dir = lustre_dir - log.cl_debug("updated the lustre_dir to [%s]", lustre_dir) - return 0 - - def la_update_e2fsprogs_dir(self, log, e2fsprogs_dir): - """ - Update the dir of E2fsprogs RPMs - """ - if not os.path.isdir(e2fsprogs_dir): - log.cl_error("E2fsprogs RPMs directory [%s] is not a directory", - e2fsprogs_dir) - return -1 - self.la_e2fsprogs_dir = e2fsprogs_dir - log.cl_debug("updated the url to [%s]", e2fsprogs_dir) - return 0 - - def la_check_arguments(self, log): - """ - Check whether all arguments are valid - """ - ret = self.la_update_server(log, self.la_server) - if ret: - return ret - - ret = self.la_update_host_wait_time(log, self.la_host_wait_time) - if ret: - return ret - - ret = self.la_update_source_path(log, self.la_source_path) - if ret: - return ret - - ret = self.la_update_lustre_dir(log, self.la_lustre_dir) - if ret: - return ret - - ret = self.la_update_lustre_dir(log, self.la_lustre_dir) - if ret: - return ret - - ret = self.la_update_e2fsprogs_dir(log, self.la_e2fsprogs_dir) - if ret: - return ret - - return 0 - - -class ClientRPCHost(lipe_test_scheduler.RPCHost): - """ - The host for transfering between scheduler and its clients - """ - # pylint: disable=too-few-public-methods - def __init__(self, hostname, - global_template_hostname=None, - kvm_template_config=None, - 
kvm_server_hostname=None, - expected_distro=None, ipv4_addresses=None, - kvm_template_ipv4_address=None): - # pylint: disable=too-many-arguments - super(ClientRPCHost, self).__init__(hostname, - kvm_server_hostname=kvm_server_hostname, - expected_distro=expected_distro, - ipv4_addresses=ipv4_addresses, - kvm_template_ipv4_address=kvm_template_ipv4_address) - self.crh_host = ssh_host.SSHHost(hostname) - self.crh_global_template_hostname = global_template_hostname - self.crh_kvm_template_config = kvm_template_config - self.crh_shared_disk_ids = [] - - def crh_add_shared_disk(self, disk_id): - """ - Add the shared disk ID - """ - self.crh_shared_disk_ids.append(disk_id) - - -def send_lipe_source(log, workspace, build_host, launch_argument): - """ - Send the lipe source codes to remote host - """ - source_path = launch_argument.la_source_path - log.cl_info("sending the source code in directory [%s] of local host to " - "directory [%s] on host [%s]", source_path, workspace, - build_host.sh_hostname) - ret = build_host.sh_send_file(log, source_path, workspace) - if ret: - log.cl_error("failed to send directory [%s] on local host to " - "directory [%s] on host [%s]", - source_path, workspace, build_host.sh_hostname) - return -1 - log.cl_info("sent directory [%s] on local host to " - "directory [%s] on host [%s]", - source_path, workspace, build_host.sh_hostname) - basename = os.path.basename(source_path) - - remote_source_path = workspace + "/" + constants.LIPE_BUILD_LOG_DIR_BASENAME - origin_remote_source_path = workspace + "/" + basename - if origin_remote_source_path != remote_source_path: - command = ("mv %s %s" % - (origin_remote_source_path, remote_source_path)) - retval = build_host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, build_host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - log.cl_error("please clean 
up the dir [%s] on host [%s] " - "manually to avoid exhaustion of disk space on that " - "host", origin_remote_source_path, - build_host.sh_hostname) - return -1 - - launch_argument.la_test_host_source_path = remote_source_path - launch_argument.la_test_host_iso_dir = remote_source_path + "/ISO" - return 0 - - -def ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host): - """ - Add the server of rpc_host to ssh_host_dict - """ - kvm_server_hostname = rpc_host.lrh_kvm_server_hostname - if kvm_server_hostname not in ssh_host_dict: - ssh_host_dict[kvm_server_hostname] = True - ssh_host_config = {} - ssh_host_config[cstr.CSTR_HOST_ID] = kvm_server_hostname - ssh_host_config[cstr.CSTR_HOSTNAME] = kvm_server_hostname - ssh_host_configs.append(ssh_host_config) - log.cl_debug("adding KVM server [%s] to config", - kvm_server_hostname) - - -def generate_lipe_virt_config_ssh_hosts(log, hosts, pairs, config): - """ - Generate the ssh_hosts part of the lipe_virt config - """ - ssh_host_configs = [] - ssh_host_dict = {} - - for rpc_hosts in pairs: - rpc_host = rpc_hosts[0] - ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host) - - for rpc_host in hosts: - ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host) - - config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs - - -def generate_lipe_virt_config_templates(log, kvm_server_dict, config): - """ - Generate the templates part of the lipe_virt config - """ - template_configs = [] - - # kvm_server_dict has keys of server hostnames, and values of - # diction. 
The diction has keys of global template hostname, - # and values of full template config - for diction in kvm_server_dict.values(): - for global_template_hostname, template_config in diction.iteritems(): - template_configs.append(template_config) - log.cl_debug("adding template host [%s] to config", - global_template_hostname) - - config[cstr.CSTR_TEMPLATES] = template_configs - - -def vm_hosts_add(log, vm_host_configs, rpc_host): - """ - Add the rpc_host to vm_host_configs - """ - vm_host_config = {} - vm_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname - vm_host_config[cstr.CSTR_REINSTALL] = False - vm_host_config[cstr.CSTR_TEMPLATE_HOSTNAME] = rpc_host.crh_global_template_hostname - vm_host_config[cstr.CSTR_SHARED_DISK_IDS] = rpc_host.crh_shared_disk_ids - vm_host_config[cstr.CSTR_IPS] = rpc_host.lrh_ipv4_addresses - vm_host_configs.append(vm_host_config) - log.cl_debug("adding VM host [%s] to config", rpc_host.lrh_hostname) - - -def generate_lipe_virt_config_vm_hosts(log, hosts, pairs, config): - """ - Generate the vm_hosts part of the lipe_virt config - """ - vm_host_configs = [] - - for rpc_hosts in pairs: - for rpc_host in rpc_hosts: - vm_hosts_add(log, vm_host_configs, rpc_host) - - for rpc_host in hosts: - vm_hosts_add(log, vm_host_configs, rpc_host) - - config[cstr.CSTR_VM_HOSTS] = vm_host_configs - - -def generate_lipe_virt_config_shared_disks(shared_disk_dict, config): - """ - Generate the shared_disks part of the lipe_virt config - """ - config[cstr.CSTR_SHARED_DISKS] = shared_disk_dict.values() - - -def config_shared_disk_add(log, shared_disk_dict, disk_id, size, - image_fpath, server_host_id): - """ - Add shared disk to the dict - """ - # pylint: disable=too-many-arguments - if disk_id in shared_disk_dict: - log.cl_error("disk ID [%s] already exists", disk_id) - return -1 - shared_disk_config = {} - shared_disk_config[cstr.CSTR_DISK_ID] = disk_id - shared_disk_config[cstr.CSTR_SIZE] = size - shared_disk_config[cstr.CSTR_SERVER_HOST_ID] = 
server_host_id - shared_disk_config[cstr.CSTR_IMAGE_FILE] = image_fpath - shared_disk_dict[disk_id] = shared_disk_config - return 0 - - -def add_shared_device(log, pair, shared_disk_dict, disk_id, size): - """ - Save the shared disk IDs into the ClientRPCHost - """ - host0 = pair[0] - template_config = host0.crh_kvm_template_config - # Image dir might be different for the templates of two hosts, - # use the first one's - image_dir = template_config[cstr.CSTR_IMAGE_DIR] - # Host ID should be the same for two hosts - image_fname = (host0.lrh_hostname + "_shared_" + - str(len(host0.crh_shared_disk_ids)) + ".img") - image_fpath = os.path.join(image_dir, image_fname) - server_host_id = template_config[cstr.CSTR_SERVER_HOST_ID] - - ret = config_shared_disk_add(log, shared_disk_dict, disk_id, size, - image_fpath, server_host_id) - if ret: - log.cl_error("failed to add shared disk with ID [%s]", disk_id) - return -1 - - for host in pair: - host.crh_add_shared_disk(disk_id) - return 0 - - -class TestCluster(object): - """ - The cluster to run test - - Host 0: LiPE install server, Lustre client - Host 1: Lustre client - - Pair 0: MGS/lipe0-MDS, devices: mgs(1GB, LDISKFS), mdt0(5GB, LDISKFS), mdt1(5GB, ZFS) - Pair 1: lipe0-OSS0, devices: ost0(5GB, LDISKFS), ost1(5GB, LDISKFS), ost2(5GB, ZFS) - Pair 2: lipe0-OSS1, devices: ost3(5GB, LDISKFS), ost4(5GB, LDISKFS), ost5(5GB, LDISKFS) - Pair 3: lipe1-MDS/OSS, devices: mdt0(5GB, ZFS), ost0(5GB, LDISKFS), ost1(5GB, ZFS) - """ - # pylint: disable=too-few-public-methods,too-many-instance-attributes - # pylint: disable=too-many-locals - PAIR_NUMBER = 4 - HOST_NUMBER = 2 - - def __init__(self, workspace, test_host, hosts, pairs, rpc_ip_address, - kvm_server_dict): - # pylint: disable=too-many-arguments,too-many-statements - self.tc_vm_hosts = [] - self.tc_vm_hosts += hosts - for pair in pairs: - self.tc_vm_hosts += pair - self.tc_lipe_test_logdir = (workspace + "/" + - constants.LIPE_TEST_LOG_DIR_BASENAME) - self.tc_rpc_ip_address = 
rpc_ip_address - self.tc_kvm_server_dict = kvm_server_dict - self.tc_workspace = workspace - self.tc_cluster_id = utils.random_word(7) - self.tc_hosts = hosts - self.tc_client_host0 = hosts[0] - self.tc_client_host1 = hosts[1] - self.tc_test_host = test_host - self.tc_install_server = self.tc_hosts[0] - self.tc_pairs = pairs - self.tc_mgs_pair = pairs[0] - self.tc_mgt_size = 1 - self.tc_mdt_size = 5 - self.tc_ost_size = 5 - self.tc_mgt_disk_id = self.tc_cluster_id + "_mgt" - self.tc_fs0_fsname = self.tc_cluster_id + "0" - self.tc_fs0_mnt = "/mnt/" + self.tc_fs0_fsname - self.tc_fs0_mdt_disk_id_prefix = (self.tc_fs0_fsname + "_" + - "mdt") - self.tc_fs0_ost_disk_id_prefix = (self.tc_fs0_fsname + "_" + - "ost") - self.tc_fs0_mdt_number_per_mds = 2 - self.tc_fs0_ost_number_per_oss = 3 - self.tc_fs0_mds_pair0 = pairs[0] - self.tc_fs0_oss_pair0 = pairs[1] - self.tc_fs0_oss_pair1 = pairs[2] - self.tc_fs0_oss_pairs = [pairs[1], pairs[2]] - - self.tc_fs1_fsname = self.tc_cluster_id + "1" - self.tc_fs1_mnt = "/mnt/" + self.tc_fs1_fsname - self.tc_fs1_mdt_disk_id_prefix = (self.tc_fs1_fsname + "_" + - "mdt") - self.tc_fs1_ost_disk_id_prefix = (self.tc_fs1_fsname + "_" + - "ost") - self.tc_fs1_mdt_number_per_mds = 1 - self.tc_fs1_ost_number_per_oss = 2 - self.tc_fs1_mds_pair0 = pairs[3] - self.tc_fs1_oss_pair0 = pairs[3] - - fname = lipe_virt.LIPE_VIRT_CONFIG_FNAME - self.tc_lipe_virt_config_fpath = workspace + "/" + fname - fname = constants.CLOWNFISH_CONFIG_FNAME - self.tc_clownfish_config_fpath = workspace + "/" + fname - fname = constants.LIPE_INSTALL_CONFIG_FNAME - self.tc_lipe_install_config_fpath = workspace + "/" + fname - fname = constants.LIPE_TEST_CONFIG_FNAME - self.tc_lipe_test_config_fpath = workspace + "/" + fname - fname = constants.LIPE_BUILD_CONFIG_FNAME - self.tc_lipe_build_config_fpath = workspace + "/" + fname - fname = constants.LIPE_LAUNCH_CONFIG_FNAME - self.tc_lipe_launch_fpath = workspace + "/" + fname - self.tc_test_host_lustre_rpm_dir = workspace + 
DEFAULT_LUSTRE_RPM_DIR - self.tc_test_host_e2fsprogs_rpm_dir = workspace + DEFAULT_E2FSPROGS_RPM_DIR - - def _tc_add_shared_mgt(self, log, shared_disk_dict): - """ - Save the shared disk MGS into the ClientRPCHost - """ - return add_shared_device(log, self.tc_mgs_pair, shared_disk_dict, - self.tc_mgt_disk_id, - self.tc_mgt_size) - - def _tc_fs0_add_shared_devices(self, log, shared_disk_dict): - """ - Save the shared disks of file system 0 - """ - for mdt_index in range(self.tc_fs0_mdt_number_per_mds): - mdt_disk_id = self.tc_fs0_mdt_disk_id_prefix + str(mdt_index) - ret = add_shared_device(log, self.tc_fs0_mds_pair0, - shared_disk_dict, - mdt_disk_id, - self.tc_mdt_size) - if ret: - log.cl_error("failed to add shared disk for [%s]", mdt_disk_id) - return -1 - - for pair_index, pair in enumerate(self.tc_fs0_oss_pairs): - for index in range(self.tc_fs0_ost_number_per_oss): - ost_index = index + (self.tc_fs0_ost_number_per_oss * pair_index) - ost_disk_id = self.tc_fs0_ost_disk_id_prefix + str(ost_index) - ret = add_shared_device(log, pair, - shared_disk_dict, - ost_disk_id, - self.tc_ost_size) - if ret: - log.cl_error("failed to add shared disk for [%s]", ost_disk_id) - return -1 - return 0 - - def _tc_fs1_add_shared_devices(self, log, shared_disk_dict): - """ - Save the shared disks of file system 1 - """ - for mdt_index in range(self.tc_fs1_mdt_number_per_mds): - mdt_disk_id = self.tc_fs1_mdt_disk_id_prefix + str(mdt_index) - ret = add_shared_device(log, self.tc_fs1_mds_pair0, - shared_disk_dict, - mdt_disk_id, - self.tc_mdt_size) - if ret: - log.cl_error("failed to add shared disk for [%s]", mdt_disk_id) - return -1 - - for ost_index in range(self.tc_fs1_ost_number_per_oss): - ost_disk_id = self.tc_fs1_ost_disk_id_prefix + str(ost_index) - ret = add_shared_device(log, self.tc_fs1_oss_pair0, - shared_disk_dict, - ost_disk_id, - self.tc_ost_size) - if ret: - log.cl_error("failed to add shared disk for [%s]", ost_disk_id) - return -1 - return 0 - - def 
tc_add_shared_devices(self, log, shared_disk_dict): - """ - Save the shared disks into the ClientRPCHost - """ - ret = self._tc_add_shared_mgt(log, shared_disk_dict) - if ret: - return -1 - - ret = self._tc_fs0_add_shared_devices(log, shared_disk_dict) - if ret: - return -1 - - ret = self._tc_fs1_add_shared_devices(log, shared_disk_dict) - if ret: - return -1 - - return 0 - - def tc_send_lustre_e2fsprogs_rpms(self, log, launch_argument): - """ - Send Lustre RPMs and E2fsprogs RPMs - """ - host = self.tc_test_host.crh_host - workspace = self.tc_workspace - - local_rpm_dir = launch_argument.la_lustre_dir - test_host = self.tc_test_host.crh_host - ret = test_host.sh_send_file(log, local_rpm_dir, workspace) - if ret: - log.cl_error("failed to send dir [%s] on local host to " - "directory [%s] on host [%s]", - local_rpm_dir, workspace, test_host.sh_hostname) - return -1 - basename = os.path.basename(local_rpm_dir) - remote_rpm_dir = workspace + "/" + basename - remote_lustre_rpm_dir = self.tc_test_host_lustre_rpm_dir - if remote_rpm_dir != remote_lustre_rpm_dir: - command = ("mv %s %s" % (remote_rpm_dir, remote_lustre_rpm_dir)) - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return -1 - - local_rpm_dir = launch_argument.la_e2fsprogs_dir - test_host = self.tc_test_host.crh_host - ret = test_host.sh_send_file(log, local_rpm_dir, workspace) - if ret: - log.cl_error("failed to send dir [%s] on local host to " - "directory [%s] on host [%s]", - local_rpm_dir, workspace, test_host.sh_hostname) - return -1 - basename = os.path.basename(local_rpm_dir) - remote_rpm_dir = workspace + "/" + basename - remote_e2fsprogs_rpm_dir = self.tc_test_host_e2fsprogs_rpm_dir - if remote_rpm_dir != remote_e2fsprogs_rpm_dir: - command = ("mv %s %s" % (remote_rpm_dir, 
remote_e2fsprogs_rpm_dir)) - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return -1 - - def tc_get_and_clean_dir(self, log, host, logdir): - """ - Get and clean log - """ - host_local_dir = self.tc_workspace + "/" + host.sh_hostname - ret = utils.mkdir(host_local_dir) - if ret: - log.cl_error("failed to create directory [%s] on local host", - host_local_dir) - log.cl_error("please backup the log [%s] on host " - "[%s] manually for debug purpose", - logdir, host.sh_hostname) - return -1 - - ret = host.sh_get_and_clean_dir(log, logdir, host_local_dir) - if ret: - log.cl_error("failed to get and clean dir [%s] on host [%s]", - logdir, host.sh_hostname) - return ret - - def tc_run_lipe_test(self, log, launch_argument): - """ - Run lipe test - """ - return_value = 0 - workspace = self.tc_workspace - host = self.tc_test_host.crh_host - - source_path = launch_argument.la_test_host_source_path - command = ("cd %s && ./lipe_test --logdir %s --config %s" % - (source_path, self.tc_lipe_test_logdir, - self.tc_lipe_test_config_fpath)) - stdout_file = workspace + "/" + "lipe_test_watching.stdout" - stderr_file = workspace + "/" + "lipe_test_watching.stderr" - retval = host.sh_watched_run(log, command, stdout_file, stderr_file) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return_value = -1 - - ret = self.tc_get_and_clean_dir(log, host, - self.tc_lipe_test_logdir) - if ret: - log.cl_error("failed to get and clean dir [%s] on host [%s]", - self.tc_lipe_test_logdir, host.sh_hostname) - if return_value: - log.cl_error("please check [%s] on host [%s] and [%s] on " - "local host to 
debug why test of lipe failed", - self.tc_lipe_test_logdir, host.sh_hostname, - workspace) - else: - log.cl_error("please check why copying remove directory " - "failed after success of clownfish test") - return_value = -1 - return return_value - - def _tc_generate_clownfish_config_lustre_distributions(self, config): - """ - Generate the lustre distributions part of clownfish.conf - """ - lustre_distr_configs = [] - lustre_distr_config = {} - lustre_distr_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id - lustre_distr_config[cstr.CSTR_LUSTRE_RPM_DIR] = self.tc_test_host_lustre_rpm_dir - lustre_distr_config[cstr.CSTR_E2FSPROGS_RPM_DIR] = self.tc_test_host_e2fsprogs_rpm_dir - lustre_distr_configs.append(lustre_distr_config) - config[cstr.CSTR_LUSTRE_DISTRIBUTIONS] = lustre_distr_configs - - def _tc_generate_clownfish_config_ssh_hosts(self, config): - """ - Generate the ssh_hosts part of clownfish.conf - """ - ssh_host_configs = [] - for pair in self.tc_pairs: - for rpc_host in pair: - ssh_host_config = {} - ssh_host_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - ssh_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname - ssh_host_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id - ssh_host_configs.append(ssh_host_config) - - for rpc_host in [self.tc_client_host0, self.tc_client_host1]: - ssh_host_config = {} - ssh_host_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - ssh_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname - ssh_host_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id - ssh_host_configs.append(ssh_host_config) - config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs - - def _tc_generate_clownfish_config_mgs_list(self, config): - """ - Generate the mgs_list part of clownfish.conf - """ - mgs_configs = [] - mgs_config = {} - mgs_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id - mgs_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS - mgs_instance_configs = [] - for rpc_host in self.tc_mgs_pair: - 
mgs_instance_config = {} - mgs_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - mgs_instance_config[cstr.CSTR_DEVICE] = DEV_MAPPER_PREFIX + self.tc_mgt_disk_id - mgs_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp" - mgs_instance_configs.append(mgs_instance_config) - mgs_config[cstr.CSTR_INSTANCES] = mgs_instance_configs - mgs_configs.append(mgs_config) - config[cstr.CSTR_MGS_LIST] = mgs_configs - - def _tc_generate_clownfish_config_fs0_mdts(self, lustre_config): - """ - Generate the mdts part of lustre 0 of clownfish.conf - """ - mdt_configs = [] - for mdt_index in range(self.tc_fs0_mdt_number_per_mds): - mdt_config = {} - mdt_config[cstr.CSTR_IS_MGS] = False - mdt_config[cstr.CSTR_INDEX] = mdt_index - # The last MDT on each MDS pair is ZFS - if mdt_index == self.tc_fs0_mdt_number_per_mds - 1: - is_ldiskfs = False - else: - is_ldiskfs = True - if is_ldiskfs: - mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS - else: - mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS - mdt_instance_configs = [] - for rpc_host in self.tc_fs0_mds_pair0: - mdt_instance_config = {} - mdt_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - disk_id = self.tc_fs0_mdt_disk_id_prefix + str(mdt_index) - block_device = DEV_MAPPER_PREFIX + disk_id - if is_ldiskfs: - device = block_device - else: - zpool = disk_id - device = zpool + ("/mdt%d" % mdt_index) - mdt_instance_config[cstr.CSTR_DEVICE] = device - mdt_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp" - mdt_instance_configs.append(mdt_instance_config) - if not is_ldiskfs: - zpool_create = ("zpool create -f %s %s" % - (zpool, block_device)) - mdt_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create - mdt_config[cstr.CSTR_INSTANCES] = mdt_instance_configs - mdt_configs.append(mdt_config) - lustre_config[cstr.CSTR_MDTS] = mdt_configs - - def _tc_generate_clownfish_config_fs0_osts(self, lustre_config): - """ - Generate the osts part of lustre 0 of 
clownfish.conf - """ - ost_configs = [] - - for pair_index, pair in enumerate(self.tc_fs0_oss_pairs): - for index in range(self.tc_fs0_ost_number_per_oss): - ost_config = {} - ost_config[cstr.CSTR_IS_MGS] = False - ost_index = index + (self.tc_fs0_ost_number_per_oss * pair_index) - ost_config[cstr.CSTR_INDEX] = ost_index - # The last OST on the first OSS pair will be formated with ZFS, - # others ldiskfs. This will make sure at least the second OSS - # pair have three LDISKFS OSTs, thus one of the OSS in this pair - # have two active LDISKFS OSTs which is a requirement for hotpool test. - if ost_index == self.tc_fs0_ost_number_per_oss - 1: - is_ldiskfs = False - ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS - else: - is_ldiskfs = True - ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS - - ost_instance_configs = [] - for rpc_host in pair: - ost_instance_config = {} - ost_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - disk_id = self.tc_fs0_ost_disk_id_prefix + str(ost_index) - block_device = DEV_MAPPER_PREFIX + disk_id - if is_ldiskfs: - device = block_device - else: - zpool = disk_id - device = zpool + ("/ost%d" % index) - zpool_create = ("zpool create -f %s %s" % - (zpool, block_device)) - ost_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create - ost_instance_config[cstr.CSTR_DEVICE] = device - ost_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp" - ost_instance_configs.append(ost_instance_config) - ost_config[cstr.CSTR_INSTANCES] = ost_instance_configs - ost_configs.append(ost_config) - lustre_config[cstr.CSTR_OSTS] = ost_configs - - def _tc_generate_clownfish_config_fs0_clients(self, lustre_config): - """ - Generate the clients part of a lustre file system of clownfish.conf - """ - clients_configs = [] - clients_config = {} - clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host0.lrh_hostname - clients_config[cstr.CSTR_MNT] = self.tc_fs0_mnt - clients_config[cstr.CSTR_OPTIONS] = "user_xattr" - 
clients_configs.append(clients_config) - - clients_config = {} - clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host1.lrh_hostname - clients_config[cstr.CSTR_MNT] = self.tc_fs0_mnt - clients_config[cstr.CSTR_OPTIONS] = "user_xattr" - clients_configs.append(clients_config) - lustre_config[cstr.CSTR_CLIENTS] = clients_configs - - def _tc_generate_clownfish_config_fs1_mdts(self, lustre_config): - """ - Generate the mdts part of lustre 1 of clownfish.conf - """ - mdt_configs = [] - for mdt_index in range(self.tc_fs1_mdt_number_per_mds): - mdt_config = {} - mdt_config[cstr.CSTR_IS_MGS] = False - mdt_config[cstr.CSTR_INDEX] = mdt_index - if mdt_index == self.tc_fs1_mdt_number_per_mds - 1: - is_ldiskfs = False - else: - is_ldiskfs = True - if is_ldiskfs: - mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS - else: - mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS - mdt_instance_configs = [] - for rpc_host in self.tc_fs1_mds_pair0: - mdt_instance_config = {} - mdt_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - disk_id = self.tc_fs1_mdt_disk_id_prefix + str(mdt_index) - block_device = DEV_MAPPER_PREFIX + disk_id - if is_ldiskfs: - device = block_device - else: - zpool = disk_id - device = zpool + ("/mdt%d" % mdt_index) - mdt_instance_config[cstr.CSTR_DEVICE] = device - mdt_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp" - mdt_instance_configs.append(mdt_instance_config) - if not is_ldiskfs: - zpool_create = ("zpool create -f %s %s" % - (zpool, block_device)) - mdt_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create - mdt_config[cstr.CSTR_INSTANCES] = mdt_instance_configs - mdt_configs.append(mdt_config) - lustre_config[cstr.CSTR_MDTS] = mdt_configs - - def _tc_generate_clownfish_config_fs1_osts(self, lustre_config): - """ - Generate the osts part of lustre 1 of clownfish.conf - """ - ost_configs = [] - for ost_index in range(self.tc_fs1_ost_number_per_oss): - ost_config = dict() - ost_config[cstr.CSTR_IS_MGS] 
= False - ost_config[cstr.CSTR_INDEX] = ost_index - if ost_index == self.tc_fs1_ost_number_per_oss - 1: - is_ldiskfs = False - else: - is_ldiskfs = True - if is_ldiskfs: - ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS - else: - ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS - ost_instance_configs = [] - for rpc_host in self.tc_fs1_oss_pair0: - ost_instance_config = dict() - ost_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - disk_id = self.tc_fs1_ost_disk_id_prefix + str(ost_index) - block_device = DEV_MAPPER_PREFIX + disk_id - if is_ldiskfs: - device = block_device - else: - zpool = disk_id - device = zpool + ("/ost%d" % ost_index) - ost_instance_config[cstr.CSTR_DEVICE] = device - ost_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp" - ost_instance_configs.append(ost_instance_config) - if not is_ldiskfs: - zpool_create = ("zpool create -f %s %s" % - (zpool, block_device)) - ost_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create - ost_config[cstr.CSTR_INSTANCES] = ost_instance_configs - ost_configs.append(ost_config) - lustre_config[cstr.CSTR_OSTS] = ost_configs - - def _tc_generate_clownfish_config_fs1_clients(self, lustre_config): - """ - Generate the clients part of a lustre file system of clownfish.conf - """ - clients_configs = [] - clients_config = {} - clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host0.lrh_hostname - clients_config[cstr.CSTR_MNT] = self.tc_fs1_mnt - clients_config[cstr.CSTR_OPTIONS] = "user_xattr" - clients_configs.append(clients_config) - - clients_config = {} - clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host1.lrh_hostname - clients_config[cstr.CSTR_MNT] = self.tc_fs1_mnt - clients_config[cstr.CSTR_OPTIONS] = "user_xattr" - clients_configs.append(clients_config) - lustre_config[cstr.CSTR_CLIENTS] = clients_configs - - def _tc_generate_clownfish_config_lustres(self, config): - """ - Generate the lustres part of clownfish.conf - """ - lustre_configs = [] - 
lustre_config = {} - lustre_config[cstr.CSTR_FSNAME] = self.tc_fs0_fsname - lustre_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id - self._tc_generate_clownfish_config_fs0_mdts(lustre_config) - self._tc_generate_clownfish_config_fs0_osts(lustre_config) - self._tc_generate_clownfish_config_fs0_clients(lustre_config) - lustre_configs.append(lustre_config) - - lustre_config = {} - lustre_config[cstr.CSTR_FSNAME] = self.tc_fs1_fsname - lustre_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id - self._tc_generate_clownfish_config_fs1_mdts(lustre_config) - self._tc_generate_clownfish_config_fs1_osts(lustre_config) - self._tc_generate_clownfish_config_fs1_clients(lustre_config) - lustre_configs.append(lustre_config) - config[cstr.CSTR_LUSTRES] = lustre_configs - - def tc_generate_clownfish_config(self, log): - """ - Generate clownfish.conf - """ - config = {} - config[cstr.CSTR_LAZY_PREPARE] = True - config[cstr.CSTR_HIGH_AVAILABILITY] = False - self._tc_generate_clownfish_config_lustre_distributions(config) - self._tc_generate_clownfish_config_ssh_hosts(config) - self._tc_generate_clownfish_config_mgs_list(config) - self._tc_generate_clownfish_config_lustres(config) - - config_fpath = self.tc_clownfish_config_fpath - start_string = """# -# Configuration file of Clownfish -# -""" - ret = self.tc_write_and_send_config(log, config, config_fpath, - start_string) - if ret: - log.cl_error("failed to write and send clownfish.conf") - return -1 - - log.cl_info("config of clownfish.conf is saved to [%s]", config_fpath) - return 0 - - def tc_generate_lipe_virt_config(self, log, shared_disk_dict): - """ - Generate the config that can be digested by lipe_virt command - """ - kvm_server_dict = self.tc_kvm_server_dict - hosts = self.tc_hosts - pairs = self.tc_pairs - config = {} - generate_lipe_virt_config_shared_disks(shared_disk_dict, config) - generate_lipe_virt_config_templates(log, kvm_server_dict, config) - generate_lipe_virt_config_ssh_hosts(log, hosts, pairs, config) - 
generate_lipe_virt_config_vm_hosts(log, hosts, pairs, config) - - config_fpath = self.tc_lipe_virt_config_fpath - start_string = """# -# Configuration file of installing virtual machines, used by lipe_virt command -# -""" - ret = self.tc_write_and_send_config(log, config, config_fpath, - start_string) - if ret: - log.cl_error("failed to write and send lipe_virt.conf") - return -1 - - log.cl_info("config of lipe_virt.conf is saved to [%s]", config_fpath) - return 0 - - def tc_write_and_send_config(self, log, config, config_fpath, - start_string): - """ - Write the config and send to test host - """ - config_string = start_string - config_string += yaml.dump(config, Dumper=lyaml.YamlDumper, - default_flow_style=False) - - try: - with open(config_fpath, 'w') as yaml_file: - yaml_file.write(config_string) - except: - sys.stdout.write(config_string) - return -1 - - workspace = self.tc_workspace - test_host = self.tc_test_host.crh_host - ret = test_host.sh_send_file(log, config_fpath, workspace) - if ret: - log.cl_error("failed to send file [%s] on local host to " - "directory [%s] on host [%s]", - config_fpath, workspace, test_host.sh_hostname) - return -1 - - def tc_generate_lipe_install_config(self, log, launch_argument): - """ - Generate lipe_install.conf - """ - # pylint: disable=too-many-locals - # send the example web json file to test host - workspace = self.tc_workspace - test_host = self.tc_test_host.crh_host - ret = test_host.sh_send_file(log, - launch_argument.la_source_lipe_launch_config_file, - workspace) - if ret: - log.cl_error("failed to send file [%s] on local host to " - "directory [%s] on host [%s]", - launch_argument.la_source_lipe_launch_config_file, - workspace, test_host.sh_hostname) - return -1 - - config = {} - config[cstr.CSTR_CONFIG_FPATH] = self.tc_lipe_launch_fpath - config[cstr.CSTR_ISO_PATH] = launch_argument.la_test_host_iso_fpath - - ssh_hosts = [] - mdt_hosts = [] - for rpc_host in self.tc_fs0_mds_pair0 + self.tc_fs1_mds_pair0: - 
mdt_host = {} - mdt_host[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - mdt_hosts.append(mdt_host) - ssh_host_dict = {} - ssh_host_dict[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname - ssh_host_dict[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname - ssh_hosts.append(ssh_host_dict) - config[cstr.CSTR_MDT_HOSTS] = mdt_hosts - - # Add oss hosts into ssh_hosts list to make lpurge available there - for oss_host in self.tc_fs0_oss_pair0 + self.tc_fs0_oss_pair1: - ssh_host_dict = {} - ssh_host_dict[cstr.CSTR_HOST_ID] = oss_host.lrh_hostname - ssh_host_dict[cstr.CSTR_HOSTNAME] = oss_host.lrh_hostname - ssh_hosts.append(ssh_host_dict) - - # Need to add client hosts here too, otherwise, lipe_run_action - # might be missing - for client_host in [self.tc_client_host0, self.tc_client_host1]: - ssh_host_dict = {} - ssh_host_dict[cstr.CSTR_HOST_ID] = client_host.lrh_hostname - ssh_host_dict[cstr.CSTR_HOSTNAME] = client_host.lrh_hostname - ssh_hosts.append(ssh_host_dict) - config[cstr.CSTR_SSH_HOSTS] = ssh_hosts - - config_fpath = self.tc_lipe_install_config_fpath - start_string = """# -# Configuration file for installing LiPE -# -""" - ret = self.tc_write_and_send_config(log, config, config_fpath, - start_string) - if ret: - log.cl_error("failed to write and send lipe_install.conf") - return -1 - log.cl_info("config of lipe_install.conf saved to [%s]", config_fpath) - return 0 - - def tc_generate_lipe_test_config(self, log): - """ - Generate lipe_test.conf - """ - config = {} - config[cstr.CSTR_CLOWNFISH_CONFIG] = self.tc_clownfish_config_fpath - install_server_config = {} - install_server_config[cstr.CSTR_HOSTNAME] = self.tc_install_server.lrh_hostname - config[cstr.CSTR_INSTALL_SERVER] = install_server_config - config[cstr.CSTR_LIPE_INSTALL_CONFIG] = self.tc_lipe_install_config_fpath - config[cstr.CSTR_SKIP_INSTALL] = False - config[cstr.CSTR_SKIP_VIRT] = False - config[cstr.CSTR_VIRT_CONFIG] = self.tc_lipe_virt_config_fpath - - config_fpath = self.tc_lipe_test_config_fpath - start_string 
= """# -# Configuration file for testing LiPE -# -""" - ret = self.tc_write_and_send_config(log, config, config_fpath, - start_string) - if ret: - log.cl_error("failed to write and send lipe_install.conf") - return -1 - log.cl_info("config of lipe_test.conf is saved to [%s]", config_fpath) - return 0 - - def tc_generate_lipe_build_config(self, log): - """ - Generate the config that can be digested by lipe_build command - """ - config = {} - config[cstr.CSTR_ZFS_SUPPORT] = True - - config_fpath = self.tc_lipe_build_config_fpath - start_string = """# -# Configuration file of building LiPE ISO -# -""" - ret = self.tc_write_and_send_config(log, config, config_fpath, - start_string) - if ret: - log.cl_error("failed to write and send lipe_virt.conf") - return -1 - - log.cl_info("config of lipe_virt.conf is saved to [%s]", config_fpath) - return 0 - - def tc_build_lipe(self, log, launch_argument): - """ - Send the lipe source codes to remote host and build it, get the ISO back - """ - workspace = self.tc_workspace - host = self.tc_test_host.crh_host - - ret = send_lipe_source(log, workspace, host, launch_argument) - if ret: - return -1 - - remote_source_path = launch_argument.la_test_host_source_path - lipe_build_logdir = (remote_source_path + "/" + - constants.LIPE_BUILD_LOG_DIR_BASENAME) - log.cl_info("building lipe in directory [%s] on host [%s]", - remote_source_path, host.sh_hostname) - command = ("cd %s && ./lipe_build --logdir %s --config %s" % - (remote_source_path, lipe_build_logdir, - self.tc_lipe_build_config_fpath)) - stdout_file = workspace + "/" + "lipe_build_watching.stdout" - stderr_file = workspace + "/" + "lipe_build_watching.stderr" - retval = host.sh_watched_run(log, command, stdout_file, stderr_file) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - ret = 
self.tc_get_and_clean_build_dir(log, launch_argument) - if ret: - log.cl_error("please check [%s] on host [%s] and [%s] on " - "local host to debug why build of lipe failed", - remote_source_path, host.sh_hostname, workspace) - return -1 - return 0 - - def tc_get_and_clean_build_dir(self, log, launch_argument): - """ - Get and clean the build dir on test host - """ - remote_source_path = launch_argument.la_test_host_source_path - host = self.tc_test_host.crh_host - - ret = self.tc_get_and_clean_dir(log, host, remote_source_path) - if ret: - log.cl_error("failed to get and clean dir [%s] on host [%s], " - "please backup it manually for further debugging", - remote_source_path, host.sh_hostname) - return -1 - return 0 - - def tc_prepare_workspace(self, log): - """ - Create the workspace - """ - workspace = self.tc_workspace - build_host = self.tc_test_host.crh_host - - command = ("mkdir -p %s" % (workspace)) - retval = build_host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, build_host.sh_hostname, - retval.cr_exit_status, retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - def tc_cleanup_vm_hosts(self, log): - """ - Cleanup the virtual machones hosts - """ - for host in self.tc_vm_hosts: - ret = host.crh_host.sh_rpm_find_and_uninstall(log, "grep lipe") - if ret: - log.cl_error("failed to uninstall LiPE RPMs on host " - "[%s]", host.crh_host.sh_hostname) - return -1 - - ret = host.crh_host.sh_rpm_find_and_uninstall(log, "grep clownfish") - if ret: - log.cl_error("failed to uninstall Clownfish RPMs on host " - "[%s]", host.crh_host.sh_hostname) - return -1 - - # Ignore the error since these mnt might not exist yet - command = "umount %s/*" % constants.LIPE_INSTALL_MNT_DIR - host.crh_host.sh_run(log, command) - command = ("rmdir %s/*" % (constants.LIPE_INSTALL_MNT_DIR)) - host.crh_host.sh_run(log, command) - return 0 - - def 
tc_run_test_without_removing_build_dir(self, log, launch_argument): - """ - Run test without removing build dir - """ - shared_disk_dict = {} - ret = self.tc_add_shared_devices(log, shared_disk_dict) - if ret: - log.cl_error("failed to add shared devices") - return -1 - - ret = self.tc_generate_lipe_virt_config(log, shared_disk_dict) - if ret: - log.cl_error("failed to generate config of lipe_virt") - return -1 - - ret = self.tc_generate_clownfish_config(log) - if ret: - log.cl_error("failed to generate config of clownfish") - return -1 - - ret = self.tc_generate_lipe_install_config(log, launch_argument) - if ret: - log.cl_error("failed to generate config of installing lipe") - return -1 - - ret = self.tc_generate_lipe_test_config(log) - if ret: - log.cl_error("failed to generate config of testing lipe") - return -1 - - ret = self.tc_send_lustre_e2fsprogs_rpms(log, launch_argument) - if ret: - log.cl_error("failed to send Lustre and E2fsprogs RPMs") - return -1 - - ret = self.tc_run_lipe_test(log, launch_argument) - if ret: - log.cl_error("failed to run LiPE test") - return -1 - - def tc_run_test(self, log, launch_argument): - """ - Run test on this cluster - """ - # pylint: disable=too-many-locals,too-many-arguments,too-many-branches - ret = self.tc_cleanup_vm_hosts(log) - if ret: - log.cl_error("failed to clean up VMs") - return -1 - - ret = self.tc_prepare_workspace(log) - if ret: - log.cl_error("failed to prepare workspace") - return -1 - - ret = self.tc_generate_lipe_build_config(log) - if ret: - log.cl_error("failed to generate lipe_build.conf") - return -1 - - ret = self.tc_build_lipe(log, launch_argument) - if ret: - log.cl_error("failed to build LiPE") - return -1 - - return_value = self.tc_run_test_without_removing_build_dir(log, - launch_argument) - if return_value: - log.cl_error("failed to run tests") - - ret = self.tc_get_and_clean_build_dir(log, launch_argument) - if ret: - log.cl_error("failed to get and clean build directory on test host") - 
return_value = -1 - return return_value - - -def _allocate_resources(log, scheduler_id, jobid, proxy, descs): - """ - Allocate resources from server - """ - rpc_descriptors = proxy.ts_resources_allocate(scheduler_id, jobid, descs) - if len(rpc_descriptors) == 0: - log.cl_info("not enough resources to allocate") - return -1, None, None - same_kvm_host_descriptors = [] - other_descriptors = [] - lipe_test_scheduler.rpc2descriptors(log, rpc_descriptors, - same_kvm_host_descriptors, - other_descriptors) - return 0, same_kvm_host_descriptors, other_descriptors - - -def allocate_resources(log, scheduler_id, jobid, proxy, descs, - timeout=DEFAULT_HOST_TIMEOUT, - sleep_interval=HOST_ALLOCATION_INTERVAL): - """ - Allocate resources from server, wait if necessary - """ - # pylint: disable=too-many-arguments,unused-variable - return utils.wait_condition(log, _allocate_resources, - (scheduler_id, jobid, proxy, descs), - timeout=timeout, - sleep_interval=sleep_interval) - - -def allocate_hosts_and_ip(log, scheduler_id, jobid, proxy): - """ - Allocate hosts and IP - """ - # pylint: disable=unused-variable - descs = [] - des = lipe_test_scheduler.ResourceDescriptorIPAddress() - descs.append(des) - desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_BUILD) - descs.append(desc) - desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_TEST, - number_min=TestCluster.HOST_NUMBER, - number_max=TestCluster.HOST_NUMBER) - descs.append(desc) - for i in range(TestCluster.PAIR_NUMBER): - desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_TEST, - number_min=2, number_max=2, - same_kvm_server=True) - descs.append(desc) - - ret, decs1, decs2 = allocate_resources(log, scheduler_id, jobid, proxy, - descs) - if ret: - log.cl_error("failed to allocate hosts and IP") - return ret, decs1, decs2 - - -def resource2host(res, kvm_server_dict): - """ - Transfer the RPCHost to ClientRPCHost - """ - hostname = res.lrh_hostname - 
kvm_template = res.lrh_kvm_template - kvm_server_hostname = res.lrh_kvm_server_hostname - if kvm_template is not None: - template_hostname = kvm_template["vt_template_hostname"] - # kvm_server_dict has keys of server hostnames, and values of - # diction. The diction has keys of global template hostname, - # and values of full template config - if kvm_server_hostname not in kvm_server_dict: - kvm_template_per_sever_dict = {} - kvm_server_dict[kvm_server_hostname] = kvm_template_per_sever_dict - kvm_template_per_sever_dict = kvm_server_dict[kvm_server_hostname] - - global_template_hostname = kvm_server_hostname + "_" + template_hostname - if global_template_hostname not in kvm_template_per_sever_dict: - kvm_template_config = {} - # The template is an object of VirtTemplate - kvm_template_config[cstr.CSTR_DNS] = kvm_template["vt_dns"] - kvm_template_config[cstr.CSTR_ISO] = kvm_template["vt_iso"] - kvm_template_config[cstr.CSTR_HOSTNAME] = global_template_hostname - kvm_template_config[cstr.CSTR_INTERNET] = kvm_template["vt_internet"] - kvm_template_config[cstr.CSTR_NETWORK_CONFIGS] = kvm_template["vt_network_configs"] - kvm_template_config[cstr.CSTR_IMAGE_DIR] = kvm_template["vt_image_dir"] - kvm_template_config[cstr.CSTR_DISTRO] = kvm_template["vt_distro"] - kvm_template_config[cstr.CSTR_RAM_SIZE] = kvm_template["vt_ram_size"] - kvm_template_config[cstr.CSTR_DISK_SIZES] = kvm_template["vt_disk_sizes"] - kvm_template_config[cstr.CSTR_BUS_TYPE] = kvm_template["vt_bus_type"] - kvm_template_config[cstr.CSTR_SERVER_HOST_ID] = kvm_server_hostname - kvm_template_config[cstr.CSTR_REINSTALL] = False - kvm_template_per_sever_dict[global_template_hostname] = kvm_template_config - kvm_template_config = kvm_template_per_sever_dict[global_template_hostname] - else: - kvm_template_config = None - global_template_hostname = None - - rpc_host = ClientRPCHost(hostname, global_template_hostname=global_template_hostname, - kvm_template_config=kvm_template_config, - 
kvm_server_hostname=kvm_server_hostname, - expected_distro=res.lrh_expected_distro, - ipv4_addresses=res.lrh_ipv4_addresses) - return rpc_host - - -def resources2hosts(log, resources, kvm_server_dict): - """ - Transfer an array of RPCHost to an array of ClientRPCHost - """ - rpc_hosts = [] - hostnames = [] - for res in resources: - rpc_host = resource2host(res, kvm_server_dict) - rpc_hosts.append(rpc_host) - hostnames.append(rpc_host.lrh_hostname) - - log.cl_info("allocated hosts %s", hostnames) - return rpc_hosts - - -def descriptor2build_hosts(log, desc, build_host, kvm_server_dict): - """ - Transfer an descriptor of build host to an object of ClientRPCHost - """ - if build_host is not None: - log.cl_error("allocated too many build host") - return None - if len(desc.rd_resources) != 1: - log.cl_error("unexpected host number in the test resource, " - "expected %d, got %d", TestCluster.HOST_NUMBER, - len(desc.rd_resources)) - return None - build_host = resource2host(desc.rd_resources[0], kvm_server_dict) - log.cl_info("allocated build host %s", build_host.lrh_hostname) - return build_host - - -def descriptor2pair_hosts(log, desc, pairs, kvm_server_dict): - """ - Transfer an descriptor of a pair of hosts to an array of ClientRPCHost - """ - if desc.rdh_purpose != lipe_test_scheduler.PURPOSE_TEST: - log.cl_error("unexpected purpose, expected [%s], got [%s]", - lipe_test_scheduler.PURPOSE_TEST, - desc.rdh_purpose) - return None - if len(pairs) >= TestCluster.PAIR_NUMBER: - log.cl_error("allocated too many pair hosts") - return None - if len(desc.rd_resources) != 2: - log.cl_error("unexpected host number in a pair resource, " - "expected 2, got %d", len(desc.rd_resources)) - return None - rpc_hosts = resources2hosts(log, desc.rd_resources, kvm_server_dict) - pairs.append(rpc_hosts) - return 0 - - -def descriptor2single_hosts(log, desc, single_hosts, kvm_server_dict): - """ - Transfer an descriptor of single hosts to an array of ClientRPCHost - """ - if desc.rdh_purpose 
!= lipe_test_scheduler.PURPOSE_TEST: - log.cl_error("unexpected purpose, expected [%s], got [%s]", - lipe_test_scheduler.PURPOSE_TEST, - desc.rdh_purpose) - return None - if len(single_hosts) != 0: - log.cl_error("allocated too many hosts") - return None - if len(desc.rd_resources) != TestCluster.HOST_NUMBER: - log.cl_error("unexpected host number in the test resource, " - "expected %d, got %d", TestCluster.HOST_NUMBER, - len(desc.rd_resources)) - return None - return resources2hosts(log, desc.rd_resources, kvm_server_dict) - - -def descriptions2cluster(log, workspace, descs): - """ - Use the allocated resources to group a cluster - """ - hosts = [] - build_host = None - kvm_server_dict = {} - rpc_ip_address = None - pairs = [] - for desc in descs: - if desc.rd_type == lipe_test_scheduler.RESOURCE_TYPE_IP_ADDRESS: - if rpc_ip_address is not None: - log.cl_error("unexpected multiple IPs") - return None - if len(desc.rd_resources) != 1: - log.cl_error("unexpected IP number in a resource, expected 1, " - "got %d", len(desc.rd_resources)) - return None - rpc_ip_address = desc.rd_resources[0] - log.cl_info("got address [%s]", rpc_ip_address.ripa_address) - elif desc.rd_type == lipe_test_scheduler.RESOURCE_TYPE_HOST: - if desc.rdh_same_kvm_server: - ret = descriptor2pair_hosts(log, desc, pairs, kvm_server_dict) - if ret: - return None - elif desc.rdh_purpose == lipe_test_scheduler.PURPOSE_BUILD: - build_host = descriptor2build_hosts(log, desc, build_host, - kvm_server_dict) - if build_host is None: - return None - else: - hosts = descriptor2single_hosts(log, desc, hosts, kvm_server_dict) - if hosts is None: - return None - return TestCluster(workspace, build_host, hosts, pairs, rpc_ip_address, kvm_server_dict) - - -def run_test_with_resources_allocated(log, workspace, launch_argument, descs): - """ - Run the test with resources allocated - """ - cluster = descriptions2cluster(log, workspace, descs) - if cluster is None: - log.cl_error("failed to group to a cluster from 
the resources") - return -1 - return cluster.tc_run_test(log, launch_argument) - - -def run_test_connected(log, workspace, launch_argument, scheduler_id, jobid, proxy): - """ - Run the test with connection to scheduler - """ - # pylint: disable=too-many-arguments - ret, descs1, descs2 = allocate_hosts_and_ip(log, scheduler_id, jobid, - proxy) - if ret: - log.cl_error("failed to allocate hosts and IP") - return -1 - - descs = descs1 + descs2 - - retval = run_test_with_resources_allocated(log, workspace, launch_argument, - descs) - - ret = proxy.ts_resources_release(scheduler_id, jobid, descs) - if ret: - log.cl_error("failed to release hosts and IPs") - return -1 - return retval - - -def send_heartbeat(log, proxy, scheduler_id, jobid): - """ - Send heartbeat to scheduler - Improvement: the main thread should send heatbeat and check time - from time to time in case the heatbeat thread is broken - """ - # pylint: disable=global-statement,broad-except - global CHECK_TIME, EXIT_REASON, SHUTTING_DOWN - ret = 0 - - log.cl_debug("sending heartbeat") - now = time_util.utcnow() - try: - ret = proxy.ts_job_heartbeat(scheduler_id, jobid) - if ret == 0: - CHECK_TIME = now - else: - SHUTTING_DOWN = True - return -1 - except Exception, err: - disconnet_time = (now - CHECK_TIME).seconds - if disconnet_time <= lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT: - extra_string = ("will time out in [%d] seconds" % - (lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT - disconnet_time)) - else: - extra_string = ("already time out") - log.cl_error("failed to send heartbeat of job [%s], " - "%s: %s, %s", - jobid, extra_string, str(err), traceback.format_exc()) - ret = -1 - disconnet_time = (now - CHECK_TIME).seconds - if disconnet_time > lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT: - EXIT_REASON = ("heatbeat of job [%s] time out, shutting down" % jobid) - SHUTTING_DOWN = True - return -1 - if ret: - log.cl_error("failed to send heartbeat") - else: - log.cl_debug("sent heartbeat") - return ret - - -def 
heartbeat_thread(log, proxy, scheduler_id, jobid): - """ - Thread that send heatbeat - """ - while True: - time.sleep(lipe_test_scheduler.TEST_HEARTBEAT_INTERVAL) - send_heartbeat(log, proxy, scheduler_id, jobid) - - -def run_test(log, workspace, launch_argument): - """ - Run the test - """ - log.cl_info("connecting to server [%s]", launch_argument.la_server) - proxy = xmlrpclib.ServerProxy(launch_argument.la_server, allow_none=True) - scheduler_id = proxy.ts_get_id() - jobid = proxy.ts_job_start(scheduler_id) - log.cl_info("got job ID [%s]", jobid) - - utils.thread_start(heartbeat_thread, (log, proxy, scheduler_id, jobid)) - - ret = run_test_connected(log, workspace, launch_argument, scheduler_id, - jobid, proxy) - - proxy.ts_job_stop(scheduler_id, jobid) - log.cl_info("released job [%s]", jobid) - return ret - - -def parse_options(log): - """ - Parse the options - """ - # pylint: disable=too-many-branches - launch_argument = LaunchArg() - options, arguments = getopt.getopt(sys.argv[1:], - "e:hl:p:s:", - ["e2fsprogs", - "help", - "host_timeout=", - "lustre=" - "source_path=", - "server="]) - - for opt, arg in options: - if opt == "--e2fsprogs" or opt == "-e": - ret = launch_argument.la_update_e2fsprogs_dir(log, arg) - if ret: - log.cl_error("invalid e2fsprogs path option [%s %s]", opt, arg) - sys.exit(1) - elif opt == "--help" or opt == "-h": - usage() - sys.exit(0) - elif opt == "--host_timeout": - ret = launch_argument.la_update_host_wait_time(log, int(arg)) - if ret: - log.cl_error("invalid server option [%s]", arg) - sys.exit(1) - elif opt == "--lustre" or opt == "-l": - ret = launch_argument.la_update_lustre_dir(log, arg) - if ret: - log.cl_error("invalid lustre path option [%s %s]", opt, arg) - sys.exit(1) - elif opt == "--server" or opt == "-s": - ret = launch_argument.la_update_server(log, arg) - if ret: - log.cl_error("invalid server option [%s]", arg) - sys.exit(1) - elif opt == "--source_path" or opt == "-p": - ret = 
launch_argument.la_update_source_path(log, arg) - if ret: - log.cl_error("invalid source path option [%s %s]", opt, arg) - sys.exit(1) - else: - log.cl_error("unkown option [%s %s]", opt, arg) - usage() - sys.exit(1) - - if len(arguments) != 0: - log.cl_error("unkown arguments %s", arguments) - usage() - sys.exit(1) - - ret = launch_argument.la_check_arguments(log) - if ret: - sys.exit(1) - return launch_argument - - -def main(): - """ - Run the test - """ - now = time_util.utcnow() - workspace = (LIPE_TEST_LAUNCH_LOG_DIR + "/" + - time_util.local_strftime(now, ('%Y-%m-%d-%H_%M_%S'))) - retval = utils.run("mkdir -p %s" % workspace) - if retval.cr_exit_status != 0: - utils.eprint("failed to create directory [%s]\n" % workspace) - sys.exit(1) - - log = clog.get_log(resultsdir=workspace) - launch_argument = parse_options(log) - - return_value = 0 - ret = run_test(log, workspace, launch_argument) - if ret: - log.cl_error("failed to launch test") - return_value = - 1 - else: - log.cl_info("successfully launched the test") - log.cl_info("moving the log from [%s] to [%s]", workspace, - launch_argument.la_source_path) - log.cl_fini() - - # No matter failure or sucess, move the workspace to source path - command = ("mv %s %s" % (workspace, launch_argument.la_source_path)) - retval = utils.run(command) - if retval.cr_exit_status != 0: - utils.eprint("failed to run command [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]" % - (command, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr)) - utils.eprint("please check, backup and maybe delete directory of " - "[%s] on local host" % - (workspace)) - sys.exit(-1) - basename = os.path.basename(workspace) - logdir = launch_argument.la_source_path + "/" + basename - if return_value: - utils.eprint("please check [%s] for more log" % logdir) - else: - utils.oprint("please check [%s] for more log" % logdir) - - sys.exit(return_value) diff --git a/lipe/pyltest/lipe_test_scheduler.py b/lipe/pyltest/lipe_test_scheduler.py 
deleted file mode 100644 index 02a49e0..0000000 --- a/lipe/pyltest/lipe_test_scheduler.py +++ /dev/null @@ -1,1572 +0,0 @@ -# Copyright (c) 2016 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -The scheduler manages the usage of test hosts. All test launchers -need to allocate hosts from the scheduler. -""" -# pylint: disable=too-many-lines -import SimpleXMLRPCServer -import threading -import signal -import time -import os -import re -import traceback -import socket -import yaml - -# Local libs -from pylustre import utils -from pylustre import time_util -from pylustre import cstr -from pylustre import cmd_general -from pylustre import lipe_virt -from pylustre import ssh_host - -TEST_SCHEDULER_PORT = 1234 -TEST_SCHEDULER_LOG_DIR = "/var/log/lipe_test_scheduler" -TEST_SCHEDULER_CONFIG = "/etc/lipe_test_scheduler.conf" - - -PURPOSE_BUILD = "build" -PURPOSE_TEST = "test" - -RESOURCE_TYPE_HOST = "host" -RESOURCE_TYPE_IP_ADDRESS = "ip_address" - -GLOBAL_LOG = None -SHUTTING_DOWN = False -MIN_GOOD_RES_CHECK_INTERVAL = 7200 -MIN_BAD_RES_CHECK_INTERVAL = 3600 -# Need to wait at least this long time before assert that the IP is not -# used by any host -IP_MAX_FAILOVER_TIME = 60 -# The interval to check whether a IP is being used or not -IP_CHECK_INTERVAL = 3 -# Need to check at least this times before assert that the IP is not -# used by any host -IP_MIN_CHECK_TIMES = 5 - -# The heatbeat interval -TEST_HEARTBEAT_INTERVAL = 10 -# The heatbeat timeout. Both scheduler and client will abort the job if -# heatbeat is not recived/sent correctly for this long time. 
-TEST_HEARTBEAT_TIMEOUT = 20 - - -class ScheduledResource(object): - """ - Each resource has this type - """ - # pylint: disable=too-few-public-methods,too-many-instance-attributes - # This return value signs that the resource is being checked - RESOURCE_IS_BUSY = 1 - - def __init__(self, name, resource_type, concurrency): - self.sr_is_clean = False - # The time when cleaning up the resource - self.sr_check_time = 0 - self.sr_max_concurrency = concurrency - self.sr_concurrency = 0 - self.sr_job_sequence = None - self.rr_resource_type = resource_type - self.sr_error = 0 - self.sr_name = name - self.sr_cleaning = False - - def sr_dirty(self): - """ - Dirty the resource so as to check later - """ - self.sr_check_time = 0 - self.sr_is_clean = False - - -class RPCResouce(object): - """ - The resource for transfering between scheduler and its clients - """ - # pylint: disable=too-few-public-methods - def __init__(self, resource_type): - self.rr_resource_type = resource_type - - -class RPCIPAddress(RPCResouce): - # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - The IP address to manage in this scheduler - """ - def __init__(self, address, bindnetaddr): - super(RPCIPAddress, self).__init__(RESOURCE_TYPE_IP_ADDRESS) - self.ripa_address = address - self.ripa_bindnetaddr = bindnetaddr - - -class RPCHost(RPCResouce): - """ - The host for transfering between scheduler and its clients - """ - # pylint: disable=too-few-public-methods - def __init__(self, hostname, kvm_server_hostname=None, - expected_distro=None, ipv4_addresses=None, - kvm_template_ipv4_address=None, kvm_template=None): - # pylint: disable=too-many-arguments - super(RPCHost, self).__init__(RESOURCE_TYPE_HOST) - self.lrh_hostname = hostname - self.lrh_kvm_server_hostname = kvm_server_hostname - self.lrh_expected_distro = expected_distro - self.lrh_ipv4_addresses = ipv4_addresses - self.lrh_kvm_template_ipv4_address = kvm_template_ipv4_address - # Only server side kvm_template uses to 
send info to client - self.lrh_kvm_template = kvm_template - - -class TestHost(ScheduledResource): - # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - The host that is managed by scheduler - """ - def __init__(self, hostname, distro, purpose, tag, concurrency, - ipv4_addresses=None, kvm_server_hostname=None, - kvm_template_ipv4_address=None, - kvm_template=None): - # pylint: disable=too-many-arguments - super(TestHost, self).__init__(hostname, RESOURCE_TYPE_HOST, concurrency) - self.th_hostname = hostname - self.th_purpose = purpose - self.th_distro = distro - self.th_tag = tag - self.th_kvm_server_hostname = kvm_server_hostname - self.th_kvm_template_ipv4_address = kvm_template_ipv4_address - self.th_ipv4_addresses = ipv4_addresses - self.th_kvm_template = kvm_template - self.th_host = ssh_host.SSHHost(hostname) - - def th_print_info(self, log): - """ - Print the info of this host - """ - log.cl_debug("added host [%s], purpose [%s], distro [%s], tag [%s], " - "kvm server [%s]", - self.th_hostname, - self.th_purpose, - self.th_distro, - self.th_tag, - self.th_kvm_server_hostname) - - def _sr_cleanup(self, log, scheduler): - """ - Clean up the host - - Improvement: call shared functions in lipe_virt directly - Improvement: cleanup directories for spaces - """ - # pylint: disable=unused-argument - if self.th_purpose == PURPOSE_BUILD: - return 0 - - host = self.th_host - service_names = ["corosync", "pacemaker"] - for service_name in service_names: - ret = host.sh_service_stop(log, service_name) - if ret: - log.cl_error("failed to stop service [%s] on host [%s]", - service_name, host.sh_hostname) - return -1 - - ret = host.sh_service_disable(log, service_name) - if ret: - log.cl_error("failed to disable service [%s] on host [%s]", - service_name, host.sh_hostname) - return -1 - - return 0 - - def sr_cleanup(self, log, scheduler): - """ - Clean up the host - - Improvement: call shared functions in lipe_virt directly - Improvement: cleanup 
directories for spaces - """ - log.cl_info("cleaning up host [%s]", self.th_hostname) - self.sr_cleaning = True - ret = self._sr_cleanup(log, scheduler) - self.sr_cleaning = False - if ret: - log.cl_info("failed to clean up host [%s]", self.th_hostname) - else: - log.cl_info("cleaned up host [%s]", self.th_hostname) - return ret - - -def _wait_disconnected(log, host): - """ - Check whether host is disconnected from this host - """ - ret = host.sh_ping(log, slient=True) - if ret: - return 0 - log.cl_info("still able to connect to host [%s] from local host", - host.sh_hostname) - return -1 - - -def wait_disconnected(log, host, timeout=10, sleep_interval=1): - """ - Wait until the host can not be connected from this host - """ - return utils.wait_condition(log, _wait_disconnected, - (host, ), - timeout=timeout, - sleep_interval=sleep_interval) - - -class IPAddress(ScheduledResource): - # pylint: disable=too-few-public-methods,too-many-instance-attributes - """ - The IP address to manage in this scheduler - """ - def __init__(self, address, bindnetaddr): - super(IPAddress, self).__init__(address, RESOURCE_TYPE_IP_ADDRESS, 1) - self.ipa_address = address - self.ipa_bindnetaddr = bindnetaddr - self.ipa_host = ssh_host.SSHHost(address) - - def _sr_cleanup(self, log, scheduler): - """ - Cleanup the IP adress by stopping the corosync/pacemaker on any - host that is using the IP - """ - ip_host = self.ipa_host - idle_time = None - checked_times = 0 - - while True: - now_time = time.time() - ret = ip_host.sh_ping(log, silent=True) - if ret: - log.cl_debug("can not ping IP [%s]", self.ipa_address) - if idle_time is None: - idle_time = now_time - checked_times = 0 - checked_times += 1 - # The IP has not been used for a long time, and enough times - # have been chcked, so clean to use - if (idle_time + IP_MAX_FAILOVER_TIME < now_time and - checked_times > IP_MIN_CHECK_TIMES): - return 0 - # Not long enough to decide, sleep a while and check later - time.sleep(IP_CHECK_INTERVAL) 
- continue - else: - log.cl_debug("can ping IP [%s]", self.ipa_address) - idle_time = None - checked_times = 0 - - command = "hostname" - retval = ip_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_info("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - ip_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - log.cl_info("maybe the host with IP [%s] has been cleaned up, " - "will check in the next loop", self.ipa_address) - continue - - lines = retval.cr_stdout.splitlines() - if len(lines) != 1: - log.cl_error("unexpected output of command [%s] on host [%s]: " - "[%s]", command, ip_host.sh_hostname, - retval.cr_stdout) - return -1 - hostname = lines[0] - - res = scheduler.ts_find_host(hostname) - if res is None: - log.cl_error("host [%s] is not managed by the scheduler but " - "is using IP [%s]", hostname, self.ipa_address) - return -1 - - # The host is being cleaned, so the IP might be released soon. - # Check that in the next loop. 
- if res.sr_cleaning: - log.cl_info("host [%s] is being cleaned, will check IP [%s] " - "in next loop", hostname, self.ipa_address) - continue - - ret = scheduler.ts_resource_cleanup(res) - if ret == ScheduledResource.RESOURCE_IS_BUSY: - log.cl_info("host [%s] is busy, checking in next " - "loop", hostname, self.ipa_address) - continue - elif ret: - log.cl_error("failed to cleanup host [%s]", - hostname) - return -1 - - ret = wait_disconnected(log, ip_host) - if ret: - log.cl_error("still be able to connect to [%s] after fixing " - "host [%s]", ip_host.sh_hostname, hostname) - return -1 - return 0 - - def sr_cleanup(self, log, scheduler): - """ - Cleanup the IP adress by stopping the corosync/pacemaker on any - host that is using the IP - """ - log.cl_info("cleaning up IP address [%s]", self.ipa_address) - self.sr_cleaning = True - ret = self._sr_cleanup(log, scheduler) - self.sr_cleaning = False - if ret: - log.cl_info("failed to clean up IP address [%s]", self.ipa_address) - else: - log.cl_info("cleaned up IP address [%s]", self.ipa_address) - return ret - - -class ResourceDescriptor(object): - # pylint: disable=too-few-public-methods - """ - Used when trying to allocate a resource - """ - def __init__(self, resource_type, number_min=1, number_max=1, - resources=None): - self.rd_type = resource_type - self.rd_number_min = number_min - self.rd_number_max = number_max - if resources is None: - self.rd_resources = [] - else: - self.rd_resources = list(resources) - - -class ResourceDescriptorIPAddress(ResourceDescriptor): - # pylint: disable=too-few-public-methods - """ - Used when trying to allocate a host - """ - def __init__(self, number_min=1, number_max=1, rpc_addresses=None): - super(ResourceDescriptorIPAddress, self).__init__(RESOURCE_TYPE_IP_ADDRESS, - number_min=number_min, - number_max=number_max, - resources=rpc_addresses) - - -class ResourceDescriptorHost(ResourceDescriptor): - # pylint: disable=too-few-public-methods - """ - Used when trying to allocate 
a host - """ - def __init__(self, purpose, distro=ssh_host.DISTRO_RHEL7, - same_kvm_server=False, tag=None, number_min=1, number_max=1, - hosts=None): - # pylint: disable=too-many-arguments - super(ResourceDescriptorHost, self).__init__(RESOURCE_TYPE_HOST, - number_min=number_min, - number_max=number_max, - resources=hosts) - self.rdh_distro = distro - self.rdh_purpose = purpose - self.rdh_same_kvm_server = same_kvm_server - self.rdh_tag = tag - - -def resource_compare(host_x, host_y): - """ - Sort the resource according to cleanup time - """ - return host_x.sr_check_time > host_y.sr_check_time - - -class TestSchedulerJob(object): - """ - Each test client allocates a job in the scheduler. Hosts could be - allocated into the job afterwards. - """ - def __init__(self, scheduler, jobid, sequence): - self.laj_jobid = jobid - self.laj_hosts = [] - self.laj_scheduler = scheduler - self.laj_sequence = sequence - self.laj_check_time = time_util.utcnow() - self.laj_ip_addresses = [] - - def laj_host_add(self, lhost): - """ - Add one host into the job - """ - self.laj_hosts.append(lhost) - - def laj_has_host(self, lhost): - """ - Check whether a host is in this job - """ - return lhost in self.laj_hosts - - def laj_host_remove(self, lhost): - """ - Remove one host from the job - """ - self.laj_hosts.remove(lhost) - - def laj_has_ip_address(self, ip_address): - """ - Check whether a ip address is in this job - """ - return ip_address in self.laj_ip_addresses - - def laj_ip_address_add(self, ip_address): - """ - Add one host into the job - """ - self.laj_ip_addresses.append(ip_address) - - def laj_ip_address_remove(self, ip_address): - """ - Remove one host from the job - """ - self.laj_ip_addresses.remove(ip_address) - - -def rpc2descriptors(log, rpc_descriptors, same_kvm_host_descriptors, - other_descriptors): - """ - Parse the descriptors from RPC to objects - """ - # pylint: disable=too-many-locals - for descriptor in rpc_descriptors: - descriptor_type = 
descriptor["rd_type"] - number_min = descriptor["rd_number_min"] - number_max = descriptor["rd_number_max"] - resources = descriptor["rd_resources"] - if descriptor_type == RESOURCE_TYPE_HOST: - distro = descriptor["rdh_distro"] - purpose = descriptor["rdh_purpose"] - same_kvm_server = descriptor["rdh_same_kvm_server"] - tag = descriptor["rdh_tag"] - hosts = [] - for res in resources: - hostname = res["lrh_hostname"] - kvm_server_hostname = res["lrh_kvm_server_hostname"] - expected_distro = res["lrh_expected_distro"] - ipv4_addresses = res["lrh_ipv4_addresses"] - kvm_template_ipv4_address = res["lrh_kvm_template_ipv4_address"] - kvm_template = res["lrh_kvm_template"] - host = RPCHost(hostname, kvm_server_hostname=kvm_server_hostname, - expected_distro=expected_distro, - ipv4_addresses=ipv4_addresses, - kvm_template_ipv4_address=kvm_template_ipv4_address, - kvm_template=kvm_template) - hosts.append(host) - host_desc = ResourceDescriptorHost(purpose, distro=distro, - same_kvm_server=same_kvm_server, - tag=tag, - number_min=number_min, - number_max=number_max, - hosts=hosts) - if same_kvm_server: - same_kvm_host_descriptors.append(host_desc) - else: - other_descriptors.append(host_desc) - elif descriptor_type == RESOURCE_TYPE_IP_ADDRESS: - rpc_addresses = [] - for res in resources: - address = res["ripa_address"] - bindnetaddr = res["ripa_bindnetaddr"] - rpc_address = RPCIPAddress(address, bindnetaddr) - rpc_addresses.append(rpc_address) - ip_desc = ResourceDescriptorIPAddress(number_min=number_min, - number_max=number_max, - rpc_addresses=rpc_addresses) - other_descriptors.append(ip_desc) - else: - log.cl_error("wrong descriptor type [%s]", descriptor_type) - return -1 - return 0 - - -class TestScheduler(object): - """ - The main object of the scheduler. 
- """ - # pylint: disable=too-many-instance-attributes - def __init__(self, log, scheduler_id, hosts, addresses): - self.ts_log = log - self.ts_resources = hosts + addresses - self.ts_hosts = [] - self.ts_addresses = addresses - self.ts_job_dict = {} - self.ts_condition = threading.Condition() - self.ts_jobid_sequence = 0 - self.ts_id = scheduler_id - self.ts_id += ("_%d" % os.getpid()) - log.cl_info("ID of scheduler: [%s]", self.ts_id) - self.ts_kvm_hosts_dict = {} - for host in hosts: - self._ts_add_host(host) - - def _ts_add_host(self, host): - """ - Add host into the list - """ - log = self.ts_log - host.th_print_info(log) - self.ts_hosts.append(host) - - kvm_server_hostname = host.th_kvm_server_hostname - - if kvm_server_hostname is not None: - if kvm_server_hostname not in self.ts_kvm_hosts_dict: - self.ts_kvm_hosts_dict[kvm_server_hostname] = [] - kvm_hosts = self.ts_kvm_hosts_dict[kvm_server_hostname] - kvm_hosts.append(host) - - def ts_find_ip_address(self, ip_address): - """ - Find the IP address by its hostname. Lock should be acquired in advance. - """ - for ip_address_obj in self.ts_addresses: - if ip_address_obj.ipa_address == ip_address: - return ip_address_obj - return None - - def ts_find_host(self, hostname): - """ - Find the host by its hostname. Lock should be acquired in advance. - """ - for lhost in self.ts_hosts: - if lhost.th_hostname == hostname: - return lhost - return None - - def ts_get_id(self): - """ - Return the scheduler ID. Scheduler ID is the ID of this scheduler. It - could prevent clients from operating on a wrong scheduler. Usually - called remotely by client. - """ - return self.ts_id - - def ts_host_list(self, error): - """ - List the hosts that the scheduler is managing. Usually called remotely - by console. 
- """ - log = self.ts_log - log.cl_debug("listing host") - format_string = "%-20s%-9s%-8s%-10s%-10s%-9s%-7s%-12s%-11s\n" - output = format_string % ("Host", "Purpose", "Distro", "KVM host", - "Job slot", "Job seq", "Error", - "Clean time", "Next clean") - output += '{0:->80}'.format("") + "\n" - now = time.time() - for lhost in self.ts_hosts: - if error: - if lhost.sr_concurrency > 0: - continue - if lhost.sr_is_clean: - continue - if lhost.sr_max_concurrency > 1: - continue - if lhost.sr_check_time == 0: - fix_string = "not clean" - if lhost.sr_cleaning: - next_check_string = "cleaning" - elif lhost.sr_concurrency > 0: - next_check_string = "occupied" - else: - next_check_string = "initing" - else: - if not lhost.sr_is_clean: - fix_string = "not clean" - next_check = lhost.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL - else: - fix_time = time.gmtime(lhost.sr_check_time) - fix_string = time.strftime("%H:%M:%S", fix_time) - next_check = lhost.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL - next_check_time = time.gmtime(next_check) - next_check_string = time.strftime("%H:%M:%S", next_check_time) - next_check_string += "(%d)" % (int(next_check - now)) - job_slot = ("%d/%d" % (lhost.sr_concurrency, - lhost.sr_max_concurrency)) - output += (format_string % - (lhost.th_hostname, - lhost.th_purpose, - lhost.th_distro, - lhost.th_kvm_server_hostname, - job_slot, - lhost.sr_job_sequence, - lhost.sr_error, - fix_string, - next_check_string)) - return output - - def ts_ip_address_list(self, error): - """ - List the ip_address that the scheduler is managing. Usually called remotely - by console. 
- """ - log = self.ts_log - log.cl_debug("listing IP addresses") - format_string = "%-17s%-17s%-10s%-9s%-7s%-12s%-11s\n" - output = format_string % ("IP", "Bindnetaddr", - "Job slot", "Job seq", "Error", - "Clean time", "Next clean") - output += '{0:->80}'.format("") + "\n" - now = time.time() - - for address in self.ts_addresses: - if error: - if address.sr_concurrency > 0: - continue - if address.sr_is_clean: - continue - if address.sr_max_concurrency > 1: - continue - if address.sr_check_time == 0: - fix_string = "not clean" - if address.sr_cleaning: - next_check_string = "cleaning" - elif address.sr_concurrency > 0: - next_check_string = "occupied" - else: - next_check_string = "initing" - else: - if not address.sr_is_clean: - fix_string = "not clean" - next_check = address.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL - else: - fix_time = time.gmtime(address.sr_check_time) - fix_string = time.strftime("%H:%M:%S", fix_time) - next_check = address.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL - next_check_time = time.gmtime(next_check) - next_check_string = time.strftime("%H:%M:%S", next_check_time) - next_check_string += "(%d)" % (int(next_check - now)) - job_slot = ("%d/%d" % (address.sr_concurrency, - address.sr_max_concurrency)) - output += (format_string % - (address.ipa_address, - address.ipa_bindnetaddr, - job_slot, - address.sr_job_sequence, - address.sr_error, - fix_string, - next_check_string)) - return output - - def _ts_host_list_allocate(self, host_list, job, distro, number_min, - number_max, purpose, tag): - """ - Allocate hosts from a list, if failed, return [] - """ - # pylint: disable=too-many-arguments,no-self-use - log = self.ts_log - log.cl_debug("listing IP addresses") - rpc_hosts = [] - hosts = [] - # reverse the host from time to time, so that the host in the tail of - # the list have equal chance to be allocated - host_list.reverse() - - # Check the potential hosts that can be allocated - for host in host_list: - if host.th_distro != distro: - 
log.cl_debug("the distro [%s] of host [%s] != [%s]", - host.th_distro, host.th_hostname, distro) - continue - if host.th_purpose != purpose: - log.cl_debug("the purpose [%s] of host [%s] != [%s]", - host.th_purpose, host.th_hostname, purpose) - continue - if tag is not None and host.th_tag != tag: - log.cl_debug("the tag [%s] of host [%s] != [%s]", - host.th_tag, host.th_hostname, tag) - continue - if host.sr_max_concurrency <= host.sr_concurrency: - log.cl_debug("no currency for host [%s]", host.th_hostname) - continue - hosts.append(host) - - # Not enough hosts, abort - if len(hosts) < number_min: - log.cl_error("not enough hosts to allocate, needs [%d], have [%d]", - number_min, len(hosts)) - return rpc_hosts - - hosts.sort(resource_compare) - # Allocate the hosts - for host in hosts: - if len(rpc_hosts) >= number_max: - break - host.sr_concurrency += 1 - host.sr_job_sequence = job.laj_sequence - job.laj_host_add(host) - rpc_host = RPCHost(host.th_hostname, - kvm_server_hostname=host.th_kvm_server_hostname, - expected_distro=host.th_distro, - ipv4_addresses=host.th_ipv4_addresses, - kvm_template=host.th_kvm_template) - rpc_hosts.append(rpc_host) - log.cl_debug("preallocated host [%s] for job [%s]", - host.th_hostname, job.laj_jobid) - return rpc_hosts - - def _ts_job_allocate_ip_resource_holding_lock(self, job, desc): - """ - Allocated one resource for host, if fails, returen -1 - """ - log = self.ts_log - log.cl_debug("allocating a IP resource for job [%s]", job.laj_jobid) - rpc_addresses = [] - # reverse the IPs from time to time, so that the host in the tail of - # the list have equal chance to be allocated - self.ts_addresses.reverse() - - addresses = [] - # Check the potential addresses that can be allocated - for address in self.ts_addresses: - if address.sr_max_concurrency <= address.sr_concurrency: - continue - # Can not allocate an IP that might being used - if not address.sr_is_clean: - continue - addresses.append(address) - - # Not enough hosts, abort 
- if len(addresses) < desc.rd_number_min: - log.cl_info("not enough IP to allocate, needs [%d], have [%d]", - desc.rd_number_min, len(addresses)) - return -1 - - addresses.sort(resource_compare) - # Allocate the address - for address in addresses: - if len(rpc_addresses) >= desc.rd_number_max: - break - address.sr_concurrency += 1 - address.sr_job_sequence = job.laj_sequence - job.laj_ip_address_add(address) - rpc_address = RPCIPAddress(address.ipa_address, - address.ipa_bindnetaddr) - rpc_addresses.append(rpc_address) - desc.rd_resources = rpc_addresses - return 0 - - def _ts_job_allocate_host_resource_holding_lock(self, job, desc): - """ - Allocated one resource for host, if fails, returen -1 - """ - log = self.ts_log - if not desc.rdh_same_kvm_server: - log.cl_debug("allocating a host resource that doesn't need to " - "share KVM server for job [%s]", job.laj_jobid) - rpc_hosts = self._ts_host_list_allocate(self.ts_hosts, job, - desc.rdh_distro, - desc.rd_number_min, - desc.rd_number_max, - desc.rdh_purpose, - desc.rdh_tag) - else: - log.cl_debug("allocating a hosts resource that shares KVM server " - "for job [%s]", job.laj_jobid) - for host_list in self.ts_kvm_hosts_dict.values(): - rpc_hosts = self._ts_host_list_allocate(host_list, job, - desc.rdh_distro, - desc.rd_number_min, - desc.rd_number_max, - desc.rdh_purpose, - desc.rdh_tag) - if len(rpc_hosts) != 0: - break - if len(rpc_hosts) == 0: - return -1 - desc.rd_resources = rpc_hosts - return 0 - - def _ts_job_allocate_resource_holding_lock(self, job, desc): - """ - Allocated one resource, if fails, returen -1 - """ - log = self.ts_log - log.cl_debug("allocating a resource for job [%s]", job.laj_jobid) - if desc.rd_type == RESOURCE_TYPE_HOST: - return self._ts_job_allocate_host_resource_holding_lock(job, desc) - elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS: - return self._ts_job_allocate_ip_resource_holding_lock(job, desc) - else: - log.cl_error("wrong resource type [%s]", desc.rd_type) - return -1 - - def 
_ts_job_allocate_resources_holding_lock(self, job, descs): - """ - Allocate multiple resources holding lock, if any of them fails, -1 - """ - log = self.ts_log - log.cl_debug("allocating resources for job [%s]", job.laj_jobid) - for desc in descs: - ret = self._ts_job_allocate_resource_holding_lock(job, desc) - if ret: - log.cl_debug("failed to allocate resource, releasing " - "allocated resource of job [%s]", job.laj_jobid) - self._ts_job_release_resources_holding_lock(job, descs) - return ret - return 0 - - def _ts_job_release_one_host_holding_lock(self, job, res): - """ - Release a host resources - """ - log = self.ts_log - log.cl_debug("releasing host [%s] for job [%s]", res.lrh_hostname, - job.laj_jobid) - test_host = self.ts_find_host(res.lrh_hostname) - if test_host is None: - log.cl_error("failed to release host [%s], not exists in " - "the scheduler", res.lrh_hostname) - return -1 - - if not job.laj_has_host(test_host): - log.cl_error("failed to release host [%s], not used by the " - "job [%s]", res.lrh_hostname, job.laj_jobid) - return -1 - - job.laj_host_remove(test_host) - test_host.sr_job_sequence = None - test_host.sr_concurrency -= 1 - return 0 - - def _ts_job_release_one_ip_holding_lock(self, job, res): - """ - Release a host resources - """ - log = self.ts_log - log.cl_debug("releasing a IP resource for job [%s]", job.laj_jobid) - ip_address_obj = self.ts_find_ip_address(res.ripa_address) - if ip_address_obj is None: - log.cl_error("failed to release IP address [%s], not exists in " - "the scheduler", res.ripa_address) - return -1 - - if not job.laj_has_ip_address(ip_address_obj): - log.cl_error("failed to release IP address [%s], not used by the " - "job [%s]", res.ripa_address, job.laj_jobid) - return -1 - - job.laj_ip_address_remove(ip_address_obj) - ip_address_obj.sr_job_sequence = None - ip_address_obj.sr_concurrency -= 1 - return 0 - - def _ts_job_release_one_holding_lock(self, job, res): - """ - Release one resource - """ - log = self.ts_log 
- if res.rr_resource_type == RESOURCE_TYPE_IP_ADDRESS: - return self._ts_job_release_one_ip_holding_lock(job, res) - elif res.rr_resource_type == RESOURCE_TYPE_HOST: - return self._ts_job_release_one_host_holding_lock(job, res) - else: - log.cl_error("wrong resource type [%s]", res.rr_resource_type) - return -1 - - def _ts_job_release_resource_holding_lock(self, job, desc): - """ - Release a resource - """ - log = self.ts_log - log.cl_debug("releasing a resource for job [%s]", job.laj_jobid) - retval = 0 - for res in desc.rd_resources[:]: - ret = self._ts_job_release_one_holding_lock(job, res) - if ret: - log.cl_error("failed to release one resource") - retval = ret - else: - desc.rd_resources.remove(res) - - return retval - - def _ts_job_release_resources_holding_lock(self, job, descs): - """ - Release a lot resources - """ - log = self.ts_log - log.cl_debug("releasing resources for job [%s]", job.laj_jobid) - retval = 0 - for desc in descs: - ret = self._ts_job_release_resource_holding_lock(job, desc) - if ret: - retval = ret - return retval - - def _ts_resources_dirty_holding_lock(self, log, jobid, descs): - """ - Dirty the resources in the descriptors - Only call this when about to return the resources to client - """ - # pylint: disable=no-self-use - host_names = [] - ip_addresses = [] - for desc in descs: - for rpc_res in desc.rd_resources: - if desc.rd_type == RESOURCE_TYPE_HOST: - res = self.ts_find_host(rpc_res.lrh_hostname) - host_names.append(rpc_res.lrh_hostname) - elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS: - res = self.ts_find_ip_address(rpc_res.ripa_address) - ip_addresses.append(rpc_res.ripa_address) - else: - log.cl_error("invalid resource type [%s]", desc.rd_type) - return -1 - res.sr_dirty() - - log.cl_error("allocated hosts %s and IPs %s for job [%s]", - host_names, ip_addresses, jobid) - return 0 - - def ts_resources_allocate(self, scheduler_id, jobid, descriptors): - """ - Allocate multiple resources, if any of them fails, return [] - """ - 
log = self.ts_log - log.cl_debug("allocating resources for job [%s]", jobid) - if scheduler_id != self.ts_id: - log.cl_error("wrong scheduler ID [%s], expected [%s]", - scheduler_id, self.ts_id) - return [] - - same_kvm_host_descriptors = [] - other_descriptors = [] - ret = rpc2descriptors(log, descriptors, same_kvm_host_descriptors, - other_descriptors) - if ret: - log.cl_error("failed to parse resource descriptors from RPC") - return [] - ret_descriptors = same_kvm_host_descriptors + other_descriptors - - self.ts_condition.acquire() - if jobid not in self.ts_job_dict: - log.cl_error("resource allocation from unknown job [%s]", jobid) - self.ts_condition.release() - return [] - job = self.ts_job_dict[jobid] - ret = self._ts_job_allocate_resources_holding_lock(job, same_kvm_host_descriptors) - if ret == 0: - ret = self._ts_job_allocate_resources_holding_lock(job, other_descriptors) - if ret: - log.cl_debug("failed to allocated resources for job [%s]", jobid) - else: - log.cl_error("failed to allocated host resources that share the " - "same KVM server for job [%s]", jobid) - ret = -1 - - if ret: - log.cl_debug("releasing allocated resource of job [%s]", jobid) - self._ts_job_release_resources_holding_lock(job, ret_descriptors) - ret_descriptors = [] - else: - ret = self._ts_resources_dirty_holding_lock(log, jobid, - ret_descriptors) - if ret: - self._ts_job_release_resources_holding_lock(job, ret_descriptors) - ret_descriptors = [] - - job.laj_check_time = time_util.utcnow() - self.ts_condition.release() - return ret_descriptors - - def _ts_print_release_message(self, log, jobid, descs): - """ - Print the release message - """ - # pylint: disable=no-self-use - host_names = [] - ip_addresses = [] - for desc in descs: - for rpc_res in desc.rd_resources: - if desc.rd_type == RESOURCE_TYPE_HOST: - host_names.append(rpc_res.lrh_hostname) - elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS: - ip_addresses.append(rpc_res.ripa_address) - else: - log.cl_error("invalid resource 
type [%s]", desc.rd_type) - return -1 - - log.cl_info("releasing hosts %s and IPs %s for job [%s]", - host_names, ip_addresses, jobid) - return 0 - - def ts_resources_release(self, scheduler_id, jobid, descriptors): - """ - Release multiple resources - """ - log = self.ts_log - if scheduler_id != self.ts_id: - log.cl_error("wrong scheduler ID [%s], expected [%s]", - scheduler_id, self.ts_id) - return -1 - - same_kvm_host_descriptors = [] - other_descriptors = [] - ret = rpc2descriptors(log, descriptors, same_kvm_host_descriptors, - other_descriptors) - if ret: - log.cl_error("failed to parse resource descriptors from RPC") - return -1 - - descs = same_kvm_host_descriptors + other_descriptors - self._ts_print_release_message(log, jobid, descs) - - self.ts_condition.acquire() - if jobid not in self.ts_job_dict: - log.cl_error("resource releasing from unknown job [%s]", jobid) - self.ts_condition.release() - return -1 - job = self.ts_job_dict[jobid] - ret = self._ts_job_release_resources_holding_lock(job, descs) - job.laj_check_time = time_util.utcnow() - self.ts_condition.notifyAll() - self.ts_condition.release() - return ret - - def ts_ip_cleanup(self, ip_address): - """ - fix a host - """ - log = self.ts_log - log.cl_info("cleaning up IP address [%s]", ip_address) - res = self.ts_find_ip_address(ip_address) - if res is None: - log.cl_error("failed to cleanup IP address [%s], not exists in " - "the scheduler", ip_address) - return -1 - - ret = self.ts_resource_cleanup(res) - if ret: - log.cl_error("failure during fix process of IP [%s]", - ip_address) - return -1 - - log.cl_info("cleaned up IP [%s]", ip_address) - return 0 - - def ts_host_cleanup(self, hostname): - """ - fix a host - """ - log = self.ts_log - log.cl_info("cleaning up host [%s]", hostname) - res = self.ts_find_host(hostname) - if res is None: - log.cl_error("failed to cleanup host [%s], not exists in " - "the scheduler", hostname) - return -1 - - ret = self.ts_resource_cleanup(res) - if ret: - 
log.cl_error("failure during fix process of host [%s]", - hostname) - return -1 - - log.cl_info("cleaned up host [%s]", hostname) - return 0 - - def ts_job_start(self, scheduler_id): - """ - Start a jobs in the scheduler. Usually called remotely by client. - """ - log = self.ts_log - if scheduler_id != self.ts_id: - return -1 - self.ts_condition.acquire() - sequence = self.ts_jobid_sequence - jobid = time_util.local_strftime(time_util.utcnow(), "%Y-%m-%d-%H_%M_%S") - jobid += ("-%d" % sequence) - self.ts_jobid_sequence += 1 - log.cl_info("starting an new job [%s]", jobid) - - job = TestSchedulerJob(self, jobid, sequence) - self.ts_job_dict[jobid] = job - self.ts_condition.release() - return jobid - - def ts_job_list(self): - """ - List all active jobs in the scheduler. Usually called remotely by - console. - """ - log = self.ts_log - log.cl_info("listing job") - format_string = "%-25s%-6s%-10s\n" - job_names = format_string % ("Name", "Hosts", "Heartbeat") - job_names += "{0:->30}".format("") + "\n" - - now = time_util.utcnow() - self.ts_condition.acquire() - for job in self.ts_job_dict.values(): - diff = (now - job.laj_check_time).seconds - diff_string = str(diff) - if diff > TEST_HEARTBEAT_TIMEOUT: - diff_string += "*" - job_names += (format_string % - (job.laj_jobid, str(len(job.laj_hosts)), - diff_string)) - self.ts_condition.release() - return job_names - - def _ts_resource_cleanup_holding_concurrency(self, res): - """ - Check and fix a res - This function assumes the concurrency of the res has already been held - """ - # pylint: disable=bare-except - log = self.ts_log - # skip the heathy node - log.cl_debug("checking resource [%s]", res.sr_name) - try: - ret = res.sr_cleanup(log, self) - except: - ret = -1 - log.cl_error("exception when cleaning up resource [%s]: [%s]", - res.sr_name, traceback.format_exc()) - if ret: - res.sr_error += 1 - res.sr_is_clean = False - ret = -1 - else: - res.sr_is_clean = True - ret = 0 - res.sr_check_time = time.time() - - 
self.ts_condition.acquire() - res.sr_concurrency = 0 - self.ts_condition.release() - return ret - - def ts_resource_cleanup(self, res): - """ - Check and fix a res - """ - # pylint: disable=bare-except - log = self.ts_log - self.ts_condition.acquire() - # If the node is being used by some job, skip it. Job reclaim - # routine will release the dead nodes.. - if res.sr_concurrency > 0: - self.ts_condition.release() - log.cl_info("res [%s] is busy, skipping", res.sr_name) - return ScheduledResource.RESOURCE_IS_BUSY - # Set concurrency to max so no other one can use it. - res.sr_concurrency = res.sr_max_concurrency - self.ts_condition.release() - - return self._ts_resource_cleanup_holding_concurrency(res) - - def ts_recovery_main(self): - """ - Checking the health of each nodes, repaire them if necessary. - """ - # pylint: disable=bare-except - self.ts_condition.acquire() - log = self.ts_log - while True: - log.cl_debug("recovery thread is checking resources") - fix_res = None - res_fix_time = None - now = time.time() - wakeup_time = now + MIN_GOOD_RES_CHECK_INTERVAL - for res in self.ts_resources: - # Ignore the busy resources - if res.sr_concurrency > 0: - continue - - if res.sr_is_clean: - fix_time = res.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL - else: - fix_time = res.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL - - if fix_time > now: - if fix_time < wakeup_time: - wakeup_time = fix_time - continue - - if (fix_res is None or - res_fix_time < fix_time): - fix_res = res - res_fix_time = fix_time - if fix_res is not None: - # Hold the concurrency and create a thread to fix it - fix_res.sr_concurrency = fix_res.sr_max_concurrency - self.ts_condition.release() - utils.thread_start(self._ts_resource_cleanup_holding_concurrency, - (fix_res, )) - self.ts_condition.acquire() - continue - # Sleep unless something happen or time for fixing again - now = time.time() - if wakeup_time > now: - sleep_time = wakeup_time - now - log.cl_debug("recovery thread is going to sleep for 
[%s] " - "seconds", sleep_time) - start_time = now - self.ts_condition.wait(sleep_time) - now = time.time() - log.cl_debug("recovery thread slept [%s] seconds", - now - start_time) - self.ts_condition.release() - - def ts_jobs_check(self): - """ - Checking the timeout of all active jobs. The scheduler checks all the - jobs from time to time to cleanup timeout jobs. - """ - log = self.ts_log - log.cl_debug("scheduler is checking jobs") - now = time_util.utcnow() - stopped = False - self.ts_condition.acquire() - for job in self.ts_job_dict.values(): - log.cl_info("checking job [%s]", job.laj_jobid) - diff = (now - job.laj_check_time).seconds - if diff > TEST_HEARTBEAT_TIMEOUT: - self._ts_job_stop(job) - stopped = True - if stopped: - self.ts_condition.notifyAll() - self.ts_condition.release() - log.cl_debug("scheduler checked jobs") - - def _ts_job_stop(self, job): - """ - Stop a job. - """ - log = self.ts_log - log.cl_info("job [%s] stopping", job.laj_jobid) - for lhost in job.laj_hosts[:]: - job.laj_host_remove(lhost) - lhost.sr_job_sequence = None - lhost.sr_concurrency -= 1 - for res in job.laj_ip_addresses[:]: - job.laj_ip_address_remove(res) - res.sr_job_sequence = None - res.sr_concurrency -= 1 - del self.ts_job_dict[job.laj_jobid] - - def ts_job_stop(self, scheduler_id, jobid): - """ - Stop a job. Usually called remotely by client or console. - """ - log = self.ts_log - if scheduler_id != self.ts_id: - return -1 - self.ts_condition.acquire() - if jobid not in self.ts_job_dict: - log.cl_error("stopping unknown job [%s]", jobid) - self.ts_condition.release() - return -1 - job = self.ts_job_dict[jobid] - self._ts_job_stop(job) - self.ts_condition.notifyAll() - self.ts_condition.release() - return 0 - - def ts_job_heartbeat(self, scheduler_id, jobid): - """ - Stop a job. This is usually called remotely by client. 
- """ - log = self.ts_log - if scheduler_id != self.ts_id: - log.cl_info("got a heartbeat from job [%s] with wrong scheduler " - "ID, expected [%s], got [%s]", jobid, self.ts_id, - scheduler_id) - return -1 - log.cl_info("recived heatbeat of job [%s]", jobid) - self.ts_condition.acquire() - if jobid not in self.ts_job_dict: - log.cl_error("heartbeat from unknown job [%s]", jobid) - self.ts_condition.release() - return -1 - job = self.ts_job_dict[jobid] - job.laj_check_time = time_util.utcnow() - self.ts_condition.release() - return 0 - - -def server_main(scheduler, scheduler_port): - """ - Main function of scheduler thread. - """ - server = SimpleXMLRPCServer.SimpleXMLRPCServer(("0.0.0.0", - scheduler_port), - allow_none=True) - server.register_introspection_functions() - server.register_instance(scheduler) - while not SHUTTING_DOWN: - server.handle_request() - - -def parse_config_test_hosts(log, test_host_configs, kvm_template_dict): - """ - Parse test hosts from configuration. - :param test_host_configs: - :return: host node list, None if failed - """ - # pylint: disable=too-many-locals,too-many-branches,too-many-statements - compute_node = re.compile(r"(?P[\w\-.]+)" - r"(?P\[(?P\d+)\-(?P\d+)\])?", - re.VERBOSE) - hosts = list() - for node_conf in test_host_configs: - node_hostname = node_conf.get(cstr.CSTR_HOSTNAME) - if node_hostname is None: - log.cl_error("no [%s] found in items of section [%s]", - cstr.CSTR_HOSTNAME, cstr.CSTR_TEST_HOSTS) - return None - - match = compute_node.match(node_hostname) - if not match or not match.group("comname"): - log.cl_error("wrong format of hostname configuration [%s]", - node_hostname) - return None - - distro = node_conf.get(cstr.CSTR_DISTRO) - if distro is None: - log.cl_error("no [%s] found of node configuration [%s]", - cstr.CSTR_DISTRO, node_conf) - return None - - purpose = node_conf.get(cstr.CSTR_PURPOSE) - if purpose is None: - log.cl_error("no [%s] found of node configuration [%s]", - cstr.CSTR_PURPOSE, node_conf) 
- return None - - if purpose != PURPOSE_BUILD and purpose != PURPOSE_TEST: - log.cl_error("unknown purpose [%s] of test host configuration [%s]", - purpose, node_conf) - return None - - if purpose == PURPOSE_BUILD: - concurrency = node_conf.get(cstr.CSTR_CONCURRENCY) - if concurrency is None: - log.cl_error("no [%s] found of node configuration [%s]", - cstr.CSTR_CONCURRENCY, node_conf) - return None - else: - concurrency = 1 - kvm = node_conf.get(cstr.CSTR_KVM) - if kvm is None: - log.cl_debug("no [%s] found of kvm host configuration [%s]", - cstr.CSTR_KVM, node_conf) - kvm_server_hostname = None - kvm_template_ipv4_address = None - template_hostname = None - kvm_template = None - else: - kvm_server_hostname = kvm.get(cstr.CSTR_KVM_SERVER_HOSTNAME) - if kvm_server_hostname is None: - log.cl_error("no [%s] found of kvm host configuration [%s]", - cstr.CSTR_KVM_SERVER_HOSTNAME, kvm) - return None - - kvm_template_ipv4_address = kvm.get(cstr.CSTR_KVM_TEMPLATE_IPV4_ADDRESS) - if kvm_template_ipv4_address is None: - log.cl_error("no [%s] found of kvm host configuration [%s]", - cstr.CSTR_KVM_TEMPLATE_IPV4_ADDRESS, kvm) - return None - - template_hostname = kvm.get(cstr.CSTR_TEMPLATE_HOSTNAME) - if template_hostname is None: - log.cl_error("no [%s] found of kvm host configuration [%s]", - cstr.CSTR_TEMPLATE_HOSTNAME, kvm) - return None - - if template_hostname not in kvm_template_dict: - log.cl_error("no VM template with hostname [%s] is configured", - template_hostname) - return None - kvm_template = kvm_template_dict[template_hostname] - - tag = node_conf.get(cstr.CSTR_TAG) - - comname = match.group("comname") - if not match.group("range"): - # This assumes the /etc/hosts or LDAP is properly configured so - # we can get the IP by the hostname - ipv4_address = socket.gethostbyname(comname) - ipv4_addresses = [ipv4_address] - - l_host = TestHost(comname, distro, purpose, tag, - concurrency, ipv4_addresses=ipv4_addresses, - kvm_server_hostname=kvm_server_hostname, - 
kvm_template_ipv4_address=kvm_template_ipv4_address, - kvm_template=kvm_template) - hosts.append(l_host) - continue - - start = int(match.group("start")) - stop = int(match.group("stop")) + 1 - if start > stop: - log.cl_error("range error in host configuration [%s]", node_conf) - return None - for i in range(start, stop): - hostname = ("%s%d" % (comname, i)) - ipv4_address = socket.gethostbyname(hostname) - ipv4_addresses = [ipv4_address] - l_host = TestHost(hostname, distro, purpose, tag, - concurrency, kvm_server_hostname=kvm_server_hostname, - kvm_template_ipv4_address=kvm_template_ipv4_address, - ipv4_addresses=ipv4_addresses, - kvm_template=kvm_template) - hosts.append(l_host) - return hosts - - -def parse_config_test_hosts_and_templates(log, workspace, config, config_file): - """ - Parse the scheduler configuration - """ - test_host_configs = config.get(cstr.CSTR_TEST_HOSTS) - if test_host_configs is None: - log.cl_error("no section [%s] found in configuration file [%s]", - cstr.CSTR_TEST_HOSTS, config_file) - return None - - kvm_template_dict = lipe_virt.parse_templates_config(log, workspace, - config, config_file, - hosts=None) - if kvm_template_dict is None: - log.cl_error("failed to parse template configs in file [%s]", - config_file) - return None - - test_hosts = parse_config_test_hosts(log, test_host_configs, kvm_template_dict) - if test_hosts is None: - log.cl_error("failed to parse [%s] from configuration file [%s]", - cstr.CSTR_TEST_HOSTS, config_file) - return None - - return test_hosts - - -def parse_config_ip_addresses(log, config, config_fpath): - """ - Parse the IP adress config - """ - ip_addresses = [] - address_configs = config.get(cstr.CSTR_IP_ADDRESSES) - if address_configs is None: - log.cl_error("no section [%s] found in configuration file [%s]", - cstr.CSTR_IP_ADDRESSES, config_fpath) - return None - - for address_config in address_configs: - address = address_config.get(cstr.CSTR_IP_ADDRESS) - if address is None: - log.cl_error("one of 
the config in [%s] doesn't have [%s] " - "configured, please correct configuration file [%s]", - cstr.CSTR_IP_ADDRESSES, cstr.CSTR_IP_ADDRESS, - config_fpath) - return None - - bindnetaddr = address_config.get(cstr.CSTR_BINDNETADDR) - if bindnetaddr is None: - log.cl_error("the config of ip address with [%s] in [%s] doesn't " - "have [%s] configured, please correct configuration " - "file [%s]", address, cstr.CSTR_IP_ADDRESSES, - cstr.CSTR_BINDNETADDR, config_fpath) - return None - - ip_address = IPAddress(address, bindnetaddr) - ip_addresses.append(ip_address) - return ip_addresses - - -def signal_handler(signum, frame): - """ - Singnal hander. Set the shutting down flag. - """ - # pylint: disable=unused-argument,global-statement - log = GLOBAL_LOG - log.cl_info("signal handler called with signal [%d]", signum) - global SHUTTING_DOWN - SHUTTING_DOWN = True - - -def lipe_test_scheduler(log, workspace, config_fpath): - """ - Start to test LiPE holding the confiure lock - """ - # pylint: disable=bare-except,global-statement - global GLOBAL_LOG - - GLOBAL_LOG = log - - config_fd = open(config_fpath) - ret = 0 - try: - config = yaml.load(config_fd) - except: - log.cl_error("not able to load [%s] as yaml file: %s", config_fpath, - traceback.format_exc()) - ret = -1 - config_fd.close() - if ret: - return -1 - - scheduler_id = os.path.basename(workspace) - log.cl_info("LiPE test scheduler started, please check [%s] for more log", - workspace) - - scheduler_port = config.get(cstr.CSTR_PORT) - if scheduler_port is None: - scheduler_port = TEST_SCHEDULER_PORT - - addresses = parse_config_ip_addresses(log, config, - config_fpath) - if addresses is None: - log.cl_error("failed to parse config of addresses") - return -1 - - test_hosts = parse_config_test_hosts_and_templates(log, workspace, config, - config_fpath) - if test_hosts is None: - log.cl_error("failed to parse config test hosts and templates") - return -1 - - scheduler = TestScheduler(log, scheduler_id, test_hosts, 
addresses) - output = scheduler.ts_host_list(False) - log.cl_info("\n%s", output) - - output = scheduler.ts_ip_address_list(False) - log.cl_info("\n%s", output) - # Set signal hander before start to handling reqeust. - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - utils.thread_start(server_main, (scheduler, scheduler_port)) - utils.thread_start(scheduler.ts_recovery_main, ()) - - while not SHUTTING_DOWN: - scheduler.ts_jobs_check() - time.sleep(TEST_HEARTBEAT_TIMEOUT) - log.cl_info("stopping test scheduler service") - return 0 - - -def main(): - """ - Start to test LiPE - """ - cmd_general.main(TEST_SCHEDULER_CONFIG, TEST_SCHEDULER_LOG_DIR, - lipe_test_scheduler) diff --git a/lipe/pyltest_import_check b/lipe/pyltest_import_check deleted file mode 100755 index 05edadb..0000000 --- a/lipe/pyltest_import_check +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/python2 -u -# Copyright (c) 2019 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Check the import of pyltest source code -""" -from pybuild import pyltest_import_check - -if __name__ == "__main__": - pyltest_import_check.main() diff --git a/lipe/pylustre/__init__.py b/lipe/pylustre/__init__.py index 2a4805b..078a0d5 100644 --- a/lipe/pylustre/__init__.py +++ b/lipe/pylustre/__init__.py @@ -8,14 +8,11 @@ __all__ = ["clog", "daemon", "install_common", "install_common_nodeps", - "lipe_virt", "lustre", - "lustre_test", "lyaml", "parallel", "rwlock", "ssh_host", - "test_common", "time_util", "utils", "watched_io"] diff --git a/lipe/pylustre/constants.py b/lipe/pylustre/constants.py index 342e871..5d5ab1e 100644 --- a/lipe/pylustre/constants.py +++ b/lipe/pylustre/constants.py @@ -6,11 +6,6 @@ VAR_LOG_PATH = "/var/log" LIPE_INSTALL_CONFIG_FNAME = "lipe_install.conf" LIPE_INSTALL_CONFIG = ETC_DIR_PATH + LIPE_INSTALL_CONFIG_FNAME -LIPE_TEST_CONFIG_FNAME = "lipe_test.conf" -LIPE_TEST_CONFIG = ETC_DIR_PATH + LIPE_TEST_CONFIG_FNAME 
-LIPE_TEST_LOG_DIR_BASENAME = "lipe_test" -LIPE_TEST_LOG_DIR = VAR_LOG_PATH + "/" + LIPE_TEST_LOG_DIR_BASENAME - LIPE_BUILD_CONFIG_FNAME = "lipe_build.conf" LIPE_BUILD_CONFIG = ETC_DIR_PATH + LIPE_BUILD_CONFIG_FNAME LIPE_BUILD_LOG_DIR_BASENAME = "lipe_build_log" diff --git a/lipe/pylustre/lipe_virt.py b/lipe/pylustre/lipe_virt.py deleted file mode 100644 index 59ced80..0000000 --- a/lipe/pylustre/lipe_virt.py +++ /dev/null @@ -1,1814 +0,0 @@ -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Library for installing virtual machines -""" -# pylint: disable=too-many-lines -import sys -import traceback -import random -import re -import yaml - -# Local libs -from pylustre import utils -from pylustre import ssh_host -from pylustre import lustre -from pylustre import cstr -from pylustre import cmd_general - -LIPE_VIRT_CONFIG_FNAME = "lipe_virt.conf" -LIPE_VIRT_CONFIG = "/etc/" + LIPE_VIRT_CONFIG_FNAME -LIPE_VIRT_LOG_DIR = "/var/log/lipe_virt" -LIPE_UDEV_RULES = "/etc/udev/rules.d/80-lipe-name.rules" -LVIRT_IMAGE_SHARED_SUBFIX = "_shared" - - -class VirtTemplate(object): - """ - Each virtual machine template has an object of this type - """ - # pylint: disable=too-few-public-methods,too-many-instance-attributes - # pylint: disable=too-many-arguments - def __init__(self, iso, template_hostname, internet, network_configs, - image_dir, distro, ram_size, disk_sizes, dns, - bus_type=cstr.CSTR_BUS_SCSI, - server_host=None, server_host_id=None, reinstall=None): - self.vt_server_host = server_host - self.vt_server_host_id = server_host_id - self.vt_reinstall = reinstall - - self.vt_dns = dns - self.vt_iso = iso - self.vt_template_hostname = template_hostname - self.vt_internet = internet - self.vt_network_configs = network_configs - self.vt_image_dir = image_dir - self.vt_distro = distro - self.vt_ram_size = ram_size - self.vt_disk_sizes = disk_sizes - self.vt_bus_type = bus_type - - -class SharedDisk(object): - """ - Each shared 
disk has an object of this type - """ - # pylint: disable=too-few-public-methods,too-many-arguments - def __init__(self, disk_id, server_host, server_host_id, image_prefix, size): - self.sd_disk_id = disk_id - self.sd_server_host = server_host - self.sd_image_fpath = image_prefix + LVIRT_IMAGE_SHARED_SUBFIX - self.sd_size = size - self.sd_server_host_id = server_host_id - self.sd_targets = [] - - def _sd_create(self, log): - """ - Create the shared disk - """ - command = ("qemu-img create -f raw %s %sG" % - (self.sd_image_fpath, self.sd_size)) - - retval = self.sd_server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - self.sd_server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - def sd_add_target(self, log, target): - """ - Add a host which shares this disk - """ - log.cl_info("adding target [%s] on host [%s] to device [%s]", - target.st_target_name, target.st_host.sh_hostname, - self.sd_disk_id) - self.sd_targets.append(target) - - def _sd_share_target(self, log, target): - """ - Share the disk with the host - """ - # pylint: disable=too-many-branches,too-many-return-statements - target_name = target.st_target_name - host = target.st_host - - ret, devices = host.sh_lsscsi(log) - if ret: - log.cl_error("failed to get device on host [%s]", - host.sh_hostname) - return -1 - - command = ("virsh attach-disk %s %s %s --subdriver raw --persistent --cache=directsync" % - (host.sh_hostname, self.sd_image_fpath, target_name)) - retval = self.sd_server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - self.sd_server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Need to wait until VM shut off, otherwise "virsh 
change-media" won't - # change the XML file - ret = utils.wait_condition(log, host_check_lsscsi, - (host, len(devices) + 1)) - if ret: - log.cl_error("timeout when waiting the device number of host " - "[%s]", host.sh_hostname) - return ret - - ret, new_devices = host.sh_lsscsi(log) - if ret: - log.cl_error("failed to get device on host [%s]", - host.sh_hostname) - return -1 - - if len(new_devices) != len(devices) + 1: - log.cl_error("unexpected new devices number %s on host [%s], old " - "devices %s", new_devices, host.sh_hostname, devices) - return -1 - - new_device = None - for device in new_devices: - if device not in devices: - if new_device is not None: - log.cl_error("unexpected new devices %s on host [%s], " - "old devices %s", new_devices, - host.sh_hostname, devices) - return -1 - new_device = device - - serial = host.sh_device_serial(log, new_device) - if serial is None: - log.cl_error("failed to get serial of device [%s] on host [%s]", - serial, host.sh_hostname) - return -1 - - log.cl_debug("added device [%s] with serial number [%s] on host [%s]", - new_device, serial, host.sh_hostname) - command = ('echo \'ENV{ID_SERIAL}=="%s", SYMLINK+="mapper/%s"\' >> %s' % - (serial, self.sd_disk_id, LIPE_UDEV_RULES)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = "udevadm control --reload-rules && udevadm trigger" - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - device_link = "/dev/mapper/" + self.sd_disk_id - command = "readlink -f %s" % device_link - expect_stdout = new_device + "\n" - ret = 
host.sh_wait_update(log, command, expect_exit_status=0, - expect_stdout=expect_stdout) - if ret: - log.cl_error("created wrong symlink [%s] on host " - "[%s], expected [%s]", - device_link, host.sh_hostname, new_device) - return -1 - return 0 - - def sd_share(self, log): - """ - Share the disk on all hosts - """ - log.cl_info("sharing disk [%s]", self.sd_disk_id) - if len(self.sd_targets) == 0: - return 0 - ret = self._sd_create(log) - if ret: - log.cl_error("failed to create shared disk [%s] on host with " - "ID [%s]", self.sd_image_fpath, - self.sd_server_host_id) - return -1 - - for target in self.sd_targets: - ret = self._sd_share_target(log, target) - if ret: - log.cl_error("failed to share disk [%s] on server host with " - "ID [%s] to VM [%s]", self.sd_image_fpath, - self.sd_server_host_id, target.st_host.sh_hostname) - return -1 - return 0 - - -class SharedTarget(object): - """ - Each shared disk on each VM has an object of this type - """ - # pylint: disable=too-few-public-methods,too-many-arguments - def __init__(self, vm_host, target_name): - self.st_host = vm_host - self.st_target_name = target_name - - -def random_mac(): - """ - Generate random MAC address - """ - mac_parts = [random.randint(0x00, 0x7f), - random.randint(0x00, 0xff), - random.randint(0x00, 0xff)] - mac_string = "52:54:00" - for mac_part in mac_parts: - mac_string += ":" + ("%02x" % mac_part) - return mac_string - - -def vm_is_shut_off(log, server_host, hostname): - """ - Check whether vm is shut off - """ - state = server_host.sh_virsh_dominfo_state(log, hostname) - if state is None: - return False - elif state == "shut off": - return True - return False - - -def host_check_lsscsi(log, host, expect_dev_number): - """ - Check whether scsi number is expected - """ - ret, new_devices = host.sh_lsscsi(log) - if ret: - log.cl_error("failed to get device on host [%s]", - host.sh_hostname) - return -1 - if len(new_devices) == expect_dev_number: - return 0 - else: - return -1 - - -def 
vm_copy_in(log, server_host, vm, src, dest): - """ - Copy file @src on server_host into dir @dest on guest @vm - """ - retval = server_host.sh_run(log, "which virt-copy-in") - if retval.cr_exit_status != 0: - command = ("yum install libguestfs-tools -y") - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to install libguestfs-tools via " - "command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - ret = 0 - command = ("virt-copy-in -d %s %s %s" % (vm, src, dest)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], ret = [%d], " - "stdout = [%s], stderr = [%s]", - command, server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - ret = -1 - - # "virt-copy-in -d" may fail if multiple device on guest domain, try - # "virt-copy-in -i" with each device then. 
- if ret == -1: - command = ("virsh domblklist %s --details | grep disk | awk '{print $4}'" % vm) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], ret = [%d], " - "stdout = [%s], stderr = [%s]", - command, server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - images = retval.cr_stdout.splitlines() - for image in images: - command = ("virt-copy-in -i %s %s %s" % (image, src, dest)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status == 0: - return 0 - - log.cl_error("failed to copy file [%s] from server [%s] into " - "[%s] of vm [%s]", src, server_host.sh_hostname, dest, vm) - return ret - - -def vm_check_shut_off(log, server_host, hostname): - """ - Check whether vm is shut off - """ - off = vm_is_shut_off(log, server_host, hostname) - if off: - return 0 - return -1 - - -def vm_delete(log, server_host, hostname): - """ - Delete a virtual machine - """ - existed = True - active = True - state = server_host.sh_virsh_dominfo_state(log, hostname) - if state is None: - existed = False - active = False - elif state == "shut off": - active = False - - if active: - command = ("virsh destroy %s" % hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - if existed: - command = ("virsh undefine %s" % hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - return 0 - - -def vm_clone(log, workspace, server_host, hostname, network_configs, 
ips, - template_hostname, image_dir, distro, internet, disk_number): - """ - Create virtual machine - """ - # pylint: disable=too-many-arguments,too-many-locals,too-many-return-statements - # pylint: disable=too-many-branches,too-many-statements - log.cl_info("cloning host [%s] from template [%s]", hostname, - template_hostname) - host_ip = ips[0] - ret = vm_delete(log, server_host, hostname) - if ret: - return -1 - - command = ("ping -c 1 %s" % host_ip) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status == 0: - log.cl_error("IP [%s] already used by a host", host_ip) - return -1 - - command = ("ping -c 1 %s" % hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status == 0: - log.cl_error("host [%s] already up", hostname) - return -1 - - active = True - state = server_host.sh_virsh_dominfo_state(log, template_hostname) - if state is None: - log.cl_error("template [%s] doesn't exist on host [%s]", - template_hostname, server_host.sh_hostname) - return -1 - elif state == "shut off": - active = False - - if active: - command = ("virsh destroy %s" % template_hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - file_options = "" - for disk_index in range(disk_number): - file_options += (" --file %s/%s_%d.img" % - (image_dir, hostname, disk_index)) - - command = ("rm -f %s/%s_%d.img" % - (image_dir, hostname, disk_index)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = ("virt-clone --original %s --name %s%s" % - 
(template_hostname, hostname, file_options)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - local_host_dir = workspace + "/" + hostname - ret = utils.mkdir(local_host_dir) - if ret: - log.cl_error("failed to create directory [%s] on local host", - local_host_dir) - return -1 - - # net.ifnames=0 biosdevname=0 has been added to grub, so the interface - # name will always be eth* - eth_number = 0 - for eth_ip in ips: - network_config = network_configs[eth_number] - ifcfg = 'DEVICE="eth%d"\n' % eth_number - ifcfg += 'IPADDR="%s"\n' % eth_ip - ifcfg += 'NETMASK="%s"\n' % network_config["netmask"] - if "gateway" in network_config: - ifcfg += 'GATEWAY=\"%s"\n' % network_config["gateway"] - ifcfg += """ONBOOT=yes -BOOTPROTO="static" -TYPE=Ethernet -IPV6INIT=no -NM_CONTROLLED=no -""" - - ifcfg_fname = "ifcfg-eth%d" % eth_number - ifcfg_fpath = local_host_dir + "/" + ifcfg_fname - with open(ifcfg_fpath, "wt") as fout: - fout.write(ifcfg) - - host_ifcfg_fpath = workspace + "/" + ifcfg_fname - ret = server_host.sh_send_file(log, ifcfg_fpath, workspace) - if ret: - log.cl_error("failed to send file [%s] on local host to " - "directory [%s] on host [%s]", - ifcfg_fpath, workspace, - server_host.sh_hostname) - return -1 - - ret = vm_copy_in(log, server_host, hostname, host_ifcfg_fpath, - "/etc/sysconfig/network-scripts") - if ret: - return -1 - eth_number += 1 - - host_rules_fpath = workspace + "/70-persistent-net.rules" - command = ("> %s" % host_rules_fpath) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) 
- return -1 - - ret = vm_copy_in(log, server_host, hostname, host_rules_fpath, - "/etc/udev/rules.d") - if ret: - return -1 - - if distro == ssh_host.DISTRO_RHEL6: - network_string = 'NETWORKING=yes\n' - network_string += 'HOSTNAME=%s\n' % hostname - network_fname = "network" - network_fpath = local_host_dir + "/" + network_fname - with open(network_fpath, "wt") as fout: - fout.write(network_string) - - host_network_fpath = workspace + "/" + network_fname - ret = server_host.sh_send_file(log, network_fpath, workspace) - if ret: - log.cl_error("failed to send file [%s] on local host to " - "directory [%s] on host [%s]", - network_fpath, workspace, - server_host.sh_hostname) - return -1 - - ret = vm_copy_in(log, server_host, hostname, host_network_fpath, - "/etc/sysconfig") - if ret: - return -1 - else: - host_hostname_fpath = workspace + "/hostname" - command = ("echo %s > %s" % (hostname, host_hostname_fpath)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - ret = vm_copy_in(log, server_host, hostname, host_hostname_fpath, - "/etc") - if ret: - return -1 - - command = ("virsh start %s" % hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Remove the record in known_hosts, otherwise ssh will fail - command = ('sed -i "/%s /d" /root/.ssh/known_hosts' % (host_ip)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, 
def vm_check(log, hostname, host_ip, distro, internet):
    """
    Check whether virtual machine is up and fine

    The guest is contacted twice, first through [host_ip] and then through
    [hostname]. Each time the "hostname" command is run on it and the
    output has to be equal to [hostname]. After that the distro reported
    by the guest is compared with [distro], and if [internet] is true the
    Internet access of the guest is checked too.

    Return 0 if all of the checks pass, -1 (after logging) otherwise.
    """
    # pylint: disable=too-many-return-statements
    command = "hostname"
    vm_host = None
    for address in (host_ip, hostname):
        vm_host = ssh_host.SSHHost(address)
        retval = vm_host.sh_run(log, command)
        if retval.cr_exit_status:
            log.cl_error("failed to run command [%s] on host [%s], "
                         "ret = [%d], stdout = [%s], stderr = [%s]",
                         command,
                         address,
                         retval.cr_exit_status,
                         retval.cr_stdout,
                         retval.cr_stderr)
            return -1

        reported_hostname = retval.cr_stdout.strip()
        if reported_hostname != hostname:
            log.cl_error("wrong host name of the virtual machine [%s], expected "
                         "[%s], got [%s]", address, hostname,
                         reported_hostname)
            return -1

    # From here on vm_host is the connection opened through the host name,
    # which is the last address probed above.
    vm_distro = vm_host.sh_distro(log)
    if vm_distro != distro:
        log.cl_error("wrong distro of the virtual machine [%s], expected "
                     "[%s], got [%s]", hostname, distro, vm_distro)
        return -1

    if internet and vm_host.sh_check_internet(log):
        log.cl_error("virtual machine [%s] can not access Internet",
                     hostname)
        return -1
    return 0
def _vm_install(log, workspace, bus_type, image_dir, ram_size, disk_sizes,
                distro, hostname, network_configs, server_host, iso_path,
                mnt_path):
    """
    Actually start to install by using virt_install

    A kickstart file is generated under [workspace]/[hostname] on the local
    host, sent to [workspace] on [server_host], and injected into the
    installer started by virt-install. Stale disk images of the guest under
    [image_dir] are removed before the installation starts.

    Each entry of [network_configs] is read for the keys "ip", "netmask",
    "virt_install_option" and, optionally, "gateway".

    Return 0 on success, -1 (after logging) on failure.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # pylint: disable=too-many-return-statements,too-many-statements
    # pylint: disable=too-many-branches
    if bus_type == cstr.CSTR_BUS_VIRTIO:
        pri_disk = cstr.CSTR_DISK_VIRTIO_PRIMARY
    elif bus_type == cstr.CSTR_BUS_IDE:
        pri_disk = cstr.CSTR_DISK_IDE_PRIMARY
    elif bus_type == cstr.CSTR_BUS_SCSI:
        pri_disk = cstr.CSTR_DISK_SCSI_PRIMARY
    else:
        log.cl_error("unsupported bus type [%s], please correct it",
                     bus_type)
        return -1

    # The distro decides both how the hostname is configured in %post and
    # how long virt-install is allowed to run, so settle both here. This
    # guarantees install_timeout is always bound when it is used below.
    if distro == ssh_host.DISTRO_RHEL6:
        hostname_script = 'echo NETWORKING=yes > /etc/sysconfig/network\n'
        hostname_script += ('echo HOSTNAME=%s >> /etc/sysconfig/network\n' %
                            (hostname))
        install_timeout = 600
    elif distro == ssh_host.DISTRO_RHEL7:
        hostname_script = "echo %s > /etc/hostname\n" % (hostname)
        install_timeout = 1200
    else:
        log.cl_error("wrong distro [%s]", distro)
        return -1

    ks_config = """# Kickstart file automatically generated by LiPE.
install
reboot
cdrom
lang en_US.UTF-8
keyboard us
"""
    ks_config += """rootpw password
firewall --disabled
authconfig --enableshadow --passalgo=sha512
selinux --disabled
timezone --utc Asia/Shanghai
"""
    ks_config += """bootloader --location=mbr --driveorder=%s --append="crashkernel=auto net.ifnames=0 biosdevname=0"\
""" % pri_disk
    ks_config += """
zerombr
clearpart --all --initlabel
"""
    ks_config += "part / --fstype=ext4 --grow --size=500 --ondisk=%s --asprimary" % pri_disk
    ks_config += """
repo --name="Media" --baseurl=file:///mnt/source --cost=100
%packages
@Core
%end
%post --log=/var/log/anaconda/post-install.log
#!/bin/bash
# Configure hostname, somehow virt-install --name doesn't work
"""
    ks_config += hostname_script
    ks_config += "# Configure network\n"
    for eth_number, network_config in enumerate(network_configs):
        # The ifcfg-ens* file removed for each interface is numbered from 3
        ens_number = eth_number + 3
        # net.ifnames=0 biosdevname=0 will be added to GRUB_CMDLINE_LINUX, so the
        # interface name will always be eth*
        ks_config += "# Network eth%d\n" % eth_number
        ks_config += ("rm -f /etc/sysconfig/network-scripts/ifcfg-ens%d\n" %
                      ens_number)
        ks_config += ("cat << EOF > /etc/sysconfig/network-scripts/ifcfg-eth%d\n" %
                      eth_number)
        ks_config += "DEVICE=eth%d\n" % eth_number
        ks_config += 'IPADDR="%s"\n' % network_config["ip"]
        ks_config += 'NETMASK="%s"\n' % network_config["netmask"]
        if "gateway" in network_config:
            ks_config += 'GATEWAY=\"%s"\n' % network_config["gateway"]
        ks_config += """ONBOOT=yes
BOOTPROTO="static"
TYPE=Ethernet
IPV6INIT=no
NM_CONTROLLED=no
EOF
"""

    ks_config += "%end\n"
    local_host_dir = workspace + "/" + hostname
    ret = utils.mkdir(local_host_dir)
    if ret:
        log.cl_error("failed to create directory [%s] on local host",
                     local_host_dir)
        return -1

    ks_fname = "%s.ks" % hostname
    ks_fpath = local_host_dir + "/" + ks_fname
    with open(ks_fpath, "wt") as fout:
        fout.write(ks_config)

    host_ks_fpath = workspace + "/" + ks_fname
    ret = server_host.sh_send_file(log, ks_fpath, workspace)
    if ret:
        log.cl_error("failed to send file [%s] on local host to "
                     "directory [%s] on host [%s]",
                     ks_fpath, workspace,
                     server_host.sh_hostname)
        return -1

    command = "yum install virt-install -y"
    retval = server_host.sh_run(log, command)
    if retval.cr_exit_status:
        log.cl_error("failed to run command [%s] on host [%s], "
                     "ret = [%d], stdout = [%s], stderr = [%s]",
                     command,
                     server_host.sh_hostname,
                     retval.cr_exit_status,
                     retval.cr_stdout,
                     retval.cr_stderr)
        return -1

    command = ("virt-install --vcpus=1 --os-type=linux --hvm "
               "--connect=qemu:///system --accelerate --serial pty -v "
               "--nographics --noautoconsole --wait=-1 --force ")
    command += "--ram=%s " % ram_size
    for network_config in network_configs:
        command += ("--network=%s " % (network_config["virt_install_option"]))
    command += ("--name=%s " % (hostname))
    command += ("--initrd-inject=%s " % (host_ks_fpath))
    for disk_index, disk_size in enumerate(disk_sizes):
        disk_path = "%s/%s_%d.img" % (image_dir, hostname, disk_index)
        remove_command = "rm -f %s" % disk_path
        retval = server_host.sh_run(log, remove_command)
        if retval.cr_exit_status:
            # Report the command that actually failed, not the virt-install
            # command line that is still being assembled.
            log.cl_error("failed to run command [%s] on host [%s], "
                         "ret = [%d], stdout = [%s], stderr = [%s]",
                         remove_command,
                         server_host.sh_hostname,
                         retval.cr_exit_status,
                         retval.cr_stdout,
                         retval.cr_stderr)
            return -1
        command += ("--disk path=%s,bus=%s,size=%s " %
                    (disk_path, bus_type, disk_size))
    command += ("--location %s " % (mnt_path))
    command += ("--disk=%s,device=cdrom,perms=ro " % (iso_path))
    command += ("--extra-args='console=tty0 console=ttyS0,115200n8 "
                "ks=file:/%s'" % (ks_fname))

    retval = server_host.sh_run(log, command, timeout=install_timeout)
    if retval.cr_exit_status:
        log.cl_error("failed to run command [%s] on host [%s], "
                     "ret = [%d], stdout = [%s], stderr = [%s]",
                     command,
                     server_host.sh_hostname,
                     retval.cr_exit_status,
                     retval.cr_stdout,
                     retval.cr_stderr)
        return -1
    # Explicit success code; callers only test the truthiness of the result.
    return 0
network_config = network_configs[0] - host_ip = network_config["ip"] - command = ("ping -c 1 %s" % host_ip) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status == 0: - log.cl_error("IP [%s] is already used by a host", host_ip) - return -1 - - command = ("ping -c 1 %s" % hostname) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status == 0: - log.cl_error("host [%s] is already up", hostname) - return -1 - - mnt_path = mount_iso(log, workspace, server_host, iso_path) - if mnt_path is None: - log.cl_error("failed to get mnt path of ISO [%s]", iso_path) - return -1 - - ret = _vm_install(log, workspace, bus_type, image_dir, ram_size, disk_sizes, - distro, hostname, network_configs, server_host, iso_path, - mnt_path) - if ret: - log.cl_error("failed install VM") - - command = ("umount %s" % (mnt_path)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = ("rmdir %s" % (mnt_path)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - if ret: - log.cl_error("quiting because failed to install VM") - return -1 - - ret = server_host.sh_run(log, "which sshpass") - if ret.cr_exit_status != 0: - command = ("yum install sshpass -y") - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Remove the record in known_hosts, 
otherwise ssh will fail - command = ('sed -i "/%s /d" /root/.ssh/known_hosts' % (host_ip)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # When virt-install finished, the virtual machine starts to reboot - # so wait a little bit here until the host is up. Need - # StrictHostKeyChecking=no, otherwise exit code will be 6 (ENOENT) - expect_stdout = hostname + "\n" - command = ("sshpass -p password ssh -o StrictHostKeyChecking=no " - "root@%s hostname" % (host_ip)) - ret = server_host.sh_wait_update(log, command, expect_exit_status=0, - expect_stdout=expect_stdout) - if ret: - log.cl_error("failed to wait host [%s] up", hostname) - return -1 - - command = ("sshpass -p password ssh root@%s " - "\"mkdir /root/.ssh && chmod 600 /root/.ssh\"" % (host_ip)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = ("sshpass -p password scp /root/.ssh/* root@%s:/root/.ssh" % (host_ip)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - vm_host = ssh_host.SSHHost(host_ip) - command = "> /root/.ssh/known_hosts" - retval = vm_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - vm_host.sh_hostname, - retval.cr_exit_status, - 
retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = "hostname" - retval = vm_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - vm_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - real_hostname = retval.cr_stdout.strip() - if real_hostname != hostname: - log.cl_error("wrong hostname, expected [%s], got [%s]", - hostname, real_hostname) - return -1 - - if internet: - ret = vm_host.sh_enable_dns(log, dns) - if ret: - log.cl_error("failed to enable dns on host [%s]") - return -1 - - command = "yum install rsync -y" - retval = vm_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - vm_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Do not check the return status, because the connection could be stopped - command = "init 0" - vm_host.sh_run(log, command) - - # Need to wait until VM shut off, otherwise "virsh change-media" won't - # change the XML file - ret = utils.wait_condition(log, vm_check_shut_off, (server_host, hostname)) - if ret: - log.cl_error("failed when waiting host [%s] on [%s] shut off", - hostname, server_host.sh_hostname) - return ret - - # Find the CDROM device - command = ("virsh domblklist %s --details | grep cdrom | " - "awk '{print $3}'" % (hostname)) - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - cdroms = retval.cr_stdout.splitlines() - if len(cdroms) != 1: - log.cl_error("unexpected cdroms: [%s]", - retval.cr_stdout) - return -1 - cdrom = cdroms[0] - - 
def target_index2name(log, target_index, bus_type=cstr.CSTR_BUS_SCSI):
    """
    Return the target name according to index
    0 -> sda
    1 -> sdb
    ...
    25 -> sdz
    26 -> sdaa
    27 -> sdab
    ...
    (the "sd" prefix above is what the original docstring shows for the
    default bus type; the real prefix is the cstr.CSTR_DISK_*_PREFIX
    constant selected by [bus_type])

    Return None, after logging an error, if [bus_type] is not supported or
    if [target_index] is negative.
    """
    if bus_type == cstr.CSTR_BUS_VIRTIO:
        prefix = cstr.CSTR_DISK_VIRTIO_PREFIX
    elif bus_type == cstr.CSTR_BUS_IDE:
        prefix = cstr.CSTR_DISK_IDE_PREFIX
    elif bus_type == cstr.CSTR_BUS_SCSI:
        prefix = cstr.CSTR_DISK_SCSI_PREFIX
    else:
        log.cl_error("unsupported bus type [%s], please correct it", bus_type)
        return None

    if target_index < 0:
        log.cl_error("invalid disk index [%s], please correct it",
                     target_index)
        return None

    # Letters-only numbering without a zero digit: a..z, then aa..az,
    # ba.. and so on. Indexes 0-25 give the same single letter as before.
    suffix = ""
    remaining = target_index
    while True:
        remaining, offset = divmod(remaining, 26)
        suffix = chr(ord('a') + offset) + suffix
        if remaining == 0:
            break
        # Step down one so the next, more significant letter starts at 'a'
        remaining -= 1

    return prefix + suffix
def lipe_vm_reboot(log, host, hostserver):
    """
    Reset the guest vm on hostserver

    A normal reboot of [host] is tried first. If that fails, the guest is
    hard reset by running "virsh reset" on [hostserver], and then waited
    for until it is up again.

    Return 0 on success, -1 (after logging) on failure.
    """
    # pylint: disable=too-many-return-statements,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements
    ret = host.sh_reboot(log)
    if ret == 0:
        return 0

    # reboot failed? try hard reset
    command = "virsh reset %s" % host.sh_hostname
    retval = hostserver.sh_run(log, command)
    if retval.cr_exit_status != 0:
        # The two literals below must stay adjacent with no comma between
        # them, so that they form one format string for the five arguments.
        log.cl_error("failed to run command [%s] on host [%s], "
                     "ret = [%d], stdout = [%s], stderr = [%s]",
                     command, hostserver.sh_hostname,
                     retval.cr_exit_status,
                     retval.cr_stdout, retval.cr_stderr)
        return -1

    # wait for the host up after hard reset
    if host.sh_wait_up(log):
        log.cl_error("host [%s] failed to startup, even after hard reset.",
                     host.sh_hostname)
        return -1
    return 0
- cstr.CSTR_DISK_SIZES) - if disk_sizes is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_DISK_SIZES, config_fpath) - return None - - bus_type = utils.config_value(template_config, - cstr.CSTR_BUS_TYPE) - if bus_type is None: - log.cl_info("no [%s] is configured, use scsi as default", - cstr.CSTR_BUS_TYPE) - bus_type = cstr.CSTR_BUS_SCSI - - network_configs = utils.config_value(template_config, - cstr.CSTR_NETWORK_CONFIGS) - if network_configs is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_NETWORK_CONFIGS, config_fpath) - return None - - iso = utils.config_value(template_config, cstr.CSTR_ISO) - if iso is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_ISO, config_fpath) - return None - - distro = utils.config_value(template_config, cstr.CSTR_DISTRO) - if distro is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_DISTRO, config_fpath) - return None - - image_dir = utils.config_value(template_config, cstr.CSTR_IMAGE_DIR) - if image_dir is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_IMAGE_DIR, config_fpath) - return None - - if hosts is None: - server_host_id = None - server_host = None - reinstall = None - else: - server_host_id = utils.config_value(template_config, - cstr.CSTR_SERVER_HOST_ID) - if server_host_id is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_SERVER_HOST_ID, config_fpath) - return None - - if server_host_id not in hosts: - log.cl_error("SSH host with ID [%s] is NOT configured in " - "[%s], please correct file [%s]", - cstr.CSTR_SERVER_HOST_ID, cstr.CSTR_SSH_HOSTS, - config_fpath) - return None - - server_host = hosts[server_host_id] - command = "mkdir -p %s" % workspace - retval = server_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = 
[%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return None - - reinstall = utils.config_value(template_config, - cstr.CSTR_REINSTALL) - if reinstall is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_REINSTALL, config_fpath) - return None - - template = VirtTemplate(iso, template_hostname, internet, - network_configs, image_dir, distro, ram_size, - disk_sizes, dns, - bus_type=bus_type, - server_host=server_host, - server_host_id=server_host_id, - reinstall=reinstall) - templates[template_hostname] = template - return templates - - -def lipe_vm_install(log, workspace, config, config_fpath): - """ - Start to install virtual machine - """ - # pylint: disable=too-many-return-statements,too-many-locals - # pylint: disable=too-many-branches,too-many-statements - ssh_host_configs = utils.config_value(config, cstr.CSTR_SSH_HOSTS) - if ssh_host_configs is None: - log.cl_error("can NOT find [%s] in the config file, " - "please correct file [%s]", - cstr.CSTR_SSH_HOSTS, config_fpath) - return -1 - - hosts = {} - for host_config in ssh_host_configs: - host_id = host_config[cstr.CSTR_HOST_ID] - if host_id is None: - log.cl_error("can NOT find [%s] in the config of a " - "SSH host, please correct file [%s]", - cstr.CSTR_HOST_ID, config_fpath) - return -1 - - hostname = utils.config_value(host_config, cstr.CSTR_HOSTNAME) - if hostname is None: - log.cl_error("can NOT find [%s] in the config of SSH host " - "with ID [%s], please correct file [%s]", - cstr.CSTR_HOSTNAME, host_id, config_fpath) - return -1 - - ssh_identity_file = utils.config_value(host_config, cstr.CSTR_SSH_IDENTITY_FILE) - - if host_id in hosts: - log.cl_error("multiple SSH hosts with the same ID [%s], please " - "correct file [%s]", host_id, config_fpath) - return -1 - host = ssh_host.SSHHost(hostname, ssh_identity_file) - hosts[host_id] = host - - kvm_template_dict = parse_templates_config(log, 
workspace, config, config_fpath, hosts=hosts) - if kvm_template_dict is None: - log.cl_error("failed to parse the config of templates") - return -1 - - for template in kvm_template_dict.values(): - iso = template.vt_iso - template_hostname = template.vt_template_hostname - internet = template.vt_internet - network_configs = template.vt_network_configs - image_dir = template.vt_image_dir - distro = template.vt_distro - ram_size = template.vt_ram_size - disk_sizes = template.vt_disk_sizes - bus_type = template.vt_bus_type - server_host = template.vt_server_host - reinstall = template.vt_reinstall - dns = template.vt_dns - - state = server_host.sh_virsh_dominfo_state(log, template_hostname) - if not reinstall and state is not None: - log.cl_debug("skipping reinstall of template [%s] according to config", - template_hostname) - continue - - ret = vm_install(log, workspace, server_host, iso, template_hostname, - internet, dns, network_configs, image_dir, distro, - ram_size, disk_sizes, bus_type) - if ret: - log.cl_error("failed to create virtual machine template [%s]", - template_hostname) - return -1 - - shared_disks = {} - shared_disk_configs = utils.config_value(config, cstr.CSTR_SHARED_DISKS) - if shared_disk_configs is None: - log.cl_info("can NOT find [%s] in the config file [%s], " - "ignore it.", - cstr.CSTR_SHARED_DISKS, config_fpath) - else: - ret = lipe_parse_sharedisks_configs(log, shared_disk_configs, - shared_disks, hosts, - config_fpath) - if ret: - log.cl_error("failed to parse [%s] in the config file [%s], " - "please correct it.", - cstr.CSTR_SHARED_DISKS, config_fpath) - return -1 - - vm_host_configs = utils.config_value(config, cstr.CSTR_VM_HOSTS) - if vm_host_configs is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_VM_HOSTS, config_fpath) - return -1 - - vm_hosts = [] - hosts_servers_mapping = dict() - shared_disk_ids_mapping = dict() - hosts_string = "" - for vm_host_config in vm_host_configs: - hostname = 
utils.config_value(vm_host_config, cstr.CSTR_HOSTNAME) - if hostname is None: - log.cl_error("no [hostname] is configured for a vm_host, " - "please correct file [%s]", config_fpath) - return -1 - - ips = utils.config_value(vm_host_config, cstr.CSTR_HOST_IPS) - if ips is None: - log.cl_error("no [%s] is configured for a vm_host, " - "please correct file [%s]", cstr.CSTR_HOST_IPS, - config_fpath) - return -1 - - template_hostname = utils.config_value(vm_host_config, - cstr.CSTR_TEMPLATE_HOSTNAME) - if template_hostname is None: - log.cl_error("can NOT find [%s] in the config of a " - "SSH host, please correct file [%s]", - cstr.CSTR_TEMPLATE_HOSTNAME, config_fpath) - return -1 - - if template_hostname not in kvm_template_dict: - log.cl_error("template with hostname [%s] is NOT configured in " - "[%s], please correct file [%s]", - template_hostname, cstr.CSTR_TEMPLATES, config_fpath) - return -1 - - template = kvm_template_dict[template_hostname] - - reinstall = utils.config_value(vm_host_config, cstr.CSTR_REINSTALL) - state = template.vt_server_host.sh_virsh_dominfo_state(log, hostname) - if reinstall is None: - reinstall = False - if state is None: - reinstall = True - - if not reinstall: - ret = vm_start(log, workspace, - template.vt_server_host, - hostname, - template.vt_network_configs, - ips, - template.vt_template_hostname, - template.vt_image_dir, - template.vt_distro, - template.vt_internet, - len(template.vt_disk_sizes)) - if ret: - log.cl_error("virtual machine [%s] can't be started", - hostname) - return -1 - else: - ret = vm_clone(log, workspace, - template.vt_server_host, - hostname, - template.vt_network_configs, - ips, - template.vt_template_hostname, - template.vt_image_dir, - template.vt_distro, - template.vt_internet, - len(template.vt_disk_sizes)) - if ret: - log.cl_error("failed to create virtual machine [%s] based on " - "template [%s]", hostname, - template.vt_template_hostname) - return -1 - - host_ip = ips[0] - vm_host = 
lustre.LustreServerHost(hostname) - hosts_string += ("%s %s\n" % (host_ip, hostname)) - vm_hosts.append(vm_host) - hosts_servers_mapping[hostname] = template.vt_server_host - shared_disk_ids = utils.config_value(vm_host_config, - cstr.CSTR_SHARED_DISK_IDS) - if shared_disk_ids is None or shared_disk_configs is None: - continue - shared_disk_ids_mapping[hostname] = shared_disk_ids - - host_configs = utils.config_value(config, cstr.CSTR_HOSTS) - if host_configs is not None: - for host_config in host_configs: - hostname = utils.config_value(host_config, cstr.CSTR_HOSTNAME) - if hostname is None: - log.cl_debug("can NOT find [%s] in the config file, " - "please correct file [%s]", - cstr.CSTR_HOSTNAME, config_fpath) - continue - - host_ip = utils.config_value(host_config, cstr.CSTR_IP) - if host_ip is None: - log.cl_debug("can NOT find [%s] in the config file, " - "please correct file [%s]", - cstr.CSTR_IP, config_fpath) - continue - hosts_string += ("%s %s\n" % (host_ip, hostname)) - else: - log.cl_debug("can NOT find [%s] in the config file [%s], " - "ignore it", - cstr.CSTR_HOSTS, config_fpath) - - hosts_fpath = workspace + "/hosts" - with open(hosts_fpath, "wt") as hosts_file: - with open("/etc/hosts") as local_hosts: - for line in local_hosts: - hosts_file.write(line) - - hosts_file.write(hosts_string) - hosts_file.flush() - - for host in vm_hosts: - # Cleanup log dirs, as previous clownfish testing may generate - # lots of logs. 
- command = "rm -rf /var/log/lipe*" - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Umount all mount points - command = ("cat /proc/mounts") - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - pattern = (r"^(?P\S+) (?P/var/log/clownfish\S*) .+$") - regular = re.compile(pattern) - - for line in retval.cr_stdout.splitlines(): - # log.cl_debug("checking line [%s]", line) - match = regular.match(line) - if not match: - continue - - mount_point = match.group("mount_point") - command = "umount %s" % mount_point - retval = host.sh_run(log, command) - if retval.cr_exit_status != 0: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - command = "rm -rf /var/log/clownfish*" - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - log.cl_info("preparing virtual machine [%s] after starting it", - host.sh_hostname) - ret = host.sh_send_file(log, hosts_fpath, "/etc") - if ret: - log.cl_error("failed to send hosts file [%s] on local host to " - "directory [%s] on host [%s]", - hosts_fpath, workspace, - host.sh_hostname) - return -1 - - # Clear the known_hosts, otherwise the reinstalled hosts can't be - # accessed by other hosts - command = 
"> /root/.ssh/known_hosts" - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - # Stop Corosync to kill all possible Clownfish server - for host in vm_hosts: - service_names = ["corosync", "pacemaker"] - for service_name in service_names: - ret = host.sh_service_stop(log, service_name) - if ret: - log.cl_error("failed to stop service [%s] on host [%s]", - service_name, host.sh_hostname) - return -1 - - ret = host.sh_service_disable(log, service_name) - if ret: - log.cl_error("failed to disable service [%s] on host [%s]", - service_name, host.sh_hostname) - return -1 - - # umount all Lustre clients first - reboot_hosts = [] - for host in vm_hosts: - ret = host.lsh_lustre_umount_services(log, client_only=True) - if ret: - log.cl_info("failed to umount Lustre clients on host [%s], " - "reboot is needed", host.sh_hostname) - reboot_hosts.append(host) - - # umount all Lustre servers - for host in vm_hosts: - ret = host.lsh_lustre_umount_services(log) - if ret: - log.cl_info("failed to umount Lustre servers on host [%s], " - "reboot is needed", host.sh_hostname) - if host not in reboot_hosts: - reboot_hosts.append(host) - - for host in reboot_hosts: - ret = lipe_vm_reboot(log, host, - hosts_servers_mapping[host.sh_hostname]) - if ret: - log.cl_error("failed to reboot host [%s]", - host.sh_hostname) - return -1 - - for host in vm_hosts: - # Destroy all ZFS pool - ret = host.sh_destroy_zfs_pools(log) - if ret: - log.cl_info("failed to destroy ZFS pools on host [%s], " - "reboot is needed", host.sh_hostname) - ret = lipe_vm_reboot(log, host, - hosts_servers_mapping[host.sh_hostname]) - if ret: - log.cl_error("failed to reboot host [%s]", - host.sh_hostname) - return -1 - - ret = host.sh_destroy_zfs_pools(log) - if ret: - log.cl_info("failed to 
destroy ZFS pools on host [%s] even " - "after reboot", host.sh_hostname) - return -1 - - # Detach all shared disks - hostname = host.sh_hostname - server_host = hosts_servers_mapping[hostname] - ret = server_host.sh_virsh_detach_domblks(log, hostname, - LVIRT_IMAGE_SHARED_SUBFIX) - if ret: - log.cl_error("failed to deatch disks on VM [%s]", - hostname) - return -1 - - # Generate the targets of shared disks - shared_disk_ids = shared_disk_ids_mapping[hostname] - if shared_disk_ids is None or shared_disk_configs is None: - continue - - command = ("> %s" % LIPE_UDEV_RULES) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - server_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - target_index = 0 - for shared_disk_id in shared_disk_ids: - if shared_disk_id not in shared_disks: - log.cl_error("shared disk with ID [%s] is not configured", - shared_disk_id) - return -1 - - shared_disk = shared_disks[shared_disk_id] - - if template.vt_server_host_id != shared_disk.sd_server_host_id: - log.cl_error("shared disk with ID [%s] is not configured " - "on host with ID [%s]. 
It is on host with ID " - "[%s] instead, thus can't share it on VM [%s].", - shared_disk_id, template.vt_server_host_id, - shared_disk.sd_server_host_id, hostname) - return -1 - - while True: - target_name = target_index2name(log, target_index) - target_index += 1 - command = "ls /dev/%s" % target_name - retval = host.sh_run(log, command) - # If the device exists, use another device - if retval.cr_exit_status: - break - shared_target = SharedTarget(host, target_name) - shared_disk.sd_add_target(log, shared_target) - - for shared_disk in shared_disks.values(): - ret = shared_disk.sd_share(log) - if ret: - log.cl_error("failed to share disk [%s] on server host with " - "ID [%s]", shared_disk.sd_image_fpath, - shared_disk.sd_server_host_id) - return -1 - return 0 - - -def lipe_virt(log, workspace, config_fpath): - """ - Start to test holding the confiure lock - """ - # pylint: disable=too-many-branches,bare-except,too-many-locals - # pylint: disable=too-many-statements - config_fd = open(config_fpath) - ret = 0 - try: - config = yaml.load(config_fd) - except: - log.cl_error("not able to load [%s] as yaml file: %s", config_fpath, - traceback.format_exc()) - ret = -1 - config_fd.close() - if ret: - return -1 - - try: - ret = lipe_vm_install(log, workspace, config, config_fpath) - except: - ret = -1 - log.cl_error("exception: %s", traceback.format_exc()) - - if ret: - log.cl_error("failed to install the VMs, please check [%s] for more " - "log", workspace) - else: - log.cl_info("installed the VMs successfully, please check [%s] for more " - "log", workspace) - return ret - - -def usage(): - """ - Print usage string - """ - utils.oprint("Usage: %s " % sys.argv[0]) - - -def main(): - """ - Install virtual machines - """ - cmd_general.main(LIPE_VIRT_CONFIG, LIPE_VIRT_LOG_DIR, - lipe_virt) diff --git a/lipe/pylustre/lustre_test.py b/lipe/pylustre/lustre_test.py deleted file mode 100644 index b53cab7..0000000 --- a/lipe/pylustre/lustre_test.py +++ /dev/null @@ -1,158 +0,0 @@ 
-# Copyright (c) 2017 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com - -""" -Lustre test library -""" -import time - -# Local libs -from pylustre import utils -from pylustre import watched_io - -MULTIOP = "/usr/lib64/lustre/tests/multiop" -PAUSING = "PAUSING\n" - - -def check_file_executable(log, host, fpath): - """ - Check the file is executable - """ - command = ("test -f %s && test -x %s " % (fpath, fpath)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return 0 - - -class Multiop(object): - """ - multiop process on a host - """ - def __init__(self, host, fpath, args, stdout_file, stderr_file): - # pylint: disable=too-many-arguments - self.mop_host = host - self.mop_command = MULTIOP + " " + fpath + " " + args - self.mop_stdout_file = stdout_file - self.mop_stderr_file = stderr_file - self.mop_retval = None - self.mop_stdout = "" - self.mop_exited = False - - def mop_wait_pausing(self, log): - """ - Wait until the multiop is pausing - """ - return self._mop_wait_output(log, PAUSING) - - def _mop_wait_output(self, log, expected, timeout=60, sleep_interval=1): - """ - Wait until the output is expected - """ - waited = 0 - while True: - if self.mop_stdout == expected: - log.cl_debug("got expected output [%s]", expected) - return 0 - - if waited < timeout: - waited += sleep_interval - time.sleep(sleep_interval) - continue - log.cl_error("timeout when waiting output, expected [%s], " - "got [%s]", expected, self.mop_stdout) - return -1 - return -1 - - def mop_watcher_stdout(self, args, new_log): - """ - log watcher of stdout - """ - # pylint: disable=unused-argument - log = args["log"] - if len(new_log) == 0: - return - self.mop_stdout += new_log - log.cl_debug("stdout of multiop [%s]: [%s]", self.mop_command, - 
new_log) - - def mop_watcher_stderr(self, args, new_log): - """ - log wather of stderr - """ - log = args["log"] - # pylint: disable=unused-argument - if len(new_log) == 0: - return - log.cl_debug("stderr of multiop [%s]: [%s]", self.mop_command, - new_log) - - def _mop_thread_main(self, log): - """ - Thread of running multiop - """ - host = self.mop_host - args = {} - args["log"] = log - stdout_fd = watched_io.watched_io_open(self.mop_stdout_file, - self.mop_watcher_stdout, args) - stderr_fd = watched_io.watched_io_open(self.mop_stderr_file, - self.mop_watcher_stderr, args) - log.cl_debug("start to run command [%s] on host [%s]", - self.mop_command, host.sh_hostname) - retval = host.sh_run(log, self.mop_command, stdout_tee=stdout_fd, - stderr_tee=stderr_fd, return_stdout=False, - return_stderr=False, timeout=None, flush_tee=True) - stdout_fd.close() - stderr_fd.close() - - log.cl_debug("thread of multiop [%s] is exiting", - self.mop_command) - self.mop_retval = retval - self.mop_exited = True - - def mop_start(self, log): - """ - Start the process of multiop - """ - utils.thread_start(self._mop_thread_main, (log)) - - def mop_pkill(self, log): - """ - Kill the process of running multiop - """ - return self.mop_host.sh_pkill(log, self.mop_command) - - def mop_signal(self, log): - """ - Send USR1 singal to the process - """ - return self.mop_host.sh_pkill(log, self.mop_command, - special_signal="USR1") - - def mop_wait_exit(self, log, timeout=60, sleep_interval=1, quiet=False): - """ - Wait until the process exits - """ - waited = 0 - while True: - if self.mop_exited: - log.cl_debug("multiop thread exited") - return 0 - - if waited < timeout: - waited += sleep_interval - time.sleep(sleep_interval) - continue - if not quiet: - log.cl_error("timeout when waiting the multiop thread to exit") - return -1 - return -1 diff --git a/lipe/pylustre/test_common.py b/lipe/pylustre/test_common.py deleted file mode 100644 index 6e68e1f..0000000 --- a/lipe/pylustre/test_common.py 
+++ /dev/null @@ -1,321 +0,0 @@ -# Copyright (c) 2018 DataDirect Networks, Inc. -# All Rights Reserved. -# Author: lixi@ddn.com -""" -Library for testing -""" - -import traceback -import os -import yaml - -# Local libs -from pylustre import utils -from pylustre import cstr -from pylustre import ssh_host -from pylustre import watched_io -from pylustre import lyaml -from pylustre import lipe_virt - - -def install_with_iso_mounted(args, mnt_path): - """ - Run the install test - """ - # pylint: disable=too-many-locals,too-many-arguments - log, workspace, install_server, install_config, cmd_name, install_config_fname = args - # Make sure install server is local host, since this will overwrite the - # local config files - uuid_install = install_server.sh_uuid(log) - if uuid_install is None: - log.cl_error("failed to get the UUID on host [%s]", - install_server.sh_hostname) - return -1 - - local_host = ssh_host.SSHHost("localhost", local=True) - uuid_local = local_host.sh_uuid(log) - if uuid_local is None: - log.cl_error("failed to get the UUID on localhost") - return -1 - - if uuid_local == uuid_install: - log.cl_error("please do NOT use host [%s] as the install server, " - "since it is the localhost, and installation test " - "would overwrite the local configuration files", - install_server.sh_hostname) - return -1 - - ret = install_server.sh_rpm_find_and_uninstall(log, "grep lipe") - if ret: - log.cl_error("failed to uninstall LiPE rpms on host [%s]", - install_server.sh_hostname) - return -1 - - # Fix me: LiPE depends on liblustreapi.so, but installation scripts don't - package_dir = mnt_path + "/" + cstr.CSTR_PACKAGES - command = ("rpm -ivh %s/lipe-pylustre-*.x86_64.rpm " - "%s/lipe-1.*.x86_64.rpm " - "%s/lipe-client-1.*.x86_64.rpm " - "%s/lipe-server-1.*.x86_64.rpm " - "%s/lipe-clownfish-*.x86_64.rpm --nodeps" % - (package_dir, package_dir, package_dir, package_dir, package_dir)) - retval = install_server.sh_run(log, command) - if retval.cr_exit_status: - 
log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - install_server.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - install_config_fpath = (workspace + "/" + install_config_fname) - config_string = ("""# -# Configuration file for installing %s from DDN -# -""" % (cmd_name)) - config_string += yaml.dump(install_config, Dumper=lyaml.YamlDumper, - default_flow_style=False) - try: - with open(install_config_fpath, 'w') as yaml_file: - yaml_file.write(config_string) - except: - log.cl_error("failed to save the config file to [%s]") - return -1 - - ret = install_server.sh_send_file(log, install_config_fpath, "/etc") - if ret: - log.cl_error("failed to send file [%s] on local host to " - "/etc on host [%s]", - install_config_fpath, - install_server.sh_hostname) - return -1 - - args = {} - args["log"] = log - args["hostname"] = install_server.sh_hostname - stdout_file = (workspace + "/" + cmd_name + "_install.stdout") - stderr_file = (workspace + "/" + cmd_name + "_install.stderr") - stdout_fd = watched_io.watched_io_open(stdout_file, - watched_io.log_watcher_info, args) - stderr_fd = watched_io.watched_io_open(stderr_file, - watched_io.log_watcher_error, args) - command = ("%s_install" % (cmd_name)) - retval = install_server.sh_run(log, command, stdout_tee=stdout_fd, - stderr_tee=stderr_fd, return_stdout=False, - return_stderr=False, timeout=None, - flush_tee=True) - stdout_fd.close() - stderr_fd.close() - - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d]", - command, - install_server.sh_hostname, - retval.cr_exit_status) - return -1 - return 0 - - -def mount_and_run(log, host, host_iso_path, funct, args): - """ - Mount the ISO and run @funct with @args - """ - # pylint: disable=bare-except,too-many-arguments - mnt_path = "/mnt/" + utils.random_word(8) - - command = ("mkdir -p %s && mount -o loop %s %s" % - 
(mnt_path, host_iso_path, mnt_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - try: - ret = funct(args, mnt_path) - if ret: - log.cl_error("failed to run funct with ISO mnt [%s]", mnt_path) - except: - ret = -1 - log.cl_error("exception: %s", traceback.format_exc()) - - command = ("umount %s" % (mnt_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - ret = -1 - - command = ("rmdir %s" % (mnt_path)) - retval = host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - return ret - - -def mount_and_install(log, workspace, host, host_iso_path, config, - cmd_name, install_config_fname): - """ - Mount the ISO and install - """ - # pylint: disable=too-many-arguments - args = (log, workspace, host, config, cmd_name, install_config_fname) - ret = mount_and_run(log, host, host_iso_path, install_with_iso_mounted, args) - if ret: - log.cl_error("failed to mount and install") - return ret - return 0 - - -def start_install(log, workspace, install_server, install_config, config_fpath, - cmd_name, install_config_fname): - """ - Start do real install action - """ - # pylint: disable=too-many-locals,too-many-arguments - command = "mkdir -p %s" % workspace - retval = install_server.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], 
stderr = [%s]", - command, - install_server.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - local_host = ssh_host.SSHHost("localhost", local=True) - command = "ls lipe-*.iso" - retval = local_host.sh_run(log, command) - if retval.cr_exit_status: - log.cl_error("failed to run command [%s] on host [%s], " - "ret = [%d], stdout = [%s], stderr = [%s]", - command, - local_host.sh_hostname, - retval.cr_exit_status, - retval.cr_stdout, - retval.cr_stderr) - return -1 - - current_dir = os.getcwd() - iso_names = retval.cr_stdout.split() - if len(iso_names) != 1: - log.cl_error("found unexpected ISOs [%s] under currect directory " - "[%s]", iso_names, current_dir) - return -1 - - iso_name = iso_names[0] - iso_path = current_dir + "/" + iso_name - - ret = install_server.sh_send_file(log, config_fpath, workspace) - if ret: - log.cl_error("failed to send Clownfish config [%s] on local host to " - "directory [%s] on host [%s]", - config_fpath, workspace, - install_server.sh_hostname) - return -1 - config_fname = os.path.basename(config_fpath) - - ret = install_server.sh_send_file(log, iso_path, workspace) - if ret: - log.cl_error("failed to send LiPE ISO [%s] on local host to " - "directory [%s] on host [%s]", - iso_path, workspace, - install_server.sh_hostname) - return -1 - - host_iso_path = workspace + "/" + iso_name - host_config_fpath = workspace + "/" + config_fname - install_config[cstr.CSTR_ISO_PATH] = host_iso_path - install_config[cstr.CSTR_CONFIG_FPATH] = host_config_fpath - ret = mount_and_install(log, workspace, install_server, host_iso_path, - install_config, cmd_name, install_config_fname) - if ret: - log.cl_error("failed to test installation on host [%s]", - install_server.sh_hostname) - return -1 - return 0 - - -def test_install(log, workspace, install_config_fpath, - skip_install, install_server, cmd_name, - install_config_fname): - """ - Start to test - """ - # pylint: disable=too-many-arguments - install_config_fd 
= open(install_config_fpath) - ret = 0 - try: - install_config = yaml.load(install_config_fd) - except: - log.cl_error("not able to load [%s] as yaml file: %s", - install_config_fpath, traceback.format_exc()) - ret = -1 - install_config_fd.close() - if ret: - return -1 - - config_fpath = utils.config_value(install_config, - cstr.CSTR_CONFIG_FPATH) - if config_fpath is None: - log.cl_error("can NOT find [%s] in the installation config, " - "please correct file [%s]", - cstr.CSTR_CONFIG_FPATH, install_config_fpath) - return -1 - - if not skip_install: - ret = start_install(log, workspace, install_server, install_config, - config_fpath, cmd_name, install_config_fname) - if ret: - log.cl_error("failed to run install test") - return -1 - return 0 - - -def test_install_virt(log, workspace, test_config, test_config_fpath): - """ - Start to install virt - """ - skip_virt = utils.config_value(test_config, - cstr.CSTR_SKIP_VIRT) - if skip_virt is None: - log.cl_debug("no [%s] is configured, do not skip checking virt") - skip_virt = False - - if skip_virt: - log.cl_debug("skip checking virt") - return 0 - - virt_config_fpath = utils.config_value(test_config, - cstr.CSTR_VIRT_CONFIG) - if virt_config_fpath is None: - log.cl_error("no [%s] is configured, please correct file [%s]", - cstr.CSTR_VIRT_CONFIG, test_config_fpath) - return -1 - ret = lipe_virt.lipe_virt(log, workspace, virt_config_fpath) - if ret: - log.cl_error("failed to install the virtual machines") - return -1 - return 0 diff --git a/lipe/systemd/lipe_test_scheduler.service b/lipe/systemd/lipe_test_scheduler.service deleted file mode 100644 index 06f719e..0000000 --- a/lipe/systemd/lipe_test_scheduler.service +++ /dev/null @@ -1,10 +0,0 @@ -[Unit] -Description=Lipe Test Scheduler, common scheduler framework, manages the usage of test hosts and services the test launchers. - -[Service] -Type=simple -ExecStart=/usr/bin/lipe_test_scheduler -User=root - -[Install] -WantedBy=multi-user.target -- 1.8.3.1