*~
/build
/example_configs/clownfish/clownfish_test.conf
-/example_configs/lipe/lipe_test.conf
/lipe.spec
/lipe-*.tar.bz2
/lipe-*.tar.gz
endif
PYTHON_COMMANDS = \
- gen_lipe_test \
ldsync \
lipe_build \
lipe_expression_tests \
lipe_install \
lipe_install_build_deps \
lipe_launch \
- lipe_test \
- lipe_test_console \
- lipe_test_launch \
- lipe_test_scheduler \
- lipe_virt \
loris_backup \
loris_crontab \
loris_test \
- lpcc \
- pyltest_import_check
+ lpcc
EXTRA_DIST= \
$(PYTHON_COMMANDS) \
detect-distro.sh \
lipe-revision.sh \
example_configs/clownfish/seperate_mgs/clownfish.conf \
- example_configs/clownfish/seperate_mgs/lipe_virt.conf \
example_configs/lipe/lipe_install.conf \
example_configs/lipe/lipe_launch.json \
example_configs/loris/loris.conf \
- example_configs/ltest/lipe_test_scheduler.conf \
example_configs/hotpool/* \
init.d/* \
lipe.conf \
pylipe/*.py \
pyloris/*.py \
pylustre/*.py \
- pyltest/*.py \
scripts/*.sh \
systemd/* \
man/* \
.pylintrc
-PYLTEST_FILES = $(wildcard pyltest/*.py)
PYTHON_LIB_FILES = $(wildcard pyclownfish/*.py pylustre/*.py pyloris/*.py)
-PYTHON_LIB_FILES += $(PYLTEST_FILES)
PYTHON_FILES = $(PYTHON_LIB_FILES) $(PYTHON_COMMANDS)
PYTHON_CHECKS = $(PYTHON_FILES:%=%.python_checked)
-PYLTEST_CHECKS = $(PYLTEST_FILES:%=%.pyltest_import_checked)
-PYTHON_CHECKS += $(PYLTEST_CHECKS)
CHECKS = $(PYTHON_CHECKS)
-%.pyltest_import_checked: %
- python2 ./pyltest_import_check $<
- touch $@
-
%.python_checked: % pylipe/.pylintrc
@if test $< != $(PYTHON_PROTOBUF); then \
PYLINTRC=pylipe/.pylintrc $(PYLINT) --disable=I $< || exit 1; \
rm -f compile depcomp install-sh missing
PYLUSTRE_RPM = build/RPMS/x86_64/lipe-pylustre-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm
-PYLTEST_RPM = build/RPMS/x86_64/lipe-pyltest-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm
CLOWNFISH_RPM = build/RPMS/x86_64/lipe-clownfish-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm
LIPE_RPM = build/RPMS/x86_64/lipe-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm
LIPE_DEBUGINFO_RPM = build/RPMS/x86_64/lipe-debuginfo-$(PACKAGE_VERSION)-$(LIPE_RELEASE).el$(DISTRO_RELEASE)*.x86_64.rpm
cp $(LIPE_RPM) $(PACKAGE_PATH)
cp $(LIPE_DEBUGINFO_RPM) $(PACKAGE_PATH)
cp $(PYLUSTRE_RPM) $(PACKAGE_PATH)
- cp $(PYLTEST_RPM) $(PACKAGE_PATH)
cp $(LORIS_RPM) $(PACKAGE_PATH)
cp $(LIPE_CLIENT_RPM) $(PACKAGE_PATH)
cp $(LIPE_SERVER_RPM) $(PACKAGE_PATH)
touch configure.ac &&
autoheader \
&& aclocal \
-&& $libtoolize --ltdl --copy --force \
+&& $libtoolize --copy --force \
&& automake --add-missing --copy \
&& autoconf
+++ /dev/null
-../seperate_mgs/lipe_virt.conf
\ No newline at end of file
+++ /dev/null
-# Configuration file for installing virtual machines
-#
-# Configuration Guide:
-#
-#
-ssh_hosts: # Array of hosts
- - host_id: server17 # ID of this SSH host
- hostname: server17 # The host name
- ssh_identity_file: /root/.ssh/id_dsa # The SSH key to connect to the host
-templates: # The templates to be installed
-  - hostname: server17_rhel6_template # Template hostname
- internet: true # Whether to enable Internet access
- dns: 10.0.0.253 # The DNS IP
-    ram_size: 2048 # RAM size in MB
- disk_sizes: # Disks to attach to this VM
- - 10 # Disk size in GB
- iso: /work/ISOs/CentOS-6.9-x86_64-bin-DVD1.iso # The path of ISO
- reinstall: false # Whether to reinstall
- network_configs: # Configurations of network interfaces
- - gateway: 10.0.0.253
- ip: 10.0.0.189
- netmask: 255.255.252.0
- virt_install_option: bridge=br0
- server_host_id: server17 # On which host this template exists
- image_dir: /images2 # The path to save virtual machine images
- distro: rhel6 # The distro version
- - hostname: server17_rhel7_template
- internet: true
- dns: 10.0.0.253
- ram_size: 2048
- disk_sizes:
- - 10
- iso: /work/ISOs/CentOS-7-x86_64-DVD-1611.iso
- reinstall: false
- network_configs:
- - gateway: 10.0.0.253
- ip: 10.0.0.190
- netmask: 255.255.252.0
- virt_install_option: bridge=br0
- server_host_id: server17
- image_dir: /images2
- distro: rhel7
-shared_disks: # Array of disks shared by multiple hosts
- - disk_id: lipe_mgs # ID of the disk
- size: 1 # Size of the disk (GB)
- server_host_id: server17 # On which host this disk exists
- image_file: /images2/lipe_mgs.img # Path of the image file
- - disk_id: lipe0_mdt0
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_mdt0.img
- - disk_id: lipe0_mdt1
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_mdt1.img
- - disk_id: lipe0_ost0
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_ost0.img
- - disk_id: lipe0_ost1
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_ost1.img
- - disk_id: lipe0_ost2
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_ost2.img
- - disk_id: lipe0_ost3
- size: 5
- server_host_id: server17
- image_file: /images2/lipe0_ost3.img
- - disk_id: lipe1_mdt0
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_mdt0.img
- - disk_id: lipe1_mdt1
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_mdt1.img
- - disk_id: lipe1_ost0
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_ost0.img
- - disk_id: lipe1_ost1
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_ost1.img
- - disk_id: lipe1_ost2
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_ost2.img
- - disk_id: lipe1_ost3
- size: 5
- server_host_id: server17
- image_file: /images2/lipe1_ost3.img
-vm_hosts: # Array of hosts
- - hostname: server17-el7-vm1 # The host name
- ips: # The host IPs
- - 10.0.1.148
- reinstall: false # Whether to reinstall this vm
- template_hostname: server17_rhel7_template # The hostname of template
- shared_disk_ids:
- - lipe_mgs
- - lipe0_mdt0
- - lipe0_mdt1
- - hostname: server17-el7-vm2
- ips:
- - 10.0.1.149
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe_mgs
- - lipe0_mdt0
- - lipe0_mdt1
- - hostname: server17-el7-vm3
- ips:
- - 10.0.1.251
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe0_ost0
- - lipe0_ost1
- - lipe0_ost2
- - lipe0_ost3
- - hostname: server17-el7-vm4
- ips:
- - 10.0.1.252
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe0_ost0
- - lipe0_ost1
- - lipe0_ost2
- - lipe0_ost3
- - hostname: server17-el7-vm5
- ips:
- - 10.0.1.253
- reinstall: false
- template_hostname: server17_rhel7_template # The hostname of template
- shared_disk_ids:
- - lipe1_mdt0
- - lipe1_mdt1
- - hostname: server17-el7-vm6
- ips:
- - 10.0.1.254
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe1_mdt0
- - lipe1_mdt1
- - hostname: server17-el7-vm7
- ips:
- - 10.0.1.255
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe1_ost0
- - lipe1_ost1
- - lipe1_ost2
- - lipe1_ost3
- - hostname: server17-el7-vm8
- ips:
- - 10.0.2.197
- reinstall: false
- template_hostname: server17_rhel7_template
- shared_disk_ids:
- - lipe1_ost0
- - lipe1_ost1
- - lipe1_ost2
- - lipe1_ost3
- - hostname: server17-el7-vm9
- ips:
- - 10.0.2.198
- reinstall: false
- template_hostname: server17_rhel7_template
- - hostname: server17-el7-vm10
- ips:
- - 10.0.2.199
- reinstall: false
- template_hostname: server17_rhel7_template
- - hostname: server17-el7-vm11
- ips:
- - 10.0.2.200
- reinstall: false
- template_hostname: server17_rhel7_template
- - hostname: server17-el7-vm12
- ips:
- - 10.0.2.201
- reinstall: false
- template_hostname: server17_rhel7_template
-hosts: # Array of hosts to add into /etc/hosts of VMs
- - hostname: server17 # Hostname of the host
- ip: 10.0.0.37 # IP of the host
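
For reference, a minimal sketch of how a tool could consume the layout above (using yaml.safe_load from PyYAML; the field names are the ones shown in the sample config, while the file path and print format are assumptions for illustration only):

    #!/usr/bin/python2
    # Sketch: list the VMs and their shared disks from a lipe_virt.conf-style
    # file. Error handling is omitted because this is only an illustration.
    import yaml

    with open("lipe_virt.conf") as conf_file:
        config = yaml.safe_load(conf_file)

    # index the shared disks by disk_id, then walk the VM list
    disks = dict((d["disk_id"], d) for d in config.get("shared_disks", []))
    for vm in config.get("vm_hosts", []):
        print("%s (template %s)" % (vm["hostname"], vm["template_hostname"]))
        for disk_id in vm.get("shared_disk_ids", []):
            disk = disks[disk_id]
            print("  %s: %dGB at %s" % (disk_id, disk["size"], disk["image_file"]))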
+++ /dev/null
-../clownfish/seperate_mgs/lipe_virt.conf
\ No newline at end of file
+++ /dev/null
-# Configuration file of the LiPE Test Scheduler Service
-#
-# Configuration Guide:
-# port:
-# network port used to connect to the scheduler service, defaults to 1234
-#
-# log_dir:
-# log directory to save the latest scheduler log
-#
-# $test_hosts:
-# Hosts used to build LiPE or to run LiPE tests.
-#
-# $templates:
-# templates used to recover broken VMs
-#
-port: 1234
-test_hosts: # Array of hosts
- - hostname: server17-el7-vm[1-9] # The host name
- purpose: test # The purpose of these hosts, either test or build
-    distro: rhel7 # Distro; rhel6 and rhel7 are supported now, the Debian series is WIP
-    kvm: # KVM related config
-      kvm_server_hostname: server17 # The KVM host server that these nodes run on
-      kvm_template_ipv4_address: 10.0.0.190 # IP address of the template used to recover the KVM VM
- template_hostname: rhel7_template # The template hostname configured in $templates
- concurrency: 1
- - hostname: server17
- purpose: build
- distro: rhel7
- concurrency: 3
-templates: # The templates to be installed
-  - hostname: rhel6_template # Template hostname
- internet: true # Whether to enable Internet access
- dns: 10.0.0.253 # The DNS IP
- ram_size: 2048 # Ram size in MB
- bus_type: virtio # virt bus type, virtio, scsi, ide
- disk_sizes: # Disks attached to this VM
- - 10
- - 2
- iso: /work/ISOs/CentOS-6.9-x86_64-bin-DVD1.iso # The path of ISO
- reinstall: false # Whether to reinstall
- network_configs: # Configurations of network interfaces
- - gateway: 10.0.0.253
- ip: 10.0.0.189
- netmask: 255.255.252.0
- virt_install_option: bridge=br0
-    image_dir: /images/ # Where the virt images are stored
- distro: rhel6 # The distro version
- - hostname: rhel7_template
- internet: true
- dns: 10.0.0.253
- ram_size: 2048
- bus_type: virtio
- disk_sizes:
- - 10
- - 2
- iso: /work/ISOs/CentOS-7-x86_64-Minimal-1804.iso
- reinstall: false
- network_configs:
- - gateway: 10.0.0.253
- ip: 10.0.0.189
- netmask: 255.255.252.0
- virt_install_option: bridge=br0
- image_dir: /images/
- distro: rhel7
-ip_addresses:
- - ip_address: 10.0.0.40
- bindnetaddr: 10.0.0.0
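
The guide above documents `port` as defaulting to 1234 when absent. A minimal sketch of loading such a file and applying that default (the function name and the strictness of the `purpose` check are assumptions for illustration, not the shipped loader):

    # Sketch: read a lipe_test_scheduler.conf-style file with PyYAML.
    import yaml

    def load_scheduler_config(path):
        with open(path) as conf_file:
            config = yaml.safe_load(conf_file) or {}
        port = config.get("port", 1234)  # documented default
        test_hosts = config.get("test_hosts", [])
        for host in test_hosts:
            # per the guide, purpose is either "test" or "build"
            if host.get("purpose") not in ("test", "build"):
                raise ValueError("bad purpose for host %s" % host.get("hostname"))
        return port, test_hosts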
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Generate lipe_test.conf
-"""
-from pybuild import gen_lipe_test
-
-if __name__ == "__main__":
- gen_lipe_test.main()
+++ /dev/null
-#!/bin/sh
-#
-# Start/stop the LiPE test scheduler service
-#
-# chkconfig: 345 95 5
-# description: Lipe Test Scheduler
-# configs: /etc/lipe_test_scheduler.conf
-
-### BEGIN INIT INFO
-# Required-Start: $local_fs
-# Required-Stop: $local_fs
-# Default-Start: 345
-# Default-Stop: 95
-# Short-Description: Lipe Test Scheduler
-# Description: LiPE Test Scheduler, a common scheduler framework that manages the usage of test hosts and serves the test launchers.
-### END INIT INFO
-
-. /etc/rc.d/init.d/functions
-
-exec=/usr/bin/lipe_test_scheduler
-prog="lipe_test_scheduler"
-
-start() {
- [ -x $exec ] || exit 5
- echo -n $"Starting $prog ..."
- $exec > /dev/null 2> /dev/null &
- echo
-}
-
-stop() {
- echo -n $"Stopping $prog ..."
- if [ -n "`pidfileofproc $exec`" ] ; then
- killproc $exec
- else
- killall $prog > /dev/null 2> /dev/null &
- fi
- echo
-}
-
-restart() {
- stop
- start
-}
-
-reload() {
- restart
-}
-
-force_reload() {
- restart
-}
-
-rh_status() {
- # run checks to determine if the service is running or use generic status
- status $prog
-}
-
-rh_status_q() {
- rh_status >/dev/null 2>&1
-}
-
-
-case "$1" in
- start)
- rh_status_q && exit 0
- $1
- ;;
- stop)
- rh_status_q || exit 0
- $1
- ;;
- restart)
- $1
- ;;
- reload)
- rh_status_q || exit 7
- $1
- ;;
- force-reload)
- force_reload
- ;;
- status)
- rh_status
- ;;
- condrestart|try-restart)
- rh_status_q || exit 0
- restart
- ;;
- *)
- echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
- exit 2
-esac
-exit $?
%description clownfish
Clownfish manages Lustre clusters for HA purposes.
-%package pyltest
-Summary: Python Library of LiPE common test framework
-Requires: lipe-pylustre = %{version}-%{release}
-Provides: lipe-pyltest = %{version}-%{release}
-%if %{with systemd}
-Requires(post): systemd
-Requires(preun): systemd
-Requires(postun): systemd
-BuildRequires: systemd
-%else
-Requires(post): chkconfig
-Requires(preun): chkconfig
-%endif
-Group: Applications/System
-
-%description pyltest
-Pyltest is a common test framework for LiPE.
-
-%post pyltest
-%if %{with systemd}
-%systemd_post lipe_test_scheduler.service
-%endif
-
-%preun pyltest
-%if %{with systemd}
-%systemd_preun lipe_test_scheduler.service
-%else
-/sbin/service lipe_test_scheduler stop >/dev/null 2>&1 ||:
-/sbin/chkconfig --del lipe_test_scheduler
-%endif
-
-%postun pyltest
-%if %{with systemd}
-%systemd_postun_with_restart lipe_test_scheduler.service
-%else
-/sbin/service lipe_test_scheduler condrestart >/dev/null 2>&1 ||:
-%endif
-
%package server
Summary: Lipe Server Package
Requires: lustre
python2 -m py_compile pyclownfish/*.py
python2 -m py_compile pylipe/*.py
python2 -m py_compile pyloris/*.py
-python2 -m py_compile pyltest/*.py
%endif
-find pyclownfish pylustre pylipe pyloris pyltest -maxdepth 1 -type f -a -name "*.python_checked" -o -name "*.py" | xargs rm -f
+find pyclownfish pylustre pylipe pyloris -maxdepth 1 -type f \( -name "*.python_checked" -o -name "*.py" \) | xargs rm -f
%install
rm -rf $RPM_BUILD_ROOT
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d
cp \
lpcc \
- lipe_virt \
src/lpcc_purge \
$RPM_BUILD_ROOT%{_bindir}
cp -a pylustre $RPM_BUILD_ROOT%{python_sitelib}
cp -a \
- example_configs/clownfish/seperate_mgs/lipe_virt.conf \
lpcc.conf \
$RPM_BUILD_ROOT%{_sysconfdir}
lipe_run_action \
lipe_install \
lipe_launch \
- lipe_test \
- lipe_test_console \
- lipe_test_launch \
- lipe_test_scheduler \
loris_backup \
loris_crontab \
loris_test \
cp -a pyclownfish $RPM_BUILD_ROOT%{python_sitelib}
cp -a pylipe $RPM_BUILD_ROOT%{python_sitelib}
cp -a pyloris $RPM_BUILD_ROOT%{python_sitelib}
-cp -a pyltest $RPM_BUILD_ROOT%{python_sitelib}
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}
cp -a \
example_configs/clownfish/seperate_mgs/clownfish.conf \
example_configs/lipe/lipe_install.conf \
example_configs/lipe/lipe_launch.json \
example_configs/loris/loris.conf \
- example_configs/ltest/lipe_test_scheduler.conf \
lipe.conf \
$RPM_BUILD_ROOT%{_sysconfdir}
mkdir -p $RPM_BUILD_ROOT%{_unitdir}/
install -m 0644 -D systemd/lpcc.service $RPM_BUILD_ROOT%{_unitdir}/lpcc.service
%if %{with server}
- install -m 0644 -D systemd/lipe_test_scheduler.service \
- $RPM_BUILD_ROOT%{_unitdir}/lipe_test_scheduler.service
%if %{with hotpool}
install -m 0644 -D systemd/lpurge@.service \
$RPM_BUILD_ROOT%{_unitdir}/lpurge@.service
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d
install -m 0744 -D init.d/lpcc \
$RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/lpcc
-%if %{with server}
- install -m 0744 -D init.d/lipe_test_scheduler \
- $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/lipe_test_scheduler
-%endif # end server
%endif
install -m 0644 man/lpcc.8 $RPM_BUILD_ROOT%{_mandir}/man8/
install -m 0644 man/lpcc-start.8 $RPM_BUILD_ROOT%{_mandir}/man8/
%files pylustre
%{python2_sitelib}/pylustre
-%{_bindir}/lipe_virt
-%config(noreplace) %{_sysconfdir}/lipe_virt.conf
%files lpcc
%defattr(-,root,root)
%{_bindir}/lcreatemany
%config(noreplace) %{_sysconfdir}/clownfish.conf
-%files pyltest
-%{python2_sitelib}/pyltest
-%{_bindir}/lipe_test_console
-%{_bindir}/lipe_test_launch
-%{_bindir}/lipe_test_scheduler
-%config(noreplace) %{_sysconfdir}/lipe_test_scheduler.conf
-%if %{with systemd}
- %{_unitdir}/lipe_test_scheduler.service
-%else
- %{_sysconfdir}/rc.d/init.d/lipe_test_scheduler
-%endif
-
%files server
%defattr(-,root,root)
%{_bindir}/ext4_inode2path
%{_bindir}/lipe_convert_expr
%{_bindir}/lipe_install
%{_bindir}/lipe_launch
-%{_bindir}/lipe_test
%{_bindir}/lfill
%{_bindir}/lipe_scan
%{_bindir}/lipe_scan2
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Test LIPE
-"""
-from pylipe import lipe_test
-
-if __name__ == "__main__":
- lipe_test.main()
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Start the LiPE test console
-"""
-from pyltest import lipe_test_console
-
-if __name__ == "__main__":
- lipe_test_console.main()
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Start the LiPE test process
-"""
-from pyltest import lipe_test_launch
-
-if __name__ == "__main__":
- lipe_test_launch.main()
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Start the scheduler
-"""
-from pyltest import lipe_test_scheduler
-
-if __name__ == "__main__":
- lipe_test_scheduler.main()
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Install virtual machines
-"""
-from pylustre import lipe_virt
-
-if __name__ == "__main__":
- lipe_virt.main()
"""
Python library for build
"""
-__all__ = ["gen_lipe_test",
- "lipe_build",
- "pyltest_import_check",
+__all__ = ["lipe_build",
"lipe_expression_tests"]
+++ /dev/null
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-
-"""
-Generate lipe_test.conf
-"""
-import os
-import sys
-import yaml
-
-from pylustre import utils
-from pylustre import cstr
-from pylustre import lipe_virt
-from pylustre import clog
-from pylustre import lyaml
-from pylustre import constants
-from pylipe import lipe_test
-
-
-def usage():
- """
- Print usage string
- """
- utils.oprint("Usage: %s config_file" %
- sys.argv[0])
-
-
-def main():
- """
- Generate lipe_test.conf
- """
- # pylint: disable=bare-except,not-callable
- log = clog.get_log()
- if sys.version[0] == '2':
- reload(sys)
- if hasattr(sys, "setdefaultencoding"):
- set_encoding = getattr(sys, "setdefaultencoding", None)
- set_encoding('UTF-8')
- else:
- os.environ["PYTHONIOENCODING"] = 'UTF-8'
-
- if len(sys.argv) != 2:
- usage()
- sys.exit(-1)
-
- config_fpath = sys.argv[1]
- config = {}
- config[cstr.CSTR_VIRT_CONFIG] = lipe_virt.LIPE_VIRT_CONFIG
- config[cstr.CSTR_SKIP_VIRT] = False
- config[cstr.CSTR_SKIP_INSTALL] = False
- config[cstr.CSTR_LIPE_INSTALL_CONFIG] = constants.LIPE_INSTALL_CONFIG
- install_server = {}
- install_server[cstr.CSTR_HOSTNAME] = "installhost"
- install_server[cstr.CSTR_SSH_IDENTITY_FILE] = "/root/.ssh/id_dsa"
- config[cstr.CSTR_INSTALL_SERVER] = install_server
- tests = []
- for test_funct in lipe_test.LIPE_TESTS:
- tests.append(test_funct.__name__)
- config[cstr.CSTR_ONLY_TESTS] = tests
- config_string = ("""#
-# Configuration file for testing LiPE from DDN
-#
-# Please comment out the test names under "%s" if you want to skip some tests
-#
-# Please set "%s" to true if LiPE and Clownfish are already installed and
-# properly running.
-#
-# Please set "%s" to true if the virtual machines are already
-# installed and properly running.
-#
-""" % (cstr.CSTR_ONLY_TESTS, cstr.CSTR_SKIP_INSTALL, cstr.CSTR_SKIP_VIRT))
- config_string += yaml.dump(config, Dumper=lyaml.YamlDumper,
- default_flow_style=False)
- try:
- with open(config_fpath, 'w') as yaml_file:
- yaml_file.write(config_string)
- except:
-        log.cl_error("""Failed to save the config file. To avoid data loss, please save the
-following config manually:""")
- sys.stdout.write(config_string)
- sys.exit(-1)
-    log.cl_info("Config file saved to [%s]", config_fpath)
- sys.exit(0)
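
Since main() prepends a "#" comment header before the yaml.dump() body, the generated file is still plain YAML. A quick hedged check that such a file parses back (the path below is hypothetical):

    # Sketch: the "#" header lines are YAML comments, so the generated
    # config can be read back directly.
    import yaml

    with open("lipe_test.conf") as conf_file:
        config = yaml.safe_load(conf_file)
    print(sorted(config.keys()))  # e.g. the cstr.* keys written by main()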
+++ /dev/null
-# Copyright (c) 2019 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Library for checking the imports of pyltest source code
-"""
-import sys
-
-# Local libs
-from pylustre import clog
-
-
-def check_import(log, fpath):
- """
- Check the import of file path
- """
- with open(fpath, "r") as fd:
- lines = fd.readlines()
-
- for line in lines:
- if line.startswith("from pyclownfish import"):
- log.cl_error("file [%s] imports library from pyclownfish, which "
- "is not allowed", fpath)
- return -1
- if line.startswith("from pylipe import"):
- log.cl_error("file [%s] imports library from pylipe, which "
- "is not allowed", fpath)
- return -1
- return 0
-
-
-def main():
- """
- Check the source code files
- """
- log = clog.get_log()
- for arg in sys.argv[1:]:
- log.cl_info("checking file [%s]", arg)
- ret = check_import(log, arg)
- if ret:
-            log.cl_error("file [%s] imports a disallowed library", arg)
- sys.exit(-1)
- sys.exit(0)
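
check_import() only matches lines that literally begin with "from pyclownfish import" or "from pylipe import", so "import pyclownfish" or an indented import inside a function would slip through. A stricter alternative (plainly a different technique, not the shipped check) could walk the AST; a sketch using the standard ast module:

    # Sketch: AST-based variant of the layering check above.
    import ast

    FORBIDDEN = ("pyclownfish", "pylipe")

    def check_import_ast(fpath):
        with open(fpath) as fd:
            tree = ast.parse(fd.read(), filename=fpath)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                names = [node.module or ""]
            else:
                continue
            for name in names:
                if name.split(".")[0] in FORBIDDEN:
                    return -1  # disallowed import found
        return 0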
"lipe_flist_handle",
"lipe_install",
"lipe_install_nodeps",
- "lipe_launch",
- "lipe_test",
- "lipe_hotpool_test"]
+ "lipe_launch"]
+++ /dev/null
-# Copyright (c) 2020 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: Gu Zheng <gzheng@ddn.com>
-"""
-Regression test for lipe/hotpool (lamigo&lpurge)
-"""
-# pylint: disable=too-many-lines
-import math
-import os
-import time
-import traceback
-import threading
-import yaml
-from pylustre import lustre
-from pylustre import utils
-
-
-# constant params
-HOTPOOL_LAMIGO = "lamigo"
-HOTPOOL_LPURGE = "lpurge"
-HOTPOOL_CLIENT = "/mnt/hotpool_test"
-HOTPOOL_POOL_FAST = "fast"
-HOTPOOL_POOL_SLOW = "slow"
-HOTPOOL_LAMIGO_DUMP_PREFIX = "lamigo.dump"
-HOTPOOL_LAMIGO_DEBUG_PREFIX = "lamigo.debug"
-HOTPOOL_LAMIGO_CONFIG_PREFIX = "lamigo.conf"
-HOTPOOL_LPURGE_DUMP_PREFIX = "lpurge.dump"
-HOTPOOL_LPURGE_DEBUG_PREFIX = "lpurge.debug"
-HOTPOOL_LPURGE_CONFIG_PREFIX = "lpurge.conf"
-
-LPURGE_ETC_CONFIG_DIR = "/etc/lpurge/"
-
-
-# lamigo option key words
-LAMIGO_CONFIG_MDT = "mdt"
-LAMIGO_CONFIG_DAEMONIZE = "daemonize"
-LAMIGO_CONFIG_MOUNTPOINT = "mountpoint"
-LAMIGO_CONFIG_CACHE_SIZE = "max-cache"
-LAMIGO_CONFIG_CHANGELOG_USER = "user"
-LAMIGO_CONFIG_MIN_AGE = "min-age"
-LAMIGO_CONFIG_AGENT = "agent"
-LAMIGO_CONFIG_SRC_POOL = "src"
-LAMIGO_CONFIG_TGT_POOL = "tgt"
-LAMIGO_CONFIG_DUMP_FILE = "dump"
-LAMIGO_CONFIG_DEBUG_FILE = "debug"
-LAMIGO_CONFIG_IML_RE_SOCKET = "iml-re-socket"
-LAMIGO_CONFIG_IML_EX_SOCKET = "iml-ex-socket"
-LAMIGO_CONFIG_NUM_THREADS = "num-threads"
-LAMIGO_CONFIG_RESCAN = "rescan"
-LAMIGO_CONFIG_POOL_REFRESH = "pool-refresh"
-LAMIGO_CONFIG_PROGRESS_INTERVAL = "progress-interval"
-
-LAMIGO_DUMP_CONFIG_SECTION = "config"
-
-LAMIGO_DUMP_KEYWORDS_MAP = {
- LAMIGO_CONFIG_CHANGELOG_USER: "chlg_user",
- LAMIGO_CONFIG_MDT: "mdtname",
- LAMIGO_CONFIG_MOUNTPOINT: "mountpoint",
- LAMIGO_CONFIG_SRC_POOL: "source_pool",
- LAMIGO_CONFIG_TGT_POOL: "target_pool",
- LAMIGO_CONFIG_MIN_AGE: "min_age",
- LAMIGO_CONFIG_CACHE_SIZE: "max_cache",
- LAMIGO_CONFIG_RESCAN: "rescan",
- LAMIGO_CONFIG_NUM_THREADS: "thread_count",
- LAMIGO_CONFIG_POOL_REFRESH: "pool_refresh",
- LAMIGO_CONFIG_PROGRESS_INTERVAL: "progress_interval",
- LAMIGO_CONFIG_IML_RE_SOCKET: "iml_re_socket",
- LAMIGO_CONFIG_IML_EX_SOCKET: "iml_ex_socket"}
-
-# lpurge option key words
-LPURGE_CONFIG_DEVICE = "device" # lustre-OST0000
-LPURGE_CONFIG_FREELO = "freelo" # 50
-LPURGE_CONFIG_FREEHI = "freehi" # 80
-LPURGE_CONFIG_DEBUG = "debug"
-LPURGE_CONFIG_DUMP = "dump"
-LPURGE_CONFIG_MAX_JOBS = "max_jobs" # 8
-LPURGE_CONFIG_SCAN_THREADS = "scan_threads" # 1
-LPURGE_CONFIG_POOL = "pool" # fast_pool
-LPURGE_CONFIG_MDS = "mds" # 0:host:/mnt/lustre
-LPURGE_CONFIG_MOUNT = "mount" # /mnt/lustre
-LPURGE_CONFIG_SCAN_RATE = "scan_rate" # 10000
-LPURGE_CONFIG_DRYRUN = "dryrun" # true
-LPURGE_CONFIG_IML_SOCKET = "iml_socket" # /tmp/mylpurge.socket
-LPURGE_CONFIG_SLOT_SIZE = "slot_size" # 1048576
-LPURGE_CONFIG_CHECK_INTERVAL = "interval"
-
-LPURGE_DUMP_CONFIG_SECTION = "config"
-
-LPURGE_DUMP_KEYWORDS_MAP = {
- LPURGE_CONFIG_FREEHI: "free_high",
- LPURGE_CONFIG_FREELO: "free_low",
- LPURGE_CONFIG_DEVICE: "ostname",
- LPURGE_CONFIG_MOUNT: "mountpoint",
- LPURGE_CONFIG_POOL: "pool",
- LPURGE_CONFIG_MDS: "mds",
- LPURGE_CONFIG_MAX_JOBS: "max_jobs",
- LPURGE_CONFIG_CHECK_INTERVAL: "check_interval",
- LPURGE_CONFIG_SCAN_RATE: "scan_rate",
- LPURGE_CONFIG_SCAN_THREADS: "scan_threads",
- LPURGE_CONFIG_SLOT_SIZE: "slot_size",
- LPURGE_CONFIG_IML_SOCKET: "iml_socket"}
-
-HOTPOOL_TEST_CASES = []
-
-
-class HotpoolTestContext(object):
- """
- test context for hotpool
- """
- # pylint: disable=too-many-instance-attributes
- def __init__(self, log, workspace, clowfish_instance):
- self.htc_log = log
- self.htc_workspace = workspace
- self.htc_clowfish_instance = clowfish_instance
- self.htc_lustre_fs = None
- self.htc_fsname = ""
- self.htc_mgs_host = None
- self.htc_mdt = None
- self.htc_mdt_instance = None
- self.htc_mdt_index = ""
- self.htc_mds_host = None
- self.htc_fast_osts = list()
- self.htc_slow_osts = list()
- self.htc_fast_oss_host = None
- self.htc_changelog_user = ""
- self.htc_ost_list = list()
- self.htc_ost_pools = dict()
- self.htc_additional_clients = list()
- self.htc_client_mountpoint = "/mnt/hotpool_test"
-
- def prepare_lustre_fs(self):
- """
- prepare lustre for following test cases
- """
- # pylint: disable=too-many-branches,too-many-statements
- # reformat & mount lustre instance to get a pure testing environment
- rc = self.htc_clowfish_instance.ci_umount_all(self.htc_log)
- if rc < 0:
- self.htc_log.cl_error("failed to umount clownfish instance")
- return rc
-
- rc = self.htc_clowfish_instance.ci_format_all(self.htc_log)
- if rc < 0:
- self.htc_log.cl_error("failed to reformat clownfish instance")
- return rc
-
- rc = self.htc_clowfish_instance.ci_mount_all(self.htc_log)
- if rc < 0:
- self.htc_log.cl_error("failed to mount clownfish instance")
- return rc
-
- for lustrefs in self.htc_clowfish_instance.ci_lustres.values():
-            # hint: use a lustre instance which has 6 osts (4 ldiskfs + 2 zfs)
- if len(lustrefs.lf_osts) > 4:
- self.htc_lustre_fs = lustrefs
- break
- if self.htc_lustre_fs is None:
- self.htc_log.cl_error("can't find valid lustre fs for hotpool testing, exit")
- return -1
-
- self.htc_fsname = self.htc_lustre_fs.lf_fsname
-
- # get mgs
- if self.htc_lustre_fs.lf_mgs is not None:
- mgs = self.htc_lustre_fs.lf_mgs
- elif self.htc_lustre_fs.lf_mgs_mdt is not None:
- mgs = self.htc_lustre_fs.lf_mgs_mdt
- else:
- self.htc_log.cl_error("no mgs found from lustre %s",
- self.htc_fsname)
- return -1
-
- mgsi = mgs.ls_mounted_instance(self.htc_log)
- if mgsi is None:
- self.htc_log.cl_error("failed to get active mgs service from lustre %s",
- self.htc_fsname)
- return -1
- self.htc_mgs_host = mgsi.lsi_host
-
- # get mdt0
- if len(self.htc_lustre_fs.lf_mdts) < 1:
- self.htc_log.cl_error("no mdt found from lustre %s",
- self.htc_fsname)
- return -1
-
- mdt0 = None
- for mdt in self.htc_lustre_fs.lf_mdts.values():
- if mdt.ls_index == 0:
- mdt0 = mdt
- if mdt0 is None:
- self.htc_log.cl_error("no valid MDT instance found from lustre %s",
- self.htc_fsname)
- return -1
- self.htc_mdt = mdt0
-
- self.htc_mdt_index = "-".join([self.htc_fsname, self.htc_mdt.ls_index_string])
- mdti = mdt0.ls_mounted_instance(self.htc_log)
- if mdti is None:
- self.htc_log.cl_error("failed to get mdt service for mdt %s",
- self.htc_mdt_index)
- return -1
- self.htc_mdt_instance = mdti
- self.htc_mds_host = mdti.lsi_host
-
- # get fast osts
- for ost in self.htc_lustre_fs.lf_osts.values():
- if ost.ls_backfstype == lustre.BACKFSTYPE_LDISKFS:
- self.htc_ost_list.append(ost)
- if len(self.htc_ost_list) < 4:
-            self.htc_log.cl_error("invalid ost list, need at least 4 but got %d",
- len(self.htc_ost_list))
- return -1
-
- oss_host_osts_mapping = dict()
- for ost in self.htc_ost_list:
- osti = ost.ls_mounted_instance(self.htc_log)
- if osti is None:
- self.htc_log.cl_error("failed to get ost service for ost %s",
- ost.ls_index_string)
- return -1
- if oss_host_osts_mapping.get(osti.lsi_host.sh_hostname) is None:
- oss_host_osts_mapping[osti.lsi_host.sh_hostname] = list()
- oss_host_osts_mapping[osti.lsi_host.sh_hostname].append(ost)
- if len(oss_host_osts_mapping[osti.lsi_host.sh_hostname]) >= 2:
- self.htc_fast_oss_host = osti.lsi_host
-
- if self.htc_fast_oss_host is None:
-            self.htc_log.cl_error("failed to get a host with two active ost services")
- return -1
-
- self.htc_fast_osts = oss_host_osts_mapping[self.htc_fast_oss_host.sh_hostname][0:2]
- self.htc_slow_osts = list(set(self.htc_ost_list).difference(set(self.htc_fast_osts)))[0:2]
-
- if (len(self.htc_slow_osts) < 2) or (len(self.htc_fast_osts) < 2):
-            self.htc_log.cl_error("failed to get enough (two or more) active osts for hotpool testing")
- return -1
- return 0
-
- def prepare_additional_clients(self):
- """
-        Set up additional client mountpoints on the MDS and fast OSS
- """
- for host in [self.htc_mds_host, self.htc_fast_oss_host]:
- new_client = lustre.LustreClient(self.htc_log,
- self.htc_lustre_fs,
- host,
- self.htc_client_mountpoint,
- add_to_host=True)
- self.htc_additional_clients.append(new_client)
- rc = new_client.lc_mount(self.htc_log)
- if rc < 0:
- self.htc_log.cl_error("failed to mount client [%s] to host [%s]",
- self.htc_client_mountpoint,
- host.sh_hostname)
- return rc
- return 0
-
- def prepare_changelog_user(self):
- """
- Prepare changelog user for following tests
- """
- self.htc_changelog_user = self.htc_mdt_instance.mdti_changelog_register(self.htc_log)
- if self.htc_changelog_user is None:
- self.htc_log.cl_error("failed to register changelog user to mdt %s",
- self.htc_mdt_index)
- return -1
- return 0
-
- def prepare_ost_pools(self):
- """
- Setup ost pool properly
- """
- fast_pool = lustre.LustrePool(self.htc_lustre_fs,
- HOTPOOL_POOL_FAST)
- rc = fast_pool.lp_new(self.htc_log, self.htc_mgs_host)
- if rc < 0:
- self.htc_log.cl_error("failed to create pool [%s] from host [%s]",
- fast_pool.lp_fullname,
- self.htc_mgs_host.sh_hostname)
- return rc
- rc = fast_pool.lp_add(self.htc_log, self.htc_mgs_host,
- self.htc_fast_osts)
- if rc < 0:
- self.htc_log.cl_error("failed to add osts [%s] into pool [%s] from host [%s]",
- [ost.ls_index_string for ost in self.htc_fast_osts],
- fast_pool.lp_fullname, self.htc_mgs_host.sh_hostname)
- return rc
-
- self.htc_ost_pools[HOTPOOL_POOL_FAST] = fast_pool
-
- slow_pool = lustre.LustrePool(self.htc_lustre_fs,
- HOTPOOL_POOL_SLOW)
- rc = slow_pool.lp_new(self.htc_log, self.htc_mgs_host)
- if rc < 0:
- self.htc_log.cl_error("failed to create pool [%s] from host [%s]",
-                                  slow_pool.lp_fullname,
- self.htc_mgs_host.sh_hostname)
- return rc
- rc = slow_pool.lp_add(self.htc_log, self.htc_mgs_host,
- self.htc_slow_osts)
- if rc < 0:
- self.htc_log.cl_error("failed to add osts [%s] into pool [%s] from host [%s]",
- [ost.ls_index_string for ost in self.htc_slow_osts],
-                                  slow_pool.lp_fullname, self.htc_mgs_host.sh_hostname)
- return rc
- self.htc_ost_pools[HOTPOOL_POOL_SLOW] = slow_pool
- return 0
-
- def cleanup(self):
- """
- Cleanup the hotpool environment
- """
- for client in self.htc_additional_clients:
- client.lc_umount(self.htc_log)
- idx = client.lc_client_name
- if idx in self.htc_lustre_fs.lf_clients.keys():
- self.htc_lustre_fs.lf_clients.pop(idx)
- for pool in self.htc_ost_pools.values():
- pool.lp_remove(self.htc_log, self.htc_mgs_host, pool.lp_osts)
- pool.lp_destroy(self.htc_log, self.htc_mgs_host)
- self.htc_clowfish_instance.ci_umount_all(self.htc_log)
-
-
-def hotpool_test_genarate_tmpfile(workspace, prefix):
- """
-    generate a timestamped file path with the given prefix
-    :return: the generated path
- """
- time_stamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
- raw_path = os.path.join(workspace, str(prefix))
- return ".".join([raw_path, time_stamp])
-
-
-def hotpool_test_generate_config(log, config_file, options_dict):
- """
- Generate a config from an options dict
- """
-
- try:
- with open(config_file, "w") as fd:
- for k, v in options_dict.items():
- if isinstance(v, bool):
- fd.write("%s=%s\n" % (k, str(v).lower()))
- else:
- fd.write("%s=%s\n" % (k, v))
- fd.write("debug\n")
- except:
- log.cl_error("failed to generate config [%s] for options:%s ",
- config_file, options_dict)
- return -1
- return 0
-
-
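
hotpool_test_generate_config() writes one key=value line per option, lowercases booleans, and always appends a bare "debug" line. For example (dict ordering is not guaranteed under Python 2, so line order may vary), options_dict = {"mdt": "lustre0-MDT0000", "min-age": 5, "daemonize": False} would produce:

    mdt=lustre0-MDT0000
    min-age=5
    daemonize=false
    debug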
-def hotpool_test_setup(log, workspace, clowfish_instance):
- """
- Initialize & setup test context
- """
- hotpool_test_ctx = HotpoolTestContext(log, workspace,
- clowfish_instance)
-
- rc = hotpool_test_ctx.prepare_lustre_fs()
- if rc < 0:
- log.cl_error("failed to prepare testing environment")
- return None
-
- rc = hotpool_test_ctx.prepare_additional_clients()
- if rc < 0:
- log.cl_error("failed to mount additional clients")
- return None
-
- rc = hotpool_test_ctx.prepare_changelog_user()
- if rc < 0:
- log.cl_error("failed to register changelog user")
- return None
-
- rc = hotpool_test_ctx.prepare_ost_pools()
- if rc < 0:
- log.cl_error("failed to setup ost pools")
- return None
- return hotpool_test_ctx
-
-
-def hotpool_test_run(test_context):
- """
- Run hotpool test cases
- """
- rc = 0
- for case in HOTPOOL_TEST_CASES:
- try:
- test_context.htc_log.cl_info("start test case [%s]",
- case.__name__)
- rc = case(test_context)
-            # break out of the test loop only on failure
- if rc < 0:
- test_context.htc_log.cl_error("test case [%s] failed",
- case.__name__)
- break
- elif rc > 0:
- test_context.htc_log.cl_info("test case [%s] skipped",
- case.__name__)
- else:
- test_context.htc_log.cl_info("test %s success",
- case.__name__)
- except:
- test_context.htc_log.cl_error("%s",
- traceback.format_exc())
- rc = -1
- break
- test_context.cleanup()
- return rc
-
-
-def hotpool_test(log, workspace, clownfish_instance):
- """
- Run hotpool test on target cluster instance
- """
- test_ctx = hotpool_test_setup(log, workspace, clownfish_instance)
- if test_ctx is None:
- log.cl_error("failed to start hotpool test")
- return -1
- log.cl_info("starting hotpool test")
-
- rc = hotpool_test_run(test_ctx)
- if rc:
- log.cl_error("hotpool test failed")
- else:
- log.cl_info("hotpool test done")
- return rc
-
-
-def hotpool_test_start_lamigo_with_options(log, host, params):
- """
- Start lamigo with commandline options
- :return: 0: success, -1: fail
- """
- assert isinstance(params, dict)
-
- rescan_enabled = params.get(LAMIGO_CONFIG_RESCAN)
- scan_thread_count = params.get(LAMIGO_CONFIG_NUM_THREADS)
- if rescan_enabled and scan_thread_count:
- assert isinstance(rescan_enabled, int)
- assert isinstance(scan_thread_count, int)
- rescan_options = "-r -n %d" % scan_thread_count
- else:
- rescan_options = ""
-
- command = "lamigo -u {user} -m {mdt} -a {min-age} -b{debug} -c {max-cache} " \
- "-w {dump} -g {agent} -s {src} -t {tgt} %s -M {mountpoint}" % rescan_options
- command = command.format(**params)
-
- log.cl_info("starting lamigo with command [%s] on host [%s]",
- command, host.sh_hostname)
- ret_val = host.sh_run(log, command, wait=False)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout,
- ret_val.cr_stderr)
- return -1
- return 0
-
-
-def hotpool_test_start_lamigo_with_config(test_ctx, host, config_file):
- """
- Start lamigo with config_file
- :return: 0: success, -1: fail
- """
- log = test_ctx.htc_log
- workspace = test_ctx.htc_workspace
- # send config to host
- rc = host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, host.sh_hostname)
- return -1
-
- rc = host.sh_send_file(log, config_file, workspace)
- if rc < 0:
- log.cl_error("failed to send config file [%s] to target host [%s]",
- config_file, host.sh_hostname)
- return -1
-
- # start lamigo with config
- command = "lamigo -f %s" % config_file
- log.cl_info("starting lamigo with command [%s] on host [%s]",
- command, host.sh_hostname)
- retval = host.sh_run(log, command, wait=False)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def hotpool_test_start_lpurge(test_ctx, host, config_file):
- """
- Start lpurge
- :return: 0: success, -1: fail
- """
- log = test_ctx.htc_log
- workspace = test_ctx.htc_workspace
- rc = host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, host.sh_hostname)
- return -1
- # send config to host
- rc = host.sh_send_file(log, config_file, workspace)
- if rc < 0:
-        log.cl_error("failed to send config file [%s] to target host [%s]",
-                     config_file, host.sh_hostname)
- return -1
-
- # start lpurge with config
- command = "lpurge -f %s" % config_file
- log.cl_info("starting lpurge with command [%s] on host [%s]",
- command, host.sh_hostname)
- retval = host.sh_run(log, command, wait=False)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def hotpool_test_start_lpurge_service(test_ctx, host,
- params_dict, ost_index):
- """
- Start lpurge service unit
- """
- log = test_ctx.htc_log
- ect_conf_dir = os.path.join(LPURGE_ETC_CONFIG_DIR, test_ctx.htc_fsname)
- rc = host.sh_mkdir(log, ect_conf_dir)
- if rc < 0:
- return -1
-
- config_file = hotpool_test_genarate_tmpfile(test_ctx.htc_workspace,
- HOTPOOL_LPURGE_CONFIG_PREFIX)
- rc = hotpool_test_generate_config(log, config_file, params_dict)
- if rc < 0:
- return -1
-
- rc = host.sh_send_file(log, config_file,
- os.path.join(ect_conf_dir,
- ost_index + ".conf"))
- if rc < 0:
- return -1
-
- command = "systemctl start lpurge@%s-%s.service" % (test_ctx.htc_fsname, ost_index)
- log.cl_info("starting lpurge with command [%s] on host [%s]",
- command, host.sh_hostname)
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def hotpool_test_trigger_dump(log, host, dump_file, pid):
- """
- Trigger dump via sending signal SIGUSR1
- """
- if not pid:
- log.cl_error("pid is NULL")
- return -1
-
- # pylint: disable=too-many-arguments
- timeout = 10
- time_start = time.time()
- while True:
- # trigger SIGUSR1 to dump status
- rc = host.sh_kill(log, pid, special_signal=10)
- if rc != 0:
- log.cl_error("failed to trigger dump stats on [%s],"
- "ret = [%d]",
- host.sh_hostname, rc)
- return -1
- time.sleep(1)
-        # count the lines of the dump file; a non-empty dump means it landed
-        command = """awk 'END { print NR }' %s""" % dump_file
-        retval = host.sh_run(log, command, timeout=10)
-        if retval.cr_exit_status == 0:
-            line_count = retval.cr_stdout.strip()
-            if line_count and int(line_count) > 0:
- return 0
-
- time_now = time.time()
- elapsed = time_now - time_start
- if elapsed >= timeout:
- log.cl_error("dumping file [%s] not completed after 10 seconds\n",
- dump_file)
- break
- return -1
-
-
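
hotpool_test_trigger_dump() drives a convention where SIGUSR1 (signal 10) tells the daemon to rewrite its dump file, and the caller then polls that file for content. lamigo and lpurge implement the receiving side natively; a minimal Python illustration of that side, with placeholder path and field names:

    # Sketch: a SIGUSR1 handler that rewrites a YAML status file, mirroring
    # the dump convention the test polls for. All names are placeholders.
    import signal
    import yaml

    STATS = {"config": {"mdtname": "lustre0-MDT0000"}, "replicated": 0}

    def dump_stats(signum, frame):
        with open("/tmp/lamigo.dump", "w") as dump_fd:
            yaml.dump(STATS, dump_fd, default_flow_style=False)

    signal.signal(signal.SIGUSR1, dump_stats)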
-def hotpool_test_stop_process(log, host, process, pid_list):
- """
- Stop lamigo/lpurge
- :return: 0: success, 1: no lamigo process found, -1: fail
-    :return: 0: success, -1: fail
- if not pid_list:
- log.cl_error("pid list is NULL")
- return -1
-
- for pid in pid_list:
- rc = host.sh_kill(log, pid, 15)
- if rc:
- return -1
- return hotpool_test_wait_process_terminate(log, host, process)
-
-
-def hotpool_test_wait_process_terminate(log, host, process):
- """
-    Wait for @process to terminate
- :return: 0: success, -1: failure
- """
- def alive(retval, arg=0):
- """
- Internal compare function
- """
- if retval.cr_exit_status == 0:
- instance_count = retval.cr_stdout.strip()
- if instance_count and int(instance_count) > arg:
- return True
- return False
-
- still_exist = host.sh_wait_condition(log, "pkill --signal 0 -c -x %s" % process,
- alive, 0,
- timeout=10, sleep_interval=1)
- if still_exist:
- log.cl_error("process %s is still alive after 10 seconds\n",
- process)
- return -1
- return 0
-
-
-def hotpool_test_wait_process_launch(log, host, process, instance_count=1):
- """
-    Wait for @process to launch
- :return: 0: success, -1: failure
- """
- def active(retval, arg=0):
- """
- Internal compare function
- """
- if retval.cr_exit_status == 0:
- count = retval.cr_stdout.strip()
- if count and int(count) == arg:
- return 0
- return -1
-
- not_active = host.sh_wait_condition(log, "pkill --signal 0 -c -x %s" % process,
- active, instance_count,
- timeout=5, sleep_interval=1)
- if not_active:
-        log.cl_error("process %s is still not active after 5 seconds\n",
- process)
- return -1
- return 0
-
-
-def hotpool_test_lamigo_validate_options(log, dump_file, expected_dict):
- """
-    Validate options in dump_file against expected_dict
- """
- assert isinstance(expected_dict, dict)
- # parse dump file to get dumped config options
- with open(dump_file) as fd:
- dump_data = yaml.load(fd)
- config_options = dump_data.get(LAMIGO_DUMP_CONFIG_SECTION)
- if config_options is None:
- # no *config* section found
- log.cl_error("invalid dump file :%s, no [%s] section found\n",
- dump_file, LAMIGO_DUMP_CONFIG_SECTION)
- return -1
-
- match = True
- for k, v in expected_dict.items():
- dump_key = LAMIGO_DUMP_KEYWORDS_MAP.get(k)
- if dump_key is None:
- continue
- get_v = config_options.get(dump_key)
- if get_v is None:
- # no such field found
- log.cl_error("invalid dump file :%s, no [%s] field found\n",
- dump_file, dump_key)
- return -1
-
- if k == LAMIGO_CONFIG_SRC_POOL:
- pool_list = get_v.split(",")
- if v not in pool_list:
- match = False
- log.cl_error("expected %s:%s\nfound %s:%s",
- k, v, k, get_v)
- break
- elif get_v != v:
- match = False
- log.cl_info("expected %s:%s\nfound %s:%s",
- k, v, k, get_v)
- break
-
- if not match:
- log.cl_info("expected options:%s\nfound options:%s",
- expected_dict, config_options)
- return -1
- return 0
-
-
-def hotpool_test_lpurge_validate_options(log, dump_file, expected_dict):
- """
-    Validate options in dump_file against expected_dict
- """
- assert isinstance(expected_dict, dict)
- # parse dump file to get config options
- with open(dump_file) as fd:
- dump_data = yaml.load(fd)
-
- config_options = dump_data.get(LPURGE_DUMP_CONFIG_SECTION)
- if config_options is None:
- # no *config* section found
- log.cl_error("invalid dump file :%s, no [%s] section found\n",
- dump_file, LPURGE_DUMP_CONFIG_SECTION)
- return -1
-
- match = True
- for k, v in expected_dict.items():
- dump_key = LPURGE_DUMP_KEYWORDS_MAP.get(k)
- if dump_key is None:
- continue
- get_v = config_options.get(dump_key)
- if get_v is None:
- # no such field found
- log.cl_error("invalid dump file :%s, no [%s] field found\n",
- dump_file, dump_key)
- return -1
-
- if k == LPURGE_CONFIG_MDS:
- mds_list = get_v.split(",")
- if v not in mds_list:
- match = False
- log.cl_error("expected %s:%s\nfound %s:%s",
- k, v, k, get_v)
- break
- elif get_v != v:
- match = False
- log.cl_error("expected %s:%s\nfound %s:%s",
- k, v, k, get_v)
- break
-
- if not match:
- log.cl_info("expected options:%s\nfound options:%s",
- expected_dict, config_options)
- return -1
- return 0
-
-
-def hotpool_test_generate_basic_lamigo_parameters(test_context):
- """
- Generate basic parameters (dict format) to lamigo from @test_context
- :return: parameters dict
- """
- assert isinstance(test_context, HotpoolTestContext)
- workspace = test_context.htc_workspace
- params = {LAMIGO_CONFIG_CHANGELOG_USER: test_context.htc_changelog_user,
- LAMIGO_CONFIG_MDT: test_context.htc_mdt_index,
- LAMIGO_CONFIG_AGENT: "%s:%s:1" % (test_context.htc_mds_host.sh_hostname,
- test_context.htc_client_mountpoint),
- LAMIGO_CONFIG_MIN_AGE: 5,
- LAMIGO_CONFIG_CACHE_SIZE: 2048576,
- LAMIGO_CONFIG_DEBUG_FILE: hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LAMIGO_DEBUG_PREFIX),
- LAMIGO_CONFIG_DUMP_FILE: hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LAMIGO_DUMP_PREFIX),
- LAMIGO_CONFIG_SRC_POOL: test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name,
- LAMIGO_CONFIG_TGT_POOL: test_context.htc_ost_pools[HOTPOOL_POOL_SLOW].lp_name,
- LAMIGO_CONFIG_MOUNTPOINT: test_context.htc_client_mountpoint}
- return params
-
-
-def hotpool_test_generate_basic_lpurge_parameters(test_context):
- """
- Generate basic parameters (dict format) to lpurge from @test_context
- :param test_context:
- :return:
- """
- mds_host = test_context.htc_mds_host
- fast_pool = test_context.htc_ost_pools[HOTPOOL_POOL_FAST]
- ost_device = "-".join([test_context.htc_fsname,
- test_context.htc_fast_osts[0].ls_index_string])
-
- params = {LPURGE_CONFIG_DEVICE: ost_device,
- LPURGE_CONFIG_FREELO: 30,
- LPURGE_CONFIG_FREEHI: 50,
- LPURGE_CONFIG_MAX_JOBS: 2,
- LPURGE_CONFIG_SCAN_THREADS: 1,
- LPURGE_CONFIG_POOL: fast_pool.lp_name,
- LPURGE_CONFIG_MDS: "0:%s:%s" % (mds_host.sh_hostname,
- test_context.htc_client_mountpoint),
- LPURGE_CONFIG_MOUNT: test_context.htc_client_mountpoint,
- LPURGE_CONFIG_SCAN_RATE: 10000,
- LPURGE_CONFIG_SLOT_SIZE: 1048576}
- return params
-
-
-def hotpool_test_file_mirrored(log, host, file_path):
- """
-    Check whether the file has more than one mirror
-    :return: (True, stripe_info) if mirrored, (False, stripe_info) if not,
-             (-1, stripe_info or None) on error
- """
- command = "lfs getstripe %s" % file_path
- ret_val = host.sh_run(log, command)
- if ret_val.cr_exit_status != 0:
- log.cl_error("failed to run [%s] on host [%s], ret = [%d], "
- "stdout = [%s], stderr = [%s]", command,
- host.sh_hostname, ret_val.cr_exit_status,
- ret_val.cr_stdout, ret_val.cr_stderr)
- return -1, None
- if not ret_val.cr_stdout:
- log.cl_error("no stripe info of file [%s]", file_path)
- return -1, None
-
- stripe_info = ret_val.cr_stdout.strip()
- for line in stripe_info.splitlines():
- raw_line = line.strip()
- if raw_line.startswith("lcm_mirror_count:"):
- mirror_info = raw_line.split(':')
- mirror_count = mirror_info[1].strip()
- if int(mirror_count) >= 2:
- return True, ret_val.cr_stdout
- elif int(mirror_count) == 1:
- return False, ret_val.cr_stdout
- log.cl_error("no mirror info of file [%s]", file_path)
- return -1, ret_val.cr_stdout
-
-
-def hotpool_test_lamigo_wait_replicated(log, host, pid, stats_file,
- replicated_count, timeout=10):
- """
-    Wait until the count of replicated items reaches @replicated_count
- :return: 0: success, -1: failure
- """
- # pylint: disable=too-many-arguments
- time_start = time.time()
- while True:
- # trigger SIGUSR1 to dump lpurge status
- rc = host.sh_kill(log, pid, special_signal=10)
- if rc != 0:
- log.cl_error("failed to trigger lamigo dump stats on [%s],"
- "ret = [%d]",
- host.sh_hostname, rc)
- return -1
- time.sleep(1)
- command = """grep replicated %s | awk -F ':' '{print $2}'""" % stats_file
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
-            log.cl_error("failed to read the replicated count on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- host.sh_hostname, retval.cr_exit_status,
- retval.cr_stdout, retval.cr_stderr)
- return -1
- raw_out = retval.cr_stdout.strip()
- count = int(raw_out) if raw_out else 0
- if count >= replicated_count:
- return 0
-
- time_now = time.time()
- elapsed = time_now - time_start
- if elapsed < timeout:
-            log.cl_info("replicated:%d, expected:%d, waiting longer",
- count, replicated_count)
- continue
-        log.cl_error("timeout (%d s) waiting for lamigo to replicate [%d] "
- "files on host [%s]",
- timeout, replicated_count, host.sh_hostname)
- return -1
- return -1
-
-
-def hotpool_test_lpurge_wait_purged(log, host, pid, stats_file,
- purge_count, timeout=10):
- """
-    Wait until the count of purged items reaches @purge_count
- :return: 0: success, -1: failure
- """
- # pylint: disable=too-many-arguments
- time_start = time.time()
- while True:
- # trigger SIGUSR1 to dump lpurge status
- rc = host.sh_kill(log, pid, special_signal=10)
- if rc != 0:
- log.cl_error("failed to trigger lpurge dump stats on [%s],"
- "ret = [%d]", host.sh_hostname, rc)
- return -1
- time.sleep(1)
- command = """grep purged %s | awk -F ':' '{print $2}'""" % stats_file
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
-            log.cl_error("failed to read the purged count on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- host.sh_hostname, retval.cr_exit_status,
- retval.cr_stdout, retval.cr_stderr)
- return -1
- raw_out = retval.cr_stdout.strip()
- purged = int(raw_out) if raw_out else 0
- if purged >= purge_count:
- return 0
-
- time_now = time.time()
- elapsed = time_now - time_start
- if elapsed < timeout:
-            log.cl_info("purged:%d, expected:%d, waiting longer",
- purged, purge_count)
- continue
-        log.cl_error("timeout (%d s) waiting for lpurge to process [%d] "
- "files on host [%s]",
- timeout, purge_count, host.sh_hostname)
-
- command = """journalctl -u 'lpurge*'"""
- retval = host.sh_run(log, command)
- log.cl_error("lpurge logs = [%s], [%s]", retval.cr_stdout, retval.cr_stderr)
- return -1
- return -1
-
-
-def hotpool_test_lpurge_check_mirror_status(log, test_host, file_list):
- """
- Check the mirror state of files in file_list after purged
- """
- assert isinstance(file_list, list)
-
- for file_path in file_list:
- mirrored, stripe_info = hotpool_test_file_mirrored(log, test_host,
- file_path)
- if mirrored < 0:
- log.cl_error("failed to get stripe info of file [%s]",
- file_path)
- return -1
- elif mirrored:
-                log.cl_error("file [%s] still has extended mirrors after purge\n%s",
- file_path, stripe_info)
- return -1
- return 0
-
-
-def hotpool_create_mirror_dir(log, host, dirname, pool, ost_index=None):
- """
- Create dir, and set mirror info if specified
- """
- # pylint: disable=too-many-arguments
- rc = host.sh_mkdir(log, dirname)
- if rc < 0:
- return -1
-
- if ost_index is not None:
- ost_index_option = "-o %s" % ost_index
- else:
- ost_index_option = ""
-
- command = "lfs mirror create -N -p %s %s %s" % (pool, ost_index_option, dirname)
- ret_val = host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout, ret_val.cr_stderr)
- return -1
- return 0
-
-
-def hotpool_test_lamigo_start(test_context):
- """
- Test launch lamigo
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
-        log.cl_error("there are other running instances of [%s] "
-                     "that can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
-
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options,
- (log, test_host, params))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lamigo")
- lamigo_t.join()
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_start)
-
-
-def hotpool_test_lamigo_start_with_config(test_context):
- """
- Test run lamigo with config
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
-        log.cl_error("there are other running instances of [%s] "
-                     "that can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
-
- config_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LAMIGO_CONFIG_PREFIX)
- rc = hotpool_test_generate_config(log, config_file, params)
- if rc < 0:
- log.cl_error("failed to generate lamigo config with parameters: %s",
- params)
- return rc
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_config,
- (test_context, test_host, config_file))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- # check pid
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lamigo")
- lamigo_t.join()
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_start_with_config)
-
-
-def hotpool_test_lamigo_comand_line_options(test_context):
- """
- Test lamigo options (dumped to dumpfile)
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
-        log.cl_error("there are other running instances of [%s] "
-                     "that can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
- dump_file = params[LAMIGO_CONFIG_DUMP_FILE]
-
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options,
- (log, test_host, params))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
-
- rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0])
- if rc_1:
- log.cl_error("can't find file [%s] on host [%s]",
- dump_file, test_host.sh_hostname)
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lamigo")
- else:
- rc = rc_1
-
- lamigo_t.join()
- if rc:
- return rc
-
- # get dumpfile from host
- rc = test_host.sh_get_file(log, dump_file, test_context.htc_workspace)
- if rc < 0:
- log.cl_error("failed to get file [%s] from host [%s]",
- dump_file, test_host.sh_hostname)
- return rc
-
- return hotpool_test_lamigo_validate_options(log, dump_file, params)
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_comand_line_options)
-
-
-def hotpool_test_lamigo_config_options(test_context):
- """
- Test lamigo config options
- :return: 0: success, others: fail
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
-        log.cl_error("there are other running instances of [%s] "
-                     "that can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
- dump_file = params[LAMIGO_CONFIG_DUMP_FILE]
-
- config_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LAMIGO_CONFIG_PREFIX)
- rc = hotpool_test_generate_config(log, config_file, params)
- if rc < 0:
- log.cl_error("failed to generate lamigo config with parameters: %s",
- params)
- return rc
-
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_config,
- (test_context, test_host, config_file))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
-
- rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0])
- if rc_1:
- log.cl_error("can't find file [%s] on host [%s]",
- dump_file, test_host.sh_hostname)
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lamigo")
- else:
- rc = rc_1
-
- lamigo_t.join()
- if rc:
- return rc
- # get dumpfile from host
- rc = test_host.sh_get_file(log, dump_file, workspace)
- if rc < 0:
- log.cl_error("failed to get file [%s] from host [%s]",
- dump_file, test_host.sh_hostname)
- return rc
-
- return hotpool_test_lamigo_validate_options(log, dump_file, params)
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_config_options)
-
-
-def hotpool_test_lamigo_mirror_target(test_context):
- """
- Test whether lamigo creates mirror on target/slow pool
- """
- # pylint: disable=too-many-locals,too-many-branches,too-many-statements
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- dump_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LAMIGO_DUMP_PREFIX)
-
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
- params[LAMIGO_CONFIG_DUMP_FILE] = dump_file
- fast_pool_name = test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name
- min_age = params[LAMIGO_CONFIG_MIN_AGE]
-
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- test_dir = os.path.join(test_context.htc_client_mountpoint,
- "test_lamigo_mirror_target")
- ret = test_host.sh_rmdir_if_exist(log, test_dir)
- if ret:
- # the directory already exists, maybe with files under it; create a new one
- test_dir = ".".join([test_dir,
- time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))])
-
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options,
- (log, test_host, params))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- # wait for lamigo to become active
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if not pid_list:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
- elif len(pid_list) > 1:
- log.cl_error("more than one lamigo instances found, PIDs:%s",
- pid_list)
- lamigo_t.join()
- return -1
-
- rc = hotpool_create_mirror_dir(log, test_host, test_dir,
- fast_pool_name)
- if rc:
- log.cl_error("failed to create testdir [%s], exit", test_dir)
- hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- lamigo_t.join()
- return rc
-
- file_count = 10
- file_list = list()
- flist = [os.path.join(test_dir, str(i)) for i in range(file_count)]
- for item in flist:
- rc = test_host.sh_fill_binary_file(log, item, 1048576)
- if rc:
- log.cl_error("failed to create file [%s], exit", item)
- for f in file_list:
- test_host.sh_remove_file(log, f)
- test_host.sh_rmdir_if_exist(log, test_dir)
- hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- lamigo_t.join()
- return rc
- file_list.append(item)
-
- test_host.sh_run(log, "sync", timeout=10)
-
- # sleep for min_age seconds, then wait for the replication to finish
- time.sleep(min_age)
- rc1 = hotpool_test_lamigo_wait_replicated(log, test_host, pid_list[0],
- dump_file, file_count,
- timeout=60)
- if rc1 < 0:
- log.cl_error("lamigo: replicate timeout")
-
- # stop lamigo
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- lamigo_t.join()
- if rc < 0:
- log.cl_error("failed to stop lamigo")
- else:
- rc = rc1
-
- # check mirror
- if rc == 0:
- for item in file_list:
- mirrored, stripe_info = hotpool_test_file_mirrored(log,
- test_host, item)
- if mirrored is True:
- continue
- elif mirrored is False:
- log.cl_error("file [%s] not replicated\n%s",
- item, stripe_info)
- rc = -1
- break
- else:
- log.cl_error("failed to get stripeinfo on [%s]", item)
- rc = -1
- break
-
- # cleanup test dir
- for item in file_list:
- test_host.sh_remove_file(log, item)
- test_host.sh_rmdir_if_exist(log, test_dir)
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_mirror_target)
-
-
-def hotpool_test_lamigo_no_mirror_on_continuous_write(test_context):
- """
- Test that lamigo doesn't create a mirror for a file under continuous modification
- """
- # pylint: disable=too-many-statements,too-many-locals
- log = test_context.htc_log
- test_host = test_context.htc_mds_host
- test_host.sh_pkill(log, HOTPOOL_LAMIGO)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LAMIGO)
- if rc:
- log.cl_error("there are other running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LAMIGO)
- return -1
- params = hotpool_test_generate_basic_lamigo_parameters(test_context)
- fast_pool_name = test_context.htc_ost_pools[HOTPOOL_POOL_FAST].lp_name
- min_age = params[LAMIGO_CONFIG_MIN_AGE]
-
- try:
- lamigo_t = utils.thread_start(hotpool_test_start_lamigo_with_options,
- (log, test_host, params))
- except threading.ThreadError:
- log.cl_error("failed to start lamigo with parameters: %s",
- params)
- return -1
-
- # wait for lamigo to become active
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LAMIGO)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lamigo instance")
- lamigo_t.join()
- return -1
-
- test_file = os.path.join(test_context.htc_client_mountpoint,
- "test_lamigo_no_mirror_on_continuous_write")
- if test_host.sh_path_exists(log, test_file) == 1:
- test_file = ".".join([test_file,
- time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))])
-
- command = "lfs mirror create -N -p %s %s" % (fast_pool_name, test_file)
- ret_val = test_host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout, ret_val.cr_stderr)
- hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- lamigo_t.join()
- return -1
-
- rc = test_host.sh_fill_binary_file(log, test_file, 1048576)
- if rc < 0:
- log.cl_error("failed to create file [%s], exit", test_file)
- hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- lamigo_t.join()
- return rc
-
- # keep modifying the file for 2 * min_age seconds; it must stay unmirrored
- for _ in range(min_age * 2):
- time.sleep(1)
-
- rc = test_host.sh_fill_binary_file(log, test_file, 4096,
- random=True,
- dd_options="conv=notrunc")
- if rc:
- log.cl_error("failed to mpdify file [%s], exit", test_file)
- break
-
- mirrored, stripe_info = hotpool_test_file_mirrored(log, test_host,
- test_file)
- # the file should not have been mirror extended
- if mirrored is False:
- continue
- elif mirrored is True:
- log.cl_error("file [%s] has been replicated\n%s",
- test_file, stripe_info)
- rc = -1
- break
- else:
- log.cl_error("failed to get stripeinfo on [%s]",
- test_file)
- rc = -1
- break
-
- rc1 = hotpool_test_stop_process(log, test_host, HOTPOOL_LAMIGO, pid_list)
- if rc1 < 0:
- log.cl_error("failed to stop lamigo")
- rc = rc1
-
- lamigo_t.join()
- test_host.sh_remove_file(log, test_file)
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lamigo_no_mirror_on_continuous_write)
-
-
-def hotpool_test_lpurge_start(test_context):
- """
- Test lpurge start
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- params = hotpool_test_generate_basic_lpurge_parameters(test_context)
- test_host = test_context.htc_fast_oss_host
- test_host.sh_pkill(log, HOTPOOL_LPURGE)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LPURGE)
- return -1
-
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
-
- config_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LPURGE_CONFIG_PREFIX)
- rc = hotpool_test_generate_config(log, config_file, params)
- if rc < 0:
- log.cl_error("failed to generate lpurge config with parameters: %s",
- params)
- return rc
-
- try:
- lpurge_t = utils.thread_start(hotpool_test_start_lpurge,
- (test_context, test_host, config_file))
- except threading.ThreadError:
- log.cl_error("failed to start lpurge with parameters: %s",
- params)
- return -1
-
- # check pid
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lpurge instance")
- lpurge_t.join()
- return -1
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lpurge")
- lpurge_t.join()
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_start)
-
-
-def hotpool_test_lpurge_service(test_context):
- """
- Test lpurge service (systemd unit)
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- service_subname = "-".join([test_context.htc_fsname,
- test_context.htc_fast_osts[0].ls_index_string])
- params = hotpool_test_generate_basic_lpurge_parameters(test_context)
- test_host = test_context.htc_fast_oss_host
- test_host.sh_pkill(log, HOTPOOL_LPURGE)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LPURGE)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
-
- rc = hotpool_test_start_lpurge_service(test_context, test_host,
- params,
- test_context.htc_fast_osts[0].ls_index_string)
- if rc < 0:
- return -1
-
- rc = hotpool_test_wait_process_launch(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there's no running instance of [%s]\n",
- HOTPOOL_LPURGE)
- return -1
-
- service_unitname = "lpurge@%s.service" % service_subname
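- # e.g. with a hypothetical fsname "fs0" and OST index string "OST0000",
- # the resulting unit name is "lpurge@fs0-OST0000.service"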
- # check service status
- command = "systemctl is-active %s" % service_unitname
- ret_val = test_host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- ret_val.cr_exit_status, ret_val.cr_stdout,
- ret_val.cr_stderr)
- return -1
-
- if ret_val.cr_stdout.strip() != "active":
- log.cl_error("unit %s status:[%s]", service_unitname,
- ret_val.cr_stdout.strip())
- return -1
-
- rc = test_host.sh_service_stop(log, service_unitname)
- if rc < 0:
- log.cl_error("failed to stop systemd unit [%s] on host [%s]",
- service_unitname, test_host.sh_hostname)
- return rc
- return 0
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_service)
-
-
-def hotpool_test_lpurge_multiple_services(test_context):
- """
- Test starting multiple lpurge service instances
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- service_units = list()
- test_host = test_context.htc_fast_oss_host
- test_host.sh_pkill(log, HOTPOOL_LPURGE)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LPURGE)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- params = hotpool_test_generate_basic_lpurge_parameters(test_context)
- for ost in test_context.htc_fast_osts:
- fast_ost_device = "-".join([test_context.htc_fsname,
- ost.ls_index_string])
-
- params[LPURGE_CONFIG_DEVICE] = fast_ost_device
- service_unit = "lpurge@%s.service" % fast_ost_device
-
- rc = hotpool_test_start_lpurge_service(test_context, test_host,
- params, ost.ls_index_string)
- if rc < 0:
- log.cl_error("failed to start systemd unit [%s] on host [%s]",
- service_unit, test_host.sh_hostname)
- break
- service_units.append(service_unit)
-
- if rc < 0:
- for unit in service_units:
- test_host.sh_service_stop(log, unit)
- return -1
-
- rc = hotpool_test_wait_process_launch(log, test_host,
- HOTPOOL_LPURGE, instance_count=2)
- if rc:
- log.cl_error("there's no running instance of [%s]\n",
- HOTPOOL_LPURGE)
- return -1
-
- for unit in service_units:
- command = "systemctl is-active %s" % unit
- ret_val = test_host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout,
- ret_val.cr_stderr)
- rc = -1
- break
-
- if ret_val.cr_stdout.strip() != "active":
- log.cl_error("unit %s status:[%s]", unit,
- ret_val.cr_stdout.strip())
- rc = -1
- break
-
- for unit in service_units:
- rc_1 = test_host.sh_service_stop(log, unit)
- if rc_1 < 0:
- log.cl_error("failed to stop systemd unit [%s] on host [%s]",
- unit, test_host.sh_hostname)
- rc = -1
-
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_multiple_services)
-
-
-def hotpool_test_lpurge_options(test_context):
- """
- Test lpurge options
- """
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- params = hotpool_test_generate_basic_lpurge_parameters(test_context)
- dump_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LPURGE_DUMP_PREFIX)
- params[LPURGE_CONFIG_DUMP] = dump_file
- test_host = test_context.htc_fast_oss_host
- test_host.sh_pkill(log, HOTPOOL_LPURGE)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LPURGE)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
- config_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LPURGE_CONFIG_PREFIX)
- rc = hotpool_test_generate_config(log, config_file, params)
- if rc < 0:
- log.cl_error("failed to generate lpurge config with parameters: %s",
- params)
- return rc
-
- try:
- lpurge_t = utils.thread_start(hotpool_test_start_lpurge,
- (test_context, test_host, config_file))
- except threading.ThreadError:
- log.cl_error("failed to start lpurge with parameters: %s",
- params)
- return -1
-
- # check pid
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE)
- if pid_list is None or len(pid_list) == 0:
- log.cl_error("can't find lpurge instance")
- lpurge_t.join()
- return -1
-
- rc_1 = hotpool_test_trigger_dump(log, test_host, dump_file, pid_list[0])
- if rc_1:
- log.cl_error("can't find file [%s] on host [%s]",
- dump_file, test_host.sh_hostname)
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list)
- if rc < 0:
- log.cl_error("failed to stop lpurge")
- else:
- rc = rc_1
-
- lpurge_t.join()
- if rc:
- return rc
- # get dumpfile from host
- rc = test_host.sh_get_file(log, dump_file, workspace)
- if rc < 0:
- log.cl_error("failed to get file [%s] from host:%s",
- dump_file, test_host.sh_hostname)
- return rc
-
- return hotpool_test_lpurge_validate_options(log, dump_file, params)
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_options)
-
-
-def hotpool_test_lpurge_purge_effect(test_context):
- """
- Test the purge effect of lpurge
- """
- # pylint: disable=too-many-locals,too-many-branches,too-many-statements
- log = test_context.htc_log
- workspace = test_context.htc_workspace
- fast_pool = test_context.htc_ost_pools[HOTPOOL_POOL_FAST]
- slow_pool = test_context.htc_ost_pools[HOTPOOL_POOL_SLOW]
-
- params = hotpool_test_generate_basic_lpurge_parameters(test_context)
- params[LPURGE_CONFIG_FREELO] = 30
- params[LPURGE_CONFIG_FREEHI] = 99
- dump_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LPURGE_DUMP_PREFIX)
- params[LPURGE_CONFIG_DUMP] = dump_file
- test_host = test_context.htc_fast_oss_host
- test_host.sh_pkill(log, HOTPOOL_LPURGE)
- rc = hotpool_test_wait_process_terminate(log, test_host, HOTPOOL_LPURGE)
- if rc:
- log.cl_error("there are another running instances of [%s], "
- "and can't be stopped\n", HOTPOOL_LPURGE)
- return -1
- rc = test_host.sh_mkdir(log, workspace)
- if rc < 0:
- log.cl_error("failed to create remote workspace [%s] to target host [%s]",
- workspace, test_host.sh_hostname)
- return -1
-
- config_file = hotpool_test_genarate_tmpfile(workspace,
- HOTPOOL_LPURGE_CONFIG_PREFIX)
-
- # create test dir, and set stripe to fast pool
- test_dir = os.path.join(HOTPOOL_CLIENT, "test_scan_fids")
- rc = hotpool_create_mirror_dir(log, test_host, test_dir,
- fast_pool.lp_name,
- test_context.htc_fast_osts[0].ls_index)
- if rc < 0:
- return rc
-
- # choose a suitable file size, a bit larger than 1/100 of the target OST size
- command = "lctl get_param osd*.%s.kbytestotal -n | head -1" % params[LPURGE_CONFIG_DEVICE]
- ret_val = test_host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout, ret_val.cr_stderr)
- return -1
- ost_size = float(ret_val.cr_stdout.strip())
- file_size_mb = int(math.ceil(ost_size / (100 * 1024)))
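- # worked example (hypothetical OST size): a 10 GiB OST reports
- # kbytestotal = 10485760, so file_size_mb = ceil(10485760 / 102400) = 103,
- # i.e. slightly more than 1% of the OST per file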
- # create 10 files to be mirrored and store their paths in the list
- files_count = 10
- file_list = [os.path.join(test_dir, str(i)) for i in range(files_count)]
- for item in file_list:
- rc = test_host.sh_fill_binary_file(log, item, file_size_mb * 1048576)
- if rc:
- return -1
-
- test_host.sh_run(log, "sync", timeout=10)
- # extend mirror to slow pool
- for item in file_list:
- command = "lfs mirror extend -N -p %s -o %d %s" % (slow_pool.lp_name,
- slow_pool.lp_osts[0].ls_index,
- item)
- ret_val = test_host.sh_run(log, command)
- if ret_val.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- ret_val.cr_exit_status,
- ret_val.cr_stdout, ret_val.cr_stderr)
- return -1
-
- test_host.sh_run(log, "sync", timeout=10)
-
- # start lpurge
- rc = hotpool_test_generate_config(log, config_file, params)
- if rc < 0:
- log.cl_error("failed to generate config from parameters: %s",
- params)
- return -1
-
- try:
- lpurge_t = utils.thread_start(hotpool_test_start_lpurge,
- (test_context, test_host, config_file))
- except threading.ThreadError:
- log.cl_error("failed to start lpurge with parameters: %s",
- params)
- return -1
-
- # check pid
- pid_list = test_host.sh_get_pids(log, HOTPOOL_LPURGE)
- if not pid_list:
- log.cl_error("can't find lpurge instance")
- lpurge_t.join()
- return -1
- elif len(pid_list) > 1:
- log.cl_error("more than one lpurge instances found, PIDs: %s\n",
- pid_list)
- lpurge_t.join()
- return -1
-
- # trigger a dump and check the purged count to determine whether the
- # purge has finished
- rc1 = hotpool_test_lpurge_wait_purged(log, test_host, pid_list[0],
- dump_file, files_count,
- timeout=30)
- if rc1 < 0:
- log.cl_error("failed to wait purge progress done")
-
- rc = hotpool_test_stop_process(log, test_host, HOTPOOL_LPURGE, pid_list)
- lpurge_t.join()
- if rc < 0:
- log.cl_error("failed to stop lpurge")
-
- if rc < 0 or rc1 < 0:
- return -1
-
- rc = hotpool_test_lpurge_check_mirror_status(log, test_host, file_list)
-
- # cleanup test dir
- for item in file_list:
- test_host.sh_remove_file(log, item)
- test_host.sh_rmdir_if_exist(log, test_dir)
- return rc
-
-
-HOTPOOL_TEST_CASES.append(hotpool_test_lpurge_purge_effect)
+++ /dev/null
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Test Library for LiPE
-LiPE is a policy engine for Lustre
-"""
-# pylint: disable=too-many-lines
-import time
-import traceback
-import random
-import shutil
-import stat
-import os
-import json
-import yaml
-import prettytable
-
-from pylustre import cmd_general
-from pylustre import test_common
-from pylustre import ssh_host
-from pylustre import watched_io
-from pylustre import cstr
-from pylustre import utils
-from pylustre import time_util
-from pylustre import lustre
-from pylustre import constants
-from pylustre import clog
-from pylipe import lipe
-from pylipe import lipe_find
-from pylipe import lipe_install_nodeps
-from pylipe import lipe_constant
-from pylipe import lipe_hotpool_test
-from pyclownfish import clownfish
-
-LIPE_TESTS = []
-LIPE_LAUNCH_CONFIG = None
-LIPE_CLUSTER = None
-
-
-LIPE_PYLUSTRE_RPM = "lipe-pylustre*"
-LIPE_PYLTEST_RPM = "lipe-pyltest*"
-LIPE_CLOWNFISH_RPM = "lipe-clownfish*"
-LIPE_RPM = "lipe-1.*"
-LIPE_LORIS_RPM = "lipe-loris*"
-LIPE_CLIENT_RPM = "lipe-client*"
-LIPE_SERVER_RPM = "lipe-server*"
-LIPE_HSM_RPM = "lipe-hsm*"
-LIPE_LPCC_RPM = "lipe-lpcc*"
-
-# We assume all non-LiPE deps are already installed;
-# here we only test the deps between LiPE internal RPMs.
-# key: target RPM, value: list of dep RPMs (may be empty)
-LIPE_RPM_DEPENDENCY_DICT = {
- LIPE_PYLUSTRE_RPM: [],
- LIPE_SERVER_RPM: [],
- LIPE_HSM_RPM: [LIPE_PYLUSTRE_RPM],
- LIPE_LORIS_RPM: [LIPE_PYLUSTRE_RPM],
- LIPE_LPCC_RPM: [LIPE_PYLUSTRE_RPM],
- LIPE_CLOWNFISH_RPM: [LIPE_PYLUSTRE_RPM],
- LIPE_RPM: [LIPE_PYLUSTRE_RPM],
- LIPE_CLIENT_RPM: [LIPE_PYLUSTRE_RPM, LIPE_RPM]
-}
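-# For illustration: for the target LIPE_CLIENT_RPM, the dict above makes
-# _test_rpms_dependency() below install lipe-pylustre* and lipe-1.* together
-# with lipe-client* in a single "rpm -ivh" invocation.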
-
-TEST_POOL_NAME = "pool0"
-TEST_STRIPE_SIZE = 1048576
-
-
-def get_rpm_path(log, test_host, rpms_dir, rpm_pattern):
- """
- Get the real path of the target RPM
- """
- command = "ls %s/%s" % (rpms_dir, rpm_pattern)
- retval = test_host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = %d, stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- lines = retval.cr_stdout.strip().splitlines()
- if len(lines) != 1:
- log.cl_error("command [%s] on host [%s] has unexpected output "
- "ret = %d, stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- return lines[0]
-
-
-def _test_rpms_dependency(args, mnt_path):
- """
- Core logic of the RPM dependency test
- """
- # pylint: disable=too-many-locals,too-many-arguments,too-many-statements
- # pylint: disable=too-many-branches
- # Make sure install server is not local host, since this will overwrite the
- # local config files
- log, test_host = args
- uuid_install = test_host.sh_uuid(log)
- if uuid_install is None:
- log.cl_error("failed to get the UUID on host [%s]",
- test_host.sh_hostname)
- return -1
-
- local_host = ssh_host.SSHHost("localhost", local=True)
- uuid_local = local_host.sh_uuid(log)
- if uuid_local is None:
- log.cl_error("failed to get the UUID on localhost")
- return -1
-
- if uuid_local == uuid_install:
- log.cl_error("please do NOT use host [%s] as the install server, "
- "since it is the localhost, and installation test "
- "would overwrite the local configuration files",
- local_host.sh_hostname)
- return -1
-
- rpms_dir = mnt_path + "/" + cstr.CSTR_PACKAGES
-
- for target_rpm in LIPE_RPM_DEPENDENCY_DICT:
- # python-crontab needed by lipe-loris is generated by EXAScaler.
- if target_rpm == LIPE_LORIS_RPM:
- continue
- log.cl_info("testing dependency for RPM [%s]", target_rpm)
-
- ret = test_host.sh_rpm_find_and_uninstall(log, "grep lipe")
- if ret:
- log.cl_error("failed to uninstall LiPE rpms on host [%s]",
- test_host.sh_hostname)
- return -1
-
- # copy the list so that appending the target does not mutate the dict value
- rpms = list(LIPE_RPM_DEPENDENCY_DICT[target_rpm])
- rpms.append(target_rpm)
- command = "rpm -ivh"
- for rpm in rpms:
- rpm_fpath = get_rpm_path(log, test_host, rpms_dir, rpm)
- if rpm_fpath is None:
- log.cl_error("failed to get RPM path [%s] in dir [%s]",
- rpm, rpms_dir)
- return -1
- command += " " + rpm_fpath
-
- retval = test_host.sh_run(log, command,
- timeout=ssh_host.LONGEST_TIME_RPM_INSTALL)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = %d, stdout = [%s], stderr = [%s]",
- command, test_host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def test_rpms_dependency(log, workspace, host):
- """
- Send the LiPE ISO to the host and run the real dependency test
- """
- # pylint: disable=too-many-locals,too-many-arguments
- command = "mkdir -p %s" % workspace
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- local_host = ssh_host.SSHHost("localhost", local=True)
- command = "ls lipe-*.iso"
- retval = local_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- local_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- current_dir = os.getcwd()
- iso_names = retval.cr_stdout.split()
- if len(iso_names) != 1:
- log.cl_error("found unexpected ISOs [%s] under currect directory "
- "[%s]", iso_names, current_dir)
- return -1
-
- iso_name = iso_names[0]
- iso_path = current_dir + "/" + iso_name
-
- ret = host.sh_send_file(log, iso_path, workspace)
- if ret:
- log.cl_error("failed to send LiPE ISO [%s] on local host to "
- "directory [%s] on host [%s]",
- iso_path, workspace,
- host.sh_hostname)
- return -1
-
- host_iso_path = workspace + "/" + iso_name
- args = (log, host)
- ret = test_common.mount_and_run(log, host, host_iso_path,
- _test_rpms_dependency, args)
- if ret:
- log.cl_error("failed to mount and run RPM dependency")
- return ret
- return 0
-
-
-def umount_prepare_format_mount(log, workspace, clownfish_instance):
- """
- umount file systems, prepare the hosts, then format and mount everything
- """
- # pylint: disable=invalid-name
- ret = clownfish_instance.ci_umount_all(log)
- if ret:
- log.cl_error("failed to umount all")
- return -1
-
- ret = clownfish_instance.ci_prepare_all(log, workspace)
- if ret:
- log.cl_error("failed to prepare all")
- return -1
-
- ret = clownfish_instance.ci_format_all(log)
- if ret:
- log.cl_error("failed to format all")
- return -1
-
- ret = clownfish_instance.ci_mount_all(log)
- if ret:
- log.cl_error("failed to mount all")
- return -1
-
- return 0
-
-LIPE_TESTS.append(umount_prepare_format_mount)
-
-
-def get_device_id(host_id, device):
- """
- Get the device ID
- """
- common_string = "/dev/mapper/"
- if device.startswith(common_string):
- device = device[len(common_string):]
- else:
- common_string = "/dev/"
- if device.startswith(common_string):
- device = device[len(common_string):]
-
- device_name = ""
- for char in device:
- if char.isalnum():
- device_name += char
- else:
- device_name += "_"
-
- device_id = ("%s_%s" % (host_id, device_name))
- return device_id
-
-
-def finish_lipe_launch_config(log, clownfish_instance, config):
- """
- Generate the JSON config file of lipe_launch
-
- The groups section of the config must be generated already
- """
- # pylint: disable=too-many-locals
- group_names = []
- groups = config[lipe.LIPE_CONFIG_GROUPS]
- for group in groups:
- group_name = group[lipe.LIPE_CONFIG_NAME]
- group_names.append(group_name)
-
- device_ids = []
- device_configs = []
- ssh_host_configs = []
- ssh_hosts = {}
- for lustrefs in clownfish_instance.ci_lustres.values():
- # Add all client hosts into ssh_hosts so as to enable fid2path
- for client in lustrefs.lf_clients.values():
- host = client.lc_host
- host_id = host.sh_host_id
-
- if host_id not in ssh_hosts:
- ssh_hosts[host_id] = host
- ssh_host_config = {}
- ssh_host_config[cstr.CSTR_HOST_ID] = host_id
- ssh_host_config[cstr.CSTR_HOSTNAME] = host.sh_hostname
- ssh_host_config[cstr.CSTR_SSH_IDENTITY_FILE] = host.sh_identity_file
- ssh_host_configs.append(ssh_host_config)
-
- for mdt in lustrefs.lf_mdts.values():
- for mdti in mdt.ls_instances.values():
- host = mdti.lsi_host
- device = mdti.lsi_device
- host_id = host.sh_host_id
-
- # Use the information to fill json file of lipe_launch
- if host_id not in ssh_hosts:
- ssh_hosts[host_id] = host
- ssh_host_config = {}
- ssh_host_config[cstr.CSTR_HOST_ID] = host_id
- ssh_host_config[cstr.CSTR_HOSTNAME] = host.sh_hostname
- ssh_host_config[cstr.CSTR_SSH_IDENTITY_FILE] = host.sh_identity_file
- ssh_host_configs.append(ssh_host_config)
-
- device_id = get_device_id(host_id, device)
- if device_id in device_ids:
- log.cl_error("multiple devices with the same ID [%s]",
- device_id)
- return -1
-
- device_config = {}
- device_config[cstr.CSTR_HOST_ID] = host_id
- device_config[lipe.LIPE_CONFIG_PATH] = device
- device_config[lipe.LIPE_CONFIG_GROUPS] = group_names
- device_config[lipe.LIPE_CONFIG_DEVICE_ID] = device_id
- device_configs.append(device_config)
-
- config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs
- config[lipe.LIPE_CONFIG_DEVICES] = device_configs
- config[lipe.LIPE_CONFIG_ONLY_SCAN_ACTIVE] = True
- return 0
-
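-# Sketch of the sections finish_lipe_launch_config() fills in (key spellings
-# assumed from the cstr.CSTR_*/lipe.LIPE_CONFIG_* constants and may differ):
-#   ssh_hosts:        one entry per client/MDT host (host_id, hostname,
-#                     ssh_identity_file)
-#   devices:          one entry per MDT device (host_id, path, groups,
-#                     device_id)
-#   only_scan_active: true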
-
-def get_lipe_launch_result(logdir, service_instance):
- """
- Get the lipe_launch result for the test
- """
- # pylint: disable=too-many-locals
- host = service_instance.lsi_host
- host_id = host.sh_host_id
- device = service_instance.lsi_device
-
- # Must use the same device_id/group_name/action/argument/counter_name as
- # in launch_config; these could actually be read from launch_config
- device_id = get_device_id(host_id, device)
- group_name = "group_name"
- action = lipe.LAT_SHELL_CMD_FID
- argument = "stat $LUSTRE_MNT/.lustre/fid/$LUSTRE_FID"
- counter_name = "fname_reg_of_old_file"
-
- local_device_workspace = logdir + "/" + device_id
- result = lipe.LipeCounterResult(local_device_workspace, group_name,
- action, argument, counter_name)
- return result
-
-
-def lipe_launch_on_host(log, host, parent_dir, config):
- """
- Run lipe_launch on a remote host
- """
- basename = "launch_host-" + host.sh_hostname
- logdir = parent_dir + "/" + basename
-
- command = "mkdir -p %s" % logdir
- retval = utils.run(command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on local host, "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- command = "mkdir -p %s" % logdir
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- config_fpath = logdir + "/lipe_launch.json"
- with open(config_fpath, 'w') as outfile:
- json.dump(config, outfile, indent=4)
-
- ret = host.sh_send_file(log, config_fpath, logdir)
- if ret:
- log.cl_error("failed to send config file [%s] of localhost to [%s] "
- "directory [%s] of host [%s]",
- config_fpath, logdir, config_fpath,
- host.sh_hostname)
- return None
-
- return_value = 0
- command = ("%s --logdir %s --config %s" %
- (lipe.LIPE_LAUNCH, logdir, config_fpath))
- stdout_file = logdir + "/" + "lipe_launch.stdout.log"
- stderr_file = logdir + "/" + "lipe_launch.stderr.log"
- retval = host.sh_watched_run(log, command, stdout_file, stderr_file)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- host_local_dir = logdir + "/" + host.sh_hostname
- command = "mkdir -p %s" % host_local_dir
- retval = utils.run(command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on local host, "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- ret = host.sh_get_and_clean_dir(log, logdir, host_local_dir)
- if ret:
- log.cl_error("failed to get and clean dir [%s] on host [%s]",
- logdir, host.sh_hostname)
- if return_value == -1:
- log.cl_error("please check [%s] on host [%s] and [%s] on "
- "local host to debug why lipe_launch failed",
- logdir, host.sh_hostname, host_local_dir)
- else:
- log.cl_error("please check why copying and removing directory "
- "failed after success of lipe_launch test")
- return None
-
- if return_value:
- return None
-
- return host_local_dir + "/" + basename
-
-
-def lipe_launch_with_example_config_ftype(log, clownfish_instance, logdir, flist_type):
- """
- Run lipe_launch with example config, flist_type needs to be specified
- """
- config = LIPE_LAUNCH_CONFIG.copy()
-
- config[lipe.LIPE_CONFIG_FLIST_TYPE] = flist_type
- ret = finish_lipe_launch_config(log, clownfish_instance, config)
- if ret:
- log.cl_error("failed to finish the lipe_launch config")
- return -1
-
- for mdt_host in LIPE_CLUSTER.lc_mdt_hosts:
- result_dir = lipe_launch_on_host(log, mdt_host, logdir, config)
- if result_dir is None:
- log.cl_error("failed to run lipe_launch on host [%s]",
- mdt_host.sh_hostname)
- return -1
-
- return 0
-
-
-def lipe_launch_with_example_config(log, logdir, clownfish_instance):
- """
- Run lipe_launch with example config
- """
- # pylint: disable=unused-argument
- for flist_type in lipe.LIPE_FLIST_TYPES:
- # the directory will be created in lipe_launch_on_host()
- # by "mkdir -p"
- basename = "flist_type-" + flist_type
- ret = lipe_launch_with_example_config_ftype(log, clownfish_instance, logdir + "/" + basename,
- flist_type)
- if ret:
- log.cl_error("failed to run lipe launch with example config and "
- "flist type [%s]", flist_type)
- return ret
-
- return 0
-
-
-LIPE_TESTS.append(lipe_launch_with_example_config)
-
-
-def run_lipe_launch_stat(log, logdir, host, service_instance,
- launch_config):
- """
- Run lipe_launch on a host and collect the stat counter result
- """
- # pylint: disable=too-many-locals
- result_dir = lipe_launch_on_host(log, host, logdir, launch_config)
- if result_dir is None:
- log.cl_error("failed to run lipe_launch with config")
- return None
-
- result = get_lipe_launch_result(result_dir, service_instance)
- return result
-
-
-def _check_fid_lipe_launch_found(log, parent_dir, host, mdti,
- launch_config, fid, expect_found=True):
- """
- Check whether the FID is found (or not) as expected
- """
- # pylint: disable=too-many-arguments
- workspace = (parent_dir + "/" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
- ret = utils.mkdir(workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- workspace)
- log.cl_abort = True
- return -1
-
- # Do not abort here, since lipe_launch might still be able to find the
- # inode on MDT, but can not execute stat on the client side. In this case,
- # lipe_launch will fail, but the next round of lipe_launch may run
- # successfully.
- result = run_lipe_launch_stat(log, workspace, host, mdti, launch_config)
- if result is None:
- log.cl_error("failed to run lipe_launch on Lustre service instance [%s]",
- mdti.lsi_service_instance_name)
- return -1
-
- ret = result.lcr_load_json(log)
- if ret:
- log.cl_error("failed to load json file of lipe_launch result on Lustre "
- "service instance [%s]", mdti.lsi_service_instance_name)
- log.cl_abort = True
- return -1
-
- found = result.lcr_fid_is_found(fid)
- if found:
- found_string = ""
- else:
- found_string = " not"
- log.cl_debug("FID [%s] is%s found on Lustre service instance [%s]", fid,
- found_string, mdti.lsi_service_instance_name)
-
- if found == expect_found:
- return 0
- else:
- return -1
-
-
-def check_fid_lipe_launch_found(log, parent_dir, host, mdti,
- launch_config, fid):
- """
- Check whether the fid is found
- """
- # pylint: disable=too-many-arguments
- return _check_fid_lipe_launch_found(log, parent_dir, host, mdti,
- launch_config, fid, expect_found=True)
-
-
-def check_fid_lipe_launch_missing(log, parent_dir, host, mdti,
- launch_config, fid):
- """
- Check whether the fid is not found
- """
- # pylint: disable=too-many-arguments
- return _check_fid_lipe_launch_found(log, parent_dir, host, mdti,
- launch_config, fid, expect_found=False)
-
-
-def generate_lipe_launch_stat_config(log, clownfish_instance, fname):
- """
- Generate the config that runs stat on each matched file
- """
- # pylint: disable=unused-argument
- config = {}
- groups = []
-
- group = {}
- group_name = "group_name"
- group[lipe.LIPE_CONFIG_NAME] = group_name
-
- rules = []
- rule = {}
- rule[lipe.LIPE_CONFIG_ACTION] = lipe.LAT_SHELL_CMD_FID
- funct = lipe_constant.LIPE_POLICY_FUNCTION_FNAME_REG
- rule[lipe.LIPE_CONFIG_EXPRESSION] = ('%s("%s")' % (funct, fname))
- rule[lipe.LIPE_CONFIG_ARGUMENT] = "stat $LUSTRE_MNT/.lustre/fid/$LUSTRE_FID"
- rule[lipe.LIPE_CONFIG_COUNTER_NAME] = "fname_reg_of_old_file"
- rules.append(rule)
- group[lipe.LIPE_CONFIG_RULES] = rules
- groups.append(group)
- config[lipe.LIPE_CONFIG_GROUPS] = groups
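- # Rough shape of the config built above (key names assumed to follow the
- # lipe.LIPE_CONFIG_* constants; actual spellings may differ):
- # {"groups": [{"name": "group_name",
- #              "rules": [{"action": <LAT_SHELL_CMD_FID>,
- #                         "expression": 'fname_reg("<fname>")',
- #                         "argument": "stat $LUSTRE_MNT/.lustre/fid/$LUSTRE_FID",
- #                         "counter_name": "fname_reg_of_old_file"}]}]}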
-
- ret = finish_lipe_launch_config(log, clownfish_instance, config)
- if ret:
- log.cl_error("failed to finish the lipe_launch config")
- return None
-
- return config
-
-
-def test_lipe_launch_type(log, workspace, clownfish_instance, mdti, host,
- lustre_dir_path, inode_type=stat.S_IFREG,
- major=None, minor=None, path=None):
- """
- Create and remove a file/dir of a specific type and check whether lipe_launch
- can find it
- """
- # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
- # pylint: disable=too-many-locals
- inode_fname = "new_inode"
- inode_path = lustre_dir_path + "/" + inode_fname
- type_string = utils.file_type2string(inode_type)
-
- ret = host.sh_create_inode(log, inode_path, inode_type=inode_type,
- major=major, minor=minor, path=path)
- if ret:
- log.cl_error("failed create inode [%s] with type [%s] on host [%s]",
- inode_path, type_string, host.sh_hostname)
- return -1
-
- fid = lustre.lfs_path2fid(log, host, inode_path)
- if fid is None:
- log.cl_error("failed to get FID of file [%s] on host [%s]",
- inode_path, host.sh_hostname)
- return -1
-
- creation_workspace = workspace + "/after_creation"
- ret = utils.mkdir(creation_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- creation_workspace)
- return -1
-
- config = generate_lipe_launch_stat_config(log, clownfish_instance, inode_fname)
- if config is None:
- log.cl_error("generate lipe_launch stat config",
- creation_workspace)
- return -1
-
- waited = False
- for mdt_host in LIPE_CLUSTER.lc_mdt_hosts:
- if not waited:
- ret = utils.wait_condition(log, check_fid_lipe_launch_found,
- (creation_workspace, mdt_host, mdti, config,
- fid))
- else:
- ret = check_fid_lipe_launch_found(log, creation_workspace,
- mdt_host, mdti, config, fid)
- if ret:
- log.cl_error("can not find FID [%s] on Lustre service instance "
- "[%s] by running lipe_launch on host [%s] after "
- "creation of [%s] with type [%s] on host [%s]", fid,
- mdti.lsi_service_instance_name, host.sh_hostname,
- inode_path, type_string, mdt_host.sh_hostname)
- return -1
- waited = True
-
- ret = host.sh_remove_inode(log, inode_path, inode_type=inode_type)
- if ret:
- log.cl_error("failed to remove inode [%s] with type [%s] on host [%s]",
- inode_path, type_string, host.sh_hostname)
- return -1
-
- waited = False
- for mdt_host in LIPE_CLUSTER.lc_mdt_hosts:
- if not waited:
- ret = utils.wait_condition(log, check_fid_lipe_launch_missing,
- (creation_workspace, mdt_host, mdti, config,
- fid))
- else:
- ret = check_fid_lipe_launch_missing(log, creation_workspace,
- mdt_host, mdti, config, fid)
- if ret:
- log.cl_error("can not find FID [%s] on Lustre service instance "
- "[%s] by running lipe_launch on host [%s] after "
- "creation of [%s] with type [%s] on host [%s]", fid,
- mdti.lsi_service_instance_name, host.sh_hostname,
- inode_path, type_string, mdt_host.sh_hostname)
- return -1
- waited = True
-
- return 0
-
-
-def test_lipe_launch_mdt(log, workspace, clownfish_instance, mdt):
- """
- Test whether lipe_launch can find the newly created file/directory on an MDT
- """
- # pylint: disable=too-many-locals
- mdt_index = mdt.ls_index
- lustrefs = mdt.ls_lustre_fs
- clients = lustrefs.lf_clients
- if len(clients) == 0:
- log.cl_warning("no client for file system [%s] configured, skip test of "
- "file creation", lustrefs.lf_fsname)
- return 0
-
- mdti = mdt.ls_mounted_instance(log)
- if mdti is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- mdt.ls_service_name)
- return -1
-
- lustre_relative_dir_path = ("lipe_launch_" + mdt.ls_service_name + "_" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
-
- launch_workspace = workspace + "/" + "dir_path"
- ret = utils.mkdir(launch_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- launch_workspace)
- return -1
-
- client = random.choice(clients.values())
- host = client.lc_host
- mnt = client.lc_mnt
- lustre_dir_path = mnt + "/" + lustre_relative_dir_path
-
- command = ("lfs mkdir -c 1 -i %d %s" % (mdt_index, lustre_dir_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- inode_types = [stat.S_IFDIR, stat.S_IFCHR, stat.S_IFBLK, stat.S_IFREG,
- stat.S_IFIFO, stat.S_IFLNK, stat.S_IFSOCK]
- for inode_type in inode_types:
- major = None
- minor = None
- path = None
- if inode_type == stat.S_IFCHR or inode_type == stat.S_IFBLK:
- major = 7
- minor = 1
- elif inode_type == stat.S_IFLNK:
- path = "unkown"
- ret = test_lipe_launch_type(log, workspace, clownfish_instance, mdti, host,
- lustre_dir_path, inode_type=inode_type,
- major=major, minor=minor, path=path)
- if ret:
- log.cl_error("failed to test the lipe_launch for mdt "
- "[%s] with inode type [%s]",
- mdt.ls_service_name, inode_type)
- return -1
-
- command = ("rmdir %s" % (lustre_dir_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def test_lipe_launch(log, workspace, clownfish_instance):
- """
- Test whether lipe_launch can find a newly created file/directory
- and notice its removal
- """
- for lustrefs in clownfish_instance.ci_lustres.values():
- for mdt in lustrefs.lf_mdts.values():
- mdt_workspace = workspace + "/" + mdt.ls_service_name
- ret = utils.mkdir(mdt_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- mdt_workspace)
- return -1
-
- ret = test_lipe_launch_mdt(log, mdt_workspace, clownfish_instance, mdt)
- if ret:
- log.cl_error("failed to test lipe_launch on "
- "Lustre service [%s]", mdt.ls_service_name)
- return ret
- return 0
-
-LIPE_TESTS.append(test_lipe_launch)
-
-
-def lipe_find_device(log, workspace, host, device, find_option,
- action_option=None):
- """
- Run lipe_find on a device
- """
- # pylint: disable=too-many-locals, too-many-arguments
- identity = time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f")
- stdout_file = workspace + "/" + "lipe_find.stdout.log"
- stderr_file = workspace + "/" + "lipe_find.stderr.log"
-
- args = {}
- args[watched_io.WATCHEDIO_LOG] = log
- args[watched_io.WATCHEDIO_HOSTNAME] = host.sh_hostname
- stdout_fd = watched_io.watched_io_open(stdout_file,
- watched_io.log_watcher_info, args)
- stderr_fd = watched_io.watched_io_open(stderr_file,
- watched_io.log_watcher_error, args)
- if not action_option:
- command = ("lipe_find --identity %s %s %s" % (identity, device, find_option))
- else:
- command = ("lipe_find --identity %s %s %s %s" % (identity, action_option,
- device, find_option))
-
- log.cl_info("start to run command [%s] on host [%s]", command,
- host.sh_hostname)
- retval = host.sh_run(log, command, stdout_tee=stdout_fd,
- stderr_tee=stderr_fd, timeout=None)
- stdout_fd.close()
- stderr_fd.close()
-
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], "
- "stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- find_workspace = lipe_find.LIPE_FIND_LOG_DIR + "/" + identity
- ret = host.sh_get_file(log, find_workspace, workspace)
- if ret:
- log.cl_error("failed to get file [%s] from on host [%s] to local "
- "host directory [%s] ",
- workspace, host.sh_hostname, workspace)
- return None
-
- local_find_workspace = workspace + "/" + "lfs_find_log_" + identity
- shutil.move(workspace + "/" + identity, local_find_workspace)
-
- device_id = lipe_find.generate_device_id(0, device)
- local_device_workspace = local_find_workspace + "/" + device_id
- result = lipe_find.LipeFindCounterResult(local_device_workspace)
- return result
-
-
-def lipe_find_service_instance(log, workspace, service_instance, find_option,
- action_option=None):
- """
- Run lipe_find on the instance of a Lustre service
- """
- host = service_instance.lsi_host
- device = service_instance.lsi_device
- result = lipe_find_device(log, workspace, host, device, find_option,
- action_option=action_option)
- if result is None:
- log.cl_error("failed to run lipe_find on device [%s] of host "
- "[%s]", device, host.sh_hostname)
- return None
- return result
-
-
-def check_found_number(log, parent_dir, mdti, find_option, expected_count,
- action_option=None):
- """
- Check whether the found number is expected
- """
- # pylint: disable=too-many-arguments
- workspace = (parent_dir + "/" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
- ret = utils.mkdir(workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- workspace)
- log.cl_abort = True
- return -1, None
-
- result = lipe_find_service_instance(log, workspace, mdti, find_option,
- action_option=action_option)
- if result is None:
- log.cl_error("failed to run lipe_find on Lustre service instance [%s]",
- mdti.lsi_service_instance_name)
- log.cl_abort = True
- return -1, None
-
- ret = result.lcr_load_json(log)
- if ret:
- log.cl_error("failed to load json file of lipe_find result on Lustre "
- "service instance [%s]", mdti.lsi_service_instance_name)
- log.cl_abort = True
- return -1, None
-
- if result.lcr_found_count != expected_count:
- log.cl_debug("unexpected found inode number on Lustre service "
- "instance [%s], expected [%s], got [%d]",
- mdti.lsi_service_instance_name,
- expected_count, result.lcr_found_count)
- return -1, None
-
- return 0, result
-
-
-def _check_fid_found(log, parent_dir, mdti, find_option, fid,
- expect_found=True, expect_count=None,
- action_option=None):
- """
- Check whether the fid is found (or not) as expected
- """
- # pylint: disable=too-many-arguments
- workspace = (parent_dir + "/" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
- ret = utils.mkdir(workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- workspace)
- log.cl_abort = True
- return -1
-
- result = lipe_find_service_instance(log, workspace, mdti, find_option,
- action_option=action_option)
- if result is None:
- log.cl_error("failed to run lipe_find on Lustre service instance [%s]",
- mdti.lsi_service_instance_name)
- log.cl_abort = True
- return -1
-
- ret = result.lcr_load_json(log)
- if ret:
- log.cl_error("failed to load json file of lipe_find result on Lustre "
- "service instance [%s]", mdti.lsi_service_instance_name)
- log.cl_abort = True
- return -1
-
- found = result.lcr_fid_is_found(fid)
- if found:
- found_string = ""
- else:
- found_string = " not"
- log.cl_debug("FID [%s] is%s found on Lustre service instance [%s]", fid,
- found_string, mdti.lsi_service_instance_name)
-
- found_count = result.lcr_items_count(log)
- if found and expect_count is not None and found_count != expect_count:
- log.cl_error("expect [%d] items, [%d] found.\n",
- expect_count, found_count)
- return -1
-
- if found == expect_found:
- return 0
- else:
- return -1
-
-
-def check_fid_found(log, parent_dir, mdti, find_option, fid,
- expect_count=None, action_option=None):
- """
- Check whether the fid is found
- """
- # pylint: disable=too-many-arguments
- return _check_fid_found(log, parent_dir, mdti, find_option, fid,
- expect_found=True, expect_count=expect_count,
- action_option=action_option)
-
-
-def check_fid_missing(log, parent_dir, mdti, find_option, fid, action_option=None):
- """
- Check whether the fid is not found
- """
- # pylint: disable=too-many-arguments
- return _check_fid_found(log, parent_dir, mdti, find_option, fid,
- expect_found=False, action_option=action_option)
-
-
-def lipe_find_posix_client(log, workspace, client, find_option,
- action_option=None):
- """
- Run lipe_find on a Lustre client
- """
- host = client.lc_host
- mnt = client.lc_mnt
- result = lipe_find_device(log, workspace, host, mnt, find_option,
- action_option=action_option)
- if result is None:
- log.cl_error("failed to run lipe_find on path [%s] of host "
- "[%s]", mnt, host.sh_hostname)
- return None
- return result
-
-
-def _check_fid_found_client(log, parent_dir, client, find_option, fid,
- expect_found=True, expect_count=None,
- action_option=None):
- """
- Check whether the fid is found (or not) as expected from client
- """
- # pylint: disable=too-many-arguments
- workspace = (parent_dir + "/" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
- ret = utils.mkdir(workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- workspace)
- log.cl_abort = True
- return -1
-
- result = lipe_find_posix_client(log, workspace, client, find_option, action_option=action_option)
- if result is None:
- log.cl_error("failed to run lipe_find on Lustre client [%s]",
- client.lc_client_name)
- log.cl_abort = True
- return -1
-
- ret = result.lcr_load_json(log)
- if ret:
- log.cl_error("failed to load json file of lipe_find result on Lustre "
- "client [%s]", client.lc_client_name)
- log.cl_abort = True
- return -1
-
- found = result.lcr_fid_is_found(fid)
- if found:
- found_string = ""
- else:
- found_string = " not"
- log.cl_debug("FID [%s] is%s found on Lustre client [%s]", fid,
- found_string, client.lc_client_name)
-
- found_count = result.lcr_items_count(log)
- if found and expect_count is not None and found_count != expect_count:
- log.cl_error("expect [%d] items, [%d] found.\n",
- expect_count, found_count)
- return -1
-
- if found == expect_found:
- return 0
- else:
- return -1
-
-
-def check_fid_found_client(log, parent_dir, client, find_option, fid,
- expect_count=None, action_option=None):
- """
- Check whether the fid is found on a Lustre client
- """
- # pylint: disable=too-many-arguments
- return _check_fid_found_client(log, parent_dir, client, find_option, fid,
- expect_found=True, expect_count=expect_count,
- action_option=action_option)
-
-
-def check_fid_missing_client(log, parent_dir, client, find_option, fid, action_option=None):
- """
- Check whether the fid is not found on a Lustre client
- """
- # pylint: disable=too-many-arguments
- return _check_fid_found_client(log, parent_dir, client, find_option, fid,
- expect_found=False, action_option=action_option)
-
-
-def lipe_find_message(fid, mdti, host, inode_path,
- inode_type=stat.S_IFREG, major=None, minor=None,
- path=None, stripe_index=None, pool=None,
- component_layout=False):
- """
- Return the message for a failure of lipe_find
- """
- # pylint: disable=too-many-arguments
- if pool:
- pool_name = pool.lp_name
- else:
- pool_name = "None"
- return ("FID [%s], service [%s], client [%s], inode path: [%s], "
- "type [%s], major [%s], minor [%s], symbol link path [%s], "
- "stripe index [%s], pool [%s], component_layout [%s]" %
- (fid, mdti.lsi_service_instance_name,
- host.sh_hostname, inode_path,
- utils.file_type2string(inode_type),
- major, minor, path, stripe_index, pool_name, component_layout))
-
-
-class LipeFindTestCase(object):
- """
- Each test case of lipe_find is represented by an object of this type
- """
- # pylint: disable=too-few-public-methods,too-many-instance-attributes
- def __init__(self, expression, found, expect_count=None, skip_server=False,
- skip_client=False, option=None):
- # pylint: disable=too-many-arguments
- self.lftc_expression = expression
- self.lftc_option = option
- self.lftc_found = found
- self.lftc_expect_count = expect_count
- self.lftc_skip_server = skip_server
- self.lftc_skip_client = skip_client
- self.lftc_filename, self.lftc_lineno, self.lftc_func = clog.back_caller(2)
- self.lftc_debug_info = ("%s:%s %s()" %
- (os.path.basename(self.lftc_filename),
- self.lftc_lineno,
- self.lftc_func))
-
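-# Usage sketch (see test_lipe_find_size() below): LipeFindTestCase("-size 0c",
-# True) expects the FID to be found by "-size 0c", while
-# LipeFindTestCase("! -size 0c", False) expects it to be missing.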
-
-def test_find_options(log, parent_dir, client, mdti, test_cases, fid, msg):
- """
- Run lipe_find with options expected to find, and not to find, a fid
- """
- # pylint: disable=too-many-arguments,too-many-locals
- for test_case in test_cases:
- find_option = test_case.lftc_expression
- action_option = test_case.lftc_option
- skip_server = test_case.lftc_skip_server
- skip_client = test_case.lftc_skip_client
- debug_info = test_case.lftc_debug_info
- expect_count = test_case.lftc_expect_count
- action_msg = " action [%s]" % action_option if action_option else ""
- if test_case.lftc_found:
- if not skip_server:
- ret = check_fid_found(log, parent_dir, mdti, find_option, fid,
- expect_count=expect_count, action_option=action_option)
- if ret:
- log.cl_error("expected to find fid with option [%s]%s for test [%s], "
- "but got failure, %s", find_option, action_msg, debug_info, msg)
- return -1
-
- if not skip_client:
- ret = check_fid_found_client(log, parent_dir, client, find_option, fid,
- expect_count=expect_count, action_option=action_option)
- if ret:
- log.cl_error("expected to find fid with option [%s]%s for test [%s] from "
- "Lustre client, but got failure, %s",
- find_option, action_msg, debug_info, msg)
- return -1
- else:
- if not skip_server:
- ret = check_fid_missing(log, parent_dir, mdti, find_option,
- fid, action_option=action_option)
- if ret:
- log.cl_error("expected to NOT find FID with option [%s]%s for test [%s], "
- "but got failure, %s", find_option, action_msg, debug_info, msg)
- return -1
-
- if not skip_client:
- ret = check_fid_missing_client(log, parent_dir, client,
- find_option, fid, action_option=action_option)
- if ret:
- log.cl_error("expected to NOT find FID with option [%s]%s for test [%s] "
- "from Lustre client, but got failure, %s",
- find_option, action_msg, debug_info, msg)
- return -1
- return 0
-
-
-def test_lipe_find_size(log, parent_dir, client, mdti, fid, host,
- inode_path, inode_type, msg, size=True):
- """
- Check whether lipe_find with -size or -blocks behaves as expected
- """
- # pylint: disable=too-many-arguments
- # The Lustre file should be an empty file that has no DoM
- if size:
- attribute = "size"
- else:
- attribute = "blocks"
- size = host.sh_get_file_size(log, inode_path, size=size)
- if size < 0:
- log.cl_error("failed to get file %s, %s", attribute, msg)
- return -1
-
- if inode_type != stat.S_IFREG:
- have_size = True
- elif not lipe_constant.HAVE_LUSTRE_PFL:
- # Without PFL support, LiPE is not able to get the size or LSoM from
- # the object on the MDT.
- have_size = False
- else:
- have_size = True
-
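- # Worked instance (hypothetical 4096-byte regular file with PFL support):
- # the file must match "-size 4096c", "-size +4095c", "-size -4097c" and
- # "! -size 4097c", and must NOT match "-size +4096c", "-size -4096c" or
- # "! -size 4096c".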
- cases = []
- if not have_size:
- cases.append(LipeFindTestCase("-%s %sc" % (attribute, size), False))
- cases.append(LipeFindTestCase("-%s +%sc" % (attribute, size), False))
- cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size + 1)), False))
- cases.append(LipeFindTestCase("! -%s %sc" % (attribute, size), False))
- if size != 0:
- cases.append(LipeFindTestCase("! -%s +%sc" % (attribute, (size - 1)),
- False))
- return 0
-
- cases.append(LipeFindTestCase("-%s %sc" % (attribute, size), True))
- cases.append(LipeFindTestCase("! -%s %sc" % (attribute, size), False))
- cases.append(LipeFindTestCase("-%s +%sc" % (attribute, size), False))
- cases.append(LipeFindTestCase("-%s -%sc" % (attribute, size), False))
-
- cases.append(LipeFindTestCase("-%s %sc" % (attribute, (size + 1)), False))
- cases.append(LipeFindTestCase("! -%s %sc" % (attribute, (size + 1)), True))
- cases.append(LipeFindTestCase("-%s +%sc" % (attribute, (size + 1)), False))
- cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size + 1)), True))
-
- if size != 0:
- cases.append(LipeFindTestCase("-%s %sc" % (attribute, (size - 1)), False))
- cases.append(LipeFindTestCase("! -%s %sc" % (attribute, (size - 1)), True))
- cases.append(LipeFindTestCase("-%s +%sc" % (attribute, (size - 1)), True))
- cases.append(LipeFindTestCase("-%s -%sc" % (attribute, (size - 1)), False))
-
- return test_find_options(log, parent_dir, client, mdti, cases, fid, msg)
-
-
-def test_lipe_find_mdt_actions(log, workspace, mdti, client,
- lustre_dir_path, pool):
- """
- Create and remove a regular file and check whether lipe_find can find it
- and perform an action against it.
- """
- # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
- # pylint: disable=too-many-locals
- inode_fname = "new_Inode"
- inode_type = stat.S_IFREG
- inode_path = lustre_dir_path + "/" + inode_fname
- host = client.lc_host
-
- if mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS:
- return 0
-
- pool_name = None
- if pool is not None:
- pool_name = pool.lp_name
-
- ret = lustre.lustre_file_setstripe(log, host, inode_path,
- stripe_size=TEST_STRIPE_SIZE)
- if ret:
- log.cl_error("failed to create inode [%s] with type [%s] on host [%s]",
- inode_path, utils.file_type2string(inode_type),
- host.sh_hostname)
- return -1
-
- ret = lustre.lustre_file_mirror_extend(log, host, inode_path,
- pool_name=pool_name)
- if ret:
- log.cl_error("failed to extend the mirror of inode [%s] with type [%s] on host [%s]",
- inode_path, utils.file_type2string(inode_type),
- host.sh_hostname)
- return -1
-
- command = "cat %s" % (inode_path)
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- fid = lustre.lfs_path2fid(log, host, inode_path)
- if fid is None:
- log.cl_error("failed get fid of file [%s] on host [%s]",
- inode_path, host.sh_hostname)
- return -1
-
- stat_result = host.sh_stat(log, inode_path)
- if stat_result is None:
- log.cl_error("failed stat file [%s] on host [%s]",
- inode_path, host.sh_hostname)
- return -1
-
- creation_workspace = workspace + "/after_creation"
- ret = utils.mkdir(creation_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- creation_workspace)
- return -1
-
- msg = lipe_find_message(fid, mdti, host, inode_path,
- inode_type=inode_type, pool=pool)
-
- ret = utils.wait_condition(log, check_fid_found,
- (creation_workspace, mdti, "", fid))
- if ret:
- log.cl_error("expected to find fid after inode creation,"
- " but got failure, %s", msg)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase('-pool %s -fid %s' % (pool_name, fid), True))
-
- ret = test_find_options(log, creation_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find with given options after inode "
- "creation")
- return -1
-
- action_run_workspace = workspace + "/actions_run"
- ret = utils.mkdir(action_run_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- action_run_workspace)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase("-pool %s -fid %s" % (pool_name, fid),
- True, skip_client=True,
- option="--agents %s -purge %s" % (host.sh_hostname, pool_name)))
-
- ret = test_find_options(log, action_run_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find with given options")
- return -1
-
- action_post_workspace = workspace + "/actions_post"
- ret = utils.mkdir(action_post_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- action_post_workspace)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase("-pool %s -fid %s" % (pool_name, fid), False))
-
- ret = test_find_options(log, action_post_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find with given options after inode "
- "creation")
- return -1
-
- ret = host.sh_remove_inode(log, inode_path, inode_type=inode_type)
- if ret:
- log.cl_error("failed to remove inode [%s] with type [%s] on host [%s]",
- inode_path, utils.file_type2string(inode_type),
- host.sh_hostname)
- return -1
-
- return 0
-
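The action pass above drives a purge through lipe_find rather than checking results on the client. Reconstructed at the shell level from the calls in this function, the setup and action look roughly like the sketch below; the lfs commands are standard, but the lipe_find command line is a hypothetical rendering of what the harness assembles (the real invocation, including the MDT device argument, is built elsewhere):

    import subprocess

    def setup_and_purge(path, pool, mdt_device, agent_host):
        # Stripe the file explicitly, then add a mirror on the OST pool
        # (assumed "lfs mirror extend" with setstripe-style options).
        subprocess.check_call(["lfs", "setstripe", "-S", "1048576", path])
        subprocess.check_call(["lfs", "mirror", "extend", "-N", "-p", pool, path])
        fid = subprocess.check_output(["lfs", "path2fid", path]).strip()
        # Hypothetical lipe_find invocation mirroring the test case above:
        # select by pool and FID, then purge the mirror on that pool.
        subprocess.check_call(["lipe_find", mdt_device,
                               "-pool", pool, "-fid", fid,
                               "--agents", agent_host, "-purge", pool])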
-
-def test_lipe_find_skip_substripe_inodes(log, workspace, mdti, client,
- lustre_dir_path):
- """
- Create a striped directory, then check whether lipe_find can skip
- dir substripe inodes.
- """
- # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
- # pylint: disable=too-many-locals
- mdt = mdti.lsi_service
- lustrefs = mdt.ls_lustre_fs
- client_host = client.lc_host
- mgs = lustrefs.lf_mgs_mdt
- if mgs is None:
- mgs = lustrefs.lf_mgs
- if mgs is None:
- log.cl_error("Lustre file system [%s] doesn't have a mgs",
- lustrefs.lf_fsname)
- return -1
-
- mgsi = mgs.ls_mounted_instance(log)
- if mgsi is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- mgs.ls_service_name)
- return -1
-
- command = ("lfs mkdir -c 2 -i %d %s" % (mdt.ls_index, lustre_dir_path))
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- # Striped dirs need more than one MDT; treat a mkdir failure as a skip
- return 1
-
- fid = lustre.lfs_path2fid(log, client_host, lustre_dir_path)
- if fid is None:
- log.cl_error("failed to get fid of dir [%s] on host [%s]",
- lustre_dir_path, client_host.sh_hostname)
- return -1
- cases = list()
- cases.append(LipeFindTestCase("", True, expect_count=1, skip_client=True))
- msg = lipe_find_message(fid, mdti, client_host, lustre_dir_path)
- test_workspace = workspace + "/scan_against_striped_dir"
- ret = utils.mkdir(test_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- test_workspace)
- return -1
-
- ret = test_find_options(log, test_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find to get all inodes")
- return -1
- return 0
-
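A directory created with "lfs mkdir -c 2" is stored as one master inode plus a substripe inode per extra MDT; expect_count=1 above asserts that lipe_find reports only the master. The layout can be observed from a client roughly like this (paths are illustrative and getdirstripe output varies by Lustre version):

    import subprocess

    # Create a directory striped across two MDTs, master on MDT0000.
    subprocess.check_call(["lfs", "mkdir", "-c", "2", "-i", "0",
                           "/mnt/lustre/striped_dir"])
    # Each line of getdirstripe lists an MDT index and the FID of the
    # inode holding that stripe of the directory.
    print subprocess.check_output(["lfs", "getdirstripe",
                                   "/mnt/lustre/striped_dir"])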
-
-def test_lipe_find_skip_snapshot_inodes(log, workspace, mdti, client,
- lustre_dir_path):
- """
- Create regular files and create snapshots, then check whether lipe_find can skip
- snapshot inodes.
- """
- # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
- # pylint: disable=too-many-locals
- mdt = mdti.lsi_service
- lustrefs = mdt.ls_lustre_fs
- client_host = client.lc_host
- mgs = lustrefs.lf_mgs_mdt
- if mgs is None:
- mgs = lustrefs.lf_mgs
- if mgs is None:
- log.cl_error("Lustre file system [%s] doesn't have a mgs",
- lustrefs.lf_fsname)
- return -1
-
- mgsi = mgs.ls_mounted_instance(log)
- if mgsi is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- mgs.ls_service_name)
- return -1
-
- for tmp_mdt in lustrefs.lf_mdts.values():
- tmp_mdti = tmp_mdt.ls_mounted_instance(log)
- if tmp_mdti is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- tmp_mdt.ls_service_name)
- return -1
- if tmp_mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS:
- log.cl_debug("skip snapshot test for zfs backend")
- return 1
- for tmp_ost in lustrefs.lf_osts.values():
- tmp_osti = tmp_ost.ls_mounted_instance(log)
- if tmp_osti is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- tmp_ost.ls_service_name)
- return -1
- if tmp_osti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS:
- log.cl_debug("skip snapshot test for zfs backend")
- return 1
-
- command = ("mkdir %s" % lustre_dir_path)
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- inode_path = os.path.join(lustre_dir_path, "test_inode")
- ret = client_host.sh_create_inode(log, inode_path)
- if ret:
- log.cl_error("failed to create regular file [%s] on host [%s]",
- inode_path, client_host.sh_hostname)
- return -1
- client_host.sh_run(log, "sync")
-
- fid = lustre.lfs_path2fid(log, client_host, inode_path)
- if fid is None:
- log.cl_error("failed get fid of file [%s] on host [%s]",
- inode_path, client_host.sh_hostname)
- return -1
-
- test_workspace = workspace + "/scan_before_snapshot"
- ret = utils.mkdir(test_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- test_workspace)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True))
- msg = lipe_find_message(fid, mdti, client_host, inode_path)
-
- ret = test_find_options(log, test_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find to get all inodes")
- return -1
-
- ret = lustrefs.lf_create_snapshot(log, "snapshot0")
- if ret:
- log.cl_debug("failed to create snapshot, skip following test")
- return 0
-
- test_workspace = workspace + "/scan_after_snapshot"
- ret = utils.mkdir(test_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- test_workspace)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True))
- msg = lipe_find_message(fid, mdti, client_host, inode_path)
-
- ret = test_find_options(log, test_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find to get all inodes after one snapshot created")
- return -1
-
- # "snapshot0" was already created above, so create snapshot1..snapshot10
- for i in range(1, 11):
- sname = "snapshot" + str(i)
- ret = lustrefs.lf_create_snapshot(log, sname)
- if ret:
- log.cl_error("failed to create snapshot with name [%s]", sname)
- return -1
-
- test_workspace = workspace + "/scan_after_10_more_snapshots"
- ret = utils.mkdir(test_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- test_workspace)
- return -1
-
- cases = list()
- cases.append(LipeFindTestCase("", True, expect_count=2, skip_client=True))
- msg = lipe_find_message(fid, mdti, client_host, inode_path)
-
- ret = test_find_options(log, test_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find to get all inodes after 10 more snapshots created")
- return -1
-
- ret = client_host.sh_remove_inode(log, inode_path)
- if ret:
- log.cl_error("failed to remove inode [%s] on host [%s]",
- inode_path, client_host.sh_hostname)
- return -1
- return 0
-
-
-def test_lipe_find_mdt_type(log, workspace, mdti, client, lustre_dir_path,
- inode_type=stat.S_IFREG, major=None, minor=None,
- path=None, stripe_index=None, pool=None,
- component_layout=False):
- """
- Create and remove a file/dir with a specific type and check whether
- lipe_find can find it
- """
- # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
- # pylint: disable=too-many-locals
- inode_fname = "new_Inode"
- upper_fname = inode_fname.upper()
- lower_fname = inode_fname.lower()
- assert(upper_fname != inode_fname and lower_fname != inode_fname)
- inode_path = lustre_dir_path + "/" + inode_fname
- inode_paths = [inode_path]
- type_option = lipe_find.file_type2option(inode_type)
- type_constant = lipe_find.file_option2lipe_constant(type_option)
- host = client.lc_host
-
- pool_name = None
- if pool is not None:
- pool_name = pool.lp_name
- assert inode_type == stat.S_IFREG
-
- if inode_type == stat.S_IFREG:
- assert stripe_index is not None
- ret = lustre.lustre_file_setstripe(log, host, inode_path,
- stripe_index=stripe_index,
- pool_name=pool_name,
- stripe_size=TEST_STRIPE_SIZE)
- else:
- ret = host.sh_create_inode(log, inode_path, inode_type=inode_type,
- major=major, minor=minor, path=path)
- if ret:
- log.cl_error("failed to create inode [%s] with type [%s] on host [%s]",
- inode_path, utils.file_type2string(inode_type),
- host.sh_hostname)
- return -1
-
- if component_layout:
- assert stripe_index is not None
- assert inode_type == stat.S_IFREG
- ret = lustre.lustre_file_mirror_extend(log, host, inode_path,
- stripe_index=stripe_index,
- pool_name=pool_name)
- if ret:
- log.cl_error("failed to extend the mirror of inode [%s] with type [%s] on host [%s]",
- inode_path, utils.file_type2string(inode_type),
- host.sh_hostname)
- return -1
- comp_count = 2
- else:
- comp_count = 1
-
- # If the dir has no stripe setting, it doesn't have a LOV xattr, and
- # find -stripe-count will not be able to find it. So set the stripe
- # count here.
- if inode_type == stat.S_IFDIR:
- assert stripe_index is not None
- command = ("lfs setstripe -c 1 -i %s -S %s %s " %
- (stripe_index, TEST_STRIPE_SIZE, inode_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- fid = lustre.lfs_path2fid(log, host, inode_path)
- if fid is None:
- log.cl_error("failed get fid of file [%s] on host [%s]",
- inode_path, host.sh_hostname)
- return -1
-
- if inode_type != stat.S_IFDIR:
- nlink = 10
- for i in range(0, nlink - 1):
- link_path = lustre_dir_path + "/" + "link" + str(i)
- inode_paths.append(link_path)
- command = "ln %s %s" % (inode_path, link_path)
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- else:
- nlink = 2
-
- # If a regular file has never been written, LSoM won't be updated and
- # the block number in LSoM stays 0. But a new file's block number
- # might not be zero if the file is located on a ZFS OST. So open the
- # file here to sync the LSoM.
- if inode_type == stat.S_IFREG:
- command = "cat %s" % (inode_path)
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- stat_result = host.sh_stat(log, inode_path)
- if stat_result is None:
- log.cl_error("failed stat file [%s] on host [%s]",
- inode_path, host.sh_hostname)
- return -1
-
- if stat_result.st_nlink != nlink:
- log.cl_error("failed wrong nlink of file [%s], expected [%d], got [%d]",
- nlink, stat_result.st_nlink)
- return -1
-
- creation_workspace = workspace + "/after_creation"
- ret = utils.mkdir(creation_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- creation_workspace)
- return -1
-
- msg = lipe_find_message(fid, mdti, host, inode_path,
- inode_type=inode_type, major=major,
- minor=minor, path=path,
- stripe_index=stripe_index, pool=pool,
- component_layout=component_layout)
-
- find_option = ""
- ret = utils.wait_condition(log, check_fid_found,
- (creation_workspace, mdti, find_option, fid))
- if ret:
- log.cl_error("expected to find fid with option [%s] after inode "
- "creation, but got failure, %s", find_option, msg)
- return -1
-
- # No need to use wait_condition from now on, since the inode should
- # have been synced during the last wait_condition
- cases = []
-
- cases.append(LipeFindTestCase("-fid %s" % fid, True))
- cases.append(LipeFindTestCase('-expr \'fid_match("%s")\'' % fid, True))
- cases.append(LipeFindTestCase('-fid "*"', True))
- cases.append(LipeFindTestCase('-fid "*%s"' % fid[3:], True))
- cases.append(LipeFindTestCase('-fid "%s*"' % fid[:-3], True))
- cases.append(LipeFindTestCase('-fid "*%s*"' % fid[3:-3], True))
- cases.append(LipeFindTestCase("-fid X%s" % fid, False))
- cases.append(LipeFindTestCase("-fid %sX" % fid, False))
-
- cases.append(LipeFindTestCase("-name %s" % inode_fname, True))
- cases.append(LipeFindTestCase('-expr \'fname_match("%s")\'' % inode_fname, True))
- cases.append(LipeFindTestCase("-name %s" % upper_fname, False))
- cases.append(LipeFindTestCase("-name %s" % lower_fname, False))
- cases.append(LipeFindTestCase('-name "*"', True))
- cases.append(LipeFindTestCase("! -name %s" % inode_fname, False))
- cases.append(LipeFindTestCase("! -name %s" % upper_fname, True))
- cases.append(LipeFindTestCase("! -name %s" % lower_fname, True))
- cases.append(LipeFindTestCase('! -name "*"', False))
-
- cases.append(LipeFindTestCase("-iname %s" % inode_fname, True))
- cases.append(LipeFindTestCase('-expr \'fname_imatch("%s")\'' % inode_fname, True))
- cases.append(LipeFindTestCase("-iname %s" % upper_fname, True))
- cases.append(LipeFindTestCase('-expr \'fname_imatch("%s")\'' % upper_fname, True))
- cases.append(LipeFindTestCase("-iname %s" % lower_fname, True))
- cases.append(LipeFindTestCase('-iname "*"', True))
- cases.append(LipeFindTestCase("! -iname %s" % inode_fname, False))
- cases.append(LipeFindTestCase("! -iname %s" % upper_fname, False))
- cases.append(LipeFindTestCase("! -iname %s" % lower_fname, False))
- cases.append(LipeFindTestCase('! -iname "*"', False))
-
- cases.append(LipeFindTestCase("-inum %s" % stat_result.st_ino,
- True, skip_server=True))
- cases.append(LipeFindTestCase('-expr "inum == %s"' % stat_result.st_ino,
- True, skip_server=True))
- cases.append(LipeFindTestCase('-expr "== inum %s"' % stat_result.st_ino,
- True, skip_server=True))
- cases.append(LipeFindTestCase("! -inum %s" % stat_result.st_ino,
- False, skip_server=True))
-
- cases.append(LipeFindTestCase("-type %s" % type_option, True))
- cases.append(LipeFindTestCase('-expr "== type %s"' % type_constant, True))
- cases.append(LipeFindTestCase('-expr "type == %s"' % type_constant, True))
- cases.append(LipeFindTestCase('-expr \'(type == %s) && fid_match("%s")\'' % (type_constant, fid), True))
- cases.append(LipeFindTestCase('-expr \'(type==%s)&& fid_match("%s")\'' % (type_constant, fid), True))
- cases.append(LipeFindTestCase("! -type %s" % type_option, False))
- cases.append(LipeFindTestCase('-expr \'(type != %s) || fid_match("%s")\'' % (type_constant, fid), True))
-
- if pool_name is None:
- cases.append(LipeFindTestCase('-pool "*"', False))
- cases.append(LipeFindTestCase('-pool-regex ".*"', False))
- else:
- cases.append(LipeFindTestCase('-pool "*"', True))
- cases.append(LipeFindTestCase('-pool-regex ".*"', True))
- cases.append(LipeFindTestCase('-pool %s' % pool_name, True))
- cases.append(LipeFindTestCase('-pool-regex %s' % pool_name, True))
- cases.append(LipeFindTestCase('-pool %sx' % pool_name, False))
- cases.append(LipeFindTestCase('-pool-regex %sx' % pool_name, False))
- cases.append(LipeFindTestCase('-pool x%s' % pool_name, False))
- cases.append(LipeFindTestCase('-pool-regex x%s' % pool_name, False))
- cases.append(LipeFindTestCase('-stripe-count 0', False))
- cases.append(LipeFindTestCase('! -stripe-count 1', False))
- cases.append(LipeFindTestCase('-stripe-count 2', False))
- cases.append(LipeFindTestCase('-stripe-size %s' % (TEST_STRIPE_SIZE - 1), False))
- cases.append(LipeFindTestCase('-stripe-size %s' % (TEST_STRIPE_SIZE + 1), False))
- cases.append(LipeFindTestCase('-stripe-size +%s' % TEST_STRIPE_SIZE, False))
-
- # Only regular files have objects on OSTs.
- if inode_type == stat.S_IFREG:
- cases.append(LipeFindTestCase("-ost %s" % stripe_index, True))
- cases.append(LipeFindTestCase('-expr "ost(%s)"' % stripe_index, True))
- cases.append(LipeFindTestCase("! -ost %s" % stripe_index, False))
- cases.append(LipeFindTestCase("-ost %s" % (stripe_index + 1), False))
- cases.append(LipeFindTestCase("! -ost %s" % (stripe_index + 1), True))
-
- # -stripe-index is the same as -ost
- cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, True))
- cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, False))
- cases.append(LipeFindTestCase("-stripe-index %s" % (stripe_index + 1), False))
- cases.append(LipeFindTestCase("! -stripe-index %s" % (stripe_index + 1), True))
- elif inode_type == stat.S_IFDIR:
- # The directory has been through "lfs setstripe", so it is considered
- # to have no object, but its LOV EA is valid.
- cases.append(LipeFindTestCase("-ost %s" % stripe_index, False))
- cases.append(LipeFindTestCase("! -ost %s" % stripe_index, True))
- cases.append(LipeFindTestCase("-ost %s" % (stripe_index + 1), False))
- cases.append(LipeFindTestCase("! -ost %s" % (stripe_index + 1), True))
-
- # -stripe-index is the same as -ost
- cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, False))
- cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, True))
- cases.append(LipeFindTestCase("-stripe-index %s" % (stripe_index + 1), False))
- cases.append(LipeFindTestCase("! -stripe-index %s" % (stripe_index + 1), True))
- else:
- cases.append(LipeFindTestCase("-ost 0", False))
- cases.append(LipeFindTestCase("! -ost 0", False))
- cases.append(LipeFindTestCase("-ost 0", False))
- cases.append(LipeFindTestCase("! -ost 0", False))
-
- # -stripe-index is the same with -ost
- cases.append(LipeFindTestCase("-stripe-index 0", False))
- cases.append(LipeFindTestCase("! -stripe-index 0", False))
- cases.append(LipeFindTestCase("-stripe-index 0", False))
- cases.append(LipeFindTestCase("! -stripe-index 0", False))
-
- cases.append(LipeFindTestCase('-stripe-size %s' % TEST_STRIPE_SIZE, False))
- cases.append(LipeFindTestCase('-stripe-size -%s' % (TEST_STRIPE_SIZE + 1), False))
- cases.append(LipeFindTestCase('-stripe-size +%s' % (TEST_STRIPE_SIZE - 1), False))
-
- cases.append(LipeFindTestCase('-comp-count %s' % (comp_count - 1), False))
- cases.append(LipeFindTestCase('-component-count %s' % (comp_count - 1), False))
- cases.append(LipeFindTestCase('-comp-count %s' % (comp_count + 1), False))
- cases.append(LipeFindTestCase('-component-count %s' % (comp_count + 1), False))
- cases.append(LipeFindTestCase('-comp-count +%s' % comp_count, False))
- cases.append(LipeFindTestCase('-component-count +%s' % comp_count, False))
- cases.append(LipeFindTestCase('-comp-count -%s' % comp_count, False))
- cases.append(LipeFindTestCase('-component-count -%s' % comp_count, False))
-
- # Only regular files and directories with a stripe configuration have
- # a LOV EA. For an inode without a LOV EA, any expression with
- # pool_match/pool_reg will never match.
- if inode_type == stat.S_IFREG or inode_type == stat.S_IFDIR:
- if pool_name is None:
- cases.append(LipeFindTestCase('! -pool "*"', True))
- cases.append(LipeFindTestCase('! -pool-regex ".*"', True))
- else:
- cases.append(LipeFindTestCase('! -pool "*"', False))
- cases.append(LipeFindTestCase('! -pool-regex ".*"', False))
- cases.append(LipeFindTestCase('! -stripe-count 0', True))
- cases.append(LipeFindTestCase('-stripe-count 1', True))
- cases.append(LipeFindTestCase('! -stripe-count 2', True))
-
- cases.append(LipeFindTestCase('-stripe-size %s' % TEST_STRIPE_SIZE, True))
- cases.append(LipeFindTestCase('-stripe-size -%s' % (TEST_STRIPE_SIZE + 1), True))
- cases.append(LipeFindTestCase('-stripe-size +%s' % (TEST_STRIPE_SIZE - 1), True))
- cases.append(LipeFindTestCase('-layout raid0', True))
- cases.append(LipeFindTestCase('-layout raid0,raid0', True))
-
- cases.append(LipeFindTestCase('-comp-count %s' % comp_count, True))
- cases.append(LipeFindTestCase('-component-count %s' % comp_count, True))
- cases.append(LipeFindTestCase('-comp-count -%s' % (comp_count + 1), True))
- cases.append(LipeFindTestCase('-component-count -%s' % (comp_count + 1), True))
- cases.append(LipeFindTestCase('-comp-count +%s' % (comp_count - 1), True))
- cases.append(LipeFindTestCase('-component-count +%s' % (comp_count - 1), True))
- else:
- cases.append(LipeFindTestCase('! -pool "*"', False))
- cases.append(LipeFindTestCase('! -pool-regex ".*"', False))
- cases.append(LipeFindTestCase('! -stripe-count 0', False))
- cases.append(LipeFindTestCase('-stripe-count 1', False))
- cases.append(LipeFindTestCase('! -stripe-count 2', False))
-
- cases.append(LipeFindTestCase('-layout raid0', False))
- cases.append(LipeFindTestCase('-layout raid0,raid0', False))
-
- cases.append(LipeFindTestCase('-comp-count %s' % comp_count, False))
- cases.append(LipeFindTestCase('-component-count %s' % comp_count, False))
- cases.append(LipeFindTestCase('-comp-count -%s' % (comp_count + 1), False))
- cases.append(LipeFindTestCase('-component-count -%s' % (comp_count + 1), False))
- cases.append(LipeFindTestCase('-comp-count +%s' % (comp_count - 1), False))
- cases.append(LipeFindTestCase('-component-count +%s' % (comp_count - 1), False))
-
- cases.append(LipeFindTestCase('-layout released', False))
- cases.append(LipeFindTestCase('-layout mdt', False))
- cases.append(LipeFindTestCase('-layout raid0,released', False))
- cases.append(LipeFindTestCase('-layout raid0,mdt', False))
- cases.append(LipeFindTestCase('-layout mdt,released', False))
- cases.append(LipeFindTestCase('-layout raid0,mdt,released', False))
-
- cases.append(LipeFindTestCase("-uid 0", True))
- cases.append(LipeFindTestCase("! ! -uid 0", True))
- cases.append(LipeFindTestCase("! -uid 0", False))
- cases.append(LipeFindTestCase("! -uid 1 ! -uid 0", False))
- cases.append(LipeFindTestCase("-user 0", True))
- cases.append(LipeFindTestCase("! -user 0", False))
- cases.append(LipeFindTestCase("! -user 1 ! -user 0", False))
- cases.append(LipeFindTestCase("-user root", True))
- cases.append(LipeFindTestCase("! -user root", False))
- cases.append(LipeFindTestCase("! -user bin ! -user root", False))
- cases.append(LipeFindTestCase("-nouser", False))
-
- cases.append(LipeFindTestCase("-gid 0", True))
- cases.append(LipeFindTestCase("! -gid 0", False))
- cases.append(LipeFindTestCase("-group 0", True))
- cases.append(LipeFindTestCase("! -group 0", False))
- cases.append(LipeFindTestCase("-group root", True))
- cases.append(LipeFindTestCase("! -group root", False))
- cases.append(LipeFindTestCase("-nogroup", False))
-
- for xtime in ["atime", "ctime", "mtime"]:
- cases.append(LipeFindTestCase("-%s 0" % xtime, True))
- cases.append(LipeFindTestCase("! -%s 0" % xtime, False))
- cases.append(LipeFindTestCase("-%s +0" % xtime, False))
- cases.append(LipeFindTestCase("! -%s +0" % xtime, True))
- cases.append(LipeFindTestCase("-%s -0" % xtime, False))
- cases.append(LipeFindTestCase("! -%s -0" % xtime, True))
- cases.append(LipeFindTestCase("-%s -1" % xtime, True))
- cases.append(LipeFindTestCase("! -%s -1" % xtime, False))
-
- cases.append(LipeFindTestCase('-expr "(%s > (sys_time - 1 * days) || uid == 100) && gid == 0"' % xtime, True))
-
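The -atime/-ctime/-mtime cases above rely on find(1)'s truncation rule: the age is cut down to whole units (days here, minutes for -amin/-cmin/-mmin) before applying the same N/+N/-N convention as in the size sketch earlier. That is why a freshly created file matches "-atime 0" and "-atime -1" but not "-atime +0". A minimal sketch (hypothetical helper, not LiPE code):

    def xtime_matches(file_time, now, spec, unit=86400):
        """find(1)-style time test; unit is 86400 for -atime, 60 for -amin."""
        age = int((now - file_time) / unit)  # truncate to whole units
        if spec.startswith("+"):
            return age > int(spec[1:])
        if spec.startswith("-"):
            return age < int(spec[1:])
        return age == int(spec)

    # A file created five seconds ago: the age truncates to 0 days.
    assert xtime_matches(1000, 1005, "0")
    assert not xtime_matches(1000, 1005, "+0")
    assert xtime_matches(1000, 1005, "-1")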
- # The time between file creation and test should never be longer
- # than 10 hours
- for xtime in ["amin", "cmin", "mmin"]:
- cases.append(LipeFindTestCase("-%s +600" % xtime, False))
- cases.append(LipeFindTestCase("! -%s +600" % xtime, True))
- cases.append(LipeFindTestCase("-%s -0" % xtime, False))
- cases.append(LipeFindTestCase("! -%s -0" % xtime, True))
- cases.append(LipeFindTestCase("-%s -600" % xtime, True))
- cases.append(LipeFindTestCase("! -%s -600" % xtime, False))
-
- # The ctime and atime might be slightly different, so don't expect that
- # "-used 0" can find the file. And because of this, "-used -0" might find
- # the file unexpectedly.
- cases.append(LipeFindTestCase("-used +0", False))
- cases.append(LipeFindTestCase("-used 1", False))
- cases.append(LipeFindTestCase("-used -1", True))
- cases.append(LipeFindTestCase("-used +1", False))
-
- cases.append(LipeFindTestCase("-name %s -type %s" % (inode_fname, type_option),
- True))
- cases.append(LipeFindTestCase('-name "*" -type %s' % (type_option),
- True))
-
- if inode_type == stat.S_IFDIR or inode_type == stat.S_IFREG:
- cases.append(LipeFindTestCase("-empty", True))
- cases.append(LipeFindTestCase("! -empty", False))
- else:
- cases.append(LipeFindTestCase("-empty", False))
- cases.append(LipeFindTestCase("! -empty", True))
-
- cases.append(LipeFindTestCase("-entries 0", True))
- cases.append(LipeFindTestCase("! -entries 0", False))
- cases.append(LipeFindTestCase("-entries +0", False))
- cases.append(LipeFindTestCase("! -entries +0", True))
- cases.append(LipeFindTestCase("-entries -0", False))
- cases.append(LipeFindTestCase("! -entries -0", True))
-
- cases.append(LipeFindTestCase("-links %d" % nlink, True))
- cases.append(LipeFindTestCase("-links %d" % (nlink + 1), False))
- cases.append(LipeFindTestCase("-links %d" % (nlink - 1), False))
-
- # Some file types support setting user xattrs via setfattr
- if inode_type in [stat.S_IFREG, stat.S_IFDIR]:
- command = ("setfattr -h -n user.xattr_name -v xattr_value %s" % (inode_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- cases.append(LipeFindTestCase('-xattr "user.xattr_name=*"', True))
- cases.append(LipeFindTestCase('-xattr "*=xattr_value"', True))
- cases.append(LipeFindTestCase('-xattr "user.xattr_name=xattr_value"', True))
- cases.append(LipeFindTestCase('-xattr "user.xattr_name*=xattr_value"', True))
- cases.append(LipeFindTestCase('-xattr "user.*=xattr_value"', True))
- cases.append(LipeFindTestCase('-xattr "user.*=*_value"', True))
-
- cases.append(LipeFindTestCase('-xattr "*=*"', True))
- cases.append(LipeFindTestCase('-xattr "trusted.lma=*"', True))
- cases.append(LipeFindTestCase('-xattr "invalid_name=*"', False))
- cases.append(LipeFindTestCase('-xattr "*=invalid_value"', False))
-
- cases.append(LipeFindTestCase('-perm -0', True))
- cases.append(LipeFindTestCase('-perm /0', True))
-
- perm = stat_result.st_mode & 07777
- perm_str = oct(perm)
- cases.append(LipeFindTestCase('-perm %s' % perm_str, True))
- cases.append(LipeFindTestCase('-perm -%s' % perm_str, True))
- cases.append(LipeFindTestCase('-perm /%s' % perm_str, True))
- # for each bit up to 010000
- for i in range(12):
- bit = 1 << i
- if (perm & bit) == 0:
- more_perm = perm | bit
- more_perm_str = oct(more_perm)
- cases.append(LipeFindTestCase('-perm %s' % more_perm_str, False))
- cases.append(LipeFindTestCase('-perm -%s' % more_perm_str, False))
- cases.append(LipeFindTestCase('-perm /%s' % more_perm_str, True))
- else:
- less_perm = perm & (~bit)
- less_perm_str = oct(less_perm)
- cases.append(LipeFindTestCase('-perm %s' % less_perm_str, False))
- cases.append(LipeFindTestCase('-perm -%s' % less_perm_str, True))
- cases.append(LipeFindTestCase('-perm /%s' % less_perm_str, True))
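The three -perm forms exercised by the loop above follow find(1): a bare octal mode must match exactly, "-MODE" requires all of the listed bits, and "/MODE" requires at least one of them, with "/0" conventionally matching everything. A sketch of that logic (perm_matches is a hypothetical helper for illustration):

    def perm_matches(st_mode, spec):
        """find(1)-style permission test for 'MODE', '-MODE' and '/MODE' specs."""
        bits = st_mode & 0o7777
        if spec.startswith("-"):            # all listed bits must be set
            mode = int(spec[1:], 8)
            return (bits & mode) == mode
        if spec.startswith("/"):            # any listed bit set; /0 matches all
            mode = int(spec[1:], 8)
            return mode == 0 or (bits & mode) != 0
        return bits == int(spec, 8)         # exact match

    # A mode-0644 file: exact and "all bits" forms match, but no execute
    # bit is set anywhere.
    assert perm_matches(0o100644, "0644")
    assert perm_matches(0o100644, "-0644")
    assert not perm_matches(0o100644, "-0755")
    assert not perm_matches(0o100644, "/0111")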
-
- if (mdti.lsi_service.ls_backfstype == lustre.BACKFSTYPE_ZFS and
- not lipe_constant.HAVE_ZFS_ZPL_PROJID):
- # ZFS versions earlier than 0.8 don't provide proper interfaces to
- # read the project IDs.
- cases.append(LipeFindTestCase('-projid 0', False, skip_client=True))
- cases.append(LipeFindTestCase('-projid +0', False, skip_client=True))
- cases.append(LipeFindTestCase('-projid -1', False, skip_client=True))
- # Client can still get the projid
- cases.append(LipeFindTestCase('-projid 0', True, skip_server=True))
- cases.append(LipeFindTestCase('-projid +0', False, skip_server=True))
- cases.append(LipeFindTestCase('-projid -1', True, skip_server=True))
- else:
- cases.append(LipeFindTestCase('-projid 0', True))
- cases.append(LipeFindTestCase('-expr "projid == 0"', True))
- cases.append(LipeFindTestCase('-expr "== projid 0"', True))
- cases.append(LipeFindTestCase('-projid +0', False))
- cases.append(LipeFindTestCase('-projid -1', True))
-
- ret = test_find_options(log, creation_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find with given options after inode "
- "creation")
- return -1
-
- ret = test_lipe_find_size(log, creation_workspace, client, mdti, fid, host,
- inode_path, inode_type, msg, size=True)
- if ret:
- log.cl_error("failed to run find -size test after creation, %s", msg)
- return -1
-
- ret = test_lipe_find_size(log, creation_workspace, client, mdti, fid, host,
- inode_path, inode_type, msg, size=False)
- if ret:
- log.cl_error("failed to run find -blocks test after creation, %s", msg)
- return -1
-
- action_delete_workspace = workspace + "/action_delete"
- ret = utils.mkdir(action_delete_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- action_delete_workspace)
- return -1
-
- cases = []
- cases.append(LipeFindTestCase("-fid %s" % fid, True, skip_client=True,
- option="--agents %s -delete" % client.lc_host.sh_hostname))
- ret = test_find_options(log, action_delete_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to remove files with option '-delete' removal")
- return -1
-
- removal_workspace = workspace + "/after_removal"
- ret = utils.mkdir(removal_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- removal_workspace)
- return -1
-
- find_option = ""
- ret = utils.wait_condition(log, check_fid_missing,
- (removal_workspace, mdti, find_option, fid))
- if ret:
- log.cl_error("can find FID UNEXPECTEDLY after removal with option "
- "[%s], %s", find_option, msg)
- return -1
-
- # No need to use wait_condition from now on, since the inode should
- # have been synced during the last wait_condition
- cases = []
- cases.append(LipeFindTestCase("-name %s" % inode_fname, False))
- cases.append(LipeFindTestCase("! -name %s" % inode_fname, False))
- cases.append(LipeFindTestCase('-name "*"', False))
- cases.append(LipeFindTestCase('! -name "*"', False))
- cases.append(LipeFindTestCase("-type %s" % type_option, False))
- cases.append(LipeFindTestCase("! -type %s" % type_option, False))
-
- if stripe_index is not None:
- cases.append(LipeFindTestCase("-ost %s" % stripe_index, False))
- cases.append(LipeFindTestCase("! -ost %s" % stripe_index, False))
- cases.append(LipeFindTestCase("-stripe-index %s" % stripe_index, False))
- cases.append(LipeFindTestCase("! -stripe-index %s" % stripe_index, False))
-
- cases.append(LipeFindTestCase('-pool ".*"', False))
- cases.append(LipeFindTestCase('! -pool ".*"', False))
-
- cases.append(LipeFindTestCase('-pool-regex ".*"', False))
- cases.append(LipeFindTestCase('! -pool-regex ".*"', False))
-
- cases.append(LipeFindTestCase("-uid 0", False))
- cases.append(LipeFindTestCase("! -uid 0", False))
- cases.append(LipeFindTestCase("-user 0", False))
- cases.append(LipeFindTestCase("! -user 0", False))
- cases.append(LipeFindTestCase("-user root", False))
- cases.append(LipeFindTestCase("! -user root", False))
-
- cases.append(LipeFindTestCase("-gid 0", False))
- cases.append(LipeFindTestCase("! -gid 0", False))
- cases.append(LipeFindTestCase("-group 0", False))
- cases.append(LipeFindTestCase("! -group 0", False))
- cases.append(LipeFindTestCase("-group root", False))
- cases.append(LipeFindTestCase("! -group root", False))
-
- cases.append(LipeFindTestCase("-size 0", False))
- cases.append(LipeFindTestCase("! -size 0", False))
- cases.append(LipeFindTestCase("-blocks 0", False))
- cases.append(LipeFindTestCase("! -blocks 0", False))
- cases.append(LipeFindTestCase("-empty", False))
- cases.append(LipeFindTestCase("! -empty", False))
-
- cases.append(LipeFindTestCase("-inum %s" % stat_result.st_ino,
- False, skip_server=True))
- cases.append(LipeFindTestCase("! -inum %s" % stat_result.st_ino,
- False, skip_server=True))
-
- cases.append(LipeFindTestCase('-xattr "*=*"', False))
-
- cases.append(LipeFindTestCase('-perm -0', False))
- cases.append(LipeFindTestCase('-perm /0', False))
- cases.append(LipeFindTestCase('-stripe-count 1', False))
- cases.append(LipeFindTestCase('! -stripe-count 1', False))
- cases.append(LipeFindTestCase('-projid 0', False))
- cases.append(LipeFindTestCase('-projid +0', False))
- cases.append(LipeFindTestCase('-projid -1', False))
-
- cases.append(LipeFindTestCase('-layout raid0', False))
- cases.append(LipeFindTestCase('-layout released', False))
- cases.append(LipeFindTestCase('-layout mdt', False))
-
- ret = test_find_options(log, removal_workspace, client, mdti, cases,
- fid, msg)
- if ret:
- log.cl_error("failed to test lipe_find with given options after inode "
- "removal")
- return -1
- return 0
-
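The -name/-iname cases above use shell-style globbing, with -iname folding case. LiPE implements the matching itself; purely as an illustration of the expected semantics, Python's fnmatch reproduces them for plain file names (patterns containing "[" would be taken as character classes here, so this sketch does not extend to the FID globs above):

    import fnmatch

    def name_matches(fname, pattern, ignore_case=False):
        # fnmatchcase() performs no case folding, so -iname is modeled by
        # lowering both sides first.
        if ignore_case:
            fname, pattern = fname.lower(), pattern.lower()
        return fnmatch.fnmatchcase(fname, pattern)

    assert name_matches("new_Inode", "new_Inode")        # -name new_Inode
    assert not name_matches("new_Inode", "NEW_INODE")    # -name NEW_INODE
    assert name_matches("new_Inode", "NEW_INODE", True)  # -iname NEW_INODE
    assert name_matches("new_Inode", "*")                # -name "*"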
-
-def test_lipe_find_mdt_all_inode_types(log, workspace, mdti, client,
- lustre_dir_path, pool):
- """
- Test whether lipe_find can find the newly created file/directory on an MDT
- """
- # pylint: disable=too-many-locals,too-many-arguments
- # pylint: disable=too-many-branches,too-many-statements
- mdt = mdti.lsi_service
- lustrefs = mdt.ls_lustre_fs
- mdt_index = mdt.ls_index
- client_host = client.lc_host
-
- command = ("lfs mkdir -c 1 -i %d %s" % (mdt_index, lustre_dir_path))
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- inode_types = [stat.S_IFDIR, stat.S_IFCHR, stat.S_IFBLK, stat.S_IFREG,
- stat.S_IFIFO, stat.S_IFLNK, stat.S_IFSOCK]
- for inode_type in inode_types:
- major = None
- minor = None
- path = None
- if inode_type == stat.S_IFCHR or inode_type == stat.S_IFBLK:
- major = 7
- minor = 1
- elif inode_type == stat.S_IFLNK:
- path = "unknown"
-
- if inode_type == stat.S_IFREG:
- # Test component layout
- ret = test_lipe_find_mdt_type(log, workspace, mdti, client,
- lustre_dir_path, inode_type=inode_type,
- stripe_index=0, component_layout=True)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] on regular file type with component layout",
- mdti.lsi_service_instance_name)
- return -1
-
- # Need to test different stripe indexes
- for ost in lustrefs.lf_osts.values():
- ost_index = ost.ls_index
- ret = test_lipe_find_mdt_type(log, workspace, mdti, client,
- lustre_dir_path, inode_type=inode_type,
- stripe_index=ost_index)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] on regular file type",
- mdti.lsi_service_instance_name)
- return -1
-
- # Test OST pool
- ret = test_lipe_find_mdt_type(log, workspace, mdti, client,
- lustre_dir_path, inode_type=inode_type,
- stripe_index=0, pool=pool)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] with regular file type on OST pool [%s]",
- mdti.lsi_service_instance_name,
- pool.lp_name)
- return -1
- else:
- ret = test_lipe_find_mdt_type(log, workspace, mdti, client,
- lustre_dir_path, inode_type=inode_type,
- stripe_index=1,
- major=major, minor=minor, path=path)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] with inode type [%s]",
- mdti.lsi_service_instance_name, inode_type)
- return -1
-
- # test action options
- ret = test_lipe_find_mdt_actions(log, workspace, mdti, client,
- lustre_dir_path, pool)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] with actions", mdti.lsi_service_instance_name)
- return -1
-
- command = ("rmdir %s" % (lustre_dir_path))
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # test lipe skip snapshot inodes
- ret = test_lipe_find_skip_snapshot_inodes(log, workspace, mdti, client,
- lustre_dir_path)
- if ret < 0:
- log.cl_error("failed to test lipe_find skip snapshot inodes against "
- "[%s]", mdti.lsi_service_instance_name)
- return -1
- elif ret > 0:
- log.cl_debug("skip test lipe_find against snapshot inodes")
- return 0
-
- command = ("rmdir %s" % (lustre_dir_path))
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # test lipe skip dir substripe inodes
- ret = test_lipe_find_skip_substripe_inodes(log, workspace, mdti, client,
- lustre_dir_path)
- if ret < 0:
- log.cl_error("failed to test lipe_find skip dir substripe inodes against "
- "[%s]", mdti.lsi_service_instance_name)
- return -1
- elif ret > 0:
- log.cl_debug("skip test lipe_find against dir substripe inodes")
- return 0
-
- command = ("rmdir %s" % (lustre_dir_path))
- retval = client_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- client_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def test_lipe_find_mdt(log, workspace, mdt, pool):
- """
- Test whether lipe_find can find the newly created file/directory on an MDT
- """
- lustrefs = mdt.ls_lustre_fs
- clients = lustrefs.lf_clients
- if len(clients) == 0:
- log.cl_warning("no client for file system [%s] configured, skip test of "
- "file creation", lustrefs.lf_fsname)
- return 0
-
- mdti = mdt.ls_mounted_instance(log)
- if mdti is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- mdt.ls_service_name)
- return -1
-
- lustre_relative_dir_path = ("lipe_test_" + mdt.ls_service_name + "_" +
- time_util.local_strftime(time_util.utcnow(),
- "%Y-%m-%d-%H_%M_%S.%f"))
-
- find_workspace = workspace + "/" + "dir_path"
- ret = utils.mkdir(find_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- find_workspace)
- return -1
-
- client = random.choice(clients.values())
- lustre_dir_path = client.lc_mnt + "/" + lustre_relative_dir_path
-
- ret = test_lipe_find_mdt_all_inode_types(log, workspace, mdti, client,
- lustre_dir_path, pool)
- if ret:
- log.cl_error("failed to test the lipe_find for mdt instance "
- "[%s] with all kinds of inode types",
- mdti.lsi_service_instance_name)
- return -1
- return 0
-
-
-def test_lipe_find_lustrefs(log, workspace, lustrefs):
- """
- Run lipe_find tests on a Lustre file system
- """
- # Create OST pool that is going to be used later
- mgs = lustrefs.lf_mgs_mdt
- if mgs is None:
- mgs = lustrefs.lf_mgs
- if mgs is None:
- log.cl_error("Lustre file system [%s] doesn't have a mgs",
- lustrefs.lf_fsname)
- return -1
-
- mgsi = mgs.ls_mounted_instance(log)
- if mgsi is None:
- log.cl_error("failed to get the active instance of Lustre service [%s]",
- mgs.ls_service_name)
- return -1
-
- pool_name = TEST_POOL_NAME
- pool = lustre.LustrePool(lustrefs, pool_name)
- ret = pool.lp_new(log, mgsi.lsi_host)
- if ret:
- log.cl_error("failed to create pool [%s] on file system [%s]",
- pool_name, lustrefs.lf_fsname)
- return ret
-
- ret = pool.lp_add(log, mgsi.lsi_host, lustrefs.lf_osts.values())
- if ret:
- log.cl_error("failed to add OSTs to pool [%s] on file system [%s]",
- pool_name, lustrefs.lf_fsname)
- return ret
-
- for mdt in lustrefs.lf_mdts.values():
- mdt_workspace = workspace + "/" + mdt.ls_service_name
- ret = utils.mkdir(mdt_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- mdt_workspace)
- return -1
-
- ret = test_lipe_find_mdt(log, mdt_workspace, mdt, pool)
- if ret:
- log.cl_error("failed to test the inode number of lipe_find on "
- "Lustre service [%s]", mdt.ls_service_name)
- return ret
-
- ret = pool.lp_remove(log, mgsi.lsi_host, lustrefs.lf_osts.values())
- if ret:
- log.cl_error("failed to remove OSTs from pool [%s] on file system [%s]",
- pool_name, lustrefs.lf_fsname)
- return ret
-
- ret = pool.lp_destroy(log, mgsi.lsi_host)
- if ret:
- log.cl_error("failed to destroy pool [%s] on file system [%s]",
- pool_name, lustrefs.lf_fsname)
- return ret
-
- return 0
-
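The LustrePool calls above wrap the standard lctl pool workflow on the MGS node; an illustrative mapping (fsname, pool and OST names are placeholders, not copied from LiPE output):

    # lctl pool_new <fs>.<pool>                  <- pool.lp_new()
    # lctl pool_add <fs>.<pool> <fs>-OST0000     <- pool.lp_add(), once per OST
    # ... per-MDT lipe_find tests run here ...
    # lctl pool_remove <fs>.<pool> <fs>-OST0000  <- pool.lp_remove()
    # lctl pool_destroy <fs>.<pool>              <- pool.lp_destroy()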
-
-def test_lipe_find(log, workspace, clownfish_instance):
- """
- Test whether lipe_find can find the newly created file/directory
- """
- # To save time, only test the Lustre file system with the maximum
- # number of MDTs
- biggest_lustrefs = None
- for lustrefs in clownfish_instance.ci_lustres.values():
- if ((biggest_lustrefs is None) or
- (len(biggest_lustrefs.lf_mdts) < len(lustrefs.lf_mdts))):
- biggest_lustrefs = lustrefs
- if biggest_lustrefs is None:
- log.cl_error("no lustre file system to test lipe_find")
- return -1
- return test_lipe_find_lustrefs(log, workspace, biggest_lustrefs)
-
-
-LIPE_TESTS.append(test_lipe_find)
-
-
-def lipe_test_prepare(log, workspace, lipe_install_config_fpath):
- """
- Load the LiPE install config (YAML) and launch config (JSON), and
- init the LiPE cluster for the tests
- """
- # pylint: disable=global-statement
- lipe_install_config_fd = open(lipe_install_config_fpath)
- ret = 0
- try:
- lipe_install_config = yaml.load(lipe_install_config_fd)
- except:
- log.cl_error("not able to load [%s] as yaml file: %s",
- lipe_install_config_fpath, traceback.format_exc())
- ret = -1
- lipe_install_config_fd.close()
- if ret:
- return -1
-
- lipe_config_fpath = utils.config_value(lipe_install_config,
- cstr.CSTR_CONFIG_FPATH)
- if lipe_config_fpath is None:
- log.cl_error("can NOT find [%s] in the installation config, "
- "please correct file [%s]",
- cstr.CSTR_CONFIG_FPATH,
- lipe_install_config_fpath)
- return -1
-
- global LIPE_LAUNCH_CONFIG
- with open(lipe_config_fpath) as json_file:
- LIPE_LAUNCH_CONFIG = json.load(json_file)
-
- global LIPE_CLUSTER
- # Pass None mnt_path, which should be fine
- LIPE_CLUSTER = lipe_install_nodeps.lipe_init_cluster(log, workspace, None,
- lipe_install_config,
- lipe_install_config_fpath)
- if LIPE_CLUSTER is None:
- log.cl_error("failed to parse LiPE config, please correct file [%s]",
- lipe_install_config_fpath)
- return -1
-
- return 0
-
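lipe_test_prepare() chains two config files: the install config (YAML) names the launch config (JSON) under a key taken from cstr.CSTR_CONFIG_FPATH; the literal key name "config_fpath" below is an assumption, as is the file path. A condensed sketch of that chain:

    import json
    import yaml

    with open("lipe_install.conf") as yaml_file:      # path illustrative
        install_config = yaml.safe_load(yaml_file)    # safe_load avoids executing YAML tags
    launch_fpath = install_config.get("config_fpath")  # assumed key name
    with open(launch_fpath) as json_file:
        launch_config = json.load(json_file)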
-
-def do_test(log, workspace, clownfish_instance, test_config, test_config_fpath,
- test_functs):
- """
- Run test
- """
- # pylint: disable=too-many-branches,too-many-locals,too-many-arguments
- # pylint: disable=too-many-statements
- test_dict = {}
- for test_funct in test_functs:
- test_dict[test_funct.__name__] = test_funct
-
- quit_on_error = True
- only_test_configs = utils.config_value(test_config,
- cstr.CSTR_ONLY_TESTS)
- if only_test_configs is None:
- log.cl_debug("no [%s] is configured, run all tests",
- cstr.CSTR_ONLY_TESTS)
- selected_tests = test_functs
- else:
- selected_tests = []
- for test_name in only_test_configs:
- if test_name not in test_dict:
- log.cl_error("test [%s] doenot exist, please correct file "
- "[%s]", test_name, test_config_fpath)
- return -1
- test_funct = test_dict[test_name]
- selected_tests.append(test_funct)
-
- not_selected_tests = []
- for test_funct in test_functs:
- if test_funct not in selected_tests:
- not_selected_tests.append(test_funct)
-
- passed_tests = []
- failed_tests = []
- skipped_tests = []
- test_durations = {}
-
- for test_func in selected_tests:
- test_name = test_func.__name__
- test_workspace = workspace + "/" + test_name
- ret = utils.mkdir(test_workspace)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- test_workspace)
- return -1
-
- log.cl_info("starting test [%s]", test_name)
- start_time = time.time()
- ret = test_func(log, test_workspace, clownfish_instance)
- duration_time = time.time() - start_time
- test_durations[test_func.__name__] = duration_time
- if ret < 0:
- log.cl_error("test [%s] failed, duration %f seconds", test_name,
- duration_time)
- failed_tests.append(test_func)
- if quit_on_error:
- return -1
- elif ret == 1:
- log.cl_warning("test [%s] skipped, duration %f seconds", test_name,
- duration_time)
- skipped_tests.append(test_func)
- else:
- log.cl_info("test [%s] passed, duration %f seconds", test_name,
- duration_time)
- passed_tests.append(test_func)
-
- table = prettytable.PrettyTable()
- table.field_names = ["Test name", "Result", "Duration"]
- for test_func in not_selected_tests:
- test_name = test_func.__name__
- table.add_row([test_name, "Not selected", "0"])
-
- for test_func in skipped_tests:
- test_name = test_func.__name__
- table.add_row([test_name, "Skipped", "%f seconds" % test_durations[test_name]])
-
- for test_func in failed_tests:
- test_name = test_func.__name__
- table.add_row([test_name, "Failed", "%f seconds" % test_durations[test_name]])
-
- for test_func in passed_tests:
- test_name = test_func.__name__
- table.add_row([test_name, "Passed", "%f seconds" % test_durations[test_name]])
-
- log.cl_stdout(table)
- if failed_tests:
- return -1
- return 0
-
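Test selection above is driven by a list in the test config; the key name "only_tests" below is inferred from cstr.CSTR_ONLY_TESTS and is an assumption. A condensed sketch of the same filtering logic:

    # Hypothetical test config fragment, as the parsed Python dict:
    test_config = {"only_tests": ["test_lipe_find"]}

    # LIPE_TESTS is this module's registry of test functions.
    test_dict = dict((funct.__name__, funct) for funct in LIPE_TESTS)
    names = test_config.get("only_tests")
    selected = LIPE_TESTS if names is None else [test_dict[name] for name in names]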
-
-def _lipe_test(log, workspace, test_config, test_config_fpath):
- """
- Run LiPE test
- """
- # pylint: disable=too-many-statements,too-many-branches,too-many-locals
- # pylint: disable=global-statement
- log.cl_info("installing virtul machines")
- ret = test_common.test_install_virt(log, workspace, test_config,
- test_config_fpath)
- if ret:
- log.cl_error("failed to install virtual machines")
- return -1
-
- log.cl_info("installing LiPE")
- lipe_install_config_fpath = utils.config_value(test_config,
- cstr.CSTR_LIPE_INSTALL_CONFIG)
- if lipe_install_config_fpath is None:
- log.cl_error("can NOT find [%s] in the test config, "
- "please correct file [%s]",
- cstr.CSTR_LIPE_INSTALL_CONFIG, test_config_fpath)
- return -1
-
- skip_install = utils.config_value(test_config,
- cstr.CSTR_SKIP_INSTALL)
- if skip_install is None:
- log.cl_debug("no [%s] is configured, do not skip install")
- skip_install = False
-
- install_server_config = utils.config_value(test_config,
- cstr.CSTR_INSTALL_SERVER)
- if install_server_config is None:
- log.cl_error("can NOT find [%s] in the config file [%s], "
- "please correct it", cstr.CSTR_INSTALL_SERVER,
- test_config_fpath)
- return -1
-
- install_server_hostname = utils.config_value(install_server_config,
- cstr.CSTR_HOSTNAME)
- if install_server_hostname is None:
- log.cl_error("can NOT find [%s] in the config of installation host, "
- "please correct file [%s]",
- cstr.CSTR_HOSTNAME, test_config_fpath)
- return -1
-
- ssh_identity_file = utils.config_value(install_server_config,
- cstr.CSTR_SSH_IDENTITY_FILE)
- install_server = ssh_host.SSHHost(install_server_hostname,
- identity_file=ssh_identity_file)
- ret = test_common.test_install(log, workspace, lipe_install_config_fpath,
- skip_install, install_server, "lipe",
- constants.LIPE_INSTALL_CONFIG_FNAME)
- if ret:
- log.cl_error("failed to test installation of LiPE")
- return -1
-
- clownfish_config_fpath = utils.config_value(test_config,
- cstr.CSTR_CLOWNFISH_CONFIG)
- if clownfish_config_fpath is None:
- log.cl_error("can NOT find [%s] in the test config, "
- "please correct file [%s]",
- cstr.CSTR_CLOWNFISH_CONFIG,
- test_config_fpath)
- return -1
-
- clownfish_config_fd = open(clownfish_config_fpath)
- ret = 0
- try:
- clownfish_config = yaml.load(clownfish_config_fd)
- except:
- log.cl_error("not able to load [%s] as yaml file: %s",
- clownfish_config_fpath, traceback.format_exc())
- ret = -1
- clownfish_config_fd.close()
- if ret:
- return -1
-
- clownfish_instance = clownfish.init_instance(log, workspace,
- clownfish_config,
- clownfish_config_fpath)
- if clownfish_instance is None:
- log.cl_error("failed to init the Clownfish instance, please check "
- "config file [%s]", clownfish_config_fpath)
- return -1
-
- ret = lipe_test_prepare(log, workspace, lipe_install_config_fpath)
- if ret:
- log.cl_error("failed to prepare to run LiPE tests")
- return -1
-
- log.cl_info("running LiPE tests")
- ret = do_test(log, workspace, clownfish_instance, test_config,
- test_config_fpath, LIPE_TESTS)
- if ret:
- log.cl_error("failed to run LiPE tests")
- return ret
-
- # Run hotpool test
- ret = lipe_hotpool_test.hotpool_test(log, workspace, clownfish_instance)
- if ret:
- log.cl_error("failed to test HotPool")
- return ret
-
- # This test needs that all Lustre RPMs are installed and the test will
- # remove the LiPE RPMs, so run this test after all tests
- log.cl_info("testing LiPE rpms dependency")
- ret = test_rpms_dependency(log, workspace, LIPE_CLUSTER.lc_mdt_hosts[0])
- if ret:
- log.cl_error("failed to test LiPE rpms dependence")
- return ret
- return 0
-
-
-def lipe_test(log, workspace, config_fpath):
- """
- Start to test LiPE holding the configuration lock
- """
- # pylint: disable=bare-except
- config_fd = open(config_fpath)
- ret = 0
- try:
- config = yaml.load(config_fd)
- except:
- log.cl_error("not able to load [%s] as yaml file: %s", config_fpath,
- traceback.format_exc())
- ret = -1
- config_fd.close()
- if ret:
- return -1
-
- ret = _lipe_test(log, workspace, config, config_fpath)
- if ret:
- log.cl_error("test of LiPE failed, please check [%s] for more "
- "log", workspace)
- else:
- log.cl_info("test of LiPE passed, please check [%s] "
- "for more log", workspace)
- return ret
-
-
-def main():
- """
- Start to test LiPE
- """
- cmd_general.main(constants.LIPE_TEST_CONFIG,
- constants.LIPE_TEST_LOG_DIR,
- lipe_test)
+++ /dev/null
-"""
-Python library for LiPE test
-"""
-__all__ = ["lipe_test_console",
- "lipe_test_launch",
- "lipe_test_scheduler"]
+++ /dev/null
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Console that manages the scheduler
-"""
-import xmlrpclib
-import readline
-import logging
-import getopt
-import sys
-import traceback
-
-# local libs
-from pylustre import clog
-from pylustre import utils
-from pylustre import time_util
-from pyltest import lipe_test_scheduler
-
-
-LIPE_TEST_CONSOLE_LOG_DIR = "/var/log/lipe_test_console"
-
-
-class LipeTestConsoleCompleter(object):
- """
- Completer of command
- """
- # pylint: disable=too-few-public-methods
- def __init__(self, options):
- self.ltcc_options = options
- self.ltcc_current_candidates = []
- return
-
- def ltcc_complete(self, text, state):
- # pylint: disable=unused-argument,too-many-nested-blocks
- """
- The complete function of the completer
- """
- response = None
- if state == 0:
- # This is the first time for this text,
- # so build a match list.
- origline = readline.get_line_buffer()
- begin = readline.get_begidx()
- end = readline.get_endidx()
- being_completed = origline[begin:end]
- words = origline.split()
- if not words:
- self.ltcc_current_candidates = sorted(self.ltcc_options.keys())
- else:
- try:
- if begin == 0:
- # first word
- candidates = self.ltcc_options.keys()
- else:
- # later word
- first = words[0]
- candidates = self.ltcc_options[first]
- if being_completed:
- # match options with portion of input
- # being completed
- self.ltcc_current_candidates = []
- for candidate in candidates:
- if not candidate.startswith(being_completed):
- continue
- self.ltcc_current_candidates.append(candidate)
- else:
- # matching empty string so use all candidates
- self.ltcc_current_candidates = candidates
- except (KeyError, IndexError):
- self.ltcc_current_candidates = []
- try:
- response = self.ltcc_current_candidates[state]
- except IndexError:
- response = None
- return response
-
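readline drives a completer through a small state protocol: for each completion attempt it calls the function with state 0 (rebuild the candidate list), then 1, 2, ... until the function returns None. A tiny standalone completer showing just that contract (illustration only, independent of the class above):

    import readline

    COMMANDS = ["help", "job_kill", "job_list"]
    MATCHES = []

    def complete(text, state):
        """Return the state-th candidate for text, or None when exhausted."""
        global MATCHES
        if state == 0:
            # First call for this text: rebuild the match list.
            MATCHES = [word for word in COMMANDS if word.startswith(text)]
        try:
            return MATCHES[state]
        except IndexError:
            return None

    readline.parse_and_bind("tab: complete")
    readline.set_completer(complete)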
-
-def tconsole_input_init():
- """
- Initialize the input completer
- """
- readline.parse_and_bind("tab: complete")
- readline.parse_and_bind("set editing-mode vi")
- # Register our completer function
- completer = LipeTestConsoleCompleter({"help": [],
- "host_cleanup": [],
- "host_list": [],
- "ip_list": [],
- "ip_cleanup": [],
- "job_list": [],
- "job_kill": []})
- readline.set_completer(completer.ltcc_complete)
-
-
-def tconsole_input_fini():
- """
- Stop the input completer
- """
- readline.set_completer(None)
-
-
-def tconsole_command_help(proxy, arg_string):
- # pylint: disable=unused-argument
- """
- Print the help string
- """
- logging.info("help: show help messages")
- return 0
-
-
-def tconsole_command_host_list(proxy, arg_string):
- # pylint: disable=unused-variable
- """
- List the hosts
- """
- error = False
- args = arg_string.split()
- options, remainder = getopt.getopt(args,
- "he",
- ["--error",
- "--help"])
- for opt, arg in options:
- if opt in ("-e", "--error"):
- error = True
- elif opt in ("-h", "--help"):
- print """Usage: host_list [-e|--error]
- -e: print hosts that have cleanup error
- -h: print this string"""
- sys.exit(0)
-
- output = proxy.ts_host_list(error)
- print "%s" % output
- return 0
-
-
-def tconsole_command_ip_list(proxy, arg_string):
- # pylint: disable=unused-variable
- """
- List the IP addresses
- """
- error = False
- args = arg_string.split()
- options, remainder = getopt.getopt(args,
- "he",
- ["--error",
- "--help"])
- for opt, arg in options:
- if opt in ("-e", "--error"):
- error = True
- elif opt in ("-h", "--help"):
- print """Usage: host_list [-e|--error]
- -e: print hosts that have cleanup error
- -h: print this string"""
- sys.exit(0)
-
- output = proxy.ts_ip_address_list(error)
- print "%s" % output
- return 0
-
-
-def tconsole_command_job_list(proxy, arg_string):
- """
- List all the active jobs on the scheduler
- """
- # pylint: disable=unused-argument
- jobs = proxy.ts_job_list()
- print "%s" % jobs
- return 0
-
-
-def tconsole_command_job_kill(proxy, arg_string):
- """
- Kill a job
- """
- jobid = arg_string
- scheduler_id = proxy.ts_get_id()
- ret = proxy.ts_job_stop(scheduler_id, jobid)
- return ret
-
-
-def tconsole_command_host_cleanup(proxy, arg_string):
- """
- Clean up the host
- """
- arg_string = arg_string.strip()
- args = arg_string.split()
- if len(args) == 1:
- hostname = args[0]
- else:
- logging.error("""Usage: host_cleanup <hostname>""")
- return -1
- ret = proxy.ts_host_cleanup(hostname)
- return ret
-
-
-def tconsole_command_ip_cleanup(proxy, arg_string):
- """
- Cleanup the IP address
- """
- arg_string = arg_string.strip()
- args = arg_string.split()
- if len(args) == 1:
- ip_address = args[0]
- else:
- logging.error("""Usage: ip_cleanup <ip_address>""")
- return -1
- ret = proxy.ts_ip_cleanup(ip_address)
- return ret
-
-
-def tconsole_command(proxy, line):
- """
- Run a command in the console
- """
- # pylint: disable=broad-except
- functions = {"help": tconsole_command_help,
- "host_cleanup": tconsole_command_host_cleanup,
- "host_list": tconsole_command_host_list,
- "ip_list": tconsole_command_ip_list,
- "ip_cleanup": tconsole_command_ip_cleanup,
- "job_list": tconsole_command_job_list,
- "job_kill": tconsole_command_job_kill}
- if " " in line:
- command, arg_string = line.split(' ', 1)
- else:
- command = line
- arg_string = ""
-
- try:
- func = functions[command]
- except (KeyError, IndexError), err:
- func = None
-
- # Run system command
- if func is not None:
- try:
- ret = func(proxy, arg_string)
- except Exception, err:
- logging.error("failed to run command [%s %s] %s, %s",
- command, arg_string, err,
- traceback.format_exc())
- return -1
- else:
- logging.error("no command: %s\n", line)
- ret = -1
- return ret
-
-
-def tconsole_input_loop(proxy):
- """
-    Loop and execute the command
- """
- while True:
- line = raw_input('> ("q" to quit): ')
- if line == 'q' or line == 'quit':
- break
- tconsole_command(proxy, line)
-
-
-def usage():
- """
- Print the usage of the command
- """
- utils.oprint("Usage: {cmd} <server>\n"
- " server: the server address\n\n"
- "examples:\n"
- "{cmd} --> use http://localhost:1234 as the server address\n"
-                 "{cmd} localhost\n"
-                 "{cmd} http://localhost:1234\n"
-                 "{cmd} http://10.0.0.10:1234".format(cmd=sys.argv[0]))
-
-
-def main():
- """
- Run the console
- """
- # pylint: disable=unused-variable
- now = time_util.utcnow()
- workspace = (LIPE_TEST_CONSOLE_LOG_DIR + "/" +
- time_util.local_strftime(now, ('%Y-%m-%d-%H:%M:%S')))
- ret = utils.run("mkdir -p %s" % workspace)
- if ret.cr_exit_status != 0:
- utils.eprint("failed to create directory [%s]" % workspace)
- sys.exit(1)
-
- log = clog.get_log(resultsdir=workspace)
-
- argc = len(sys.argv)
- if argc == 1:
- server = "http://localhost:1234"
- elif argc == 2:
- arg = sys.argv[1]
- if arg == "-h" or arg == "--help":
- usage()
- sys.exit(0)
-        server = arg
-    else:
-        usage()
-        sys.exit(1)
- if not server.startswith("http://"):
- server = "http://" + server
- if server.count(":") != 2:
- server = server + ":" + str(lipe_test_scheduler.TEST_SCHEDULER_PORT)
-
- log.cl_info("connecting to server [%s]", server)
- proxy = xmlrpclib.ServerProxy(server, allow_none=True)
-
- tconsole_input_init()
- tconsole_input_loop(proxy)
- tconsole_input_fini()
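main() above normalizes the user-supplied address into a full http://host:port URL; la_update_server in the launcher below applies the same rules. A standalone sketch of the normalization, using 1234 as the value of lipe_test_scheduler.TEST_SCHEDULER_PORT:

    DEFAULT_PORT = 1234  # value of TEST_SCHEDULER_PORT

    def normalize_server(server, port=DEFAULT_PORT):
        # Prepend the scheme if missing, then append the port when the
        # URL still has only the scheme colon (e.g. "http://localhost")
        if not server.startswith("http://"):
            server = "http://" + server
        if server.count(":") != 2:
            server = server + ":" + str(port)
        return server

    assert normalize_server("localhost") == "http://localhost:1234"
    assert normalize_server("10.0.0.10:1234") == "http://10.0.0.10:1234"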
+++ /dev/null
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Launch the test after allocating resources from the scheduler
-"""
-# pylint: disable=too-many-lines
-import xmlrpclib
-import getopt
-import sys
-import os
-import time
-import traceback
-import yaml
-
-# local libs
-from pylustre import clog
-from pylustre import utils
-from pylustre import time_util
-from pylustre import cstr
-from pylustre import ssh_host
-from pylustre import lyaml
-from pylustre import lipe_virt
-from pylustre import lustre
-from pylustre import constants
-from pyltest import lipe_test_scheduler
-
-
-LIPE_TEST_LAUNCH_LOG_DIR = "/var/log/lipe_test_launch"
-CHECK_TIME = time_util.utcnow()
-EXIT_REASON = "unknown reason"
-SHUTTING_DOWN = False
-DEFAULT_HOST_TIMEOUT = 86400
-HOST_ALLOCATION_INTERVAL = 3
-DEFAULT_LUSTRE_RPM_DIR = "/lustre_rpms"
-DEFAULT_E2FSPROGS_RPM_DIR = "/e2fsprogs_rpms"
-DEV_MAPPER_PREFIX = "/dev/mapper/"
-LIPE_ISO_PATTERN = "lipe-*.x86_64.iso"
-LIPE_MD5_PATTERN = "lipe-*.x86_64.md5"
-
-
-def usage():
- """
- Print the usage of the command
- """
- command = sys.argv[0]
- utils.oprint("Usage: %s [--lustre|-l <lustre_dir>] [--e2fsprogs|-e <e2fsprogs_dir>]\n"
- " [--server|-s <server>] [--source_path|-p <source_path>]\n"
- " [--host_timeout <host_timeout>]\n"
- "\n"
- "lustre_dir:\n"
- " The dir of Lustre RPMs, usually generated by lbuild.\n"
- " By default '%s'\n"
- "e2fsprogs_dir:\n"
- " The dir of E2fsprogs RPMs.\n"
- " By default '%s'.\n"
- "server:\n"
- " The server address.\n"
- " By default localhost.\n"
- "source_path:\n"
- " The path to lipe source code.\n"
- " By default current directory.\n"
- "host_timeout:\n"
- " The seconds to wait for host allocation.\n"
-                 "    By default %d seconds. 0 means wait forever.\n"
- "\n"
- "examples:\n"
- "%s\n"
- "%s -s localhost\n"
- "%s -s http://localhost -p /dir/to/lipe.git\n"
- "%s -s http://localhost:1234 -p /dir/to/lipe.git\n"
- "%s -s http://10.0.0.10:1234 -p /dir/to/lipe.git\n"
- "%s -l /dir/to/lustre_rpms -e /dir/to/e2fsprogs_rpms -s http://10.0.0.10:1234 -p /dir/to/lipe.git"
- % (command, DEFAULT_LUSTRE_RPM_DIR, DEFAULT_E2FSPROGS_RPM_DIR,
- DEFAULT_HOST_TIMEOUT, command, command, command,
- command, command, command))
-
-
-class LaunchArg(object):
- """
- The arg of launch command
- """
- # pylint: disable=too-few-public-methods,too-many-instance-attributes
- def __init__(self):
- self.la_server = "http://localhost:1234"
- self.la_source_path = os.getcwd()
- self.la_source_lipe_launch_config_file = (self.la_source_path + "/" +
- constants.LIPE_LAUNCH_CONFIG_FNAME)
- self.la_host_wait_time = DEFAULT_HOST_TIMEOUT
- self.la_lustre_dir = DEFAULT_LUSTRE_RPM_DIR
- self.la_e2fsprogs_dir = DEFAULT_E2FSPROGS_RPM_DIR
- # Init when building
- self.la_test_host_source_path = None
- self.la_test_host_iso_fpath = None
- self.la_test_host_iso_dir = None
- self.la_test_host_md5_fpath = None
-
- def la_update_server(self, log, server):
- """
- Update the server URL
- """
- if not server.startswith("http://"):
- server = "http://" + server
- if server.count(":") != 2:
- server = server + ":" + str(lipe_test_scheduler.TEST_SCHEDULER_PORT)
- self.la_server = server
- log.cl_debug("updated the url to [%s]", server)
- return 0
-
- def la_update_source_path(self, log, path):
- """
- Update the source path
- """
- if not os.path.isdir(path):
- log.cl_error("source directory [%s] is not a directory", path)
- return -1
-
- lipe_launch_config_file = path + "/example_configs/lipe/" + constants.LIPE_LAUNCH_CONFIG_FNAME
- if not os.path.isfile(lipe_launch_config_file):
- log.cl_error("source directory [%s] doesn't have file [%s]", path,
- constants.LIPE_LAUNCH_CONFIG_FNAME)
- return -1
- self.la_source_lipe_launch_config_file = lipe_launch_config_file
- self.la_source_path = path
- log.cl_debug("updated the source path to [%s]", path)
- return 0
-
- def la_update_host_wait_time(self, log, second):
- """
- Update the host wait time
- """
- self.la_host_wait_time = second
- log.cl_debug("the host wait time is [%d]", self.la_host_wait_time)
- return 0
-
- def la_update_lustre_dir(self, log, lustre_dir):
- """
- Update the dir of Lustre RPMs
- """
- if not os.path.isdir(lustre_dir):
- log.cl_error("Lustre RPMs directory [%s] is not a directory",
- lustre_dir)
- return -1
- self.la_lustre_dir = lustre_dir
- log.cl_debug("updated the lustre_dir to [%s]", lustre_dir)
- return 0
-
- def la_update_e2fsprogs_dir(self, log, e2fsprogs_dir):
- """
- Update the dir of E2fsprogs RPMs
- """
- if not os.path.isdir(e2fsprogs_dir):
- log.cl_error("E2fsprogs RPMs directory [%s] is not a directory",
- e2fsprogs_dir)
- return -1
- self.la_e2fsprogs_dir = e2fsprogs_dir
-        log.cl_debug("updated the e2fsprogs_dir to [%s]", e2fsprogs_dir)
- return 0
-
- def la_check_arguments(self, log):
- """
- Check whether all arguments are valid
- """
- ret = self.la_update_server(log, self.la_server)
- if ret:
- return ret
-
- ret = self.la_update_host_wait_time(log, self.la_host_wait_time)
- if ret:
- return ret
-
- ret = self.la_update_source_path(log, self.la_source_path)
- if ret:
- return ret
-
- ret = self.la_update_lustre_dir(log, self.la_lustre_dir)
- if ret:
- return ret
-
- ret = self.la_update_e2fsprogs_dir(log, self.la_e2fsprogs_dir)
- if ret:
- return ret
-
- return 0
-
-
-class ClientRPCHost(lipe_test_scheduler.RPCHost):
- """
-    The host for transferring between scheduler and its clients
- """
- # pylint: disable=too-few-public-methods
- def __init__(self, hostname,
- global_template_hostname=None,
- kvm_template_config=None,
- kvm_server_hostname=None,
- expected_distro=None, ipv4_addresses=None,
- kvm_template_ipv4_address=None):
- # pylint: disable=too-many-arguments
- super(ClientRPCHost, self).__init__(hostname,
- kvm_server_hostname=kvm_server_hostname,
- expected_distro=expected_distro,
- ipv4_addresses=ipv4_addresses,
- kvm_template_ipv4_address=kvm_template_ipv4_address)
- self.crh_host = ssh_host.SSHHost(hostname)
- self.crh_global_template_hostname = global_template_hostname
- self.crh_kvm_template_config = kvm_template_config
- self.crh_shared_disk_ids = []
-
- def crh_add_shared_disk(self, disk_id):
- """
- Add the shared disk ID
- """
- self.crh_shared_disk_ids.append(disk_id)
-
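A sketch of constructing a ClientRPCHost by hand (all values hypothetical; resource2host later in this file builds these objects from allocated resources):

    rpc_host = ClientRPCHost("vm-test-0",
                             global_template_hostname="server17_rhel7_template",
                             kvm_server_hostname="server17",
                             expected_distro="rhel7",
                             ipv4_addresses=["10.0.0.189"])
    rpc_host.crh_add_shared_disk("abcdefg_mgt")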
-
-def send_lipe_source(log, workspace, build_host, launch_argument):
- """
-    Send the lipe source code to the remote host
- """
- source_path = launch_argument.la_source_path
- log.cl_info("sending the source code in directory [%s] of local host to "
- "directory [%s] on host [%s]", source_path, workspace,
- build_host.sh_hostname)
- ret = build_host.sh_send_file(log, source_path, workspace)
- if ret:
- log.cl_error("failed to send directory [%s] on local host to "
- "directory [%s] on host [%s]",
- source_path, workspace, build_host.sh_hostname)
- return -1
- log.cl_info("sent directory [%s] on local host to "
- "directory [%s] on host [%s]",
- source_path, workspace, build_host.sh_hostname)
- basename = os.path.basename(source_path)
-
- remote_source_path = workspace + "/" + constants.LIPE_BUILD_LOG_DIR_BASENAME
- origin_remote_source_path = workspace + "/" + basename
- if origin_remote_source_path != remote_source_path:
- command = ("mv %s %s" %
- (origin_remote_source_path, remote_source_path))
- retval = build_host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, build_host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- log.cl_error("please clean up the dir [%s] on host [%s] "
- "manually to avoid exhaustion of disk space on that "
- "host", origin_remote_source_path,
- build_host.sh_hostname)
- return -1
-
- launch_argument.la_test_host_source_path = remote_source_path
- launch_argument.la_test_host_iso_dir = remote_source_path + "/ISO"
- return 0
-
-
-def ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host):
- """
- Add the server of rpc_host to ssh_host_dict
- """
- kvm_server_hostname = rpc_host.lrh_kvm_server_hostname
- if kvm_server_hostname not in ssh_host_dict:
- ssh_host_dict[kvm_server_hostname] = True
- ssh_host_config = {}
- ssh_host_config[cstr.CSTR_HOST_ID] = kvm_server_hostname
- ssh_host_config[cstr.CSTR_HOSTNAME] = kvm_server_hostname
- ssh_host_configs.append(ssh_host_config)
- log.cl_debug("adding KVM server [%s] to config",
- kvm_server_hostname)
-
-
-def generate_lipe_virt_config_ssh_hosts(log, hosts, pairs, config):
- """
- Generate the ssh_hosts part of the lipe_virt config
- """
- ssh_host_configs = []
- ssh_host_dict = {}
-
- for rpc_hosts in pairs:
- rpc_host = rpc_hosts[0]
- ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host)
-
- for rpc_host in hosts:
- ssh_hosts_add(log, ssh_host_dict, ssh_host_configs, rpc_host)
-
- config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs
-
-
-def generate_lipe_virt_config_templates(log, kvm_server_dict, config):
- """
- Generate the templates part of the lipe_virt config
- """
- template_configs = []
-
-    # kvm_server_dict maps each KVM server hostname to a dictionary,
-    # which in turn maps a global template hostname to the full
-    # template config
- for diction in kvm_server_dict.values():
- for global_template_hostname, template_config in diction.iteritems():
- template_configs.append(template_config)
- log.cl_debug("adding template host [%s] to config",
- global_template_hostname)
-
- config[cstr.CSTR_TEMPLATES] = template_configs
-
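To illustrate the two-level structure the loop above walks (hostnames hypothetical, and assuming the cstr.CSTR_* constants map to the lowercase YAML keys of the same names):

    kvm_server_dict = {
        "server17": {                            # KVM server hostname
            "server17_rhel7_template": {         # global template hostname
                "hostname": "server17_rhel7_template",
                "server_host_id": "server17",
                "reinstall": False,
                # ... iso, ram_size, disk_sizes and the other template keys
            },
        },
    }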
-
-def vm_hosts_add(log, vm_host_configs, rpc_host):
- """
- Add the rpc_host to vm_host_configs
- """
- vm_host_config = {}
- vm_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname
- vm_host_config[cstr.CSTR_REINSTALL] = False
- vm_host_config[cstr.CSTR_TEMPLATE_HOSTNAME] = rpc_host.crh_global_template_hostname
- vm_host_config[cstr.CSTR_SHARED_DISK_IDS] = rpc_host.crh_shared_disk_ids
- vm_host_config[cstr.CSTR_IPS] = rpc_host.lrh_ipv4_addresses
- vm_host_configs.append(vm_host_config)
- log.cl_debug("adding VM host [%s] to config", rpc_host.lrh_hostname)
-
-
-def generate_lipe_virt_config_vm_hosts(log, hosts, pairs, config):
- """
- Generate the vm_hosts part of the lipe_virt config
- """
- vm_host_configs = []
-
- for rpc_hosts in pairs:
- for rpc_host in rpc_hosts:
- vm_hosts_add(log, vm_host_configs, rpc_host)
-
- for rpc_host in hosts:
- vm_hosts_add(log, vm_host_configs, rpc_host)
-
- config[cstr.CSTR_VM_HOSTS] = vm_host_configs
-
-
-def generate_lipe_virt_config_shared_disks(shared_disk_dict, config):
- """
- Generate the shared_disks part of the lipe_virt config
- """
- config[cstr.CSTR_SHARED_DISKS] = shared_disk_dict.values()
-
-
-def config_shared_disk_add(log, shared_disk_dict, disk_id, size,
- image_fpath, server_host_id):
- """
- Add shared disk to the dict
- """
- # pylint: disable=too-many-arguments
- if disk_id in shared_disk_dict:
- log.cl_error("disk ID [%s] already exists", disk_id)
- return -1
- shared_disk_config = {}
- shared_disk_config[cstr.CSTR_DISK_ID] = disk_id
- shared_disk_config[cstr.CSTR_SIZE] = size
- shared_disk_config[cstr.CSTR_SERVER_HOST_ID] = server_host_id
- shared_disk_config[cstr.CSTR_IMAGE_FILE] = image_fpath
- shared_disk_dict[disk_id] = shared_disk_config
- return 0
-
-
-def add_shared_device(log, pair, shared_disk_dict, disk_id, size):
- """
- Save the shared disk IDs into the ClientRPCHost
- """
- host0 = pair[0]
- template_config = host0.crh_kvm_template_config
- # Image dir might be different for the templates of two hosts,
- # use the first one's
- image_dir = template_config[cstr.CSTR_IMAGE_DIR]
- # Host ID should be the same for two hosts
- image_fname = (host0.lrh_hostname + "_shared_" +
- str(len(host0.crh_shared_disk_ids)) + ".img")
- image_fpath = os.path.join(image_dir, image_fname)
- server_host_id = template_config[cstr.CSTR_SERVER_HOST_ID]
-
- ret = config_shared_disk_add(log, shared_disk_dict, disk_id, size,
- image_fpath, server_host_id)
- if ret:
- log.cl_error("failed to add shared disk with ID [%s]", disk_id)
- return -1
-
- for host in pair:
- host.crh_add_shared_disk(disk_id)
- return 0
-
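Taken together, config_shared_disk_add and add_shared_device leave one entry per shared disk in the dictionary. A sketch of one resulting entry, with hypothetical IDs and assuming the cstr.CSTR_* constants are the lowercase key names:

    shared_disk_dict = {
        "abcdefg_mgt": {
            "disk_id": "abcdefg_mgt",                       # CSTR_DISK_ID
            "size": 1,                                      # CSTR_SIZE, in GB
            "server_host_id": "server17",                   # CSTR_SERVER_HOST_ID
            "image_file": "/images/vm-mds-0_shared_0.img",  # CSTR_IMAGE_FILE
        },
    }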
-
-class TestCluster(object):
- """
-    The cluster to run tests
-
- Host 0: LiPE install server, Lustre client
- Host 1: Lustre client
-
- Pair 0: MGS/lipe0-MDS, devices: mgs(1GB, LDISKFS), mdt0(5GB, LDISKFS), mdt1(5GB, ZFS)
- Pair 1: lipe0-OSS0, devices: ost0(5GB, LDISKFS), ost1(5GB, LDISKFS), ost2(5GB, ZFS)
- Pair 2: lipe0-OSS1, devices: ost3(5GB, LDISKFS), ost4(5GB, LDISKFS), ost5(5GB, LDISKFS)
- Pair 3: lipe1-MDS/OSS, devices: mdt0(5GB, ZFS), ost0(5GB, LDISKFS), ost1(5GB, ZFS)
- """
- # pylint: disable=too-few-public-methods,too-many-instance-attributes
- # pylint: disable=too-many-locals
- PAIR_NUMBER = 4
- HOST_NUMBER = 2
-
- def __init__(self, workspace, test_host, hosts, pairs, rpc_ip_address,
- kvm_server_dict):
- # pylint: disable=too-many-arguments,too-many-statements
- self.tc_vm_hosts = []
- self.tc_vm_hosts += hosts
- for pair in pairs:
- self.tc_vm_hosts += pair
- self.tc_lipe_test_logdir = (workspace + "/" +
- constants.LIPE_TEST_LOG_DIR_BASENAME)
- self.tc_rpc_ip_address = rpc_ip_address
- self.tc_kvm_server_dict = kvm_server_dict
- self.tc_workspace = workspace
- self.tc_cluster_id = utils.random_word(7)
- self.tc_hosts = hosts
- self.tc_client_host0 = hosts[0]
- self.tc_client_host1 = hosts[1]
- self.tc_test_host = test_host
- self.tc_install_server = self.tc_hosts[0]
- self.tc_pairs = pairs
- self.tc_mgs_pair = pairs[0]
- self.tc_mgt_size = 1
- self.tc_mdt_size = 5
- self.tc_ost_size = 5
- self.tc_mgt_disk_id = self.tc_cluster_id + "_mgt"
- self.tc_fs0_fsname = self.tc_cluster_id + "0"
- self.tc_fs0_mnt = "/mnt/" + self.tc_fs0_fsname
- self.tc_fs0_mdt_disk_id_prefix = (self.tc_fs0_fsname + "_" +
- "mdt")
- self.tc_fs0_ost_disk_id_prefix = (self.tc_fs0_fsname + "_" +
- "ost")
- self.tc_fs0_mdt_number_per_mds = 2
- self.tc_fs0_ost_number_per_oss = 3
- self.tc_fs0_mds_pair0 = pairs[0]
- self.tc_fs0_oss_pair0 = pairs[1]
- self.tc_fs0_oss_pair1 = pairs[2]
- self.tc_fs0_oss_pairs = [pairs[1], pairs[2]]
-
- self.tc_fs1_fsname = self.tc_cluster_id + "1"
- self.tc_fs1_mnt = "/mnt/" + self.tc_fs1_fsname
- self.tc_fs1_mdt_disk_id_prefix = (self.tc_fs1_fsname + "_" +
- "mdt")
- self.tc_fs1_ost_disk_id_prefix = (self.tc_fs1_fsname + "_" +
- "ost")
- self.tc_fs1_mdt_number_per_mds = 1
- self.tc_fs1_ost_number_per_oss = 2
- self.tc_fs1_mds_pair0 = pairs[3]
- self.tc_fs1_oss_pair0 = pairs[3]
-
- fname = lipe_virt.LIPE_VIRT_CONFIG_FNAME
- self.tc_lipe_virt_config_fpath = workspace + "/" + fname
- fname = constants.CLOWNFISH_CONFIG_FNAME
- self.tc_clownfish_config_fpath = workspace + "/" + fname
- fname = constants.LIPE_INSTALL_CONFIG_FNAME
- self.tc_lipe_install_config_fpath = workspace + "/" + fname
- fname = constants.LIPE_TEST_CONFIG_FNAME
- self.tc_lipe_test_config_fpath = workspace + "/" + fname
- fname = constants.LIPE_BUILD_CONFIG_FNAME
- self.tc_lipe_build_config_fpath = workspace + "/" + fname
- fname = constants.LIPE_LAUNCH_CONFIG_FNAME
- self.tc_lipe_launch_fpath = workspace + "/" + fname
- self.tc_test_host_lustre_rpm_dir = workspace + DEFAULT_LUSTRE_RPM_DIR
- self.tc_test_host_e2fsprogs_rpm_dir = workspace + DEFAULT_E2FSPROGS_RPM_DIR
-
- def _tc_add_shared_mgt(self, log, shared_disk_dict):
- """
-        Save the shared MGS disk into the ClientRPCHost
- """
- return add_shared_device(log, self.tc_mgs_pair, shared_disk_dict,
- self.tc_mgt_disk_id,
- self.tc_mgt_size)
-
- def _tc_fs0_add_shared_devices(self, log, shared_disk_dict):
- """
- Save the shared disks of file system 0
- """
- for mdt_index in range(self.tc_fs0_mdt_number_per_mds):
- mdt_disk_id = self.tc_fs0_mdt_disk_id_prefix + str(mdt_index)
- ret = add_shared_device(log, self.tc_fs0_mds_pair0,
- shared_disk_dict,
- mdt_disk_id,
- self.tc_mdt_size)
- if ret:
- log.cl_error("failed to add shared disk for [%s]", mdt_disk_id)
- return -1
-
- for pair_index, pair in enumerate(self.tc_fs0_oss_pairs):
- for index in range(self.tc_fs0_ost_number_per_oss):
- ost_index = index + (self.tc_fs0_ost_number_per_oss * pair_index)
- ost_disk_id = self.tc_fs0_ost_disk_id_prefix + str(ost_index)
- ret = add_shared_device(log, pair,
- shared_disk_dict,
- ost_disk_id,
- self.tc_ost_size)
- if ret:
- log.cl_error("failed to add shared disk for [%s]", ost_disk_id)
- return -1
- return 0
-
- def _tc_fs1_add_shared_devices(self, log, shared_disk_dict):
- """
- Save the shared disks of file system 1
- """
- for mdt_index in range(self.tc_fs1_mdt_number_per_mds):
- mdt_disk_id = self.tc_fs1_mdt_disk_id_prefix + str(mdt_index)
- ret = add_shared_device(log, self.tc_fs1_mds_pair0,
- shared_disk_dict,
- mdt_disk_id,
- self.tc_mdt_size)
- if ret:
- log.cl_error("failed to add shared disk for [%s]", mdt_disk_id)
- return -1
-
- for ost_index in range(self.tc_fs1_ost_number_per_oss):
- ost_disk_id = self.tc_fs1_ost_disk_id_prefix + str(ost_index)
- ret = add_shared_device(log, self.tc_fs1_oss_pair0,
- shared_disk_dict,
- ost_disk_id,
- self.tc_ost_size)
- if ret:
- log.cl_error("failed to add shared disk for [%s]", ost_disk_id)
- return -1
- return 0
-
- def tc_add_shared_devices(self, log, shared_disk_dict):
- """
- Save the shared disks into the ClientRPCHost
- """
- ret = self._tc_add_shared_mgt(log, shared_disk_dict)
- if ret:
- return -1
-
- ret = self._tc_fs0_add_shared_devices(log, shared_disk_dict)
- if ret:
- return -1
-
- ret = self._tc_fs1_add_shared_devices(log, shared_disk_dict)
- if ret:
- return -1
-
- return 0
-
- def tc_send_lustre_e2fsprogs_rpms(self, log, launch_argument):
- """
- Send Lustre RPMs and E2fsprogs RPMs
- """
- host = self.tc_test_host.crh_host
- workspace = self.tc_workspace
-
- local_rpm_dir = launch_argument.la_lustre_dir
- test_host = self.tc_test_host.crh_host
- ret = test_host.sh_send_file(log, local_rpm_dir, workspace)
- if ret:
- log.cl_error("failed to send dir [%s] on local host to "
- "directory [%s] on host [%s]",
- local_rpm_dir, workspace, test_host.sh_hostname)
- return -1
- basename = os.path.basename(local_rpm_dir)
- remote_rpm_dir = workspace + "/" + basename
- remote_lustre_rpm_dir = self.tc_test_host_lustre_rpm_dir
- if remote_rpm_dir != remote_lustre_rpm_dir:
- command = ("mv %s %s" % (remote_rpm_dir, remote_lustre_rpm_dir))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- local_rpm_dir = launch_argument.la_e2fsprogs_dir
- test_host = self.tc_test_host.crh_host
- ret = test_host.sh_send_file(log, local_rpm_dir, workspace)
- if ret:
- log.cl_error("failed to send dir [%s] on local host to "
- "directory [%s] on host [%s]",
- local_rpm_dir, workspace, test_host.sh_hostname)
- return -1
- basename = os.path.basename(local_rpm_dir)
- remote_rpm_dir = workspace + "/" + basename
- remote_e2fsprogs_rpm_dir = self.tc_test_host_e2fsprogs_rpm_dir
- if remote_rpm_dir != remote_e2fsprogs_rpm_dir:
- command = ("mv %s %s" % (remote_rpm_dir, remote_e2fsprogs_rpm_dir))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
-                return -1
-        return 0
-
- def tc_get_and_clean_dir(self, log, host, logdir):
- """
- Get and clean log
- """
- host_local_dir = self.tc_workspace + "/" + host.sh_hostname
- ret = utils.mkdir(host_local_dir)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- host_local_dir)
- log.cl_error("please backup the log [%s] on host "
- "[%s] manually for debug purpose",
- logdir, host.sh_hostname)
- return -1
-
- ret = host.sh_get_and_clean_dir(log, logdir, host_local_dir)
- if ret:
- log.cl_error("failed to get and clean dir [%s] on host [%s]",
- logdir, host.sh_hostname)
- return ret
-
- def tc_run_lipe_test(self, log, launch_argument):
- """
- Run lipe test
- """
- return_value = 0
- workspace = self.tc_workspace
- host = self.tc_test_host.crh_host
-
- source_path = launch_argument.la_test_host_source_path
- command = ("cd %s && ./lipe_test --logdir %s --config %s" %
- (source_path, self.tc_lipe_test_logdir,
- self.tc_lipe_test_config_fpath))
- stdout_file = workspace + "/" + "lipe_test_watching.stdout"
- stderr_file = workspace + "/" + "lipe_test_watching.stderr"
- retval = host.sh_watched_run(log, command, stdout_file, stderr_file)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return_value = -1
-
- ret = self.tc_get_and_clean_dir(log, host,
- self.tc_lipe_test_logdir)
- if ret:
- log.cl_error("failed to get and clean dir [%s] on host [%s]",
- self.tc_lipe_test_logdir, host.sh_hostname)
- if return_value:
- log.cl_error("please check [%s] on host [%s] and [%s] on "
- "local host to debug why test of lipe failed",
- self.tc_lipe_test_logdir, host.sh_hostname,
- workspace)
- else:
-                log.cl_error("please check why copying the remote directory "
-                             "failed even though the lipe test succeeded")
- return_value = -1
- return return_value
-
- def _tc_generate_clownfish_config_lustre_distributions(self, config):
- """
- Generate the lustre distributions part of clownfish.conf
- """
- lustre_distr_configs = []
- lustre_distr_config = {}
- lustre_distr_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id
- lustre_distr_config[cstr.CSTR_LUSTRE_RPM_DIR] = self.tc_test_host_lustre_rpm_dir
- lustre_distr_config[cstr.CSTR_E2FSPROGS_RPM_DIR] = self.tc_test_host_e2fsprogs_rpm_dir
- lustre_distr_configs.append(lustre_distr_config)
- config[cstr.CSTR_LUSTRE_DISTRIBUTIONS] = lustre_distr_configs
-
- def _tc_generate_clownfish_config_ssh_hosts(self, config):
- """
- Generate the ssh_hosts part of clownfish.conf
- """
- ssh_host_configs = []
- for pair in self.tc_pairs:
- for rpc_host in pair:
- ssh_host_config = {}
- ssh_host_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- ssh_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname
- ssh_host_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id
- ssh_host_configs.append(ssh_host_config)
-
- for rpc_host in [self.tc_client_host0, self.tc_client_host1]:
- ssh_host_config = {}
- ssh_host_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- ssh_host_config[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname
- ssh_host_config[cstr.CSTR_LUSTRE_DISTRIBUTION_ID] = self.tc_cluster_id
- ssh_host_configs.append(ssh_host_config)
- config[cstr.CSTR_SSH_HOSTS] = ssh_host_configs
-
- def _tc_generate_clownfish_config_mgs_list(self, config):
- """
- Generate the mgs_list part of clownfish.conf
- """
- mgs_configs = []
- mgs_config = {}
- mgs_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id
- mgs_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS
- mgs_instance_configs = []
- for rpc_host in self.tc_mgs_pair:
- mgs_instance_config = {}
- mgs_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- mgs_instance_config[cstr.CSTR_DEVICE] = DEV_MAPPER_PREFIX + self.tc_mgt_disk_id
- mgs_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp"
- mgs_instance_configs.append(mgs_instance_config)
- mgs_config[cstr.CSTR_INSTANCES] = mgs_instance_configs
- mgs_configs.append(mgs_config)
- config[cstr.CSTR_MGS_LIST] = mgs_configs
-
- def _tc_generate_clownfish_config_fs0_mdts(self, lustre_config):
- """
- Generate the mdts part of lustre 0 of clownfish.conf
- """
- mdt_configs = []
- for mdt_index in range(self.tc_fs0_mdt_number_per_mds):
- mdt_config = {}
- mdt_config[cstr.CSTR_IS_MGS] = False
- mdt_config[cstr.CSTR_INDEX] = mdt_index
- # The last MDT on each MDS pair is ZFS
- if mdt_index == self.tc_fs0_mdt_number_per_mds - 1:
- is_ldiskfs = False
- else:
- is_ldiskfs = True
- if is_ldiskfs:
- mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS
- else:
- mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS
- mdt_instance_configs = []
- for rpc_host in self.tc_fs0_mds_pair0:
- mdt_instance_config = {}
- mdt_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- disk_id = self.tc_fs0_mdt_disk_id_prefix + str(mdt_index)
- block_device = DEV_MAPPER_PREFIX + disk_id
- if is_ldiskfs:
- device = block_device
- else:
- zpool = disk_id
- device = zpool + ("/mdt%d" % mdt_index)
- mdt_instance_config[cstr.CSTR_DEVICE] = device
- mdt_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp"
- mdt_instance_configs.append(mdt_instance_config)
- if not is_ldiskfs:
- zpool_create = ("zpool create -f %s %s" %
- (zpool, block_device))
- mdt_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create
- mdt_config[cstr.CSTR_INSTANCES] = mdt_instance_configs
- mdt_configs.append(mdt_config)
- lustre_config[cstr.CSTR_MDTS] = mdt_configs
-
- def _tc_generate_clownfish_config_fs0_osts(self, lustre_config):
- """
- Generate the osts part of lustre 0 of clownfish.conf
- """
- ost_configs = []
-
- for pair_index, pair in enumerate(self.tc_fs0_oss_pairs):
- for index in range(self.tc_fs0_ost_number_per_oss):
- ost_config = {}
- ost_config[cstr.CSTR_IS_MGS] = False
- ost_index = index + (self.tc_fs0_ost_number_per_oss * pair_index)
- ost_config[cstr.CSTR_INDEX] = ost_index
-                # The last OST on the first OSS pair will be formatted with
-                # ZFS, the others with LDISKFS. This makes sure the second OSS
-                # pair has three LDISKFS OSTs, so one OSS in that pair has two
-                # active LDISKFS OSTs, which the hotpool test requires.
- if ost_index == self.tc_fs0_ost_number_per_oss - 1:
- is_ldiskfs = False
- ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS
- else:
- is_ldiskfs = True
- ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS
-
- ost_instance_configs = []
- for rpc_host in pair:
- ost_instance_config = {}
- ost_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- disk_id = self.tc_fs0_ost_disk_id_prefix + str(ost_index)
- block_device = DEV_MAPPER_PREFIX + disk_id
- if is_ldiskfs:
- device = block_device
- else:
- zpool = disk_id
- device = zpool + ("/ost%d" % index)
- zpool_create = ("zpool create -f %s %s" %
- (zpool, block_device))
- ost_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create
- ost_instance_config[cstr.CSTR_DEVICE] = device
- ost_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp"
- ost_instance_configs.append(ost_instance_config)
- ost_config[cstr.CSTR_INSTANCES] = ost_instance_configs
- ost_configs.append(ost_config)
- lustre_config[cstr.CSTR_OSTS] = ost_configs
-
- def _tc_generate_clownfish_config_fs0_clients(self, lustre_config):
- """
- Generate the clients part of a lustre file system of clownfish.conf
- """
- clients_configs = []
- clients_config = {}
- clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host0.lrh_hostname
- clients_config[cstr.CSTR_MNT] = self.tc_fs0_mnt
- clients_config[cstr.CSTR_OPTIONS] = "user_xattr"
- clients_configs.append(clients_config)
-
- clients_config = {}
- clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host1.lrh_hostname
- clients_config[cstr.CSTR_MNT] = self.tc_fs0_mnt
- clients_config[cstr.CSTR_OPTIONS] = "user_xattr"
- clients_configs.append(clients_config)
- lustre_config[cstr.CSTR_CLIENTS] = clients_configs
-
- def _tc_generate_clownfish_config_fs1_mdts(self, lustre_config):
- """
- Generate the mdts part of lustre 1 of clownfish.conf
- """
- mdt_configs = []
- for mdt_index in range(self.tc_fs1_mdt_number_per_mds):
- mdt_config = {}
- mdt_config[cstr.CSTR_IS_MGS] = False
- mdt_config[cstr.CSTR_INDEX] = mdt_index
- if mdt_index == self.tc_fs1_mdt_number_per_mds - 1:
- is_ldiskfs = False
- else:
- is_ldiskfs = True
- if is_ldiskfs:
- mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS
- else:
- mdt_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS
- mdt_instance_configs = []
- for rpc_host in self.tc_fs1_mds_pair0:
- mdt_instance_config = {}
- mdt_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- disk_id = self.tc_fs1_mdt_disk_id_prefix + str(mdt_index)
- block_device = DEV_MAPPER_PREFIX + disk_id
- if is_ldiskfs:
- device = block_device
- else:
- zpool = disk_id
- device = zpool + ("/mdt%d" % mdt_index)
- mdt_instance_config[cstr.CSTR_DEVICE] = device
- mdt_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp"
- mdt_instance_configs.append(mdt_instance_config)
- if not is_ldiskfs:
- zpool_create = ("zpool create -f %s %s" %
- (zpool, block_device))
- mdt_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create
- mdt_config[cstr.CSTR_INSTANCES] = mdt_instance_configs
- mdt_configs.append(mdt_config)
- lustre_config[cstr.CSTR_MDTS] = mdt_configs
-
- def _tc_generate_clownfish_config_fs1_osts(self, lustre_config):
- """
- Generate the osts part of lustre 1 of clownfish.conf
- """
- ost_configs = []
- for ost_index in range(self.tc_fs1_ost_number_per_oss):
- ost_config = dict()
- ost_config[cstr.CSTR_IS_MGS] = False
- ost_config[cstr.CSTR_INDEX] = ost_index
- if ost_index == self.tc_fs1_ost_number_per_oss - 1:
- is_ldiskfs = False
- else:
- is_ldiskfs = True
- if is_ldiskfs:
- ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_LDISKFS
- else:
- ost_config[cstr.CSTR_BACKFSTYPE] = lustre.BACKFSTYPE_ZFS
- ost_instance_configs = []
- for rpc_host in self.tc_fs1_oss_pair0:
- ost_instance_config = dict()
- ost_instance_config[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- disk_id = self.tc_fs1_ost_disk_id_prefix + str(ost_index)
- block_device = DEV_MAPPER_PREFIX + disk_id
- if is_ldiskfs:
- device = block_device
- else:
- zpool = disk_id
- device = zpool + ("/ost%d" % ost_index)
- ost_instance_config[cstr.CSTR_DEVICE] = device
- ost_instance_config[cstr.CSTR_NID] = rpc_host.lrh_ipv4_addresses[0] + "@tcp"
- ost_instance_configs.append(ost_instance_config)
- if not is_ldiskfs:
- zpool_create = ("zpool create -f %s %s" %
- (zpool, block_device))
- ost_instance_config[cstr.CSTR_ZPOOL_CREATE] = zpool_create
- ost_config[cstr.CSTR_INSTANCES] = ost_instance_configs
- ost_configs.append(ost_config)
- lustre_config[cstr.CSTR_OSTS] = ost_configs
-
- def _tc_generate_clownfish_config_fs1_clients(self, lustre_config):
- """
- Generate the clients part of a lustre file system of clownfish.conf
- """
- clients_configs = []
- clients_config = {}
- clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host0.lrh_hostname
- clients_config[cstr.CSTR_MNT] = self.tc_fs1_mnt
- clients_config[cstr.CSTR_OPTIONS] = "user_xattr"
- clients_configs.append(clients_config)
-
- clients_config = {}
- clients_config[cstr.CSTR_HOST_ID] = self.tc_client_host1.lrh_hostname
- clients_config[cstr.CSTR_MNT] = self.tc_fs1_mnt
- clients_config[cstr.CSTR_OPTIONS] = "user_xattr"
- clients_configs.append(clients_config)
- lustre_config[cstr.CSTR_CLIENTS] = clients_configs
-
- def _tc_generate_clownfish_config_lustres(self, config):
- """
- Generate the lustres part of clownfish.conf
- """
- lustre_configs = []
- lustre_config = {}
- lustre_config[cstr.CSTR_FSNAME] = self.tc_fs0_fsname
- lustre_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id
- self._tc_generate_clownfish_config_fs0_mdts(lustre_config)
- self._tc_generate_clownfish_config_fs0_osts(lustre_config)
- self._tc_generate_clownfish_config_fs0_clients(lustre_config)
- lustre_configs.append(lustre_config)
-
- lustre_config = {}
- lustre_config[cstr.CSTR_FSNAME] = self.tc_fs1_fsname
- lustre_config[cstr.CSTR_MGS_ID] = self.tc_cluster_id
- self._tc_generate_clownfish_config_fs1_mdts(lustre_config)
- self._tc_generate_clownfish_config_fs1_osts(lustre_config)
- self._tc_generate_clownfish_config_fs1_clients(lustre_config)
- lustre_configs.append(lustre_config)
- config[cstr.CSTR_LUSTRES] = lustre_configs
-
- def tc_generate_clownfish_config(self, log):
- """
- Generate clownfish.conf
- """
- config = {}
- config[cstr.CSTR_LAZY_PREPARE] = True
- config[cstr.CSTR_HIGH_AVAILABILITY] = False
- self._tc_generate_clownfish_config_lustre_distributions(config)
- self._tc_generate_clownfish_config_ssh_hosts(config)
- self._tc_generate_clownfish_config_mgs_list(config)
- self._tc_generate_clownfish_config_lustres(config)
-
- config_fpath = self.tc_clownfish_config_fpath
- start_string = """#
-# Configuration file of Clownfish
-#
-"""
- ret = self.tc_write_and_send_config(log, config, config_fpath,
- start_string)
- if ret:
- log.cl_error("failed to write and send clownfish.conf")
- return -1
-
- log.cl_info("config of clownfish.conf is saved to [%s]", config_fpath)
- return 0
-
- def tc_generate_lipe_virt_config(self, log, shared_disk_dict):
- """
- Generate the config that can be digested by lipe_virt command
- """
- kvm_server_dict = self.tc_kvm_server_dict
- hosts = self.tc_hosts
- pairs = self.tc_pairs
- config = {}
- generate_lipe_virt_config_shared_disks(shared_disk_dict, config)
- generate_lipe_virt_config_templates(log, kvm_server_dict, config)
- generate_lipe_virt_config_ssh_hosts(log, hosts, pairs, config)
- generate_lipe_virt_config_vm_hosts(log, hosts, pairs, config)
-
- config_fpath = self.tc_lipe_virt_config_fpath
- start_string = """#
-# Configuration file of installing virtual machines, used by lipe_virt command
-#
-"""
- ret = self.tc_write_and_send_config(log, config, config_fpath,
- start_string)
- if ret:
- log.cl_error("failed to write and send lipe_virt.conf")
- return -1
-
- log.cl_info("config of lipe_virt.conf is saved to [%s]", config_fpath)
- return 0
-
- def tc_write_and_send_config(self, log, config, config_fpath,
- start_string):
- """
- Write the config and send to test host
- """
- config_string = start_string
- config_string += yaml.dump(config, Dumper=lyaml.YamlDumper,
- default_flow_style=False)
-
- try:
- with open(config_fpath, 'w') as yaml_file:
- yaml_file.write(config_string)
-        except IOError:
- sys.stdout.write(config_string)
- return -1
-
- workspace = self.tc_workspace
- test_host = self.tc_test_host.crh_host
- ret = test_host.sh_send_file(log, config_fpath, workspace)
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "directory [%s] on host [%s]",
- config_fpath, workspace, test_host.sh_hostname)
-            return -1
-        return 0
-
- def tc_generate_lipe_install_config(self, log, launch_argument):
- """
- Generate lipe_install.conf
- """
- # pylint: disable=too-many-locals
- # send the example web json file to test host
- workspace = self.tc_workspace
- test_host = self.tc_test_host.crh_host
- ret = test_host.sh_send_file(log,
- launch_argument.la_source_lipe_launch_config_file,
- workspace)
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "directory [%s] on host [%s]",
- launch_argument.la_source_lipe_launch_config_file,
- workspace, test_host.sh_hostname)
- return -1
-
- config = {}
- config[cstr.CSTR_CONFIG_FPATH] = self.tc_lipe_launch_fpath
- config[cstr.CSTR_ISO_PATH] = launch_argument.la_test_host_iso_fpath
-
- ssh_hosts = []
- mdt_hosts = []
- for rpc_host in self.tc_fs0_mds_pair0 + self.tc_fs1_mds_pair0:
- mdt_host = {}
- mdt_host[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- mdt_hosts.append(mdt_host)
- ssh_host_dict = {}
- ssh_host_dict[cstr.CSTR_HOST_ID] = rpc_host.lrh_hostname
- ssh_host_dict[cstr.CSTR_HOSTNAME] = rpc_host.lrh_hostname
- ssh_hosts.append(ssh_host_dict)
- config[cstr.CSTR_MDT_HOSTS] = mdt_hosts
-
- # Add oss hosts into ssh_hosts list to make lpurge available there
- for oss_host in self.tc_fs0_oss_pair0 + self.tc_fs0_oss_pair1:
- ssh_host_dict = {}
- ssh_host_dict[cstr.CSTR_HOST_ID] = oss_host.lrh_hostname
- ssh_host_dict[cstr.CSTR_HOSTNAME] = oss_host.lrh_hostname
- ssh_hosts.append(ssh_host_dict)
-
- # Need to add client hosts here too, otherwise, lipe_run_action
- # might be missing
- for client_host in [self.tc_client_host0, self.tc_client_host1]:
- ssh_host_dict = {}
- ssh_host_dict[cstr.CSTR_HOST_ID] = client_host.lrh_hostname
- ssh_host_dict[cstr.CSTR_HOSTNAME] = client_host.lrh_hostname
- ssh_hosts.append(ssh_host_dict)
- config[cstr.CSTR_SSH_HOSTS] = ssh_hosts
-
- config_fpath = self.tc_lipe_install_config_fpath
- start_string = """#
-# Configuration file for installing LiPE
-#
-"""
- ret = self.tc_write_and_send_config(log, config, config_fpath,
- start_string)
- if ret:
- log.cl_error("failed to write and send lipe_install.conf")
- return -1
- log.cl_info("config of lipe_install.conf saved to [%s]", config_fpath)
- return 0
-
- def tc_generate_lipe_test_config(self, log):
- """
- Generate lipe_test.conf
- """
- config = {}
- config[cstr.CSTR_CLOWNFISH_CONFIG] = self.tc_clownfish_config_fpath
- install_server_config = {}
- install_server_config[cstr.CSTR_HOSTNAME] = self.tc_install_server.lrh_hostname
- config[cstr.CSTR_INSTALL_SERVER] = install_server_config
- config[cstr.CSTR_LIPE_INSTALL_CONFIG] = self.tc_lipe_install_config_fpath
- config[cstr.CSTR_SKIP_INSTALL] = False
- config[cstr.CSTR_SKIP_VIRT] = False
- config[cstr.CSTR_VIRT_CONFIG] = self.tc_lipe_virt_config_fpath
-
- config_fpath = self.tc_lipe_test_config_fpath
- start_string = """#
-# Configuration file for testing LiPE
-#
-"""
- ret = self.tc_write_and_send_config(log, config, config_fpath,
- start_string)
- if ret:
-            log.cl_error("failed to write and send lipe_test.conf")
- return -1
- log.cl_info("config of lipe_test.conf is saved to [%s]", config_fpath)
- return 0
-
- def tc_generate_lipe_build_config(self, log):
- """
- Generate the config that can be digested by lipe_build command
- """
- config = {}
- config[cstr.CSTR_ZFS_SUPPORT] = True
-
- config_fpath = self.tc_lipe_build_config_fpath
- start_string = """#
-# Configuration file of building LiPE ISO
-#
-"""
- ret = self.tc_write_and_send_config(log, config, config_fpath,
- start_string)
- if ret:
-            log.cl_error("failed to write and send lipe_build.conf")
- return -1
-
-        log.cl_info("config of lipe_build.conf is saved to [%s]", config_fpath)
- return 0
-
- def tc_build_lipe(self, log, launch_argument):
- """
-        Send the lipe source code to the remote host, build it, and get the ISO back
- """
- workspace = self.tc_workspace
- host = self.tc_test_host.crh_host
-
- ret = send_lipe_source(log, workspace, host, launch_argument)
- if ret:
- return -1
-
- remote_source_path = launch_argument.la_test_host_source_path
- lipe_build_logdir = (remote_source_path + "/" +
- constants.LIPE_BUILD_LOG_DIR_BASENAME)
- log.cl_info("building lipe in directory [%s] on host [%s]",
- remote_source_path, host.sh_hostname)
- command = ("cd %s && ./lipe_build --logdir %s --config %s" %
- (remote_source_path, lipe_build_logdir,
- self.tc_lipe_build_config_fpath))
- stdout_file = workspace + "/" + "lipe_build_watching.stdout"
- stderr_file = workspace + "/" + "lipe_build_watching.stderr"
- retval = host.sh_watched_run(log, command, stdout_file, stderr_file)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- ret = self.tc_get_and_clean_build_dir(log, launch_argument)
- if ret:
- log.cl_error("please check [%s] on host [%s] and [%s] on "
- "local host to debug why build of lipe failed",
- remote_source_path, host.sh_hostname, workspace)
- return -1
- return 0
-
- def tc_get_and_clean_build_dir(self, log, launch_argument):
- """
- Get and clean the build dir on test host
- """
- remote_source_path = launch_argument.la_test_host_source_path
- host = self.tc_test_host.crh_host
-
- ret = self.tc_get_and_clean_dir(log, host, remote_source_path)
- if ret:
- log.cl_error("failed to get and clean dir [%s] on host [%s], "
-                         "please back it up manually for further debugging",
- remote_source_path, host.sh_hostname)
- return -1
- return 0
-
- def tc_prepare_workspace(self, log):
- """
- Create the workspace
- """
- workspace = self.tc_workspace
- build_host = self.tc_test_host.crh_host
-
- command = ("mkdir -p %s" % (workspace))
- retval = build_host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, build_host.sh_hostname,
- retval.cr_exit_status, retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
- def tc_cleanup_vm_hosts(self, log):
- """
-        Cleanup the virtual machine hosts
- """
- for host in self.tc_vm_hosts:
- ret = host.crh_host.sh_rpm_find_and_uninstall(log, "grep lipe")
- if ret:
- log.cl_error("failed to uninstall LiPE RPMs on host "
- "[%s]", host.crh_host.sh_hostname)
- return -1
-
- ret = host.crh_host.sh_rpm_find_and_uninstall(log, "grep clownfish")
- if ret:
- log.cl_error("failed to uninstall Clownfish RPMs on host "
- "[%s]", host.crh_host.sh_hostname)
- return -1
-
-        # Ignore the error since these mount points might not exist yet
- command = "umount %s/*" % constants.LIPE_INSTALL_MNT_DIR
- host.crh_host.sh_run(log, command)
- command = ("rmdir %s/*" % (constants.LIPE_INSTALL_MNT_DIR))
- host.crh_host.sh_run(log, command)
- return 0
-
- def tc_run_test_without_removing_build_dir(self, log, launch_argument):
- """
- Run test without removing build dir
- """
- shared_disk_dict = {}
- ret = self.tc_add_shared_devices(log, shared_disk_dict)
- if ret:
- log.cl_error("failed to add shared devices")
- return -1
-
- ret = self.tc_generate_lipe_virt_config(log, shared_disk_dict)
- if ret:
- log.cl_error("failed to generate config of lipe_virt")
- return -1
-
- ret = self.tc_generate_clownfish_config(log)
- if ret:
- log.cl_error("failed to generate config of clownfish")
- return -1
-
- ret = self.tc_generate_lipe_install_config(log, launch_argument)
- if ret:
- log.cl_error("failed to generate config of installing lipe")
- return -1
-
- ret = self.tc_generate_lipe_test_config(log)
- if ret:
- log.cl_error("failed to generate config of testing lipe")
- return -1
-
- ret = self.tc_send_lustre_e2fsprogs_rpms(log, launch_argument)
- if ret:
- log.cl_error("failed to send Lustre and E2fsprogs RPMs")
- return -1
-
- ret = self.tc_run_lipe_test(log, launch_argument)
- if ret:
- log.cl_error("failed to run LiPE test")
-            return -1
-        return 0
-
- def tc_run_test(self, log, launch_argument):
- """
- Run test on this cluster
- """
- # pylint: disable=too-many-locals,too-many-arguments,too-many-branches
- ret = self.tc_cleanup_vm_hosts(log)
- if ret:
- log.cl_error("failed to clean up VMs")
- return -1
-
- ret = self.tc_prepare_workspace(log)
- if ret:
- log.cl_error("failed to prepare workspace")
- return -1
-
- ret = self.tc_generate_lipe_build_config(log)
- if ret:
- log.cl_error("failed to generate lipe_build.conf")
- return -1
-
- ret = self.tc_build_lipe(log, launch_argument)
- if ret:
- log.cl_error("failed to build LiPE")
- return -1
-
- return_value = self.tc_run_test_without_removing_build_dir(log,
- launch_argument)
- if return_value:
- log.cl_error("failed to run tests")
-
- ret = self.tc_get_and_clean_build_dir(log, launch_argument)
- if ret:
- log.cl_error("failed to get and clean build directory on test host")
- return_value = -1
- return return_value
-
-
-def _allocate_resources(log, scheduler_id, jobid, proxy, descs):
- """
- Allocate resources from server
- """
- rpc_descriptors = proxy.ts_resources_allocate(scheduler_id, jobid, descs)
- if len(rpc_descriptors) == 0:
- log.cl_info("not enough resources to allocate")
- return -1, None, None
- same_kvm_host_descriptors = []
- other_descriptors = []
- lipe_test_scheduler.rpc2descriptors(log, rpc_descriptors,
- same_kvm_host_descriptors,
- other_descriptors)
- return 0, same_kvm_host_descriptors, other_descriptors
-
-
-def allocate_resources(log, scheduler_id, jobid, proxy, descs,
- timeout=DEFAULT_HOST_TIMEOUT,
- sleep_interval=HOST_ALLOCATION_INTERVAL):
- """
- Allocate resources from server, wait if necessary
- """
- # pylint: disable=too-many-arguments,unused-variable
- return utils.wait_condition(log, _allocate_resources,
- (scheduler_id, jobid, proxy, descs),
- timeout=timeout,
- sleep_interval=sleep_interval)
-
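utils.wait_condition is an internal helper; judging from this call site, it keeps invoking a function whose first return value signals success (0) or failure, sleeping between attempts until the timeout expires. A rough sketch under that assumption:

    import time

    def wait_condition(log, func, args, timeout=86400, sleep_interval=3):
        # Assumed semantics: retry func until its first return value is 0;
        # a timeout of 0 means wait forever
        deadline = time.time() + timeout if timeout else None
        while True:
            result = func(log, *args)
            if result[0] == 0:
                return result
            if deadline is not None and time.time() >= deadline:
                return result
            time.sleep(sleep_interval)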
-
-def allocate_hosts_and_ip(log, scheduler_id, jobid, proxy):
- """
- Allocate hosts and IP
- """
- # pylint: disable=unused-variable
- descs = []
-    desc = lipe_test_scheduler.ResourceDescriptorIPAddress()
-    descs.append(desc)
- desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_BUILD)
- descs.append(desc)
- desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_TEST,
- number_min=TestCluster.HOST_NUMBER,
- number_max=TestCluster.HOST_NUMBER)
- descs.append(desc)
- for i in range(TestCluster.PAIR_NUMBER):
- desc = lipe_test_scheduler.ResourceDescriptorHost(lipe_test_scheduler.PURPOSE_TEST,
- number_min=2, number_max=2,
- same_kvm_server=True)
- descs.append(desc)
-
-    ret, descs1, descs2 = allocate_resources(log, scheduler_id, jobid, proxy,
-                                             descs)
-    if ret:
-        log.cl_error("failed to allocate hosts and IP")
-    return ret, descs1, descs2
-
-
-def resource2host(res, kvm_server_dict):
- """
- Transfer the RPCHost to ClientRPCHost
- """
- hostname = res.lrh_hostname
- kvm_template = res.lrh_kvm_template
- kvm_server_hostname = res.lrh_kvm_server_hostname
- if kvm_template is not None:
- template_hostname = kvm_template["vt_template_hostname"]
-        # kvm_server_dict maps each KVM server hostname to a dictionary,
-        # which in turn maps a global template hostname to the full
-        # template config
- if kvm_server_hostname not in kvm_server_dict:
-            kvm_template_per_server_dict = {}
-            kvm_server_dict[kvm_server_hostname] = kvm_template_per_server_dict
-        kvm_template_per_server_dict = kvm_server_dict[kvm_server_hostname]
-
- global_template_hostname = kvm_server_hostname + "_" + template_hostname
-        if global_template_hostname not in kvm_template_per_server_dict:
- kvm_template_config = {}
- # The template is an object of VirtTemplate
- kvm_template_config[cstr.CSTR_DNS] = kvm_template["vt_dns"]
- kvm_template_config[cstr.CSTR_ISO] = kvm_template["vt_iso"]
- kvm_template_config[cstr.CSTR_HOSTNAME] = global_template_hostname
- kvm_template_config[cstr.CSTR_INTERNET] = kvm_template["vt_internet"]
- kvm_template_config[cstr.CSTR_NETWORK_CONFIGS] = kvm_template["vt_network_configs"]
- kvm_template_config[cstr.CSTR_IMAGE_DIR] = kvm_template["vt_image_dir"]
- kvm_template_config[cstr.CSTR_DISTRO] = kvm_template["vt_distro"]
- kvm_template_config[cstr.CSTR_RAM_SIZE] = kvm_template["vt_ram_size"]
- kvm_template_config[cstr.CSTR_DISK_SIZES] = kvm_template["vt_disk_sizes"]
- kvm_template_config[cstr.CSTR_BUS_TYPE] = kvm_template["vt_bus_type"]
- kvm_template_config[cstr.CSTR_SERVER_HOST_ID] = kvm_server_hostname
- kvm_template_config[cstr.CSTR_REINSTALL] = False
-            kvm_template_per_server_dict[global_template_hostname] = kvm_template_config
-        kvm_template_config = kvm_template_per_server_dict[global_template_hostname]
- else:
- kvm_template_config = None
- global_template_hostname = None
-
- rpc_host = ClientRPCHost(hostname, global_template_hostname=global_template_hostname,
- kvm_template_config=kvm_template_config,
- kvm_server_hostname=kvm_server_hostname,
- expected_distro=res.lrh_expected_distro,
- ipv4_addresses=res.lrh_ipv4_addresses)
- return rpc_host
-
-
-def resources2hosts(log, resources, kvm_server_dict):
- """
- Transfer an array of RPCHost to an array of ClientRPCHost
- """
- rpc_hosts = []
- hostnames = []
- for res in resources:
- rpc_host = resource2host(res, kvm_server_dict)
- rpc_hosts.append(rpc_host)
- hostnames.append(rpc_host.lrh_hostname)
-
- log.cl_info("allocated hosts %s", hostnames)
- return rpc_hosts
-
-
-def descriptor2build_hosts(log, desc, build_host, kvm_server_dict):
- """
-    Transfer a descriptor of the build host to an object of ClientRPCHost
- """
- if build_host is not None:
-        log.cl_error("allocated more than one build host")
- return None
- if len(desc.rd_resources) != 1:
-        log.cl_error("unexpected host number in the build resource, "
-                     "expected 1, got %d", len(desc.rd_resources))
- return None
- build_host = resource2host(desc.rd_resources[0], kvm_server_dict)
- log.cl_info("allocated build host %s", build_host.lrh_hostname)
- return build_host
-
-
-def descriptor2pair_hosts(log, desc, pairs, kvm_server_dict):
- """
-    Transfer a descriptor of a pair of hosts to an array of ClientRPCHost
- """
- if desc.rdh_purpose != lipe_test_scheduler.PURPOSE_TEST:
- log.cl_error("unexpected purpose, expected [%s], got [%s]",
- lipe_test_scheduler.PURPOSE_TEST,
- desc.rdh_purpose)
- return None
- if len(pairs) >= TestCluster.PAIR_NUMBER:
- log.cl_error("allocated too many pair hosts")
- return None
- if len(desc.rd_resources) != 2:
- log.cl_error("unexpected host number in a pair resource, "
- "expected 2, got %d", len(desc.rd_resources))
- return None
- rpc_hosts = resources2hosts(log, desc.rd_resources, kvm_server_dict)
- pairs.append(rpc_hosts)
- return 0
-
-
-def descriptor2single_hosts(log, desc, single_hosts, kvm_server_dict):
- """
-    Transfer a descriptor of single hosts to an array of ClientRPCHost
- """
- if desc.rdh_purpose != lipe_test_scheduler.PURPOSE_TEST:
- log.cl_error("unexpected purpose, expected [%s], got [%s]",
- lipe_test_scheduler.PURPOSE_TEST,
- desc.rdh_purpose)
- return None
- if len(single_hosts) != 0:
- log.cl_error("allocated too many hosts")
- return None
- if len(desc.rd_resources) != TestCluster.HOST_NUMBER:
- log.cl_error("unexpected host number in the test resource, "
- "expected %d, got %d", TestCluster.HOST_NUMBER,
- len(desc.rd_resources))
- return None
- return resources2hosts(log, desc.rd_resources, kvm_server_dict)
-
-
-def descriptions2cluster(log, workspace, descs):
- """
-    Use the allocated resources to assemble a cluster
- """
- hosts = []
- build_host = None
- kvm_server_dict = {}
- rpc_ip_address = None
- pairs = []
- for desc in descs:
- if desc.rd_type == lipe_test_scheduler.RESOURCE_TYPE_IP_ADDRESS:
- if rpc_ip_address is not None:
- log.cl_error("unexpected multiple IPs")
- return None
- if len(desc.rd_resources) != 1:
- log.cl_error("unexpected IP number in a resource, expected 1, "
- "got %d", len(desc.rd_resources))
- return None
- rpc_ip_address = desc.rd_resources[0]
- log.cl_info("got address [%s]", rpc_ip_address.ripa_address)
- elif desc.rd_type == lipe_test_scheduler.RESOURCE_TYPE_HOST:
- if desc.rdh_same_kvm_server:
- ret = descriptor2pair_hosts(log, desc, pairs, kvm_server_dict)
- if ret:
- return None
- elif desc.rdh_purpose == lipe_test_scheduler.PURPOSE_BUILD:
- build_host = descriptor2build_hosts(log, desc, build_host,
- kvm_server_dict)
- if build_host is None:
- return None
- else:
- hosts = descriptor2single_hosts(log, desc, hosts, kvm_server_dict)
- if hosts is None:
- return None
- return TestCluster(workspace, build_host, hosts, pairs, rpc_ip_address, kvm_server_dict)
-
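For readability, the mix of descriptors descriptions2cluster expects is exactly what allocate_hosts_and_ip requested:

    # Expected composition of descs:
    #   1 x ResourceDescriptorIPAddress
    #   1 x ResourceDescriptorHost(PURPOSE_BUILD), 1 host
    #   1 x ResourceDescriptorHost(PURPOSE_TEST), HOST_NUMBER (2) hosts
    #   4 x ResourceDescriptorHost(PURPOSE_TEST), 2 hosts, same_kvm_server=True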
-
-def run_test_with_resources_allocated(log, workspace, launch_argument, descs):
- """
- Run the test with resources allocated
- """
- cluster = descriptions2cluster(log, workspace, descs)
- if cluster is None:
-        log.cl_error("failed to assemble a cluster from the resources")
- return -1
- return cluster.tc_run_test(log, launch_argument)
-
-
-def run_test_connected(log, workspace, launch_argument, scheduler_id, jobid, proxy):
- """
- Run the test with connection to scheduler
- """
- # pylint: disable=too-many-arguments
- ret, descs1, descs2 = allocate_hosts_and_ip(log, scheduler_id, jobid,
- proxy)
- if ret:
- log.cl_error("failed to allocate hosts and IP")
- return -1
-
- descs = descs1 + descs2
-
- retval = run_test_with_resources_allocated(log, workspace, launch_argument,
- descs)
-
- ret = proxy.ts_resources_release(scheduler_id, jobid, descs)
- if ret:
- log.cl_error("failed to release hosts and IPs")
- return -1
- return retval
-
-
-def send_heartbeat(log, proxy, scheduler_id, jobid):
- """
- Send heartbeat to scheduler
-    Improvement: the main thread should send heartbeats and check the time
-    from time to time in case the heartbeat thread is broken
- """
- # pylint: disable=global-statement,broad-except
- global CHECK_TIME, EXIT_REASON, SHUTTING_DOWN
- ret = 0
-
- log.cl_debug("sending heartbeat")
- now = time_util.utcnow()
- try:
- ret = proxy.ts_job_heartbeat(scheduler_id, jobid)
- if ret == 0:
- CHECK_TIME = now
- else:
- SHUTTING_DOWN = True
- return -1
- except Exception, err:
-        disconnect_time = (now - CHECK_TIME).seconds
-        if disconnect_time <= lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT:
-            extra_string = ("will time out in [%d] seconds" %
-                            (lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT - disconnect_time))
-        else:
-            extra_string = "already timed out"
-        log.cl_error("failed to send heartbeat of job [%s], "
-                     "%s: %s, %s",
-                     jobid, extra_string, str(err), traceback.format_exc())
-        ret = -1
-        if disconnect_time > lipe_test_scheduler.TEST_HEARTBEAT_TIMEOUT:
-            EXIT_REASON = ("heartbeat of job [%s] timed out, shutting down" % jobid)
- SHUTTING_DOWN = True
- return -1
- if ret:
- log.cl_error("failed to send heartbeat")
- else:
- log.cl_debug("sent heartbeat")
- return ret
-
-
-def heartbeat_thread(log, proxy, scheduler_id, jobid):
- """
-    Thread that sends heartbeats
- """
- while True:
- time.sleep(lipe_test_scheduler.TEST_HEARTBEAT_INTERVAL)
- send_heartbeat(log, proxy, scheduler_id, jobid)
-
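With TEST_HEARTBEAT_INTERVAL at 10 seconds and TEST_HEARTBEAT_TIMEOUT at 20, a beat is sent every 10 seconds and the job aborts once no beat has gotten through for more than 20 seconds. utils.thread_start is internal; a plain-threading sketch of what starting this loop as a daemon thread presumably looks like:

    import threading

    def thread_start(target, args):
        # Daemon thread, so a stuck heartbeat loop cannot keep the
        # process alive once the main test flow has exited
        thread = threading.Thread(target=target, args=args)
        thread.daemon = True
        thread.start()
        return thread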
-
-def run_test(log, workspace, launch_argument):
- """
- Run the test
- """
- log.cl_info("connecting to server [%s]", launch_argument.la_server)
- proxy = xmlrpclib.ServerProxy(launch_argument.la_server, allow_none=True)
- scheduler_id = proxy.ts_get_id()
- jobid = proxy.ts_job_start(scheduler_id)
- log.cl_info("got job ID [%s]", jobid)
-
- utils.thread_start(heartbeat_thread, (log, proxy, scheduler_id, jobid))
-
- ret = run_test_connected(log, workspace, launch_argument, scheduler_id,
- jobid, proxy)
-
- proxy.ts_job_stop(scheduler_id, jobid)
- log.cl_info("released job [%s]", jobid)
- return ret
-
-
-def parse_options(log):
- """
- Parse the options
- """
- # pylint: disable=too-many-branches
- launch_argument = LaunchArg()
- options, arguments = getopt.getopt(sys.argv[1:],
- "e:hl:p:s:",
-                                       ["e2fsprogs=",
-                                        "help",
-                                        "host_timeout=",
-                                        "lustre=",
-                                        "source_path=",
-                                        "server="])
-
- for opt, arg in options:
- if opt == "--e2fsprogs" or opt == "-e":
- ret = launch_argument.la_update_e2fsprogs_dir(log, arg)
- if ret:
- log.cl_error("invalid e2fsprogs path option [%s %s]", opt, arg)
- sys.exit(1)
- elif opt == "--help" or opt == "-h":
- usage()
- sys.exit(0)
- elif opt == "--host_timeout":
- ret = launch_argument.la_update_host_wait_time(log, int(arg))
- if ret:
-                log.cl_error("invalid host_timeout option [%s]", arg)
- sys.exit(1)
- elif opt == "--lustre" or opt == "-l":
- ret = launch_argument.la_update_lustre_dir(log, arg)
- if ret:
- log.cl_error("invalid lustre path option [%s %s]", opt, arg)
- sys.exit(1)
- elif opt == "--server" or opt == "-s":
- ret = launch_argument.la_update_server(log, arg)
- if ret:
- log.cl_error("invalid server option [%s]", arg)
- sys.exit(1)
- elif opt == "--source_path" or opt == "-p":
- ret = launch_argument.la_update_source_path(log, arg)
- if ret:
- log.cl_error("invalid source path option [%s %s]", opt, arg)
- sys.exit(1)
- else:
-            log.cl_error("unknown option [%s %s]", opt, arg)
- usage()
- sys.exit(1)
-
- if len(arguments) != 0:
-        log.cl_error("unknown arguments %s", arguments)
- usage()
- sys.exit(1)
-
- ret = launch_argument.la_check_arguments(log)
- if ret:
- sys.exit(1)
- return launch_argument
-
-
-def main():
- """
- Run the test
- """
- now = time_util.utcnow()
- workspace = (LIPE_TEST_LAUNCH_LOG_DIR + "/" +
- time_util.local_strftime(now, ('%Y-%m-%d-%H_%M_%S')))
- retval = utils.run("mkdir -p %s" % workspace)
- if retval.cr_exit_status != 0:
- utils.eprint("failed to create directory [%s]\n" % workspace)
- sys.exit(1)
-
- log = clog.get_log(resultsdir=workspace)
- launch_argument = parse_options(log)
-
- return_value = 0
- ret = run_test(log, workspace, launch_argument)
- if ret:
- log.cl_error("failed to launch test")
-        return_value = -1
- else:
- log.cl_info("successfully launched the test")
- log.cl_info("moving the log from [%s] to [%s]", workspace,
- launch_argument.la_source_path)
- log.cl_fini()
-
-    # Regardless of failure or success, move the workspace to the source path
- command = ("mv %s %s" % (workspace, launch_argument.la_source_path))
- retval = utils.run(command)
- if retval.cr_exit_status != 0:
- utils.eprint("failed to run command [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]" %
- (command,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr))
-        utils.eprint("please check, back up, and possibly delete directory "
-                     "[%s] on the local host" %
-                     (workspace))
- sys.exit(-1)
- basename = os.path.basename(workspace)
- logdir = launch_argument.la_source_path + "/" + basename
- if return_value:
-        utils.eprint("please check [%s] for logs" % logdir)
- else:
-        utils.oprint("please check [%s] for logs" % logdir)
-
- sys.exit(return_value)
+++ /dev/null
-# Copyright (c) 2016 DataDirect Networks, Inc.\r
-# All Rights Reserved.\r
-# Author: lixi@ddn.com\r
-"""\r
-The scheduler manages the usage of test hosts. All test launchers\r
-need to allocate hosts from the scheduler.\r
-"""\r
-# pylint: disable=too-many-lines\r
-import SimpleXMLRPCServer\r
-import threading\r
-import signal\r
-import time\r
-import os\r
-import re\r
-import traceback\r
-import socket\r
-import yaml\r
-\r
-# Local libs\r
-from pylustre import utils\r
-from pylustre import time_util\r
-from pylustre import cstr\r
-from pylustre import cmd_general\r
-from pylustre import lipe_virt\r
-from pylustre import ssh_host\r
-\r
-TEST_SCHEDULER_PORT = 1234\r
-TEST_SCHEDULER_LOG_DIR = "/var/log/lipe_test_scheduler"\r
-TEST_SCHEDULER_CONFIG = "/etc/lipe_test_scheduler.conf"\r
-\r
-\r
-PURPOSE_BUILD = "build"\r
-PURPOSE_TEST = "test"\r
-\r
-RESOURCE_TYPE_HOST = "host"\r
-RESOURCE_TYPE_IP_ADDRESS = "ip_address"\r
-\r
-GLOBAL_LOG = None\r
-SHUTTING_DOWN = False\r
-MIN_GOOD_RES_CHECK_INTERVAL = 7200\r
-MIN_BAD_RES_CHECK_INTERVAL = 3600\r
-# Need to wait at least this long before asserting that the IP is not
-# used by any host
-IP_MAX_FAILOVER_TIME = 60
-# The interval to check whether an IP is being used or not
-IP_CHECK_INTERVAL = 3
-# Need to check at least this many times before asserting that the IP
-# is not used by any host
-IP_MIN_CHECK_TIMES = 5
-\r
-# The heartbeat interval
-TEST_HEARTBEAT_INTERVAL = 10
-# The heartbeat timeout. Both the scheduler and the client will abort the
-# job if heartbeats are not received/sent correctly for this long.
-TEST_HEARTBEAT_TIMEOUT = 20
-\r
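With these defaults a client sends a heartbeat every 10 seconds, so two consecutive missed heartbeats (2 x 10 s > TEST_HEARTBEAT_TIMEOUT = 20 s) are enough for either side to abort the job. Likewise, an IP is only declared free once both conditions in IPAddress._sr_cleanup() below hold, i.e. after at least max(IP_MAX_FAILOVER_TIME, IP_MIN_CHECK_TIMES x IP_CHECK_INTERVAL) = max(60 s, 15 s) = 60 seconds of repeated failed pings.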
-\r
-class ScheduledResource(object):\r
- """\r
- Each resource has this type\r
- """\r
- # pylint: disable=too-few-public-methods,too-many-instance-attributes\r
-    # This return value signals that the resource is being checked
- RESOURCE_IS_BUSY = 1\r
-\r
- def __init__(self, name, resource_type, concurrency):\r
- self.sr_is_clean = False\r
-        # The time when the resource was last checked/cleaned up
- self.sr_check_time = 0\r
- self.sr_max_concurrency = concurrency\r
- self.sr_concurrency = 0\r
- self.sr_job_sequence = None\r
-        self.sr_resource_type = resource_type
- self.sr_error = 0\r
- self.sr_name = name\r
- self.sr_cleaning = False\r
-\r
- def sr_dirty(self):\r
- """\r
- Dirty the resource so as to check later\r
- """\r
- self.sr_check_time = 0\r
- self.sr_is_clean = False\r
-\r
-\r
-class RPCResource(object):
-    """
-    The resource for transferring between the scheduler and its clients
- """\r
- # pylint: disable=too-few-public-methods\r
- def __init__(self, resource_type):\r
- self.rr_resource_type = resource_type\r
-\r
-\r
-class RPCIPAddress(RPCResource):
- # pylint: disable=too-few-public-methods,too-many-instance-attributes\r
- """\r
- The IP address to manage in this scheduler\r
- """\r
- def __init__(self, address, bindnetaddr):\r
- super(RPCIPAddress, self).__init__(RESOURCE_TYPE_IP_ADDRESS)\r
- self.ripa_address = address\r
- self.ripa_bindnetaddr = bindnetaddr\r
-\r
-\r
-class RPCHost(RPCResource):
-    """
-    The host for transferring between the scheduler and its clients
- """\r
- # pylint: disable=too-few-public-methods\r
- def __init__(self, hostname, kvm_server_hostname=None,\r
- expected_distro=None, ipv4_addresses=None,\r
- kvm_template_ipv4_address=None, kvm_template=None):\r
- # pylint: disable=too-many-arguments\r
- super(RPCHost, self).__init__(RESOURCE_TYPE_HOST)\r
- self.lrh_hostname = hostname\r
- self.lrh_kvm_server_hostname = kvm_server_hostname\r
- self.lrh_expected_distro = expected_distro\r
- self.lrh_ipv4_addresses = ipv4_addresses\r
- self.lrh_kvm_template_ipv4_address = kvm_template_ipv4_address\r
-        # kvm_template is only used on the server side to send info to the client
- self.lrh_kvm_template = kvm_template\r
-\r
-\r
-class TestHost(ScheduledResource):\r
- # pylint: disable=too-few-public-methods,too-many-instance-attributes\r
- """\r
- The host that is managed by scheduler\r
- """\r
- def __init__(self, hostname, distro, purpose, tag, concurrency,\r
- ipv4_addresses=None, kvm_server_hostname=None,\r
- kvm_template_ipv4_address=None,\r
- kvm_template=None):\r
- # pylint: disable=too-many-arguments\r
- super(TestHost, self).__init__(hostname, RESOURCE_TYPE_HOST, concurrency)\r
- self.th_hostname = hostname\r
- self.th_purpose = purpose\r
- self.th_distro = distro\r
- self.th_tag = tag\r
- self.th_kvm_server_hostname = kvm_server_hostname\r
- self.th_kvm_template_ipv4_address = kvm_template_ipv4_address\r
- self.th_ipv4_addresses = ipv4_addresses\r
- self.th_kvm_template = kvm_template\r
- self.th_host = ssh_host.SSHHost(hostname)\r
-\r
- def th_print_info(self, log):\r
- """\r
- Print the info of this host\r
- """\r
- log.cl_debug("added host [%s], purpose [%s], distro [%s], tag [%s], "\r
- "kvm server [%s]",\r
- self.th_hostname,\r
- self.th_purpose,\r
- self.th_distro,\r
- self.th_tag,\r
- self.th_kvm_server_hostname)\r
-\r
- def _sr_cleanup(self, log, scheduler):\r
- """\r
- Clean up the host\r
-\r
- Improvement: call shared functions in lipe_virt directly\r
-        Improvement: clean up directories to reclaim space
- """\r
- # pylint: disable=unused-argument\r
- if self.th_purpose == PURPOSE_BUILD:\r
- return 0\r
-\r
- host = self.th_host\r
- service_names = ["corosync", "pacemaker"]\r
- for service_name in service_names:\r
- ret = host.sh_service_stop(log, service_name)\r
- if ret:\r
- log.cl_error("failed to stop service [%s] on host [%s]",\r
- service_name, host.sh_hostname)\r
- return -1\r
-\r
- ret = host.sh_service_disable(log, service_name)\r
- if ret:\r
- log.cl_error("failed to disable service [%s] on host [%s]",\r
- service_name, host.sh_hostname)\r
- return -1\r
-\r
- return 0\r
-\r
- def sr_cleanup(self, log, scheduler):\r
- """\r
- Clean up the host\r
-\r
- Improvement: call shared functions in lipe_virt directly\r
-        Improvement: clean up directories to reclaim space
- """\r
- log.cl_info("cleaning up host [%s]", self.th_hostname)\r
- self.sr_cleaning = True\r
- ret = self._sr_cleanup(log, scheduler)\r
- self.sr_cleaning = False\r
- if ret:\r
- log.cl_info("failed to clean up host [%s]", self.th_hostname)\r
- else:\r
- log.cl_info("cleaned up host [%s]", self.th_hostname)\r
- return ret\r
-\r
-\r
-def _wait_disconnected(log, host):\r
- """\r
-    Check whether the host is disconnected from the local host
- """\r
-    ret = host.sh_ping(log, silent=True)
- if ret:\r
- return 0\r
- log.cl_info("still able to connect to host [%s] from local host",\r
- host.sh_hostname)\r
- return -1\r
-\r
-\r
-def wait_disconnected(log, host, timeout=10, sleep_interval=1):\r
- """\r
-    Wait until the host cannot be connected to from the local host
- """\r
- return utils.wait_condition(log, _wait_disconnected,\r
- (host, ),\r
- timeout=timeout,\r
- sleep_interval=sleep_interval)\r
-\r
-\r
-class IPAddress(ScheduledResource):\r
- # pylint: disable=too-few-public-methods,too-many-instance-attributes\r
- """\r
- The IP address to manage in this scheduler\r
- """\r
- def __init__(self, address, bindnetaddr):\r
- super(IPAddress, self).__init__(address, RESOURCE_TYPE_IP_ADDRESS, 1)\r
- self.ipa_address = address\r
- self.ipa_bindnetaddr = bindnetaddr\r
- self.ipa_host = ssh_host.SSHHost(address)\r
-\r
- def _sr_cleanup(self, log, scheduler):\r
- """\r
-        Clean up the IP address by stopping corosync/pacemaker on any
- host that is using the IP\r
- """\r
- ip_host = self.ipa_host\r
- idle_time = None\r
- checked_times = 0\r
-\r
- while True:\r
- now_time = time.time()\r
- ret = ip_host.sh_ping(log, silent=True)\r
- if ret:\r
- log.cl_debug("can not ping IP [%s]", self.ipa_address)\r
- if idle_time is None:\r
- idle_time = now_time\r
- checked_times = 0\r
- checked_times += 1\r
-                # The IP has not been used for a long time and has been
-                # checked enough times, so it is clean to use
- if (idle_time + IP_MAX_FAILOVER_TIME < now_time and\r
- checked_times > IP_MIN_CHECK_TIMES):\r
- return 0\r
- # Not long enough to decide, sleep a while and check later\r
- time.sleep(IP_CHECK_INTERVAL)\r
- continue\r
- else:\r
- log.cl_debug("can ping IP [%s]", self.ipa_address)\r
- idle_time = None\r
- checked_times = 0\r
-\r
- command = "hostname"\r
- retval = ip_host.sh_run(log, command)\r
- if retval.cr_exit_status:\r
- log.cl_info("failed to run command [%s] on host [%s], "\r
- "ret = [%d], stdout = [%s], stderr = [%s]",\r
- command,\r
- ip_host.sh_hostname,\r
- retval.cr_exit_status,\r
- retval.cr_stdout,\r
- retval.cr_stderr)\r
- log.cl_info("maybe the host with IP [%s] has been cleaned up, "\r
- "will check in the next loop", self.ipa_address)\r
- continue\r
-\r
- lines = retval.cr_stdout.splitlines()\r
- if len(lines) != 1:\r
- log.cl_error("unexpected output of command [%s] on host [%s]: "\r
- "[%s]", command, ip_host.sh_hostname,\r
- retval.cr_stdout)\r
- return -1\r
- hostname = lines[0]\r
-\r
- res = scheduler.ts_find_host(hostname)\r
- if res is None:\r
- log.cl_error("host [%s] is not managed by the scheduler but "\r
- "is using IP [%s]", hostname, self.ipa_address)\r
- return -1\r
-\r
- # The host is being cleaned, so the IP might be released soon.\r
- # Check that in the next loop.\r
- if res.sr_cleaning:\r
- log.cl_info("host [%s] is being cleaned, will check IP [%s] "\r
- "in next loop", hostname, self.ipa_address)\r
- continue\r
-\r
- ret = scheduler.ts_resource_cleanup(res)\r
- if ret == ScheduledResource.RESOURCE_IS_BUSY:\r
-                log.cl_info("host [%s] is busy, will check IP [%s] in the "
-                            "next loop", hostname, self.ipa_address)
- continue\r
- elif ret:\r
- log.cl_error("failed to cleanup host [%s]",\r
- hostname)\r
- return -1\r
-\r
- ret = wait_disconnected(log, ip_host)\r
- if ret:\r
-            log.cl_error("still able to connect to [%s] after fixing "
- "host [%s]", ip_host.sh_hostname, hostname)\r
- return -1\r
- return 0\r
-\r
- def sr_cleanup(self, log, scheduler):\r
- """\r
-        Clean up the IP address by stopping corosync/pacemaker on any
- host that is using the IP\r
- """\r
- log.cl_info("cleaning up IP address [%s]", self.ipa_address)\r
- self.sr_cleaning = True\r
- ret = self._sr_cleanup(log, scheduler)\r
- self.sr_cleaning = False\r
- if ret:\r
- log.cl_info("failed to clean up IP address [%s]", self.ipa_address)\r
- else:\r
- log.cl_info("cleaned up IP address [%s]", self.ipa_address)\r
- return ret\r
-\r
-\r
-class ResourceDescriptor(object):\r
- # pylint: disable=too-few-public-methods\r
- """\r
- Used when trying to allocate a resource\r
- """\r
- def __init__(self, resource_type, number_min=1, number_max=1,\r
- resources=None):\r
- self.rd_type = resource_type\r
- self.rd_number_min = number_min\r
- self.rd_number_max = number_max\r
- if resources is None:\r
- self.rd_resources = []\r
- else:\r
- self.rd_resources = list(resources)\r
-\r
-\r
-class ResourceDescriptorIPAddress(ResourceDescriptor):\r
- # pylint: disable=too-few-public-methods\r
- """\r
-    Used when trying to allocate an IP address
- """\r
- def __init__(self, number_min=1, number_max=1, rpc_addresses=None):\r
- super(ResourceDescriptorIPAddress, self).__init__(RESOURCE_TYPE_IP_ADDRESS,\r
- number_min=number_min,\r
- number_max=number_max,\r
- resources=rpc_addresses)\r
-\r
-\r
-class ResourceDescriptorHost(ResourceDescriptor):\r
- # pylint: disable=too-few-public-methods\r
- """\r
- Used when trying to allocate a host\r
- """\r
- def __init__(self, purpose, distro=ssh_host.DISTRO_RHEL7,\r
- same_kvm_server=False, tag=None, number_min=1, number_max=1,\r
- hosts=None):\r
- # pylint: disable=too-many-arguments\r
- super(ResourceDescriptorHost, self).__init__(RESOURCE_TYPE_HOST,\r
- number_min=number_min,\r
- number_max=number_max,\r
- resources=hosts)\r
- self.rdh_distro = distro\r
- self.rdh_purpose = purpose\r
- self.rdh_same_kvm_server = same_kvm_server\r
- self.rdh_tag = tag\r
-\r
-\r
-def resource_compare(res_x, res_y):
-    """
-    cmp-style comparison of resources by last check time, for sorting
-    """
-    return cmp(res_x.sr_check_time, res_y.sr_check_time)
-\r
-\r
-class TestSchedulerJob(object):\r
- """\r
-    Each test client allocates a job in the scheduler. Hosts can be
-    allocated to the job afterwards.
- """\r
- def __init__(self, scheduler, jobid, sequence):\r
- self.laj_jobid = jobid\r
- self.laj_hosts = []\r
- self.laj_scheduler = scheduler\r
- self.laj_sequence = sequence\r
- self.laj_check_time = time_util.utcnow()\r
- self.laj_ip_addresses = []\r
-\r
- def laj_host_add(self, lhost):\r
- """\r
- Add one host into the job\r
- """\r
- self.laj_hosts.append(lhost)\r
-\r
- def laj_has_host(self, lhost):\r
- """\r
- Check whether a host is in this job\r
- """\r
- return lhost in self.laj_hosts\r
-\r
- def laj_host_remove(self, lhost):\r
- """\r
- Remove one host from the job\r
- """\r
- self.laj_hosts.remove(lhost)\r
-\r
- def laj_has_ip_address(self, ip_address):\r
- """\r
- Check whether a ip address is in this job\r
- """\r
- return ip_address in self.laj_ip_addresses\r
-\r
- def laj_ip_address_add(self, ip_address):\r
- """\r
-        Add one IP address into the job
- """\r
- self.laj_ip_addresses.append(ip_address)\r
-\r
- def laj_ip_address_remove(self, ip_address):\r
- """\r
-        Remove one IP address from the job
- """\r
- self.laj_ip_addresses.remove(ip_address)\r
-\r
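For orientation, the launcher side of this job lifecycle (see run_test() in the launcher above) boils down to the following sketch; the scheduler URL is illustrative, with the port matching TEST_SCHEDULER_PORT:

import xmlrpclib

proxy = xmlrpclib.ServerProxy("http://scheduler:1234", allow_none=True)
scheduler_id = proxy.ts_get_id()          # guards against a restarted scheduler
jobid = proxy.ts_job_start(scheduler_id)  # creates a TestSchedulerJob
# allocate with proxy.ts_resources_allocate(scheduler_id, jobid, descriptors),
# keep calling proxy.ts_job_heartbeat(scheduler_id, jobid) at least every
# TEST_HEARTBEAT_INTERVAL seconds, then release everything:
proxy.ts_job_stop(scheduler_id, jobid)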
-\r
-def rpc2descriptors(log, rpc_descriptors, same_kvm_host_descriptors,\r
- other_descriptors):\r
- """\r
- Parse the descriptors from RPC to objects\r
- """\r
- # pylint: disable=too-many-locals\r
- for descriptor in rpc_descriptors:\r
- descriptor_type = descriptor["rd_type"]\r
- number_min = descriptor["rd_number_min"]\r
- number_max = descriptor["rd_number_max"]\r
- resources = descriptor["rd_resources"]\r
- if descriptor_type == RESOURCE_TYPE_HOST:\r
- distro = descriptor["rdh_distro"]\r
- purpose = descriptor["rdh_purpose"]\r
- same_kvm_server = descriptor["rdh_same_kvm_server"]\r
- tag = descriptor["rdh_tag"]\r
- hosts = []\r
- for res in resources:\r
- hostname = res["lrh_hostname"]\r
- kvm_server_hostname = res["lrh_kvm_server_hostname"]\r
- expected_distro = res["lrh_expected_distro"]\r
- ipv4_addresses = res["lrh_ipv4_addresses"]\r
- kvm_template_ipv4_address = res["lrh_kvm_template_ipv4_address"]\r
- kvm_template = res["lrh_kvm_template"]\r
- host = RPCHost(hostname, kvm_server_hostname=kvm_server_hostname,\r
- expected_distro=expected_distro,\r
- ipv4_addresses=ipv4_addresses,\r
- kvm_template_ipv4_address=kvm_template_ipv4_address,\r
- kvm_template=kvm_template)\r
- hosts.append(host)\r
- host_desc = ResourceDescriptorHost(purpose, distro=distro,\r
- same_kvm_server=same_kvm_server,\r
- tag=tag,\r
- number_min=number_min,\r
- number_max=number_max,\r
- hosts=hosts)\r
- if same_kvm_server:\r
- same_kvm_host_descriptors.append(host_desc)\r
- else:\r
- other_descriptors.append(host_desc)\r
- elif descriptor_type == RESOURCE_TYPE_IP_ADDRESS:\r
- rpc_addresses = []\r
- for res in resources:\r
- address = res["ripa_address"]\r
- bindnetaddr = res["ripa_bindnetaddr"]\r
- rpc_address = RPCIPAddress(address, bindnetaddr)\r
- rpc_addresses.append(rpc_address)\r
- ip_desc = ResourceDescriptorIPAddress(number_min=number_min,\r
- number_max=number_max,\r
- rpc_addresses=rpc_addresses)\r
- other_descriptors.append(ip_desc)\r
- else:\r
- log.cl_error("wrong descriptor type [%s]", descriptor_type)\r
- return -1\r
- return 0\r
-\r
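xmlrpclib marshals these descriptor objects as plain dicts of their attributes, which is why the function above indexes them by field name. A host descriptor on the wire therefore looks roughly like this (numbers illustrative):

descriptor = {
    "rd_type": RESOURCE_TYPE_HOST,
    "rd_number_min": 1,
    "rd_number_max": 2,
    "rd_resources": [],            # filled with RPCHost dicts on success
    "rdh_distro": ssh_host.DISTRO_RHEL7,
    "rdh_purpose": PURPOSE_TEST,
    "rdh_same_kvm_server": False,
    "rdh_tag": None,
}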
-\r
-class TestScheduler(object):\r
- """\r
- The main object of the scheduler.\r
- """\r
- # pylint: disable=too-many-instance-attributes\r
- def __init__(self, log, scheduler_id, hosts, addresses):\r
- self.ts_log = log\r
- self.ts_resources = hosts + addresses\r
- self.ts_hosts = []\r
- self.ts_addresses = addresses\r
- self.ts_job_dict = {}\r
- self.ts_condition = threading.Condition()\r
- self.ts_jobid_sequence = 0\r
- self.ts_id = scheduler_id\r
- self.ts_id += ("_%d" % os.getpid())\r
- log.cl_info("ID of scheduler: [%s]", self.ts_id)\r
- self.ts_kvm_hosts_dict = {}\r
- for host in hosts:\r
- self._ts_add_host(host)\r
-\r
- def _ts_add_host(self, host):\r
- """\r
- Add host into the list\r
- """\r
- log = self.ts_log\r
- host.th_print_info(log)\r
- self.ts_hosts.append(host)\r
-\r
- kvm_server_hostname = host.th_kvm_server_hostname\r
-\r
- if kvm_server_hostname is not None:\r
- if kvm_server_hostname not in self.ts_kvm_hosts_dict:\r
- self.ts_kvm_hosts_dict[kvm_server_hostname] = []\r
- kvm_hosts = self.ts_kvm_hosts_dict[kvm_server_hostname]\r
- kvm_hosts.append(host)\r
-\r
- def ts_find_ip_address(self, ip_address):\r
- """\r
- Find the IP address by its hostname. Lock should be acquired in advance.\r
- """\r
- for ip_address_obj in self.ts_addresses:\r
- if ip_address_obj.ipa_address == ip_address:\r
- return ip_address_obj\r
- return None\r
-\r
- def ts_find_host(self, hostname):\r
- """\r
- Find the host by its hostname. Lock should be acquired in advance.\r
- """\r
- for lhost in self.ts_hosts:\r
- if lhost.th_hostname == hostname:\r
- return lhost\r
- return None\r
-\r
- def ts_get_id(self):\r
- """\r
-        Return the scheduler ID. The ID prevents clients from operating
-        on a wrong scheduler. Usually called remotely by the client.
- """\r
- return self.ts_id\r
-\r
- def ts_host_list(self, error):\r
- """\r
- List the hosts that the scheduler is managing. Usually called remotely\r
- by console.\r
- """\r
- log = self.ts_log\r
- log.cl_debug("listing host")\r
- format_string = "%-20s%-9s%-8s%-10s%-10s%-9s%-7s%-12s%-11s\n"\r
- output = format_string % ("Host", "Purpose", "Distro", "KVM host",\r
- "Job slot", "Job seq", "Error",\r
- "Clean time", "Next clean")\r
- output += '{0:->80}'.format("") + "\n"\r
- now = time.time()\r
- for lhost in self.ts_hosts:\r
- if error:\r
- if lhost.sr_concurrency > 0:\r
- continue\r
- if lhost.sr_is_clean:\r
- continue\r
- if lhost.sr_max_concurrency > 1:\r
- continue\r
- if lhost.sr_check_time == 0:\r
- fix_string = "not clean"\r
- if lhost.sr_cleaning:\r
- next_check_string = "cleaning"\r
- elif lhost.sr_concurrency > 0:\r
- next_check_string = "occupied"\r
- else:\r
- next_check_string = "initing"\r
- else:\r
- if not lhost.sr_is_clean:\r
- fix_string = "not clean"\r
- next_check = lhost.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL\r
- else:\r
- fix_time = time.gmtime(lhost.sr_check_time)\r
- fix_string = time.strftime("%H:%M:%S", fix_time)\r
- next_check = lhost.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL\r
- next_check_time = time.gmtime(next_check)\r
- next_check_string = time.strftime("%H:%M:%S", next_check_time)\r
- next_check_string += "(%d)" % (int(next_check - now))\r
- job_slot = ("%d/%d" % (lhost.sr_concurrency,\r
- lhost.sr_max_concurrency))\r
- output += (format_string %\r
- (lhost.th_hostname,\r
- lhost.th_purpose,\r
- lhost.th_distro,\r
- lhost.th_kvm_server_hostname,\r
- job_slot,\r
- lhost.sr_job_sequence,\r
- lhost.sr_error,\r
- fix_string,\r
- next_check_string))\r
- return output\r
-\r
- def ts_ip_address_list(self, error):\r
- """\r
- List the ip_address that the scheduler is managing. Usually called remotely\r
- by console.\r
- """\r
- log = self.ts_log\r
- log.cl_debug("listing IP addresses")\r
- format_string = "%-17s%-17s%-10s%-9s%-7s%-12s%-11s\n"\r
- output = format_string % ("IP", "Bindnetaddr",\r
- "Job slot", "Job seq", "Error",\r
- "Clean time", "Next clean")\r
- output += '{0:->80}'.format("") + "\n"\r
- now = time.time()\r
-\r
- for address in self.ts_addresses:\r
- if error:\r
- if address.sr_concurrency > 0:\r
- continue\r
- if address.sr_is_clean:\r
- continue\r
- if address.sr_max_concurrency > 1:\r
- continue\r
- if address.sr_check_time == 0:\r
- fix_string = "not clean"\r
- if address.sr_cleaning:\r
- next_check_string = "cleaning"\r
- elif address.sr_concurrency > 0:\r
- next_check_string = "occupied"\r
- else:\r
- next_check_string = "initing"\r
- else:\r
- if not address.sr_is_clean:\r
- fix_string = "not clean"\r
- next_check = address.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL\r
- else:\r
- fix_time = time.gmtime(address.sr_check_time)\r
- fix_string = time.strftime("%H:%M:%S", fix_time)\r
- next_check = address.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL\r
- next_check_time = time.gmtime(next_check)\r
- next_check_string = time.strftime("%H:%M:%S", next_check_time)\r
- next_check_string += "(%d)" % (int(next_check - now))\r
- job_slot = ("%d/%d" % (address.sr_concurrency,\r
- address.sr_max_concurrency))\r
- output += (format_string %\r
- (address.ipa_address,\r
- address.ipa_bindnetaddr,\r
- job_slot,\r
- address.sr_job_sequence,\r
- address.sr_error,\r
- fix_string,\r
- next_check_string))\r
- return output\r
-\r
- def _ts_host_list_allocate(self, host_list, job, distro, number_min,\r
- number_max, purpose, tag):\r
- """\r
-        Allocate hosts from a list; on failure, return []
- """\r
- # pylint: disable=too-many-arguments,no-self-use\r
- log = self.ts_log\r
-        log.cl_debug("allocating hosts from a list")
- rpc_hosts = []\r
- hosts = []\r
-        # reverse the host list each time, so that hosts at the tail of
-        # the list have an equal chance of being allocated
- host_list.reverse()\r
-\r
- # Check the potential hosts that can be allocated\r
- for host in host_list:\r
- if host.th_distro != distro:\r
- log.cl_debug("the distro [%s] of host [%s] != [%s]",\r
- host.th_distro, host.th_hostname, distro)\r
- continue\r
- if host.th_purpose != purpose:\r
- log.cl_debug("the purpose [%s] of host [%s] != [%s]",\r
- host.th_purpose, host.th_hostname, purpose)\r
- continue\r
- if tag is not None and host.th_tag != tag:\r
- log.cl_debug("the tag [%s] of host [%s] != [%s]",\r
- host.th_tag, host.th_hostname, tag)\r
- continue\r
- if host.sr_max_concurrency <= host.sr_concurrency:\r
-                log.cl_debug("no concurrency left for host [%s]", host.th_hostname)
- continue\r
- hosts.append(host)\r
-\r
- # Not enough hosts, abort\r
- if len(hosts) < number_min:\r
- log.cl_error("not enough hosts to allocate, needs [%d], have [%d]",\r
- number_min, len(hosts))\r
- return rpc_hosts\r
-\r
- hosts.sort(resource_compare)\r
- # Allocate the hosts\r
- for host in hosts:\r
- if len(rpc_hosts) >= number_max:\r
- break\r
- host.sr_concurrency += 1\r
- host.sr_job_sequence = job.laj_sequence\r
- job.laj_host_add(host)\r
- rpc_host = RPCHost(host.th_hostname,\r
- kvm_server_hostname=host.th_kvm_server_hostname,\r
- expected_distro=host.th_distro,\r
- ipv4_addresses=host.th_ipv4_addresses,\r
- kvm_template=host.th_kvm_template)\r
- rpc_hosts.append(rpc_host)\r
- log.cl_debug("preallocated host [%s] for job [%s]",\r
- host.th_hostname, job.laj_jobid)\r
- return rpc_hosts\r
-\r
- def _ts_job_allocate_ip_resource_holding_lock(self, job, desc):\r
- """\r
-        Allocate one IP resource; on failure, return -1
- """\r
- log = self.ts_log\r
-        log.cl_debug("allocating an IP resource for job [%s]", job.laj_jobid)
- rpc_addresses = []\r
-        # reverse the IP list each time, so that addresses at the tail of
-        # the list have an equal chance of being allocated
- self.ts_addresses.reverse()\r
-\r
- addresses = []\r
- # Check the potential addresses that can be allocated\r
- for address in self.ts_addresses:\r
- if address.sr_max_concurrency <= address.sr_concurrency:\r
- continue\r
-            # Can not allocate an IP that might be in use
- if not address.sr_is_clean:\r
- continue\r
- addresses.append(address)\r
-\r
-        # Not enough addresses, abort
- if len(addresses) < desc.rd_number_min:\r
-            log.cl_info("not enough IPs to allocate, needs [%d], have [%d]",
- desc.rd_number_min, len(addresses))\r
- return -1\r
-\r
- addresses.sort(resource_compare)\r
- # Allocate the address\r
- for address in addresses:\r
- if len(rpc_addresses) >= desc.rd_number_max:\r
- break\r
- address.sr_concurrency += 1\r
- address.sr_job_sequence = job.laj_sequence\r
- job.laj_ip_address_add(address)\r
- rpc_address = RPCIPAddress(address.ipa_address,\r
- address.ipa_bindnetaddr)\r
- rpc_addresses.append(rpc_address)\r
- desc.rd_resources = rpc_addresses\r
- return 0\r
-\r
- def _ts_job_allocate_host_resource_holding_lock(self, job, desc):\r
- """\r
-        Allocate one host resource; on failure, return -1
- """\r
- log = self.ts_log\r
- if not desc.rdh_same_kvm_server:\r
- log.cl_debug("allocating a host resource that doesn't need to "\r
- "share KVM server for job [%s]", job.laj_jobid)\r
- rpc_hosts = self._ts_host_list_allocate(self.ts_hosts, job,\r
- desc.rdh_distro,\r
- desc.rd_number_min,\r
- desc.rd_number_max,\r
- desc.rdh_purpose,\r
- desc.rdh_tag)\r
- else:\r
-            log.cl_debug("allocating a host resource that shares a KVM server "
-                         "for job [%s]", job.laj_jobid)
-            rpc_hosts = []
-            for host_list in self.ts_kvm_hosts_dict.values():
- rpc_hosts = self._ts_host_list_allocate(host_list, job,\r
- desc.rdh_distro,\r
- desc.rd_number_min,\r
- desc.rd_number_max,\r
- desc.rdh_purpose,\r
- desc.rdh_tag)\r
- if len(rpc_hosts) != 0:\r
- break\r
- if len(rpc_hosts) == 0:\r
- return -1\r
- desc.rd_resources = rpc_hosts\r
- return 0\r
-\r
- def _ts_job_allocate_resource_holding_lock(self, job, desc):\r
- """\r
-        Allocate one resource; on failure, return -1
- """\r
- log = self.ts_log\r
- log.cl_debug("allocating a resource for job [%s]", job.laj_jobid)\r
- if desc.rd_type == RESOURCE_TYPE_HOST:\r
- return self._ts_job_allocate_host_resource_holding_lock(job, desc)\r
- elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS:\r
- return self._ts_job_allocate_ip_resource_holding_lock(job, desc)\r
- else:\r
- log.cl_error("wrong resource type [%s]", desc.rd_type)\r
- return -1\r
-\r
- def _ts_job_allocate_resources_holding_lock(self, job, descs):\r
- """\r
-        Allocate multiple resources holding the lock; if any fails, return -1
- """\r
- log = self.ts_log\r
- log.cl_debug("allocating resources for job [%s]", job.laj_jobid)\r
- for desc in descs:\r
- ret = self._ts_job_allocate_resource_holding_lock(job, desc)\r
- if ret:\r
- log.cl_debug("failed to allocate resource, releasing "\r
- "allocated resource of job [%s]", job.laj_jobid)\r
- self._ts_job_release_resources_holding_lock(job, descs)\r
- return ret\r
- return 0\r
-\r
- def _ts_job_release_one_host_holding_lock(self, job, res):\r
- """\r
-        Release a host resource
- """\r
- log = self.ts_log\r
- log.cl_debug("releasing host [%s] for job [%s]", res.lrh_hostname,\r
- job.laj_jobid)\r
- test_host = self.ts_find_host(res.lrh_hostname)\r
- if test_host is None:\r
-            log.cl_error("failed to release host [%s], it does not exist in "
- "the scheduler", res.lrh_hostname)\r
- return -1\r
-\r
- if not job.laj_has_host(test_host):\r
- log.cl_error("failed to release host [%s], not used by the "\r
- "job [%s]", res.lrh_hostname, job.laj_jobid)\r
- return -1\r
-\r
- job.laj_host_remove(test_host)\r
- test_host.sr_job_sequence = None\r
- test_host.sr_concurrency -= 1\r
- return 0\r
-\r
- def _ts_job_release_one_ip_holding_lock(self, job, res):\r
- """\r
-        Release an IP address resource
- """\r
- log = self.ts_log\r
-        log.cl_debug("releasing an IP resource for job [%s]", job.laj_jobid)
- ip_address_obj = self.ts_find_ip_address(res.ripa_address)\r
- if ip_address_obj is None:\r
-            log.cl_error("failed to release IP address [%s], it does not exist in "
- "the scheduler", res.ripa_address)\r
- return -1\r
-\r
- if not job.laj_has_ip_address(ip_address_obj):\r
- log.cl_error("failed to release IP address [%s], not used by the "\r
- "job [%s]", res.ripa_address, job.laj_jobid)\r
- return -1\r
-\r
- job.laj_ip_address_remove(ip_address_obj)\r
- ip_address_obj.sr_job_sequence = None\r
- ip_address_obj.sr_concurrency -= 1\r
- return 0\r
-\r
- def _ts_job_release_one_holding_lock(self, job, res):\r
- """\r
- Release one resource\r
- """\r
- log = self.ts_log\r
- if res.rr_resource_type == RESOURCE_TYPE_IP_ADDRESS:\r
- return self._ts_job_release_one_ip_holding_lock(job, res)\r
- elif res.rr_resource_type == RESOURCE_TYPE_HOST:\r
- return self._ts_job_release_one_host_holding_lock(job, res)\r
- else:\r
- log.cl_error("wrong resource type [%s]", res.rr_resource_type)\r
- return -1\r
-\r
- def _ts_job_release_resource_holding_lock(self, job, desc):\r
- """\r
- Release a resource\r
- """\r
- log = self.ts_log\r
- log.cl_debug("releasing a resource for job [%s]", job.laj_jobid)\r
- retval = 0\r
- for res in desc.rd_resources[:]:\r
- ret = self._ts_job_release_one_holding_lock(job, res)\r
- if ret:\r
- log.cl_error("failed to release one resource")\r
- retval = ret\r
- else:\r
- desc.rd_resources.remove(res)\r
-\r
- return retval\r
-\r
- def _ts_job_release_resources_holding_lock(self, job, descs):\r
- """\r
-        Release multiple resources
- """\r
- log = self.ts_log\r
- log.cl_debug("releasing resources for job [%s]", job.laj_jobid)\r
- retval = 0\r
- for desc in descs:\r
- ret = self._ts_job_release_resource_holding_lock(job, desc)\r
- if ret:\r
- retval = ret\r
- return retval\r
-\r
- def _ts_resources_dirty_holding_lock(self, log, jobid, descs):\r
- """\r
- Dirty the resources in the descriptors\r
- Only call this when about to return the resources to client\r
- """\r
- # pylint: disable=no-self-use\r
- host_names = []\r
- ip_addresses = []\r
- for desc in descs:\r
- for rpc_res in desc.rd_resources:\r
- if desc.rd_type == RESOURCE_TYPE_HOST:\r
- res = self.ts_find_host(rpc_res.lrh_hostname)\r
- host_names.append(rpc_res.lrh_hostname)\r
- elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS:\r
- res = self.ts_find_ip_address(rpc_res.ripa_address)\r
- ip_addresses.append(rpc_res.ripa_address)\r
- else:\r
- log.cl_error("invalid resource type [%s]", desc.rd_type)\r
- return -1\r
- res.sr_dirty()\r
-\r
-        log.cl_info("allocated hosts %s and IPs %s for job [%s]",
- host_names, ip_addresses, jobid)\r
- return 0\r
-\r
- def ts_resources_allocate(self, scheduler_id, jobid, descriptors):\r
- """\r
-        Allocate multiple resources; if any of them fails, return []
- """\r
- log = self.ts_log\r
- log.cl_debug("allocating resources for job [%s]", jobid)\r
- if scheduler_id != self.ts_id:\r
- log.cl_error("wrong scheduler ID [%s], expected [%s]",\r
- scheduler_id, self.ts_id)\r
- return []\r
-\r
- same_kvm_host_descriptors = []\r
- other_descriptors = []\r
- ret = rpc2descriptors(log, descriptors, same_kvm_host_descriptors,\r
- other_descriptors)\r
- if ret:\r
- log.cl_error("failed to parse resource descriptors from RPC")\r
- return []\r
- ret_descriptors = same_kvm_host_descriptors + other_descriptors\r
-\r
- self.ts_condition.acquire()\r
- if jobid not in self.ts_job_dict:\r
- log.cl_error("resource allocation from unknown job [%s]", jobid)\r
- self.ts_condition.release()\r
- return []\r
- job = self.ts_job_dict[jobid]\r
- ret = self._ts_job_allocate_resources_holding_lock(job, same_kvm_host_descriptors)\r
- if ret == 0:\r
- ret = self._ts_job_allocate_resources_holding_lock(job, other_descriptors)\r
- if ret:\r
-            log.cl_debug("failed to allocate resources for job [%s]", jobid)
- else:\r
-            log.cl_error("failed to allocate host resources that share the "
- "same KVM server for job [%s]", jobid)\r
- ret = -1\r
-\r
- if ret:\r
- log.cl_debug("releasing allocated resource of job [%s]", jobid)\r
- self._ts_job_release_resources_holding_lock(job, ret_descriptors)\r
- ret_descriptors = []\r
- else:\r
- ret = self._ts_resources_dirty_holding_lock(log, jobid,\r
- ret_descriptors)\r
- if ret:\r
- self._ts_job_release_resources_holding_lock(job, ret_descriptors)\r
- ret_descriptors = []\r
-\r
- job.laj_check_time = time_util.utcnow()\r
- self.ts_condition.release()\r
- return ret_descriptors\r
-\r
- def _ts_print_release_message(self, log, jobid, descs):\r
- """\r
- Print the release message\r
- """\r
- # pylint: disable=no-self-use\r
- host_names = []\r
- ip_addresses = []\r
- for desc in descs:\r
- for rpc_res in desc.rd_resources:\r
- if desc.rd_type == RESOURCE_TYPE_HOST:\r
- host_names.append(rpc_res.lrh_hostname)\r
- elif desc.rd_type == RESOURCE_TYPE_IP_ADDRESS:\r
- ip_addresses.append(rpc_res.ripa_address)\r
- else:\r
- log.cl_error("invalid resource type [%s]", desc.rd_type)\r
- return -1\r
-\r
- log.cl_info("releasing hosts %s and IPs %s for job [%s]",\r
- host_names, ip_addresses, jobid)\r
- return 0\r
-\r
- def ts_resources_release(self, scheduler_id, jobid, descriptors):\r
- """\r
- Release multiple resources\r
- """\r
- log = self.ts_log\r
- if scheduler_id != self.ts_id:\r
- log.cl_error("wrong scheduler ID [%s], expected [%s]",\r
- scheduler_id, self.ts_id)\r
- return -1\r
-\r
- same_kvm_host_descriptors = []\r
- other_descriptors = []\r
- ret = rpc2descriptors(log, descriptors, same_kvm_host_descriptors,\r
- other_descriptors)\r
- if ret:\r
- log.cl_error("failed to parse resource descriptors from RPC")\r
- return -1\r
-\r
- descs = same_kvm_host_descriptors + other_descriptors\r
- self._ts_print_release_message(log, jobid, descs)\r
-\r
- self.ts_condition.acquire()\r
- if jobid not in self.ts_job_dict:\r
- log.cl_error("resource releasing from unknown job [%s]", jobid)\r
- self.ts_condition.release()\r
- return -1\r
- job = self.ts_job_dict[jobid]\r
- ret = self._ts_job_release_resources_holding_lock(job, descs)\r
- job.laj_check_time = time_util.utcnow()\r
- self.ts_condition.notifyAll()\r
- self.ts_condition.release()\r
- return ret\r
-\r
- def ts_ip_cleanup(self, ip_address):\r
- """\r
-        Clean up an IP address
- """\r
- log = self.ts_log\r
- log.cl_info("cleaning up IP address [%s]", ip_address)\r
- res = self.ts_find_ip_address(ip_address)\r
- if res is None:\r
-            log.cl_error("failed to clean up IP address [%s], it does not "
-                         "exist in the scheduler", ip_address)
- return -1\r
-\r
- ret = self.ts_resource_cleanup(res)\r
- if ret:\r
-            log.cl_error("failure during cleanup of IP [%s]",
- ip_address)\r
- return -1\r
-\r
- log.cl_info("cleaned up IP [%s]", ip_address)\r
- return 0\r
-\r
- def ts_host_cleanup(self, hostname):\r
- """\r
-        Clean up a host
- """\r
- log = self.ts_log\r
- log.cl_info("cleaning up host [%s]", hostname)\r
- res = self.ts_find_host(hostname)\r
- if res is None:\r
-            log.cl_error("failed to clean up host [%s], it does not "
-                         "exist in the scheduler", hostname)
- return -1\r
-\r
- ret = self.ts_resource_cleanup(res)\r
- if ret:\r
-            log.cl_error("failure during cleanup of host [%s]",
- hostname)\r
- return -1\r
-\r
- log.cl_info("cleaned up host [%s]", hostname)\r
- return 0\r
-\r
- def ts_job_start(self, scheduler_id):\r
- """\r
-        Start a job in the scheduler. Usually called remotely by the client.
- """\r
- log = self.ts_log\r
- if scheduler_id != self.ts_id:\r
- return -1\r
- self.ts_condition.acquire()\r
- sequence = self.ts_jobid_sequence\r
- jobid = time_util.local_strftime(time_util.utcnow(), "%Y-%m-%d-%H_%M_%S")\r
- jobid += ("-%d" % sequence)\r
- self.ts_jobid_sequence += 1\r
-        log.cl_info("starting a new job [%s]", jobid)
-\r
- job = TestSchedulerJob(self, jobid, sequence)\r
- self.ts_job_dict[jobid] = job\r
- self.ts_condition.release()\r
- return jobid\r
-\r
- def ts_job_list(self):\r
- """\r
- List all active jobs in the scheduler. Usually called remotely by\r
- console.\r
- """\r
- log = self.ts_log\r
- log.cl_info("listing job")\r
- format_string = "%-25s%-6s%-10s\n"\r
- job_names = format_string % ("Name", "Hosts", "Heartbeat")\r
- job_names += "{0:->30}".format("") + "\n"\r
-\r
- now = time_util.utcnow()\r
- self.ts_condition.acquire()\r
- for job in self.ts_job_dict.values():\r
- diff = (now - job.laj_check_time).seconds\r
- diff_string = str(diff)\r
- if diff > TEST_HEARTBEAT_TIMEOUT:\r
- diff_string += "*"\r
- job_names += (format_string %\r
- (job.laj_jobid, str(len(job.laj_hosts)),\r
- diff_string))\r
- self.ts_condition.release()\r
- return job_names\r
-\r
- def _ts_resource_cleanup_holding_concurrency(self, res):\r
- """\r
-        Check and fix a resource
-        This function assumes the concurrency of the resource has already been held
- """\r
- # pylint: disable=bare-except\r
- log = self.ts_log\r
-        # try to clean up the resource; any exception counts as failure
- log.cl_debug("checking resource [%s]", res.sr_name)\r
- try:\r
- ret = res.sr_cleanup(log, self)\r
- except:\r
- ret = -1\r
- log.cl_error("exception when cleaning up resource [%s]: [%s]",\r
- res.sr_name, traceback.format_exc())\r
- if ret:\r
- res.sr_error += 1\r
- res.sr_is_clean = False\r
- ret = -1\r
- else:\r
- res.sr_is_clean = True\r
- ret = 0\r
- res.sr_check_time = time.time()\r
-\r
- self.ts_condition.acquire()\r
- res.sr_concurrency = 0\r
- self.ts_condition.release()\r
- return ret\r
-\r
- def ts_resource_cleanup(self, res):\r
- """\r
-        Check and fix a resource
- """\r
- # pylint: disable=bare-except\r
- log = self.ts_log\r
- self.ts_condition.acquire()\r
-        # If the node is being used by some job, skip it. The job reclaim
-        # routine will release dead nodes.
- if res.sr_concurrency > 0:\r
- self.ts_condition.release()\r
- log.cl_info("res [%s] is busy, skipping", res.sr_name)\r
- return ScheduledResource.RESOURCE_IS_BUSY\r
- # Set concurrency to max so no other one can use it.\r
- res.sr_concurrency = res.sr_max_concurrency\r
- self.ts_condition.release()\r
-\r
- return self._ts_resource_cleanup_holding_concurrency(res)\r
-\r
- def ts_recovery_main(self):\r
- """\r
-        Check the health of each node and repair it if necessary.
- """\r
- # pylint: disable=bare-except\r
- self.ts_condition.acquire()\r
- log = self.ts_log\r
- while True:\r
- log.cl_debug("recovery thread is checking resources")\r
- fix_res = None\r
- res_fix_time = None\r
- now = time.time()\r
- wakeup_time = now + MIN_GOOD_RES_CHECK_INTERVAL\r
- for res in self.ts_resources:\r
- # Ignore the busy resources\r
- if res.sr_concurrency > 0:\r
- continue\r
-\r
- if res.sr_is_clean:\r
- fix_time = res.sr_check_time + MIN_GOOD_RES_CHECK_INTERVAL\r
- else:\r
- fix_time = res.sr_check_time + MIN_BAD_RES_CHECK_INTERVAL\r
-\r
- if fix_time > now:\r
- if fix_time < wakeup_time:\r
- wakeup_time = fix_time\r
- continue\r
-\r
- if (fix_res is None or\r
- res_fix_time < fix_time):\r
- fix_res = res\r
- res_fix_time = fix_time\r
- if fix_res is not None:\r
- # Hold the concurrency and create a thread to fix it\r
- fix_res.sr_concurrency = fix_res.sr_max_concurrency\r
- self.ts_condition.release()\r
- utils.thread_start(self._ts_resource_cleanup_holding_concurrency,\r
- (fix_res, ))\r
- self.ts_condition.acquire()\r
- continue\r
-            # Sleep until something happens or it is time to fix again
- now = time.time()\r
- if wakeup_time > now:\r
- sleep_time = wakeup_time - now\r
- log.cl_debug("recovery thread is going to sleep for [%s] "\r
- "seconds", sleep_time)\r
- start_time = now\r
- self.ts_condition.wait(sleep_time)\r
- now = time.time()\r
- log.cl_debug("recovery thread slept [%s] seconds",\r
- now - start_time)\r
- self.ts_condition.release()\r
-\r
- def ts_jobs_check(self):\r
- """\r
-        Check the timeouts of all active jobs. The scheduler checks all
-        jobs periodically to clean up timed-out jobs.
- """\r
- log = self.ts_log\r
- log.cl_debug("scheduler is checking jobs")\r
- now = time_util.utcnow()\r
- stopped = False\r
- self.ts_condition.acquire()\r
- for job in self.ts_job_dict.values():\r
- log.cl_info("checking job [%s]", job.laj_jobid)\r
- diff = (now - job.laj_check_time).seconds\r
- if diff > TEST_HEARTBEAT_TIMEOUT:\r
- self._ts_job_stop(job)\r
- stopped = True\r
- if stopped:\r
- self.ts_condition.notifyAll()\r
- self.ts_condition.release()\r
- log.cl_debug("scheduler checked jobs")\r
-\r
- def _ts_job_stop(self, job):\r
- """\r
- Stop a job.\r
- """\r
- log = self.ts_log\r
- log.cl_info("job [%s] stopping", job.laj_jobid)\r
- for lhost in job.laj_hosts[:]:\r
- job.laj_host_remove(lhost)\r
- lhost.sr_job_sequence = None\r
- lhost.sr_concurrency -= 1\r
- for res in job.laj_ip_addresses[:]:\r
- job.laj_ip_address_remove(res)\r
- res.sr_job_sequence = None\r
- res.sr_concurrency -= 1\r
- del self.ts_job_dict[job.laj_jobid]\r
-\r
- def ts_job_stop(self, scheduler_id, jobid):\r
- """\r
- Stop a job. Usually called remotely by client or console.\r
- """\r
- log = self.ts_log\r
- if scheduler_id != self.ts_id:\r
- return -1\r
- self.ts_condition.acquire()\r
- if jobid not in self.ts_job_dict:\r
- log.cl_error("stopping unknown job [%s]", jobid)\r
- self.ts_condition.release()\r
- return -1\r
- job = self.ts_job_dict[jobid]\r
- self._ts_job_stop(job)\r
- self.ts_condition.notifyAll()\r
- self.ts_condition.release()\r
- return 0\r
-\r
- def ts_job_heartbeat(self, scheduler_id, jobid):\r
- """\r
-        Handle a heartbeat from a job. Usually called remotely by the client.
- """\r
- log = self.ts_log\r
- if scheduler_id != self.ts_id:\r
- log.cl_info("got a heartbeat from job [%s] with wrong scheduler "\r
- "ID, expected [%s], got [%s]", jobid, self.ts_id,\r
- scheduler_id)\r
- return -1\r
-        log.cl_info("received heartbeat of job [%s]", jobid)
- self.ts_condition.acquire()\r
- if jobid not in self.ts_job_dict:\r
- log.cl_error("heartbeat from unknown job [%s]", jobid)\r
- self.ts_condition.release()\r
- return -1\r
- job = self.ts_job_dict[jobid]\r
- job.laj_check_time = time_util.utcnow()\r
- self.ts_condition.release()\r
- return 0\r
-\r
-\r
-def server_main(scheduler, scheduler_port):\r
- """\r
- Main function of scheduler thread.\r
- """\r
- server = SimpleXMLRPCServer.SimpleXMLRPCServer(("0.0.0.0",\r
- scheduler_port),\r
- allow_none=True)\r
- server.register_introspection_functions()\r
- server.register_instance(scheduler)\r
- while not SHUTTING_DOWN:\r
- server.handle_request()\r
-\r
-\r
-def parse_config_test_hosts(log, test_host_configs, kvm_template_dict):\r
- """\r
- Parse test hosts from configuration.\r
-    :param test_host_configs: the host entries of the test hosts section
-    :return: a list of TestHost objects, None on failure
- """\r
- # pylint: disable=too-many-locals,too-many-branches,too-many-statements\r
- compute_node = re.compile(r"(?P<comname>[\w\-.]+)"\r
- r"(?P<range>\[(?P<start>\d+)\-(?P<stop>\d+)\])?",\r
- re.VERBOSE)\r
- hosts = list()\r
- for node_conf in test_host_configs:\r
- node_hostname = node_conf.get(cstr.CSTR_HOSTNAME)\r
- if node_hostname is None:\r
- log.cl_error("no [%s] found in items of section [%s]",\r
- cstr.CSTR_HOSTNAME, cstr.CSTR_TEST_HOSTS)\r
- return None\r
-\r
- match = compute_node.match(node_hostname)\r
- if not match or not match.group("comname"):\r
- log.cl_error("wrong format of hostname configuration [%s]",\r
- node_hostname)\r
- return None\r
-\r
- distro = node_conf.get(cstr.CSTR_DISTRO)\r
- if distro is None:\r
- log.cl_error("no [%s] found of node configuration [%s]",\r
- cstr.CSTR_DISTRO, node_conf)\r
- return None\r
-\r
- purpose = node_conf.get(cstr.CSTR_PURPOSE)\r
- if purpose is None:\r
- log.cl_error("no [%s] found of node configuration [%s]",\r
- cstr.CSTR_PURPOSE, node_conf)\r
- return None\r
-\r
- if purpose != PURPOSE_BUILD and purpose != PURPOSE_TEST:\r
- log.cl_error("unknown purpose [%s] of test host configuration [%s]",\r
- purpose, node_conf)\r
- return None\r
-\r
- if purpose == PURPOSE_BUILD:\r
- concurrency = node_conf.get(cstr.CSTR_CONCURRENCY)\r
- if concurrency is None:\r
- log.cl_error("no [%s] found of node configuration [%s]",\r
- cstr.CSTR_CONCURRENCY, node_conf)\r
- return None\r
- else:\r
- concurrency = 1\r
- kvm = node_conf.get(cstr.CSTR_KVM)\r
- if kvm is None:\r
- log.cl_debug("no [%s] found of kvm host configuration [%s]",\r
- cstr.CSTR_KVM, node_conf)\r
- kvm_server_hostname = None\r
- kvm_template_ipv4_address = None\r
- template_hostname = None\r
- kvm_template = None\r
- else:\r
- kvm_server_hostname = kvm.get(cstr.CSTR_KVM_SERVER_HOSTNAME)\r
- if kvm_server_hostname is None:\r
- log.cl_error("no [%s] found of kvm host configuration [%s]",\r
- cstr.CSTR_KVM_SERVER_HOSTNAME, kvm)\r
- return None\r
-\r
- kvm_template_ipv4_address = kvm.get(cstr.CSTR_KVM_TEMPLATE_IPV4_ADDRESS)\r
- if kvm_template_ipv4_address is None:\r
- log.cl_error("no [%s] found of kvm host configuration [%s]",\r
- cstr.CSTR_KVM_TEMPLATE_IPV4_ADDRESS, kvm)\r
- return None\r
-\r
- template_hostname = kvm.get(cstr.CSTR_TEMPLATE_HOSTNAME)\r
- if template_hostname is None:\r
- log.cl_error("no [%s] found of kvm host configuration [%s]",\r
- cstr.CSTR_TEMPLATE_HOSTNAME, kvm)\r
- return None\r
-\r
- if template_hostname not in kvm_template_dict:\r
- log.cl_error("no VM template with hostname [%s] is configured",\r
- template_hostname)\r
- return None\r
- kvm_template = kvm_template_dict[template_hostname]\r
-\r
- tag = node_conf.get(cstr.CSTR_TAG)\r
-\r
- comname = match.group("comname")\r
- if not match.group("range"):\r
- # This assumes the /etc/hosts or LDAP is properly configured so\r
- # we can get the IP by the hostname\r
- ipv4_address = socket.gethostbyname(comname)\r
- ipv4_addresses = [ipv4_address]\r
-\r
- l_host = TestHost(comname, distro, purpose, tag,\r
- concurrency, ipv4_addresses=ipv4_addresses,\r
- kvm_server_hostname=kvm_server_hostname,\r
- kvm_template_ipv4_address=kvm_template_ipv4_address,\r
- kvm_template=kvm_template)\r
- hosts.append(l_host)\r
- continue\r
-\r
- start = int(match.group("start"))\r
- stop = int(match.group("stop")) + 1\r
- if start > stop:\r
- log.cl_error("range error in host configuration [%s]", node_conf)\r
- return None\r
- for i in range(start, stop):\r
- hostname = ("%s%d" % (comname, i))\r
- ipv4_address = socket.gethostbyname(hostname)\r
- ipv4_addresses = [ipv4_address]\r
- l_host = TestHost(hostname, distro, purpose, tag,\r
- concurrency, kvm_server_hostname=kvm_server_hostname,\r
- kvm_template_ipv4_address=kvm_template_ipv4_address,\r
- ipv4_addresses=ipv4_addresses,\r
- kvm_template=kvm_template)\r
- hosts.append(l_host)\r
- return hosts\r
-\r
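The compute_node pattern above accepts either a plain hostname or a hostname with a bracketed numeric range, which the loop expands into one TestHost per index. A quick illustration with a made-up hostname:

import re

compute_node = re.compile(r"(?P<comname>[\w\-.]+)"
                          r"(?P<range>\[(?P<start>\d+)\-(?P<stop>\d+)\])?")

match = compute_node.match("vm[7-9]")
print match.group("comname"), match.group("start"), match.group("stop")
# prints: vm 7 9 -- which the caller expands into vm7, vm8 and vm9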
-\r
-def parse_config_test_hosts_and_templates(log, workspace, config, config_file):\r
- """\r
- Parse the scheduler configuration\r
- """\r
- test_host_configs = config.get(cstr.CSTR_TEST_HOSTS)\r
- if test_host_configs is None:\r
- log.cl_error("no section [%s] found in configuration file [%s]",\r
- cstr.CSTR_TEST_HOSTS, config_file)\r
- return None\r
-\r
- kvm_template_dict = lipe_virt.parse_templates_config(log, workspace,\r
- config, config_file,\r
- hosts=None)\r
- if kvm_template_dict is None:\r
- log.cl_error("failed to parse template configs in file [%s]",\r
- config_file)\r
- return None\r
-\r
- test_hosts = parse_config_test_hosts(log, test_host_configs, kvm_template_dict)\r
- if test_hosts is None:\r
- log.cl_error("failed to parse [%s] from configuration file [%s]",\r
- cstr.CSTR_TEST_HOSTS, config_file)\r
- return None\r
-\r
- return test_hosts\r
-\r
-\r
-def parse_config_ip_addresses(log, config, config_fpath):\r
- """\r
-    Parse the IP address config
- """\r
- ip_addresses = []\r
- address_configs = config.get(cstr.CSTR_IP_ADDRESSES)\r
- if address_configs is None:\r
- log.cl_error("no section [%s] found in configuration file [%s]",\r
- cstr.CSTR_IP_ADDRESSES, config_fpath)\r
- return None\r
-\r
- for address_config in address_configs:\r
- address = address_config.get(cstr.CSTR_IP_ADDRESS)\r
- if address is None:\r
- log.cl_error("one of the config in [%s] doesn't have [%s] "\r
- "configured, please correct configuration file [%s]",\r
- cstr.CSTR_IP_ADDRESSES, cstr.CSTR_IP_ADDRESS,\r
- config_fpath)\r
- return None\r
-\r
- bindnetaddr = address_config.get(cstr.CSTR_BINDNETADDR)\r
- if bindnetaddr is None:\r
- log.cl_error("the config of ip address with [%s] in [%s] doesn't "\r
- "have [%s] configured, please correct configuration "\r
- "file [%s]", address, cstr.CSTR_IP_ADDRESSES,\r
- cstr.CSTR_BINDNETADDR, config_fpath)\r
- return None\r
-\r
- ip_address = IPAddress(address, bindnetaddr)\r
- ip_addresses.append(ip_address)\r
- return ip_addresses\r
-\r
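After yaml.load() the section parsed here is simply a list of dicts. Assuming the cstr constants expand to the literal key names shown below (an assumption -- cstr is not part of this patch), the expected structure is:

config = {
    "ip_addresses": [                   # cstr.CSTR_IP_ADDRESSES
        {"ip_address": "10.0.1.51",     # cstr.CSTR_IP_ADDRESS
         "bindnetaddr": "10.0.0.0"},    # cstr.CSTR_BINDNETADDR
    ],
}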
-\r
-def signal_handler(signum, frame):\r
- """\r
-    Signal handler. Sets the shutting-down flag.
- """\r
- # pylint: disable=unused-argument,global-statement\r
- log = GLOBAL_LOG\r
- log.cl_info("signal handler called with signal [%d]", signum)\r
- global SHUTTING_DOWN\r
- SHUTTING_DOWN = True\r
-\r
-\r
-def lipe_test_scheduler(log, workspace, config_fpath):\r
- """\r
-    Start the LiPE test scheduler while holding the configure lock
- """\r
- # pylint: disable=bare-except,global-statement\r
- global GLOBAL_LOG\r
-\r
- GLOBAL_LOG = log\r
-\r
- config_fd = open(config_fpath)\r
- ret = 0\r
- try:\r
- config = yaml.load(config_fd)\r
- except:\r
- log.cl_error("not able to load [%s] as yaml file: %s", config_fpath,\r
- traceback.format_exc())\r
- ret = -1\r
- config_fd.close()\r
- if ret:\r
- return -1\r
-\r
- scheduler_id = os.path.basename(workspace)\r
-    log.cl_info("LiPE test scheduler started, please check [%s] for logs",
- workspace)\r
-\r
- scheduler_port = config.get(cstr.CSTR_PORT)\r
- if scheduler_port is None:\r
- scheduler_port = TEST_SCHEDULER_PORT\r
-\r
- addresses = parse_config_ip_addresses(log, config,\r
- config_fpath)\r
- if addresses is None:\r
- log.cl_error("failed to parse config of addresses")\r
- return -1\r
-\r
- test_hosts = parse_config_test_hosts_and_templates(log, workspace, config,\r
- config_fpath)\r
- if test_hosts is None:\r
- log.cl_error("failed to parse config test hosts and templates")\r
- return -1\r
-\r
- scheduler = TestScheduler(log, scheduler_id, test_hosts, addresses)\r
- output = scheduler.ts_host_list(False)\r
- log.cl_info("\n%s", output)\r
-\r
- output = scheduler.ts_ip_address_list(False)\r
- log.cl_info("\n%s", output)\r
-    # Set signal handlers before starting to handle requests.
- signal.signal(signal.SIGINT, signal_handler)\r
- signal.signal(signal.SIGTERM, signal_handler)\r
-\r
- utils.thread_start(server_main, (scheduler, scheduler_port))\r
- utils.thread_start(scheduler.ts_recovery_main, ())\r
-\r
- while not SHUTTING_DOWN:\r
- scheduler.ts_jobs_check()\r
- time.sleep(TEST_HEARTBEAT_TIMEOUT)\r
- log.cl_info("stopping test scheduler service")\r
- return 0\r
-\r
-\r
-def main():\r
- """\r
-    Start the LiPE test scheduler
- """\r
- cmd_general.main(TEST_SCHEDULER_CONFIG, TEST_SCHEDULER_LOG_DIR,\r
- lipe_test_scheduler)\r
+++ /dev/null
-#!/usr/bin/python2 -u
-# Copyright (c) 2019 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Check the imports of the pyltest source code
-"""
-from pybuild import pyltest_import_check
-
-if __name__ == "__main__":
- pyltest_import_check.main()
"daemon",
"install_common",
"install_common_nodeps",
- "lipe_virt",
"lustre",
- "lustre_test",
"lyaml",
"parallel",
"rwlock",
"ssh_host",
- "test_common",
"time_util",
"utils",
"watched_io"]
LIPE_INSTALL_CONFIG_FNAME = "lipe_install.conf"
LIPE_INSTALL_CONFIG = ETC_DIR_PATH + LIPE_INSTALL_CONFIG_FNAME
-LIPE_TEST_CONFIG_FNAME = "lipe_test.conf"
-LIPE_TEST_CONFIG = ETC_DIR_PATH + LIPE_TEST_CONFIG_FNAME
-LIPE_TEST_LOG_DIR_BASENAME = "lipe_test"
-LIPE_TEST_LOG_DIR = VAR_LOG_PATH + "/" + LIPE_TEST_LOG_DIR_BASENAME
-
LIPE_BUILD_CONFIG_FNAME = "lipe_build.conf"
LIPE_BUILD_CONFIG = ETC_DIR_PATH + LIPE_BUILD_CONFIG_FNAME
LIPE_BUILD_LOG_DIR_BASENAME = "lipe_build_log"
+++ /dev/null
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Library for installing virtual machines
-"""
-# pylint: disable=too-many-lines
-import sys
-import traceback
-import random
-import re
-import yaml
-
-# Local libs
-from pylustre import utils
-from pylustre import ssh_host
-from pylustre import lustre
-from pylustre import cstr
-from pylustre import cmd_general
-
-LIPE_VIRT_CONFIG_FNAME = "lipe_virt.conf"
-LIPE_VIRT_CONFIG = "/etc/" + LIPE_VIRT_CONFIG_FNAME
-LIPE_VIRT_LOG_DIR = "/var/log/lipe_virt"
-LIPE_UDEV_RULES = "/etc/udev/rules.d/80-lipe-name.rules"
-LVIRT_IMAGE_SHARED_SUBFIX = "_shared"
-
-
-class VirtTemplate(object):
- """
- Each virtual machine template has an object of this type
- """
- # pylint: disable=too-few-public-methods,too-many-instance-attributes
- # pylint: disable=too-many-arguments
- def __init__(self, iso, template_hostname, internet, network_configs,
- image_dir, distro, ram_size, disk_sizes, dns,
- bus_type=cstr.CSTR_BUS_SCSI,
- server_host=None, server_host_id=None, reinstall=None):
- self.vt_server_host = server_host
- self.vt_server_host_id = server_host_id
- self.vt_reinstall = reinstall
-
- self.vt_dns = dns
- self.vt_iso = iso
- self.vt_template_hostname = template_hostname
- self.vt_internet = internet
- self.vt_network_configs = network_configs
- self.vt_image_dir = image_dir
- self.vt_distro = distro
- self.vt_ram_size = ram_size
- self.vt_disk_sizes = disk_sizes
- self.vt_bus_type = bus_type
-
-
-class SharedDisk(object):
- """
- Each shared disk has an object of this type
- """
- # pylint: disable=too-few-public-methods,too-many-arguments
- def __init__(self, disk_id, server_host, server_host_id, image_prefix, size):
- self.sd_disk_id = disk_id
- self.sd_server_host = server_host
- self.sd_image_fpath = image_prefix + LVIRT_IMAGE_SHARED_SUBFIX
- self.sd_size = size
- self.sd_server_host_id = server_host_id
- self.sd_targets = []
-
- def _sd_create(self, log):
- """
- Create the shared disk
- """
- command = ("qemu-img create -f raw %s %sG" %
- (self.sd_image_fpath, self.sd_size))
-
- retval = self.sd_server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- self.sd_server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
- def sd_add_target(self, log, target):
- """
- Add a host which shares this disk
- """
- log.cl_info("adding target [%s] on host [%s] to device [%s]",
- target.st_target_name, target.st_host.sh_hostname,
- self.sd_disk_id)
- self.sd_targets.append(target)
-
- def _sd_share_target(self, log, target):
- """
- Share the disk with the host
- """
- # pylint: disable=too-many-branches,too-many-return-statements
- target_name = target.st_target_name
- host = target.st_host
-
- ret, devices = host.sh_lsscsi(log)
- if ret:
- log.cl_error("failed to get device on host [%s]",
- host.sh_hostname)
- return -1
-
- command = ("virsh attach-disk %s %s %s --subdriver raw --persistent --cache=directsync" %
- (host.sh_hostname, self.sd_image_fpath, target_name))
- retval = self.sd_server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- self.sd_server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
-    # Wait until the new disk shows up in lsscsi on the VM before
-    # looking for it
- ret = utils.wait_condition(log, host_check_lsscsi,
- (host, len(devices) + 1))
- if ret:
- log.cl_error("timeout when waiting the device number of host "
- "[%s]", host.sh_hostname)
- return ret
-
- ret, new_devices = host.sh_lsscsi(log)
- if ret:
- log.cl_error("failed to get device on host [%s]",
- host.sh_hostname)
- return -1
-
- if len(new_devices) != len(devices) + 1:
- log.cl_error("unexpected new devices number %s on host [%s], old "
- "devices %s", new_devices, host.sh_hostname, devices)
- return -1
-
- new_device = None
- for device in new_devices:
- if device not in devices:
- if new_device is not None:
- log.cl_error("unexpected new devices %s on host [%s], "
- "old devices %s", new_devices,
- host.sh_hostname, devices)
- return -1
- new_device = device
-
- serial = host.sh_device_serial(log, new_device)
- if serial is None:
-        log.cl_error("failed to get serial of device [%s] on host [%s]",
-                     new_device, host.sh_hostname)
- return -1
-
- log.cl_debug("added device [%s] with serial number [%s] on host [%s]",
- new_device, serial, host.sh_hostname)
- command = ('echo \'ENV{ID_SERIAL}=="%s", SYMLINK+="mapper/%s"\' >> %s' %
- (serial, self.sd_disk_id, LIPE_UDEV_RULES))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = "udevadm control --reload-rules && udevadm trigger"
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- device_link = "/dev/mapper/" + self.sd_disk_id
- command = "readlink -f %s" % device_link
- expect_stdout = new_device + "\n"
- ret = host.sh_wait_update(log, command, expect_exit_status=0,
- expect_stdout=expect_stdout)
- if ret:
- log.cl_error("created wrong symlink [%s] on host "
- "[%s], expected [%s]",
- device_link, host.sh_hostname, new_device)
- return -1
- return 0
-
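
# A minimal sketch (not from the original module) of the new-device
# detection that _sd_share_target() performs inline above: given the
# lsscsi output before and after "virsh attach-disk", exactly one new
# device is expected. Factored out, the invariant is testable without a VM.
def find_new_device(old_devices, new_devices):
    """Return the single device in new_devices but not in old_devices,
    or None if there is not exactly one such device."""
    added = [dev for dev in new_devices if dev not in old_devices]
    if len(added) == 1:
        return added[0]
    return None

# find_new_device(["/dev/sda"], ["/dev/sda", "/dev/sdb"]) -> "/dev/sdb"
# find_new_device(["/dev/sda"], ["/dev/sda"]) -> None
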
- def sd_share(self, log):
- """
- Share the disk on all hosts
- """
- log.cl_info("sharing disk [%s]", self.sd_disk_id)
- if len(self.sd_targets) == 0:
- return 0
- ret = self._sd_create(log)
- if ret:
- log.cl_error("failed to create shared disk [%s] on host with "
- "ID [%s]", self.sd_image_fpath,
- self.sd_server_host_id)
- return -1
-
- for target in self.sd_targets:
- ret = self._sd_share_target(log, target)
- if ret:
- log.cl_error("failed to share disk [%s] on server host with "
- "ID [%s] to VM [%s]", self.sd_image_fpath,
- self.sd_server_host_id, target.st_host.sh_hostname)
- return -1
- return 0
-
-
-class SharedTarget(object):
- """
- Each shared disk on each VM has an object of this type
- """
- # pylint: disable=too-few-public-methods,too-many-arguments
- def __init__(self, vm_host, target_name):
- self.st_host = vm_host
- self.st_target_name = target_name
-
-
-def random_mac():
- """
- Generate random MAC address
- """
- mac_parts = [random.randint(0x00, 0x7f),
- random.randint(0x00, 0xff),
- random.randint(0x00, 0xff)]
- mac_string = "52:54:00"
- for mac_part in mac_parts:
- mac_string += ":" + ("%02x" % mac_part)
- return mac_string
-
-
-def vm_is_shut_off(log, server_host, hostname):
- """
- Check whether vm is shut off
- """
- state = server_host.sh_virsh_dominfo_state(log, hostname)
- if state is None:
- return False
- elif state == "shut off":
- return True
- return False
-
-
-def host_check_lsscsi(log, host, expect_dev_number):
- """
- Check whether scsi number is expected
- """
- ret, new_devices = host.sh_lsscsi(log)
- if ret:
- log.cl_error("failed to get device on host [%s]",
- host.sh_hostname)
- return -1
- if len(new_devices) == expect_dev_number:
- return 0
- else:
- return -1
-
-
-def vm_copy_in(log, server_host, vm, src, dest):
- """
- Copy file @src on server_host into dir @dest on guest @vm
- """
- retval = server_host.sh_run(log, "which virt-copy-in")
- if retval.cr_exit_status != 0:
- command = ("yum install libguestfs-tools -y")
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to install libguestfs-tools via "
- "command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- ret = 0
- command = ("virt-copy-in -d %s %s %s" % (vm, src, dest))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], ret = [%d], "
- "stdout = [%s], stderr = [%s]",
- command, server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- ret = -1
-
- # "virt-copy-in -d" may fail if the guest domain has multiple devices;
- # fall back to "virt-copy-in -i" with each device in turn.
- if ret == -1:
- command = ("virsh domblklist %s --details | grep disk | awk '{print $4}'" % vm)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], ret = [%d], "
- "stdout = [%s], stderr = [%s]",
- command, server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- images = retval.cr_stdout.splitlines()
- for image in images:
- command = ("virt-copy-in -i %s %s %s" % (image, src, dest))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status == 0:
- return 0
-
- log.cl_error("failed to copy file [%s] from server [%s] into "
- "[%s] of vm [%s]", src, server_host.sh_hostname, dest, vm)
- return ret
-
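
# A sketch of the fallback pattern used by vm_copy_in() above, assuming
# plain local subprocess calls instead of the sh_run() SSH wrapper: try
# the preferred command first, then each fallback candidate in order.
import subprocess

def run_first_success(commands):
    """Run shell commands in order; return the first one that exits
    with status 0, or None if every candidate fails."""
    for command in commands:
        if subprocess.call(command, shell=True) == 0:
            return command
    return None

# run_first_success(["virt-copy-in -d vm1 f /etc",
#                    "virt-copy-in -i /images/vm1_0.img f /etc"])
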
-
-def vm_check_shut_off(log, server_host, hostname):
- """
- Check whether vm is shut off
- """
- off = vm_is_shut_off(log, server_host, hostname)
- if off:
- return 0
- return -1
-
-
-def vm_delete(log, server_host, hostname):
- """
- Delete a virtual machine
- """
- existed = True
- active = True
- state = server_host.sh_virsh_dominfo_state(log, hostname)
- if state is None:
- existed = False
- active = False
- elif state == "shut off":
- active = False
-
- if active:
- command = ("virsh destroy %s" % hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- if existed:
- command = ("virsh undefine %s" % hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- return 0
-
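
# The control flow of vm_delete() as a pure function, a sketch for
# clarity: the virsh dominfo state maps to two independent actions.
# "shut off" is the string virsh reports for a defined, inactive
# domain; None means the domain does not exist at all.
def domain_cleanup_actions(state):
    """Map a dominfo state to (need_destroy, need_undefine)."""
    if state is None:
        return (False, False)   # nothing to clean up
    if state == "shut off":
        return (False, True)    # defined but inactive: undefine only
    return (True, True)         # active: destroy first, then undefine

# domain_cleanup_actions("running") == (True, True)
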
-
-def vm_clone(log, workspace, server_host, hostname, network_configs, ips,
- template_hostname, image_dir, distro, internet, disk_number):
- """
- Create virtual machine
- """
- # pylint: disable=too-many-arguments,too-many-locals,too-many-return-statements
- # pylint: disable=too-many-branches,too-many-statements
- log.cl_info("cloning host [%s] from template [%s]", hostname,
- template_hostname)
- host_ip = ips[0]
- ret = vm_delete(log, server_host, hostname)
- if ret:
- return -1
-
- command = ("ping -c 1 %s" % host_ip)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status == 0:
- log.cl_error("IP [%s] already used by a host", host_ip)
- return -1
-
- command = ("ping -c 1 %s" % hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status == 0:
- log.cl_error("host [%s] already up", hostname)
- return -1
-
- active = True
- state = server_host.sh_virsh_dominfo_state(log, template_hostname)
- if state is None:
- log.cl_error("template [%s] doesn't exist on host [%s]",
- template_hostname, server_host.sh_hostname)
- return -1
- elif state == "shut off":
- active = False
-
- if active:
- command = ("virsh destroy %s" % template_hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- file_options = ""
- for disk_index in range(disk_number):
- file_options += (" --file %s/%s_%d.img" %
- (image_dir, hostname, disk_index))
-
- command = ("rm -f %s/%s_%d.img" %
- (image_dir, hostname, disk_index))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = ("virt-clone --original %s --name %s%s" %
- (template_hostname, hostname, file_options))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- local_host_dir = workspace + "/" + hostname
- ret = utils.mkdir(local_host_dir)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- local_host_dir)
- return -1
-
- # net.ifnames=0 biosdevname=0 has been added to grub, so the interface
- # name will always be eth*
- eth_number = 0
- for eth_ip in ips:
- network_config = network_configs[eth_number]
- ifcfg = 'DEVICE="eth%d"\n' % eth_number
- ifcfg += 'IPADDR="%s"\n' % eth_ip
- ifcfg += 'NETMASK="%s"\n' % network_config["netmask"]
- if "gateway" in network_config:
- ifcfg += 'GATEWAY="%s"\n' % network_config["gateway"]
- ifcfg += """ONBOOT=yes
-BOOTPROTO="static"
-TYPE=Ethernet
-IPV6INIT=no
-NM_CONTROLLED=no
-"""
-
- ifcfg_fname = "ifcfg-eth%d" % eth_number
- ifcfg_fpath = local_host_dir + "/" + ifcfg_fname
- with open(ifcfg_fpath, "wt") as fout:
- fout.write(ifcfg)
-
- host_ifcfg_fpath = workspace + "/" + ifcfg_fname
- ret = server_host.sh_send_file(log, ifcfg_fpath, workspace)
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "directory [%s] on host [%s]",
- ifcfg_fpath, workspace,
- server_host.sh_hostname)
- return -1
-
- ret = vm_copy_in(log, server_host, hostname, host_ifcfg_fpath,
- "/etc/sysconfig/network-scripts")
- if ret:
- return -1
- eth_number += 1
-
- host_rules_fpath = workspace + "/70-persistent-net.rules"
- command = ("> %s" % host_rules_fpath)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- ret = vm_copy_in(log, server_host, hostname, host_rules_fpath,
- "/etc/udev/rules.d")
- if ret:
- return -1
-
- if distro == ssh_host.DISTRO_RHEL6:
- network_string = 'NETWORKING=yes\n'
- network_string += 'HOSTNAME=%s\n' % hostname
- network_fname = "network"
- network_fpath = local_host_dir + "/" + network_fname
- with open(network_fpath, "wt") as fout:
- fout.write(network_string)
-
- host_network_fpath = workspace + "/" + network_fname
- ret = server_host.sh_send_file(log, network_fpath, workspace)
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "directory [%s] on host [%s]",
- network_fpath, workspace,
- server_host.sh_hostname)
- return -1
-
- ret = vm_copy_in(log, server_host, hostname, host_network_fpath,
- "/etc/sysconfig")
- if ret:
- return -1
- else:
- host_hostname_fpath = workspace + "/hostname"
- command = ("echo %s > %s" % (hostname, host_hostname_fpath))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- ret = vm_copy_in(log, server_host, hostname, host_hostname_fpath,
- "/etc")
- if ret:
- return -1
-
- command = ("virsh start %s" % hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Remove the record in known_hosts, otherwise ssh will fail
- command = ('sed -i "/%s /d" /root/.ssh/known_hosts' % (host_ip))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Remove the record in known_hosts, otherwise ssh will fail
- command = ('sed -i "/%s /d" /root/.ssh/known_hosts' % (hostname))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- vm_host = ssh_host.SSHHost(host_ip)
- ret = vm_host.sh_wait_up(log)
- if ret:
- log.cl_error("failed to wait host [%s] up",
- host_ip)
- return -1
-
- ret = vm_check(log, hostname, host_ip, distro, internet)
- if ret:
- return -1
- return 0
-
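
# A sketch of the ifcfg rendering embedded in vm_clone() above, as a
# pure function so the generated file body can be checked without
# actually cloning a VM (function name and sample values here are
# illustrative, not from the original module).
def make_ifcfg(eth_index, ip, netmask, gateway=None):
    """Render an ifcfg-eth<N> body for a static interface."""
    lines = ['DEVICE="eth%d"' % eth_index,
             'IPADDR="%s"' % ip,
             'NETMASK="%s"' % netmask]
    if gateway is not None:
        lines.append('GATEWAY="%s"' % gateway)
    lines += ['ONBOOT=yes', 'BOOTPROTO="static"', 'TYPE=Ethernet',
              'IPV6INIT=no', 'NM_CONTROLLED=no']
    return "\n".join(lines) + "\n"

# make_ifcfg(0, "192.168.1.10", "255.255.255.0", gateway="192.168.1.1")
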
-
-def vm_check(log, hostname, host_ip, distro, internet):
- """
- Check whether virtual machine is up and fine
- """
- # pylint: disable=too-many-return-statements
- vm_host = ssh_host.SSHHost(host_ip)
- command = "hostname"
- retval = vm_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host_ip,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- current_hostname = retval.cr_stdout.strip()
- if current_hostname != hostname:
- log.cl_error("wrong host name of the virtual machine [%s], expected "
- "[%s], got [%s]", host_ip, hostname, current_hostname)
- return -1
-
- vm_host = ssh_host.SSHHost(hostname)
- command = "hostname"
- retval = vm_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- current_hostname = retval.cr_stdout.strip()
- if current_hostname != hostname:
- log.cl_error("wrong host name of the virtual machine [%s], expected "
- "[%s], got [%s]", hostname, hostname, current_hostname)
- return -1
-
- vm_distro = vm_host.sh_distro(log)
- if vm_distro != distro:
- log.cl_error("wrong distro of the virtual machine [%s], expected "
- "[%s], got [%s]", hostname, distro, vm_distro)
- return -1
-
- if internet:
- if vm_host.sh_check_internet(log):
- log.cl_error("virtual machine [%s] cannot access the Internet",
- hostname)
- return -1
- return 0
-
-
-def vm_start(log, workspace, server_host, hostname, network_configs, ips,
- template_hostname, image_dir, distro, internet, disk_number):
- """
- Start virtual machine, if vm is bad, clone it
- """
- # pylint: disable=too-many-arguments,too-many-locals
- log.cl_info("starting virtual machine [%s]", hostname)
- host_ip = ips[0]
- ret = vm_check(log, hostname, host_ip, distro, internet)
- if ret == 0:
- return 0
-
- if vm_is_shut_off(log, server_host, hostname):
- command = ("virsh start %s" % (hostname))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- vm_host = ssh_host.SSHHost(hostname)
- ret = vm_host.sh_wait_up(log)
- if ret == 0:
- ret = vm_check(log, hostname, host_ip, distro, internet)
- if ret == 0:
- return 0
-
- ret = vm_clone(log, workspace, server_host, hostname, network_configs, ips,
- template_hostname, image_dir, distro, internet, disk_number)
- if ret:
- log.cl_error("failed to create virtual machine [%s] based on "
- "template [%s]", hostname, template_hostname)
- return -1
- return 0
-
-
-def mount_iso(log, workspace, server_host, iso_path):
- """
- Mount the ISO, return the mnt path
- """
- mnt_path = workspace + "/mnt/" + utils.random_word(8)
- command = ("mkdir -p %s && mount -o loop %s %s" %
- (mnt_path, iso_path, mnt_path))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
- return mnt_path
-
-
-def _vm_install(log, workspace, bus_type, image_dir, ram_size, disk_sizes,
- distro, hostname, network_configs, server_host, iso_path,
- mnt_path):
- """
- Actually start to install by using virt_install
- """
- # pylint: disable=too-many-arguments,too-many-locals
- # pylint: disable=too-many-return-statements,too-many-statements
- # pylint: disable=too-many-branches
- ks_config = """# Kickstart file automatically generated by LiPE.
-install
-reboot
-cdrom
-lang en_US.UTF-8
-keyboard us
-"""
- pri_disk = ""
- if bus_type == cstr.CSTR_BUS_VIRTIO:
- pri_disk = cstr.CSTR_DISK_VIRTIO_PRIMARY
- elif bus_type == cstr.CSTR_BUS_IDE:
- pri_disk = cstr.CSTR_DISK_IDE_PRIMARY
- elif bus_type == cstr.CSTR_BUS_SCSI:
- pri_disk = cstr.CSTR_DISK_SCSI_PRIMARY
- else:
- log.cl_error("unsupported bus type [%s], please correct it",
- bus_type)
- return -1
- ks_config += """rootpw password
-firewall --disabled
-authconfig --enableshadow --passalgo=sha512
-selinux --disabled
-timezone --utc Asia/Shanghai
-"""
- ks_config += """bootloader --location=mbr --driveorder=%s --append="crashkernel=auto net.ifnames=0 biosdevname=0"\
-""" % pri_disk
- ks_config += """
-zerombr
-clearpart --all --initlabel
-"""
- ks_config += "part / --fstype=ext4 --grow --size=500 --ondisk=%s --asprimary" % pri_disk
- ks_config += """
-repo --name="Media" --baseurl=file:///mnt/source --cost=100
-%packages
-@Core
-%end
-%post --log=/var/log/anaconda/post-install.log
-#!/bin/bash
-# Configure hostname, somehow virt-install --name doesn't work
-"""
- if distro == ssh_host.DISTRO_RHEL6:
- ks_config += 'echo NETWORKING=yes > /etc/sysconfig/network\n'
- ks_config += ('echo HOSTNAME=%s >> /etc/sysconfig/network\n' %
- (hostname))
- elif distro == ssh_host.DISTRO_RHEL7:
- ks_config += "echo %s > /etc/hostname\n" % (hostname)
- else:
- log.cl_error("wrong distro [%s]", distro)
- return -1
- ks_config += "# Configure network\n"
- eth_number = 0
- ens_number = 3
- for network_config in network_configs:
- # net.ifnames=0 biosdevname=0 will be added to GRUB_CMDLINE_LINUX, so the
- # interface name will always be eth*
- ks_config += "# Network eth%d\n" % eth_number
- ks_config += ("rm -f /etc/sysconfig/network-scripts/ifcfg-ens%d\n" %
- ens_number)
- ks_config += ("cat << EOF > /etc/sysconfig/network-scripts/ifcfg-eth%d\n" %
- eth_number)
- ks_config += "DEVICE=eth%d\n" % eth_number
- ks_config += 'IPADDR="%s"\n' % network_config["ip"]
- ks_config += 'NETMASK="%s"\n' % network_config["netmask"]
- if "gateway" in network_config:
- ks_config += 'GATEWAY="%s"\n' % network_config["gateway"]
- ks_config += """ONBOOT=yes
-BOOTPROTO="static"
-TYPE=Ethernet
-IPV6INIT=no
-NM_CONTROLLED=no
-EOF
-"""
- eth_number += 1
- ens_number += 1
-
- ks_config += "%end\n"
- local_host_dir = workspace + "/" + hostname
- ret = utils.mkdir(local_host_dir)
- if ret:
- log.cl_error("failed to create directory [%s] on local host",
- local_host_dir)
- return -1
-
- ks_fname = "%s.ks" % hostname
- ks_fpath = local_host_dir + "/" + ks_fname
- with open(ks_fpath, "wt") as fout:
- fout.write(ks_config)
-
- host_ks_fpath = workspace + "/" + ks_fname
- ret = server_host.sh_send_file(log, ks_fpath, workspace)
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "directory [%s] on host [%s]",
- ks_fpath, workspace,
- server_host.sh_hostname)
- return -1
-
- command = "yum install virt-install -y"
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = ("virt-install --vcpus=1 --os-type=linux --hvm "
- "--connect=qemu:///system --accelerate --serial pty -v "
- "--nographics --noautoconsole --wait=-1 --force ")
- command += "--ram=%s " % ram_size
- for network_config in network_configs:
- command += ("--network=%s " % (network_config["virt_install_option"]))
- command += ("--name=%s " % (hostname))
- command += ("--initrd-inject=%s " % (host_ks_fpath))
- disk_index = 0
- for disk_size in disk_sizes:
- disk_path = "%s/%s_%d.img" % (image_dir, hostname, disk_index)
- remove_command = "rm -f %s" % disk_path
- retval = server_host.sh_run(log, remove_command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- remove_command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- command += ("--disk path=%s,bus=%s,size=%s " %
- (disk_path, bus_type, disk_size))
- disk_index += 1
- command += ("--location %s " % (mnt_path))
- command += ("--disk=%s,device=cdrom,perms=ro " % (iso_path))
- command += ("--extra-args='console=tty0 console=ttyS0,115200n8 "
- "ks=file:/%s'" % (ks_fname))
-
- if distro == ssh_host.DISTRO_RHEL6:
- install_timeout = 600
- elif distro == ssh_host.DISTRO_RHEL7:
- install_timeout = 1200
- else:
- log.cl_error("unsupported distro [%s], please correct it", distro)
- return -1
-
- retval = server_host.sh_run(log, command, timeout=install_timeout)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-def vm_install(log, workspace, server_host, iso_path, hostname, internet, dns,
- network_configs, image_dir, distro, ram_size, disk_sizes,
- bus_type=cstr.CSTR_BUS_SCSI):
- """
- Install virtual machine from ISO
- """
- # pylint: disable=too-many-arguments,too-many-locals
- # pylint: disable=too-many-return-statements,too-many-statements
- # pylint: disable=too-many-branches
- ret = vm_delete(log, server_host, hostname)
- if ret:
- return -1
-
- network_config = network_configs[0]
- host_ip = network_config["ip"]
- command = ("ping -c 1 %s" % host_ip)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status == 0:
- log.cl_error("IP [%s] is already used by a host", host_ip)
- return -1
-
- command = ("ping -c 1 %s" % hostname)
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status == 0:
- log.cl_error("host [%s] is already up", hostname)
- return -1
-
- mnt_path = mount_iso(log, workspace, server_host, iso_path)
- if mnt_path is None:
- log.cl_error("failed to get mnt path of ISO [%s]", iso_path)
- return -1
-
- ret = _vm_install(log, workspace, bus_type, image_dir, ram_size, disk_sizes,
- distro, hostname, network_configs, server_host, iso_path,
- mnt_path)
- if ret:
- log.cl_error("failed to install VM")
-
- command = ("umount %s" % (mnt_path))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = ("rmdir %s" % (mnt_path))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- if ret:
- log.cl_error("quitting because failed to install VM")
- return -1
-
- ret = server_host.sh_run(log, "which sshpass")
- if ret.cr_exit_status != 0:
- command = ("yum install sshpass -y")
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Remove the record in known_hosts, otherwise ssh will fail
- command = ('sed -i "/%s /d" /root/.ssh/known_hosts' % (host_ip))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # When virt-install finished, the virtual machine starts to reboot
- # so wait a little bit here until the host is up. Need
- # StrictHostKeyChecking=no, otherwise exit code will be 6 (ENOENT)
- expect_stdout = hostname + "\n"
- command = ("sshpass -p password ssh -o StrictHostKeyChecking=no "
- "root@%s hostname" % (host_ip))
- ret = server_host.sh_wait_update(log, command, expect_exit_status=0,
- expect_stdout=expect_stdout)
- if ret:
- log.cl_error("failed to wait host [%s] up", hostname)
- return -1
-
- command = ("sshpass -p password ssh root@%s "
- "\"mkdir -p /root/.ssh && chmod 700 /root/.ssh\"" % (host_ip))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = ("sshpass -p password scp /root/.ssh/* root@%s:/root/.ssh" % (host_ip))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- vm_host = ssh_host.SSHHost(host_ip)
- command = "> /root/.ssh/known_hosts"
- retval = vm_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- vm_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = "hostname"
- retval = vm_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- vm_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- real_hostname = retval.cr_stdout.strip()
- if real_hostname != hostname:
- log.cl_error("wrong hostname, expected [%s], got [%s]",
- hostname, real_hostname)
- return -1
-
- if internet:
- ret = vm_host.sh_enable_dns(log, dns)
- if ret:
- log.cl_error("failed to enable dns on host [%s]",
- vm_host.sh_hostname)
- return -1
-
- command = "yum install rsync -y"
- retval = vm_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- vm_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Do not check the return status, because the connection could be stopped
- command = "init 0"
- vm_host.sh_run(log, command)
-
- # Need to wait until VM shut off, otherwise "virsh change-media" won't
- # change the XML file
- ret = utils.wait_condition(log, vm_check_shut_off, (server_host, hostname))
- if ret:
- log.cl_error("failed when waiting host [%s] on [%s] shut off",
- hostname, server_host.sh_hostname)
- return ret
-
- # Find the CDROM device
- command = ("virsh domblklist %s --details | grep cdrom | "
- "awk '{print $3}'" % (hostname))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- cdroms = retval.cr_stdout.splitlines()
- if len(cdroms) != 1:
- log.cl_error("unexpected cdroms: [%s]",
- retval.cr_stdout)
- return -1
- cdrom = cdroms[0]
-
- command = ("virsh change-media %s %s --eject" % (hostname, cdrom))
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- return 0
-
-
-def target_index2name(log, target_index, bus_type=cstr.CSTR_BUS_SCSI):
- """
- Return the target name according to index
- 0 -> sda
- 1 -> sdb
- ...
- """
- ascii_number = ord('a')
- ascii_number += target_index
-
- prefix = ""
- if bus_type == cstr.CSTR_BUS_VIRTIO:
- prefix = cstr.CSTR_DISK_VIRTIO_PREFIX
- elif bus_type == cstr.CSTR_BUS_IDE:
- prefix = cstr.CSTR_DISK_IDE_PREFIX
- elif bus_type == cstr.CSTR_BUS_SCSI:
- prefix = cstr.CSTR_DISK_SCSI_PREFIX
- else:
- log.cl_error("unsupported bus type [%s], please correct it", bus_type)
- return None
-
- return prefix + chr(ascii_number)
-
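
# target_index2name() above only covers indexes 0..25 ("a".."z"). A
# sketch of the full Linux block-device suffix scheme (sda..sdz, sdaa,
# sdab, ...), should more than 26 targets ever be needed:
def disk_index2suffix(index):
    """0 -> "a", 25 -> "z", 26 -> "aa" (bijective base-26 naming)."""
    suffix = ""
    number = index + 1
    while number > 0:
        number, rem = divmod(number - 1, 26)
        suffix = chr(ord('a') + rem) + suffix
    return suffix

# "sd" + disk_index2suffix(26) == "sdaa"
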
-
-def lipe_parse_sharedisks_configs(log, shared_disk_configs, shared_disks,
- hosts, config_fpath):
- """
- Parse shared disk configs.
- """
- if shared_disk_configs is None or len(shared_disk_configs) == 0:
- return -1
-
- for shared_disk_config in shared_disk_configs:
- disk_id = utils.config_value(shared_disk_config, cstr.CSTR_DISK_ID)
- if disk_id is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_DISK_ID, config_fpath)
- return -1
-
- size = utils.config_value(shared_disk_config, cstr.CSTR_SIZE)
- if size is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_SIZE, config_fpath)
- return -1
-
- server_host_id = utils.config_value(shared_disk_config, cstr.CSTR_SERVER_HOST_ID)
- if server_host_id is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_SERVER_HOST_ID, config_fpath)
- return -1
- if server_host_id not in hosts:
- log.cl_error("SSH host with ID [%s] is NOT configured in "
- "[%s], please correct file [%s]",
- server_host_id, cstr.CSTR_SSH_HOSTS,
- config_fpath)
- return -1
-
- server_host = hosts[server_host_id]
-
- image_file = utils.config_value(shared_disk_config, cstr.CSTR_IMAGE_FILE)
- if image_file is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_IMAGE_FILE, config_fpath)
- return -1
-
- shared_disk = SharedDisk(disk_id, server_host, server_host_id, image_file, size)
- shared_disks[disk_id] = shared_disk
-
- return 0
-
-
-def lipe_vm_reboot(log, host, hostserver):
- """
- Reset the guest vm on hostserver
- """
- # pylint: disable=too-many-return-statements,too-many-locals
- # pylint: disable=too-many-branches,too-many-statements
- ret = host.sh_reboot(log)
- if ret == 0:
- return 0
- # reboot failed? try hard reset
- command = "virsh reset %s" % host.sh_hostname
- retval = hostserver.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, hostserver.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout, retval.cr_stderr)
- return -1
-
- # wait for the host up after hard reset
- if host.sh_wait_up(log):
- log.cl_error("host [%s] failed to startup, even after hard reset.",
- host.sh_hostname)
- return -1
- return 0
-
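
# lipe_vm_reboot() above is a two-step escalation: graceful reboot,
# then hard "virsh reset". A sketch of the generalized shape, assuming
# each recovery action is a callable returning 0 on success:
def escalate(log, actions):
    """Try (name, action) pairs in order; 0 on first success, else -1."""
    for name, action in actions:
        if action() == 0:
            return 0
        log.cl_info("recovery action [%s] failed", name)
    return -1

# escalate(log, [("soft reboot", lambda: host.sh_reboot(log)),
#                ("hard reset", lambda: hard_reset(log, host))])
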
-
-def parse_templates_config(log, workspace, config, config_fpath, hosts=None):
- """
- Parse the template configurations
- """
- # pylint: disable=too-many-locals,too-many-branches,too-many-statements
- template_configs = utils.config_value(config, cstr.CSTR_TEMPLATES)
- if template_configs is None:
- log.cl_error("no section [%s] found in configuration file [%s]",
- cstr.CSTR_TEMPLATES, config_fpath)
- return None
-
- templates = {}
- for template_config in template_configs:
- template_hostname = utils.config_value(template_config,
- cstr.CSTR_HOSTNAME)
- if template_hostname is None:
- log.cl_error("can NOT find [%s] in the config of a "
- "template, please correct file [%s]",
- cstr.CSTR_HOSTNAME, config_fpath)
- return None
-
- internet = utils.config_value(template_config,
- cstr.CSTR_INTERNET)
- if internet is None:
- internet = False
- log.cl_debug("no [%s] is configured, will "
- "not add internet support", cstr.CSTR_INTERNET)
-
- if internet:
- dns = utils.config_value(template_config, cstr.CSTR_DNS)
- if dns is None:
- log.cl_error("no [%s] is configured when internet support "
- "is enabled, please correct file [%s]",
- cstr.CSTR_DNS, config_fpath)
- return None
- else:
- dns = None
-
- ram_size = utils.config_value(template_config, cstr.CSTR_RAM_SIZE)
- if ram_size is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_RAM_SIZE, config_fpath)
- return None
-
- disk_sizes = utils.config_value(template_config,
- cstr.CSTR_DISK_SIZES)
- if disk_sizes is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_DISK_SIZES, config_fpath)
- return None
-
- bus_type = utils.config_value(template_config,
- cstr.CSTR_BUS_TYPE)
- if bus_type is None:
- log.cl_info("no [%s] is configured, using scsi as the default",
- cstr.CSTR_BUS_TYPE)
- bus_type = cstr.CSTR_BUS_SCSI
-
- network_configs = utils.config_value(template_config,
- cstr.CSTR_NETWORK_CONFIGS)
- if network_configs is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_NETWORK_CONFIGS, config_fpath)
- return None
-
- iso = utils.config_value(template_config, cstr.CSTR_ISO)
- if iso is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_ISO, config_fpath)
- return None
-
- distro = utils.config_value(template_config, cstr.CSTR_DISTRO)
- if distro is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_DISTRO, config_fpath)
- return None
-
- image_dir = utils.config_value(template_config, cstr.CSTR_IMAGE_DIR)
- if image_dir is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_IMAGE_DIR, config_fpath)
- return None
-
- if hosts is None:
- server_host_id = None
- server_host = None
- reinstall = None
- else:
- server_host_id = utils.config_value(template_config,
- cstr.CSTR_SERVER_HOST_ID)
- if server_host_id is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_SERVER_HOST_ID, config_fpath)
- return None
-
- if server_host_id not in hosts:
- log.cl_error("SSH host with ID [%s] is NOT configured in "
- "[%s], please correct file [%s]",
- server_host_id, cstr.CSTR_SSH_HOSTS,
- config_fpath)
- return None
-
- server_host = hosts[server_host_id]
- command = "mkdir -p %s" % workspace
- retval = server_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return None
-
- reinstall = utils.config_value(template_config,
- cstr.CSTR_REINSTALL)
- if reinstall is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_REINSTALL, config_fpath)
- return None
-
- template = VirtTemplate(iso, template_hostname, internet,
- network_configs, image_dir, distro, ram_size,
- disk_sizes, dns,
- bus_type=bus_type,
- server_host=server_host,
- server_host_id=server_host_id,
- reinstall=reinstall)
- templates[template_hostname] = template
- return templates
-
-
-def lipe_vm_install(log, workspace, config, config_fpath):
- """
- Start to install virtual machine
- """
- # pylint: disable=too-many-return-statements,too-many-locals
- # pylint: disable=too-many-branches,too-many-statements
- ssh_host_configs = utils.config_value(config, cstr.CSTR_SSH_HOSTS)
- if ssh_host_configs is None:
- log.cl_error("can NOT find [%s] in the config file, "
- "please correct file [%s]",
- cstr.CSTR_SSH_HOSTS, config_fpath)
- return -1
-
- hosts = {}
- for host_config in ssh_host_configs:
- host_id = utils.config_value(host_config, cstr.CSTR_HOST_ID)
- if host_id is None:
- log.cl_error("can NOT find [%s] in the config of a "
- "SSH host, please correct file [%s]",
- cstr.CSTR_HOST_ID, config_fpath)
- return -1
-
- hostname = utils.config_value(host_config, cstr.CSTR_HOSTNAME)
- if hostname is None:
- log.cl_error("can NOT find [%s] in the config of SSH host "
- "with ID [%s], please correct file [%s]",
- cstr.CSTR_HOSTNAME, host_id, config_fpath)
- return -1
-
- ssh_identity_file = utils.config_value(host_config, cstr.CSTR_SSH_IDENTITY_FILE)
-
- if host_id in hosts:
- log.cl_error("multiple SSH hosts with the same ID [%s], please "
- "correct file [%s]", host_id, config_fpath)
- return -1
- host = ssh_host.SSHHost(hostname, ssh_identity_file)
- hosts[host_id] = host
-
- kvm_template_dict = parse_templates_config(log, workspace, config, config_fpath, hosts=hosts)
- if kvm_template_dict is None:
- log.cl_error("failed to parse the config of templates")
- return -1
-
- for template in kvm_template_dict.values():
- iso = template.vt_iso
- template_hostname = template.vt_template_hostname
- internet = template.vt_internet
- network_configs = template.vt_network_configs
- image_dir = template.vt_image_dir
- distro = template.vt_distro
- ram_size = template.vt_ram_size
- disk_sizes = template.vt_disk_sizes
- bus_type = template.vt_bus_type
- server_host = template.vt_server_host
- reinstall = template.vt_reinstall
- dns = template.vt_dns
-
- state = server_host.sh_virsh_dominfo_state(log, template_hostname)
- if not reinstall and state is not None:
- log.cl_debug("skipping reinstall of template [%s] according to config",
- template_hostname)
- continue
-
- ret = vm_install(log, workspace, server_host, iso, template_hostname,
- internet, dns, network_configs, image_dir, distro,
- ram_size, disk_sizes, bus_type)
- if ret:
- log.cl_error("failed to create virtual machine template [%s]",
- template_hostname)
- return -1
-
- shared_disks = {}
- shared_disk_configs = utils.config_value(config, cstr.CSTR_SHARED_DISKS)
- if shared_disk_configs is None:
- log.cl_info("can NOT find [%s] in the config file [%s], "
- "ignoring it",
- cstr.CSTR_SHARED_DISKS, config_fpath)
- else:
- ret = lipe_parse_sharedisks_configs(log, shared_disk_configs,
- shared_disks, hosts,
- config_fpath)
- if ret:
- log.cl_error("failed to parse [%s] in the config file [%s], "
- "please correct it.",
- cstr.CSTR_SHARED_DISKS, config_fpath)
- return -1
-
- vm_host_configs = utils.config_value(config, cstr.CSTR_VM_HOSTS)
- if vm_host_configs is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_VM_HOSTS, config_fpath)
- return -1
-
- vm_hosts = []
- hosts_servers_mapping = dict()
- hosts_templates_mapping = dict()
- shared_disk_ids_mapping = dict()
- hosts_string = ""
- for vm_host_config in vm_host_configs:
- hostname = utils.config_value(vm_host_config, cstr.CSTR_HOSTNAME)
- if hostname is None:
- log.cl_error("no [hostname] is configured for a vm_host, "
- "please correct file [%s]", config_fpath)
- return -1
-
- ips = utils.config_value(vm_host_config, cstr.CSTR_HOST_IPS)
- if ips is None:
- log.cl_error("no [%s] is configured for a vm_host, "
- "please correct file [%s]", cstr.CSTR_HOST_IPS,
- config_fpath)
- return -1
-
- template_hostname = utils.config_value(vm_host_config,
- cstr.CSTR_TEMPLATE_HOSTNAME)
- if template_hostname is None:
- log.cl_error("can NOT find [%s] in the config of a "
- "VM host, please correct file [%s]",
- cstr.CSTR_TEMPLATE_HOSTNAME, config_fpath)
- return -1
-
- if template_hostname not in kvm_template_dict:
- log.cl_error("template with hostname [%s] is NOT configured in "
- "[%s], please correct file [%s]",
- template_hostname, cstr.CSTR_TEMPLATES, config_fpath)
- return -1
-
- template = kvm_template_dict[template_hostname]
-
- reinstall = utils.config_value(vm_host_config, cstr.CSTR_REINSTALL)
- state = template.vt_server_host.sh_virsh_dominfo_state(log, hostname)
- if reinstall is None:
- reinstall = False
- if state is None:
- reinstall = True
-
- if not reinstall:
- ret = vm_start(log, workspace,
- template.vt_server_host,
- hostname,
- template.vt_network_configs,
- ips,
- template.vt_template_hostname,
- template.vt_image_dir,
- template.vt_distro,
- template.vt_internet,
- len(template.vt_disk_sizes))
- if ret:
- log.cl_error("virtual machine [%s] can't be started",
- hostname)
- return -1
- else:
- ret = vm_clone(log, workspace,
- template.vt_server_host,
- hostname,
- template.vt_network_configs,
- ips,
- template.vt_template_hostname,
- template.vt_image_dir,
- template.vt_distro,
- template.vt_internet,
- len(template.vt_disk_sizes))
- if ret:
- log.cl_error("failed to create virtual machine [%s] based on "
- "template [%s]", hostname,
- template.vt_template_hostname)
- return -1
-
- host_ip = ips[0]
- vm_host = lustre.LustreServerHost(hostname)
- hosts_string += ("%s %s\n" % (host_ip, hostname))
- vm_hosts.append(vm_host)
- hosts_servers_mapping[hostname] = template.vt_server_host
- hosts_templates_mapping[hostname] = template
- shared_disk_ids = utils.config_value(vm_host_config,
- cstr.CSTR_SHARED_DISK_IDS)
- if shared_disk_ids is None or shared_disk_configs is None:
- continue
- shared_disk_ids_mapping[hostname] = shared_disk_ids
-
- host_configs = utils.config_value(config, cstr.CSTR_HOSTS)
- if host_configs is not None:
- for host_config in host_configs:
- hostname = utils.config_value(host_config, cstr.CSTR_HOSTNAME)
- if hostname is None:
- log.cl_debug("can NOT find [%s] in the config file, "
- "please correct file [%s]",
- cstr.CSTR_HOSTNAME, config_fpath)
- continue
-
- host_ip = utils.config_value(host_config, cstr.CSTR_IP)
- if host_ip is None:
- log.cl_debug("can NOT find [%s] in the config file, "
- "please correct file [%s]",
- cstr.CSTR_IP, config_fpath)
- continue
- hosts_string += ("%s %s\n" % (host_ip, hostname))
- else:
- log.cl_debug("can NOT find [%s] in the config file [%s], "
- "ignoring it",
- cstr.CSTR_HOSTS, config_fpath)
-
- hosts_fpath = workspace + "/hosts"
- with open(hosts_fpath, "wt") as hosts_file:
- with open("/etc/hosts") as local_hosts:
- for line in local_hosts:
- hosts_file.write(line)
-
- hosts_file.write(hosts_string)
- hosts_file.flush()
-
- for host in vm_hosts:
- # Cleanup log dirs, as previous clownfish testing may generate
- # lots of logs.
- command = "rm -rf /var/log/lipe*"
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Umount all mount points
- command = ("cat /proc/mounts")
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- pattern = (r"^(?P<device>\S+) (?P<mount_point>/var/log/clownfish\S*) .+$")
- regular = re.compile(pattern)
-
- for line in retval.cr_stdout.splitlines():
- # log.cl_debug("checking line [%s]", line)
- match = regular.match(line)
- if not match:
- continue
-
- mount_point = match.group("mount_point")
- command = "umount %s" % mount_point
- retval = host.sh_run(log, command)
- if retval.cr_exit_status != 0:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command, host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- command = "rm -rf /var/log/clownfish*"
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- log.cl_info("preparing virtual machine [%s] after starting it",
- host.sh_hostname)
- ret = host.sh_send_file(log, hosts_fpath, "/etc")
- if ret:
- log.cl_error("failed to send hosts file [%s] on local host to "
- "directory [%s] on host [%s]",
- hosts_fpath, workspace,
- host.sh_hostname)
- return -1
-
- # Clear the known_hosts, otherwise the reinstalled hosts can't be
- # accessed by other hosts
- command = "> /root/.ssh/known_hosts"
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- # Stop Corosync to kill all possible Clownfish server
- for host in vm_hosts:
- service_names = ["corosync", "pacemaker"]
- for service_name in service_names:
- ret = host.sh_service_stop(log, service_name)
- if ret:
- log.cl_error("failed to stop service [%s] on host [%s]",
- service_name, host.sh_hostname)
- return -1
-
- ret = host.sh_service_disable(log, service_name)
- if ret:
- log.cl_error("failed to disable service [%s] on host [%s]",
- service_name, host.sh_hostname)
- return -1
-
- # umount all Lustre clients first
- reboot_hosts = []
- for host in vm_hosts:
- ret = host.lsh_lustre_umount_services(log, client_only=True)
- if ret:
- log.cl_info("failed to umount Lustre clients on host [%s], "
- "reboot is needed", host.sh_hostname)
- reboot_hosts.append(host)
-
- # umount all Lustre servers
- for host in vm_hosts:
- ret = host.lsh_lustre_umount_services(log)
- if ret:
- log.cl_info("failed to umount Lustre servers on host [%s], "
- "reboot is needed", host.sh_hostname)
- if host not in reboot_hosts:
- reboot_hosts.append(host)
-
- for host in reboot_hosts:
- ret = lipe_vm_reboot(log, host,
- hosts_servers_mapping[host.sh_hostname])
- if ret:
- log.cl_error("failed to reboot host [%s]",
- host.sh_hostname)
- return -1
-
- for host in vm_hosts:
- # Destroy all ZFS pool
- ret = host.sh_destroy_zfs_pools(log)
- if ret:
- log.cl_info("failed to destroy ZFS pools on host [%s], "
- "reboot is needed", host.sh_hostname)
- ret = lipe_vm_reboot(log, host,
- hosts_servers_mapping[host.sh_hostname])
- if ret:
- log.cl_error("failed to reboot host [%s]",
- host.sh_hostname)
- return -1
-
- ret = host.sh_destroy_zfs_pools(log)
- if ret:
- log.cl_info("failed to destroy ZFS pools on host [%s] even "
- "after reboot", host.sh_hostname)
- return -1
-
- # Detach all shared disks
- hostname = host.sh_hostname
- server_host = hosts_servers_mapping[hostname]
- ret = server_host.sh_virsh_detach_domblks(log, hostname,
- LVIRT_IMAGE_SHARED_SUBFIX)
- if ret:
- log.cl_error("failed to detach disks on VM [%s]",
- hostname)
- return -1
-
- # Generate the targets of shared disks
- shared_disk_ids = shared_disk_ids_mapping.get(hostname)
- if shared_disk_ids is None or shared_disk_configs is None:
- continue
-
- command = ("> %s" % LIPE_UDEV_RULES)
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- server_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- target_index = 0
- for shared_disk_id in shared_disk_ids:
- if shared_disk_id not in shared_disks:
- log.cl_error("shared disk with ID [%s] is not configured",
- shared_disk_id)
- return -1
-
- shared_disk = shared_disks[shared_disk_id]
-
- # Use the template of this VM host, not the stale loop variable
- template = hosts_templates_mapping[hostname]
- if template.vt_server_host_id != shared_disk.sd_server_host_id:
- log.cl_error("shared disk with ID [%s] is not configured "
- "on host with ID [%s]. It is on host with ID "
- "[%s] instead, thus it can't be shared with VM [%s]",
- shared_disk_id, template.vt_server_host_id,
- shared_disk.sd_server_host_id, hostname)
- return -1
-
- while True:
- target_name = target_index2name(log, target_index)
- target_index += 1
- command = "ls /dev/%s" % target_name
- retval = host.sh_run(log, command)
- # If the device name is already in use on the guest, try the next one
- if retval.cr_exit_status:
- break
- shared_target = SharedTarget(host, target_name)
- shared_disk.sd_add_target(log, shared_target)
-
- for shared_disk in shared_disks.values():
- ret = shared_disk.sd_share(log)
- if ret:
- log.cl_error("failed to share disk [%s] on server host with "
- "ID [%s]", shared_disk.sd_image_fpath,
- shared_disk.sd_server_host_id)
- return -1
- return 0
-
-
-def lipe_virt(log, workspace, config_fpath):
- """
- Install virtual machines according to the configuration file
- """
- # pylint: disable=too-many-branches,bare-except,too-many-locals
- # pylint: disable=too-many-statements
- config_fd = open(config_fpath)
- ret = 0
- try:
- config = yaml.load(config_fd)
- except:
- log.cl_error("not able to load [%s] as yaml file: %s", config_fpath,
- traceback.format_exc())
- ret = -1
- config_fd.close()
- if ret:
- return -1
-
- try:
- ret = lipe_vm_install(log, workspace, config, config_fpath)
- except:
- ret = -1
- log.cl_error("exception: %s", traceback.format_exc())
-
- if ret:
- log.cl_error("failed to install the VMs, please check [%s] for "
- "more logs", workspace)
- else:
- log.cl_info("installed the VMs successfully, please check [%s] "
- "for more logs", workspace)
- return ret
-
-
-def usage():
- """
- Print usage string
- """
- utils.oprint("Usage: %s <config_file>" % sys.argv[0])
-
-
-def main():
- """
- Install virtual machines
- """
- cmd_general.main(LIPE_VIRT_CONFIG, LIPE_VIRT_LOG_DIR,
- lipe_virt)
+++ /dev/null
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-
-"""
-Lustre test library
-"""
-import time
-
-# Local libs
-from pylustre import utils
-from pylustre import watched_io
-
-MULTIOP = "/usr/lib64/lustre/tests/multiop"
-PAUSING = "PAUSING\n"
-
-
-def check_file_executable(log, host, fpath):
- """
- Check the file is executable
- """
- command = ("test -f %s && test -x %s " % (fpath, fpath))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return 0
-
-
-class Multiop(object):
- """
- multiop process on a host
- """
- def __init__(self, host, fpath, args, stdout_file, stderr_file):
- # pylint: disable=too-many-arguments
- self.mop_host = host
- self.mop_command = MULTIOP + " " + fpath + " " + args
- self.mop_stdout_file = stdout_file
- self.mop_stderr_file = stderr_file
- self.mop_retval = None
- self.mop_stdout = ""
- self.mop_exited = False
-
- def mop_wait_pausing(self, log):
- """
- Wait until the multiop is pausing
- """
- return self._mop_wait_output(log, PAUSING)
-
- def _mop_wait_output(self, log, expected, timeout=60, sleep_interval=1):
- """
- Wait until the output is expected
- """
- waited = 0
- while True:
- if self.mop_stdout == expected:
- log.cl_debug("got expected output [%s]", expected)
- return 0
-
- if waited < timeout:
- waited += sleep_interval
- time.sleep(sleep_interval)
- continue
- log.cl_error("timeout when waiting output, expected [%s], "
- "got [%s]", expected, self.mop_stdout)
- return -1
- return -1
-
- def mop_watcher_stdout(self, args, new_log):
- """
- log watcher of stdout
- """
- # pylint: disable=unused-argument
- log = args["log"]
- if len(new_log) == 0:
- return
- self.mop_stdout += new_log
- log.cl_debug("stdout of multiop [%s]: [%s]", self.mop_command,
- new_log)
-
- def mop_watcher_stderr(self, args, new_log):
- """
- log watcher of stderr
- """
- log = args["log"]
- # pylint: disable=unused-argument
- if len(new_log) == 0:
- return
- log.cl_debug("stderr of multiop [%s]: [%s]", self.mop_command,
- new_log)
-
- def _mop_thread_main(self, log):
- """
- Thread of running multiop
- """
- host = self.mop_host
- args = {}
- args["log"] = log
- stdout_fd = watched_io.watched_io_open(self.mop_stdout_file,
- self.mop_watcher_stdout, args)
- stderr_fd = watched_io.watched_io_open(self.mop_stderr_file,
- self.mop_watcher_stderr, args)
- log.cl_debug("start to run command [%s] on host [%s]",
- self.mop_command, host.sh_hostname)
- retval = host.sh_run(log, self.mop_command, stdout_tee=stdout_fd,
- stderr_tee=stderr_fd, return_stdout=False,
- return_stderr=False, timeout=None, flush_tee=True)
- stdout_fd.close()
- stderr_fd.close()
-
- log.cl_debug("thread of multiop [%s] is exiting",
- self.mop_command)
- self.mop_retval = retval
- self.mop_exited = True
-
- def mop_start(self, log):
- """
- Start the process of multiop
- """
- utils.thread_start(self._mop_thread_main, (log, ))
-
- def mop_pkill(self, log):
- """
- Kill the process of running multiop
- """
- return self.mop_host.sh_pkill(log, self.mop_command)
-
- def mop_signal(self, log):
- """
- Send USR1 signal to the process
- """
- return self.mop_host.sh_pkill(log, self.mop_command,
- special_signal="USR1")
-
- def mop_wait_exit(self, log, timeout=60, sleep_interval=1, quiet=False):
- """
- Wait until the process exits
- """
- waited = 0
- while True:
- if self.mop_exited:
- log.cl_debug("multiop thread exited")
- return 0
-
- if waited < timeout:
- waited += sleep_interval
- time.sleep(sleep_interval)
- continue
- if not quiet:
- log.cl_error("timeout when waiting the multiop thread to exit")
- return -1
- return -1
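
# _mop_wait_output() and mop_wait_exit() above share the same
# poll-until-timeout loop (including an unreachable trailing return).
# A sketch of the common helper, assuming the condition is a callable:
import time

def wait_until(condition, timeout=60, sleep_interval=1):
    """Poll condition() until true; 0 on success, -1 on timeout."""
    waited = 0
    while waited <= timeout:
        if condition():
            return 0
        time.sleep(sleep_interval)
        waited += sleep_interval
    return -1

# mop_wait_exit() reduces to: wait_until(lambda: self.mop_exited, 60)
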
+++ /dev/null
-# Copyright (c) 2018 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Library for testing
-"""
-
-import traceback
-import os
-import yaml
-
-# Local libs
-from pylustre import utils
-from pylustre import cstr
-from pylustre import ssh_host
-from pylustre import watched_io
-from pylustre import lyaml
-from pylustre import lipe_virt
-
-
-def install_with_iso_mounted(args, mnt_path):
- """
- Run the install test
- """
- # pylint: disable=too-many-locals,too-many-arguments
- log, workspace, install_server, install_config, cmd_name, install_config_fname = args
- # Make sure install server is local host, since this will overwrite the
- # local config files
- uuid_install = install_server.sh_uuid(log)
- if uuid_install is None:
- log.cl_error("failed to get the UUID on host [%s]",
- install_server.sh_hostname)
- return -1
-
- local_host = ssh_host.SSHHost("localhost", local=True)
- uuid_local = local_host.sh_uuid(log)
- if uuid_local is None:
- log.cl_error("failed to get the UUID on localhost")
- return -1
-
- if uuid_local == uuid_install:
- log.cl_error("please do NOT use host [%s] as the install server, "
- "since it is the localhost, and installation test "
- "would overwrite the local configuration files",
- install_server.sh_hostname)
- return -1
-
- ret = install_server.sh_rpm_find_and_uninstall(log, "grep lipe")
- if ret:
- log.cl_error("failed to uninstall LiPE rpms on host [%s]",
- install_server.sh_hostname)
- return -1
-
- # FIXME: LiPE depends on liblustreapi.so, but the installation scripts
- # do not handle that dependency, hence the --nodeps below
- package_dir = mnt_path + "/" + cstr.CSTR_PACKAGES
- command = ("rpm -ivh %s/lipe-pylustre-*.x86_64.rpm "
- "%s/lipe-1.*.x86_64.rpm "
- "%s/lipe-client-1.*.x86_64.rpm "
- "%s/lipe-server-1.*.x86_64.rpm "
- "%s/lipe-clownfish-*.x86_64.rpm --nodeps" %
- (package_dir, package_dir, package_dir, package_dir, package_dir))
- retval = install_server.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- install_server.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- install_config_fpath = (workspace + "/" + install_config_fname)
- config_string = ("""#
-# Configuration file for installing %s from DDN
-#
-""" % (cmd_name))
- config_string += yaml.dump(install_config, Dumper=lyaml.YamlDumper,
- default_flow_style=False)
- try:
- with open(install_config_fpath, 'w') as yaml_file:
- yaml_file.write(config_string)
- except:
- log.cl_error("failed to save the config file to [%s]",
- install_config_fpath)
- return -1
-
- ret = install_server.sh_send_file(log, install_config_fpath, "/etc")
- if ret:
- log.cl_error("failed to send file [%s] on local host to "
- "/etc on host [%s]",
- install_config_fpath,
- install_server.sh_hostname)
- return -1
-
- args = {}
- args["log"] = log
- args["hostname"] = install_server.sh_hostname
- stdout_file = (workspace + "/" + cmd_name + "_install.stdout")
- stderr_file = (workspace + "/" + cmd_name + "_install.stderr")
- stdout_fd = watched_io.watched_io_open(stdout_file,
- watched_io.log_watcher_info, args)
- stderr_fd = watched_io.watched_io_open(stderr_file,
- watched_io.log_watcher_error, args)
- command = ("%s_install" % (cmd_name))
- retval = install_server.sh_run(log, command, stdout_tee=stdout_fd,
- stderr_tee=stderr_fd, return_stdout=False,
- return_stderr=False, timeout=None,
- flush_tee=True)
- stdout_fd.close()
- stderr_fd.close()
-
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d]",
- command,
- install_server.sh_hostname,
- retval.cr_exit_status)
- return -1
- return 0
-
-
-def mount_and_run(log, host, host_iso_path, funct, args):
- """
- Mount the ISO and run @funct with @args
- """
- # pylint: disable=bare-except,too-many-arguments
- mnt_path = "/mnt/" + utils.random_word(8)
-
- command = ("mkdir -p %s && mount -o loop %s %s" %
- (mnt_path, host_iso_path, mnt_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- try:
- ret = funct(args, mnt_path)
- if ret:
- log.cl_error("failed to run funct with ISO mnt [%s]", mnt_path)
- except:
- ret = -1
- log.cl_error("exception: %s", traceback.format_exc())
-
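- # Unmount and remove the mount point regardless of whether funct
- # succeeded; a failed cleanup fails the whole run as well.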
- command = ("umount %s" % (mnt_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- ret = -1
-
- command = ("rmdir %s" % (mnt_path))
- retval = host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
- return ret
-
-
-def mount_and_install(log, workspace, host, host_iso_path, config,
- cmd_name, install_config_fname):
- """
- Mount the ISO and install
- """
- # pylint: disable=too-many-arguments
- args = (log, workspace, host, config, cmd_name, install_config_fname)
- ret = mount_and_run(log, host, host_iso_path, install_with_iso_mounted, args)
- if ret:
- log.cl_error("failed to mount and install")
- return ret
- return 0
-
-
-def start_install(log, workspace, install_server, install_config, config_fpath,
- cmd_name, install_config_fname):
- """
- Start the real install action
- """
- # pylint: disable=too-many-locals,too-many-arguments
- command = "mkdir -p %s" % workspace
- retval = install_server.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- install_server.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
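- # The LiPE ISO is expected in the current working directory, and
- # exactly one lipe-*.iso must be present there.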
- local_host = ssh_host.SSHHost("localhost", local=True)
- command = "ls lipe-*.iso"
- retval = local_host.sh_run(log, command)
- if retval.cr_exit_status:
- log.cl_error("failed to run command [%s] on host [%s], "
- "ret = [%d], stdout = [%s], stderr = [%s]",
- command,
- local_host.sh_hostname,
- retval.cr_exit_status,
- retval.cr_stdout,
- retval.cr_stderr)
- return -1
-
- current_dir = os.getcwd()
- iso_names = retval.cr_stdout.split()
- if len(iso_names) != 1:
- log.cl_error("found unexpected ISOs [%s] under currect directory "
- "[%s]", iso_names, current_dir)
- return -1
-
- iso_name = iso_names[0]
- iso_path = current_dir + "/" + iso_name
-
- ret = install_server.sh_send_file(log, config_fpath, workspace)
- if ret:
- log.cl_error("failed to send Clownfish config [%s] on local host to "
- "directory [%s] on host [%s]",
- config_fpath, workspace,
- install_server.sh_hostname)
- return -1
- config_fname = os.path.basename(config_fpath)
-
- ret = install_server.sh_send_file(log, iso_path, workspace)
- if ret:
- log.cl_error("failed to send LiPE ISO [%s] on local host to "
- "directory [%s] on host [%s]",
- iso_path, workspace,
- install_server.sh_hostname)
- return -1
-
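- # Point the install config at the copies on the install server, since
- # the installer runs on that host and reads them from there.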
- host_iso_path = workspace + "/" + iso_name
- host_config_fpath = workspace + "/" + config_fname
- install_config[cstr.CSTR_ISO_PATH] = host_iso_path
- install_config[cstr.CSTR_CONFIG_FPATH] = host_config_fpath
- ret = mount_and_install(log, workspace, install_server, host_iso_path,
- install_config, cmd_name, install_config_fname)
- if ret:
- log.cl_error("failed to test installation on host [%s]",
- install_server.sh_hostname)
- return -1
- return 0
-
-
-def test_install(log, workspace, install_config_fpath,
- skip_install, install_server, cmd_name,
- install_config_fname):
- """
- Load the installation config and run the install test
- """
- # pylint: disable=too-many-arguments
- install_config_fd = open(install_config_fpath)
- ret = 0
- try:
- install_config = yaml.load(install_config_fd)
- except:
- log.cl_error("not able to load [%s] as yaml file: %s",
- install_config_fpath, traceback.format_exc())
- ret = -1
- install_config_fd.close()
- if ret:
- return -1
-
- config_fpath = utils.config_value(install_config,
- cstr.CSTR_CONFIG_FPATH)
- if config_fpath is None:
- log.cl_error("can NOT find [%s] in the installation config, "
- "please correct file [%s]",
- cstr.CSTR_CONFIG_FPATH, install_config_fpath)
- return -1
-
- if not skip_install:
- ret = start_install(log, workspace, install_server, install_config,
- config_fpath, cmd_name, install_config_fname)
- if ret:
- log.cl_error("failed to run install test")
- return -1
- return 0
-
-
-def test_install_virt(log, workspace, test_config, test_config_fpath):
- """
- Install the virtual machines for the test, unless configured to skip
- """
- skip_virt = utils.config_value(test_config,
- cstr.CSTR_SKIP_VIRT)
- if skip_virt is None:
- log.cl_debug("no [%s] is configured, do not skip checking virt")
- skip_virt = False
-
- if skip_virt:
- log.cl_debug("skip checking virt")
- return 0
-
- virt_config_fpath = utils.config_value(test_config,
- cstr.CSTR_VIRT_CONFIG)
- if virt_config_fpath is None:
- log.cl_error("no [%s] is configured, please correct file [%s]",
- cstr.CSTR_VIRT_CONFIG, test_config_fpath)
- return -1
- ret = lipe_virt.lipe_virt(log, workspace, virt_config_fpath)
- if ret:
- log.cl_error("failed to install the virtual machines")
- return -1
- return 0
+++ /dev/null
-[Unit]
-Description=LiPE Test Scheduler, a scheduler framework that manages the test hosts and serves the test launchers
-
-[Service]
-Type=simple
-ExecStart=/usr/bin/lipe_test_scheduler
-User=root
-
-[Install]
-WantedBy=multi-user.target