Whamcloud - gitweb
EX-3209 lipe: add lpcc util and service
authorLei Feng <flei@whamcloud.com>
Tue, 27 Jul 2021 07:37:11 +0000 (15:37 +0800)
committerLi Xi <lixi@ddn.com>
Tue, 17 Aug 2021 12:07:17 +0000 (12:07 +0000)
Create lpcc daemon/cli and systemd service to manage all
PCC devices and services. Create umount.lustre to hook the
umounting and stop PCC in advance. Remove unused lpcc_test
and lpcc_cleanup. Fix stats mistake for purge_objs. Add
--pidfile for lpcc_purge.

Change-Id: I941d07b61906e4d5ebee13dab2a8015e43ecf676
Signed-off-by: Lei Feng <flei@whamcloud.com>
Test-Parameters: trivial
Reviewed-on: https://review.whamcloud.com/44103
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
22 files changed:
lipe/Makefile.am
lipe/lipe.spec.in
lipe/lpcc
lipe/lpcc.conf
lipe/lpcc_cleanup [deleted file]
lipe/lpcc_test [deleted file]
lipe/man/lpcc-start.8 [new file with mode: 0644]
lipe/man/lpcc-status.8 [new file with mode: 0644]
lipe/man/lpcc-stop.8 [new file with mode: 0644]
lipe/man/lpcc.8 [new file with mode: 0644]
lipe/man/lpcc.conf.5 [new file with mode: 0644]
lipe/pylpcc/__init__.py [deleted file]
lipe/pylpcc/lpcc.py [deleted file]
lipe/pylpcc/lpcc_cleanup.py [deleted file]
lipe/pylpcc/lpcc_test.py [deleted file]
lipe/src/lpcc_purge.c
lipe/systemd/lpcc.service
lustre/doc/Makefile.am
lustre/doc/umount.lustre.8 [new file with mode: 0644]
lustre/scripts/Makefile.am
lustre/scripts/umount.lustre [new file with mode: 0755]
lustre/tests/sanity.sh

index 2a56277..81f257b 100644 (file)
@@ -48,8 +48,6 @@ PYTHON_COMMANDS = \
        loris_crontab \
        loris_test \
        lpcc \
-       lpcc_cleanup \
-       lpcc_test \
        pyltest_import_check
 
 EXTRA_DIST= \
@@ -77,7 +75,6 @@ EXTRA_DIST= \
        pylipe/.pylintrc \
        pylipe/*.py \
        pyloris/*.py \
-       pylpcc/*.py \
        pylustre/*.py \
        pyltest/*.py \
        scripts/*.sh \
@@ -87,7 +84,7 @@ EXTRA_DIST= \
        .pylintrc
 
 PYLTEST_FILES = $(wildcard pyltest/*.py)
-PYTHON_LIB_FILES = $(wildcard pyclownfish/*.py pylustre/*.py  pyloris/*.py pylhsm/*.py pylpcc/*.py)
+PYTHON_LIB_FILES = $(wildcard pyclownfish/*.py pylustre/*.py  pyloris/*.py pylhsm/*.py)
 PYTHON_LIB_FILES += $(PYLTEST_FILES)
 PYTHON_FILES = $(PYTHON_LIB_FILES) $(PYTHON_COMMANDS)
 PYTHON_CHECKS = $(PYTHON_FILES:%=%.python_checked)
index 9e1114d..5792820 100644 (file)
@@ -223,10 +223,9 @@ python2 -m py_compile pylustre/*.py
 python2 -m py_compile pylhsm/*.py
 python2 -m py_compile pylipe/*.py
 python2 -m py_compile pyloris/*.py
-python2 -m py_compile pylpcc/*.py
 python2 -m py_compile pyltest/*.py
 
-find pyclownfish pylustre pylhsm pylipe pyloris pylpcc pyltest -maxdepth 1 -type f -a -name "*.python_checked" -o -name "*.py" | xargs rm -f
+find pyclownfish pylustre pylhsm pylipe pyloris pyltest -maxdepth 1 -type f -a -name "*.python_checked" -o -name "*.py" | xargs rm -f
 
 %install
 rm -rf $RPM_BUILD_ROOT
@@ -236,6 +235,7 @@ mkdir -p $RPM_BUILD_ROOT%{_libdir}
 mkdir -p $RPM_BUILD_ROOT%{python2_sitelib}
 mkdir -p $RPM_BUILD_ROOT%{_mandir}/man1
 mkdir -p $RPM_BUILD_ROOT%{_mandir}/man5
+mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/yum.repos.d
 cp \
        ldsync \
@@ -255,8 +255,7 @@ cp \
        loris_crontab \
        loris_test \
        lpcc \
-       lpcc_cleanup \
-       lpcc_test \
+       src/lpcc_purge \
        src/ext4_inode2path \
        src/lcreatemany \
        src/ldumpstripe \
@@ -283,7 +282,6 @@ cp -a pyclownfish $RPM_BUILD_ROOT%{python2_sitelib}
 cp -a pylhsm $RPM_BUILD_ROOT%{python2_sitelib}
 cp -a pylipe $RPM_BUILD_ROOT%{python2_sitelib}
 cp -a pyloris $RPM_BUILD_ROOT%{python2_sitelib}
-cp -a pylpcc $RPM_BUILD_ROOT%{python2_sitelib}
 cp -a pylustre $RPM_BUILD_ROOT%{python2_sitelib}
 cp -a pyltest $RPM_BUILD_ROOT%{python2_sitelib}
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}
@@ -298,6 +296,7 @@ cp -a \
        example_configs/clownfish/seperate_mgs/lipe_virt.conf \
        lpcc.conf \
        $RPM_BUILD_ROOT%{_sysconfdir}
+
 %if %{with laudit}
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/laudit
 cp -a laudit.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/laudit
@@ -332,6 +331,11 @@ cp -a example_configs/hotpool/* $RPM_BUILD_ROOT%{_sysconfdir}/
 install -m 0644 man/lipe_scan.1 $RPM_BUILD_ROOT%{_mandir}/man1/
 install -m 0644 man/lipe_find.1 $RPM_BUILD_ROOT%{_mandir}/man1/
 install -m 0644 man/lfill.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/lpcc.8 $RPM_BUILD_ROOT%{_mandir}/man8/
+install -m 0644 man/lpcc-start.8 $RPM_BUILD_ROOT%{_mandir}/man8/
+install -m 0644 man/lpcc-stop.8 $RPM_BUILD_ROOT%{_mandir}/man8/
+install -m 0644 man/lpcc-status.8 $RPM_BUILD_ROOT%{_mandir}/man8/
+install -m 0644 man/lpcc.conf.5 $RPM_BUILD_ROOT%{_mandir}/man5/
 %if %{with laudit}
 install -m 0644 man/laudit.1 $RPM_BUILD_ROOT%{_mandir}/man1/
 install -m 0644 man/laudit-report.1 $RPM_BUILD_ROOT%{_mandir}/man1/
@@ -374,16 +378,21 @@ rm -rf $RPM_BUILD_ROOT
 
 %files lpcc
 %defattr(-,root,root)
-%{python2_sitelib}/pylpcc
 %{_bindir}/lpcc
-%{_bindir}/lpcc_cleanup
-%{_bindir}/lpcc_test
+%{_bindir}/lpcc_purge
 %config(noreplace) %{_sysconfdir}/lpcc.conf
 %if %{with systemd}
     %{_unitdir}/lpcc.service
 %else
     %{_sysconfdir}/rc.d/init.d/lpcc
 %endif
+%{_mandir}/man8/lpcc.8*
+%{_mandir}/man8/lpcc-start.8*
+%{_mandir}/man8/lpcc-stop.8*
+%{_mandir}/man8/lpcc-status.8*
+%{_mandir}/man5/lpcc.conf.5*
+
+
 
 %files hsm
 %defattr(-,root,root)
index 73922cc..d573a0b 100755 (executable)
--- a/lipe/lpcc
+++ b/lipe/lpcc
-#!/usr/bin/python2 -u
-# Copyright (c) 2017 DataDirect Networks, Inc.
+#!/usr/bin/env python3
+# Copyright (c) 2021 DataDirect Networks, Inc.
 # All Rights Reserved.
-# Author: lixi@ddn.com
+# Author: flei@ddn.com
+
 """
-LPCC(Lustre Persistent Client Cache)
+Manage all PCC devices and services
 """
-from pylpcc import lpcc
+
+import argparse
+import errno
+import json
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import time
+import yaml
+
+
+LISTEN_SOCK_FN = "/var/run/lpcc.sock"
+LISTEN_SOCK = None
+
+def eprint(*args, **kwargs):
+    """print something to stderr"""
+    print(*args, file=sys.stderr, **kwargs)
+
+class LpccService:
+    """
+    Class to manage single instance of lpcc
+    """
+    copytool_prog = 'lhsmtool_posix'
+    lpcc_purge_prog = 'lpcc_purge'
+
+    lpcc_config = None
+
+    lpcc_mount = None
+    lpcc_cache = None
+    lpcc_roid = None
+    lpcc_autocache = None
+
+    lpcc_purge_high_usage = 90
+    lpcc_purge_low_usage = 75
+    lpcc_purge_interval = 5
+    lpcc_purge_scan_threads = 1
+
+    def __init__(self, lpcc_config):
+        """
+        Build a service object from one entry of the config file.
+
+        lpcc_config is a dict; the keys 'mount', 'cache', 'autocache' and
+        'roid' are read unconditionally (a missing key raises KeyError),
+        while the optional 'purge' dict overrides the lpcc_purge tunables.
+        """
+        self.lpcc_config = lpcc_config
+        self.lpcc_mount = lpcc_config['mount']
+        self.lpcc_cache = lpcc_config['cache']
+        self.lpcc_autocache = lpcc_config['autocache']
+        self.lpcc_roid = lpcc_config['roid']
+
+        # Without a 'purge' section the class-level defaults stay in effect.
+        lpcc_purge_obj = lpcc_config.get('purge')
+        if lpcc_purge_obj is not None:
+            self.lpcc_purge_high_usage = lpcc_purge_obj.get('high_usage', 90)
+            self.lpcc_purge_low_usage = lpcc_purge_obj.get('low_usage', 75)
+            # NOTE(review): the fallback here is 30 while the class-level
+            # default of lpcc_purge_interval is 5 -- confirm which is intended.
+            self.lpcc_purge_interval = lpcc_purge_obj.get('interval', 30)
+            self.lpcc_purge_scan_threads = lpcc_purge_obj.get('scan_threads', 1)
+
+    @staticmethod
+    def _check_process_by_pidfile(procname, pidfile):
+        """
+        Check existence of process with the pid in pidfile.
+        If pidfile does not exist or is not valid, return False.
+        """
+        cmdline = ['pkill', '--signal', '0', '--pidfile', pidfile, '--', procname]
+        eprint(cmdline)
+        cproc = subprocess.run(cmdline, check=False, stderr=subprocess.DEVNULL)
+        if cproc.returncode != 0:
+            return False
+
+        return True
+
+    @staticmethod
+    def _wait_process_by_pidfile(procname, pidfile, secs=5):
+        """
+        Wait for at most secs seconds for the existence of pid in pidfile
+        """
+        for i in range(secs):
+            if LpccService._check_process_by_pidfile(procname, pidfile):
+                return True
+            else:
+                time.sleep(1)
+
+        return False
+
+    @staticmethod
+    def _kill_process_by_pidfile(procname, pidfile):
+        """
+        Kill a process with given pid in pidfile
+        """
+        cmdline = ['pkill', '--pidfile', pidfile, '--', procname]
+        eprint(cmdline)
+        cproc = subprocess.run(cmdline, check=False)
+        return cproc.returncode
+
+    def _add_pcc(self):
+        eprint("Adding PCC...")
+
+        param = '%s roid=%d ropcc=1' % (self.lpcc_autocache, self.lpcc_roid)
+        cmdline = ['lctl', 'pcc', 'add', self.lpcc_mount, self.lpcc_cache, \
+                   '--param', param]
+        eprint(cmdline)
+        cproc = subprocess.run(cmdline, check=False)
+        return cproc.returncode
+
+    def _del_pcc(self):
+        eprint("Deleting PCC...")
+        cmdline = ['lctl', 'pcc', 'del', self.lpcc_mount, self.lpcc_cache]
+        eprint(cmdline)
+        cproc = subprocess.run(cmdline, check=False)
+        return cproc.returncode
+
+    def _start_lpcc_purge(self):
+        eprint("Starting lpcc_purge...")
+
+        pidfile = '/var/run/lpcc_purge-%d.pid' % self.lpcc_roid
+        cmdline = [self.lpcc_purge_prog, \
+                   '--mount', self.lpcc_mount, \
+                   '--cache', self.lpcc_cache, \
+                   '--roid', str(self.lpcc_roid), \
+                   '--high-usage', str(self.lpcc_purge_high_usage), \
+                   '--low-usage', str(self.lpcc_purge_low_usage), \
+                   '--interval', str(self.lpcc_purge_interval), \
+                   '--scan-threads', str(self.lpcc_purge_scan_threads), \
+                   '--pidfile', pidfile]
+
+        eprint(cmdline)
+        subprocess.Popen(cmdline)
+
+        succ = LpccService._wait_process_by_pidfile(self.lpcc_purge_prog, pidfile)
+        if not succ:
+            eprint("lpcc_purge did not start successfully!")
+            return 1
+
+        return 0
+
+    def _stop_lpcc_purge(self):
+        eprint("Stopping lpcc_purge...")
+        pidfile = '/var/run/lpcc_purge-%d.pid' % self.lpcc_roid
+        self._kill_process_by_pidfile(self.lpcc_purge_prog, pidfile)
+
+    def _dump_config(self):
+        """
+        Print the configuration of this LPCC to stderr, framed by banners
+        """
+        eprint("========== Config ==========")
+        # Dump to stderr like the banner lines; writing the body to stdout
+        # splits the output across two streams (and two buffering modes).
+        yaml.safe_dump(self.lpcc_config, sys.stderr, default_flow_style=False)
+        eprint("============================")
+
+    def start(self):
+        """
+        Start a PCC device and related services
+        """
+        eprint("Start PCC...")
+        self._dump_config()
+
+        retcode = self._add_pcc()
+        if retcode != 0:
+            return retcode
+
+        retcode = self._start_lpcc_purge()
+        if retcode != 0:
+            self._del_pcc()
+            return retcode
+
+        eprint("Done")
+        eprint()
+        return 0
+
+    def stop(self):
+        """
+        Stop a PCC device and related services
+        """
+        eprint("Stop PCC...")
+        self._dump_config()
+
+        # Stop the purge process first, then remove the PCC backend.
+        # The results of both steps are ignored; stop() always returns 0.
+        self._stop_lpcc_purge()
+        self._del_pcc()
+
+        eprint("Done")
+        eprint()
+        return 0
+
+    def status(self):
+        """
+        Get the status of PCC and service
+
+        Returns a dict that always has 'mount', 'cache' and 'status'
+        ("error", "stopped" or "running").  When the PCC is running it also
+        has 'roid', 'autocache' and 'purge' ("running" or "stopped");
+        'error_msg' is set when something is wrong.
+        """
+        result = {}
+        result['mount'] = self.lpcc_mount
+        result['cache'] = self.lpcc_cache
+
+        cmdline = ['lctl', 'pcc', 'list', self.lpcc_mount]
+        try:
+            output = subprocess.check_output(cmdline)
+        except subprocess.CalledProcessError as err:
+            result['status'] = "error"
+            # NOTE(review): this treats the exit status of lctl as an errno
+            # value -- confirm that lctl really exits with errno codes.
+            result['error_msg'] = os.strerror(err.returncode)
+            return result
+
+        result['status'] = "stopped"
+        # safe_load: the output only needs plain YAML types, and yaml.load()
+        # without an explicit Loader is rejected by PyYAML >= 6.
+        pcclist = yaml.safe_load(output)
+        if isinstance(pcclist, dict):
+            # 'pcc' may be missing or empty when no backend is attached
+            for pcc in pcclist.get('pcc') or []:
+                if pcc['pccpath'] == self.lpcc_cache:
+                    result['status'] = "running"
+                    result['roid'] = pcc['roid']
+                    result['autocache'] = pcc['autocache']
+                    break
+
+        if result['status'] != "running":
+            return result
+
+        # Now check lpcc_purge process
+        pidfile = '/var/run/lpcc_purge-%d.pid' % self.lpcc_roid
+        succ = LpccService._check_process_by_pidfile(self.lpcc_purge_prog, pidfile)
+        if succ:
+            result['purge'] = "running"
+        else:
+            result['purge'] = "stopped"
+            result['error_msg'] = "lpcc_purge is not running!"
+
+        return result
+
+    def is_running(self):
+        """
+        Check the status of PCC, return True if PCC is started, or False
+        """
+        pcc_status = self.status()
+        if pcc_status.get('status') == "running":
+            return True
+
+        return False
+
+    def is_stopped(self):
+        """
+        Check the status of PCC, return True if PCC is stopped, or False.
+        An "error" status is neither running nor stopped, so it yields False.
+        """
+        pcc_status = self.status()
+        if pcc_status.get('status') == "stopped":
+            return True
+
+        return False
+
+
+class LpccMonitor:
+    """
+    Class to monitor mounted fs and start pcc if it's configurated
+    """
+
+    config_obj = None
+
+    def __init__(self, config_file):
+        """
+        Load the configuration from config_file.
+
+        After construction self.config_obj is a list of LPCC config
+        entries (empty if the file is missing or empty), or None if the
+        file content is not valid.
+        """
+        try:
+            with open(config_file, "r") as file_handle:
+                self.config_obj = yaml.safe_load(file_handle)
+            # if config_obj is None, it means the config file is empty but still valid
+            if self.config_obj is None:
+                eprint("Config file '%s' is empty, the service won't do any real work!" % \
+                    config_file)
+                self.config_obj = []
+        except FileNotFoundError:
+            # if config file does not exist, it's the same as an empty config file
+            eprint("Config file '%s' does not exist, the service won't do any real work!" % \
+                config_file)
+            self.config_obj = []
+        except yaml.YAMLError as err:
+            # a file that is not parsable YAML is reported like any other
+            # invalid configuration instead of escaping as a traceback
+            eprint("Config file '%s' is not valid YAML: %s" % (config_file, err))
+            self.config_obj = None
+        else:
+            if not self._check_config():
+                # None means invalid config file or information
+                self.config_obj = None
+
+    def _check_config(self):
+        if not isinstance(self.config_obj, list):
+            eprint("Config information is not valid!")
+            return False
+        return True
+
+    def _scan_start_pcc(self):
+        for lpcc_config in self.config_obj:
+            lpcc_service = LpccService(lpcc_config)
+
+            if bool(lpcc_config.get('disabled')):
+                continue
+            if not os.path.ismount(lpcc_config['mount']):
+                continue
+            if lpcc_service.is_stopped():
+                lpcc_service.start()
+
+        return 0
+
+    def _start_pcc(self, request):
+        count = 0
+        response = {}
+        mount = request.get('mount')
+        cache = request.get('cache')
+
+        for lpcc_config in self.config_obj:
+            if mount is not None and mount != lpcc_config['mount']:
+                continue
+            if cache is not None and cache != lpcc_config['cache']:
+                continue
+
+            count = count + 1
+            lpcc_service = LpccService(lpcc_config)
+            if 'disabled' in lpcc_config:
+                del lpcc_config['disabled']
+            if lpcc_service.is_stopped():
+                lpcc_service.start()
+
+        if count == 0 and mount is not None:
+            response['retcode'] = errno.ENOENT
+            response['error_msg'] = "No matched configuration for mount='%s' cache='%s'" \
+                % (mount, cache)
+        else:
+            response['retcode'] = 0
+
+        response['count'] = count
+        return response
+
+    def _stop_pcc(self, request):
+        count = 0
+        response = {}
+        mount = request.get('mount')
+        cache = request.get('cache')
+
+        for lpcc_config in self.config_obj:
+            if mount is not None and mount != lpcc_config['mount']:
+                continue
+            if cache is not None and cache != lpcc_config['cache']:
+                continue
+
+            count = count + 1
+            lpcc_service = LpccService(lpcc_config)
+            if not lpcc_service.is_stopped():
+                lpcc_service.stop()
+            if not bool(request.get('keep-enabled')):
+                lpcc_config['disabled'] = True
+
+        if count == 0 and mount is not None:
+            response['retcode'] = errno.ENOENT
+            response['error_msg'] = "No matched configuration for mount='%s' cache='%s'" \
+                % (mount, cache)
+        else:
+            response['retcode'] = 0
+
+        response['count'] = count
+        return response
+
+    def _stop_all_pcc(self):
+        request = {}
+        request['action'] = 'stop-all'
+        return self._stop_pcc(request)
+
+    def _status_pcc(self, request):
+        response = {}
+        mount = request.get('mount')
+        cache = request.get('cache')
+
+        status_list = []
+        for lpcc_config in self.config_obj:
+            if mount is not None and mount != lpcc_config['mount']:
+                continue
+            if cache is not None and cache != lpcc_config['cache']:
+                continue
+
+            lpcc_service = LpccService(lpcc_config)
+            lpcc_status = lpcc_service.status()
+            if bool(lpcc_config.get('disabled')):
+                lpcc_status['disabled'] = True
+            status_list.append(lpcc_status)
+
+        response['retcode'] = 0
+        response['status_list'] = status_list
+        return response
+
+    def _process_cmd(self, request):
+        """
+        Dispatch one request dict to the matching handler.
+
+        Returns the response dict of the handler with the original request
+        attached under 'request'.  A missing or unknown 'action' yields
+        retcode -1 instead of raising.
+        """
+        response = {}
+
+        # .get(): a request without 'action' must not raise KeyError
+        action = request.get('action')
+        if action in ("start", "start-all"):
+            response = self._start_pcc(request)
+        elif action in ("stop", "stop-all"):
+            response = self._stop_pcc(request)
+        elif action in ("status", "status-all"):
+            response = self._status_pcc(request)
+        else:
+            response['retcode'] = -1
+            response['error_msg'] = "Unknown action '%s'" % action
+
+        response['request'] = request
+        return response
+
+    def _serve_cmd(self):
+        """
+        Accept one connection on the command socket, read a single-line
+        JSON request, process it and send the JSON response back.
+
+        Failures are logged and never propagate; the connection is always
+        closed before returning.
+        """
+        try:
+            conn, _ = LISTEN_SOCK.accept()
+        except Exception as ex:
+            # nothing was accepted, so there is nobody to answer
+            eprint(ex)
+            return
+
+        try:
+            try:
+                with conn.makefile() as reader:
+                    request = json.loads(reader.readline())
+                if not isinstance(request, dict):
+                    raise ValueError("request is not a JSON object")
+            except Exception as ex:
+                # tell the client why the request was rejected instead of
+                # continuing with an undefined request
+                eprint(ex)
+                response = {'retcode': errno.EINVAL,
+                            'error_msg': "Invalid request: %s" % ex}
+            else:
+                eprint("Request:", request)
+                response = self._process_cmd(request)
+
+            eprint("Response:", response)
+            try:
+                # sendall: send() may transmit only part of the buffer
+                conn.sendall(bytes(json.dumps(response), encoding='utf-8'))
+            except Exception as ex:
+                eprint(ex)
+        finally:
+            conn.close()
+
+    def run(self):
+        """
+        Start monitor daemon, scan and start PCC in config file,
+        monitor /proc/self/mounts and listen on command socket
+        """
+        mounts_fh = open("/proc/self/mounts", "r")
+
+        if os.path.exists(LISTEN_SOCK_FN):
+            os.unlink(LISTEN_SOCK_FN)
+
+        global LISTEN_SOCK
+        LISTEN_SOCK = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        LISTEN_SOCK.bind(LISTEN_SOCK_FN)
+        LISTEN_SOCK.listen(1)
+
+        self._scan_start_pcc()
+
+        while True:
+            try:
+                rset, _, eset = select.select([LISTEN_SOCK], [], [mounts_fh])
+            except OSError:
+                break
+            except ValueError:
+                break
+
+            if LISTEN_SOCK in rset:
+                self._serve_cmd()
+            if mounts_fh in eset:
+                self._scan_start_pcc()
+
+        eprint("Do cleaning...")
+        self._stop_all_pcc()
+        mounts_fh.close()
+        LISTEN_SOCK.close()
+        os.unlink(LISTEN_SOCK_FN)
+
+        return 0
+
+
+class LpccCli:
+    """
+    Class to get command from cli, communicate with monitor,
+    and show result
+    """
+
+    def __init__(self):
+        pass
+
+    def run_cmd(self, cmd):
+        """
+        Communicate with server and run a sub command
+
+        cmd is a JSON-serializable request dict.  Returns the decoded
+        response dict.  Exits the process with status 1 if the monitor
+        service cannot be reached or closes the connection without a reply.
+        """
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            try:
+                sock.connect(LISTEN_SOCK_FN)
+            except FileNotFoundError:
+                eprint("Socket file '%s' does not exist, " % LISTEN_SOCK_FN + \
+                    "please check whether the monitor service started!")
+                sys.exit(1)
+            except ConnectionRefusedError:
+                # stale socket file left behind by a dead monitor
+                eprint("Nothing is listening on socket file '%s', " % LISTEN_SOCK_FN + \
+                    "please check whether the monitor service started!")
+                sys.exit(1)
+
+            sock.sendall(bytes(json.dumps(cmd), encoding='utf-8'))
+            # half-close so that the server's readline() sees EOF
+            sock.shutdown(socket.SHUT_WR)
+            with sock.makefile() as reader:
+                response = reader.readline()
+        finally:
+            sock.close()
+
+        if not response:
+            eprint("The monitor service closed the connection without a response!")
+            sys.exit(1)
+
+        return json.loads(response)
+
+
+def sigint_handler(signum, frame):
+    """
+    SIGINT/SIGTERM handler: close the listen socket so that the monitor
+    loop leaves select() and runs its cleanup
+    """
+    #pylint: disable=unused-argument
+    eprint("Received signal %s" % signal.Signals(signum).name)
+    if LISTEN_SOCK is None:
+        # the signal arrived before the monitor created its socket, so
+        # there is no loop to wake up; just terminate
+        sys.exit(1)
+    # close the listen socket to notify the monitor service to exit
+    LISTEN_SOCK.close()
+
+def main():
+    """
+    Parse the command line and either run the monitor daemon or send one
+    request to it.  Returns the process exit status.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config-file', default='/etc/lpcc.conf',
+                        help='specify the config file')
+    subparsers = parser.add_subparsers(dest='action')
+    subparsers.add_parser('monitor', help='start the monitor process')
+
+    start_parser = subparsers.add_parser('start', help=\
+        'start one LPCC of specified lustre file system and cache dir, ' +\
+        'or all LPCCs based on specified lustre file system')
+    start_parser.add_argument('mount', nargs=1, help=\
+        'the mount point of lustre file system')
+    start_parser.add_argument('cache', nargs='?', help=\
+        'the cache dir of LPCC')
+
+    stop_parser = subparsers.add_parser('stop', help=\
+        'stop one LPCC of specified lustre file system and cache dir, ' +\
+        'or all LPCCs based on specified lustre file system')
+    stop_parser.add_argument('mount', nargs=1, help=\
+        'the mount point of lustre file system')
+    stop_parser.add_argument('cache', nargs='?', help=\
+        'the cache dir of LPCC')
+    stop_parser.add_argument('--keep-enabled', action='store_true', help=\
+        'keep the LPCC enabled, which means when the lustre file system is ' +\
+        'mounted again, start all LPCCs based on it')
+
+    status_parser = subparsers.add_parser('status', help=\
+        'get the status of one LPCC of specified lustre file system and ' +\
+        'cache dir, or all LPCCs based on specified lustre file system')
+    status_parser.add_argument('mount', nargs=1, help=\
+        'the mount point of lustre file system')
+    status_parser.add_argument('cache', nargs='?', help=\
+        'the cache dir of LPCC')
+
+    subparsers.add_parser('start-all', help='start all LPCCs')
+    subparsers.add_parser('stop-all', help='stop all LPCCs')
+    subparsers.add_parser('status-all', help='get the status of all LPCCs')
+
+    args = parser.parse_args()
+
+    if args.action == 'monitor':
+        signal.signal(signal.SIGINT, sigint_handler)
+        signal.signal(signal.SIGTERM, sigint_handler)
+
+        monitor = LpccMonitor(args.config_file)
+        if monitor.config_obj is None:
+            return 1
+
+        try:
+            retcode = monitor.run()
+        finally:
+            # make sure no stale socket file is left behind
+            if os.path.exists(LISTEN_SOCK_FN):
+                os.unlink(LISTEN_SOCK_FN)
+        return retcode
+
+    if args.action == 'start' or args.action == 'stop':
+        request = {}
+        request['action'] = args.action
+        request['mount'] = args.mount[0]
+        request['cache'] = args.cache
+        # only the 'stop' sub parser defines --keep-enabled
+        if getattr(args, 'keep_enabled', False):
+            request['keep-enabled'] = True
+
+        response = LpccCli().run_cmd(request)
+
+        print(response)
+        return response['retcode']
+
+    if args.action == 'start-all' or args.action == 'stop-all':
+        request = {}
+        request['action'] = args.action
+
+        response = LpccCli().run_cmd(request)
+
+        print(response)
+        return response['retcode']
+
+    if args.action == 'status':
+        request = {}
+        request['action'] = args.action
+        request['mount'] = args.mount[0]
+        request['cache'] = args.cache
+
+        response = LpccCli().run_cmd(request)
+
+        print(json.dumps(response['status_list'], indent=4))
+        return response['retcode']
+
+    if args.action == 'status-all':
+        request = {}
+        request['action'] = args.action
+        response = LpccCli().run_cmd(request)
+
+        print(json.dumps(response['status_list'], indent=4))
+        return response['retcode']
+
+    eprint("Type 'lpcc -h' for more information.")
+    return 1
+
 
 if __name__ == "__main__":
-    lpcc.main()
+    ret = main()
+    sys.exit(ret)
index 44e90e5..3a19f93 100644 (file)
@@ -1,75 +1,14 @@
-# Configuration file of Lustre Persistent Client Cache Management
-#
-# Configuration Guide:
-#
-# $fsname:
-# File system name of Lustre
-#
-# $ssh_hosts:
-# $ssh_hosts includes the informations of logining to the server hosts using
-# SSH connections. $host_id is the unique ID of the host. Two hosts shouldn't
-# share a same $host_id. $hostname is the host name to use when connecting to
-# the host using SSH. $host_id and $hostname could be different, because there
-# could multiple ways to connect to the same host. $ssh_identity_file is the
-# SSH key file used when connecting to the host. $ssh_identity_file could be
-# omitted if the default SSH identity file works.
-#
-# $mds_hosts:
-# $mds_hosts includes all the hosts that could be running MDT of this file
-# system. Multiple hosts can be configured to support failover.
-# "lctl set_param" commands will be run on the MDT to configure the system
-# properly for HSM.
-#
-# $lpcc_readwrite_datasets:
-# $lpcc_readwrite_datasets includes all the clients that needs to enable
-# readwrite LPCC. $host_id is the host with this client. $archive_id is the
-# HSM archive ID reserved for this client. $lpcc_root is the path of the LPCC
-# root directory, usually a mounted local file system on SSD.
-# $lustre_mount_point is the Lustre client mount point. $client_id is a unique
-# ID of the LPCC client. Two LPCC clients shouldn't share a same $client_id.
-#
-fsname: 969362ae                           # File system name of Lustre
-ssh_hosts:                                 # Array of hosts
-  - host_id: server17-el7-vm1              # ID of this SSH host
-    hostname: server17-el7-vm1             # The host name
-    ssh_identity_file: /root/.ssh/id_dsa   # The SSH key to connect to the host
-  - host_id: server17-el7-vm2
-    hostname: server17-el7-vm2
-    ssh_identity_file: /root/.ssh/id_dsa
-  - host_id: server17-el7-vm3
-    hostname: server17-el7-vm3
-    ssh_identity_file: /root/.ssh/id_dsa
-mds_hosts:                                 # Array of hosts that could have MDTs
-  - host_id: server17-el7-vm1              # ID of the host running MDS
-lustre_clients:
-  - host_id: server17-el7-vm2              # ID of the host running client
-    lustre_mount_point: /mnt/lustre        # Lustre mount point
-    client_id: server17-el7-vm2            # ID of this Lustre client
-  - host_id: server17-el7-vm3
-    lustre_mount_point: /mnt/lustre
-    client_id: server17-el7-vm3
-lpcc_readwrite_datasets:                   # Array of client with readwrite LPCC
-  - client_id: server17-el7-vm2            # ID of the Lustre client
-    archive_id: 1                          # Archive number
-    lpcc_root: /mnt/lpcc                   # LPCC root path
-    project_id: 100                        # Project ID of new files for automatic caching
-    dataset_id: server17-el7-vm2_lpcc_rw   # ID of this LPCC dataset
-  - client_id: server17-el7-vm3
-    archive_id: 2
-    lpcc_root: /mnt/lpcc
-    project_id: 101
-    dataset_id: server17-el7-vm3_lpcc_rw
-lpcc_readonly_dataset_groups:              # Array of client with readonly LPCC
-  - group_id: 3                            # Group ID
-    lpcc_root: /mnt/lpcc_ro_g3             # LPCC root path
-    project_id: 10                         # Project ID of new files for automatic caching
-    lustre_clients:                        # Array of Lustre clients to enable this dataset
-      - client_id: server17-el7-vm2        # ID of Lustre client
-      - client_id: server17-el7-vm3
-  - group_id: 4
-    lpcc_root: /mnt/lpcc_ro_g4
-    lustre_mount_point: /mnt/lustre
-    project_id: 11
-    lustre_clients:
-      - client_id: server17-el7-vm2
-      - client_id: server17-el7-vm3
+#- mount: /mnt/lfs
+#  cache: /mnt/pcc
+#  roid: 2
+#  autocache: projid={500 1000}&fname={*.h5},uid={1001}
+#  purge:
+#    high_usage: 90
+#    low_usage: 75
+#    scan_threads: 1
+#    interval: 30
+#
+#- mount: /mnt/lfs
+#  cache: /mnt/pcc2
+#  roid: 3
+#  autocache: projid={500}
diff --git a/lipe/lpcc_cleanup b/lipe/lpcc_cleanup
deleted file mode 100755 (executable)
index c283a7a..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/python2 -u
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-LPCC(Lustre Persistent Client Cache) Cleanup
-"""
-from pylpcc import lpcc_cleanup
-
-if __name__ == "__main__":
-    lpcc_cleanup.main()
diff --git a/lipe/lpcc_test b/lipe/lpcc_test
deleted file mode 100755 (executable)
index 34e34a9..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/python2 -u
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-"""
-Tests for LPCC(Lustre Persistent Cache Management)
-"""
-from pylpcc import lpcc_test
-
-if __name__ == "__main__":
-    lpcc_test.main()
diff --git a/lipe/man/lpcc-start.8 b/lipe/man/lpcc-start.8
new file mode 100644 (file)
index 0000000..661b3e3
--- /dev/null
@@ -0,0 +1,26 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2021, DDN and/or its affiliates. All rights reserved.
+.\" This file may be copied under the terms of the GNU Public License, v2.
+.\"
+.TH lpcc-start 8 "2021 Jul 7" Lustre "configuration utilities"
+
+.SH NAME
+lpcc-start - lpcc start sub command
+
+.SH SYNOPSIS
+.BI "lpcc start MOUNT_POINT [CACHE_DIR]"
+.PP
+.BI "lpcc start-all"
+.PP
+
+.SH DESCRIPTION
+Start a specific LPCC if both \fBMOUNT_POINT\fR and \fBCACHE_DIR\fR
+are specified.
+Start all LPCCs based on a specific Lustre file system if only
+\fBMOUNT_POINT\fR is specified.
+\fBstart-all\fR sub command starts all LPCCs in the config file.
+.PP
+.SH "SEE ALSO"
+.BR lpcc(8)
+.BR lpcc-stop(8)
+.BR lpcc-status(8)
diff --git a/lipe/man/lpcc-status.8 b/lipe/man/lpcc-status.8
new file mode 100644 (file)
index 0000000..ac212dc
--- /dev/null
@@ -0,0 +1,26 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2021, DDN and/or its affiliates. All rights reserved.
+.\" This file may be copied under the terms of the GNU Public License, v2.
+.\"
+.TH lpcc-status 8 "2021 Jul 7" Lustre "configuration utilities"
+
+.SH NAME
+lpcc-status - lpcc status sub command
+
+.SH SYNOPSIS
+.BI "lpcc status MOUNT_POINT [CACHE_DIR]"
+.PP
+.BI "lpcc status-all"
+.PP
+
+.SH DESCRIPTION
+Get the status of a specific LPCC if both \fBMOUNT_POINT\fR and \fBCACHE_DIR\fR
+are specified.
+Get the status of all LPCCs based on a specific Lustre file system if only
+\fBMOUNT_POINT\fR is specified.
+\fBstatus-all\fR sub command gets the status of all LPCCs in the config file.
+.PP
+.SH "SEE ALSO"
+.BR lpcc(8)
+.BR lpcc-start(8)
+.BR lpcc-stop(8)
diff --git a/lipe/man/lpcc-stop.8 b/lipe/man/lpcc-stop.8
new file mode 100644 (file)
index 0000000..cb26524
--- /dev/null
@@ -0,0 +1,34 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2021, DDN and/or its affiliates. All rights reserved.
+.\" This file may be copied under the terms of the GNU Public License, v2.
+.\"
+.TH lpcc-stop 8 "2021 Jul 7" Lustre "configuration utilities"
+
+.SH NAME
+lpcc-stop - lpcc stop sub command
+
+.SH SYNOPSIS
+.BI "lpcc stop MOUNT_POINT [CACHE_DIR] [OPTIONS]"
+.PP
+.BI "lpcc stop-all"
+.PP
+
+.SH DESCRIPTION
+Stop a specific LPCC if both \fBMOUNT_POINT\fR and \fBCACHE_DIR\fR
+are specified.
+Stop all LPCCs based on a specific Lustre file system if only
+\fBMOUNT_POINT\fR is specified.
+The \fBstop-all\fR sub command stops all LPCCs in config file.
+.PP
+.SH OPTIONS
+.TP
+.BR --keep-enabled
+used by
+.BR umount.lustre (8)
+to notify monitor daemon that the LPCC should be started again if the lustre
+file system is mounted again.
+
+.SH "SEE ALSO"
+.BR lpcc(8)
+.BR lpcc-start(8)
+.BR lpcc-status(8)
diff --git a/lipe/man/lpcc.8 b/lipe/man/lpcc.8
new file mode 100644 (file)
index 0000000..c32f609
--- /dev/null
@@ -0,0 +1,61 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2021, DDN and/or its affiliates. All rights reserved.
+.\" This file may be copied under the terms of the GNU Public License, v2.
+.\"
+.TH lpcc 8 "2021 Jul 7" Lustre "configuration utilities"
+
+.SH NAME
+lpcc - Management tool for Lustre Persistent Client Cache (LPCC)
+
+.SH SYNOPSIS
+.BI "lpcc -h|--help"
+.PP
+.BI "lpcc SUBCMD ARGS"
+.PP
+
+.SH DESCRIPTION
+To start/stop Lustre Persistent Client Cache (LPCC), there is a series of
+commands to be run correctly with consistent parameters. If there are multiple
+LPCCs on a client, it is even more complex.
+.PP
+.TP
+The \fBlpcc\fR tool helps to:
+.br
+\(bu configure all the LPCCs in a single file
+.br
+\(bu start/stop LPCCs automatically when the system boots up/shuts down
+.br
+\(bu monitor the mounting/umounting of lustre file system and start/stop
+LPCCs based on the file system
+.br
+\(bu start/stop specific LPCC manually
+
+.PP
+To use \fBlpcc\fR tool, first prepare a configuration file.
+The file is \fB/etc/lpcc.conf\fR by default.
+.PP
+Then start the monitor daemon. Usually this work is done by a wrapper
+systemd service \fBlpcc.service\fR.
+It is highly recommended to enable the service so that it is started
+automatically when the system boots up.
+.PP
+When the monitor daemon starts, it checks all the LPCCs. If the base lustre
+file system of any LPCC has been mounted, the LPCC will be started
+automatically.
+.PP
+If a lustre file system is mounted later, the monitor daemon checks any LPCC
+based on that file system and starts it. If a lustre file system is unmounted,
+\fBumount.lustre (8)\fR checks any LPCC based on that file system, and stops
+it before doing the real umounting.
+.PP
+While the monitor daemon is running, user can manually start/stop one specific
+LPCC by \fImount_point\fR and \fIcache_dir\fR, or all LPCCs based on a
+specific \fImount_point\fR.
+.PP
+All these LPCCs will be stopped when the monitor daemon stops.
+.SH "SEE ALSO"
+.BR lpcc.conf(5)
+.BR lpcc-start(8)
+.BR lpcc-stop(8)
+.BR lpcc-status(8)
+.BR lctl-pcc(8)
diff --git a/lipe/man/lpcc.conf.5 b/lipe/man/lpcc.conf.5
new file mode 100644 (file)
index 0000000..8df803f
--- /dev/null
@@ -0,0 +1,137 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2021, DDN and/or its affiliates. All rights reserved.
+.\" This file may be copied under the terms of the GNU Public License, v2.
+.\"
+.TH lpcc.conf 5 "2021 Jul 7" Lustre "File Formats Manual"
+
+.SH NAME
+lpcc.conf - configuration file for lpcc systemd service
+
+.SH DESCRIPTION
+The file \fB/etc/lpcc.conf\fR contains a list of Lustre Persistent Client Cache
+(LPCC). The whole file is an array in YAML. Each element of the array
+is the configuration of a LPCC.
+For each LPCC, the configuration is a dictionary with these items:
+.PP
+.TP
+.BR mount
+The mount point of lustre file system to be cached
+.TP
+.BR cache
+The dir for cached file
+.TP
+.BR roid
+The id of LPCC. It is a positive integer and must be unique on a single client.
+.TP
+.BR autocache
+The condition to cache file automatically.
+.TP
+.BR purge
+More configuration for lpcc_purge daemon. Since all the sub items under it have
+default value, this item is not necessary if it has no explicit sub item.
+.TP
+.BR purge.high_usage
+If the disk usage of cache device is higher than \fBpurge.high_usage\fR, start
+purging. It is 90 (means 90% disk/inode usage) by default.
+.TP
+.BR purge.low_usage
+If the disk usage of cache device is lower than \fBpurge.low_usage\fR, stop
+purging. It is 75 (means 75% disk/inode usage) by default.
+.TP
+.BR purge.interval
+The interval for lpcc_purge to check cache device usage, in seconds. It is 5
+seconds by default.
+.TP
+.BR purge.scan_threads
+How many threads are used to scan cache device in parallel. It is 1 thread by
+default.
+
+.SH AUTOCACHE CONDITION
+When a file in lustre file system is opened, the autocache condition will be
+checked against the file. If the condition is true, the file will be cached in
+the cache device automatically.
+.PP
+The rule is either a single compare expression, or several compare expressions
+connected with '&' or ','. Here '&' is logical operator AND, ',' is logical
+operator OR. '&' has a higher priority than ','.
+.PP
+Each compare expression has 3 parts: attribute, operator and target.
+Attribute is the attribute of file to be checked, e.g., projid, fname.
+Operator is '=', '<' or '>'. Target is either a single value or a value group
+(several single values separated by a blank space ' '), enclosed in a pair of '{}'.
+.PP
+For example:
+.PP
+.TP
+.BR projid={500}
+projid is exactly 500.
+.TP
+.BR "projid={500 1000}"
+projid is either 500 or 1000.
+.TP
+.BR "projid={500 1000}&fname={*.h5},uid={1001}"
+Either case of:
+(a) projid is 500 or 1000 and fname matches *.h5;
+(b) process uid is exactly 1001.
+.PP
+These attributes are supported in compare expression:
+.PP
+.TP
+.BR projid
+The project id of file. It supports '=', '<' and '>' operators. '=' operator
+supports single value or value group. '<' and '>' support only single value.
+Each single value should be a number.
+.TP
+.BR fname
+The base name of file. It supports only '=' operator.
+Both single value and value group are supported.
+Each single value should be a precise file name string, or a pattern including
+wildcard '*'.
+.TP
+.BR uid
+The uid of the process to access the file. It supports '=', '<' and '>'
+operators. '=' operator supports single value or value group. '<' and '>'
+support only single value.
+Each single value should be a number.
+.TP
+.BR gid
+The gid of process to access the file. Similar to uid.
+.TP
+.BR size
+The size of file. It supports '=', '<' and '>' operators. '=' operator supports
+single value or value group. '<' and '>' support only single value.
+Each single value should be a number, or a number with unit.
+The unit could be: K, M, G, T, P, E.
+.TP
+.BR mtime
+The mtime of file. Actually it means the age, that is to say, the seconds of
+(current - mtime). It supports '=', '<' and '>' operators. '=' operator supports
+single value or value group. '<' and '>' support only single value.
+Each single value should be a number in seconds, or a number with unit.
+The unit could be: m(minute), h(hour), d(day), w(week), y(year). Here 1 year is
+exactly 52 weeks.
+For example, "mtime>{10m}" means the file was modified more than 10 minutes ago;
+"mtime<{1h30}" means the file was modified less than 1 hour and 30 seconds ago.
+
+.SH EXAMPLES
+.EX
+# sample /etc/lpcc.conf
+- mount: /mnt/lfs
+  cache: /mnt/pcc
+  roid: 2
+  autocache: projid={500 1000}&fname={*.h5},uid={1001}
+  purge:
+    high_usage: 85
+    low_usage: 70
+    scan_threads: 3
+    interval: 10
+- mount: /mnt/lfs2
+  cache: /mnt/pcc2
+  roid: 3
+  autocache: projid={500}
+.EE
+.SH "SEE ALSO"
+.BR lpcc(8)
+.BR lpcc-start(8)
+.BR lpcc-stop(8)
+.BR lpcc-status(8)
\ No newline at end of file
diff --git a/lipe/pylpcc/__init__.py b/lipe/pylpcc/__init__.py
deleted file mode 100644 (file)
index 9900853..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-"""
-Python library for LPCC
-"""
-__all__ = ["lpcc",
-           "lpcc_cleanup",
-           "lpcc_test"]
diff --git a/lipe/pylpcc/lpcc.py b/lipe/pylpcc/lpcc.py
deleted file mode 100644 (file)
index 2f146ba..0000000
+++ /dev/null
@@ -1,653 +0,0 @@
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-
-"""
-Library for managing LPCC(Lustre Persistent Client Cache)
-"""
-
-import sys
-import traceback
-import os
-import shutil
-import signal
-import time
-import re
-import filelock
-import yaml
-
-# Local libs
-from pylustre import lustre
-from pylustre import time_util
-from pylustre import utils
-from pylustre import daemon
-from pylustre import hsm
-from pylustre import clog
-
-LPCC_CONFIG_FNAME = "lpcc.conf"
-LPCC_CONFIG = "/etc/" + LPCC_CONFIG_FNAME
-LPCC_LOG_DIR = "/var/log/lpcc"
-LPCC_STATE_PATTERN = (r"^type: (?P<pcc_type>\S+), "
-                      r"PCC file: (?P<pcc_file>\S+), "
-                      r"user number: (?P<user_number>\S+), "
-                      r"attr cached: (?P<attr_cached>\S+)$")
-LPCC_STATE_REGULAR = re.compile(LPCC_STATE_PATTERN)
-LPCC_TYPE_NONE = "none"
-LPCC_TYPE_READONLY = "readonly"
-LPCC_TYPE_READWRITE = "readwrite"
-
-STR_SSH_HOSTS = "ssh_hosts"
-STR_HOSTNAME = "hostname"
-STR_HOST_ID = "host_id"
-STR_SSH_IDENTITY_FILE = "ssh_identity_file"
-STR_MDS_HOSTS = "mds_hosts"
-STR_LUSTRE_CLIENTS = "lustre_clients"
-STR_LUSTRE_MOUNT_POINT = "lustre_mount_point"
-STR_CLIENT_ID = "client_id"
-STR_LPCC_READWRITE_DATASETS = "lpcc_readwrite_datasets"
-STR_DATASET_ID = "dataset_id"
-STR_LPCC_ROOT = "lpcc_root"
-STR_GROUP_ID = "group_id"
-STR_ROOT = "root"
-STR_ARCHIVE_ID = "archive_id"
-STR_PROJECT_ID = "project_id"
-STR_LPCC_READONLY_DATASET_GROUPS = "lpcc_readonly_dataset_groups"
-
-
-def usage():
-    """
-    Print usage string
-    """
-    utils.oprint("Usage: %s <config_file>" % sys.argv[0])
-
-
-class LPCCDataset(object):
-    """
-    Each SSH host has an object of this type
-    """
-    # pylint: disable=too-few-public-methods,too-many-arguments,too-many-instance-attributes
-    def __init__(self, client_name, pcc_type, root, set_id, projid,
-                 lustre_client):
-        self.lpccd_lustre_client = lustre_client
-        self.lpccd_host = lustre_client.lc_host
-        self.lpccd_id = set_id
-        self.lpccd_projid = projid
-        self.lpccd_root = root
-        self.lpccd_client_name = client_name
-        self.lpccd_pcc_type = pcc_type
-        self.lpccd_lustre_mnt = lustre_client.lc_mnt
-
-    def lpccd_stop(self, log):
-        """
-        Delete this dataset from LPCC
-        """
-        command = ("echo -n 'del %s' > /proc/fs/lustre/llite/%s/pcc" %
-                   (self.lpccd_root, self.lpccd_client_name))
-        retval = self.lpccd_host.sh_run(log, command)
-        if retval.cr_exit_status:
-            log.cl_error("failed to run command [%s] on host [%s], "
-                         "ret = [%d], stdout = [%s], stderr = [%s]",
-                         command,
-                         self.lpccd_host.sh_hostname,
-                         retval.cr_exit_status,
-                         retval.cr_stdout,
-                         retval.cr_stderr)
-            return -1
-        return 0
-
-    def lpccd_start(self, log):
-        """
-        Add this dataset to LPCC
-        """
-        command = ("echo -n 'add %s %s %s' > /proc/fs/lustre/llite/%s/pcc" %
-                   (self.lpccd_root, self.lpccd_id, self.lpccd_projid,
-                    self.lpccd_client_name))
-        retval = self.lpccd_host.sh_run(log, command)
-        if retval.cr_exit_status:
-            log.cl_error("failed to run command [%s] on host [%s], "
-                         "ret = [%d], stdout = [%s], stderr = [%s]",
-                         command,
-                         self.lpccd_host.sh_hostname,
-                         retval.cr_exit_status,
-                         retval.cr_stdout,
-                         retval.cr_stderr)
-            return -1
-        return 0
-
-
-class LPCCRwDataset(LPCCDataset):
-    """
-    Each readwrite PCC client has an object of this type
-    """
-    # pylint: disable=too-few-public-methods,too-many-instance-attributes
-    # pylint: disable=too-many-arguments
-    def __init__(self, log, dataset_id, archive_id, project_id, lrwd_root,
-                 parent_directory, fsname, have_raolu, mdts, client_name,
-                 lustre_client):
-        super(LPCCRwDataset, self).__init__(client_name,
-                                            LPCC_TYPE_READWRITE,
-                                            lrwd_root, archive_id,
-                                            project_id,
-                                            lustre_client)
-        self.lrwd_workspace = parent_directory + "/" + dataset_id
-        self.lrwd_copytool = hsm.HSMCopytool("copytool", lustre_client.lc_host, archive_id,
-                                             lrwd_root, lustre_client.lc_mnt,
-                                             self.lrwd_workspace)
-        self.lrwd_parent_directory = parent_directory
-        self.lrwd_have_raolu = have_raolu
-        self.lrwd_removers = []
-        if not have_raolu:
-            for mdt in mdts:
-                remover_id = "remover_" + mdt.ls_index_string
-                remover = hsm.HSMRemover(log, remover_id, lustre_client.lc_host, fsname, mdt,
-                                         lrwd_root, self.lrwd_workspace)
-                self.lrwd_removers.append(remover)
-
-    def lrwd_killall(self, log):
-        """
-        Kill all the process of this LPCC client
-        """
-        self.lrwd_copytool.hc_killall(log)
-        for remover in self.lrwd_removers:
-            remover.hr_killall(log)
-        return 0
-
-    def lpccd_stop(self, log):
-        """
-        Stop all the process of this LPCC client
-        """
-        self.lrwd_killall(log)
-        self.lrwd_copytool.hc_thread.join()
-        for remover in self.lrwd_removers:
-            remover.hr_thread.join()
-            remover.hr_fini(log)
-        return super(LPCCRwDataset, self).lpccd_stop(log)
-
-    def lpccd_start(self, log):
-        """
-        Start all the process of this LPCC client
-        """
-        ret = utils.mkdir(self.lrwd_workspace)
-        if ret:
-            log.cl_error("failed to create directory [%s] on local host",
-                         self.lrwd_workspace)
-            return -1
-
-        self.lrwd_killall(log)
-        ret = super(LPCCRwDataset, self).lpccd_start(log)
-        if ret:
-            log.cl_error("failed to add dataset [%s] of mnt [%s] on host "
-                         "[%s]", self.lpccd_root, self.lpccd_host.sh_hostname,
-                         self.lpccd_lustre_mnt)
-            return -1
-
-        for remover in self.lrwd_removers:
-            ret = remover.hr_thread_start(log)
-            if ret:
-                log.cl_error("failed to start remover thread")
-                return -1
-        ret = self.lrwd_copytool.hc_thread_start(log)
-        if ret:
-            log.cl_error("failed to start copytool thread for dataset [%s] of "
-                         "mnt [%s] on host [%s]", self.lpccd_root,
-                         self.lpccd_host.sh_hostname, self.lpccd_lustre_mnt)
-            return -1
-
-        return 0
-
-
-def find_lpcc_dataset_from_id(datasets, set_id):
-    """
-    Find LPCC from archive ID
-    """
-    # pylint: disable=unused-variable
-    for dataset in datasets:
-        if dataset.lpccd_id == set_id:
-            return dataset
-    return None
-
-
-class LPCCManager(object):
-    """
-    Each SSH host has an object of this type
-    """
-    # pylint: disable=too-few-public-methods,too-many-arguments,too-many-instance-attributes
-    def __init__(self, workspace, config_fpath):
-        self.lm_rw_dataset_dict = {}
-        self.lm_rw_datasets = []
-        self.lm_ro_dataset_groups = {}
-        self.lm_ro_datasets = []
-        self.lm_lustre_clients = {}
-        self.lm_workspace = workspace
-        self.lm_config_fpath = config_fpath
-        self.lm_fsname = None
-        self.lm_hosts = {}
-        self.lm_mdt_hosts = []
-
-    def lm_parse(self, log):
-        """
-        Parse the configuration
-        """
-        # pylint: disable=bare-except,too-many-locals,too-many-return-statements
-        # pylint: disable=too-many-branches,too-many-statements,unused-variable
-        config_fd = open(self.lm_config_fpath)
-        ret = 0
-        try:
-            config = yaml.load(config_fd)
-        except:
-            log.cl_error("not able to load [%s] as yaml file: %s",
-                         self.lm_config_fpath, traceback.format_exc())
-            ret = -1
-        config_fd.close()
-        if ret:
-            return -1
-
-        fsname = config["fsname"]
-        host_configs = config[STR_SSH_HOSTS]
-        for host_config in host_configs:
-            hostname = host_config[STR_HOSTNAME]
-            host_id = host_config[STR_HOST_ID]
-            if STR_SSH_IDENTITY_FILE in host_config:
-                ssh_identity_file = host_config[STR_SSH_IDENTITY_FILE]
-            else:
-                ssh_identity_file = None
-            if host_id in self.lm_hosts:
-                log.cl_error("multiple hosts with the same ID [%s]", host_id)
-                return -1
-            host = lustre.LustreServerHost(hostname,
-                                           identity_file=ssh_identity_file,
-                                           host_id=host_id)
-            self.lm_hosts[host_id] = host
-
-        mds_configs = config[STR_MDS_HOSTS]
-        for mds_config in mds_configs:
-            mds_host_id = mds_config[STR_HOST_ID]
-            if mds_host_id not in self.lm_hosts:
-                log.cl_error("no host with ID [%s] is configured", host_id)
-                return -1
-            host = self.lm_hosts[mds_host_id]
-            self.lm_mdt_hosts.append(host)
-
-        have_raolu = True
-        mdts = []
-        for host in self.lm_mdt_hosts:
-            tmp_clients = {}
-            tmp_osts = {}
-            tmp_mdts = {}
-            ret = host.lsh_lustre_detect_services(tmp_clients, tmp_osts, tmp_mdts)
-            if ret:
-                log.cl_error("failed to detect services on host [%s]",
-                             host.sh_hostname)
-                return -1
-            for mdt_index, mdt in tmp_mdts.iteritems():
-                if mdt.lsi_service.ls_lustre_fs.lf_fsname != fsname:
-                    continue
-                mdts.append(mdt)
-                ret = mdt.mdti_enable_hsm_control(log)
-                if ret:
-                    return -1
-
-                ret = mdt.mdti_enable_raolu(log)
-                if ret < 0:
-                    return -1
-                elif ret == 1:
-                    have_raolu = False
-
-        lustre_client_configs = config[STR_LUSTRE_CLIENTS]
-        for lustre_client_config in lustre_client_configs:
-            host_id = lustre_client_config[STR_HOST_ID]
-            if host_id not in self.lm_hosts:
-                log.cl_error("no host with ID [%s] is configured", host_id)
-                return -1
-            host = self.lm_hosts[host_id]
-
-            lustre_mount_point = lustre_client_config[STR_LUSTRE_MOUNT_POINT]
-            client_id = lustre_client_config[STR_CLIENT_ID]
-            if client_id in self.lm_lustre_clients:
-                log.cl_error("multiple Lustre client with the same ID [%s]",
-                             client_id)
-                return -1
-            lustre_fs = lustre.LustreFilesystem(fsname)
-            lustre_client = lustre.LustreClient(log, lustre_fs, host, lustre_mount_point)
-            self.lm_lustre_clients[client_id] = lustre_client
-
-        if STR_LPCC_READWRITE_DATASETS in config:
-            lpcc_rw_dataset_configs = config[STR_LPCC_READWRITE_DATASETS]
-        else:
-            lpcc_rw_dataset_configs = []
-
-        for lpcc_rw_dataset_config in lpcc_rw_dataset_configs:
-            client_id = lpcc_rw_dataset_config[STR_CLIENT_ID]
-            if client_id not in self.lm_lustre_clients:
-                log.cl_error("no Lustre client with ID [%s] is configured", client_id)
-                return -1
-            lustre_client = self.lm_lustre_clients[client_id]
-            lustre_mount_point = lustre_client.lc_mnt
-
-            host = lustre_client.lc_host
-            dataset_id = lpcc_rw_dataset_config[STR_DATASET_ID]
-            if dataset_id in self.lm_rw_dataset_dict:
-                log.cl_error("multiple LPCC client with the same ID [%s]",
-                             dataset_id)
-                return -1
-            lpcc_root = lpcc_rw_dataset_config[STR_LPCC_ROOT]
-            archive_id = lpcc_rw_dataset_config[STR_ARCHIVE_ID]
-            lpcc_rw_dataset = find_lpcc_dataset_from_id(self.lm_rw_datasets, archive_id)
-            if lpcc_rw_dataset is not None:
-                log.cl_error("multiple LPCC client with the same archive ID [%s]",
-                             archive_id)
-                return -1
-
-            project_id = lpcc_rw_dataset_config[STR_PROJECT_ID]
-
-            client_name = host.lsh_getname(log, lustre_mount_point)
-            if client_name is None:
-                log.cl_error("failed to get client name of path [%s] on host "
-                             "[%s]", lustre_mount_point, host.sh_hostname)
-                return -1
-            if not client_name.startswith(fsname + "-"):
-                log.cl_error("client name [%s] of path [%s] on host [%s] "
-                             "doesn't have expected fsname [%s] ", client_name,
-                             lustre_mount_point, host.sh_hostname, fsname)
-                return -1
-            lpcc_rw_dataset = LPCCRwDataset(log, dataset_id, archive_id, project_id,
-                                            lpcc_root, self.lm_workspace,
-                                            fsname, have_raolu, mdts, client_name,
-                                            lustre_client)
-            self.lm_rw_dataset_dict[dataset_id] = lpcc_rw_dataset
-            self.lm_rw_datasets.append(lpcc_rw_dataset)
-
-        if STR_LPCC_READONLY_DATASET_GROUPS in config:
-            group_configs = config[STR_LPCC_READONLY_DATASET_GROUPS]
-        else:
-            group_configs = []
-        for group_config in group_configs:
-            lpcc_root = group_config[STR_LPCC_ROOT]
-            group_id = group_config[STR_GROUP_ID]
-            if group_id in self.lm_ro_dataset_groups:
-                log.cl_error("multiple LPCC readonly group with the same group ID [%s]",
-                             group_id)
-                return -1
-
-            project_id = group_config[STR_PROJECT_ID]
-            client_configs = group_config[STR_LUSTRE_CLIENTS]
-            group_datasets = []
-            for client_config in client_configs:
-                client_id = client_config[STR_CLIENT_ID]
-                if client_id not in self.lm_lustre_clients:
-                    log.cl_error("no Lustre client with ID [%s] is configured", client_id)
-                    return -1
-                lustre_client = self.lm_lustre_clients[client_id]
-
-                fsname = lustre_client.lc_lustre_fs.lf_fsname
-                host = lustre_client.lc_host
-                lustre_mount_point = lustre_client.lc_mnt
-                client_name = host.lsh_getname(log, lustre_mount_point)
-                if client_name is None:
-                    log.cl_error("failed to get client name of path [%s] on host "
-                                 "[%s]", lustre_mount_point, host.sh_hostname)
-                    return -1
-                if not client_name.startswith(fsname + "-"):
-                    log.cl_error("client name [%s] of path [%s] on host [%s] "
-                                 "doesn't have expected fsname [%s] ", client_name,
-                                 lustre_mount_point, host.sh_hostname, fsname)
-                    return -1
-
-                dataset = LPCCDataset(client_name, LPCC_TYPE_READONLY,
-                                      lpcc_root, group_id, project_id,
-                                      lustre_client)
-                self.lm_ro_datasets.append(dataset)
-                group_datasets.append(dataset)
-            self.lm_ro_dataset_groups[group_id] = group_datasets
-
-    def lm_start(self, log):
-        """
-        Start LPCC manager
-        """
-        # pylint: disable=unused-variable
-        for host_id, host in self.lm_hosts.iteritems():
-            clients = lustre.detect_lustre_clients(log, host)
-            for client in clients:
-                ret = lpcc_dataset_stop(log, host, client.lc_mnt)
-                if ret:
-                    return -1
-
-        for dataset in self.lm_rw_datasets:
-            ret = dataset.lpccd_start(log)
-            if ret:
-                log.cl_error("failed to start readwrite dataset")
-                return -1
-
-        for dataset in self.lm_ro_datasets:
-            ret = dataset.lpccd_start(log)
-            if ret:
-                log.cl_error("failed to start readonly dataset")
-                return -1
-
-        return 0
-
-    def lm_stop(self, log):
-        """
-        Stop LPCC manager
-        """
-        for lpcc_rw_dataset in self.lm_rw_datasets:
-            lpcc_rw_dataset.lpccd_stop(log)
-
-        for lpcc_rw_dataset in self.lm_ro_datasets:
-            lpcc_rw_dataset.lpccd_stop(log)
-
-
-def manage_lpcc_locked(log, workspace, config_fpath):
-    """
-    Manage LPCC clients holding the lock
-    """
-    manager = LPCCManager(workspace, config_fpath)
-    ret = manager.lm_parse(log)
-    if ret:
-        return ret
-
-    ret = manager.lm_start(log)
-    if ret:
-        return ret
-
-    while not daemon.SHUTTING_DOWN:
-        time.sleep(1)
-
-    ret = manager.lm_stop(log)
-
-    return 0
-
-
-def manage_lpcc(log, workspace, config_fpath):
-    """
-    Manage LPCC clients
-    """
-    # pylint: disable=bare-except
-    lock_file = config_fpath + ".lock"
-    lock = filelock.FileLock(lock_file)
-    try:
-        with lock.acquire(timeout=0):
-            try:
-                ret = manage_lpcc_locked(log, workspace, config_fpath)
-            except:
-                ret = -1
-                log.cl_error("exception: %s", traceback.format_exc())
-            lock.release()
-    except filelock.Timeout:
-        ret = -1
-        log.cl_error("someone else is holding lock of file [%s], aborting "
-                     "to prevent conflicts", lock_file)
-    return ret
-
-
-def lpcc_dataset_list(log, host, lustre_mount_point):
-    """
-    List the datasets on a Lustre mount point
-    """
-    # pylint: disable=too-many-locals
-    client_name = host.lsh_getname(log, lustre_mount_point)
-    if client_name is None:
-        return None
-
-    fsname = client_name.split('-')[0]
-    lustre_fs = lustre.LustreFilesystem(fsname)
-    lustre_client = lustre.LustreClient(log, lustre_fs, host, lustre_mount_point)
-
-    command = ("cat /proc/fs/lustre/llite/%s/pcc" % client_name)
-    retval = host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return None
-
-    dataset_pattern = (r"^(?P<root>\S+) (?P<archive_id>\S+) (?P<projid>\S+)$")
-    dataset_regular = re.compile(dataset_pattern)
-    datasets = []
-    for line in retval.cr_stdout.splitlines():
-        log.cl_debug("parsing line [%s] to get dataset", line)
-        match = dataset_regular.match(line)
-        if match:
-            root = match.group("root")
-            archive_id = match.group(STR_ARCHIVE_ID)
-            projid = match.group("projid")
-            dataset = LPCCDataset(client_name, LPCC_TYPE_NONE, root,
-                                  archive_id, projid, lustre_client)
-            datasets.append(dataset)
-            log.cl_debug("LPCC dataset [%s] configured on dir [%s] of host "
-                         "[%s]", root, lustre_mount_point, host.sh_hostname)
-        else:
-            reason = ("failed to parse line [%s] to get dataset" % line)
-            log.cl_error(reason)
-            raise Exception(reason)
-    return datasets
-
-
-def lpcc_dataset_stop(log, host, lustre_mount_point):
-    """
-    Stop the datasets on a Lustre mount point
-    """
-    datasets = lpcc_dataset_list(log, host, lustre_mount_point)
-    for dataset in datasets:
-        ret = dataset.lpccd_stop(log)
-        if ret:
-            return ret
-    return 0
-
-
-class PCCState(object):
-    """
-    The HSM state
-    """
-    # pylint: disable=too-few-public-methods
-    def __init__(self, pcc_type, pcc_file=None, user_number=None, attr_cached=None):
-        self.ps_type = pcc_type
-        self.ps_pcc_file = pcc_file
-        self.ps_user_number = user_number
-        self.ps_attr_cached = attr_cached
-
-
-def lfs_pcc_state(log, fpath, host=None):
-    """
-    PCC state
-    """
-    command = ("lfs pcc_state %s" % (fpath))
-    extra_string = ""
-    if host is None:
-        retval = utils.run(command)
-    else:
-        retval = host.sh_run(log, command)
-        extra_string = ("on host [%s]" % host.sh_hostname)
-    if retval.cr_exit_status != 0:
-        log.cl_error("failed to run command [%s]%s, "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command, extra_string,
-                     retval.cr_exit_status, retval.cr_stdout,
-                     retval.cr_stderr)
-        return None
-
-    file_part = "file: %s, " % fpath
-
-    output = retval.cr_stdout.strip()
-    if not output.startswith(file_part):
-        log.cl_error("unexpected output [%s]", output)
-        return None
-
-    fpath_len = len(file_part)
-    output = output[fpath_len:]
-
-    type_none = "type: none"
-    if output == type_none:
-        return PCCState("none")
-
-    match = LPCC_STATE_REGULAR.match(output)
-    if not match:
-        log.cl_error("output [%s] doesn't mather pattern [%s]",
-                     output, LPCC_STATE_PATTERN)
-        return None
-
-    pcc_type = match.group("pcc_type")
-    pcc_file = match.group("pcc_file")
-    user_number = match.group("user_number")
-    attr_cached = match.group("attr_cached")
-    return PCCState(pcc_type, pcc_file=pcc_file, user_number=user_number,
-                    attr_cached=attr_cached)
-
-
-def main():
-    """
-    Run LPCC manager
-    """
-    # pylint: disable=unused-variable,not-callable
-    if sys.version[0] == '2':
-        reload(sys)
-        if hasattr(sys, "setdefaultencoding"):
-            set_encoding = getattr(sys, "setdefaultencoding", None)
-            set_encoding('UTF-8')
-        else:
-            os.environ["PYTHONIOENCODING"] = 'UTF-8'
-    config_fpath = LPCC_CONFIG
-
-    if len(sys.argv) == 2:
-        config_fpath = sys.argv[1]
-    elif len(sys.argv) > 2:
-        usage()
-        sys.exit(-1)
-
-    identity = time_util.local_strftime(time_util.utcnow(), "%Y-%m-%d-%H_%M_%S")
-    workspace = LPCC_LOG_DIR + "/" + identity
-
-    if not os.path.exists(LPCC_LOG_DIR):
-        ret = utils.mkdir(LPCC_LOG_DIR)
-        if ret:
-            utils.eprint("failed to create directory [%s]" % LPCC_LOG_DIR)
-            sys.exit(-1)
-    elif not os.path.isdir(LPCC_LOG_DIR):
-        utils.eprint("[%s] is not a directory" % LPCC_LOG_DIR)
-        sys.exit(-1)
-
-    if not os.path.exists(workspace):
-        ret = utils.mkdir(workspace)
-        if ret:
-            utils.eprint("failed to create directory [%s]" % workspace)
-            sys.exit(-1)
-    elif not os.path.isdir(workspace):
-        utils.eprint("[%s] is not a directory" % workspace)
-        sys.exit(-1)
-
-    log = clog.get_log(resultsdir=workspace)
-    log.cl_info("started LPCC manager using config [%s], please check [%s] for "
-                "more log" % (config_fpath, workspace))
-    signal.signal(signal.SIGINT, daemon.signal_handler)
-    signal.signal(signal.SIGTERM, daemon.signal_handler)
-
-    save_fpath = workspace + "/" + LPCC_CONFIG_FNAME
-    log.cl_debug("copying config file from [%s] to [%s]", config_fpath,
-                 save_fpath)
-    shutil.copyfile(config_fpath, save_fpath)
-    ret = manage_lpcc(log, workspace, config_fpath)
-    sys.exit(ret)
diff --git a/lipe/pylpcc/lpcc_cleanup.py b/lipe/pylpcc/lpcc_cleanup.py
deleted file mode 100644 (file)
index 42c30ef..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-
-"""
-Library for cleanup LPCC(Lustre Persistent Client Cache) storage
-"""
-
-import sys
-import os
-import getopt
-
-# Local libs
-from pylustre import utils
-from pylustre import time_util
-from pylustre import clog
-from pylustre import hsm_check
-
-LPCC_CLEANUP_LOG_DIR = "/var/log/lpcc_cleanup"
-
-
-def usage():
-    """
-    Print the command line synopsis, using sys.argv[0] as program name
-    """
-    program = sys.argv[0]
-    synopsis = "Usage: %s <--pcc_root pcc_root> <--lustre_mnt lustre_mnt> "
-    utils.oprint(synopsis % program)
-
-
-def lfs_pcc_detach_fid(log, lustre_mnt, fid):
-    """
-    Run "lfs pcc_detach_fid" for [fid] on the Lustre mount point [lustre_mnt]
-
-    Return the exit status of the command (0) on success. Return None,
-    not a nonzero status, if the command fails.
-    """
-    command = "lfs pcc_detach_fid %s %s" % (lustre_mnt, fid)
-    result = utils.run(command)
-    status = result.cr_exit_status
-    if status == 0:
-        return status
-
-    log.cl_error("failed to run command [%s], "
-                 "ret = [%d], stdout = [%s], stderr = [%s]",
-                 command, status, result.cr_stdout, result.cr_stderr)
-    return None
-
-
-def pcc_fid_detach(log, lustre_mnt, fid_name):
-    """
-    Detach the file with FID [fid_name] on Lustre file system [lustre_mnt]
-    from PCC
-
-    Return 0 on success, -1 on failure.
-    """
-    ret = lfs_pcc_detach_fid(log, lustre_mnt, fid_name)
-    # lfs_pcc_detach_fid() reports failure as None rather than as a nonzero
-    # status, so a plain truth test would take a failed detach for success
-    if ret != 0:
-        log.cl_error("failed to detach FID [%s] on Lustre file system [%s] from PCC",
-                     fid_name, lustre_mnt)
-        return -1
-    return 0
-
-
-def _ensure_directory(path):
-    """
-    Make sure [path] exists as a directory, creating it when it is missing
-
-    Exits the process with status -1 if the directory cannot be created or
-    if [path] exists but is not a directory.
-    """
-    if not os.path.exists(path):
-        ret = utils.mkdir(path)
-        if ret:
-            sys.stderr.write("failed to create directory [%s]\n" % path)
-            sys.exit(-1)
-    elif not os.path.isdir(path):
-        sys.stderr.write("[%s] is not a directory\n" % path)
-        sys.exit(-1)
-
-
-def main():
-    """
-    Cleanup LPCC
-
-    Parses --pcc_root and --lustre_mnt from the command line, creates a
-    time-stamped log directory under LPCC_CLEANUP_LOG_DIR and runs
-    hsm_check.hsm_process() with pcc_fid_detach as the callback. The process
-    exits with the value returned by hsm_process(), 0 after printing help,
-    or -1 on a usage or setup error.
-    """
-    # pylint: disable=unused-variable,not-callable
-    if sys.version[0] == '2':
-        reload(sys)
-        if hasattr(sys, "setdefaultencoding"):
-            set_encoding = getattr(sys, "setdefaultencoding", None)
-            set_encoding('UTF-8')
-        else:
-            os.environ["PYTHONIOENCODING"] = 'UTF-8'
-
-    try:
-        options, remainder = getopt.getopt(sys.argv[1:],
-                                           "h",
-                                           ["help",
-                                            "pcc_root=",
-                                            "lustre_mnt="])
-    except getopt.GetoptError as err:
-        # Unknown option or missing argument: report it and show the usage
-        # instead of dying with a traceback
-        sys.stderr.write("%s\n" % err)
-        usage()
-        sys.exit(-1)
-
-    pcc_root = None
-    lustre_mnt = None
-    for opt, arg in options:
-        if opt == "--pcc_root":
-            pcc_root = arg.rstrip('/')
-        elif opt == "--lustre_mnt":
-            lustre_mnt = arg
-        elif opt in ('-h', "--help"):
-            usage()
-            sys.exit(0)
-    if pcc_root is None or lustre_mnt is None:
-        usage()
-        sys.exit(-1)
-
-    identity = time_util.local_strftime(time_util.utcnow(), "%Y-%m-%d-%H_%M_%S")
-    workspace = LPCC_CLEANUP_LOG_DIR + "/" + identity
-
-    # The parent log directory first, then the per-run workspace inside it
-    _ensure_directory(LPCC_CLEANUP_LOG_DIR)
-    _ensure_directory(workspace)
-
-    log = clog.get_log(resultsdir=workspace)
-    log.cl_info("started LPCC cleanup, please check [%s] for "
-                "more log" % (workspace))
-    ret = hsm_check.hsm_process(log, lustre_mnt, pcc_root, pcc_fid_detach)
-    sys.exit(ret)
diff --git a/lipe/pylpcc/lpcc_test.py b/lipe/pylpcc/lpcc_test.py
deleted file mode 100644 (file)
index 754ccda..0000000
+++ /dev/null
@@ -1,1028 +0,0 @@
-# Copyright (c) 2017 DataDirect Networks, Inc.
-# All Rights Reserved.
-# Author: lixi@ddn.com
-
-"""
-Library for testing LPCC(Lustre Persistent Client Cache)
-"""
-# pylint: disable=too-many-lines
-import sys
-import traceback
-import signal
-import os
-import shutil
-import filelock
-
-# Local libs
-from pylpcc import lpcc
-from pylustre import lustre
-from pylustre import lustre_test
-from pylustre import utils
-from pylustre import time_util
-from pylustre import daemon
-from pylustre import clog
-
-LPCC_LOG_TEST_DIR = "/var/log/lpcc_test"
-MANAGER = None
-LPCC_TESTS = []
-
-
-def usage():
-    """
-    Print the command line synopsis, using sys.argv[0] as program name
-    """
-    program = sys.argv[0]
-    utils.oprint("Usage: %s <config_file>" % program)
-
-
-def check_file_size(log, host, fpath, expected_size):
-    """
-    Check that the size of [fpath] on [host] equals [expected_size]
-
-    The size is read with "stat --printf=%s". Return 0 if the size matches,
-    -1 if the command fails, prints something that is not a number, or the
-    size differs.
-    """
-    command = ("stat --printf=%%s %s" % (fpath))
-    retval = host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    # Do not let an empty or garbled output escape as a ValueError; callers
-    # only expect a 0 / -1 result
-    try:
-        size = int(retval.cr_stdout)
-    except ValueError:
-        log.cl_error("unexpected output [%s] of command [%s] on host [%s]",
-                     retval.cr_stdout, command, host.sh_hostname)
-        return -1
-
-    if size != expected_size:
-        log.cl_error("wrong size of file [%s], expected [%s], got [%s]",
-                     fpath, expected_size, size)
-        return -1
-    return 0
-
-
-def check_lpcc_sizes(log, lpcc_host, lpcc_fpath, lustre_fpath, expected_size):
-    """
-    Check on [lpcc_host] that both the LPCC copy and the Lustre file have
-    the size [expected_size]. Return 0 if both match, otherwise the error
-    returned by check_file_size().
-    """
-    # The LPCC copy is checked before the Lustre file
-    checks = ((lpcc_fpath, "wrong size of LPCC file"),
-              (lustre_fpath, "wrong size of Lustre file"))
-    for fpath, message in checks:
-        ret = check_file_size(log, lpcc_host, fpath, expected_size)
-        if ret:
-            log.cl_error(message)
-            return ret
-    return 0
-
-
-def check_file_data(log, host, fpath, expected_data):
-    """
-    Check that [fpath] on [host] contains exactly [expected_data], first by
-    content and then by size. Return 0 if it matches, -1 otherwise.
-    """
-    # The content is read first and the size is checked afterwards, since
-    # reading the data might trigger HSM restore
-    command = ("cat %s" % (fpath))
-    result = host.sh_run(log, command)
-    status = result.cr_exit_status
-    if status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command, host.sh_hostname, status,
-                     result.cr_stdout, result.cr_stderr)
-        return -1
-
-    actual_data = result.cr_stdout
-    if actual_data != expected_data:
-        log.cl_error("wrong data of file [%s], expected [%s], "
-                     "got [%s]", fpath, expected_data, actual_data)
-        return -1
-
-    if check_file_size(log, host, fpath, len(expected_data)):
-        log.cl_error("wrong size of file [%s]", fpath)
-        return -1
-    return 0
-
-
-def check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, expected_data):
-    """
-    Check on [lpcc_host] that both the LPCC copy and the Lustre file contain
-    [expected_data]. Return 0 if both match, otherwise the error returned
-    by check_file_data().
-    """
-    # The LPCC copy is checked before the Lustre file
-    checks = ((lpcc_fpath, "wrong data of LPCC file"),
-              (lustre_fpath, "wrong data of Lustre file"))
-    for fpath, message in checks:
-        ret = check_file_data(log, lpcc_host, fpath, expected_data)
-        if ret:
-            log.cl_error(message)
-            return ret
-    return 0
-
-
-def check_multiop_exists(log, lpcc_rw_datasets):
-    """
-    Check that the host of every dataset in [lpcc_rw_datasets] has an
-    executable multiop command (lustre_test.MULTIOP)
-
-    Return 0 if all hosts have it, otherwise the nonzero result of
-    sh_file_executable() for the first host that lacks it.
-    """
-    for lpcc_rw_dataset in lpcc_rw_datasets:
-        host = lpcc_rw_dataset.lpccd_host
-        ret = host.sh_file_executable(log, lustre_test.MULTIOP)
-        if ret:
-            log.cl_error("command [%s] doesn't exist on host [%s]",
-                         lustre_test.MULTIOP, host.sh_hostname)
-            return ret
-    return 0
-
-
-def lpcc_cleanup_test_file(log, lpcc_host, lustre_dir, lustre_fpath):
-    """
-    Remove the test file [lustre_fpath], remove the directory [lustre_dir]
-    if it exists, and create [lustre_dir] again. All commands run on
-    [lpcc_host]. Return 0 on success, -1 if any of the commands fails.
-    """
-    def run_checked(command):
-        """
-        Run the command; log the result and return -1 if it fails
-        """
-        result = lpcc_host.sh_run(log, command)
-        if result.cr_exit_status:
-            log.cl_error("failed to run command [%s] on host [%s], "
-                         "ret = [%d], stdout = [%s], stderr = [%s]",
-                         command,
-                         lpcc_host.sh_hostname,
-                         result.cr_exit_status,
-                         result.cr_stdout,
-                         result.cr_stderr)
-            return -1
-        return 0
-
-    if run_checked("rm %s -f" % (lustre_fpath)):
-        return -1
-
-    # A failing "test -e" only means there is nothing to remove, so it is
-    # neither logged nor treated as an error
-    probe = lpcc_host.sh_run(log, "test -e %s" % (lustre_dir))
-    if probe.cr_exit_status == 0:
-        if run_checked("rmdir %s" % (lustre_dir)):
-            return -1
-
-    return run_checked("mkdir %s" % (lustre_dir))
-
-
-def _lpcc_rw_run(log, host, command):
-    """
-    Run [command] on [host]; log the full result and return -1 if it fails,
-    return 0 otherwise
-    """
-    retval = host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-    return 0
-
-
-def _lpcc_rw_expect_pcc_type(log, host, fpath, expected_type):
-    """
-    Return 0 if lpcc.lfs_pcc_state() reports [expected_type] for [fpath] on
-    [host], otherwise log the mismatch and return -1
-    """
-    pcc_state = lpcc.lfs_pcc_state(log, fpath, host=host)
-    if pcc_state.ps_type != expected_type:
-        log.cl_error("wrong PCC type, expected [%s], got [%s]",
-                     expected_type, pcc_state.ps_type)
-        return -1
-    return 0
-
-
-def lpcc_rw_test(log, restore=False, project=True):
-    """
-    Run LPCC readwrite tests on the first readwrite dataset of MANAGER
-
-    restore: if True, or if there is only one readwrite dataset, the cached
-             file is restored with an explicit HSM restore; otherwise the
-             file is read through the other datasets to trigger the restore.
-    project: if True, the project ID of the dataset is set on the test
-             directory with "chattr -p" before the file is created; if
-             False, the file is attached with "lfs pcc_fetch" after it has
-             been created.
-
-    Return 0 on success, 1 if there is no readwrite dataset, and another
-    nonzero value on failure.
-    """
-    # pylint: disable=too-many-return-statements,too-many-locals
-    # pylint: disable=too-many-statements,too-many-branches
-    # pylint: disable=no-self-use
-    lpcc_nclient = len(MANAGER.lm_rw_datasets)
-    if lpcc_nclient < 1:
-        log.cl_debug("not enough LPCC client")
-        return 1
-    lpcc_rw_dataset = MANAGER.lm_rw_datasets[0]
-    lpcc_root = lpcc_rw_dataset.lpccd_root
-    lpcc_archive_id = lpcc_rw_dataset.lpccd_id
-    lpcc_host = lpcc_rw_dataset.lpccd_host
-    lustre_mnt = lpcc_rw_dataset.lpccd_lustre_mnt
-
-    lustre_dirname = "dir"
-    lustre_fname = "file"
-    lustre_dir = ("%s/%s" % (lustre_mnt, lustre_dirname))
-    lustre_fpath = ("%s/%s" % (lustre_dir, lustre_fname))
-    ret = lpcc_cleanup_test_file(log, lpcc_host, lustre_dir, lustre_fpath)
-    if ret:
-        log.cl_error("failed to cleanup test file")
-        return -1
-
-    if project:
-        project_supported = lpcc_host.sh_chattr_has_projid_support(log)
-        if not project_supported:
-            log.cl_error("project is not supported by chattr, please upgrade "
-                         "E2fsprogs to latest Lustre version")
-            return -1
-
-        command = ("chattr -p %d %s" % (lpcc_rw_dataset.lpccd_projid, lustre_dir))
-        if _lpcc_rw_run(log, lpcc_host, command):
-            log.cl_error("project support might not be enabled, you might need "
-                         "to run [tune2fs -O project $DEV] on all Lustre devices")
-            return -1
-
-    file_data = "fetch_origin"
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    if not project:
-        # Without a project ID on the directory the new file must not be in
-        # PCC yet. Stop here on a mismatch, like every other state check of
-        # this test does, rather than only logging it.
-        if _lpcc_rw_expect_pcc_type(log, lpcc_host, lustre_fpath,
-                                    lpcc.LPCC_TYPE_NONE):
-            return -1
-
-        command = ("lfs pcc_fetch -a %s %s" %
-                   (lpcc_archive_id, lustre_fpath))
-        if _lpcc_rw_run(log, lpcc_host, command):
-            return -1
-
-    if _lpcc_rw_expect_pcc_type(log, lpcc_host, lustre_fpath,
-                                lpcc.LPCC_TYPE_READWRITE):
-        return -1
-
-    hsm_states = (lustre.HSMState.HS_EXISTS | lustre.HSMState.HS_ARCHIVED |
-                  lustre.HSMState.HS_RELEASED)
-    ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                 archive_id=lpcc_archive_id, host=lpcc_host)
-    if ret:
-        log.cl_error("failed to check HSM status after creating LPCC file [%s]",
-                     lustre_fpath)
-        return ret
-
-    fid_string = lustre.lfs_path2fid(log, lpcc_host, lustre_fpath)
-    if fid_string is None:
-        log.cl_error("failed to get fid from path [%s]", lustre_fpath)
-        return -1
-
-    # The copy of the file inside the PCC root must exist
-    lustre_fid = lustre.LustreFID(log, fid_string)
-    lpcc_fpath = lustre_fid.lf_posix_archive_path(lpcc_root)
-    command = ("ls -l %s" % (lpcc_fpath))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    ret = check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, file_data)
-    if ret:
-        log.cl_error("wrong file data after creation")
-        return ret
-
-    size = 7654321
-    command = ("dd if=/dev/zero of=%s bs=%s count=1" %
-               (lustre_fpath, size))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    ret = check_lpcc_sizes(log, lpcc_host, lpcc_fpath, lustre_fpath, size)
-    if ret:
-        log.cl_error("wrong file size after wrote file")
-        return ret
-
-    size = 1234567
-    command = ("truncate -s %s %s" % (size, lustre_fpath))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    ret = check_lpcc_sizes(log, lpcc_host, lpcc_fpath, lustre_fpath, size)
-    if ret:
-        log.cl_error("wrong file size after truncated file")
-        return ret
-
-    file_data = "file_data"
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    ret = check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, file_data)
-    if ret:
-        log.cl_error("wrong file data after written")
-        return ret
-
-    if _lpcc_rw_expect_pcc_type(log, lpcc_host, lustre_fpath,
-                                lpcc.LPCC_TYPE_READWRITE):
-        return -1
-
-    ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                 archive_id=lpcc_archive_id, host=lpcc_host)
-    if ret:
-        log.cl_error("failed to check HSM status after written LPCC file [%s]",
-                     lustre_fpath)
-        return ret
-
-    hsm_states = lustre.HSMState.HS_EXISTS | lustre.HSMState.HS_ARCHIVED
-    if lpcc_nclient < 2 or restore:
-        log.cl_debug("restoring the PCC file using command")
-        ret = lustre.lfs_hsm_restore(log, lustre_fpath, host=lpcc_host)
-        if ret:
-            log.cl_error("failed to restore file [%s]", lustre_fpath)
-            return ret
-
-        ret = lustre.wait_hsm_state(log, lustre_fpath, hsm_states,
-                                    archive_id=lpcc_archive_id, host=lpcc_host)
-        if ret:
-            log.cl_error("failed to wait status after restoring file [%s]",
-                         lustre_fpath)
-            return ret
-    else:
-        log.cl_debug("accessing the data to trigger restoring of the PCC file")
-        for remote_client in MANAGER.lm_rw_datasets[1:]:
-            remote_host = remote_client.lpccd_host
-            remote_mnt = remote_client.lpccd_lustre_mnt
-            remote_dir = ("%s/%s" % (remote_mnt, lustre_dirname))
-            remote_fpath = ("%s/%s" % (remote_dir, lustre_fname))
-            ret = check_file_data(log, remote_host, remote_fpath, file_data)
-            if ret:
-                log.cl_error("wrong file data on the remote client [%s]",
-                             remote_host.sh_hostname)
-                return ret
-
-    if _lpcc_rw_expect_pcc_type(log, lpcc_host, lustre_fpath,
-                                lpcc.LPCC_TYPE_NONE):
-        return -1
-
-    ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                 archive_id=lpcc_archive_id, host=lpcc_host)
-    if ret:
-        log.cl_error("failed to check HSM status after restoring file [%s]",
-                     lustre_fpath)
-        return ret
-
-    # The file has been restored, thus no LPCC cache now
-    file_data = "new_data"
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    if _lpcc_rw_run(log, lpcc_host, command):
-        return -1
-
-    if _lpcc_rw_expect_pcc_type(log, lpcc_host, lustre_fpath,
-                                lpcc.LPCC_TYPE_NONE):
-        return -1
-
-    hsm_states = (lustre.HSMState.HS_EXISTS | lustre.HSMState.HS_ARCHIVED |
-                  lustre.HSMState.HS_DIRTY)
-    ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                 archive_id=lpcc_archive_id, host=lpcc_host)
-    if ret:
-        log.cl_error("failed to check HSM status after writing to restored file [%s]",
-                     lustre_fpath)
-        return ret
-
-    for dataset in MANAGER.lm_rw_datasets:
-        host = dataset.lpccd_host
-        # Build the path from the mount point of this dataset, as the remote
-        # access loop above does, instead of reusing the path of the first
-        # dataset on every host
-        client_fpath = ("%s/%s/%s" % (dataset.lpccd_lustre_mnt,
-                                      lustre_dirname, lustre_fname))
-        ret = check_file_data(log, host, client_fpath, file_data)
-        if ret:
-            log.cl_error("wrong file data on the client [%s]",
-                         host.sh_hostname)
-            return ret
-    return 0
-
-
-def test_lfs_pcc_fetch_restore(log):
-    """
-    Readwrite test: attach the file with lfs pcc_fetch (no project ID) and
-    restore it with an explicit HSM restore
-    """
-    result = lpcc_rw_test(log, project=False, restore=True)
-    return result
-
-
-LPCC_TESTS.append(test_lfs_pcc_fetch_restore)
-
-
-def test_lfs_pcc_fetch_access(log):
-    """
-    Readwrite test: attach the file with lfs pcc_fetch (no project ID) and
-    let access through the other datasets trigger the restore
-    """
-    result = lpcc_rw_test(log, project=False, restore=False)
-    return result
-
-
-LPCC_TESTS.append(test_lfs_pcc_fetch_access)
-
-
-def test_project_restore(log):
-    """
-    Readwrite test: set the project ID on the test directory and restore
-    the file with an explicit HSM restore
-    """
-    result = lpcc_rw_test(log, project=True, restore=True)
-    return result
-
-
-LPCC_TESTS.append(test_project_restore)
-
-
-def test_project_access(log):
-    """
-    Readwrite test: set the project ID on the test directory and let access
-    through the other datasets trigger the restore
-    """
-    result = lpcc_rw_test(log, project=True, restore=False)
-    return result
-
-
-LPCC_TESTS.append(test_project_access)
-
-
-def test_multi_open_when_creating(log):
-    # pylint: disable=no-self-use,too-many-locals,too-many-return-statements
-    # pylint: disable=too-many-statements
-    """
-    When a process created a LPCC file and holding the open, another
-    process on the same client should be able to open the file.
-
-    Return 0 on success, 1 if the test cannot run (no readwrite dataset or
-    no multiop command), and another nonzero value on failure.
-    """
-    lpcc_nclient = len(MANAGER.lm_rw_datasets)
-    if lpcc_nclient < 1:
-        log.cl_debug("not enough LPCC client")
-        return 1
-
-    if check_multiop_exists(log, MANAGER.lm_rw_datasets):
-        log.cl_debug("multiop command doesn't exist")
-        return 1
-
-    lpcc_rw_dataset = MANAGER.lm_rw_datasets[0]
-    lpcc_root = lpcc_rw_dataset.lpccd_root
-    lpcc_archive_id = lpcc_rw_dataset.lpccd_id
-    lpcc_host = lpcc_rw_dataset.lpccd_host
-
-    lustre_mnt = lpcc_rw_dataset.lpccd_lustre_mnt
-    lustre_dirname = "dir"
-    lustre_fname = "file"
-    lustre_dir = ("%s/%s" % (lustre_mnt, lustre_dirname))
-    lustre_fpath = ("%s/%s" % (lustre_dir, lustre_fname))
-    ret = lpcc_cleanup_test_file(log, lpcc_host, lustre_dir, lustre_fpath)
-    if ret != 0:
-        log.cl_error("failed to cleanup test file")
-        return -1
-
-    command = ("chattr -p %d %s" % (lpcc_rw_dataset.lpccd_projid, lustre_dir))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status != 0:
-        log.cl_error("failed to run command [%s]", command)
-        return -1
-
-    multiop = lustre_test.Multiop(lpcc_host, lustre_fpath, "vO_c",
-                                  "/tmp/multiop.stdout",
-                                  "/tmp/multiop.stderr")
-    multiop.mop_start(log)
-    try:
-        ret = multiop.mop_wait_pausing(log)
-        if ret:
-            return ret
-
-        hsm_states = (lustre.HSMState.HS_EXISTS | lustre.HSMState.HS_ARCHIVED |
-                      lustre.HSMState.HS_RELEASED)
-        ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                     archive_id=lpcc_archive_id, host=lpcc_host)
-        if ret:
-            return ret
-
-        # Second open of the file on the same client while multiop still
-        # holds its open
-        file_data = "multiopen_data"
-        command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-        retval = lpcc_host.sh_run(log, command)
-        if retval.cr_exit_status != 0:
-            log.cl_error("failed to run command [%s]", command)
-            return -1
-
-        ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                     archive_id=lpcc_archive_id, host=lpcc_host)
-        if ret:
-            return ret
-
-        fid_string = lustre.lfs_path2fid(log, lpcc_host, lustre_fpath)
-        if fid_string is None:
-            log.cl_error("failed to get fid from path [%s]", lustre_fpath)
-            return -1
-
-        lustre_fid = lustre.LustreFID(log, fid_string)
-        lpcc_fpath = lustre_fid.lf_posix_archive_path(lpcc_root)
-        command = ("ls -l %s" % (lpcc_fpath))
-        retval = lpcc_host.sh_run(log, command)
-        if retval.cr_exit_status != 0:
-            log.cl_error("failed to run command [%s]", command)
-            return -1
-
-        ret = check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, file_data)
-        if ret != 0:
-            log.cl_error("failed to check lpcc data")
-            return -1
-
-        ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                     archive_id=lpcc_archive_id, host=lpcc_host)
-        if ret != 0:
-            log.cl_error("failed to check hsm state")
-            return -1
-
-        return 0
-    finally:
-        # Kill multiop on every exit path, not only on success, so that a
-        # failed test does not leave a process holding the test file open.
-        # NOTE(review): assumes mop_pkill() is harmless when multiop has
-        # already exited - confirm against lustre_test.Multiop
-        multiop.mop_pkill(log)
-
-
-LPCC_TESTS.append(test_multi_open_when_creating)
-
-
-def test_remote_local_open(log):
-    # pylint: disable=no-self-use,too-many-locals,too-many-return-statements
-    # pylint: disable=too-many-statements
-    """
-    When a process created a LPCC file and holding the open, another
-    process on the different client should not be able to open the file.
-
-    Return 0 on success, 1 if the test cannot run (fewer than two readwrite
-    datasets or no multiop command), and -1 on failure.
-    """
-    lpcc_nclient = len(MANAGER.lm_rw_datasets)
-    if lpcc_nclient < 2:
-        log.cl_debug("not enough LPCC client")
-        return 1
-
-    if check_multiop_exists(log, MANAGER.lm_rw_datasets):
-        log.cl_debug("multiop command doesn't exist")
-        return 1
-
-    lpcc_rw_dataset = MANAGER.lm_rw_datasets[0]
-    lpcc_archive_id = lpcc_rw_dataset.lpccd_id
-    lpcc_host = lpcc_rw_dataset.lpccd_host
-
-    lustre_mnt = lpcc_rw_dataset.lpccd_lustre_mnt
-    lustre_dirname = "dir"
-    lustre_fname = "file"
-    lustre_dir = ("%s/%s" % (lustre_mnt, lustre_dirname))
-    lustre_fpath = ("%s/%s" % (lustre_dir, lustre_fname))
-    ret = lpcc_cleanup_test_file(log, lpcc_host, lustre_dir, lustre_fpath)
-    if ret != 0:
-        log.cl_error("failed to cleanup test file")
-        return -1
-
-    command = ("chattr -p %d %s" % (lpcc_rw_dataset.lpccd_projid, lustre_dir))
-    retval = lpcc_host.sh_run(log, command)
-    # Check the result of chattr itself; [ret] still holds the (zero) result
-    # of the cleanup above and would never report a chattr failure
-    if retval.cr_exit_status != 0:
-        log.cl_error("failed to run command [%s]", command)
-        return -1
-
-    # NOTE(review): the error returns below leave multiop running - confirm
-    # whether it should be killed there as well
-    multiop = lustre_test.Multiop(lpcc_host, lustre_fpath, "vO_c",
-                                  "/tmp/multiop.stdout",
-                                  "/tmp/multiop.stderr")
-    multiop.mop_start(log)
-    ret = multiop.mop_wait_pausing(log)
-    if ret != 0:
-        log.cl_error("failed to wait multiop")
-        return -1
-
-    hsm_states = (lustre.HSMState.HS_EXISTS | lustre.HSMState.HS_ARCHIVED |
-                  lustre.HSMState.HS_RELEASED)
-    ret = lustre.check_hsm_state(log, lustre_fpath, hsm_states,
-                                 archive_id=lpcc_archive_id, host=lpcc_host)
-    if ret != 0:
-        log.cl_error("failed to check HSM state")
-        return -1
-
-    remote_client = MANAGER.lm_rw_datasets[1]
-    remote_host = remote_client.lpccd_host
-
-    # While multiop holds the file open, both read and write from the other
-    # client are expected to fail
-    command = ("cat %s" % (lustre_fpath))
-    retval = remote_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded unexpectedly", command)
-        return -1
-
-    file_data = "multiopen_data"
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    retval = remote_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded unexpectedly", command)
-        return -1
-
-    multiop.mop_signal(log)
-    multiop.mop_wait_exit(log)
-
-    # After multiop has exited, the same read and write are expected to
-    # succeed
-    command = ("cat %s" % (lustre_fpath))
-    retval = remote_host.sh_run(log, command)
-    if retval.cr_exit_status != 0:
-        log.cl_error("command [%s] failed unexpectedly", command)
-        return -1
-
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    retval = remote_host.sh_run(log, command)
-    if retval.cr_exit_status != 0:
-        log.cl_error("command [%s] failed unexpectedly", command)
-        return -1
-    return 0
-
-
-LPCC_TESTS.append(test_remote_local_open)
-
-
-def lpcc_ro_test(log):
-    # pylint: disable=too-many-locals,no-self-use
-    # pylint: disable=too-many-statements,too-many-branches
-    # pylint: disable=too-many-return-statements,unused-variable
-    """
-    Run LPCC readonly tests
-    """
-    dataset_number = len(MANAGER.lm_ro_datasets)
-    if dataset_number < 1:
-        log.cl_info("not enough LPCC readonly dataset")
-        return 1
-    lpcc_ro_dataset = MANAGER.lm_ro_datasets[0]
-    lpcc_client = lpcc_ro_dataset.lpccd_lustre_client
-    lpcc_root = lpcc_ro_dataset.lpccd_root
-    lpcc_group_id = lpcc_ro_dataset.lpccd_id
-    group_datasets = MANAGER.lm_ro_dataset_groups[lpcc_group_id]
-    group_clients = []
-    for group_dataset in group_datasets:
-        lustre_client = group_dataset.lpccd_lustre_mnt
-        if lustre_client not in group_clients:
-            group_clients.append(lustre_client)
-
-    none_group_clients = []
-    for client_id, client in MANAGER.lm_lustre_clients.iteritems():
-        if client not in group_clients:
-            none_group_clients.append(lustre_client)
-
-    lpcc_host = lpcc_ro_dataset.lpccd_host
-    lustre_mnt = lpcc_ro_dataset.lpccd_lustre_mnt
-
-    lustre_dirname = "dir_ro"
-    lustre_fname = "file"
-    lustre_dir = ("%s/%s" % (lustre_mnt, lustre_dirname))
-    lustre_fpath = ("%s/%s" % (lustre_dir, lustre_fname))
-    ret = lpcc_cleanup_test_file(log, lpcc_host, lustre_dir, lustre_fpath)
-    if ret:
-        log.cl_error("failed to cleanup test file")
-        return -1
-
-    file_data = "fetch_origin"
-    command = ("echo -n %s > %s" % (file_data, lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     lpcc_host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    pcc_state = lpcc.lfs_pcc_state(log, lustre_fpath, host=lpcc_host)
-    if pcc_state.ps_type != lpcc.LPCC_TYPE_NONE:
-        log.cl_error("wrong PCC type, expected [%s], got [%s]",
-                     lpcc.LPCC_TYPE_NONE, pcc_state.ps_type)
-        return -1
-
-    command = ("lfs pcc_fetch -r -a %s %s" % (lpcc_group_id, lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     lpcc_host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    pcc_state = lpcc.lfs_pcc_state(log, lustre_fpath, host=lpcc_host)
-    if pcc_state.ps_type != lpcc.LPCC_TYPE_READONLY:
-        log.cl_error("wrong PCC type, expected [%s], got [%s]",
-                     lpcc.LPCC_TYPE_READONLY, pcc_state.ps_type)
-
-    fid_string = lustre.lfs_path2fid(log, lpcc_host, lustre_fpath)
-    if fid_string is None:
-        return -1
-
-    lustre_fid = lustre.LustreFID(log, fid_string)
-    lpcc_fpath = lustre_fid.lf_posix_archive_path(lpcc_root)
-    command = ("ls -l %s" % (lpcc_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     lpcc_host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    ret = check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, file_data)
-    if ret:
-        log.cl_error("wrong file data after creation")
-        return ret
-
-    for client_id, client in MANAGER.lm_lustre_clients.iteritems():
-        if client == lpcc_client:
-            continue
-        host = client.lc_host
-        remote_lustre_fpath = ("%s/%s/%s" %
-                               (client.lc_mnt, lustre_dirname,
-                                lustre_fname))
-
-        # File read without grouplock should be blocked
-        multiop = lustre_test.Multiop(host, remote_lustre_fpath, "vor10",
-                                      "/tmp/multiop.stdout",
-                                      "/tmp/multiop.stderr")
-        multiop.mop_start(log)
-        ret = multiop.mop_wait_exit(log, timeout=3, quiet=True)
-        if ret == 0:
-            log.cl_error("file read on host [%s] is not blocked by group "
-                         "lock on host [%s]", host.sh_hostname,
-                         lpcc_host.sh_hostname)
-            return -1
-
-        multiop.mop_pkill(log)
-        ret = multiop.mop_wait_exit(log)
-        if ret:
-            log.cl_error("file read on host [%s] is not canceled",
-                         host.sh_hostname)
-            return -1
-
-    # Not able to truncate file
-    command = ("truncate -s 0 %s" % (lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded on host [%s], which is unexpected",
-                     command, lpcc_host.sh_hostname)
-        return -1
-
-    command = ("truncate -s 1048576 %s" % (lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded on host [%s], which is unexpected",
-                     command, lpcc_host.sh_hostname)
-        return -1
-
-    # Not able to write data to readonly cache
-    command = ("echo -n not_written > %s" % (lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded on host [%s], which is unexpected",
-                     command, lpcc_host.sh_hostname)
-        return -1
-
-    # Not able to append to readonly cache
-    command = ("echo -n not_written >> %s" % (lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status == 0:
-        log.cl_error("command [%s] succeeded on host [%s], which is unexpected",
-                     command, lpcc_host.sh_hostname)
-        return -1
-
-    # Check data again in case any data is changed
-    ret = check_lpcc_data(log, lpcc_host, lpcc_fpath, lustre_fpath, file_data)
-    if ret:
-        log.cl_error("wrong file data after truncate and write failures")
-        return ret
-
-    # Detch and re-attach
-    command = ("lfs pcc_detach %s" % (lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     lpcc_host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    pcc_state = lpcc.lfs_pcc_state(log, lustre_fpath, host=lpcc_host)
-    if pcc_state.ps_type != lpcc.LPCC_TYPE_NONE:
-        log.cl_error("wrong PCC type after detach, expected [%s], got [%s]",
-                     lpcc.LPCC_TYPE_NONE, pcc_state.ps_type)
-        return -1
-
-    command = ("lfs pcc_fetch -r -a %s %s" % (lpcc_group_id, lustre_fpath))
-    retval = lpcc_host.sh_run(log, command)
-    if retval.cr_exit_status:
-        log.cl_error("failed to run command [%s] on host [%s], "
-                     "ret = [%d], stdout = [%s], stderr = [%s]",
-                     command,
-                     lpcc_host.sh_hostname,
-                     retval.cr_exit_status,
-                     retval.cr_stdout,
-                     retval.cr_stderr)
-        return -1
-
-    pcc_state = lpcc.lfs_pcc_state(log, lustre_fpath, host=lpcc_host)
-    if pcc_state.ps_type != lpcc.LPCC_TYPE_READONLY:
-        log.cl_error("wrong PCC type after re-prefetch, expected [%s], got [%s]",
-                     lpcc.LPCC_TYPE_READONLY, pcc_state.ps_type)
-        return -1
-    return 0
-
-
-def test_readonly(log):
-    """
-    Test readonly PCC
-    """
-    return lpcc_ro_test(log)
-
-
-LPCC_TESTS.append(test_readonly)
-
-
-def test_lpcc_locked(log, workspace, config_fpath):
-    """
-    Start to run LPCC tests holding the confiure lock
-    """
-    # pylint: disable=global-statement,too-many-branches
-    lpcc_workspace = workspace + "/lpcc"
-
-    ret = utils.mkdir(lpcc_workspace)
-    if ret:
-        log.cl_error("failed to creat directory [%s]", lpcc_workspace)
-        return ret
-    global MANAGER
-    MANAGER = lpcc.LPCCManager(lpcc_workspace, config_fpath)
-    ret = MANAGER.lm_parse(log)
-    if ret:
-        return ret
-
-    ret = MANAGER.lm_start(log)
-    if ret:
-        return ret
-
-    quit_on_error = True
-    only_test = None
-    passed_tests = []
-    failed_tests = []
-    skipped_tests = []
-    for lpcc_test in LPCC_TESTS:
-        if only_test is not None and only_test != lpcc_test.__name__:
-            continue
-        log.cl_info("test [%s] started", lpcc_test.__name__)
-        ret = lpcc_test(log)
-        if ret < 0:
-            log.cl_error("test [%s] failed", lpcc_test.__name__)
-            failed_tests.append(lpcc_test)
-            if quit_on_error:
-                return -1
-        elif ret == 1:
-            log.cl_warning("test [%s] skipped", lpcc_test.__name__)
-            skipped_tests.append(lpcc_test)
-        else:
-            log.cl_info("test [%s] passed", lpcc_test.__name__)
-            passed_tests.append(lpcc_test)
-
-    if len(skipped_tests) != 0:
-        for skipped_test in skipped_tests:
-            log.cl_warning("test [%s] skipped", skipped_test.__name__)
-
-    if len(failed_tests) != 0:
-        for failed_test in failed_tests:
-            log.cl_error("test [%s] failed", failed_test.__name__)
-
-    if len(passed_tests) != 0:
-        for passed_test in passed_tests:
-            log.cl_info("test [%s] passed", passed_test.__name__)
-
-    daemon.SHUTTING_DOWN = True
-    ret = MANAGER.lm_stop(log)
-    if ret:
-        log.cl_error("failed to stop lpcc manager")
-
-    if len(failed_tests) != 0 or ret:
-        return -1
-    return 0
-
-
-def test_lpcc(log, workspace, config_fpath):
-    """
-    Start to run LPCC tests
-    """
-    # pylint: disable=bare-except
-    lock_file = config_fpath + ".lock"
-    lock = filelock.FileLock(lock_file)
-    try:
-        with lock.acquire(timeout=0):
-            try:
-                ret = test_lpcc_locked(log, workspace, config_fpath)
-            except:
-                ret = -1
-                log.cl_error("exception: %s", traceback.format_exc())
-            lock.release()
-    except filelock.Timeout:
-        ret = -1
-        log.cl_error("someone else is holding lock of file [%s], aborting "
-                     "to prevent conflicts", lock_file)
-    return ret
-
-
-def main():
-    """
-    Run LPCC tests
-    """
-    # pylint: disable=unused-variable,not-callable
-    if sys.version[0] == '2':
-        reload(sys)
-        if hasattr(sys, "setdefaultencoding"):
-            set_encoding = getattr(sys, "setdefaultencoding", None)
-            set_encoding('UTF-8')
-        else:
-            os.environ["PYTHONIOENCODING"] = 'UTF-8'
-
-    config_fpath = lpcc.LPCC_CONFIG
-
-    if len(sys.argv) == 2:
-        config_fpath = sys.argv[1]
-    elif len(sys.argv) > 2:
-        usage()
-        sys.exit(-1)
-
-    identity = time_util.local_strftime(time_util.utcnow(), "%Y-%m-%d-%H_%M_%S")
-    workspace = LPCC_LOG_TEST_DIR + "/" + identity
-
-    if not os.path.exists(LPCC_LOG_TEST_DIR):
-        ret = utils.mkdir(LPCC_LOG_TEST_DIR)
-        if ret:
-            utils.eprint("failed to create directory [%s] on local host" % LPCC_LOG_TEST_DIR)
-            sys.exit(-1)
-    elif not os.path.isdir(LPCC_LOG_TEST_DIR):
-        utils.eprint("[%s] is not a directory" % LPCC_LOG_TEST_DIR)
-        sys.exit(-1)
-
-    if not os.path.exists(workspace):
-        ret = utils.mkdir(workspace)
-        if ret:
-            utils.eprint("failed to create directory [%s] on local host" % workspace)
-            sys.exit(-1)
-    elif not os.path.isdir(workspace):
-        utils.eprint("[%s] is not a directory" % workspace)
-        sys.exit(-1)
-
-    signal.signal(signal.SIGINT, daemon.signal_handler)
-    signal.signal(signal.SIGTERM, daemon.signal_handler)
-    log = clog.get_log(resultsdir=workspace)
-    log.cl_info("started LPCC test using config [%s], please check [%s] for "
-                "more log" % (config_fpath, workspace))
-
-    save_fpath = workspace + "/" + lpcc.LPCC_CONFIG_FNAME
-    log.cl_debug("copying config file from [%s] to [%s]", config_fpath,
-                 save_fpath)
-    shutil.copyfile(config_fpath, save_fpath)
-    ret = test_lpcc(log, workspace, config_fpath)
-    if ret:
-        log.cl_error("test failed, please check [%s] for more log", workspace)
-        sys.exit(ret)
-    log.cl_info("all tests passed, please check [%s] for more log", workspace)
-    sys.exit(0)
index b8f4e24..f07d594 100644 (file)
@@ -42,6 +42,7 @@
 #define OPT_CLEAR_HASHDIR              3
 #define OPT_LOG_LEVEL                  4
 #define OPT_MAX_SCAN_SECS              5
+#define OPT_PIDFILE                    6
 
 struct lpcc_purge_options {
        char *o_cache;
@@ -57,6 +58,7 @@ struct lpcc_purge_options {
        int o_max_scan_secs;
 
        char *o_dumpfile;
+       char *o_pidfile;
        bool o_dry_run;
        bool o_clear_hashdir;
 };
@@ -299,7 +301,8 @@ static void usage(void)
                "\t-t, --scan-threads=NUM scanning threads (default: %u)\n"
                "\t    --candidate-num=NUM, candidate number of approximate LRU (default: %d, min: %d, max: %d)\n"
                "\t    --max-scan-secs, max seconds to scan continously before purging (default: %d, min: %d, max: %d)\n"
-               "\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-PID.stats)\n"
+               "\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-RWID.stats)\n"
+               "\t    --pidfile=FILE, the pidfile name (default: /var/run/lpcc_purge-RWID.pid)\n"
                "\t    --clear-hashdir, clear empty hash dir after detaching file\n"
                "\t    --dry-run, scan once but do not detach file really\n"
                "\t-h, --help, print this help message\n",
@@ -329,6 +332,7 @@ static struct option long_options[] = {
        { "dry-run", no_argument, NULL, OPT_DRY_RUN},
        { "candidate-num", required_argument, NULL, OPT_CANDIDATE_NUM},
        { "dump", required_argument, NULL, 'w'},
+       { "pidfile", required_argument, NULL, OPT_PIDFILE},
        { "clear-hashdir", no_argument, NULL, OPT_CLEAR_HASHDIR},
        { "max-scan-secs", required_argument, NULL, OPT_MAX_SCAN_SECS},
        { "help", no_argument, NULL, 'h' },
@@ -570,6 +574,9 @@ static void lpcc_purge_process_opt(int c, char *optarg)
        case 'w':
                opt.o_dumpfile = strdup(optarg);
                break;
+       case OPT_PIDFILE:
+               opt.o_pidfile = strdup(optarg);
+               break;
        case OPT_DRY_RUN:
                opt.o_dry_run = true;
                break;
@@ -655,6 +662,10 @@ void lpcc_purge_verify_opts(void)
                snprintf(buf, sizeof(buf), "/var/run/lpcc_purge-%d.stats", opt.o_rwid);
                opt.o_dumpfile = strdup(buf);
        }
+       if (opt.o_pidfile == NULL) {
+               snprintf(buf, sizeof(buf), "/var/run/lpcc_purge-%d.pid", opt.o_rwid);
+               opt.o_pidfile = strdup(buf);
+       }
 
        /* check freehi > freelo */
        if (opt.o_high_usage <= opt.o_low_usage) {
@@ -719,14 +730,12 @@ out:
 
 static void lpcc_purge_lock_pidfile(void)
 {
-       char buf[PATH_MAX];
        int fd;
 
-       snprintf(buf, sizeof(buf), "/var/run/lpcc_purge-%d.pid", opt.o_rwid);
-       fd = create_pid_file(buf);
+       fd = create_pid_file(opt.o_pidfile);
        if (fd < 0) {
                llapi_error(LLAPI_MSG_FATAL, errno,
-                       "cannot create pidfile '%s'", buf);
+                       "cannot create pidfile '%s'", opt.o_pidfile);
                exit(1);
        }
        /* we keep the fd open to hold the flock,
@@ -856,11 +865,6 @@ static int lpcc_purge_detach_candidate(const char *mnt,
 
        llapi_printf(LLAPI_MSG_DEBUG, "detach fid: "DFID"\n", PFID(&candidate->c_fid));
 
-       pthread_mutex_lock(&stats.s_lock);
-       stats.s_purged_objs++;
-       stats.s_total_purged_objs++;
-       pthread_mutex_unlock(&stats.s_lock);
-
        /* double confirm the atime. If it's changed, discard this entry */
        rc = stat(candidate->c_path, &statbuf);
        if (rc) {
index d2ca574..1d7a6c8 100644 (file)
@@ -1,10 +1,16 @@
 [Unit]
-Description=Lustre Persistent Client Cache
+Description=Lustre Persistent Client Cache Management
+
+Requires=network-online.target
+After=network-online.target
+
+ConditionPathExists=/etc/lpcc.conf
 
 [Service]
 Type=simple
-ExecStart=/usr/bin/lpcc
-User=root
+# Use absolute command paths: older systemd releases refuse to load a unit
+# whose ExecStart=/ExecStop= command is not an absolute path.
+ExecStart=/usr/bin/lpcc monitor
+ExecStop=/bin/kill $MAINPID
 
 [Install]
 WantedBy=multi-user.target
+
index 436ec1f..bec88b6 100644 (file)
@@ -90,8 +90,8 @@ MANFILES =                                    \
        lustre_rsync.8                          \
        nids.5                                  \
        plot-llstat.8                           \
-       routerstat.8
-
+       routerstat.8                            \
+       umount.lustre.8
 
 LIBMAN =                                       \
        lustreapi.7                             \
diff --git a/lustre/doc/umount.lustre.8 b/lustre/doc/umount.lustre.8
new file mode 100644 (file)
index 0000000..053e554
--- /dev/null
@@ -0,0 +1,75 @@
+.\"@(#)umount.lustre.8"
+.TH UMOUNT.LUSTRE 8 "7 Jul 2021"
+.SH NAME
+umount.lustre \- unmount a Lustre File System
+.SH SYNOPSIS
+.BI "umount.lustre" " dir" " [\-fvnrldh ]"
+.SH DESCRIPTION
+.BR umount.lustre
+is a part of
+.BR lustre (7)
+utilities package, which provides Lustre client functionality.
+
+.BR umount.lustre
+stops any Lustre Persistent Client Cache (LPCC) running on the Lustre file
+system to be unmounted, then performs the actual unmount.
+
+.BR umount.lustre
+is meant to be used by the
+.BR umount (8)
+command for unmounting a Lustre file system. This subcommand, however, can also
+be used as a standalone command with limited functionality.
+
+.I dir
+is the directory on which the file system is mounted.
+
+.SH OPTIONS
+.TP
+.BI "\-f"
+Force unmount the file system in case of unreachable Lustre file system.
+.TP
+.BI "\-v"
+Be verbose.
+.TP
+.BI "\-n"
+Do not update
+.I /etc/mtab.
+By default, an entry is created in
+.I /etc/mtab
+for every mounted file system. Use this option to skip deleting an entry.
+.TP
+.BI "\-r"
+In case unmounting fails, try to remount read-only.
+.TP
+.BI "\-l"
+Lazy unmount. Detach the file system from the file system hierarchy now, and
+cleanup all references to the file system as soon as it is not busy anymore.
+.TP
+.BI "\-d"
+When the unmounted device was a loop device, also free this loop device.
+.TP
+.BI "\-h"
+Print help message.
+
+.SH NOTE
+For further information please refer to the
+.BR lustre (5)
+and
+.BR umount (8)
+manual pages.
+
+.SH FILES
+.TP 18n
+.I /etc/fstab
+file system table
+.TP
+.I /etc/mtab
+table of mounted file systems
+
+.PD
+.SH "SEE ALSO"
+.BR lustre (7),
+.BR umount (8)
+
+.SH "AUTHOR"
+Lei Feng <flei@ddn.com>
index 7a3232b..e85d8f2 100644 (file)
@@ -37,7 +37,7 @@ genscripts = lc_modprobe lc_net lc_hb lc_cluman lc_md lc_lvm lustre_start lnet
 
 SUBDIRS = systemd
 
-sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe
+sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe umount.lustre
 
 if RHEL
 initdir = $(sysconfdir)/init.d
@@ -93,7 +93,8 @@ EXTRA_DIST = license-status lustre_rmmod ldev lc_mon lhbadm \
             $(addsuffix .in,$(genscripts)) lfs_migrate lustre_req_history \
             lustre lsvcgss lc_common haconfig Lustre.ha_v2 dkms.mkconf \
             zfsobj2fid ko2iblnd-probe statechange-lustre.sh \
-            bash-completion/lustre bash-completion/lctl bash-completion/lfs
+            bash-completion/lustre bash-completion/lctl bash-completion/lfs \
+            umount.lustre
 
 CLEANFILES = $(genscripts)
 
diff --git a/lustre/scripts/umount.lustre b/lustre/scripts/umount.lustre
new file mode 100755 (executable)
index 0000000..3097da0
--- /dev/null
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -eu
+
+
+usage()
+{
+       echo "usage: umount.lustre dir [-fvnrlh]"
+       echo "options:"
+       echo -e "\t-f\tforce umount"
+       echo -e "\t-v\tverbose"
+       echo -e "\t-n\tDo not update /etc/mtab"
+       echo -e "\t-r\tremount"
+       echo -e "\t-l\tlazy umount"
+       echo -e "\t-h\tprint this help"
+}
+
+declare -a args=("$@")
+
+temp=`getopt -o fvnrlh -- "$@"`
+if [[ $? != 0 ]]; then
+       usage
+       exit 1
+fi
+
+eval set -- "$temp"
+
+while true; do
+       case "$1" in
+               -h)
+                       usage
+                       exit 0
+                       ;;
+               -f|-v|-n|-r|-l)
+                       shift
+                       ;;
+               --)
+                       shift
+                       mount_point="$1"
+                       break
+                       ;;
+               *)
+                       usage
+                       exit 1
+                       ;;
+       esac
+done
+
+if [[ -x /usr/bin/lpcc ]] && [[ -S /var/run/lpcc.sock ]]; then
+       /usr/bin/lpcc stop "$mount_point" --keep-enabled > /dev/null
+fi
+
+umount --internal-only "${args[@]}"
+
index 6384027..ea73190 100755 (executable)
@@ -15183,10 +15183,10 @@ test_160h() {
                        "R" 20 ||
                        error "$i: GC-thread not found in R-state"
                # check umounts of each MDT on MDS have reached kthread_stop()
-               [[ $(do_node $i pgrep umount | wc -l) -eq $nb ]] ||
+               [[ $(do_node $i pgrep umount.lustre | wc -l) -eq $nb ]] ||
                        error "$i: expected $nb umount"
                wait_update $i \
-                       "ps -C umount -o state --no-headers | uniq" "D" 20 ||
+                       "ps -C umount -o state --no-headers | grep D | wc -l" "$nb" 20 ||
                        error "$i: umount not found in D-state"
        done