EXTRA_DIST= \
$(PYTHON_COMMANDS) \
detect-distro.sh \
+ lipe-revision.sh \
example_configs/clownfish/seperate_mgs/clownfish.conf \
example_configs/clownfish/seperate_mgs/lipe_virt.conf \
example_configs/lipe/lipe_install.conf \
AC_DEFINE(IDLE_USER_HEADER, 1, [Lustre uses idle user header])
fi
-# -------- check whether DoM is supported by Lustre head files --------
-AC_MSG_CHECKING([Lustre has PFL support])
-saved_libs=$LIBS
-LIBS="-llustreapi"
-AC_LINK_IFELSE([AC_LANG_SOURCE([
- #include <lustre/lustre_user.h>
-
- int main(void) {
- int a = LOV_USER_MAGIC_COMP_V1;
- }
-])],[
+# A few defines that are no longer conditional
AC_DEFINE([HAVE_LUSTRE_PFL], 1,
[Lustre has PFL support])
-have_lustre_pfl=yes
-], [have_lustre_pfl="no"])
-LIBS=$saved_libs
-AC_MSG_RESULT([$have_lustre_pfl])
-
-# -------- check for llapi_layout_get_by_xattr() --------
-AC_MSG_CHECKING([Lustre have llapi_layout_get_by_xattr() and LSoM])
-saved_libs=$LIBS
-LIBS="-llustreapi"
-AC_LINK_IFELSE([AC_LANG_SOURCE([
- #include <lustre/lustreapi.h>
-
- int main(void) {
- llapi_layout_get_by_xattr(NULL, 0, 0);
- }
-])],[
AC_DEFINE([HAVE_LAYOUT_BY_XATTR], 1,
[have llapi_layout_get_by_xattr()])
AC_DEFINE([HAVE_LAZY_SIZE_ON_MDT], 1,
[have lazy size on MDT])
-have_layout_by_xattr=yes
-], [have_layout_by_xattr="no"])
-LIBS=$saved_libs
-AC_MSG_RESULT([$have_layout_by_xattr])
# -------- check for llapi_changelog_in_buf() --------
AC_MSG_CHECKING([Lustre have llapi_changelog_in_buf()])
LIPE_RELEASE="1"
AC_DEFINE_UNQUOTED(RELEASE, "$LIPE_RELEASE", [release info] )
+AC_DEFINE_UNQUOTED(LIPE_RELEASE, "$LIPE_RELEASE", [release info] )
AC_SUBST(LIPE_RELEASE)
+AC_MSG_CHECKING([for lipe revision])
+LIPE_REVISION=$(bash lipe-revision.sh)
+AC_MSG_RESULT([$LIPE_REVISION])
+AC_DEFINE_UNQUOTED(LIPE_REVISION, "$LIPE_REVISION", [revision info] )
+AC_SUBST(LIPE_REVISION)
+
# for exporting to spec file
AC_SUBST(ac_configure_args)
AC_CONFIG_FILES([Makefile
--- /dev/null
+# Hotpools Hotfix (lipe-server-1.17) Release Notes
+
+## lamigo agents
+
+lamigo will continue to ssh to "agents" and invoke lfs to mirror and
+resync files. An "agent" need not be an EXAScaler server (VM). Instead
+an agent can be any node that lamigo can reach via ssh as root, has a
+Lustre client mount, and has the lfs utility installed. The lamigo
+configuration file determines the set of agents used:
+```
+ # MDT=myfs1-MDT0042
+ # grep '^agent=' /etc/lamigo/${MDT}.conf
+ agent=ai400x-0a12-vm00:/lustre/myfs1/client:4
+ agent=ai400x-0a12-vm01:/lustre/myfs1/client:4
+ agent=ai400x-0a12-vm02:/lustre/myfs1/client:4
+ agent=ai400x-0a12-vm03:/lustre/myfs1/client:4
+```
+In the triple `ai400x-0a12-vm00:/lustre/myfs1/client:4` the first
+component is the hostname of the agent, the second is the mount point
+of the Lustre client on the agent, and the third is the maximum number
+of concurrent mirror operations.
+
+Using agents other than the EXAScaler servers will alleviate many of
+the HA issues seen to date (DDN-1920, EX-1613). The
+`stratagem-hp-config.sh` script will use the EXAScaler servers as
+mirroring agents for lamigo.
+
+Previous versions of lamigo and lpurge failed to start in certain
+cases when agents were unreachable (DDN-2070, EX-3021). All
+known issues in this area are believed to be fixed.
+
+In the example above we have 4 agents with 4 concurrent operations
+each, for a total of 16 concurrent operations on behalf of this lamigo
+service. Basic benchmarking on a mix of small and medium files has
+shown that we can expect about two replications per second per
+concurrent operation, or about 32 (2 * 16) replications per second
+from lamigo.
+
+## agentless lpurge
+
+lpurge will no longer use agents or external commands (lfs mirror
+split ...) to purge files. Instead it will use the Lustre client mount
+point (`/lustre/$FS/client`) on the server where lpurge runs. For
+backwards compatibility lpurge will ignore any agents specified in the
+config file. Note that lpurge.conf uses lines of the form
+```
+ mds=0:ai400x-0a12-vm00:/lustre/myfs1/client
+ mds=1:ai400x-0a12-vm01:/lustre/myfs1/client
+```
+to specify an agent. Since they will be ignored, we recommend just
+leaving them in the config file. This will make downgrade easier if
+needed.
+
+## Online upgrade of lamigo and lpurge
+
+To upgrade lamigo and lpurge on a live system we suggest installing
+the new lipe-server RPM to all EXAScaler servers and then running
+```
+ # clush -a killall lamigo lpurge
+```
+In this case systemd will automatically restart lamigo and lpurge
+(using the newly installed binaries) without invoking HA. This is
+preferred over using `stratagem-hp-stop.sh && stratagem-hp-start.sh`.
+
+## Converting hotpools to use ticketed HA configuration
+
+Using the Lustre client mount Pacemaker resource to control the
+starting and stopping of hotpools introduced cyclic resource
+dependencies which in turn made stopping hotpools impossible in some
+cases (usually leading to STONITH, see DDN-1920 and EX-1613). To
+address this `stratagem-hp-config.sh` now uses a new Pacemaker
+resource (`$FS-hotpool`) and ticket (`$FS-hotpool-allocated`) to start
+and stop hotpools. Running `stratagem-hp-stop.sh` stops lamigo and
+lpurge but not the Lustre client mounts on the servers.
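+
+After configuration (or conversion, below) the new resource and ticket
+are visible on the HA heads. A quick sanity check (exact output
+depends on the pacemaker version):
+```
+    # crm_resource -QW -r myfs1-hotpool
+    # crm_ticket -l
+```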
+
+Alongside the changes to `stratagem-hp-config.sh`, this release
+provides two new HA scripts:
+
+`stratagem-hp-convert.sh` converts a previously configured hotpools
+installation to a ticketed configuration. Conversion may be performed
+with hotpools running or stopped.
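+
+For example:
+```
+    # stratagem-hp-convert.sh myfs1
+```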
+
+`stratagem-hp-teardown-client.sh` stops and unconfigures the Lustre
+client mount on the servers.
+
+## Permanently stopping hotpools
+
+If hotpools is to be stopped permanently or indefinitely then the
+changelog users registered for lamigo should be deregistered to avoid
+client operations failing due to full changelogs and increased
+changelog space consumption on the MDTs (up to about 512GB). This
+should be done for each MDT.
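+
+A sketch of deregistering lamigo's changelog user for one MDT (run
+where the MDT is mounted; cl23 is the user from the examples below):
+```
+    # MDT=myfs1-MDT0042
+    # CL=cl23
+    # lctl --device $MDT changelog_deregister $CL
+```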
+
+## Restarting hotpools
+
+If hotpools was previously enabled and subsequently stopped then
+some maintenance around changelogs should be performed before
+re-enabling it. Remove any old '.chlg' files:
+```
+ # clush -a "find /var/lib -name 'lamigo-*.chlg' -delete"
+```
+For each MDT in $FS-MDT0000..$FS-MDTffff perform the following steps.
+
+1. Check that the changelog user specified in the lamigo config file
+agrees with the changelog user registered on the MDT:
+```
+ # MDT=myfs1-MDT0042
+ # grep '^user=' /etc/lamigo/${MDT}.conf
+ user=cl23
+ # clush -aN lctl get_param mdd.${MDT}.changelog_users 2> /dev/null
+ mdd.myfs1-MDT0042.changelog_users=
+ current index: 83896534
+ ID index (idle seconds)
+ cl23 82748303 (481516)
+```
+If there are other changelog users registered then they may need to be
+removed. Please consult with the site admins to ensure that they are
+not in use by another changelog consumer (Robinhood, laudit,
+...). Other registered changelog users with high `idle seconds` values
+should be regarded with suspicion.
+
+The number of changelog entries stored on disk by the MDT is the
+difference between the "current index" and the minimum of the indexes
+of all registered users:
+```
+ 83896534 - 82748303 = 1148231
+```
+2. Check the total changelog space consumption on the MDT:
+```
+ # clush -aN lctl get_param mdd.${MDT}.changelog_size 2> /dev/null
+ mdd.myfs1-MDT0042.changelog_size=161358368
+```
+A changelog entry consumes about 144 bytes on average, so this looks
+about right:
+```
+ 144 * 1148231 = 165345264
+```
+Contact L3 if there are no changelog users registered but
+`changelog_size` is reporting a large value or if `changelog_size` is much
+larger than the expected size.
+
+3. Clear old changelogs:
+
+If lamigo is the only changelog consumer and there are many unconsumed
+entries on the MDT then clear the changelog manually before restarting
+hotpools:
+```
+ # clush -aN lctl get_param mdd.${MDT}.changelog_users 2> /dev/null
+ mdd.myfs1-MDT0042.changelog_users=
+ current index: 83896534
+ ID index (idle seconds)
+ cl23 82748303 (481516)
+ # CL=cl23
+ # lfs changelog_clear $MDT $CL 0
+```
+Clearing a large number of old changelog entries will take some
+time. (Do not interrupt it!) Changelogs on *different* MDTs may be
+cleared in parallel.
+
+The steps listed above should be repeated for each MDT!
+
+## Tuning lamigo (optimization)
+
+Consider increasing the number of agents or the number of concurrent
+jobs per agent.
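+
+For example, to raise the per-agent concurrency from 4 to 8, change
+the last field of each agent line in the lamigo config (format
+described above) and restart lamigo (see the online upgrade section
+for a restart that avoids HA):
+```
+    agent=ai400x-0a12-vm00:/lustre/myfs1/client:8
+```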
+
+## Tuning lpurge (optimization)
+
+Consider increasing the max_jobs config parameter from the default of
+8.
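+
+For example (assuming lpurge.conf uses `key=value` lines as shown for
+`mds=` above):
+```
+    # echo max_jobs=16 >> /etc/lpurge/myfs1/OST0117.conf
+```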
+
+## Reducing the changelog mask (optimization)
+
+If there are no changelog consumers other than lamigo then the
+changelog mask may be reduced from the default to include only the
+types needed by hotpools (`MARK UNLNK CLOSE`). Note the spelling of
+'UNLNK'. To see the current changelog mask run:
+```
+ # clush -aN lctl get_param "mdd.${FS}-*.changelog_mask"
+ mdd.myfs1-MDT0042.changelog_mask=
+ MARK CREAT MKDIR HLINK SLINK MKNOD UNLNK RMDIR RENME RNMTO CLOSE LYOUT TRUNC SATTR XATTR HSM MTIME CTIME MIGRT FLRW RESYNC
+```
+To make a persistent setting run the following on the MGS:
+```
+ # lctl set_param -P "mdd.${FS}-*.changelog_mask"='MARK UNLNK CLOSE'
+```
+## Creating a nodemap for hotpools agents (optimization)
+
+As lamigo consumes changelogs (and OST access logs) it dispatches
+mirroring operations to agents, which in turn generate yet more
+changelog entries that lamigo must consume. This feedback loop is not
+fatal but does affect performance. It can be avoided by creating a
+nodemap containing the EXAScaler servers (and any configured agents)
+and setting `audit_mode=0` for that nodemap.
+
+(First consult site admins to determine if nodemaps are already in
+use. If so then any changes made must be consistent with current
+nodemap configuration.)
+
+To proceed collect the set of possible LNet NIDs for the hotpools
+agents. By default this will be the NIDs of the EXAScaler server VMs.
+```
+ # clush -aN lctl list_nids | sort
+ 172.16.0.48@o2ib
+ 172.16.0.49@o2ib
+ 172.16.0.50@o2ib
+ 172.16.0.51@o2ib
+ 172.16.0.52@o2ib
+ 172.16.0.53@o2ib
+ 172.16.0.54@o2ib
+ 172.16.0.55@o2ib
+```
+This list of NIDs must be converted to a comma-separated list or range
+expression. In this example:
+```
+ 172.16.0.[48-55]@o2ib
+```
+(See https://github.com/DDNStorage/lustre_manual_markdown/blob/master/03.16-Mapping%20UIDs%20and%20GIDs%20with%20Nodemap.md for the full syntax.)
+To configure and enable the nodemap run the following on the MGS:
+```
+ # HP_NODEMAP=hp_agents
+ # HP_AGENT_NIDS="172.16.0.[48-55]@o2ib"
+ # lctl nodemap_modify --name default --property admin --value 1
+ # lctl nodemap_modify --name default --property trusted --value 1
+ # lctl nodemap_add $HP_NODEMAP
+ # lctl nodemap_modify --name $HP_NODEMAP --property admin --value 1
+ # lctl nodemap_modify --name $HP_NODEMAP --property trusted --value 1
+ # lctl nodemap_modify --name $HP_NODEMAP --property audit_mode --value 0
+ # lctl nodemap_add_range --name $HP_NODEMAP --range "$HP_AGENT_NIDS"
+ # lctl nodemap_add_range --name $HP_NODEMAP --range 0@lo
+ # lctl nodemap_activate 1
+```
+
+## Monitoring lamigo and lpurge
+
+There are several ways to monitor hotpools beyond checking the
+utilization of the fast pool. lamigo maintains several counters and
+statistics which it will write out on receiving `SIGUSR1`. The following
+commands were run on the VM where `${MDT}` is mounted.
+```
+ # MDT=myfs1-MDT0042
+ # cat /var/run/lamigo-${MDT}.pid
+ 20223
+ # ps -Fp $(cat /var/run/lamigo-${MDT}.pid)
+ UID PID PPID C SZ RSS PSR STIME TTY TIME CMD
+    root     20223  9331  0 71699  5120   3 Apr20 ?        00:59:16 /usr/sbin/lamigo -f /etc/lamigo/myfs1-MDT0042.conf
+ # pkill --exact --pidfile=/var/run/lamigo-${MDT}.pid --signal=USR1 lamigo
+ # sleep 1
+ # cat /var/run/lamigo-${MDT}.stats
+ version: 1.16-1
+ revision: da7d99c2761e0f294a3e0a150ef59972bd04bc89
+ config:
+ chlg_user: cl23
+ mdtname: myfs1-MDT0042
+ mountpoint: /lustre/myfs1/client
+ source_pool: ddn_ssd
+ target_pool: ddn_hdd
+ min_age: 600
+ max_cache: 268435456
+ rescan: 0
+ thread_count: 1
+ pool_refresh: 600
+ progress_interval: 600
+ periods: 16
+ period_time: 10
+ warmup_k: 20
+ cooldown_k: 15
+ ofd_interval: 5
+ hot_fraction: 20
+ hot_after_idle: 3
+ src_free: 70
+ tgt_free: 10
+ pool ddn_ssd:
+ osts: 8
+ avail: 22974591116
+ total: 31824761344
+ open: 1
+ pool ddn_hdd:
+ osts: 2
+ avail: 436791025224
+ total: 494282835936
+ open: 1
+ stats:
+ read: 6188174
+ skipped: 369235
+ processed: 5818939
+ removed: 0
+ dups: 311924
+ spawned: 7111691
+ replicated: 4429160
+ busy: 0
+ queued: 1226018
+ skip_hot: 0
+ ro2hot: 0
+ rw2hot: 0
+ rw2cold: 3382984
+ changelog:
+ last_processed_idx: 84139503
+ last_cleared_idx: 82909792
+ last_cleared_time: 1619120075 # ( Thu Apr 22 19:34:35 2021 )
+ agents:
+ agent3: { host: ai400x-0a12-vm03, mnt: /lustre/myfs1/client, jobs: 4, max: 4, state: active }
+ agent2: { host: ai400x-0a12-vm02, mnt: /lustre/myfs1/client, jobs: 3, max: 4, state: active }
+ ...
+ jobs:
+ job0: { tid: 140569798792960, fid: [0x2000079ac:0xfb4e:0x0], index: 0, agent: 3, start: 1619120075, command: extend }
+ job1: { tid: 140568524986112, fid: [0x2000079ac:0xfb02:0x0], index: 0, agent: 3, start: 1619120075, command: extend }
+ job2: { tid: 140569807185664, fid: [0x2000079ac:0xfaff:0x0], index: 0, agent: 3, start: 1619120075, command: extend }
+ ...
+```
+Note that the difference between the MDT current changelog index and
+the last index cleared by lamigo indicates the degree to which
+lamigo's consumption of changelog entries is keeping up with the
+production of new entries (both values can be seen using the
+`changelog_users` parameter as shown above). A 10 minute (min-age) lag
+between reading and clearing is normal.
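+
+Using the stats above as an example:
+```
+    84139503 - 82909792 = 1229711
+```
+entries had been read but not yet cleared when this dump was taken.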
+
+Similarly for lpurge, the following commands were run on the VM where
+`${OST}` is mounted:
+```
+ # OST=myfs1-OST0117
+ # cat /var/run/lpurge-${OST}.pid
+ 20222
+ # ps -Fp $(cat /var/run/lpurge-${OST}.pid)
+ UID PID PPID C SZ RSS PSR STIME TTY TIME CMD
+ root 20222 1 6 171957 13352 16 Apr20 ? 03:09:06 /usr/sbin/lpurge -f /etc/lpurge/myfs1/OST0117.conf
+ # pkill --exact --pidfile=/var/run/lpurge-${OST}.pid --signal=USR1 lpurge
+ # sleep 1
+ # cat /var/run/lpurge-${OST}.stats
+ version: 1.16-1
+ revision: b465db8b9b99e217d175f31230d04e10a9a17906
+ config:
+ free_high: 80
+ free_low: 50
+ ostname: myfs1-OST0117
+ mountpoint: /lustre/myfs1/client
+ pool: ddn_ssd
+ max_jobs: 8
+ check_interval: 30
+ scan_rate: 10000
+ scan_threads: 1
+ slot_size: 1048576
+ stats:
+ scans: 10
+ scan_time: 346
+ slow_scans: 5
+ queued: 3210711
+ started: 3202330
+ purged: 363985
+ failed: 2838337
+ low_hits: 6
+ space:
+ kbfree: 2997655148
+ low: 1989047584
+ hi: 3182476134
+ hlists:
+```
+
+# Known issues
+
+## If lamigo is falling behind on changelog consumption
+
+Append a `rescan` directive to the lamigo configuration file, manually
+clear the changelog, and then restart lamigo:
+```
+ # MDT=myfs1-MDT0042
+ # echo rescan >> /etc/lamigo/${MDT}.conf
+ # CL=...
+ # lfs changelog_clear $MDT $CL 0
+ # pkill --exact --pidfile=/var/run/lamigo-${MDT}.pid lamigo
+```
+
+## lamigo SSH errors
+
+If lamigo reports
+```
+ lamigo: Error authenticating pubkey: Failed to read private key: /root/.ssh/id_rsa
+```
+or
+```
+ lamigo: cannot authenticate SSH session to host HOST: ...
+```
+then first check that `ssh` works between the affected hosts. If so
+then check that the private key file (usually `/root/.ssh/id_rsa`) is
+in PEM format. If the key file starts with
+```
+ -----BEGIN RSA PRIVATE KEY-----
+```
+then it is likely in PEM format. If, instead, it starts with
+```
+ -----BEGIN OPENSSH PRIVATE KEY-----
+```
+then it is in the newer OpenSSH format. To generate a key in PEM
+format, add `-m PEM` to the `ssh-keygen` command line.
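+
+A minimal sketch for generating a new PEM-format key pair (adjust the
+path and key type to site policy, and redistribute the public key
+afterwards):
+```
+    # ssh-keygen -m PEM -t rsa -N "" -f /root/.ssh/id_rsa
+```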
+
+Second, ensure that the key file does not contain any comments (lines
+starting with `#`).
+
+## Working around HA/Pacemaker/STONITH when stopping hotpools
+
+Formerly "stratagem-hp-stop-no-more-tears.txt". These instructions are
+obsolete after converting hotpools to a ticketed HA configuration.
+```
+# Temporary process to stop hotpools in a production environment
+# without invoking HA STONITH due to busy client mount points.
+#
+# XXX This is not an automated process. You need to run each command in
+# turn and look at the output. Maybe even understand it. Sorry.
+#
+# See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/high_availability_add-on_reference/s1-managedresource-haar
+# for more information.
+#
+# Author: John L. Hammond <jhammond@whamcloud.com>
+
+# Put all lamigo and lpurge HA resources in unmanaged mode.
+crm resource unmanage 'lamigo*'
+crm resource unmanage 'lpurge*'
+
+# In the output of hastatus the lamigo and lpurge resources should now
+# show up as "Started (unmanaged)"
+hastatus
+
+# Use systemctl to actually stop the services corresponding to the HA
+# resources you just unmanaged.
+clush -a systemctl stop 'lamigo@*'
+clush -a systemctl stop 'lpurge@*'
+
+# In the output of hastatus the lpurge resources should now show up as "Stopped (unmanaged)"
+hastatus
+
+# Verify that the processes have actually terminated.
+clush -a ps -FC lamigo,lpurge
+
+# Wait for any mirroring operations to drain from the client mounts.
+clush -a lsof /lustre/$FS/client/
+clush -a ps -FC lfs
+
+# Once they have drained you can use the stop script.
+stratagem-hp-stop.sh
+
+# To restart:
+crm resource manage 'lamigo*'
+crm resource manage 'lpurge*'
+stratagem-hp-start.sh
+
+# Verify that things are back to normal:
+hastatus
+
+# After all of this 'hastatus' may show some 'Failed Resource Actions'. For example:
+#
+# Failed Resource Actions:
+# * lpurge-fs0a12-OST0004_monitor_30000 on ai400x-0a12-vm02 'not running' (7): call=316, status=complete, exitreason='',
+# last-rc-change='Thu Apr 8 18:01:57 2021', queued=0ms, exec=0ms
+#
+# To clean these up do 'crm resource cleanup lpurge-fs0a12-OST0004'
+
+```
--- /dev/null
+#!/bin/bash
+
+# Try to get the current revision (commit hash). Don't fail.
+
+function get_revision {
+ local src="$1"
+ local val="$2"
+
+ if [[ -n "${val}" ]]; then
+ echo "$0: using '${src}' = '${val}'" >&2
+ printf "%s\n" "${val}"
+ exit 0
+ fi
+}
+
+# Manual override.
+get_revision LIPE_REVISION "${LIPE_REVISION:-}"
+
+# Release build on https://es-build.datadirectnet.com/
+get_revision ES_BUILD_LUSTRE_REVISION "${ES_BUILD_LUSTRE_REVISION:-}"
+
+# Reviews build from Gerrit.
+get_revision GERRIT_PATCHSET_REVISION "${GERRIT_PATCHSET_REVISION:-}"
+
+# Build from source.
+GIT_REVISION="$(git rev-parse HEAD 2> /dev/null)"
+get_revision 'git rev-parse HEAD' "${GIT_REVISION:-}"
+
+printf "None\n"
%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
Release: @LIPE_RELEASE@%{?dist}
-
+VCS: @LIPE_REVISION@
Summary: lipe - Lustre Integrated Policy Engine
License: All rights reserved DataDirect Networks Inc.
Group: Applications/System
install -m 0744 -D init.d/lipe_test_scheduler \
$RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/lipe_test_scheduler
%endif
-install -g 0 -o 0 -m 0644 man/lipe_scan.1 $RPM_BUILD_ROOT%{_mandir}/man1/
-install -g 0 -o 0 -m 0644 man/lipe_find.1 $RPM_BUILD_ROOT%{_mandir}/man1/
-install -g 0 -o 0 -m 0644 man/lfill.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/lipe_scan.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/lipe_find.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/lfill.1 $RPM_BUILD_ROOT%{_mandir}/man1/
%if %{with laudit}
-install -g 0 -o 0 -m 0644 man/laudit.1 $RPM_BUILD_ROOT%{_mandir}/man1/
-install -g 0 -o 0 -m 0644 man/laudit-report.1 $RPM_BUILD_ROOT%{_mandir}/man1/
-install -g 0 -o 0 -m 0644 man/laudit.conf.5 $RPM_BUILD_ROOT%{_mandir}/man5/
+install -m 0644 man/laudit.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/laudit-report.1 $RPM_BUILD_ROOT%{_mandir}/man1/
+install -m 0644 man/laudit.conf.5 $RPM_BUILD_ROOT%{_mandir}/man5/
%endif
cp -a lhsm_tests $RPM_BUILD_ROOT%{_libdir}/
%changelog
-* Thu Jun 30 2020 Gu Zheng <gzheng@ddn.com> 1.5
+* Tue Jun 30 2020 Gu Zheng <gzheng@ddn.com> [1.5]
- Update version to 1.5
-* Thu Apr 3 2019 Gu Zheng <gzheng@ddn.com> 1.4
+* Wed Apr 03 2019 Gu Zheng <gzheng@ddn.com> [1.4]
- Devide RPM by function
-* Thu Jan 3 2019 Gu Zheng <gzheng@ddn.com> 1.4
+* Thu Jan 03 2019 Gu Zheng <gzheng@ddn.com> [1.4]
- Add pyltest RPM
-* Wed Mar 7 2018 Sebastien Buisson <sbuisson@ddn.com> 1.1
+* Wed Mar 07 2018 Sebastien Buisson <sbuisson@ddn.com> [1.1]
- Add laudit and laudit-report
-* Mon Oct 11 2017 Li Xi <lixi@ddn.com> 1.0
+* Wed Oct 11 2017 Li Xi <lixi@ddn.com> [1.0]
- Add license-server RPM
-* Mon Feb 27 2017 Qian Yingjin <qian@ddn.com> 0.1
+* Mon Feb 27 2017 Qian Yingjin <qian@ddn.com> [0.1]
- Initial import
#!/bin/bash
-# Copyright DDN 2020
# -*- indent-tabs-mode: nil -*-
+# Copyright DDN 2021
+#
+# Pacemaker Ticket:
+# $FS-hotpool-allocated (controlled by resource $FS-hotpool)
+#
+# Pacemaker Resources:
+#   $FS-hotpool (controls $FS-hotpool-allocated ticket)
+# cl-$FS-client (cloned client mount)
+# lamigo-$FS-MDTXXXX (for each MDT)
+# lpurge-$FS-OSTXXXX (for each OST in fast pool)
+#
+# Files Created:
+# /etc/lamigo/$FS-MDTXXXX.conf (for each MDT)
+# /etc/lpurge/$FS/OSTXXXX.conf (for each OST in fast pool)
+# /etc/fstab updated with client mount line
+#
+# Lustre Changes:
+# changelog user created for each MDT
# CONFIGURATION VARIABLES
-# ASSUMES last configlog user is for lamigo, or creates one if one doesn't exist
-
-LAMIGO_MINAGE=600
-LPURGE_FREEHI=80
-LPURGE_FREELO=50
-FAST_POOL=ddn_ssd
-SLOW_POOL=ddn_hdd
+DEF_LAMIGO_MINAGE=600
+DEF_LPURGE_FREEHI=80
+DEF_LPURGE_FREELO=50
+DEF_FAST_POOL=ddn_ssd
+DEF_SLOW_POOL=ddn_hdd
function usage()
{
- echo "Usage: $(basename $0) -m MINAGE -H FREEHI -L FREELO -f FASTPOOL -s SLOWPOOL [ FILESYSTEM ]"
- exit 0
+ local rc=${1:-0}
+ local values=${2:-1}
+ echo "Usage: $(basename $0) [-h] [-n] [-m MINAGE -H FREEHI -L FREELO -f FASTPOOL -s SLOWPOOL] [ FILESYSTEM ]"
+    echo "  Automatically configures Stratagem Hotpools in an existing HA environment"
+    echo "  -n - Do not auto-detect values"
+ echo " -h - This help message"
+ echo " -m - lamigo minimum age"
+ echo " -H - lpurge free-hi percent"
+ echo " -L - lpurge free-lo percent"
+ echo " -f - Fast OST pool"
+ echo " -s - Slow OST pool"
+ if (( values == 1 )); then
+ echo "Values:"
+ echo " Filesystem : $FS"
+ echo " Fast OST Pool : $FAST_POOL"
+ echo " Slow OST Pool : $SLOW_POOL"
+ echo " LAmigo MinAge : $LAMIGO_MINAGE"
+ echo " LPurge FreeHI : ${LPURGE_FREEHI:-${DEF_LPURGE_FREEHI}}"
+ echo " LPurge FreeLO : ${LPURGE_FREELO:-${DEF_LPURGE_FREELO}}"
+ fi
+ exit $rc
}
-while getopts "hm:H:L:f:s:" opt; do
+HELP=false
+AUTO=true
+
+while getopts "hm:H:L:f:s:n" opt; do
case "$opt" in
- m) LAMIGO_MINAGE=$OPTARG ;;
- H) LPURGE_FREEHI=$OPTARG ;;
- L) LPURGE_FREELO=$OPTARG ;;
- f) FAST_POOL=$OPTARG ;;
- s) SLOW_POOL=$OPTARG ;;
- h) usage ;;
+ m) LAMIGO_MINAGE=$OPTARG ;;
+ H) LPURGE_FREEHI=$OPTARG ;;
+ L) LPURGE_FREELO=$OPTARG ;;
+ f) FAST_POOL=$OPTARG ;;
+ s) SLOW_POOL=$OPTARG ;;
+ n) AUTO=false ;;
+ h) HELP=true ;;
esac
done
shift $((OPTIND-1))
+FS=$1
+
# Location of /lustre/$FS/<SERVER> mounts and /lustre/$FS/client mount
ROOT=/lustre
-FS=$1
+function locate_resource() {
+ local res=$1
+ clush -N --group=ha_heads "crm_resource -QW -r $res 2> /dev/null || true"
+}
function is_valid_percent {
local p="${1:-}"
[[ "${p}" =~ ^[[:digit:]]+$ ]] && ((p >= 5))
}
-if ! is_valid_percent "${LPURGE_FREEHI}"; then
- echo "Invalid FREEHI percentage '${LPURGE_FREEHI}'"
- exit 1
-fi
-
-if ! is_valid_percent "${LPURGE_FREELO}"; then
- echo "Invalid FREELO percentage '${LPURGE_FREELO}'"
- exit 1
-fi
-
-if ! ((LPURGE_FREELO < LPURGE_FREEHI)); then
- echo "FREELO (${LPURGE_FREELO}) must be less than FREEHI (${LPURGE_FREEHI})"
- exit 1
-fi
-
-if ! is_minage_ok "${LAMIGO_MINAGE}"; then
- echo "MINAGE (${LAMIGO_MINAGE}) must be positive integer >= 5"
- exit 1
-fi
-
if [ -z "$FS" ]; then
# Try automatic detection
- FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$)
+ FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
if [ $(echo "$FS" |wc -w) -ne 1 ]; then
- echo "Usage: $0 FSNAME"
- exit 1
+ echo "Error: Unable to determine filesystem automatically"
+ usage 22 0
fi
fi
-# Script from here down
-function locate_resource() {
- res=$1
- clush -N --group=ha_heads "crm_resource -QW -r $res 2> /dev/null || true"
-}
+if $AUTO && [ ! -f /etc/lamigo/$FS-MDT0000.conf ]; then
+ echo "Disabling auto-detection of settings ($FS-MDT0000 config missing)"
+ AUTO=false
+fi
MDSHOST=$(locate_resource mdt0000-$FS)
if [ -z "$MDSHOST" ]; then
# if FS is not an resident filesystem, mdt0 will not exist for it
echo Failed to find mdt0 for filesystem: $FS
- exit 1
+ exit 5
+fi
+
+# reuse of changelog user handled below
+# get lamigo options from the MDT0000 config
+if $AUTO; then
+ AUTO_FAST_POOL=$(awk -F = '/^src=/{ print $2 }' /etc/lamigo/$FS-MDT0000.conf)
+ AUTO_SLOW_POOL=$(awk -F = '/^tgt=/{ print $2 }' /etc/lamigo/$FS-MDT0000.conf)
+ AUTO_LAMIGO_MINAGE=$(awk -F = '/^min-age=/{ print $2 }' /etc/lamigo/$FS-MDT0000.conf)
fi
+# Set each value: prefer the explicit option, then the auto-detected value, then the default
+
+LAMIGO_MINAGE=${LAMIGO_MINAGE:-${AUTO_LAMIGO_MINAGE:-${DEF_LAMIGO_MINAGE}}}
+FAST_POOL=${FAST_POOL:-${AUTO_FAST_POOL:-${DEF_FAST_POOL}}}
+SLOW_POOL=${SLOW_POOL:-${AUTO_SLOW_POOL:-${DEF_SLOW_POOL}}}
+
# List of %04x formated OST indexes
-FAST_OSTLIST=$(ssh $MDSHOST lctl pool_list $FS.$FAST_POOL|sed -e 's/.*OST\(....\).*/\1/p;d')
+FAST_OSTLIST=$(ssh $MDSHOST lctl pool_list $FS.$FAST_POOL|sed -ne 's/.*-OST\(....\)_UUID/\1/p')
if [ -z "$FAST_OSTLIST" ]; then
echo "Failed to find OSTs in Fast pool ($FS.$FAST_POOL)"
- exit 1
+ usage 2
fi
-SLOW_OSTLIST=$(ssh $MDSHOST lctl pool_list $FS.$SLOW_POOL|sed -e 's/.*OST\(....\).*/\1/p;d')
+SLOW_OSTLIST=$(ssh $MDSHOST lctl pool_list $FS.$SLOW_POOL|sed -ne 's/.*-OST\(....\)_UUID/\1/p')
if [ -z "$SLOW_OSTLIST" ]; then
echo "Failed to find OSTs in Slow pool ($FS.$SLOW_POOL)"
- exit 1
+ usage 2
+fi
+
+if $AUTO; then
+    # get lpurge options - read the first existing OST config in the FAST pool
+    for OST in $FAST_OSTLIST; do
+ F=/etc/lpurge/$FS/OST${OST}.conf
+ if [ -f $F ]; then
+ AUTO_LPURGE_FREEHI=$(awk -F = '/^freehi=/{ print $2 }' $F)
+ AUTO_LPURGE_FREELO=$(awk -F = '/^freelo=/{ print $2 }' $F)
+ break
+ fi
+ done
+fi
+
+LPURGE_FREEHI=${LPURGE_FREEHI:-${AUTO_LPURGE_FREEHI:-${DEF_LPURGE_FREEHI}}}
+LPURGE_FREELO=${LPURGE_FREELO:-${AUTO_LPURGE_FREELO:-${DEF_LPURGE_FREELO}}}
+
+if ! is_valid_percent "${LPURGE_FREEHI}"; then
+ echo "Invalid FREEHI percentage '${LPURGE_FREEHI}'"
+ usage 34
+fi
+
+if ! is_valid_percent "${LPURGE_FREELO}"; then
+ echo "Invalid FREELO percentage '${LPURGE_FREELO}'"
+ usage 34
fi
+if ! ((LPURGE_FREELO < LPURGE_FREEHI)); then
+ echo "FREELO (${LPURGE_FREELO}) must be less than FREEHI (${LPURGE_FREEHI})"
+ usage 34
+fi
+
+if ! is_minage_ok "${LAMIGO_MINAGE}"; then
+ echo "MINAGE (${LAMIGO_MINAGE}) must be positive integer >= 5"
+ usage 34
+fi
+
+if $HELP; then
+ usage 0
+fi
+
+echo "Using: $0 -m ${LAMIGO_MINAGE} -H ${LPURGE_FREEHI} -L ${LPURGE_FREELO} -f ${FAST_POOL} -s ${SLOW_POOL} $FS"
+
OSSLIST=$(es_config_get --option fs_settings.$FS.oss_list)
# Die on errors
MOUNTSPEC=$(/opt/ddn/es/tools/mount_lustre_client --dry-run --fs $FS |awk '{ print $4 }')
-echo "Creating config directories"
-clush -a mkdir -p /etc/lpurge/$FS/ /etc/lamigo/ $ROOT/$FS/client
+# Create client mount if it doesn't exist
+if ! crm_resource -QW -r cl-$FS-client > /dev/null 2>&1; then
-echo "Creating lustre client mounts for $FS"
-clush -a "grep -q $FS/client /etc/fstab || (echo $MOUNTSPEC $ROOT/$FS/client lustre x-systemd.mount-timeout=20m,noauto,_netdev >> /etc/fstab)"
-clush -a mkdir -p /etc/systemd/system/lustre-$FS-client.mount.d/
-cat << EOF > /etc/systemd/system/lustre-$FS-client.mount.d/timeout.conf
+ echo "Creating config directories"
+ clush -a mkdir -p /etc/lpurge/$FS/ /etc/lamigo/ $ROOT/$FS/client
+
+ echo "Creating lustre client mounts for $FS"
+ clush -a "grep -q $FS/client /etc/fstab || (echo $MOUNTSPEC $ROOT/$FS/client lustre x-systemd.mount-timeout=20m,noauto,_netdev >> /etc/fstab)"
+ clush -a mkdir -p /etc/systemd/system/lustre-$FS-client.mount.d/
+ cat << EOF > /etc/systemd/system/lustre-$FS-client.mount.d/timeout.conf
[Mount]
TimeoutSec=20m
EOF
-clush -ca /etc/systemd/system/lustre-$FS-client.mount.d/timeout.conf
-clush -a systemctl daemon-reload
-echo "Creating lustre client resource agent"
-clush -g ha_heads crm configure <<EOF
-primitive $FS-client systemd:lustre-$FS-client.mount op start timeout=900s op monitor interval=60s meta target-role=Stopped
+ clush -ca /etc/systemd/system/lustre-$FS-client.mount.d/timeout.conf
+ clush -a systemctl daemon-reload
+ echo "Creating lustre client resource agent"
+ clush -g ha_heads crm configure <<EOF
+primitive $FS-client systemd:lustre-$FS-client.mount meta target-role=Stopped op start timeout=900s op monitor interval=60s op stop timeout=900s
clone cl-$FS-client $FS-client
rsc_ticket ticket-$FS-allocated-client $FS-allocated: cl-$FS-client loss-policy=stop
EOF
-# create order constraints on all lustre-server template types (mdt or ost)
-clush -g ha_heads "cibadmin -e --query --xpath '//template[@type=\"lustre-server\"]'|awk -F \"'\" '/lustre-$FS-/{ print \$2 }'|awk -F - '{ print \"order $FS-client-after-\"\$3\" 0: lustre-$FS-\"\$3\":start cl-$FS-client:start\" }'|crm configure"
+ # create order constraint for client on all lustre-server template types (mdt or ost)
+ clush -g ha_heads "cibadmin -e --query --xpath '//template[@type=\"lustre-server\"]'|awk -F \"'\" '/lustre-$FS-/{ print \$2 }'|awk -F - '{ print \"order $FS-client-after-\"\$3\" 0: lustre-$FS-\"\$3\":start cl-$FS-client:start\" }'|crm configure"
-ssh $MDSHOST crm configure <<EOF
+ ssh $MDSHOST crm configure <<EOF
order $FS-client-after-mgs 0: mgs:start cl-$FS-client:start
EOF
+fi # ! crm_resource client
+
+# ticket
+echo "Creating Hotpool ticket for $FS"
+clush -g ha_heads crm configure <<EOF
+primitive $FS-hotpool ocf:ddn:Ticketer params name=$FS-hotpool-allocated meta allow-migrate=true priority=199 target-role=Stopped op start interval=0 timeout=30 op monitor interval=30 timeout=30 op stop interval=0 timeout=30
+order $FS-hotpool-after-cl-$FS-client 0: cl-$FS-client $FS-hotpool
+EOF
+
echo "Configuring OFD access logs"
MGSHOST=$(locate_resource mgs)
ssh $MGSHOST lctl set_param -P "obdfilter.${FS}-*.access_log_size=1048576"
# this results in MDTLIST being a list of MDT indexes in format "%04d"
# This format is problematic for values greater than 0007, since by
# default bash will interpret it as octal
-MDTLIST=$(clush -qN -g ha_heads crm_resource --list-raw|sed "s/mdt\(.*\)-$FS$/\1/p;d")
+MDTLIST=$(clush -qN -g ha_heads crm_resource --list-raw|sed -ne "s/mdt\(.*\)-$FS$/\1/p")
for OST in $FAST_OSTLIST; do
INDEX=$(printf "%04d" 0x$OST)
clush -ca /etc/lpurge/$FS/OST$OST.conf
echo Creating lpurge resource for $FS-OST$OST
- clush -qS --group=ha_heads "crm_resource -QW -r ost$INDEX-$FS >/dev/null 2&>1 && crm configure << EOF || true
+ clush -qS --group=ha_heads "crm_resource -QW -r ost$INDEX-$FS >/dev/null 2>&1 && crm configure << EOF || true
primitive lpurge-$FS-OST$OST systemd:lpurge@$FS-OST$OST.service op monitor interval=30s
colocation lpurge-$FS-$OST-with-ost inf: lpurge-$FS-OST$OST ost$INDEX-$FS
order lpurge-$FS-$OST-after-ost ost$INDEX-$FS lpurge-$FS-OST$OST
-order lpurge-$FS-$OST-after-client cl-$FS-client lpurge-$FS-OST$OST
+rsc_ticket ticket-$FS-hotpool-allocated-lpurge-$FS-OST$OST $FS-hotpool-allocated: lpurge-$FS-OST$OST loss-policy=stop
EOF"
done
for INDEX in $MDTLIST; do
MDT=$(printf "%04x" $((10#$INDEX)) )
MDSHOST=$(locate_resource mdt$INDEX-$FS)
- CL=$(ssh $MDSHOST lctl get_param -n mdd.$FS-MDT$MDT.changelog_users|tail -1|awk '/^cl/{ print $1 }')
+ CL=""
+
+ # reuse existing changelog user
+ if [ -f /etc/lamigo/$FS-MDT$MDT.conf ]; then
+ CL=$(awk -F = '/^user=/{ print $2 }' /etc/lamigo/$FS-MDT$MDT.conf)
+ # ensure changelog user exists
+        if [ -z "$(ssh $MDSHOST lctl get_param -n mdd.$FS-MDT$MDT.changelog_users|grep "$CL[[:space:]]")" ]; then
+ echo "Previous changelog user for $FS-MDT$MDT: $CL is missing"
+ CL=""
+ fi
+ fi
+ # create new user if none was previously configured
if [ -z "$CL" ]; then
- ssh $MDSHOST lctl --device $FS-MDT$MDT changelog_register
- CL=$(ssh $MDSHOST lctl get_param -n mdd.$FS-MDT$MDT.changelog_users|tail -1|awk '/^cl/{ print $1 }')
+ CL=$(ssh $MDSHOST lctl --device $FS-MDT$MDT changelog_register | awk -F \' '{print $2 }')
+ echo "Registered new changelog user for $FS-MDT$MDT: $CL"
fi
echo Creating lamigo config for $FS-MDT$MDT
tgt=$SLOW_POOL
EOF
for HOST in $OSSLIST; do
- echo oss=$HOST >> /etc/lamigo/$FS-MDT$MDT.conf
+ echo oss=$HOST >> /etc/lamigo/$FS-MDT$MDT.conf
done
for HOST in $(cluset -e @all); do
echo agent=$HOST:$ROOT/$FS/client:4 >> /etc/lamigo/$FS-MDT$MDT.conf
clush -ca /etc/lamigo/$FS-MDT$MDT.conf /etc/systemd/system/lamigo@$FS-MDT$MDT.service.d/override.conf
echo Creating lamigo resource for $FS-MDT$MDT
- clush -qS --group=ha_heads "crm_resource -QW -r mdt$INDEX-$FS >/dev/null 2&>1 && crm configure << EOF || true
+ clush -qS --group=ha_heads "crm_resource -QW -r mdt$INDEX-$FS >/dev/null 2>&1 && crm configure << EOF || true
primitive lamigo-$FS-MDT$MDT systemd:lamigo@$FS-MDT$MDT.service op start timeout=15m op monitor interval=30s
colocation lamigo-$FS-MDT$MDT-with-mdt inf: lamigo-$FS-MDT$MDT mdt$INDEX-$FS
order lamigo-$FS-$MDT-after-mdt mdt$INDEX-$FS lamigo-$FS-MDT$MDT
-order lamigo-$FS-$MDT-after-client cl-$FS-client lamigo-$FS-MDT$MDT
+rsc_ticket ticket-$FS-hotpool-allocated-lamigo-$FS-MDT$MDT $FS-hotpool-allocated: lamigo-$FS-MDT$MDT loss-policy=stop
EOF"
done
echo "Configuration complete. Run the following command to start services:"
-echo " clush -qS --group=ha_heads crm resource start cl-$FS-client"
-echo "or"
echo " stratagem-hp-start.sh $FS"
--- /dev/null
+#!/bin/bash
+# -*- indent-tabs-mode: nil -*-
+# Copyright DDN 2021
+
+FS=$1
+
+function usage() {
+ echo "Usage: $(basename $0) [-h] [FILESYSTEM]"
+ echo " Convert Stratagem Hotpools HA environment to be ticket based"
+}
+
+if [ "$FS" = "-h" ] || [ "$FS" = "--help" ]; then
+ usage
+ exit 0
+fi
+
+function locate_resource() {
+ local res=$1
+ clush -N --group=ha_heads "crm_resource -QW -r $res 2> /dev/null || true"
+}
+
+if [ -z "$FS" ]; then
+ # Try automatic detection
+    FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
+
+ if [ $(echo "$FS" |wc -w) -ne 1 ]; then
+ echo "Error: Unable to determine filesystem automatically, please specify"
+ usage
+ exit 1
+ fi
+fi
+
+if crm_resource -QW -r $FS-hotpool > /dev/null 2>&1; then
+ echo "Hotpools for $FS already using ticket"
+ exit 0
+fi
+
+MDSHOST=$(locate_resource mdt0000-$FS)
+MDTLIST=$(clush -qN -g ha_heads crm_resource --list-raw|sed -ne "s/mdt\(.*\)-$FS$/\1/p")
+FAST_POOL=$(awk -F = '/^src=/{ print $2 }' /etc/lamigo/$FS-MDT0000.conf)
+FAST_OSTLIST=$(ssh $MDSHOST lctl pool_list $FS.$FAST_POOL|sed -ne 's/.*-OST\(....\)_UUID/\1/p')
+
+ROLE=Started
+if crm_resource -r $FS-client -W |& grep -q NOT; then
+ ROLE=Stopped
+fi
+
+echo "Creating hotpool ticket for $FS"
+clush -g ha_heads crm configure <<EOF
+primitive $FS-hotpool ocf:ddn:Ticketer params name=$FS-hotpool-allocated meta allow-migrate=true priority=199 target-role=$ROLE op start interval=0 timeout=30 op monitor interval=30 timeout=30 op stop interval=0 timeout=30
+order $FS-hotpool-after-cl-$FS-client 0: cl-$FS-client $FS-hotpool
+EOF
+
+for OST in $FAST_OSTLIST; do
+ INDEX=$(printf "%04d" 0x$OST)
+    echo "Updating lpurge HA for $FS-OST$OST"
+
+ clush -qS --group=ha_heads "crm_resource -QW -r ost$INDEX-$FS >/dev/null 2>&1 && crm configure << EOF || true
+delete lpurge-$FS-$OST-after-client
+rsc_ticket ticket-$FS-hotpool-allocated-lpurge-$FS-OST$OST $FS-hotpool-allocated: lpurge-$FS-OST$OST loss-policy=stop
+EOF"
+done
+
+# MDTLIST is derived from resources so it's already in decimal but formatted %04d
+for INDEX in $MDTLIST; do
+ MDT=$(printf "%04x" $((10#$INDEX)) )
+
+ echo "Updating lamigo HA for $FS-MDT$MDT"
+
+ clush -qS --group=ha_heads "crm_resource -QW -r mdt$INDEX-$FS >/dev/null 2>&1 && crm configure << EOF || true
+delete lamigo-$FS-$MDT-after-client
+rsc_ticket ticket-$FS-hotpool-allocated-lamigo-$FS-MDT$MDT $FS-hotpool-allocated: lamigo-$FS-MDT$MDT loss-policy=stop
+EOF"
+done
FS=$1
+function usage() {
+ echo "Usage: $(basename $0) [-h] [FILESYSTEM]"
+ echo " Start Stratagem Hotpools HA environment"
+}
+
+if [ "$FS" = "-h" ] || [ "$FS" = "--help" ]; then
+ usage
+ exit 0
+fi
+
if [ -z "$FS" ]; then
# Try automatic detection
- FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$)
+    FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
if [ $(echo "$FS" |wc -w) -ne 1 ]; then
- echo "Usage: $0 FSNAME"
+ echo "Error: Could not automatically find filesystem, please specify"
+ usage
exit 1
fi
fi
+if ! crm_resource -QW -r $FS-hotpool > /dev/null 2>&1; then
+ if clush -Ng ha_heads crm_resource -l|egrep -q "^(lamigo|lpurge)-$FS-"; then
+ echo "Hotpools needs conversion"
+ echo " Please run: stratagem-hp-convert.sh"
+ else
+ echo "Hotpools not configured"
+ echo " Please run: stratagem-hp-config.sh"
+ fi
+ exit 1
+fi
+
echo Starting Hotpools for $FS
-clush -qS --group=ha_heads crm res start cl-$FS-client
+clush -qS --group=ha_heads crm res start cl-$FS-client $FS-hotpool
FS=$1
+function usage() {
+ echo "Usage: $(basename $0) [-h] [FILESYSTEM]"
+ echo " Stop stratagem hotpools in HA environment"
+}
+
+if [ "$FS" = "-h" ] || [ "$FS" = "--help" ]; then
+ usage
+ exit 0
+fi
+
if [ -z "$FS" ]; then
# Try automatic detection
- FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$)
+    FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
if [ $(echo "$FS" |wc -w) -ne 1 ]; then
- echo "Usage: $0 FSNAME"
+ echo "Error: Could not automatically find filesystem, please specify"
+ usage
exit 1
fi
fi
-echo Stopping Hotpools for $FS
-clush -qS --group=ha_heads crm res stop cl-$FS-client
+if crm_resource -QW -r $FS-hotpool > /dev/null 2>&1; then
+ echo Stopping Hotpools for $FS
+ clush -qS --group=ha_heads crm res stop $FS-hotpool
+
+elif clush -Ng ha_heads crm_resource -l|egrep -q "^(lamigo|lpurge)-$FS-"; then
+ echo "Stopping old style Hotpools (see stratagem-hp-convert.sh)"
+ clush -qS -g ha_heads crm res stop cl-$FS-client
+
+else
+ echo "Hotpools not configured"
+ echo " Please run: stratagem-hp-config.sh"
+ exit 1
+fi
--- /dev/null
+#!/bin/bash
+# -*- indent-tabs-mode: nil -*-
+# Copyright DDN 2021
+#
+# Stops and removes the cloned client mount
+# Removes:
+#   resource $FS-client (the cloned client mount)
+# client mount from /etc/fstab
+
+# Set a default 10+ minute timeout for waitfor()
+TIMEOUT=${TIMEOUT:-600}
+
+function usage() {
+ echo "Usage: $(basename $0) [-h] [-t SECS] [FILESYSTEM]"
+ echo " Tear down server client mount HA environment"
+ echo " -t SECS - Max timeout to wait for services to stop (default: $TIMEOUT)"
+}
+
+while getopts "ht:" opt; do
+ case $opt in
+ h) usage; exit 0;;
+ t) TIMEOUT=$OPTARG;;
+ esac
+done
+
+if [ "$FS" = "--help" ]; then
+ usage
+ exit 0
+fi
+
+if [ -z "$FS" ]; then
+ # Try automatic detection
+    FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
+
+ if [ $(echo "$FS" |wc -w) -ne 1 ]; then
+        echo "Error: Could not automatically find filesystem, please specify"
+        usage
+ exit 1
+ fi
+fi
+
+waitfor () {
+ local ids=$*
+
+ [ -z "$ids" ] && return
+
+ echo -n "Waiting for $ids"
+
+ local deadline=$((SECONDS+TIMEOUT))
+
+ local done=false
+ until $done || ((deadline < SECONDS)); do
+
+ done=true
+ for i in $ids; do
+            # This finds the host the resource is active on (empty if it is stopped)
+ res=$(clush -qg ha_heads crm_resource -QW -r $i 2> /dev/null)
+ if [ -n "$res" ]; then
+ done=false
+ echo -n "."
+ sleep 1
+ break
+ fi
+ done
+ done
+
+ echo ""
+
+ if ! $done; then
+ echo "Waiting for $ids TIMED OUT!"
+ exit 1
+ fi
+}
+
+res=$(clush -qNg ha_heads crm_resource -l 2> /dev/null|grep lamigo-$FS-MDT0000)
+if [ -n "$res" ]; then
+ echo "ERROR: Please tear down Hotpools first"
+ exit 1
+fi
+
+echo Stopping Client
+clush -qS --group=ha_heads crm res stop cl-$FS-client
+
+waitfor cl-$FS-client
+
+clush -qS --group=ha_heads crm config del $FS-client
+
+echo "Removing client mount from fstab"
+clush -a "sed -i '/\/$FS\/client.*lustre/d' /etc/fstab"
#!/bin/bash
# -*- indent-tabs-mode: nil -*-
# Copyright DDN 2020
+#
+# After stopping hotpools (via ticket)
+# Removes:
+# resources lamigo-$FS-*
+# resources lpurge-$FS-*
+# resource $FS-hotpool
+# ticket $FS-hotpool-allocated
+#
+# Leaves:
+# all lamigo & lpurge config files
+# client mount
FS=$1
-# Set a 10+ minute timeout for waitfor()
+# Set a default 10+ minute timeout for waitfor()
TIMEOUT=${TIMEOUT:-600}
+function usage() {
+ echo "Usage: $(basename $0) [-h] [-t SECS] [FILESYSTEM]"
+ echo " Tear down Stratagem Hotpools HA environment"
+ echo " -t SECS - Max timeout to wait for services to stop (default: $TIMEOUT)"
+}
+
+while getopts "ht:" opt; do
+ case $opt in
+ h) usage; exit 0;;
+ t) TIMEOUT=$OPTARG;;
+ esac
+done
+shift $((OPTIND-1))
+FS=$1
+
+if [ "$FS" = "--help" ]; then
+    usage
+    exit 0
+fi
+
if [ -z "$FS" ]; then
# Try automatic detection
- FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$)
+    FS=$(crm_ticket -l|cut -f 1 -d -|grep -v ^lustre$|sort -u)
if [ $(echo "$FS" |wc -w) -ne 1 ]; then
echo "Usage: $0 FSNAME"
fi
waitfor () {
- IDS=$*
+ local ids=$*
- [ -z "$IDS" ] && return
+ [ -z "$ids" ] && return
- echo -n "Waiting for $IDS"
+ echo -n "Waiting for $ids"
- local timeout=$TIMEOUT
+ local deadline=$((SECONDS+TIMEOUT))
- local NOTDONE=true
- while $NOTDONE && ((timeout > 0)); do
+ local done=false
+ until $done || ((deadline < SECONDS)); do
- NOTDONE=false
- for i in $IDS; do
+ done=true
+ for i in $ids; do
# This finds the host the resource is active on (empty it is stopped)
res=$(clush -qg ha_heads crm_resource -QW -r $i 2> /dev/null)
if [ -n "$res" ]; then
- NOTDONE=true
+ done=false
echo -n "."
sleep 1
break
fi
done
-
- let timeout-=1
done
echo ""
- if ((timeout == 0)); then
- echo "Waiting for $IDS TIMED OUT!"
+ if ! $done; then
+ echo "Waiting for $ids TIMED OUT!"
exit 1
fi
}
+if ! crm_resource -QW -r $FS-hotpool > /dev/null 2>&1; then
+ if clush -Ng ha_heads crm_resource -l|egrep -q "^(lamigo|lpurge)-$FS-"; then
+ echo "Hotpools needs conversion"
+ echo " Please run: stratagem-hp-convert.sh"
+ exit 1
+ else
+ echo "Hotpools not configured"
+ exit 0
+ fi
+fi
+
+echo "Stopping Hotpools for $FS"
+clush -qS --group=ha_heads crm res stop $FS-hotpool
+
for host in $(cluset -e @ha_heads); do
- IDS=$(ssh $host crm_resource --list-raw|egrep "(lamigo|lpurge)-$FS")
+ IDS=$(ssh $host crm_resource --list-raw|egrep "^(lamigo|lpurge)-$FS-")
if [ -n "$IDS" ]; then
echo Stopping lamigo and lpurge on $host
ssh $host crm res stop $IDS
fi
done
+echo "Removing Hotpool ticket for $FS"
+clush -qS --group=ha_heads crm config del $FS-hotpool
-echo Stopping Client
-clush -qS --group=ha_heads crm res stop cl-$FS-client
-
-waitfor cl-$FS-client
-
-clush -qS --group=ha_heads crm config del $FS-client
+clush -qS --group=ha_heads crm_ticket --ticket $FS-hotpool-allocated --cleanup
-echo "Removing client mount from fstab"
-clush -a "sed -i '/\/$FS\/client.*lustre/d' /etc/fstab"
+echo "Local client is still configured"
+echo " to remove run: stratagem-hp-teardown-client.sh"
lustre_ea.c lustre_ea.h \
lipe_ldiskfs.c lipe_ldiskfs.h \
lipe_object_attrs.h \
+ lipe_version.c \
+ lipe_version.h \
list.h policy.h policy.c \
ldiskfs_read_ldd.c ldiskfs_read_ldd.h \
general_policy.h general_policy.c \
lamigo_CFLAGS = $(LIPE_CFLAGS)
lamigo_LDFLAGS = $(LIPE_LDFLAGS) -lssh -lssh_threads
-lpurge_SOURCES = lpurge.c $(LIPE_SOURCES) $(lipe_ssh_SOURCES)
+lpurge_SOURCES = lpurge.c $(LIPE_SOURCES)
lpurge_CFLAGS = $(LIPE_CFLAGS)
-lpurge_LDFLAGS = $(LIPE_LDFLAGS) -lssh -lssh_threads
+lpurge_LDFLAGS = $(LIPE_LDFLAGS)
lipe_expression_test_SOURCES = lipe_expression_test.c $(LIPE_SOURCES)
lipe_expression_test_CFLAGS = $(LIPE_CFLAGS)
* - auto-config with lfs df -v to find flash OSTs
* - tests
*/
+#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include "general_policy.h"
#include "list.h"
#include "lipe_config.h"
+#include "lipe_version.h"
#define DEF_POOL_REFRESH_INTV (10 * 60)
#define DEF_PROGRESS_INTV (10 * 60)
"\t--hot-fraction, files with heat >= <hottest*fraction/100> are hot (default: %d)\n"
"\t--hot-after-idle, hot files after N idle periods to be mirrored (default: %d)\n"
"\t--src-free, mirroring to source OST pool is prohibited if pool has less space (default: %d%%)\n"
- "\t--tgt-free, mirroring to target OST pool is prohibited if pool has less space (default: %d%%)\n",
+ "\t--tgt-free, mirroring to target OST pool is prohibited if pool has less space (default: %d%%)\n"
+ "\t--version, print version information and exit\n",
program_invocation_short_name,
DEF_MIN_AGE,
DEF_CACHE_SIZE >> 20,
};
__u64 lamigo_last_processed_idx; /* changelog index */
+static time_t lamigo_last_cleared;
+static __u64 lamigo_last_cleared_index;
struct lipe_list_head lamigo_agent_list;
static int lamigo_agent_count;
int lamigo_max_jobs; /* max jobs for all agents */
struct pool_list *tgt_pools; /* target pool */
static void *lamigo_refresh_statfs_thread(void *arg);
+static inline int are_agents_busy(void)
+{
+ if (opt.o_iml_re_socket)
+ return 0;
+
+ if (lamigo_jobs_running >= lamigo_max_jobs)
+ return 1;
+
+ return 0;
+}
+
/* Convert human readable size string to and int; "1k" -> 1000 */
static int strsize2int(long *sizep, char *str)
{
}
}
+static void systemf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
+static void systemf(const char *fmt, ...)
+{
+ char *cmd = NULL;
+ va_list args;
+ int rc;
+
+ va_start(args, fmt);
+ rc = vasprintf(&cmd, fmt, args);
+ va_end(args);
+
+ if (rc < 0)
+ return;
+
+ system(cmd);
+ free(cmd);
+}
+
static struct lipe_flist *lamigo_flist_init(char *socket)
{
struct lipe_flist *flist = flist_alloc(NULL, 1024,
struct resync_agent *a;
struct resync_job *j;
struct pool_list *pl;
+ struct tm *tmtime;
+ char timestr[32];
FILE *f;
int i;
llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "can't open dump file\n");
return;
}
- fprintf(f, "config:\n"
+ fprintf(f,
+ "version: %s-%s\n"
+ "revision: %s\n"
+ "config:\n"
" chlg_user: %s\n"
" mdtname: %s\n"
" mountpoint: %s\n"
" source_pool: ",
+ PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION,
opt.o_chlg_user, opt.o_mdtname, opt.o_mntpt);
i = 0;
for (pl = src_pools; pl != NULL; pl = pl->pl_next, i++)
stats.s_skip_hot, stats.s_replicate_ro2hot,
stats.s_replicate_rw2hot, stats.s_replicate_rw2cold);
+ tmtime = localtime(&lamigo_last_cleared);
+ strftime(timestr, sizeof(timestr), "%c", tmtime);
+
+ fprintf(f, "changelog:\n"
+ " last_processed_idx: %llu\n"
+ " last_cleared_idx: %llu\n"
+ " last_cleared_time: %lu # ( %s )\n",
+ lamigo_last_processed_idx, lamigo_last_cleared_index,
+ lamigo_last_cleared, timestr);
+
#define AGENT_FMT \
" agent%d: { host: %s, mnt: %s, jobs: %u, max: %u, state: %s }\n"
signal(SIGUSR1, lamigo_null_handler);
signal(SIGUSR2, lamigo_null_handler);
- signal(SIGCHLD, lamigo_null_handler);
LIPE_INIT_LIST_HEAD(&head.lh_list);
LIPE_INIT_LIST_HEAD(&lamigo_job_list);
lipe_list_for_each_entry_safe(rss, tmp,
&agent->rag_ssh_list,
rss_list) {
- lipe_cleanup_ssh_session(&rss->rss_ctx);
+ lipe_ssh_context_destroy(&rss->rss_ctx);
lipe_list_del(&rss->rss_list);
free(rss);
}
lipe_list_del(&rss->rss_list);
pthread_mutex_unlock(&a->rag_ssh_lock);
- if (lipe_ssh_status(&rss->rss_ctx) != SSH_OK) {
- rc = lipe_init_ssh_session(&rss->rss_ctx, a->rag_hostname);
- if (rc != SSH_OK)
- goto out;
- }
-
rc = lipe_ssh_exec(&rss->rss_ctx, cmd);
if (rc)
- llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, 0,
+ llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0,
"error executing ssh command '%s' on '%s': rc = %d\n",
cmd, a->rag_hostname, rc);
-out:
pthread_mutex_lock(&a->rag_ssh_lock);
lipe_list_add(&rss->rss_list, &a->rag_ssh_list);
pthread_cond_signal(&a->rag_ssh_cond);
if (rj->rj_setprefer) {
snprintf(cmd, sizeof(cmd),
- "lfs setstripe --comp-set --comp-flags=prefer -p %s "
- "%s/.lustre/fid/"DFID" >&/dev/null", rj->rj_pool,
+ "lfs setstripe --comp-set --comp-flags=prefer --pool='%s' "
+ "'%s/.lustre/fid/"DFID"' >&/dev/null", rj->rj_pool,
agent->rag_mountpoint,
PFID(&rj->rj_fid));
llapi_printf(LLAPI_MSG_DEBUG, "set prefer on "DFID"\n",
int i;
i = snprintf(cmd, sizeof(cmd),
- "%s --pool=%s",
+ "%s --pool='%s'",
opt.o_mirror_cmd, rj->rj_pool);
if (rj->rj_stripes > 0)
i += snprintf(cmd + i, sizeof(cmd) - i,
- " -c %d", rj->rj_stripes);
+ " --stripe-count=%d", rj->rj_stripes);
if (rj->rj_mirror_opts)
i += snprintf(cmd + i, sizeof(cmd) - i,
- " --flags=%s", rj->rj_mirror_opts);
+ " --flags='%s'", rj->rj_mirror_opts);
i += snprintf(cmd + i, sizeof(cmd) - i,
- " %s/.lustre/fid/"DFID" > /dev/null 2>&1",
+ " '%s/.lustre/fid/"DFID"' > /dev/null 2>&1",
agent->rag_mountpoint, PFID(&rj->rj_fid));
} else if (resync == AMIGO_RESYNC_RESYNC) {
snprintf(cmd, sizeof(cmd),
- "lfs mirror resync %s/.lustre/fid/"DFID" > /dev/null 2>&1",
+ "lfs mirror resync '%s/.lustre/fid/"DFID"' > /dev/null 2>&1",
agent->rag_mountpoint,
PFID(&rj->rj_fid));
} else {
/* the subsequent steps to fetch EAs are very expensive
* skip if possible */
- if (need_attr == 0 && 0)
+ if (need_attr == 0)
return 0;
/* XXX: would be great to get all EAs with a single call */
char cmd[PATH_MAX];
struct resync_agent *a = (struct resync_agent *)args;
- snprintf(cmd, sizeof(cmd), "lfs path2fid %s > /dev/null 2>&1",
+ snprintf(cmd, sizeof(cmd), "lfs path2fid '%s' > /dev/null 2>&1",
a->rag_mountpoint);
rc = lamigo_exec_cmd(a, cmd);
return 0;
}
+ if (are_agents_busy()) {
+ /* all the agents are busy */
+		llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "no agents available (max: %d)\n", lamigo_max_jobs);
+ return 1;
+ }
+
/* prevent hot file migration from hot pool to slow */
rc = lamigo_alr_check_is_hot(&f->fr_fh.fh_fid, &ah);
if (rc) {
return 0;
}
- if (lamigo_jobs_running >= lamigo_max_jobs) {
- /* all the agents are busy */
- llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "no agents avilable (max: %d)\n", lamigo_max_jobs);
- return 1;
- }
-
rj = calloc(1, sizeof(struct resync_job));
if (rj == NULL) {
llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "can't allocate for a job\n");
int count;
/* try to resubmit failed jobs first */
- while (!lipe_list_empty(&lamigo_failed_job_list)) {
+ while (!lipe_list_empty(&lamigo_failed_job_list) && !are_agents_busy()) {
struct resync_job *rj;
rj = lipe_list_entry(lamigo_failed_job_list.next,
return cache_size;
}
-static time_t lamigo_last_cleared;
-static __u64 lamigo_last_cleared_index;
-
static void lamigo_check_and_clear_changelog(void)
{
struct resync_job *tmp;
index = lamigo_last_processed_idx;
/* this checks prevents too frequent in-memory-search for
- * the minimal record-in-work: every 256th record and not
- * frequently than once a 5 seconds
+	 * the minimal record-in-work, no more often than once every 3 seconds
*/
- if ((index & 256) == 0 && time(NULL) - lamigo_last_cleared < 5)
+ if (time(NULL) - lamigo_last_cleared < 3)
return;
lamigo_last_cleared = time(NULL);
index = tmp->rj_index;
}
- if (index - lamigo_last_cleared_index < opt.o_chlg_clear_frequency)
+ lipe_list_for_each_entry(tmp, &lamigo_failed_job_list, rj_list) {
+ if (tmp->rj_check_job || !tmp->rj_index)
+ continue;
+ if (tmp->rj_index < index)
+ index = tmp->rj_index;
+ }
+
+ if (index < lamigo_last_cleared_index ||
+ index - lamigo_last_cleared_index < opt.o_chlg_clear_frequency)
return;
llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "CLEAR upto %llu in %s (%llu last)\n",
index, opt.o_chlg_user, lamigo_last_processed_idx);
lamigo_last_cleared_index = index;
rc = llapi_changelog_clear(opt.o_mdtname, opt.o_chlg_user, index);
- if (rc)
+ if (rc < 0) {
llapi_error(LLAPI_MSG_ERROR, rc,
- "failed to clear changelog record: %s:%llu",
- opt.o_chlg_user, index);
+ "failed to clear changelog record %s:%llu (%llu last)",
+ opt.o_chlg_user, index, lamigo_last_cleared_index);
+ systemf("lctl get_param 'mdd.%s.changelog_users'", opt.o_mdtname);
+ }
}
-static void lamigo_check_jobs(void)
+static void lamigo_job_fini(struct resync_job *rj, intptr_t retval)
{
- struct resync_job *rj = NULL, *tmp;
- int rc = 0;
- intptr_t retval;
-
- lipe_list_for_each_entry(tmp, &lamigo_job_list, rj_list) {
- rc = pthread_tryjoin_np(tmp->rj_pid, (void **)&retval);
- if (rc == 0) {
- rj = tmp;
- break;
- } else if (rc != EBUSY) {
- break;
- }
- }
-
- if (rj == NULL) {
- if (rc && rc != EBUSY)
- llapi_error(LLAPI_MSG_ERROR|LLAPI_MSG_NO_ERRNO, 0, "unknown job %d exited\n",
- rc);
- return;
- }
-
llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0,
"job %lu on "DFID" done in %lu: %"PRIdPTR" (%d)\n",
rj->rj_pid, PFID(&rj->rj_fid), time(NULL) - rj->rj_start,
retval, rj->rj_agent->rag_bad);
- lipe_list_del(&rj->rj_list);
-
if (rj->rj_check_job) {
rj->rj_agent->rag_check_in_progress = 0;
if (retval == 0) {
/* the agent is back */
- llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "agent %s is back\n",
- rj->rj_agent->rag_hostname);
if (rj->rj_agent->rag_bad) {
+ llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "agent %s is back\n",
+ rj->rj_agent->rag_hostname);
rj->rj_agent->rag_bad = false;
lamigo_max_jobs += rj->rj_agent->rag_maxjobs;
}
} else {
/* the agent is still bad */
if (rj->rj_agent->rag_bad == false) {
+ llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "agent %s is bad\n",
+ rj->rj_agent->rag_hostname);
+
assert(lamigo_max_jobs >= rj->rj_agent->rag_maxjobs);
lamigo_max_jobs -= rj->rj_agent->rag_maxjobs;
rj->rj_agent->rag_bad = true;
}
- llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, "agent %s is still bad\n",
- rj->rj_agent->rag_hostname);
}
free(rj);
return;
/* no striping info or file disappeared */
/* no need to handle, forget the changelog record */
} else if (retval == 0) {
- stats.s_replicated++;
+ if (!rj->rj_setprefer)
+ stats.s_replicated++;
}
if (rj)
free(rj);
}
+static void lamigo_check_jobs(void)
+{
+ struct resync_job *rj, *tmp;
+ int rc = 0;
+ intptr_t retval;
+
+ lipe_list_for_each_entry_safe(rj, tmp, &lamigo_job_list, rj_list) {
+ rc = pthread_tryjoin_np(rj->rj_pid, (void **)&retval);
+ if (rc == EBUSY) {
+ continue;
+ } else if (rc != 0) {
+ llapi_error(LLAPI_MSG_ERROR, rc, "cannot join thread %lld",
+ (long long)rj->rj_pid);
+ } else {
+ lipe_list_del(&rj->rj_list);
+ lamigo_job_fini(rj, retval);
+ }
+ }
+}
+
static void lamigo_add_agent(const char *host, const char *mnt, char *jobs)
{
struct resync_agent *a;
int i;
- a = malloc(sizeof(*a));
+ a = calloc(1, sizeof(*a));
if (!a) {
llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, 0,
"can't allocate memory for agent\n");
/* ssh context per job, and one more for agent heartbeat */
for (i = 0; i < a->rag_maxjobs + 1; i++) {
- struct resync_ssh_session *rss =
- malloc(sizeof(struct resync_ssh_session));
+ struct resync_ssh_session *rss = calloc(1, sizeof(*rss));
+ int rc;
if (!rss) {
llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, 0,
exit(1);
}
+ rc = lipe_ssh_context_init(&rss->rss_ctx, a->rag_hostname);
+ if (rc < 0) {
+ llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, 0,
+ "cannot create SSH context for '%s'\n",
+ a->rag_hostname);
+ exit(1);
+ }
+
lipe_list_add(&rss->rss_list, &a->rag_ssh_list);
}
#define LAMIGO_OPT_STATFS_REFRESH 15
#define LAMIGO_OPT_SRC_FREE 16
#define LAMIGO_OPT_TGT_FREE 17
+#define LAMIGO_OPT_VERSION 18
static struct option options[] = {
{ "ignore-reads", no_argument, NULL, LAMIGO_OPT_IGNORE_READS},
{ "tgt", required_argument, NULL, 't'},
{ "user", required_argument, 0, 'u'},
{ "verbose", no_argument, NULL, 'v'},
+ { "version", no_argument, NULL, LAMIGO_OPT_VERSION },
{ NULL }
};
exit(1);
}
break;
+ case LAMIGO_OPT_VERSION:
+ lipe_version();
+ exit(0);
case 'a':
opt.o_min_age = strtol(optarg, &endptr, 10);
if (*endptr != '\0' || opt.o_min_age < 5) {
lamigo_check_jobs();
do {
- while (lamigo_jobs_running >= lamigo_max_jobs) {
+ while (are_agents_busy()) {
lamigo_check_jobs();
lamigo_check_sync();
lamigo_check_bad_agents();
int ret = 0;
pthread_t pid;
+ lipe_version_init();
ssh_threads_set_callbacks(ssh_threads_get_pthread());
ssh_init();
* followed by the MDT name ("lamigo lustre-MDT0000"). */
llapi_set_command_name(opt.o_mdtname);
+ llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0,
+ "version %s-%s, revision %s\n",
+ PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION);
+
rc = lamigo_init_cache();
if (rc < 0) {
llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, 0, "can't init cache\n");
if (enable_heat)
lamigo_check_hot();
- rc = lamigo_check_sync();
- if (rc < 0) {
- stop = true;
- ret = rc;
+
+ if (!are_agents_busy()) {
+ rc = lamigo_check_sync();
+ if (rc < 0) {
+ stop = true;
+ ret = rc;
+ }
}
+
lamigo_check_jobs();
lamigo_check_and_clear_changelog();
lamigo_check_bad_agents();
out:
lamigo_cleanup();
llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0, "exited\n");
+ lipe_version_fini();
return ret;
}
opt.o_alr_ofd_interval, mdtidx, opt.o_alr_extra_args);
repeat:
- rc = lipe_init_ssh_session(&ala->ala_ctx, ala->ala_host);
- if (rc) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "failed to create connection to agent [%s], %d\n",
- ala->ala_host, rc);
- goto again;
- }
-
- rc = lipe_ssh_start_cmd(ala->ala_ctx.lsc_session, cmd, &channel);
+ rc = lipe_ssh_start_cmd(&ala->ala_ctx, cmd, &channel);
if (rc != SSH_OK) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "cannot start ofd_access_log_readed\n");
+ llapi_error(LLAPI_MSG_ERROR|LLAPI_MSG_NO_ERRNO, 0,
+ "cannot start ofd_access_log_reader on host '%s': rc = %d\n",
+ ala->ala_host, rc);
goto err;
}
+
llapi_err_noerrno(LLAPI_MSG_DEBUG, "alr agent on %s is running\n",
ala->ala_host);
ssh_channel_close(channel);
ssh_channel_free(channel);
-again:
/* wait for a while */
/* XXX: should be configurable? */
sleep(5);
void lamigo_add_alr_agent(const char *host)
{
struct alr_agent *ala;
+ int rc;
- ala = calloc(sizeof(*ala), 1);
- assert(ala);
+ ala = calloc(1, sizeof(*ala));
+ assert(ala != NULL);
ala->ala_host = strdup(host);
+ assert(ala->ala_host != NULL);
+
+ rc = lipe_ssh_context_init(&ala->ala_ctx, ala->ala_host);
+ assert(rc == SSH_OK);
+
lipe_list_add(&ala->ala_list, &alr_agent_list);
}
#include <stdlib.h>
+#include <assert.h>
+#include <malloc.h>
#include <string.h>
#include <errno.h>
#include "lipe_ssh.h"
-int lipe_ssh_authenticate_pubkey(ssh_session session)
-{
- int rc;
-
- rc = ssh_userauth_publickey_auto(session, NULL, NULL);
- if (rc != SSH_AUTH_SUCCESS) {
- llapi_printf(LLAPI_MSG_ERROR, "Authentication failed: %s\n",
- ssh_get_error(session));
- return rc;
- }
-
- return SSH_AUTH_SUCCESS;
-}
+#define lipe_ssh_debug(fmt, args...) \
+ llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, fmt, ##args)
+#define lipe_ssh_error(fmt, args...) \
+ llapi_error(LLAPI_MSG_ERROR|LLAPI_MSG_NO_ERRNO, 0, fmt, ##args)
-int lipe_ssh_start_cmd(ssh_session session, const char *cmd, ssh_channel *rch)
+static int lipe_ssh_session_start_cmd(ssh_session session, const char *cmd, ssh_channel *pchannel)
{
- ssh_channel channel;
+ ssh_channel channel = NULL;
int rc;
+ assert(session != NULL);
+
channel = ssh_channel_new(session);
if (channel == NULL) {
- llapi_printf(LLAPI_MSG_ERROR,
- "Error allocating a new channel: %s\n",
- ssh_get_error(session));
- return SSH_ERROR;
+ lipe_ssh_error("canot create a new SSH channel: %s\n",
+ ssh_get_error(session));
+ rc = SSH_ERROR;
+ goto out;
}
rc = ssh_channel_open_session(channel);
if (rc != SSH_OK) {
- llapi_printf(LLAPI_MSG_ERROR,
- "Error opening a session channel: %s\n",
- ssh_get_error(session));
- ssh_channel_free(channel);
- return rc;
+ lipe_ssh_error("cannot open SSH session channel: %s\n",
+ ssh_get_error(session));
+ goto out;
}
rc = ssh_channel_request_exec(channel, cmd);
if (rc != SSH_OK) {
- llapi_printf(LLAPI_MSG_ERROR, "Error executing command: %s\n",
- ssh_get_error(session));
- ssh_channel_close(channel);
- ssh_channel_free(channel);
- return rc;
+ lipe_ssh_error("cannot execute SSH command: %s\n",
+ ssh_get_error(session));
+ goto out;
}
- *rch = channel;
- return SSH_OK;
+
+ *pchannel = channel;
+ channel = NULL;
+ rc = SSH_OK;
+out:
+ if (channel != NULL)
+ ssh_channel_close(channel);
+
+ ssh_channel_free(channel);
+
+ return rc;
}
-int lipe_ssh_exec_cmd(ssh_session session, const char *cmd)
+static int lipe_ssh_session_exec_cmd(ssh_session session, const char *cmd)
{
ssh_channel channel;
int rc;
- rc = lipe_ssh_start_cmd(session, cmd, &channel);
+ rc = lipe_ssh_session_start_cmd(session, cmd, &channel);
if (rc != SSH_OK)
return rc;
return rc;
}
-int lipe_init_ssh_session(struct lipe_ssh_context *ctx,
- const char *host)
+static void lipe_ssh_session_destroy(ssh_session *psession)
+{
+ if (*psession != NULL)
+ ssh_disconnect(*psession);
+
+ ssh_free(*psession);
+ *psession = NULL;
+}
+
+static int lipe_ssh_session_create(ssh_session *psession, const char *host)
{
- int timeout = 5;
+ ssh_session session = NULL;
+ long timeout = 5;
int rc;
- ctx->lsc_host = strdup(host);
- if (ctx->lsc_host == NULL)
- return SSH_ERROR;
+ assert(SSH_OK == 0);
+ assert(SSH_AUTH_SUCCESS == 0);
+ assert(SSH_ERROR < 0);
+
+ assert(psession != NULL);
+ assert(*psession == NULL);
+ assert(host != NULL);
- pthread_mutex_init(&ctx->lsc_session_mutex, NULL);
- /* Create a new ssh session and set options */
- ctx->lsc_session = ssh_new();
- if (ctx->lsc_session == NULL) {
- llapi_printf(LLAPI_MSG_ERROR,
- "Error creating a new ssh session\n");
+ lipe_ssh_debug("creating new SSH session for host '%s'\n", host);
+
+ session = ssh_new();
+ if (session == NULL) {
+ lipe_ssh_error("cannot create a new SSH session: %s\n",
+ strerror(ENOMEM)); /* Probably. */
rc = SSH_ERROR;
- goto out_host;
+ goto out;
}
- ssh_options_set(ctx->lsc_session, SSH_OPTIONS_HOST, host);
+ rc = ssh_options_set(session, SSH_OPTIONS_HOST, host);
+ if (rc != SSH_OK) {
+ lipe_ssh_error("cannot set SSH session host to '%s: %s'\n",
+ host, ssh_get_error(session));
+ goto out;
+ }
- if (ssh_options_set(ctx->lsc_session, SSH_OPTIONS_TIMEOUT,
- &timeout) < 0) {
- llapi_printf(LLAPI_MSG_FATAL,
- "Error setting ssh timeout\n");
- rc = SSH_ERROR;
+ rc = ssh_options_set(session, SSH_OPTIONS_TIMEOUT, &timeout);
+ if (rc != SSH_OK) {
+ lipe_ssh_error("cannot set SSH timeout to %ld: %s\n",
+ timeout, ssh_get_error(session));
goto out;
}
/* Connect to the ssh server */
- rc = ssh_connect(ctx->lsc_session);
+ rc = ssh_connect(session);
if (rc != SSH_OK) {
- llapi_printf(LLAPI_MSG_ERROR,
- "Error connecting to host %s: %s\n",
- host,
- ssh_get_error(ctx->lsc_session));
+ lipe_ssh_error("cannot connect SSH session to host '%s': %s\n",
+ host, ssh_get_error(session));
goto out;
}
/* Automatically authenticate with public key */
- rc = lipe_ssh_authenticate_pubkey(ctx->lsc_session);
- if (rc == SSH_AUTH_SUCCESS)
- return rc;
-
- llapi_printf(LLAPI_MSG_ERROR,
- "Error authenticating pubkey: %s\n",
- ssh_get_error(ctx->lsc_session));
+ rc = ssh_userauth_publickey_auto(session, NULL, NULL);
+ if (rc != SSH_AUTH_SUCCESS) {
+ lipe_ssh_error("cannot authenticate SSH session to host '%s': %s\n",
+ host, ssh_get_error(session));
+ goto out;
+ }
- ssh_disconnect(ctx->lsc_session);
+ *psession = session;
+ session = NULL;
+ rc = SSH_OK;
out:
- ssh_free(ctx->lsc_session);
- ctx->lsc_session = NULL;
-out_host:
- free(ctx->lsc_host);
+ lipe_ssh_debug("create new SSH session for host '%s': rc = %d\n", host, rc);
+ lipe_ssh_session_destroy(&session);
return rc;
}
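+/* Ensure the context has a connected, authenticated session, creating
+ * one lazily on first use. */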
-int lipe_reset_ssh_session(struct lipe_ssh_context *ctx)
+static int lipe_ssh_context_check(struct lipe_ssh_context *ctx)
{
- int rc;
- char *hostname;
-
- pthread_mutex_lock(&ctx->lsc_session_mutex);
- hostname = strdup(ctx->lsc_host);
- if (!hostname) {
- pthread_mutex_unlock(&ctx->lsc_session_mutex);
- return -ENOMEM;
- }
- lipe_cleanup_ssh_session(ctx);
- rc = lipe_init_ssh_session(ctx, hostname);
- pthread_mutex_unlock(&ctx->lsc_session_mutex);
- free(hostname);
+ if (ctx->lsc_session == NULL)
+ return lipe_ssh_session_create(&ctx->lsc_session, ctx->lsc_host);
- return rc;
+ return SSH_OK;
}
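+/* Tear down the cached session after an error; lipe_ssh_context_check()
+ * will create a fresh one on the next command. */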
-int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd)
+static void lipe_ssh_context_fail(struct lipe_ssh_context *ctx)
{
- int rc;
-
- /* Execute a remote command */
- rc = lipe_ssh_exec_cmd(ctx->lsc_session, cmd);
- if (rc == SSH_OK)
- return SSH_OK;
-
- /* Reset ssh session and try again */
- if (rc == SSH_ERROR || rc == SSH_AGAIN) {
- rc = lipe_reset_ssh_session(ctx);
- if (rc)
- return rc;
- rc = lipe_ssh_exec_cmd(ctx->lsc_session, cmd);
- }
-
- return rc;
+ lipe_ssh_debug("fail SSH context for host '%s'\n", ctx->lsc_host);
+ lipe_ssh_session_destroy(&ctx->lsc_session);
}
-void lipe_cleanup_ssh_session(struct lipe_ssh_context *ctx)
+void lipe_ssh_context_destroy(struct lipe_ssh_context *ctx)
{
- ssh_disconnect(ctx->lsc_session);
- ssh_free(ctx->lsc_session);
+ lipe_ssh_session_destroy(&ctx->lsc_session);
free(ctx->lsc_host);
+ ctx->lsc_host = NULL;
}
-int lipe_ssh_status(struct lipe_ssh_context *ctx)
+int lipe_ssh_context_init(struct lipe_ssh_context *ctx, const char *host)
{
- if (ctx->lsc_session == NULL)
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->lsc_host = strdup(host);
+ if (ctx->lsc_host == NULL)
return SSH_ERROR;
+
+ /* Session creation will be done on demand. */
+
return SSH_OK;
}
+
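+/* Public entry point: run cmd on the context's host and return an open
+ * channel; on failure the cached session is dropped so the next call
+ * reconnects. */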
+int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel)
+{
+ int rc;
+
+ rc = lipe_ssh_context_check(ctx);
+ if (rc != SSH_OK)
+ return rc;
+
+ rc = lipe_ssh_session_start_cmd(ctx->lsc_session, cmd, pchannel);
+ if (rc != SSH_OK)
+ lipe_ssh_context_fail(ctx);
+
+ return rc;
+}
+
+int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd)
+{
+ int rc;
+
+ rc = lipe_ssh_context_check(ctx);
+ if (rc != SSH_OK)
+ return rc;
+
+ /* Execute a remote command */
+ rc = lipe_ssh_session_exec_cmd(ctx->lsc_session, cmd);
+ if (rc < 0)
+ lipe_ssh_context_fail(ctx);
+
+ return rc;
+}
#ifndef _LIPE_SSH_H_
#define _LIPE_SSH_H_
-#include <pthread.h>
#include <libssh/libssh.h>
struct lipe_ssh_context {
char *lsc_host;
ssh_session lsc_session;
- pthread_mutex_t lsc_session_mutex;
};
-int lipe_ssh_authenticate_pubkey(ssh_session session);
-int lipe_ssh_exec_cmd(ssh_session session, const char *cmd);
-int lipe_init_ssh_session(struct lipe_ssh_context *ctx,
- const char *host);
-void lipe_cleanup_ssh_session(struct lipe_ssh_context *ctx);
-int lipe_reset_ssh_session(struct lipe_ssh_context *ctx);
+int lipe_ssh_context_init(struct lipe_ssh_context *ctx, const char *host);
+void lipe_ssh_context_destroy(struct lipe_ssh_context *ctx);
+
int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd);
-int lipe_ssh_start_cmd(ssh_session session, const char *cmd, ssh_channel *rch);
-int lipe_ssh_status(struct lipe_ssh_context *ctx);
+int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel);
#endif /* _LIPE_SSH_H_ */
--- /dev/null
+#include <stdio.h>
+#include <errno.h>
+#include <malloc.h>
+#include <string.h>
+#include "lipe_version.h"
+
+/* A grep-able string for stripped binaries. */
+static const char lipe_revision_string[] = "LiPE Revision: "LIPE_REVISION;
+
+/* A grep-able string for core dumps. */
+static char *lipe_revision_string_copy;
+
+void lipe_version(void)
+{
+ printf("%s (%s) %s-%s\n%s\n",
+ program_invocation_short_name,
+ PACKAGE_NAME,
+ PACKAGE_VERSION,
+ LIPE_RELEASE,
+ lipe_revision_string);
+}
+
+void lipe_version_init(void)
+{
+ if (lipe_revision_string_copy == NULL)
+ lipe_revision_string_copy = strdup(lipe_revision_string);
+}
+
+void lipe_version_fini(void)
+{
+ free(lipe_revision_string_copy);
+ lipe_revision_string_copy = NULL;
+}
--- /dev/null
+#ifndef _LIPE_VERSION_H_
+#define _LIPE_VERSION_H_
+
+void lipe_version(void);
+void lipe_version_init(void);
+void lipe_version_fini(void);
+
+#endif
/*
* TODO
* - convert blocks to 1K units
- * - multiple --mirror-id in lfs split to save on ssh sessions
* - take OST load into account
*
*/
#include <linux/lustre/lustre_fid.h>
#include <linux/lustre/lustre_cfg.h>
#include <json-c/json.h>
-#include <libssh/libssh.h>
-#include <libssh/callbacks.h>
#include "lipe_object_attrs.h"
+#include "lipe_version.h"
#include "list.h"
#include "policy.h"
#include "flist.h"
-#include "lipe_ssh.h"
#define container_of(ptr, type, member) ({ \
const typeof(((type *) 0)->member) * __mptr = (ptr); \
#define LPURGE_FIDS_DUMPFILE "/var/run/lpurge-%s.fids"
struct lpurge_object {
+ struct lipe_list_head lo_list;
struct lu_fid lo_fid;
/* Used space in bytes */
__u32 lo_blocks;
- __u32 lo_comp_id;
+ __u32 lo_mirror_id;
/* Last use time */
time_t lo_last_utime;
};
-struct lpurge_object_pack {
- struct lipe_list_head lop_list;
- unsigned int lop_max;
- unsigned int lop_nr;
- unsigned int lop_mdt_idx;
- pthread_t lop_pid;
- struct lpurge_object lop_objs[];
-};
-
struct lpurge_slot {
time_t ls_min_utime;
time_t ls_max_utime;
unsigned int ls_scan;
struct lipe_list_head ls_obj_list;
pthread_mutex_t ls_mutex;
+ /* stats for objects we can't release */
+ unsigned long ls_nomirror_objs; /* objects with no mirror */
+ unsigned long ls_nomirror_space;
+ unsigned long ls_nopfid_objs; /* no PFID, can't find parent */
+ unsigned long ls_nopfid_space;
+ unsigned long ls_notfirst_objs; /* not a first stripe */
+ unsigned long ls_notfirst_space;
};
-struct mds_ssh_session {
- struct lipe_list_head mss_list;
- struct lipe_ssh_context mss_ctx;
-};
-
-struct mds {
- char *host;
- char *mnt;
- struct lipe_list_head m_ssh_list;
- pthread_mutex_t m_ssh_lock;
- pthread_cond_t m_ssh_cond;
-};
-
-#define MAX_MDTS 128
-struct mds mds[MAX_MDTS];
-int mdsnr;
-
#define LPURGE_HIST_MAX 16
struct lpurge_slot lpurge_hist[LPURGE_HIST_MAX];
unsigned long s_scans;
unsigned long s_low_hits;
unsigned long s_slow_scans;
- unsigned long s_spawned;
- unsigned long s_purged; /* files */
+ unsigned long s_queued; /* # files queued for purge */
+ unsigned long s_started; /* # files dequeued for purge by worker */
+ unsigned long s_purged; /* # files we successfully purged */
+ unsigned long s_failed; /* # files we failed to purge */
+ /* TODO: count mirrors we tried to purge but skipped as the last non-stale copy */
};
struct stats stats;
char ost_mntdev[PATH_MAX];
char *ost_mntpt;
int lustre_fd = -1;
+int open_by_fid_fd = -1;
unsigned long oldest;
time_t scan_started_time;
time_t scan_finished_time;
void load_config(char *name);
-#define PACK_SIZE (64*1024)
-#define PACK_NR ((PACK_SIZE - offsetof(struct lpurge_object_pack, lop_objs[0])) / \
- sizeof(struct lpurge_object))
-
-#define FIDS_PER_CALL 128 /* XXX: bump to a bigger value */
-#define PM_SIZE (offsetof(struct lpurge_object_pack, lop_objs[FIDS_PER_CALL]))
-
struct lipe_flist *lflist;
#define LPURGE_FLIST_SIZE (1024 * 1024)
static void sig_handler(int signal)
{
- int i;
-
psignal(signal, "exiting");
if (lflist)
flist_free(lflist);
- for (i = 0; i < mdsnr; i++) {
- struct mds *m = &mds[i];
- struct mds_ssh_session *mss, *tmp;
-
- lipe_list_for_each_entry_safe(mss, tmp, &m->m_ssh_list,
- mss_list) {
- lipe_cleanup_ssh_session(&mss->mss_ctx);
- lipe_list_del(&mss->mss_list);
- free(mss);
- }
- }
-
- _exit(1);
+ _exit(0);
}
static void lpurge_null_handler(int signal)
void lpurge_reset_result(void)
{
- struct lpurge_object_pack *s, *t;
+ struct lpurge_object *s, *t;
int i;
for (i = 0; i < LPURGE_HIST_MAX; i++) {
lpurge_hist[i].ls_scan = 1;
lpurge_hist[i].ls_min_utime = ~0UL;
lpurge_hist[i].ls_max_utime = 0;
+ lpurge_hist[i].ls_nopfid_objs = 0;
+ lpurge_hist[i].ls_nopfid_space = 0;
+ lpurge_hist[i].ls_nomirror_objs = 0;
+ lpurge_hist[i].ls_nomirror_space = 0;
+ lpurge_hist[i].ls_notfirst_objs = 0;
+ lpurge_hist[i].ls_notfirst_space = 0;
lipe_list_for_each_entry_safe(s, t, &lpurge_hist[i].ls_obj_list,
- lop_list) {
- lipe_list_del(&s->lop_list);
+ lo_list) {
+ lipe_list_del(&s->lo_list);
free(s);
}
}
"\t-R, --scan_rate, slow scanning rate, objs/sec (default: %u)\n"
"\t-S, --slot_size, objects to store in memory (default: %u)\n"
"\t-t, --scan_threads, scanning threads (default: %u)\n"
- "\t-w, --dump, stats file (via USR1 signal, default: %s)\n",
+ "\t-w, --dump, stats file (via USR1 signal, default: %s)\n"
+ "\t--version, print version information and exit\n",
program_invocation_short_name,
DEF_FREEHI,
DEF_INTERVAL,
int i = 0;
struct lpurge_slot *ls_1 = lpurge_hist; /* youngest slot */
struct lpurge_slot *ls_2;
- struct lpurge_object_pack *p, *t;
+ struct lpurge_object *p, *t;
if (index >= LPURGE_HIST_MAX - 1)
return;
ls_1->ls_min_utime = ~0UL;
/* reclaim youngest slot */
lipe_list_for_each_entry_safe(p, t, &ls_1->ls_obj_list,
- lop_list) {
- lipe_list_del(&p->lop_list);
+ lo_list) {
+ lipe_list_del(&p->lo_list);
free(p);
}
pthread_mutex_unlock(&ls_1->ls_mutex);
struct lipe_object_attrs *attrs)
{
struct lpurge_slot *ls = NULL;
- struct lpurge_object_pack *lp = NULL;
+ struct lpurge_object *lo = NULL;
time_t age, last_used;
int index;
if ((attrs->loa_mode & S_IFMT) != S_IFREG)
goto out;
- if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID) == 0)
- goto out;
-
- /* to avoid N OSTs to 1 MDT scalability issue we only consider
- * objects which store 1st stripe
- */
- if (attrs->loa_filter_fid.ff_parent.f_ver != 0)
- goto out;
-
- /* if the object has got ost_layout structure which encodes
- * whether the object has a mirror, then we can skip objects
- * with mirror_id=0 (no mirror)
- */
- if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
- if (mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)
- == 0)
- goto out;
- }
-
last_used = attrs->loa_mtime_ms;
if (last_used < attrs->loa_atime_ms)
last_used = attrs->loa_atime_ms;
goto out;
}
+ pthread_mutex_lock(&ls->ls_mutex);
+
+ if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID) == 0) {
+ ls->ls_nopfid_objs++;
+ ls->ls_nopfid_space += attrs->loa_blocks >> 10;
+ pthread_mutex_unlock(&ls->ls_mutex);
+ goto out;
+ }
+
+ /* to avoid N OSTs to 1 MDT scalability issue we only consider
+ * objects which store 1st stripe
+ */
+ if (attrs->loa_filter_fid.ff_parent.f_ver != 0) {
+ ls->ls_notfirst_objs++;
+ ls->ls_notfirst_space += attrs->loa_blocks >> 10;
+ pthread_mutex_unlock(&ls->ls_mutex);
+ goto out;
+ }
+
+ /* if the object has got ost_layout structure which encodes
+ * whether the object has a mirror, then we can skip objects
+ * with mirror_id=0 (no mirror)
+ */
+ if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
+ if (mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)
+ == 0) {
+ ls->ls_nomirror_objs++;
+ ls->ls_nomirror_space += attrs->loa_blocks >> 10;
+ pthread_mutex_unlock(&ls->ls_mutex);
+ goto out;
+ }
+ }
+
llapi_printf(LLAPI_MSG_DEBUG,
"found under "DFID": size %ld block %ld age %ld slot %d\n",
PFID(&attrs->loa_filter_fid.ff_parent),
(unsigned long)attrs->loa_size,
(unsigned long)attrs->loa_blocks >> 10, age, index);
- pthread_mutex_lock(&ls->ls_mutex);
ls->ls_found++;
ls->ls_space += attrs->loa_blocks >> 10;
lpurge_scanned_since++;
}
}
- if (!lipe_list_empty(&ls->ls_obj_list))
- lp = lipe_list_entry(ls->ls_obj_list.next,
- struct lpurge_object_pack, lop_list);
+ lo = calloc(1, sizeof(*lo));
+ if (lo == NULL) {
+ pthread_mutex_unlock(&ls->ls_mutex);
+ goto out;
+ }
- if (!lp || lp->lop_nr >= lp->lop_max) {
- lp = calloc(1, PACK_SIZE);
- if (!lp) {
- llapi_printf(LLAPI_MSG_ERROR,
- "can't alloca pack\n");
- exit(1);
- }
- lp->lop_pid = 0;
- lp->lop_max = PACK_NR;
- lp->lop_nr = 0;
- lipe_list_add(&lp->lop_list, &ls->ls_obj_list);
- }
- lp->lop_objs[lp->lop_nr].lo_fid = attrs->loa_filter_fid.ff_parent;
- lp->lop_objs[lp->lop_nr].lo_blocks = attrs->loa_blocks >> 10;
- lp->lop_objs[lp->lop_nr].lo_comp_id = 0;
- lp->lop_objs[lp->lop_nr].lo_last_utime = last_used;
+ lo->lo_fid = attrs->loa_filter_fid.ff_parent;
+ lo->lo_blocks = attrs->loa_blocks >> 10;
+ lo->lo_last_utime = last_used;
if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
__u32 id;
id = mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id);
- lp->lop_objs[lp->lop_nr].lo_comp_id = id;
+ lo->lo_mirror_id = id;
}
- lp->lop_nr++;
+
+ lipe_list_add(&lo->lo_list, &ls->ls_obj_list);
ls->ls_stored++;
+
if (ls->ls_max_utime < last_used)
ls->ls_max_utime = last_used;
if (ls->ls_min_utime > last_used)
ls->ls_min_utime = last_used;
pthread_mutex_unlock(&ls->ls_mutex);
- llapi_printf(LLAPI_MSG_DEBUG, "add "DFID" to %p/%d\n",
- PFID(&attrs->loa_filter_fid.ff_parent), lp, lp->lop_nr);
+ llapi_printf(LLAPI_MSG_DEBUG, "add "DFID" to %p/%lu\n",
+ PFID(&attrs->loa_filter_fid.ff_parent), ls, ls->ls_stored);
out:
/*
return 0;
}
-static int lpurge_exec_cmd(struct mds *m, char *cmd)
+struct collect_ids_data {
+ __u16 *cid_ids;
+ int cid_count;
+ __u16 cid_exclude;
+};
+
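+/* llapi_layout_comp_iterate() callback: record each distinct mirror id
+ * in the layout, skipping cid_exclude. */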
+static int collect_mirror_id(struct llapi_layout *layout, void *cbdata)
+{
+ struct collect_ids_data *cid = cbdata;
+ uint32_t id;
+ int rc;
+
+ rc = llapi_layout_mirror_id_get(layout, &id);
+ if (rc < 0)
+ return rc;
+
+ if ((__u16)id != cid->cid_exclude) {
+ int i;
+
+ for (i = 0; i < cid->cid_count; i++) {
+ /* already collected the mirror id */
+ if (id == cid->cid_ids[i])
+ return LLAPI_LAYOUT_ITER_CONT;
+ }
+ cid->cid_ids[cid->cid_count] = id;
+ cid->cid_count++;
+ }
+
+ return LLAPI_LAYOUT_ITER_CONT;
+}
+
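+/* Return true when every mirror other than mirror_id has at least one
+ * stale component, i.e. mirror_id is the last fully up-to-date copy
+ * and must not be deleted. */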
+static bool last_non_stale_mirror(__u16 mirror_id, struct llapi_layout *layout)
{
- struct mds_ssh_session *mss;
+ __u16 mirror_ids[128] = { 0 };
+ struct collect_ids_data cid = {
+ .cid_ids = mirror_ids,
+ .cid_count = 0,
+ .cid_exclude = mirror_id,
+ };
+ int i;
+
+ llapi_layout_comp_iterate(layout, collect_mirror_id, &cid);
+
+ for (i = 0; i < cid.cid_count; i++) {
+ struct llapi_resync_comp comp_array[1024] = { { 0 } };
+ int comp_size = 0;
+
+ comp_size = llapi_mirror_find_stale(layout, comp_array,
+ ARRAY_SIZE(comp_array),
+ &mirror_ids[i], 1);
+ if (comp_size == 0)
+ return false;
+ }
+
+ return true;
+}
+
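+/* Delete one mirror of the file with the given FID: open it via
+ * .lustre/fid, take a layout lease, make sure another non-stale mirror
+ * remains, then split the mirror off into a volatile file that Lustre
+ * destroys on close. */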
+static int
+lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id)
+{
+ char fid_buf[FID_LEN + 1];
+ char vname_buf[PATH_MAX];
+ struct ll_ioc_lease *lil = NULL;
+ struct llapi_layout *layout = NULL;
+ int mdt_index = -1;
+ int fd = -1;
+ int vfd = -1;
int rc;
- pthread_mutex_lock(&m->m_ssh_lock);
- /*
- * this should not happen since we limited the max count of launched jobs
- * just in case.
+ /* Inline replacement for
+ * lfs mirror split -d --mirror-id mirror_id $MOUNTPOINT/.lustre/fid/FID
*/
- while (lipe_list_empty(&m->m_ssh_list))
- pthread_cond_wait(&m->m_ssh_cond, &m->m_ssh_lock);
- /* pop one session */
- mss = container_of(m->m_ssh_list.next, struct mds_ssh_session,
- mss_list);
- lipe_list_del(&mss->mss_list);
+ snprintf(fid_buf, sizeof(fid_buf), DFID, PFID(fid));
- pthread_mutex_unlock(&m->m_ssh_lock);
+ fd = openat(open_by_fid_fd, fid_buf, O_RDWR);
+ if (fd < 0) {
+ rc = -errno;
+ llapi_printf(LLAPI_MSG_DEBUG,
+ "cannot open "DFID" for split: %s\n",
+ PFID(fid), strerror(errno));
+ goto out;
+ }
- rc = lipe_ssh_exec(&mss->mss_ctx, cmd);
- if (rc != 0 && rc != EUCLEAN)
- llapi_printf(LLAPI_MSG_FATAL,
- "error executing ssh command '%s' on '%s': rc = %d\n",
- cmd, m->host, rc);
- pthread_mutex_lock(&m->m_ssh_lock);
- lipe_list_add(&mss->mss_list, &m->m_ssh_list);
- pthread_cond_signal(&m->m_ssh_cond);
- pthread_mutex_unlock(&m->m_ssh_lock);
+ rc = llapi_file_fget_mdtidx(fd, &mdt_index);
+ if (rc < 0)
+ goto out;
+
+ snprintf(vname_buf, sizeof(vname_buf), LUSTRE_VOLATILE_HDR":%.4X:%.4X",
+ mdt_index, (unsigned int)random());
+
+ vfd = openat(open_by_fid_fd, vname_buf,
+ O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LOV_DELAY_CREATE);
+ if (vfd < 0) {
+ rc = -errno;
+ goto out;
+ }
+
+ rc = llapi_lease_acquire(fd, LL_LEASE_WRLCK);
+ if (rc < 0)
+ goto out;
+
+ layout = llapi_layout_get_by_fd(fd, 0);
+ if (layout == NULL) {
+ rc = -errno;
+ goto out;
+ }
+
+ if (last_non_stale_mirror(mirror_id, layout)) {
+ rc = -EUCLEAN;
+ goto out;
+ }
+
+ lil = calloc(1, offsetof(typeof(*lil), lil_ids[2]));
+ if (lil == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ lil->lil_mode = LL_LEASE_UNLCK;
+ lil->lil_flags = LL_LEASE_LAYOUT_SPLIT;
+ lil->lil_count = 2;
+ lil->lil_ids[0] = vfd;
+ lil->lil_ids[1] = mirror_id;
+
+ rc = llapi_lease_set(fd, lil);
+ if (rc < 0) {
+ goto out;
+ } else if (rc == 0) {
+ /* lost lease lock */
+ rc = -EBUSY;
+ } else {
+ rc = 0;
+ }
+out:
+ if (rc < 0)
+ llapi_printf(LLAPI_MSG_DEBUG,
+ "cannot delete mirror %u of "DFID": rc = %d\n",
+ mirror_id, PFID(fid), rc);
+ else
+ llapi_printf(LLAPI_MSG_DEBUG,
+ "deleted mirror %u of "DFID"\n",
+ mirror_id, PFID(fid));
+
+ llapi_layout_free(layout);
+
+ free(lil);
+
+ if (!(fd < 0))
+ close(fd);
+
+ if (!(vfd < 0))
+ close(vfd);
return rc;
}
-#define CMDSIZE (64 * 1024)
+static bool lpurge_work_should_run;
+static size_t lpurge_work_thread_count;
+static pthread_t *lpurge_work_threads;
+static LIPE_LIST_HEAD(lpurge_work_list);
+static pthread_mutex_t lpurge_work_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t lpurge_work_cond = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t lpurge_work_done = PTHREAD_COND_INITIALIZER;
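+
+/* Worker thread body: dequeue objects, delete the selected mirror and
+ * update the stats; signals lpurge_work_done once every queued object
+ * has been purged or has failed. */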
-void *lpurge_spawn_one(void *args)
+static void *lpurge_work_func(void *data)
{
- struct lpurge_object_pack *lp = (struct lpurge_object_pack *)args;
- char buf[PATH_MAX];
- int rc, i, idx = lp->lop_mdt_idx;
-
- for (i = 0; i < lp->lop_nr; i++) {
- if (lp->lop_objs[i].lo_comp_id) {
- snprintf(buf, sizeof(buf),
- "lfs mirror split -d --mirror-id %u "
- "%s/.lustre/fid/"DFID" > /dev/null 2>&1",
- lp->lop_objs[i].lo_comp_id,
- mds[idx].mnt, PFID(&lp->lop_objs[i].lo_fid));
- } else {
- snprintf(buf, sizeof(buf),
- "lfs mirror split -d -p %s "
- "%s/.lustre/fid/"DFID" > /dev/null 2>&1",
- opt.o_pool, mds[idx].mnt, PFID(&lp->lop_objs[i].lo_fid));
- }
+ struct lpurge_object *lo;
+ int rc;
- rc = lpurge_exec_cmd(&mds[idx], buf);
- if (rc)
- llapi_printf(LLAPI_MSG_DEBUG,
- "couldn't remove %u from "DFID": rc=%d\n",
- lp->lop_objs[i].lo_comp_id,
- PFID(&lp->lop_objs[i].lo_fid), rc);
+ pthread_mutex_lock(&lpurge_work_lock);
+
+ while (1) {
+ while (lpurge_work_should_run && lipe_list_empty(&lpurge_work_list))
+ pthread_cond_wait(&lpurge_work_cond, &lpurge_work_lock);
+
+ if (!lpurge_work_should_run)
+ break;
+
+ lo = lipe_list_entry(lpurge_work_list.next,
+ struct lpurge_object, lo_list);
+ lipe_list_del(&lo->lo_list);
+
+ stats.s_started++;
+ pthread_mutex_unlock(&lpurge_work_lock);
+
+ rc = lpurge_mirror_delete(&lo->lo_fid, lo->lo_mirror_id);
+ free(lo);
+
+ pthread_mutex_lock(&lpurge_work_lock);
+
+ if (rc < 0)
+ stats.s_failed++;
+ else
+ stats.s_purged++;
+
+ assert(stats.s_purged + stats.s_failed <= stats.s_queued);
+
+ if (stats.s_purged + stats.s_failed == stats.s_queued)
+ pthread_cond_signal(&lpurge_work_done);
}
- /* ignore errors, in the worst case we'll find this object again */
- pthread_exit((void *)0);
+ pthread_mutex_unlock(&lpurge_work_lock);
+
+ return NULL;
}
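+
+/* Ask the workers to exit, join them, and free the thread array. */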
-int lpurge_spawn(struct lipe_list_head *pm_list)
+static void lpurge_work_threads_stop(void)
{
- int jobs[MAX_MDTS] = { 0 };
- struct lipe_list_head list; /* list of jobs in execution */
- struct lpurge_object_pack *lp, *tmp;
- int total = 0;
- int i, repeat;
-
- LIPE_INIT_LIST_HEAD(&list);
-
-next:
- repeat = 0;
- for (i = 0; i < MAX_MDTS; i++) {
- while (!lipe_list_empty(&pm_list[i])) {
- pthread_t pid;
- int rc;
+ size_t i;
+ int rc;
- if (jobs[i] >= opt.o_max_jobs) {
- repeat = 1;
- break;
- }
+ pthread_mutex_lock(&lpurge_work_lock);
+ lpurge_work_should_run = false;
+ pthread_cond_broadcast(&lpurge_work_cond);
+ pthread_mutex_unlock(&lpurge_work_lock);
- lp = lipe_list_entry(pm_list[i].next,
- struct lpurge_object_pack, lop_list);
- lipe_list_del(&lp->lop_list);
-
- /* now fork and execute */
- rc = pthread_create(&pid, NULL, lpurge_spawn_one, lp);
- if (rc == 0) {
- stats.s_spawned++;
- stats.s_purged = lp->lop_nr;
-
- lp->lop_pid = pid;
- lp->lop_mdt_idx = i;
- lipe_list_add_tail(&lp->lop_list, &list);
- jobs[i]++;
- total++;
- }
+ for (i = 0; i < lpurge_work_thread_count; i++) {
+ rc = pthread_join(lpurge_work_threads[i], NULL);
+ if (rc != 0) {
+ llapi_printf(LLAPI_MSG_FATAL,
+ "cannot join work thread: %s\n",
+ strerror(rc));
}
}
-wait:
- assert(total > 0);
- lp = NULL;
- lipe_list_for_each_entry_safe(lp, tmp, &list, lop_list) {
- int rc;
- intptr_t retval;
+ lpurge_work_thread_count = 0;
- rc = pthread_tryjoin_np(lp->lop_pid, (void **)&retval);
- if (rc)
- continue;
- lipe_list_del(&lp->lop_list);
- i = lp->lop_mdt_idx;
- llapi_printf(LLAPI_MSG_DEBUG,
- "thread %lu MDT#%d completed: %"PRIdPTR"\n",
- lp->lop_pid, i, retval);
- free(lp);
- assert(jobs[i] > 0);
- jobs[i]--;
- total--;
- }
-
- if (repeat)
- goto next;
- if (total)
- goto wait;
- return 0;
-}
+ free(lpurge_work_threads);
+ lpurge_work_threads = NULL;
-/* round-robin distribution for unknown (w/o mappping, likely new) MDS */
-static int fid2idx_rr;
+ /* TODO: Free entries in lpurge_work_list. */
+}
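+
+/* Start thread_count worker threads; on any failure, stop those already
+ * running and return a negative errno value. */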
-static int lpurge_fid2idx(const struct lu_fid *fid)
+static int lpurge_work_threads_start(size_t thread_count)
{
- int rc, idx;
+ int rc = 0;
- /* XXX: local cache to avoid ioctl() for each FID? */
- rc = llapi_get_mdt_index_by_fid(lustre_fd, fid, &idx);
- if (rc < 0) {
- llapi_printf(LLAPI_MSG_ERROR,
- "can't lookup "DFID": rc=%d\n",
- PFID(fid), rc);
- return rc;
+ assert(thread_count > 0);
+
+ lpurge_work_should_run = true;
+
+ lpurge_work_threads = calloc(thread_count, sizeof(lpurge_work_threads[0]));
+ if (lpurge_work_threads == NULL) {
+ rc = -ENOMEM;
+ goto out;
}
- if (idx >= mdsnr || !mds[idx].host) {
- /*
- * no MDS registered for this index
- * but we can use any, it's just less efficient
- * due to over-network RPCs to open-close-modify.
- * so choose some MDS to send replica removal to
- */
- do {
- fid2idx_rr++;
- if (fid2idx_rr == mdsnr)
- fid2idx_rr = 0;
- } while (mds[fid2idx_rr].host == NULL);
+ while (lpurge_work_thread_count < thread_count) {
+ rc = pthread_create(&lpurge_work_threads[lpurge_work_thread_count],
+ NULL /* attr */,
+ &lpurge_work_func,
+ NULL /* data */);
+ if (rc != 0) {
+ llapi_printf(LLAPI_MSG_FATAL,
+ "cannot create work thread: %s\n",
+ strerror(rc));
+ rc = -rc;
+ goto out;
+ }
- idx = fid2idx_rr;
+ lpurge_work_thread_count++;
}
+out:
+ if (rc < 0)
+ lpurge_work_threads_stop();
+
+ return rc;
+}
- return idx;
+/* Unused for now: lpurge_purge_slot() moves objects onto the work list
+ * in bulk under lpurge_work_lock instead. */
+void lpurge_work_submit(struct lpurge_object *lo)
+{
+ pthread_mutex_lock(&lpurge_work_lock);
+ assert(lpurge_work_should_run);
+ stats.s_queued++;
+ lipe_list_add_tail(&lo->lo_list, &lpurge_work_list);
+ pthread_cond_signal(&lpurge_work_cond);
+ pthread_mutex_unlock(&lpurge_work_lock);
}
void lpurge_purge_slot(struct lpurge_slot *ls, long long target)
{
- struct lpurge_object_pack *lp, *pm;
- struct lipe_list_head pm_list[MAX_MDTS];
- __u64 blocks[MAX_MDTS] = { 0 };
+ struct lpurge_object *lo;
__u64 total, was, kbfree;
int i, rc;
/* try to remove some replicas */
-
- for (i = 0; i < MAX_MDTS; i++)
- LIPE_INIT_LIST_HEAD(&pm_list[i]);
-
again:
llapi_printf(LLAPI_MSG_DEBUG, "release upto %llu (expect %lu in %lu)\n",
target, ls->ls_space, ls->ls_found);
total = 0;
- /* take few FIDs */
+
assert(!lipe_list_empty(&ls->ls_obj_list));
- lp = lipe_list_entry(ls->ls_obj_list.next, struct lpurge_object_pack,
- lop_list);
- for (i = lp->lop_nr - 1; i >= 0; i--) {
- struct lu_fid fid;
- int idx;
- lp->lop_nr--;
+ pthread_mutex_lock(&lpurge_work_lock);
+ assert(lpurge_work_should_run);
- /* split by MDT index, we could do this at scanning, but that can
- * result in many useless lookups if only part of slots are subject
- * to removal
- */
- fid = lp->lop_objs[i].lo_fid;
- if (opt.o_iml_socket) {
- rc = flist_add_json_fid(lflist, &fid);
- if (rc < 0) {
- llapi_printf(LLAPI_MSG_ERROR,
- "can't add "DFID": rc=%d\n",
- PFID(&fid), rc);
- continue;
- }
- } else {
- idx = lpurge_fid2idx(&fid);
- if (idx < 0)
- continue;
- /* how many blocks we expect to free */
- blocks[idx] += lp->lop_objs[i].lo_blocks;
- total += lp->lop_objs[i].lo_blocks;
- ls->ls_space -= lp->lop_objs[i].lo_blocks;
- ls->ls_found--;
- ls->ls_stored--;
-
- /* put FID into per-MDT pack */
- pm = NULL;
- if (!lipe_list_empty(&pm_list[idx]))
- pm = lipe_list_entry(pm_list[idx].next,
- struct lpurge_object_pack,
- lop_list);
- if (!pm || pm->lop_nr >= pm->lop_max) {
- pm = calloc(1, PM_SIZE);
- if (!pm) {
- llapi_printf(LLAPI_MSG_ERROR,
- "can't alloca pack\n");
- exit(1);
- }
- pm->lop_max = FIDS_PER_CALL;
- pm->lop_nr = 0;
- lipe_list_add(&pm->lop_list, &pm_list[idx]);
- }
- pm->lop_objs[pm->lop_nr] = lp->lop_objs[i];
- pm->lop_nr++;
- }
+ while (!lipe_list_empty(&ls->ls_obj_list)) {
+ lo = lipe_list_entry(ls->ls_obj_list.next, struct lpurge_object,
+ lo_list);
+
+ /* how many blocks we expect to free */
+ total += lo->lo_blocks;
+ ls->ls_space -= lo->lo_blocks;
+ ls->ls_found--;
+ ls->ls_stored--;
+
+ lipe_list_move_tail(&lo->lo_list, &lpurge_work_list);
+ stats.s_queued++;
/* if current collection of objects may free target space, then stop */
if (total >= target)
break;
}
- if (!lp->lop_nr) {
- lipe_list_del(&lp->lop_list);
- free(lp);
- }
+
+ pthread_cond_broadcast(&lpurge_work_cond);
+ pthread_mutex_unlock(&lpurge_work_lock);
- /* fork, start and wait for completion against each MDT */
+ /* hand the queued objects to the worker threads and wait */
/* give some time to OST_DESTROY to pass through */
llapi_printf(LLAPI_MSG_DEBUG, "spawn, expect %llu back\n", total);
- if (opt.o_iml_socket)
- flist_write(lflist, true);
- else
- lpurge_spawn(pm_list);
+ /* Wait for purge threads to complete all submitted work. */
+ pthread_mutex_lock(&lpurge_work_lock);
+ while (stats.s_purged + stats.s_failed < stats.s_queued)
+ pthread_cond_wait(&lpurge_work_done, &lpurge_work_lock);
+ pthread_mutex_unlock(&lpurge_work_lock);
/* XXX: 20 is probably too short for ZFS as changes need to
* get committed on MDS first, then sent to OST, then get
struct lu_fid fid;
int rc, idx;
- lustre_fd = open(name, O_RDONLY/* | O_DIRECTORY*/);
+ lustre_fd = open(name, O_RDONLY);
if (lustre_fd < 0) {
llapi_printf(LLAPI_MSG_FATAL,
"can't open %s: %d\n", name, errno);
"%s isn't Lustre mountpoint: %d\n", name, rc);
exit(1);
}
-}
-
-void parse_mds(char *args)
-{
- struct mds *m;
- char *host, *mnt, *sidx;
- int idx, rc;
- int j;
- char *endptr;
-
- /* mds# hostname mountpoint */
- sidx = strsep(&args, ":\n");
- idx = strtol(sidx, &endptr, 10);
- if (*endptr != '\0' || idx < 0) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_FATAL, rc,
- "invalid MDS index: '%s'\n", sidx);
- exit(1);
- }
-
- if (idx >= MAX_MDTS) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_FATAL, rc, "too high MDS index\n");
- exit(1);
- }
- m = &mds[idx];
- if (m->host) {
- llapi_printf(LLAPI_MSG_FATAL,
- "mds #%d is defined already\n", idx);
- exit(1);
- }
- host = strsep(&args, ":\n");
- mnt = strsep(&args, ":\n");
- if (!host || !mnt) {
- llapi_printf(LLAPI_MSG_FATAL,
- "no host or mntpt: %s\n", args);
+ open_by_fid_fd = openat(lustre_fd, ".lustre/fid", O_RDONLY);
+ if (open_by_fid_fd < 0) {
+ llapi_error(LLAPI_MSG_FATAL, -errno,
+ "cannot open '%s/.lustre/fid'", name);
exit(1);
}
-
- if (idx >= MAX_MDTS) {
- llapi_printf(LLAPI_MSG_FATAL,
- "MDT index %u > max index %u: %s\n",
- idx, MAX_MDTS, args);
- exit(1);
- }
-
- m->host = strdup(host);
- m->mnt = strdup(mnt);
-
- /* init ssh session to target mds */
- pthread_mutex_init(&m->m_ssh_lock, NULL);
- pthread_cond_init(&m->m_ssh_cond, NULL);
- LIPE_INIT_LIST_HEAD(&m->m_ssh_list);
-
- for (j = 0; j < opt.o_max_jobs; j++) {
- struct mds_ssh_session *mss;
-
- mss = calloc(1, sizeof(*mss));
- if (mss == NULL) {
- llapi_printf(LLAPI_MSG_FATAL,
- "cannot allocate ssh session\n");
- exit(1);
- }
-
- rc = lipe_init_ssh_session(&mss->mss_ctx, host);
- if (rc != SSH_AUTH_SUCCESS) {
- llapi_printf(LLAPI_MSG_FATAL,
- "cannot create ssh session for host %s: rc = %d\n",
- host, rc);
- exit(1);
- }
-
- lipe_list_add(&mss->mss_list, &m->m_ssh_list);
- }
-
- mdsnr++;
- llapi_printf(LLAPI_MSG_DEBUG, "add mds #%d at %s@%s\n",
- idx, host, mnt);
}
#define LPURGE_INTERNAL_DUMP_FIDS 1
#define LPURGE_OPT_IML_SOCKET 2
+#define LPURGE_OPT_VERSION 3
static struct option options[] = {
{ "device", required_argument, NULL, 'D'},
{ "scan_rate", required_argument, NULL, 'R'},
{ "scan_threads", required_argument, NULL, 't'},
{ "slot_size", required_argument, NULL, 'S'},
+ { "version", no_argument, NULL, LPURGE_OPT_VERSION},
{ NULL }
};
case LPURGE_INTERNAL_DUMP_FIDS:
opt.o_fids_dumpfile = strdup(optarg);
break;
+ case LPURGE_OPT_VERSION:
+ lipe_version();
+ exit(0);
case 'b':
llapi_msg_set_level(LLAPI_MSG_MAX);
break;
opt.o_freelo = value;
break;
case 'm':
- parse_mds(optarg);
+ /* parse_mds(optarg); */
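+ /* The -m/--mds option is obsolete and ignored: mirrors are
+ * now split locally via .lustre/fid instead of over ssh to
+ * an MDS. */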
break;
case 'M':
opt.o_mountpoint = strdup(optarg);
void lpurge_verify_opts(void)
{
char buf[1024];
- int i;
if (!opt.o_pool) {
opt.o_pool = DEF_POOL;
return;
}
- // Stand-alone Operation Checks
- if (mdsnr == 0) {
- llapi_printf(LLAPI_MSG_FATAL,
- "mds is not configured\n");
- exit(1);
- }
-
- for (i = 0; i < mdsnr; i++) {
- int rc;
- char cmd[PATH_MAX];
-
- if (!mds[i].host) {
- llapi_printf(LLAPI_MSG_FATAL,
- "MDS host is not configured\n");
- exit(1);
- }
-
- if (!mds[i].mnt) {
- llapi_error(LLAPI_MSG_FATAL, -EINVAL,
- "mountpoint on MDS host '%s' is not configured\n",
- mds[i].host);
- exit(1);
- }
-
- /* check mountpoint on MDS */
- snprintf(cmd, sizeof(cmd), "lfs path2fid %s > /dev/null 2>&1",
- mds[i].mnt);
- rc = lpurge_exec_cmd(&mds[i], cmd);
- if (rc) {
- llapi_error(LLAPI_MSG_FATAL, -EINVAL,
- "invalid mountpoint '%s' on MDS host: '%s'\n",
- mds[i].mnt, mds[i].host);
- exit(1);
- }
- }
-
if (opt.o_max_jobs < 1 || opt.o_max_jobs > 1024) {
llapi_printf(LLAPI_MSG_FATAL,
"invalid max_jobs: %d\n", opt.o_max_jobs);
return;
}
llapi_printf(LLAPI_MSG_DEBUG, "dump to %s\n", opt.o_dumpfile);
- fprintf(f, "config:\n"
+ fprintf(f,
+ "version: %s-%s\n"
+ "revision: %s\n"
+ "config:\n"
" free_high: %u\n"
" free_low: %u\n"
" ostname: %s\n"
" mountpoint: %s\n"
" pool: %s\n"
- " mds: ",
- opt.o_freehi, opt.o_freelo,
- opt.o_device, opt.o_mountpoint, opt.o_pool);
- for (i = 0; i < mdsnr; i++)
- fprintf(f, "%s%d:%s:%s", i ? "," : "", i,
- mds[i].host, mds[i].mnt);
- fprintf(f, "\n"
" max_jobs: %u\n"
" check_interval: %u\n"
" scan_rate: %u\n"
" scan_threads: %u\n"
" slot_size: %u\n",
+ PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION,
+ opt.o_freehi, opt.o_freelo,
+ opt.o_device, opt.o_mountpoint, opt.o_pool,
opt.o_max_jobs, opt.o_interval, opt.o_scan_rate,
opt.o_scan_threads, opt.o_slot_size);
if (opt.o_iml_socket)
" scans: %lu\n"
" scan_time: %llu\n"
" slow_scans: %lu\n"
- " spawned: %lu\n"
+ " queued: %lu\n"
+ " started: %lu\n"
" purged: %lu\n"
+ " failed: %lu\n"
" low_hits: %lu\n",
stats.s_scans, stats.s_scan_time, stats.s_slow_scans,
- stats.s_spawned, stats.s_purged, stats.s_low_hits);
+ stats.s_queued, stats.s_started, stats.s_purged, stats.s_failed,
+ stats.s_low_hits);
lpurge_kbfree(&kbfree);
fprintf(f, "space:\n"
" kbfree: %llu\n"
kbfree, freelo, freehi);
#define HIST_FMT \
- " hist%u: { age: %lu, found: %lu, space: %lu, stored: %lu }\n"
+ " hist%u: { age: %lu, found: %lu, space: %lu, stored: %lu, nomirror_cnt: %lu, nomirror_space: %lu, nopfid_cnt: %lu, nopfid_space: %lu, notfirst_cnt: %lu, notfirst_space: %lu }\n"
fprintf(f, "hlists:\n");
for (i = LPURGE_HIST_MAX - 1; i >= 0; i--) {
continue;
fprintf(f, HIST_FMT, i, ls->ls_age, ls->ls_found,
- ls->ls_space, ls->ls_stored);
+ ls->ls_space, ls->ls_stored,
+ ls->ls_nomirror_objs,ls->ls_nomirror_space,
+ ls->ls_nopfid_objs, ls->ls_nopfid_space,
+ ls->ls_notfirst_objs, ls->ls_notfirst_space);
}
fflush(f);
* {"fid":"[0x200000401:0x6:0x0]", "last_use_time": 123456, "slot_id": $SLOT_ID}
*/
for (i = LPURGE_HIST_MAX - 1; i >= 0; i--) {
- struct lpurge_object_pack *lp;
struct lpurge_slot *ls = lpurge_hist + i;
+ struct lpurge_object *lo;
if (ls->ls_found == 0)
continue;
- lp = lipe_list_entry(ls->ls_obj_list.next,
- struct lpurge_object_pack,
- lop_list);
-
- for (i = lp->lop_nr - 1; i >= 0; i--) {
+ lipe_list_for_each_entry(lo, &ls->ls_obj_list, lo_list) {
int rc;
char fid_buf[FID_LEN + 1];
struct lu_fid fid;
obj_stat = json_object_new_object();
- fid = lp->lop_objs[i].lo_fid;
+ fid = lo->lo_fid;
snprintf(fid_buf, sizeof(fid_buf), DFID, PFID(&fid));
json_object_object_add(obj_stat, "fid",
json_object_new_string(fid_buf));
json_object_object_add(obj_stat, "last_use_time",
json_object_new_int64(
- lp->lop_objs[i].lo_last_utime));
+ lo->lo_last_utime));
json_object_object_add(obj_stat, "slot_id",
json_object_new_int(i));
output = json_object_to_json_string_ext(obj_stat,
int main(int argc, char **argv)
{
- ssh_threads_set_callbacks(ssh_threads_get_pthread());
- ssh_init();
-
+ lipe_version_init();
setlinebuf(stdout);
setlinebuf(stderr);
- memset(mds, 0, sizeof(mds));
-
llapi_msg_set_level(LLAPI_MSG_INFO);
signal(SIGUSR1, lpurge_null_handler);
* followed by the OST name ("lpurge lustre-OST0000"). */
llapi_set_command_name(ostname);
+ llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0,
+ "version %s-%s, revision %s\n",
+ PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION);
+
lpurge_get_ost_mntpt();
lpurge_configure_thresholds();
lpurge_register_signal_handlers();
+ if (lpurge_work_threads_start(opt.o_max_jobs) < 0)
+ exit(1);
+
while (1) {
/* XXX: do lazy scanning in the background
* before the real need, so that when
/* device size can change runtime.. */
lpurge_configure_thresholds();
}
+
+ /* Unreached. */
+ lipe_version_fini();
+
+ return 0;
}
*
* Author: Li Xi <lixi@ddn.com>
*/
-#include <lustre/lustreapi.h>
+#include <errno.h>
#include <fnmatch.h>
+#include <lustre/lustreapi.h>
#include "fake_lustre_idl.h"
#include "lustre_ea.h"
#include "debug.h"
-#include "errno.h"
int decode_linkea(char *buf, ssize_t size)
{
#!/bin/sh
-# Please update all the version references if VERSION changed.
-# lipe.spec.in
-# man/lipe_scan.1
-# pylipe/lipe_test.py
-
-VERSION="1.14"
-if [ -d .git ]; then
- VERSION=$(git describe|sed 's/-[0-9]*-/./')
-fi
+VERSION="1.17"
+# if [ -d .git ]; then
+# VERSION=$(git describe|sed 's/-[0-9]*-/./')
+# fi
printf "%s" "$VERSION"