From 72399232bb9bbc85fb509fa48e12d88cc7471724 Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Wed, 25 Aug 2021 17:56:16 -0500 Subject: [PATCH] Squashed 'lipe/' changes from 2a015e67c4..b7b776f968 b7b776f968 Fix json.h include. 2ba146383f Update lipe version to 1.19. f007d85105 EX-2453 lipe: fixup striped directory paths 64847ef214 EX-2453 lipe: add SoM handling to lipe_scan2 75045a94c5 EX-3701 lipe: lamigo ssh logging improvements 984381d796 DDN-2223 lipe: add ngc_exp_remv and tests 9e5f54e3e7 EX-3588 lamigo: use POSIX redirection syntax a6240ef3bf EX-3701 lipe: lamigo lamigo log message improvements git-subtree-dir: lipe git-subtree-split: b7b776f9687f084300eabe4b5cebdc20f316d8e0 --- DDN-2223-ngx-exp-remv/.gitignore | 1 + DDN-2223-ngx-exp-remv/README | 143 ++++++++++ DDN-2223-ngx-exp-remv/ngc_exp_remv | 205 +++++++++++++++ DDN-2223-ngx-exp-remv/ngc_mcreate.c | 122 +++++++++ DDN-2223-ngx-exp-remv/sanity-ngc.sh | 501 ++++++++++++++++++++++++++++++++++++ configure.ac | 2 - pylipe/lipe_test.py | 5 - src/generate_definition.c | 6 - src/lamigo.c | 144 ++++++----- src/lamigo_alr.c | 3 +- src/lipe_object_attrs.c | 66 ++++- src/lipe_scan2.c | 6 +- src/lipe_ssh.c | 35 ++- src/lipe_ssh.h | 4 +- src/lipe_zfs.c | 7 - src/lustre_ea.h | 3 +- src/lustre_ea_ldiskfs.c | 8 +- src/policy.c | 39 ++- src/posix_ea.c | 6 +- version-gen.sh | 2 +- 20 files changed, 1192 insertions(+), 116 deletions(-) create mode 100644 DDN-2223-ngx-exp-remv/.gitignore create mode 100644 DDN-2223-ngx-exp-remv/README create mode 100755 DDN-2223-ngx-exp-remv/ngc_exp_remv create mode 100644 DDN-2223-ngx-exp-remv/ngc_mcreate.c create mode 100755 DDN-2223-ngx-exp-remv/sanity-ngc.sh diff --git a/DDN-2223-ngx-exp-remv/.gitignore b/DDN-2223-ngx-exp-remv/.gitignore new file mode 100644 index 0000000..21d5d48 --- /dev/null +++ b/DDN-2223-ngx-exp-remv/.gitignore @@ -0,0 +1 @@ +ngc_mcreate diff --git a/DDN-2223-ngx-exp-remv/README b/DDN-2223-ngx-exp-remv/README new file mode 100644 index 0000000..a74b849 --- 
/dev/null +++ b/DDN-2223-ngx-exp-remv/README @@ -0,0 +1,143 @@ +Author: John L. Hammond + +This is the NGC cloud expired file remover requested under DDN-2223. + +There are two things here: + +1. The ngc_exp_remv utility to remove expired files. This is a python + program to run and filter between 'lipe_scan2' and 'xargs lfs + rmfid'. + +2. A bash test script, sanity-ngc.sh. + +How to use ngc_exp_remv: + +1. Set a 'trusted.ngc-exp-time' extended attribute on files you want + ngc_exp_remv to manage. The xattr value should contain the + expiration date expressed in epoch seconds formatted as an ASCII + string. + + For example, to set the expiration time on /lustre/fs0a12/client/blah + to 1:21 PDT on Oct 25 1985, use /usr/bin/date to get the epoch time + and setfattr to set the xattr: + + # date +%s --date='Oct 25 1:21 PDT 1985' + 499076460 + # setfattr --name=trusted.ngc-exp-time --value=499076460 /lustre/fs0a12/client/blah + + To view the expiration time, do: + # getfattr --absolute-names --name=trusted.ngc-exp-time /lustre/fs0a12/client/blah + # file: /lustre/fs0a12/client/blah + trusted.ngc-exp-time="499076460" + + More generally, do: + # setfattr --name=trusted.ngc-exp-time --value=$EPOCH_TIME $FILE + or: + # setfattr --name=trusted.ngc-exp-time --value=$(date +%s --date="$DATE") $FILE + + where $DATE is something that /usr/bin/date understands. See the + date(1) manpage for more information. + + To set a given expiration timestamp on all files in a directory, do: + + # lfs find "$DIR" -print0 | + xargs -0 setfattr --no-dereference --name=trusted.ngc-exp-time --value=$EPOCH_TIME -- + +2. 
Then for each MDT belonging to the FS, run 'ngc_exp_remv $MDT_DEVICE $CLIENT_MOUNT' where: + + MDT_DEVICE is the MDT block device (for example /dev/mapper/vg_mdt0000_fs0a12-mdt0000), + CLIENT_MOUNT is a Lustre client mount point for the same FS (for example /lustre/fs0a12/client) + + This command must be run on the node where the $MDT_DEVICE block + device is present (usually the node where it is also mounted). It + physically scans the MDT block device. + + It should be run for each MDT on the FS. + +Notes: + +1. Choosing an extended attribute (xattr) name. xattr names are of the + form "namespace.attribute". There are two possible namespaces you + may use here: "trusted" and "user". + + xattrs in the "trusted" namespace may only be accessed by processes + with the CAP_SYS_ADMIN capability (processes owned by root generally + have this capability, processes owned by other users generally do + not). + + xattrs in the "user" namespace may be accessed according to the + file permissions. Using a "user" xattr to manage expiration + requires that the Lustre clients where the expiration xattrs are set + or read be mounted with the 'user_xattr' option. + + # mount k@tcp:/lustre /mnt/lustre -t lustre -o user_xattr + # mount -t lustre + 192.168.122.75@tcp:/lustre on /mnt/lustre type lustre (rw,flock,lazystatfs) + + # mount k@tcp:/lustre /mnt/lustre -t lustre -o user_xattr + # mount -t lustre + 192.168.122.75@tcp:/lustre on /mnt/lustre type lustre (rw,flock,user_xattr,lazystatfs) + + Note that Linux allows setting "user" xattrs on regular files and + directories but not on other file types (symbolic links, fifos, + device files, sockets, ...). + + See the attr(5) manpage for more information. + +Limitations: + +1. ngc_exp_remv may not remove expired directories which are non-empty + (because 'lfs rmfid' cannot remove a non-empty directory). If an + expired directory contains non-expired files then it will not be + removed. 
Even if an expired directory contains only expired files + it may not be removed in a single scan. This is because the scan + returns files in an arbitrary order and a directory may be seen + before some of its descendants. However, an expired directory (which + only has expired descendants) will be removed eventually. + +2. ngc_exp_remv does not validate that $MDT_DEVICE and $CLIENT_MOUNT + are correct for each other. + +3. ngc_exp_remv silently ignores files with a missing expiration + xattr. It prints an error if a file has an invalid expiration + xattr. + +4. If 'lfs rmfid' cannot remove a file then it prints an error but + continues. In some cases this is expected. + +Options: + +There are options to ngc_exp_remv which allow: + * Enabling debugging. + * Performing a dry-run (prints removal commands instead of running them). + * Using a reference time other than now. + * Changing the name of the xattr used to store the expiration time. + +Here is the help message: + +# ngc_exp_remv --help +usage: ngc_exp_remv [-h] [--debug] [--debug-scan] [--dry-run] [--lfs COMMAND] + [--lipe-scan2 COMMAND] [--time REF_TIME] + [--xattr-name NAME] + MDT_DEVICE CLIENT_MOUNT + +Scan MDT_DEVICE for files with an expiration xattr (NAME) set. If the +expiration time (the xattr value) is before or equal to the reference time +(TIME) then remove the file using 'lfs rmfid CLIENT_MOUNT'. 
+ +positional arguments: + MDT_DEVICE MDT device to scan + CLIENT_MOUNT Lustre client mount point for removal by FID + +optional arguments: + -h, --help show this help message and exit + --debug enable debugging output + --debug-scan enable debugging output from scan + --dry-run print removal commands which would be run + --lfs COMMAND override lfs command (default 'lfs') + --lipe-scan2 COMMAND lipe_scan2 command (default 'lipe_scan2') + --time REF_TIME reference time in epoch seconds (default now) + --xattr-name NAME read file expiration time from xattr NAME (default + 'trusted.ngc-exp-time') + +CLIENT_MOUNT is required but it need not exist when --dry-run is used. diff --git a/DDN-2223-ngx-exp-remv/ngc_exp_remv b/DDN-2223-ngx-exp-remv/ngc_exp_remv new file mode 100755 index 0000000..ad6d9d8 --- /dev/null +++ b/DDN-2223-ngx-exp-remv/ngc_exp_remv @@ -0,0 +1,205 @@ +#!/usr/bin/env python2.7 +# Copyright 2021 DataDirect Networks, Inc. +# Author: John L. Hammond +""" +Scan mdt_device and remove NGC expired files + +Run a lipe scan to identify files with a `exp_xattr_name' extended +attribute set. Base64 decode the attribute value to get the expiration +time of the file. Files are considered expired if their expiration +time is before or equal to ref_time. Pipe expired FIDs of expired +files to 'xargs lfs rmfid client_mount' for removal. +""" +import argparse +import base64 +import json +import logging +import subprocess +import sys +import time + +LOGGING_FORMAT = 'ngc_exp_remv: %(levelname)s: %(message)s' +XATTR_NAME_DEFAULT = "trusted.ngc-exp-time" +LFS_DEFAULT = 'lfs' +LIPE_SCAN2_DEFAULT = 'lipe_scan2' + +# Python uses float for epoch times. This is fine. +HUGE_VAL = float('inf') + +# # lipe_scan2 --print-json=fid,xattrs -- /dev/mapper/mds1_flakey 'xattr_name("trusted.ngc-exp-time")' | jq . 
+# { +# "fid": "[0x200000404:0x792:0x0]", +# "xattrs": [ +# { +# "name": "trusted.lma", +# "value": "AAAAAAAAAAAEBAAAAgAAAJIHAAAAAAAA" +# }, +# { +# "name": "trusted.lov", +# "value": "0AvRCwEAAACSBwAAAAAAAAQEAAACAAAAAAAQAAEAAADkAQAAAAAAAAAAAAAAAAAAAAAAAAIAAAA=" +# }, +# { +# "name": "trusted.link", +# "value": "3/HqEQEAAAAsAAAAAAAAAAAAAAAAAAAAABQAAAACAAAABwAAAAEAAAAAZjA=" +# }, +# { +# "name": "trusted.som", +# "value": "BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +# }, +# { +# "name": "trusted.ngc-exp-time", +# "value": "MTYyODY4NjQzMA==" +# } +# ] +# } + +def ngc_exp_time(obj, exp_xattr_name): + """Return the expiration time of obj + + The expiration time is the base64 decoded value of the + `exp_xattr_name` extended attribute. If obj does not have that + attribute then return infinity. + + Print an error if the obj has a invalid xattr value. + """ + for pair in obj['xattrs']: + if pair['name'] == exp_xattr_name: + exp_time_base64 = pair['value'] + exp_time_str = base64.decodestring(exp_time_base64) + try: + return float(exp_time_str) + except: + logging.error('%s: invalid expiration value %r', obj['fid'], exp_time_str) + + return HUGE_VAL + + +def exp_fid_filter(in_file, out_file, exp_xattr_name, ref_time): + """Filter between lipe scan pipe and 'xargs lfs rmfid ...' pipe + + Read objects from in_file, base64 decode the exp_xattr_name + attribute value to get the expiration time of the file. Files are + considered expired if their expiration time is before or equal to + ref_time. Write FIDs of expired files to out_file. One FID per + line. + """ + for line in in_file: + # If the jsonlines module is available then we can read a + # stream of objects. But we don't want to depend on it so read + # scan results (json objects) a line at a time. 
+ obj = json.loads(line) + fid = obj['fid'] + exp_time = ngc_exp_time(obj, exp_xattr_name) + is_exp = exp_time <= ref_time + logging.debug('FID = %s, exp_time = %f, is_exp = %s', fid, exp_time, is_exp) + if exp_time <= ref_time: + print >> out_file, fid + + +def main(): + """Scan mdt_device and remove NGC expired files + + Run a lipe scan to identify files with a `exp_xattr_name' extended + attribute set. Base64 decode the attribute value to get the + expiration time of the file. Files are considered expired if their + expiration time is before or equal to ref_time. Pipe expired FIDs + of expired files to 'xargs lfs rmfid client_count' for removal. + """ + parser = argparse.ArgumentParser( + description=""" +Scan MDT_DEVICE for files with an expiration xattr (NAME) set. If +the expiration time (the xattr value) is before or equal to the +reference time (TIME) then remove the file using 'lfs rmfid +CLIENT_MOUNT'. +""", + epilog=""" +CLIENT_MOUNT is required but it need not exist when --dry-run is used. 
+""" + ) + + parser.add_argument('--debug', action="store_true", + help='enable debugging output') + parser.add_argument('--debug-scan', action="store_true", + help='enable debugging output from scan') + parser.add_argument('--dry-run', action="store_true", + help='print removal commands which would be run') + parser.add_argument('--lfs', metavar='COMMAND', default=LFS_DEFAULT, + help='override lfs command (default %r)' % LFS_DEFAULT) + parser.add_argument('--lipe-scan2', metavar='COMMAND', default=LIPE_SCAN2_DEFAULT, + help='lipe_scan2 command (default %r)' % LIPE_SCAN2_DEFAULT) + parser.add_argument('--time', type=float, dest='ref_time', default=time.time(), + help='reference time in epoch seconds (default now)', ) + parser.add_argument('--xattr-name', metavar='NAME', default=XATTR_NAME_DEFAULT, + help='read file expiration time from xattr NAME (default %r)' % XATTR_NAME_DEFAULT) + parser.add_argument('mdt_device', metavar='MDT_DEVICE', + help='MDT device to scan') + parser.add_argument('client_mount', metavar='CLIENT_MOUNT', + help='Lustre client mount point for removal by FID') + + if len(sys.argv) == 1: + # Print a nice no arg message. + print >> sys.stderr, """Usage: %s [OPTION...] 
MDT_DEVICE CLIENT_MOUNT +Try '%s --help' for more information.""" % (parser.prog, parser.prog) + sys.exit(2) + + args = parser.parse_args() + + if args.debug: + logging_level = logging.DEBUG + else: + logging_level = logging.ERROR + + logging.basicConfig(format=LOGGING_FORMAT, level=logging_level) + + dry_run = args.dry_run + lfs = args.lfs + lipe_scan2 = args.lipe_scan2 + xattr_name = args.xattr_name + client_mount = args.client_mount + ref_time = args.ref_time + mdt_device = args.mdt_device + + logging.debug('args = %r', args) + logging.debug('dry_run = %r', dry_run) + logging.debug('lfs = %r', lfs) + logging.debug('lipe_scan2 = %r', lipe_scan2) + logging.debug('xattr_name = %r', xattr_name) + logging.debug("client_mount = %r", client_mount) + logging.debug('ref_time = %r', ref_time) + logging.debug("mdt_device = %r", mdt_device) + + scan_args = [lipe_scan2] + if args.debug_scan: + scan_args += ['--debug'] + + scan_args += ['--print-json=fid,xattrs', + '--', + mdt_device, + 'xattr_name("%s")' % xattr_name] + logging.debug('scan_args = %r', scan_args) + scan_proc = subprocess.Popen(scan_args, stdout=subprocess.PIPE) + + remv_args = ['xargs', '--max-args=1024', '--'] + if dry_run: + remv_args += ['echo'] + + remv_args += [lfs, 'rmfid', client_mount] + logging.debug('remv_args = %r', remv_args) + remv_proc = subprocess.Popen(remv_args, stdin=subprocess.PIPE) + + exp_fid_filter(scan_proc.stdout, remv_proc.stdin, xattr_name, ref_time) + + scan_status = scan_proc.wait() + logging.debug('scan_proc = %r, scan_status = %d', scan_proc, scan_status) + + remv_proc.stdin.close() + remv_status = remv_proc.wait() + logging.debug('remv_proc = %r, remv_status = %d', remv_proc, remv_status) + + status = 0 + logging.debug('status = %d', status) + sys.exit(status) + + +if __name__ == "__main__": + main() diff --git a/DDN-2223-ngx-exp-remv/ngc_mcreate.c b/DDN-2223-ngx-exp-remv/ngc_mcreate.c new file mode 100644 index 0000000..b4d233e --- /dev/null +++ 
b/DDN-2223-ngx-exp-remv/ngc_mcreate.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* gcc -Wall -Werror ngc_mcreate.c -o ngc_mcreate -lpthread */ + +static const char *exp_xattr_name = "trusted.ngc-exp-time"; +static unsigned int file_count; +static time_t now; + +static time_t rand_exp_time(void) +{ + return now + (rand() / (RAND_MAX / 65536) - 32768); +} + +static int set_exp_time(const char *path, time_t exp_time) +{ + char exp_time_str[PATH_MAX]; + + snprintf(exp_time_str, sizeof(exp_time_str), "%lld", (long long)exp_time); + + return lsetxattr(path, exp_xattr_name, exp_time_str, strlen(exp_time_str), 0); +} + +static void *(ngc_mcreate_start_routine)(void *arg) +{ + unsigned int index = (uintptr_t)arg; + char dir_path[PATH_MAX]; + int dir_fd = -1; + unsigned int i; + intptr_t rc; + + snprintf(dir_path, sizeof(dir_path), "d%u", index); + rc = mkdir(dir_path, 0666); + if (rc < 0 && errno != EEXIST) + error(EXIT_FAILURE, errno, "mkdir"); + + rc = set_exp_time(dir_path, rand_exp_time()); + if (rc < 0) + error(EXIT_FAILURE, errno, "set_exp_time"); + + dir_fd = open(dir_path, O_RDONLY); + if (dir_fd < 0) + error(EXIT_FAILURE, errno, "open"); + + for (i = 0; i < file_count; i++) { + char file_path[PATH_MAX]; + + snprintf(file_path, sizeof(file_path), "%s/f%u", dir_path, i); + + rc = mknod(file_path, 0100666, 0); + if (rc < 0 && errno != EEXIST) + error(EXIT_FAILURE, errno, "mknod"); + + rc = set_exp_time(file_path, rand_exp_time()); + if (rc < 0) + error(EXIT_FAILURE, errno, "set_exp_time"); + } + + rc = close(dir_fd); + if (rc < 0) + error(EXIT_FAILURE, errno, "close"); + + rc = 0; + + return (void *)rc; +} + +int main(int argc, char *argv[]) +{ + pthread_t *thread = NULL; + unsigned int start_index; + unsigned int thread_count; + unsigned int i; + int rc = 0; /* exit status: stays 0 unless a joined thread returns nonzero */ + + srand(0); + now = time(NULL); + + start_index = atoi(argv[1]); + thread_count = atoi(argv[2]); 
+ file_count = atoi(argv[3]); + thread = calloc(thread_count, sizeof(thread[0])); + if (thread == NULL) + error(EXIT_FAILURE, errno, "calloc"); + + for (i = 0; i < thread_count; i++) { + int rc2; + + rc2 = pthread_create(&thread[i], + NULL /* attr */, + &ngc_mcreate_start_routine, + (void *)(uintptr_t)(start_index + i)); + if (rc2 != 0) + error(EXIT_FAILURE, rc2, "pthread_create"); + } + + for (i = 0; i < thread_count; i++) { + intptr_t retval = 0; + int rc2; + + rc2 = pthread_join(thread[i], (void **)&retval); + if (rc2 != 0) + error(EXIT_FAILURE, rc2, "pthread_join"); + + rc2 = retval; + if (rc2 != 0) + rc = rc2; + } + + exit(rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/DDN-2223-ngx-exp-remv/sanity-ngc.sh b/DDN-2223-ngx-exp-remv/sanity-ngc.sh new file mode 100755 index 0000000..5ca80b2 --- /dev/null +++ b/DDN-2223-ngx-exp-remv/sanity-ngc.sh @@ -0,0 +1,501 @@ +#!/bin/bash +# NOTE: this is not a test framework test. + +# set -eu +type base64 +type getfattr +type setfattr +type jq +type lipe_scan2 +type ngc_exp_remv + +PROGRAM_NAME="${BASH_SOURCE[0]}" +EXP_XATTR_NAME="trusted.ngc-exp-time" +MDT_DEVICE=${MDSDEV1:-/dev/mapper/mds1_flakey} +DIR=${MOUNT:-/mnt/lustre}/ngc + +COUNT=32 # 1024 +REF_TIME="1628686430" +REF_TIME_BASE64="MTYyODY4NjQzMA==" + +function debug() { + echo "${PROGRAM_NAME}: debug: $*" >&2 +} + +function debugv() { + local list='' + local delim='' + local sym val + + for sym in "$@"; do + val=$(eval echo "\$${sym}") + list+="${delim}${sym} = '${val}'" + delim=', ' + done + + debug "${list}" +} + +function error() { + echo "${PROGRAM_NAME}: error: $*" >&2 + return 1 +} + +function fatal() { + echo "${PROGRAM_NAME}: fatal: $*" >&2 + exit 1 +} + +function streqv() { + local v1=$(eval echo "\$$1") + local v2=$(eval echo "\$$2") + + debugv "$1" "$2" + [[ "$v1" == "$v2" ]] || error "$1 ('$v1') != $2 ('$v2') as strings" +} + +function inteqv() { + local v1=$(eval echo "\$$1") + local v2=$(eval echo "\$$2") + + debugv "$1" "$2" + ((v1 == v2)) || 
error "$1 ('$v1') != $2 ('$v2') as integers" +} + +function set_exp_time() { + local path="$1" + local time="$2" + + setfattr --no-dereference --name="${EXP_XATTR_NAME}" --value="\"${time}\"" -- "${path}" +} + +function get_exp_time() { + local path="$1" + + # Use '--absolute-names' to prevent annoying 'getfattr: Removing + # leading '/' from absolute path names' warning from getfattr. + + getfattr --absolute-names --no-dereference --name="${EXP_XATTR_NAME}" --only-values -- "${path}" +} + +function get_exp_time_base64() { + local path="$1" + + # Get exp time from file xattr. + # + # Use '--absolute-names' to prevent annoying 'getfattr: Removing + # leading '/' from absolute path names' warning from getfattr. + # + # getfattr ignores --encoding=EN if --only-values is used. + + getfattr --absolute-names --no-dereference --name="${EXP_XATTR_NAME}" --encoding=base64 -- "${path}" | + sed --quiet "s/^${EXP_XATTR_NAME}=0s//p" +} + +function find_xattr_name() { + # Find files in $DIR with EXP_XATTR_NAME set. + ( + find "${DIR}" -mindepth 1 -exec getfattr --absolute-names --no-dereference --name="${EXP_XATTR_NAME}" -- {} \; | + sed --quiet "s/^# file: //p" + ) 2> /dev/null +} + +function scan() { + # Do the simplest scan possible by default, extra arguments + # (an expression and/or lipe_scan2 options) will be passed + # along to lipe_scan2. + lipe_scan2 --print-json=fid,xattrs "${MDT_DEVICE}" "$@" +} + +function scan_xattr_name() { + # Do a scan for files with EXP_XATTR_NAME set, any extra + # arguments (which must be lipe_scan2 options) will be passed + # along to lipe_scan2. 
+ + scan "xattr_name(\"${EXP_XATTR_NAME}\")" "$@" +} + +# lipe_scan2 | jq one liners +# +# Extract object for FID +# lipe_scan2 --print-json=fid,xattrs $MDT_DEVICE | jq 'select(.fid == "[0x200000404:0x5:0x0]")' +# +# Extract xattrs for FID +# lipe_scan2 --print-json=fid,xattrs $MDT_DEVICE | jq 'select(.fid == "[0x200000404:0x5:0x0]").xattrs' +# +# Extract $EXP_XATTR_NAME xattr pair for FID +# # lipe_scan2 --print-json=fid,xattrs $MDT_DEVICE | jq 'select(.fid == "[0x200000404:0x5:0x0]").xattrs[] | select(.name == "trusted.ngc-exp-time")' +# { +# "name": "trusted.ngc-exp-time", +# "value": "MTYyODY4NjQzMA==" +# } +# +# Extract $EXP_XATTR_NAME xattr value for FID (base64 encoded with double quotes) +# # lipe_scan2 --print-json=fid,xattrs $MDT_DEVICE | jq 'select(.fid == "[0x200000404:0x5:0x0]").xattrs[] | select(.name == "trusted.ngc-exp-time").value' +# "MTYyODY4NjQzMA==" +# +# Add --raw-output to print without double quotes. + +function scan_exp_time_base64() { + local fid="$1" + shift + + # Get exp time for $fid using lipe_scan2. 
+ # + + scan "$@" | + jq --raw-output \ + --arg EXP_XATTR_NAME "${EXP_XATTR_NAME}" \ + --arg FID "${fid}" \ + 'select(.fid == $FID).xattrs[] | select(.name == $EXP_XATTR_NAME).value' +} + +function scan_exp_time() { + scan_exp_time_base64 "$@" | base64 --decode +} + +function test_ref() { + local path="$DIR/r0" + local ref_time_base64_1 + local exp_time + local exp_time_base64 + + ref_time_base64_1=$(echo -n "${REF_TIME}" | base64) + streqv REF_TIME_BASE64 ref_time_base64_1 + + touch -- "${path}" + set_exp_time "${path}" "${REF_TIME}" + + # trusted.ngc-exp-time="1628686430" + + exp_time=$(get_exp_time "${path}") + inteqv REF_TIME exp_time + + exp_time_base64=$(get_exp_time_base64 "${path}") + streqv REF_TIME_BASE64 exp_time_base64 + + rm -- "${path}" +} + +function test_scan() { + local path="$DIR/s0" + local fid + local exp_time_base64 + local exp_time + + touch -- "${path}" + fid=$(lfs path2fid "${path}") + + set_exp_time "${path}" "${REF_TIME}" + + exp_time_base64=$(scan_exp_time_base64 "${fid}") + streqv exp_time_base64 REF_TIME_BASE64 + + exp_time=$(scan_exp_time "${fid}") + inteqv exp_time REF_TIME + streqv exp_time REF_TIME + + rm -- "${path}" +} + +function test_scan_xattr_name() { + local path="$DIR/s1" + local fid + local exp_time_base64 + local exp_time + + # Same as previous but use xattr_name on the scan (the expression is passed through to lipe_scan2). 
+ touch -- "${path}" + fid=$(lfs path2fid "${path}") + + set_exp_time "${path}" "${REF_TIME}" + + exp_time_base64=$(scan_exp_time_base64 "${fid}" "xattr_name(\"${EXP_XATTR_NAME}\")") + streqv exp_time_base64 REF_TIME_BASE64 + + exp_time=$(scan_exp_time "${fid}" "xattr_name(\"${EXP_XATTR_NAME}\")") + inteqv exp_time REF_TIME + streqv exp_time REF_TIME + + rm -- "${path}" +} + +function test_scan_rand() { + local now=$(date +%s) + local count1 + local -a path_list=() + local path + local exp_time + local i + + for ((i = 0; i < COUNT; i++)); do + exp_time=$((now + RANDOM - 16384)) + + if ((exp_time <= now)); then + path="$DIR/o$i" + else + path="$DIR/n$i" + fi + + debugv path exp_time + touch -- "${path}" + set_exp_time "${path}" "${exp_time}" + path_list+=( "${path}" ) + done + + count1=$(scan_xattr_name | wc --lines) + inteqv COUNT count1 + + rm -- "${path_list[@]}" +} + +function test_ngc_exp_remv_old() { + local now=$(date +%s) + local found + local expect + local path + local exp_time + local i + + for ((i = 0; i < COUNT; i++)); do + path="$DIR/o$i" + exp_time=$((now - 86400)) + + debugv path exp_time + touch -- "${path}" + set_exp_time "${path}" "${exp_time}" + done + + ngc_exp_remv --time=$now "${MDT_DEVICE}" "${DIR}" + + # Expect all removed, none left + found=$(find_xattr_name | wc --lines) + expect=0 + inteqv found expect +} + +function test_ngc_exp_remv_new() { + local now=$(date +%s) + local found + local expect + local path + local -a path_list=() + local exp_time + local i + + for ((i = 0; i < COUNT; i++)); do + path="$DIR/n$i" + path_list+=( "${path}" ) + exp_time=$((now + 86400)) + + debugv path exp_time + touch -- "${path}" + set_exp_time "${path}" "${exp_time}" + done + + ngc_exp_remv --time=$now "${MDT_DEVICE}" "${DIR}" + + # Expect none removed, all left + found=$(find_xattr_name | wc --lines) + expect=$COUNT + inteqv found expect + + rm -- "${path_list[@]}" +} + +function test_ngc_exp_remv_rand() { + local now=$(date +%s) + local -a old_list=() + local -a new_list=() + local path + local exp_time + local i + + for ((i 
= 0; i < COUNT; i++)); do + exp_time=$((now + RANDOM - 16384)) + + if ((exp_time <= now)); then + path="$DIR/o$i" + old_list+=( "${path}" ) + else + path="$DIR/n$i" + new_list+=( "${path}" ) + fi + + debugv path exp_time + touch -- "${path}" + set_exp_time "${path}" "${exp_time}" + done + + ngc_exp_remv --time=$now "${MDT_DEVICE}" "${DIR}" + for path in "${old_list[@]}"; do + if [[ -e "${path}" ]]; then + error "expired file '${path}' not removed" + fi + done + + for path in "${new_list[@]}"; do + if ! [[ -e "${path}" ]]; then + error "non expired file '${path}' removed" + fi + done + + rm -- "${new_list[@]}" +} + +function test_ngc_exp_remv_dir() { + local now=$(date +%s) + local path + local exp_time + + path="$DIR/d0" + exp_time=$((now - 86400)) + mkdir -- "${path}" + set_exp_time "${path}" "${exp_time}" + ngc_exp_remv --time=$now "${MDT_DEVICE}" "${DIR}" + + if [[ -e "${path}" ]]; then + error "expired directory '${path}' not removed" + fi +} + +function test_ngc_exp_remv_link() { + local now=$(date +%s) + local path + local exp_time + + if [[ "${EXP_XATTR_NAME}" = user.* ]]; then + debug "user xattrs not supported on symbolic links, skipping" + return 0 + fi + + path="$DIR/l0" + exp_time=$((now - 86400)) + ln --symbolic -- TARGET "${path}" + set_exp_time "${path}" "${exp_time}" + ngc_exp_remv --time=$now "${MDT_DEVICE}" "${DIR}" + + if [[ -e "${path}" ]]; then + error "expired symbolic link '${path}' not removed" + fi +} + +function test_ngc_exp_remv_tree() { + local now=$(date +%s) + local path + local exp_time + local i + + exp_time=$((now - 86400)) + + mkdir --parents -- "$DIR/d0/d1/d2/d3/d4" + touch -- "$DIR/d0/f1" + touch -- "$DIR/d0/d1/f2" + touch -- "$DIR/d0/d1/d2/f3" + touch -- "$DIR/d0/d1/d2/d3/f4" + touch -- "$DIR/d0/d1/d2/d3/d4/f5" + + find "$DIR" -mindepth 1 | while read path; do + set_exp_time "${path}" "${exp_time}" + done + + # Expired trees removed after sufficiently many scans + for ((i = 0; i < 6; i++)); do + ngc_exp_remv --time=$now 
"${MDT_DEVICE}" "${DIR}" + done + + if [[ -e "$DIR/d0/" ]]; then + error "expired directory '$DIR/d0/' not removed" + fi +} + +function test_ngc_exp_remv_invalid() { + # Files without xattr or with invalid xattr value are ignored. + local -a path_list=() + local -a value=('' '' '-' '?' 'x' '-a' '#' '--7' '7 8' '42+' 'x42' '42+' 'zarf') + local count=${#value[@]} + local path + local i + + for ((i = 0; i < count; i++)); do + path="${DIR}/f$i" + path_list+=( "${path}" ) + + touch -- "${path}" + if ((i != 0)); then + set_exp_time "${path}" "${value[i]}" + fi + done + + ngc_exp_remv "${MDT_DEVICE}" "${DIR}" + + for path in "${path_list[@]}"; do + if ! [[ -e "${path}" ]]; then + error "invalid file '${path}' was removed" + fi + done + + rm -- "${path_list[@]}" +} + +function run() { + local count + local rc + + # Check for existing files with EXP_XATTR_NAME set which may + # confuse the test. + # + # Using find: + count=$(find_xattr_name | wc --lines) + ((count == 0)) || fatal "find found $count file(s) with '${EXP_XATTR_NAME}' set" + + # And using scan: + count=$(scan_xattr_name | wc --lines) + ((count == 0)) || fatal "scan found $count file(s) with '${EXP_XATTR_NAME}' set" + + (set -eu; "$@") + rc=$? 
+ if ((rc == 0)); then + debug "PASS: '$*'" + else + debug "FAIL: '$*', rc = $rc" + fi + + return $rc +} + +function main() { + local rc=0 + local rc2 + + mkdir -p -- "${DIR}" || fatal "cannot create '${DIR}'" + + run test_ref + rc2=$?; rc=$((rc || rc2)) + run test_scan + rc2=$?; rc=$((rc || rc2)) + run test_scan_xattr_name + rc2=$?; rc=$((rc || rc2)) + run test_scan_rand + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_old + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_new + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_rand + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_dir + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_link + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_tree + rc2=$?; rc=$((rc || rc2)) + run test_ngc_exp_remv_invalid + rc2=$?; rc=$((rc || rc2)) + + debug "main: rc = $rc" + + return $rc +} + +main "$@" diff --git a/configure.ac b/configure.ac index 88a47d5..b6c6574 100644 --- a/configure.ac +++ b/configure.ac @@ -113,8 +113,6 @@ AC_DEFINE([HAVE_LUSTRE_PFL], 1, [Lustre has PFL support]) AC_DEFINE([HAVE_LAYOUT_BY_XATTR], 1, [have llapi_layout_get_by_xattr()]) -AC_DEFINE([HAVE_LAZY_SIZE_ON_MDT], 1, - [have lazy size on MDT]) # -------- check for llapi_changelog_in_buf() -------- AC_MSG_CHECKING([Lustre have llapi_changelog_in_buf()]) diff --git a/pylipe/lipe_test.py b/pylipe/lipe_test.py index a9d4dca..97ef7c3 100644 --- a/pylipe/lipe_test.py +++ b/pylipe/lipe_test.py @@ -1179,11 +1179,6 @@ def test_lipe_find_size(log, parent_dir, client, mdti, fid, host, # If no PFL support, LiPE is not able get the size or LSoM from object # on MDT. have_size = False - elif not lipe_constant.HAVE_LAZY_SIZE_ON_MDT: - # If Lustre library doesn't support LSoM, LiPE won't be able to get - # LSoM. This function assumes the Lustre file is not DoM, so no size - # either. 
- have_size = False else: have_size = True diff --git a/src/generate_definition.c b/src/generate_definition.c index 7a4103b..dc805d7 100644 --- a/src/generate_definition.c +++ b/src/generate_definition.c @@ -270,12 +270,6 @@ int main(int argc, char **argv) } fprintf(fp, "\n"); -#ifdef HAVE_LAZY_SIZE_ON_MDT - fprintf(fp, "HAVE_LAZY_SIZE_ON_MDT = True\n"); -#else - fprintf(fp, "HAVE_LAZY_SIZE_ON_MDT = False\n"); -#endif - #ifdef HAVE_LUSTRE_PFL fprintf(fp, "HAVE_LUSTRE_PFL = True\n"); #else diff --git a/src/lamigo.c b/src/lamigo.c index 2f29566..fb77974 100644 --- a/src/lamigo.c +++ b/src/lamigo.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static void usage(void) "\t--hot-after-idle, hot files after N idle periods to be mirrored (default: %d)\n" "\t--src-free, mirroring to source OST pool is prohibited if pool has less space (default: %d%%)\n" "\t--tgt-free, mirroring to target OST pool is prohibited if pool has less space (default: %d%%)\n" + "\t--ssh-log-verbosity=V, set SSH_OPTIONS_LOG_VERBOSITY to V (default: 0)\n" "\t--version, print version information and exit\n", program_invocation_short_name, DEF_MIN_AGE, @@ -635,9 +637,7 @@ static void lamigo_cleanup(void) } } -extern char **environ; - -static int lamigo_exec_cmd(struct resync_agent *a, char *cmd) +static int lamigo_exec_cmd(struct resync_agent *a, const char *cmd, int *pstatus) { struct resync_ssh_session *rss; int rc; @@ -655,12 +655,7 @@ static int lamigo_exec_cmd(struct resync_agent *a, char *cmd) lipe_list_del(&rss->rss_list); pthread_mutex_unlock(&a->rag_ssh_lock); - rc = lipe_ssh_exec(&rss->rss_ctx, cmd); - if (rc) - llapi_error(LLAPI_MSG_INFO, rc, - "error executing ssh command '%s' on '%s'", - cmd, a->rag_hostname); - + rc = lipe_ssh_exec(&rss->rss_ctx, cmd, pstatus); pthread_mutex_lock(&a->rag_ssh_lock); lipe_list_add(&rss->rss_list, &a->rag_ssh_list); pthread_cond_signal(&a->rag_ssh_cond); @@ -675,12 +670,14 @@ void 
*lamigo_replicate_one(void *args) struct resync_agent *agent = rj->rj_agent; int resync = rj->rj_resync; char cmd[PATH_MAX * 2]; - intptr_t rc; + int status = INT_MAX; + enum llapi_message_level msg_level; + int rc; if (rj->rj_setprefer) { snprintf(cmd, sizeof(cmd), "lfs setstripe --comp-set --comp-flags=prefer --pool='%s' " - "'%s/.lustre/fid/"DFID"' >&/dev/null", rj->rj_pool, + "'%s/.lustre/fid/"DFID"' > /dev/null 2>&1", rj->rj_pool, agent->rag_mountpoint, PFID(&rj->rj_fid)); llapi_printf(LLAPI_MSG_DEBUG, "set prefer on "DFID"\n", @@ -711,12 +708,28 @@ void *lamigo_replicate_one(void *args) goto out; } - rc = lamigo_exec_cmd(agent, cmd); + /* rc < 0 means an ssh error. Otherwise command exit status is + * in status. Mask common exit statuses. */ + rc = lamigo_exec_cmd(agent, cmd, &status); + if (rc < 0 || + /* 1 from setprefer (see EX-3591) */ + (rj->rj_setprefer && status != 0 && status != 1) || + /* EBUSY from mirror extend/resync */ + (!rj->rj_setprefer && status != 0 && status != EBUSY)) + msg_level = LLAPI_MSG_ERROR; + else + msg_level = LLAPI_MSG_DEBUG; + + llapi_error(msg_level|LLAPI_MSG_NO_ERRNO, 0, + "error executing command '%s' on '%s': rc = %d, status = %d", + cmd, agent->rag_hostname, rc, status); out: /* notify the main thread about completion */ write(lamigo_sigpipe[1], &rc, 1); - pthread_exit((void *)rc); + rc = rc < 0 ? rc : status; + + pthread_exit((void *)(intptr_t)rc); } static int lamigo_spawn_replication(struct resync_job *rj) @@ -1076,7 +1089,7 @@ lamigo_check_user_rules(struct lipe_object_attrs *attrs, } int lustre_hsm2user(struct hsm_attrs *disk, struct hsm_user_state *hus); -#ifdef HAVE_LAZY_SIZE_ON_MDT + static inline void lustre_som_swab(struct lustre_som_attrs *attrs) { #if __BYTE_ORDER == __BIG_ENDIAN @@ -1085,7 +1098,6 @@ static inline void lustre_som_swab(struct lustre_som_attrs *attrs) __swab64s(&attrs->lsa_blocks); #endif } -#endif /* Work around the redefining of IOC_MDC_GETFILEINFO when LSOM aware * statx was added. 
We want the old version. See LU-13826 and EX-926. */ @@ -1254,16 +1266,17 @@ out: void *lamigo_check_agent_func(void *args) { - intptr_t rc; char cmd[PATH_MAX]; struct resync_agent *a = (struct resync_agent *)args; + int status = INT_MAX; + int rc; snprintf(cmd, sizeof(cmd), "lfs path2fid '%s' > /dev/null 2>&1", a->rag_mountpoint); - rc = lamigo_exec_cmd(a, cmd); + rc = lamigo_exec_cmd(a, cmd, &status); - pthread_exit((void *)rc); + pthread_exit((void *)(intptr_t)(rc < 0 ? rc : status)); } static void lamigo_check_bad_agents(void) @@ -1813,23 +1826,26 @@ static void lamigo_add_agent(const char *host, const char *mnt, char *jobs) lamigo_agent_count++; } -#define LAMIGO_OPT_POOL_REFRESH 3 -#define LAMIGO_OPT_PROGRESS_INTV 4 -#define LAMIGO_OPT_MIRROR_CMD 5 -#define LAMIGO_OPT_IGNORE_READS 6 -#define LAMIGO_OPT_IGNORE_WRITES 7 -#define LAMIGO_OPT_LARGE_IO 8 -#define LAMIGO_OPT_PERIODS 9 -#define LAMIGO_OPT_PERIOD_TIME 10 -#define LAMIGO_OPT_OFD_INTERVAL 11 -#define LAMIGO_OPT_HOT_FRACTION 12 -#define LAMIGO_OPT_HOT_AFTER_IDLE 13 -#define LAMIGO_OPT_ALR_EXTRA_ARGS 14 -#define LAMIGO_OPT_STATFS_REFRESH 15 -#define LAMIGO_OPT_SRC_FREE 16 -#define LAMIGO_OPT_TGT_FREE 17 -#define LAMIGO_OPT_VERSION 18 -#define LAMIGO_OPT_SRC_DOM 19 +enum { + LAMIGO_OPT_ALR_EXTRA_ARGS = 3, + LAMIGO_OPT_HOT_AFTER_IDLE, + LAMIGO_OPT_HOT_FRACTION, + LAMIGO_OPT_IGNORE_READS, + LAMIGO_OPT_IGNORE_WRITES, + LAMIGO_OPT_LARGE_IO, + LAMIGO_OPT_MIRROR_CMD, + LAMIGO_OPT_OFD_INTERVAL, + LAMIGO_OPT_PERIODS, + LAMIGO_OPT_PERIOD_TIME, + LAMIGO_OPT_POOL_REFRESH, + LAMIGO_OPT_PROGRESS_INTV, + LAMIGO_OPT_SRC_DOM, + LAMIGO_OPT_SRC_FREE, + LAMIGO_OPT_SSH_LOG_VERBOSITY, + LAMIGO_OPT_STATFS_REFRESH, + LAMIGO_OPT_TGT_FREE, + LAMIGO_OPT_VERSION, +}; static struct option options[] = { { "ignore-reads", no_argument, NULL, LAMIGO_OPT_IGNORE_READS}, @@ -1860,6 +1876,7 @@ static struct option options[] = { { "progress-interval", required_argument, NULL, LAMIGO_OPT_PROGRESS_INTV }, { "rescan", no_argument, NULL, 'r'}, + 
{ "ssh-log-verbosity", required_argument, NULL, LAMIGO_OPT_SSH_LOG_VERBOSITY }, { "src", required_argument, NULL, 's'}, { "src-dom", no_argument, NULL, LAMIGO_OPT_SRC_DOM}, { "src-free", required_argument, NULL, LAMIGO_OPT_SRC_FREE}, @@ -2146,6 +2163,9 @@ void lamigo_process_opt(int c, char *optarg) case LAMIGO_OPT_SRC_DOM: opt.o_src_dom = 1; break; + case LAMIGO_OPT_SSH_LOG_VERBOSITY: + lipe_ssh_log_verbosity = atoi(optarg); + break; case LAMIGO_OPT_TGT_FREE: opt.o_tgt_free = atoi(optarg); if (opt.o_tgt_free < 1 || opt.o_tgt_free > 99) { @@ -2706,36 +2726,41 @@ static int lamigo_find_device(const char *devname) static int lamigo_read_file(const char *param, char *val, const int vallen) { - int fd, rc; + int fd = -1; + int rc; fd = open(param, O_RDONLY); - if (fd < 0) - return -errno; - rc = read(fd, val, vallen); - if (rc > 0) { - while (rc && (val[rc - 1] == '\n' || val[rc - 1] == '\r')) - rc--; - val[rc] = 0; - rc = 0; + if (fd < 0) { + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, -rc, "cannot open '%s'", param); + goto out; } + /* Read at most vallen - 1 bytes so the terminating NUL always fits. */ + rc = read(fd, val, vallen - 1); if (rc < 0) { - llapi_error(LLAPI_MSG_FATAL, errno, "can't read"); - return -errno; + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, -rc, "cannot read '%s'", param); + goto out; } - close(fd); + + while (rc && (val[rc - 1] == '\n' || val[rc - 1] == '\r')) + rc--; + + val[rc] = '\0'; + rc = 0; +out: + if (!(fd < 0)) + close(fd); + return rc; } static int lamigo_read_param(const char *param, char *val, const int vallen) { - char buf[PATH_MAX]; - int rc; + char path[PATH_MAX]; - snprintf(buf, sizeof(buf), "%s/%s", mdtprefix, param); - rc = lamigo_read_file(buf, val, vallen); - if (rc) - llapi_error(LLAPI_MSG_FATAL, errno, "can't open %s", param); - return rc; + snprintf(path, sizeof(path), "%s/%s", mdtprefix, param); + return lamigo_read_file(path, val, vallen); } /** @@ -3266,7 +3291,7 @@ int main(int argc, char **argv) llapi_set_command_name(opt.o_mdtname); llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0, -
"version %s-%s, revision %s\n", + "version %s-%s, revision %s", PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION); rc = lamigo_init_cache(); @@ -3587,17 +3612,18 @@ static int lamigo_mdtidx; static __u64 lamigo_read_osp_param(const int ostidx, const char *param) { + char path[PATH_MAX]; char buf[PATH_MAX]; __u64 retval = 0; int fd, rc; assert(osproot); - snprintf(buf, sizeof(buf), "%s/%s-OST%04x-osc-MDT%04x/%s", + snprintf(path, sizeof(path), "%s/%s-OST%04x-osc-MDT%04x/%s", osproot, fsname, ostidx, lamigo_mdtidx, param); - fd = open(buf, O_RDONLY); + fd = open(path, O_RDONLY); if (fd < 0) { - llapi_error(LLAPI_MSG_ERROR, errno, "can't open %s", buf); + llapi_error(LLAPI_MSG_ERROR, errno, "cannot open '%s'", path); /* 0 means non-available OST */ return 0; } @@ -3605,7 +3631,7 @@ static __u64 lamigo_read_osp_param(const int ostidx, const char *param) if (rc > 0) retval = strtoul(buf, NULL, 10); if (rc < 0) - llapi_error(LLAPI_MSG_ERROR, errno, "can't read"); + llapi_error(LLAPI_MSG_ERROR, errno, "cannot read '%s'", path); close(fd); /* report zero if something went wrong diff --git a/src/lamigo_alr.c b/src/lamigo_alr.c index aceb3ce..2e61fd2 100644 --- a/src/lamigo_alr.c +++ b/src/lamigo_alr.c @@ -288,10 +288,9 @@ repeat: } rc = ssh_channel_get_exit_status(channel); - llapi_err_noerrno(LLAPI_MSG_ERROR, + llapi_err_noerrno(rc == 0 ? 
LLAPI_MSG_DEBUG : LLAPI_MSG_ERROR, "alr agent on %s exited with status %d\n", ala->ala_host, rc); - err: ssh_channel_send_eof(channel); ssh_channel_close(channel); diff --git a/src/lipe_object_attrs.c b/src/lipe_object_attrs.c index fb5e4df..669de48 100644 --- a/src/lipe_object_attrs.c +++ b/src/lipe_object_attrs.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -145,6 +145,46 @@ static struct json_object *lipe_xattr_list_to_json(const struct lipe_list_head * return arr; } +static struct json_object *lipe_som_attrs_to_json(const struct lustre_som_attrs *lsa) +{ + struct json_object *obj; + struct json_object *flags; + + /* The SOM_FL_* flags currently used in lsa->lsa_valid are all + * mutually exclusive. But we return an array of flag names in + * case we have non-mutually exclusive flags in the + * future. And we ignore SOM_FL_UNKNOWN which is 0. + * + * "som": { + * "flags": [ + * "lazy" + * ], + * "_flags": 4, + * "size": 16777216, + * "blocks": 32768 + * } + */ + + flags = json_object_new_array(); + + if (lsa->lsa_valid & SOM_FL_STRICT) + json_object_array_add(flags, json_object_new_string("strict")); + + if (lsa->lsa_valid & SOM_FL_STALE) + json_object_array_add(flags, json_object_new_string("stale")); + + if (lsa->lsa_valid & SOM_FL_LAZY) + json_object_array_add(flags, json_object_new_string("lazy")); + + obj = json_object_new_object(); + json_object_object_add(obj, "flags", flags); + json_object_object_add(obj, "_flags", json_object_new_int64(lsa->lsa_valid)); + json_object_object_add(obj, "size", json_object_new_int64(lsa->lsa_size)); + json_object_object_add(obj, "blocks", json_object_new_int64(lsa->lsa_blocks)); + + return obj; +} + struct json_object * lipe_object_attrs_to_json(const struct lipe_object_attrs *loa, __u64 encode_bits) @@ -190,6 +230,9 @@ lipe_object_attrs_to_json(const struct lipe_object_attrs *loa, I("blocks", loa->loa_blocks / 512); } + if (attr_bits & LIPE_OBJECT_ATTR_SOM) + J("som", 
lipe_som_attrs_to_json(&loa->loa_som)); + if (attr_bits & LIPE_OBJECT_ATTR_ENTRIES) I("entries", loa->loa_entries); @@ -436,6 +479,22 @@ out: return rc; } +/* Fixup DNE striped directory path with '//'. Root => "". Does not + * return "/" for root. See also copy_strip_dne_path(). */ +static void lipe_fid2path_fixup(char *path) +{ + char *d, *s; + + for (d = path, s = path; *s != '\0'; s++) { + if (*s == '/' && *(s + 1) == '/') + continue; + + *(d++) = *s; + } + + *d = '\0'; +} + int lipe_object_attrs_set_paths(struct lipe_object_attrs *loa, int client_mount_fd) { @@ -462,8 +521,6 @@ int lipe_object_attrs_set_paths(struct lipe_object_attrs *loa, linkno = 0; while (1) { - /* FIXME Use llapi_fid2path_at() when available. */ - memset(gf, 0, sizeof(*gf)); gf->gf_fid = loa->loa_fid; gf->gf_linkno = linkno; @@ -476,7 +533,8 @@ int lipe_object_attrs_set_paths(struct lipe_object_attrs *loa, goto out; } - /* FIXME // and root. */ + lipe_fid2path_fixup(gf->gf_u.gf_path); + rc = lipe_object_attrs_add_path(loa, gf->gf_u.gf_path); if (rc < 0) goto out; diff --git a/src/lipe_scan2.c b/src/lipe_scan2.c index ff72b90..a389748 100644 --- a/src/lipe_scan2.c +++ b/src/lipe_scan2.c @@ -27,7 +27,7 @@ static bool ls2_debug; #define LS2_DEBUG_P(x) LS2_DEBUG("%s = %p\n", #x, x) #define LS2_DEBUG_S(x) LS2_DEBUG("%s = '%s'\n", #x, x) #define LS2_DEBUG_U(x) LS2_DEBUG("%s = %"PRIuMAX"\n", #x, (uintmax_t)x) -#define LS2_DEBUG_X(x) LS2_DEBUG("%s = %"PRIxMAX"\n", #x, (uintmax_t)x) +#define LS2_DEBUG_X(x) LS2_DEBUG("%s = %#"PRIxMAX"\n", #x, (uintmax_t)x) #define LS2_ERROR(fmt, args...) 
\ fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##args) @@ -69,7 +69,7 @@ enum { LIPE_OBJECT_ATTR_HSMEA | // LIPE_OBJECT_ATTR_LOVEA | TODO in lipe_object_attrs_to_json() LIPE_OBJECT_ATTR_SIZE | - // LIPE_OBJECT_ATTR_SOM | TODO in lipe_object_attrs_to_json() + LIPE_OBJECT_ATTR_SOM | // LIPE_OBJECT_ATTR_DOM | TODO in lipe_object_attrs_to_json() // LIPE_OBJECT_ATTR_ENTRIES | FIXME Broken for striped dirs // LIPE_OBJECT_ATTR_EMPTY | FIXME Broken for striped dirs @@ -93,7 +93,7 @@ static struct attr_bit_name attr_bit_names[] = { X(LIPE_OBJECT_ATTR_HSMEA, "hsm"), // X(LIPE_OBJECT_ATTR_LOVEA, "lov"), X(LIPE_OBJECT_ATTR_SIZE, "size"), - // X(LIPE_OBJECT_ATTR_SOM, "som"), + X(LIPE_OBJECT_ATTR_SOM, "som"), // X(LIPE_OBJECT_ATTR_DOM, "dom"), // X(LIPE_OBJECT_ATTR_ENTRIES, "entries"), // X(LIPE_OBJECT_ATTR_EMPTY, "empty"), diff --git a/src/lipe_ssh.c b/src/lipe_ssh.c index d9173f5..8df19fa 100644 --- a/src/lipe_ssh.c +++ b/src/lipe_ssh.c @@ -8,6 +8,8 @@ #include "lipe_ssh.h" +int lipe_ssh_log_verbosity; + #define lipe_ssh_debug(fmt, args...) 
\ llapi_error(LLAPI_MSG_DEBUG|LLAPI_MSG_NO_ERRNO, 0, fmt, ##args) @@ -19,6 +21,11 @@ static int lipe_ssh_session_start_cmd(ssh_session session, const char *cmd, ssh_ ssh_channel channel = NULL; int rc; + assert(SSH_OK == 0); + assert(SSH_ERROR < 0); + assert(SSH_AGAIN < 0); + assert(SSH_EOF < 0); + assert(session != NULL); channel = ssh_channel_new(session); @@ -52,10 +59,12 @@ out: ssh_channel_free(channel); + assert(rc <= 0); + return rc; } -static int lipe_ssh_session_exec_cmd(ssh_session session, const char *cmd) +static int lipe_ssh_session_exec_cmd(ssh_session session, const char *cmd, int *pstatus) { ssh_channel channel; int rc; @@ -65,10 +74,19 @@ static int lipe_ssh_session_exec_cmd(ssh_session session, const char *cmd) return rc; rc = ssh_channel_get_exit_status(channel); + if (rc < 0) + goto out; + + *pstatus = rc; + rc = 0; +out: ssh_channel_send_eof(channel); ssh_channel_close(channel); ssh_channel_free(channel); + assert(rc <= 0); + assert(0 <= *pstatus); + return rc; } @@ -105,6 +123,11 @@ static int lipe_ssh_session_create(ssh_session *psession, const char *host) goto out; } + /* This sets the log verbosity on the session and the thread + * local variable. 
*/ + if (lipe_ssh_log_verbosity != 0) + ssh_options_set(session, SSH_OPTIONS_LOG_VERBOSITY, &lipe_ssh_log_verbosity); + rc = ssh_options_set(session, SSH_OPTIONS_HOST, host); if (rc != SSH_OK) { lipe_ssh_error("cannot set SSH session host to '%s: %s'", @@ -142,6 +165,8 @@ out: lipe_ssh_debug("create new SSH session for host '%s': rc = %d", host, rc); lipe_ssh_session_destroy(&session); + assert(rc <= 0); + return rc; } @@ -191,10 +216,12 @@ int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channe if (rc != SSH_OK) lipe_ssh_context_fail(ctx); + assert(rc <= 0); + return rc; } -int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd) +int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd, int *pstatus) { int rc; @@ -203,9 +230,11 @@ int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd) return rc; /* Execute a remote command */ - rc = lipe_ssh_session_exec_cmd(ctx->lsc_session, cmd); + rc = lipe_ssh_session_exec_cmd(ctx->lsc_session, cmd, pstatus); if (rc < 0) lipe_ssh_context_fail(ctx); + assert(rc <= 0); + return rc; } diff --git a/src/lipe_ssh.h b/src/lipe_ssh.h index 57ab9d8..fd51a53 100644 --- a/src/lipe_ssh.h +++ b/src/lipe_ssh.h @@ -6,6 +6,8 @@ #include +extern int lipe_ssh_log_verbosity; + struct lipe_ssh_context { char *lsc_host; ssh_session lsc_session; @@ -14,7 +16,7 @@ struct lipe_ssh_context { int lipe_ssh_context_init(struct lipe_ssh_context *ctx, const char *host); void lipe_ssh_context_destroy(struct lipe_ssh_context *ctx); -int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd); +int lipe_ssh_exec(struct lipe_ssh_context *ctx, const char *cmd, int *pstatus); int lipe_ssh_start_cmd(struct lipe_ssh_context *ctx, const char *cmd, ssh_channel *pchannel); #endif /* _LIPE_SSH_H_ */ diff --git a/src/lipe_zfs.c b/src/lipe_zfs.c index 06f0748..f66a74e 100644 --- a/src/lipe_zfs.c +++ b/src/lipe_zfs.c @@ -151,7 +151,6 @@ static int zfs_get_xattr(struct lipe_object *lobject, memcpy(attrs->loa_lum, 
value, cnt); attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_LOVEA; } else if (strcmp(xattr_name, XATTR_NAME_SOM) == 0) { -#ifdef HAVE_LAZY_SIZE_ON_MDT if (cnt != sizeof(attrs->loa_som)) { OBJ_ERROR(lobject, "unexpected size of LSoM xattr, expected [%u] got [%u]\n", @@ -159,12 +158,6 @@ static int zfs_get_xattr(struct lipe_object *lobject, } lustre_som_swab((struct lustre_som_attrs *)value); memcpy(&attrs->loa_som, value, cnt); - attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_SOM; -#else /* !HAVE_LAZY_SIZE_ON_MDT */ - LERROR("lipe was built on Lustre without Lazy Size on MDT support, but MDT has xattr of LSoM\n" - ); - return 0; -#endif /* !HAVE_LAZY_SIZE_ON_MDT */ } return 0; } diff --git a/src/lustre_ea.h b/src/lustre_ea.h index 5ab7612..986ab76 100644 --- a/src/lustre_ea.h +++ b/src/lustre_ea.h @@ -22,7 +22,6 @@ int lustre_hsm2user(struct hsm_attrs *disk, struct hsm_user_state *hus); enum lustre_dom_status dom_status_from_lum(struct lov_user_md *lum); -#ifdef HAVE_LAZY_SIZE_ON_MDT static inline void lustre_som_swab(struct lustre_som_attrs *attrs) { #if __BYTE_ORDER == __BIG_ENDIAN @@ -31,7 +30,7 @@ static inline void lustre_som_swab(struct lustre_som_attrs *attrs) __swab64s(&attrs->lsa_blocks); #endif } -#endif /* HAVE_LAZY_SIZE_ON_MDT */ + int lum_has_pattern_flag(struct lov_user_md *lum, __u32 layout_pattern); int lum_get_stripe(struct lov_user_md *lum, __u32 *stripe_count, __u32 *stripe_size, __u32 *comp_count); diff --git a/src/lustre_ea_ldiskfs.c b/src/lustre_ea_ldiskfs.c index 197e7cd..1ab66e0 100644 --- a/src/lustre_ea_ldiskfs.c +++ b/src/lustre_ea_ldiskfs.c @@ -219,7 +219,6 @@ int get_lum_ea_ldiskfs(struct lipe_object *object, return 0; } -#ifdef HAVE_LAZY_SIZE_ON_MDT /* * llapi_layout_get_by_xattr() and LSoM are both included in Lustre-2.12, * so no need to duplicate the macros. 
@@ -251,10 +250,9 @@ int get_som_ea_ldiskfs(struct lipe_object *object, } lustre_som_swab(som); - attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_SOM; - return 0; + + return rc; } -#endif /* HAVE_LAZY_SIZE_ON_MDT */ struct empty_struct { bool es_is_empty; @@ -413,9 +411,7 @@ struct lipe_backfs_operations ldiskfs_operations = { .get_lmv_ea = get_lmv_ea_ldiskfs, .get_hsm_ea = get_hsm_ea_ldiskfs, .get_lum_ea = get_lum_ea_ldiskfs, -#ifdef HAVE_LAZY_SIZE_ON_MDT .get_som_ea = get_som_ea_ldiskfs, -#endif .check_dir_empty = check_dir_empty_ldiskfs, .get_dir_entries = get_dir_entries_ldiskfs, .get_all_xattrs = ldiskfs_get_all_xattrs, diff --git a/src/policy.c b/src/policy.c index 5ff27ac..76d69f9 100644 --- a/src/policy.c +++ b/src/policy.c @@ -910,6 +910,19 @@ int get_som_ea(struct lipe_object *object, OBJ_DEBUG(object, "failed to get SoM\n"); return rc; } + + switch (attrs->loa_som.lsa_valid) { + case SOM_FL_STRICT: + case SOM_FL_STALE: + case SOM_FL_LAZY: + attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_SOM; + rc = 0; + break; + default: + /* Unknown SoM flags: fail here, the SOM bit is not set and callers rely on it. */ + return -ENOTSUP; + } + return 0; } @@ -942,9 +955,7 @@ static int get_size(struct lipe_object *object, struct lipe_object_attrs *attrs) { int rc; -#ifdef HAVE_LAZY_SIZE_ON_MDT struct lustre_som_attrs *som = &attrs->loa_som; -#endif LASSERT(attrs->loa_attr_bits & LIPE_OBJECT_ATTR_ATTR); if ((attrs->loa_mode & S_IFMT) != S_IFREG) { @@ -960,25 +971,22 @@ static int get_size(struct lipe_object *object, if (attrs->loa_attr_bits & LIPE_OBJECT_ATTR_SIZE) return 0; -#ifdef HAVE_LAZY_SIZE_ON_MDT if (!(attrs->loa_attr_bits & LIPE_OBJECT_ATTR_SOM)) { rc = get_som_ea(object, attrs); if (rc) return rc; } - /* Take the estimated size/blocks of SoM as the size/blocks */ assert(attrs->loa_attr_bits & LIPE_OBJECT_ATTR_SOM); - /* This should never happen, but check anyway */ - if (som->lsa_valid == SOM_FL_UNKNOWN) - return -ENOTSUP; + + if (som->lsa_valid != SOM_FL_STRICT) + return 0; + attrs->loa_size = som->lsa_size; attrs->loa_blocks = som->lsa_blocks *
512; attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_SIZE; + return 0; -#else /* !HAVE_LAZY_SIZE_ON_MDT */ - return -ENOTSUP; -#endif /* !HAVE_LAZY_SIZE_ON_MDT */ } static int check_empty(struct lipe_object *object, @@ -1199,6 +1207,17 @@ static int lipe_read_attrs(struct lipe_policy *policy, } } + if (lipe_need_read_attr(attrs, need_bits, LIPE_OBJECT_ATTR_SOM)) { + rc = get_som_ea(object, attrs); + if (rc) { + OBJ_DEBUG(object, "failed to get SoM\n"); + if (quit_on_error) + return rc; + if (!rc2) + rc2 = rc; + } + } + if (lipe_need_read_attr(attrs, need_bits, LIPE_OBJECT_ATTR_SIZE)) { rc = get_size(object, attrs); if (rc) { diff --git a/src/posix_ea.c b/src/posix_ea.c index db81ece..cff7ea2 100644 --- a/src/posix_ea.c +++ b/src/posix_ea.c @@ -171,7 +171,6 @@ static int get_lum_ea_posix(struct lipe_object *object, return 0; } -#ifdef HAVE_LAZY_SIZE_ON_MDT /* * llapi_layout_get_by_xattr() and LSoM are both included in Lustre-2.12, * so no need to duplicate the macros. @@ -202,10 +201,9 @@ static int get_som_ea_posix(struct lipe_object *object, } lustre_som_swab(som); - attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_SOM; + return 0; } -#endif /* HAVE_LAZY_SIZE_ON_MDT */ static int check_dir_empty_posix(struct lipe_object *object, struct lipe_object_attrs *attrs) @@ -393,9 +391,7 @@ struct lipe_backfs_operations posix_operations = { .get_lmv_ea = get_lmv_ea_posix, .get_hsm_ea = get_hsm_ea_posix, .get_lum_ea = get_lum_ea_posix, -#ifdef HAVE_LAZY_SIZE_ON_MDT .get_som_ea = get_som_ea_posix, -#endif .check_dir_empty = check_dir_empty_posix, .get_dir_entries = get_dir_entries_posix, .get_all_xattrs = posix_get_all_xattrs, diff --git a/version-gen.sh b/version-gen.sh index edcd684..d1ef8e8 100755 --- a/version-gen.sh +++ b/version-gen.sh @@ -1,6 +1,6 @@ #!/bin/sh -VERSION="1.18" +VERSION="1.19" # if [ -d .git ]; then # VERSION=$(git describe|sed 's/-[0-9]*-/./') # fi -- 1.8.3.1