From 8ddf4e2aa863e3100a3dff916821f2739e9427cd Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Mon, 20 Jan 2014 12:08:28 -0500 Subject: [PATCH 1/1] LU-4512 hsm: Fix lhsmtool_posix --report option The --report option is intended to allow an override of the default copytool progress reporting interval, but it doesn't work. This commit implements the intended functionality and renames the option to "--update-interval", or "-u" for short. Also fixes the progress display in hsm/active_requests to reflect the change from percentage complete to bytes moved. Signed-off-by: Michael MacDonald Change-Id: Id6ead1b33868e3454f00053165944bc3900cabb4 Reviewed-on: http://review.whamcloud.com/9034 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Faccini Bruno Reviewed-by: John L. Hammond Reviewed-by: Henri Doreau Reviewed-by: jacques-Charles Lafoucriere Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_hsm_cdt_requests.c | 2 +- lustre/tests/sanity-hsm.sh | 48 +++++++++++++++++++++++++++++++++++++++ lustre/utils/lhsmtool_posix.c | 36 +++++++++++++++++++---------- 3 files changed, 73 insertions(+), 13 deletions(-) mode change 100644 => 100755 lustre/tests/sanity-hsm.sh diff --git a/lustre/mdt/mdt_hsm_cdt_requests.c b/lustre/mdt/mdt_hsm_cdt_requests.c index 796cbea..1300861 100644 --- a/lustre/mdt/mdt_hsm_cdt_requests.c +++ b/lustre/mdt/mdt_hsm_cdt_requests.c @@ -521,7 +521,7 @@ static int mdt_hsm_active_requests_proc_show(struct seq_file *s, void *v) " compound/cookie="LPX64"/"LPX64 " action=%s archive#=%d flags="LPX64 " extent="LPX64"-"LPX64" gid="LPX64 - " data=[%s] canceled=%d uuid=%s done="LPU64"%%\n", + " data=[%s] canceled=%d uuid=%s done="LPU64"\n", PFID(&car->car_hai->hai_fid), PFID(&car->car_hai->hai_dfid), car->car_compound_id, car->car_hai->hai_cookie, diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh old mode 100644 new mode 100755 index 4ef21eb..f535b7e --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -90,6 +90,7 @@ init_agt_vars() { 
export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} + export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") HSM_ARCHIVE=$(copytool_device $SINGLEAGT) HSM_ARCHIVE_NUMBER=2 @@ -164,6 +165,8 @@ copytool_setup() { # independent of hardware local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root" [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" + [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || + cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" cmd+=" --bandwidth 1 $lustre_mntpnt" # Redirect the standard output and error to a log file which @@ -562,6 +565,21 @@ wait_request_state() { error "request on $fid is not $state on $mds" } +wait_request_progress() { + local fid=$1 + local request=$2 + local progress=$3 + # 4th arg (mdt index) is optional + local mdtidx=${4:-0} + local mds=mds$((mdtidx + 1)) + + local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.active_requests" + cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$12}' | cut -f2 -d=" + + wait_result $mds "$cmd" $progress 100 || + error "request on $fid has not made progress $progress on $mds" +} + get_request_state() { local fid=$1 local request=$2 @@ -2531,6 +2549,36 @@ test_58() { } run_test 58 "Truncate a released file will trigger restore" +test_60() { + local interval=5 + local progress_timeout=$((interval * 2)) + + # test needs a new running copytool + copytool_cleanup + HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(make_large_for_progress $f) + + local start_at=$(date +%s) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || + error "could not archive file" + wait_request_progress $fid ARCHIVE 5242880 + local finish_at=$(date +%s) + local elapsed=$((finish_at - start_at)) + + if [ $elapsed -gt $progress_timeout ]; then + error "Expected progress update within $progress_timeout seconds" + elif [ $elapsed -lt $interval ]; 
then + error "Expected progress update after at least $interval seconds" + fi + + cdt_clear_no_retry + copytool_cleanup +} +run_test 60 "Changing progress update interval from default" + test_90() { file_count=57 mkdir -p $DIR/$tdir diff --git a/lustre/utils/lhsmtool_posix.c b/lustre/utils/lhsmtool_posix.c index f182dac..ec5f305 100644 --- a/lustre/utils/lhsmtool_posix.c +++ b/lustre/utils/lhsmtool_posix.c @@ -164,16 +164,18 @@ static void usage(const char *name, int rc) " each line of consists of \n" " %s [options] --max-sequence \n" " return the max fid sequence of archived files\n" - " -A, --archive <#> Archive number (repeatable)\n" - " -p, --hsm-root Target HSM mount point\n" - " -q, --quiet Produce less verbose output\n" - " -v, --verbose Produce more verbose output\n" - " -c, --chunk-size I/O size used during data copy\n" - " (unit can be used, default is MB)\n" - " --abort-on-error Abort operation on major error\n" - " --dry-run Don't run, just show what would be done\n" - " --bandwidth Limit I/O bandwidth (unit can be used\n," - " default is MB)\n", + " --abort-on-error Abort operation on major error\n" + " -A, --archive <#> Archive number (repeatable)\n" + " -b, --bandwidth Limit I/O bandwidth (unit can be used\n," + " default is MB)\n" + " --dry-run Don't run, just show what would be done\n" + " -c, --chunk-size I/O size used during data copy\n" + " (unit can be used, default is MB)\n" + " -p, --hsm-root Target HSM mount point\n" + " -q, --quiet Produce less verbose output\n" + " -u, --update-interval Interval between progress reports sent\n" + " to Coordinator\n" + " -v, --verbose Produce more verbose output\n", cmd_name, cmd_name, cmd_name, cmd_name, cmd_name); exit(rc); @@ -204,7 +206,8 @@ static int ct_parseopts(int argc, char * const *argv) {"no_xattr", no_argument, &opt.o_copy_xattrs, 0}, {"quiet", no_argument, NULL, 'q'}, {"rebind", no_argument, NULL, 'r'}, - {"report", required_argument, &opt.o_report_int, 0}, + {"update-interval", 
required_argument, NULL, 'u'}, + {"update_interval", required_argument, NULL, 'u'}, {"verbose", no_argument, NULL, 'v'}, {0, 0, 0, 0} }; @@ -213,7 +216,7 @@ static int ct_parseopts(int argc, char * const *argv) unsigned long long unit; optind = 0; - while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qrv", + while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qru:v", long_opts, NULL)) != -1) { switch (c) { case 'A': @@ -258,6 +261,15 @@ static int ct_parseopts(int argc, char * const *argv) case 'r': opt.o_action = CA_REBIND; break; + case 'u': + opt.o_report_int = atoi(optarg); + if (opt.o_report_int < 0) { + rc = -EINVAL; + CT_ERROR(rc, "bad value for -%c '%s'", c, + optarg); + return rc; + } + break; case 'v': opt.o_verbose++; break; -- 1.8.3.1