Whamcloud - gitweb
LU-4512 hsm: Fix lhsmtool_posix --report option 34/9034/2
author: Michael MacDonald <michael.macdonald@intel.com>
Mon, 20 Jan 2014 17:08:28 +0000 (12:08 -0500)
committer: Oleg Drokin <oleg.drokin@intel.com>
Mon, 10 Feb 2014 15:38:40 +0000 (15:38 +0000)
The --report option is intended to allow an override of the
default copytool progress reporting interval, but it doesn't
work. This commit implements the intended functionality and
renames the option to "--update-interval", or "-u" for short.

Also fixes the progress display in hsm/active_requests to
reflect the change from percentage complete to bytes moved.

Signed-off-by: Michael MacDonald <michael.macdonald@intel.com>
Change-Id: Id6ead1b33868e3454f00053165944bc3900cabb4
Reviewed-on: http://review.whamcloud.com/9034
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Faccini Bruno <bruno.faccini@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Henri Doreau <henri.doreau@cea.fr>
Reviewed-by: jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/mdt/mdt_hsm_cdt_requests.c
lustre/tests/sanity-hsm.sh [changed mode: 0644->0755]
lustre/utils/lhsmtool_posix.c

index 796cbea..1300861 100644 (file)
@@ -521,7 +521,7 @@ static int mdt_hsm_active_requests_proc_show(struct seq_file *s, void *v)
                   " compound/cookie="LPX64"/"LPX64
                   " action=%s archive#=%d flags="LPX64
                   " extent="LPX64"-"LPX64" gid="LPX64
-                  " data=[%s] canceled=%d uuid=%s done="LPU64"%%\n",
+                  " data=[%s] canceled=%d uuid=%s done="LPU64"\n",
                   PFID(&car->car_hai->hai_fid),
                   PFID(&car->car_hai->hai_dfid),
                   car->car_compound_id, car->car_hai->hai_cookie,
old mode 100644 (file)
new mode 100755 (executable)
index 4ef21eb..f535b7e
@@ -90,6 +90,7 @@ init_agt_vars() {
 
        export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
        export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
+       export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
        export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
        HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
        HSM_ARCHIVE_NUMBER=2
@@ -164,6 +165,8 @@ copytool_setup() {
        # independent of hardware
        local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root"
        [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
+       [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] ||
+               cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL"
        cmd+=" --bandwidth 1 $lustre_mntpnt"
 
        # Redirect the standard output and error to a log file which
@@ -562,6 +565,21 @@ wait_request_state() {
                error "request on $fid is not $state on $mds"
 }
 
+wait_request_progress() {
+       local fid=$1
+       local request=$2
+       local progress=$3
+       # 4th arg (mdt index) is optional
+       local mdtidx=${4:-0}
+       local mds=mds$((mdtidx + 1))
+
+       local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.active_requests"
+       cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$12}' | cut -f2 -d="
+
+       wait_result $mds "$cmd" $progress 100 ||
+               error "request on $fid has not made progress $progress on $mds"
+}
+
 get_request_state() {
        local fid=$1
        local request=$2
@@ -2531,6 +2549,36 @@ test_58() {
 }
 run_test 58 "Truncate a released file will trigger restore"
 
+test_60() {
+       local interval=5
+       local progress_timeout=$((interval * 2))
+
+       # test needs a new running copytool
+       copytool_cleanup
+       HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup
+
+       mkdir -p $DIR/$tdir
+       local f=$DIR/$tdir/$tfile
+       local fid=$(make_large_for_progress $f)
+
+       local start_at=$(date +%s)
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
+               error "could not archive file"
+       wait_request_progress $fid ARCHIVE 5242880
+       local finish_at=$(date +%s)
+       local elapsed=$((finish_at - start_at))
+
+       if [ $elapsed -gt $progress_timeout ]; then
+               error "Expected progress update within $progress_timeout seconds"
+       elif [ $elapsed -lt $interval ]; then
+               error "Expected progress update after at least $interval seconds"
+       fi
+
+       cdt_clear_no_retry
+       copytool_cleanup
+}
+run_test 60 "Changing progress update interval from default"
+
 test_90() {
        file_count=57
        mkdir -p $DIR/$tdir
index f182dac..ec5f305 100644 (file)
@@ -164,16 +164,18 @@ static void usage(const char *name, int rc)
        "       each line of <list_file> consists of <old_FID> <new_FID>\n"
        "   %s [options] --max-sequence <fsname>\n"
        "       return the max fid sequence of archived files\n"
-       "   -A, --archive <#>        Archive number (repeatable)\n"
-       "   -p, --hsm-root <path>    Target HSM mount point\n"
-       "   -q, --quiet              Produce less verbose output\n"
-       "   -v, --verbose            Produce more verbose output\n"
-       "   -c, --chunk-size <sz>    I/O size used during data copy\n"
-       "                            (unit can be used, default is MB)\n"
-       "   --abort-on-error         Abort operation on major error\n"
-       "   --dry-run                Don't run, just show what would be done\n"
-       "   --bandwidth <bw>         Limit I/O bandwidth (unit can be used\n,"
-       "                            default is MB)\n",
+       "   --abort-on-error          Abort operation on major error\n"
+       "   -A, --archive <#>         Archive number (repeatable)\n"
+       "   -b, --bandwidth <bw>      Limit I/O bandwidth (unit can be used\n,"
+       "                             default is MB)\n"
+       "   --dry-run                 Don't run, just show what would be done\n"
+       "   -c, --chunk-size <sz>     I/O size used during data copy\n"
+       "                             (unit can be used, default is MB)\n"
+       "   -p, --hsm-root <path>     Target HSM mount point\n"
+       "   -q, --quiet               Produce less verbose output\n"
+       "   -u, --update-interval <s> Interval between progress reports sent\n"
+       "                             to Coordinator\n"
+       "   -v, --verbose             Produce more verbose output\n",
        cmd_name, cmd_name, cmd_name, cmd_name, cmd_name);
 
        exit(rc);
@@ -204,7 +206,8 @@ static int ct_parseopts(int argc, char * const *argv)
                {"no_xattr",       no_argument,       &opt.o_copy_xattrs,   0},
                {"quiet",          no_argument,       NULL,                'q'},
                {"rebind",         no_argument,       NULL,                'r'},
-               {"report",         required_argument, &opt.o_report_int,    0},
+               {"update-interval", required_argument,  NULL,              'u'},
+               {"update_interval", required_argument,  NULL,              'u'},
                {"verbose",        no_argument,       NULL,                'v'},
                {0, 0, 0, 0}
        };
@@ -213,7 +216,7 @@ static int ct_parseopts(int argc, char * const *argv)
        unsigned long long       unit;
 
        optind = 0;
-       while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qrv",
+       while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qru:v",
                                long_opts, NULL)) != -1) {
                switch (c) {
                case 'A':
@@ -258,6 +261,15 @@ static int ct_parseopts(int argc, char * const *argv)
                case 'r':
                        opt.o_action = CA_REBIND;
                        break;
+               case 'u':
+                       opt.o_report_int = atoi(optarg);
+                       if (opt.o_report_int < 0) {
+                               rc = -EINVAL;
+                               CT_ERROR(rc, "bad value for -%c '%s'", c,
+                                        optarg);
+                               return rc;
+                       }
+                       break;
                case 'v':
                        opt.o_verbose++;
                        break;