Whamcloud - gitweb
LU-11653 hsm: copytool registration wakes the coordinator 49/33649/8
author Quentin Bouget <quentin.bouget@cea.fr>
Mon, 12 Nov 2018 19:50:20 +0000 (20:50 +0100)
committer Oleg Drokin <green@whamcloud.com>
Wed, 28 Nov 2018 19:14:05 +0000 (19:14 +0000)
When a copytool registers to the MDS, it is possible there are
pending requests in the coordinator's llog that previously could not
be sent (either because there were not any copytools, or not any
compatible copytools).

With this patch, the coordinator will process those requests on its
next wake up (which happens every second).

Test-Parameters: trivial
Test-Parameters: mdscount=2 mdtcount=4 mdtfilesystemtype=zfs testlist=sanity-hsm
Test-Parameters: mdscount=2 mdtcount=4 mdtfilesystemtype=ldiskfs testlist=sanity-hsm
Test-Parameters: mdscount=2 mdtcount=4 mdtfilesystemtype=zfs testlist=sanity-hsm
Test-Parameters: mdscount=2 mdtcount=4 mdtfilesystemtype=ldiskfs testlist=sanity-hsm

Signed-off-by: Quentin Bouget <quentin.bouget@cea.fr>
Change-Id: Ie49b40d312f2f3e0d9c85dee27bb8813dc4dde40
Reviewed-on: https://review.whamcloud.com/33649
Tested-by: Jenkins
Reviewed-by: Ben Evans <bevans@cray.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdt/mdt_hsm_cdt_agent.c
lustre/tests/sanity-hsm.sh

index f863eca..8db738d 100644 (file)
@@ -140,6 +140,10 @@ out_free:
        if (ha != NULL)
                OBD_FREE_PTR(ha);
 out:
+       /* wake the coordinator to potentially schedule requests */
+       if (rc == -EEXIST || rc == 0)
+               mdt_hsm_cdt_event(cdt);
+
        return rc;
 }
 
index bbe809e..32cd275 100755 (executable)
@@ -737,7 +737,7 @@ wait_request_state() {
        local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
        cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
 
-       wait_result $mds "$cmd" $state 200 ||
+       wait_result $mds "$cmd" "$state" 200 ||
                error "request on $fid is not $state on $mds"
 }
 
@@ -4654,6 +4654,55 @@ test_254b()
 }
 run_test 254b "Request counters are correctly incremented and decremented"
 
+test_255()
+{
+       local file="$DIR/$tdir/$tfile"
+       local fid=$(create_empty_file "$file")
+
+       # How do you make sure the coordinator has consumed any outstanding
+       # event, without triggering an event yourself?
+       #
+       # You wait for a request to disappear from the coordinator's llog.
+
+       # Warning: the setup represents 90% of this test
+
+       # Create and process an HSM request
+       copytool setup
+       "$LFS" hsm_archive "$file"
+       wait_request_state $fid ARCHIVE SUCCEED
+
+       kill_copytools
+       wait_copytools || error "failed to stop copytools"
+
+       # Launch a new HSM request
+       rm "$file"
+       create_empty_file "$file"
+       "$LFS" hsm_archive "$file"
+
+       cdt_shutdown
+
+       # Have the completed request be removed as soon as the cdt wakes up
+       stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT
+       set_hsm_param grace_delay 1
+       # (Hopefully, time on the MDS will behave nicely)
+       do_facet $SINGLEMDS sleep 2 &
+
+       # Increase `loop_period' as a means to prevent the coordinator from
+       # waking itself up to do some housekeeping.
+       stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+       set_hsm_param loop_period 1000
+
+       wait $! || error "waiting failed"
+       cdt_enable
+       wait_request_state $fid ARCHIVE ""
+       # The coordinator will not wake up on its own for ~`loop_period' secs...
+
+       # ... Unless a copytool registers. Now the real test begins
+       copytool setup
+       wait_request_state $(path2fid "$file") ARCHIVE SUCCEED
+}
+run_test 255 "Copytool registration wakes the coordinator up"
+
 # tests 260[a-c] rely on the parsing of the copytool's log file, they might
 # break in the future because of that.
 test_260a()