struct mdt_device *mdt = mti->mti_mdt;
struct coordinator *cdt = &mdt->mdt_coordinator;
struct hsm_scan_data hsd = { NULL };
+ time64_t wait_event_time = 1 * HZ;
+ time64_t last_housekeeping = 0;
int rc = 0;
int request_sz;
ENTRY;
wake_up_all(&cdt->cdt_waitq);
while (1) {
- struct l_wait_info lwi;
int i;
- lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period),
- NULL, NULL);
- l_wait_event(cdt->cdt_waitq,
- cdt->cdt_event || kthread_should_stop(),
- &lwi);
+ /* Limit execution of the expensive requests traversal
+ * to at most once every "wait_event_time" jiffies. This avoids
+ * repeatedly locking/unlocking the catalog for each request,
+ * which would prevent other HSM operations from happening */
+ wait_event_interruptible_timeout(cdt->cdt_waitq,
+ kthread_should_stop(),
+ wait_event_time);
CDEBUG(D_HSM, "coordinator resumes\n");
if (kthread_should_stop()) {
+ CDEBUG(D_HSM, "Coordinator stops\n");
rc = 0;
break;
}
- cdt->cdt_event = false;
-
/* if coordinator is suspended continue to wait */
if (cdt->cdt_state == CDT_DISABLE) {
CDEBUG(D_HSM, "disable state, coordinator sleeps\n");
continue;
}
+ /* If no event, and no housekeeping to do, continue to
+ * wait. */
+ if (last_housekeeping + cdt->cdt_loop_period <= get_seconds())
+ last_housekeeping = get_seconds();
+ else if (!cdt->cdt_event)
+ continue;
+
+ cdt->cdt_event = false;
+
CDEBUG(D_HSM, "coordinator starts reading llog\n");
if (hsd.max_requests != cdt->cdt_max_requests) {
}
/**
- * wake up coordinator thread
- * \param mdt [IN] device
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_hsm_cdt_wakeup(struct mdt_device *mdt)
-{
- struct coordinator *cdt = &mdt->mdt_coordinator;
- ENTRY;
-
- if (cdt->cdt_state == CDT_STOPPED)
- RETURN(-ESRCH);
-
- /* wake up coordinator */
- cdt->cdt_event = true;
- wake_up_all(&cdt->cdt_waitq);
-
- RETURN(0);
-}
-
-/**
* initialize coordinator struct
* \param mdt [IN] device
* \retval 0 success
/* then remove request from memory list (LU-9075) */
mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
- /* ct has completed a request, so a slot is available, wakeup
- * cdt to find new work */
- mdt_hsm_cdt_wakeup(mdt);
+ /* ct has completed a request, so a slot is available,
+ * signal the coordinator to find new work */
+ mdt_hsm_cdt_event(cdt);
} else {
/* if copytool send a progress on a canceled request
* we inform copytool it should stop
if (strcmp(kernbuf, CDT_ENABLE_CMD) == 0) {
if (cdt->cdt_state == CDT_DISABLE) {
rc = set_cdt_state(cdt, CDT_RUNNING, NULL);
- mdt_hsm_cdt_wakeup(mdt);
+ mdt_hsm_cdt_event(cdt);
+ wake_up(&cdt->cdt_waitq);
} else {
rc = mdt_hsm_cdt_start(mdt);
}
echo "$count archive requests submitted"
}
+cleanup_test_40() {
+ trap 0
+ set_hsm_param max_requests $max_requests
+ copytool_cleanup
+}
+
test_40() {
local stream_count=4
local file_count=100
local i=""
local p=""
local fid=""
+ local max_requests=$(get_hsm_param max_requests)
+
+ # Increase the number of HSM requests that can be performed in
+ # parallel. With the coordinator running once per second, this
+ # also limits the number of requests per second that can be
+ # performed, so we pick a decent number. But we also need to keep
+ # that number low because the copytool has no rate limit and will
+ # fail some requests if it gets too many at once.
+ set_hsm_param max_requests 300
+
+ trap cleanup_test_40 EXIT
for i in $(seq 1 $file_count); do
for p in $(seq 1 $stream_count); do
wait ${pids[*]}
echo OK
wait_all_done 100
- copytool_cleanup
+
+ cleanup_test_40
}
run_test 40 "Parallel archive requests"
DATA=CEA
DATAHEX='[434541]'
test_104() {
- # test needs a running copytool
- copytool_setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
fid=$(make_custom_file_for_progress $f 39 1000000)
[ $? != 0 ] && skip "not enough free space" && return
- # if cdt is on, it can serve too quickly the request
- cdt_disable
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
$HSM_PARAM.actions |\
grep $fid | cut -f16 -d=")
- cdt_enable
[[ "$data1" == "$DATAHEX" ]] ||
error "Data field in records is ($data1) and not ($DATAHEX)"
+ # archive the file
+ copytool_setup
+
+ wait_request_state $fid ARCHIVE SUCCEED
+
copytool_cleanup
}
run_test 104 "Copy tool data field"
+cleanup_test_105() {
+ trap 0
+ set_hsm_param max_requests $max_requests
+ copytool_cleanup
+}
+
test_105() {
+ local max_requests=$(get_hsm_param max_requests)
mkdir -p $DIR/$tdir
local i=""
+ set_hsm_param max_requests 300
+
+ trap cleanup_test_105 EXIT
+
cdt_disable
for i in $(seq -w 1 10); do
cp /etc/passwd $DIR/$tdir/$i
$HSM_PARAM.actions |\
grep WAITING | wc -l")
cdt_restart
+
cdt_disable
local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\
$HSM_PARAM.actions |\
[[ "$reqcnt1" == "$reqcnt2" ]] ||
error "Requests count after shutdown $reqcnt2 != "\
"before shutdown $reqcnt1"
+
+ cleanup_test_105
}
run_test 105 "Restart of coordinator"
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ # wait for request to reach CT
+ wait_request_state $fid ARCHIVE STARTED
+
rm -f $f
copytool_continue
copytool_suspend
$LFS hsm_restore $f
+
+ # wait for request to reach CT
+ wait_request_state $fid RESTORE STARTED
+
rm -f $f
copytool_continue
copytool_suspend
$LFS hsm_remove $f
+
+ # wait for request to reach CT
+ wait_request_state $fid REMOVE STARTED
+
rm -f $f
copytool_continue