* for new work?
*/
bool hsd_housekeeping;
+ bool hsd_one_restore;
int hsd_action_count;
int hsd_request_len; /* array alloc len */
int hsd_request_count; /* array used count */
/* Are agents full? */
if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
cdt->cdt_max_requests) {
- if (hsd->hsd_housekeeping) {
- /* Unknown request and no more room for a new
- * request. Continue to scan to find other
- * entries for already existing requests. */
- RETURN(0);
- } else {
- /* We cannot send and more requests, stop
- * here. There might be more known requests
- * that could be merged, but this avoid
- * analyzing too many llogs for minor
- * gains. */
- RETURN(LLOG_PROC_BREAK);
- }
+		/* We cannot send any more requests
+ *
+ * *** SPECIAL CASE ***
+ *
+ * Restore requests are too important not to schedule at least
+		 * one, every time we can.
+ */
+ if (larr->arr_hai.hai_action != HSMA_RESTORE ||
+ hsd->hsd_one_restore)
+ RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
}
hai_size = cfs_size_round(larr->arr_hai.hai_len);
}
}
- if (!request) {
- struct hsm_action_list *hal;
+ /* Are we trying to force-schedule a request? */
+ if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
+ cdt->cdt_max_requests) {
+ /* Is there really no compatible hsm_scan_request? */
+ if (!request) {
+ for (i -= 1; i >= 0; i--) {
+ if (hsd->hsd_request[i].hal->hal_archive_id ==
+ archive_id) {
+ request = &hsd->hsd_request[i];
+ break;
+ }
+ }
+ }
+
+ /* Make room for the hai */
+ if (request) {
+ /* Discard the last hai until there is enough space */
+ do {
+ request->hal->hal_count--;
+
+ hai = hai_first(request->hal);
+ for (i = 0; i < request->hal->hal_count; i++)
+ hai = hai_next(hai);
+ request->hal_used_sz -=
+ cfs_size_round(hai->hai_len);
+ hsd->hsd_action_count--;
+ } while (request->hal_used_sz + hai_size >
+ LDLM_MAXREQSIZE);
+ } else if (hsd->hsd_housekeeping) {
+ struct hsm_scan_request *tmp;
+
+ /* Discard the (whole) last hal */
+ hsd->hsd_request_count--;
+ tmp = &hsd->hsd_request[hsd->hsd_request_count];
+ hsd->hsd_action_count -= tmp->hal->hal_count;
+ OBD_FREE(tmp->hal, tmp->hal_sz);
+ } else {
+ /* Bailing out, this code path is too hot */
+ RETURN(LLOG_PROC_BREAK);
- if (hsd->hsd_request_count == hsd->hsd_request_len) {
- /* Logic as above. */
- if (hsd->hsd_housekeeping)
- RETURN(0);
- else
- RETURN(LLOG_PROC_BREAK);
}
+ }
+
+ if (!request) {
+ struct hsm_action_list *hal;
+ LASSERT(hsd->hsd_request_count < hsd->hsd_request_len);
request = &hsd->hsd_request[hsd->hsd_request_count];
/* allocates hai vector size just needs to be large
memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
- request->hal_used_sz += cfs_size_round(hai->hai_len);
+ request->hal_used_sz += hai_size;
request->hal->hal_count++;
hsd->hsd_action_count++;
- if (hai->hai_action != HSMA_CANCEL)
+ switch (hai->hai_action) {
+ case HSMA_CANCEL:
+ break;
+ case HSMA_RESTORE:
+ hsd->hsd_one_restore = true;
+ /* Intentional fallthrough */
+ default:
cdt_agent_record_hash_add(cdt, hai->hai_cookie,
llh->lgh_hdr->llh_cat_idx,
larr->arr_hdr.lrh_index);
+ }
RETURN(0);
}
hsd.hsd_action_count = 0;
hsd.hsd_request_count = 0;
+ hsd.hsd_one_restore = false;
rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb,
&hsd, 0, 0, WRITE);
}
run_test 254b "Request counters are correctly incremented and decremented"
+# tests 260[a-c] rely on the parsing of the copytool's log file, they might
+# break in the future because of that.
+test_260a()
+{
+	local -a files=("$DIR/$tdir/$tfile".{0..15})
+	local file
+
+	for file in "${files[@]}"; do
+		create_small_file "$file"
+	done
+
+	# Set a few hsm parameters
+	stack_trap \
+		"set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+	set_hsm_param loop_period 1
+	stack_trap \
+		"set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+	# max_requests=3 with ~15 queued archives saturates the coordinator,
+	# which is the condition under which restore prioritisation applies
+	set_hsm_param max_requests 3
+
+	# Release one file
+	copytool setup
+	"$LFS" hsm_archive "${files[0]}"
+	wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+	"$LFS" hsm_release "${files[0]}"
+
+	# Stop the copytool
+	kill_copytools
+	wait_copytools || error "copytools failed to stop"
+
+	# Send several archive requests
+	for file in "${files[@]:1}"; do
+		"$LFS" hsm_archive "$file"
+	done
+
+	# Send one restore request
+	"$LFS" hsm_restore "${files[0]}"
+
+	# Launch a copytool
+	copytool setup
+
+	# Wait for all the requests to complete
+	wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+	for file in "${files[@]:1}"; do
+		wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+	done
+
+	# Collect the actions in the order in which the copytool processed them
+	local -a actions=(
+		$(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+			"$(copytool_logfile "$SINGLEAGT")")
+	)
+
+	printf '%s\n' "${actions[@]}"
+
+	# The RESTORE must appear among the first max_requests (3) actions
+	# the copytool handled; otherwise it was not prioritised
+	local action
+	for action in "${actions[@]:0:3}"; do
+		[ "$action" == RESTORE ] && return
+	done
+
+	error "Too many ARCHIVE requests were run before the RESTORE request"
+}
+run_test 260a "Restore request have priority over other requests"
+
+# This test is very much tied to the implementation of the current prioritisation
+# mechanism in the coordinator. It might not make sense to keep it in the future
+test_260b()
+{
+	local -a files=("$DIR/$tdir/$tfile".{0..15})
+	local file
+
+	for file in "${files[@]}"; do
+		create_small_file "$file"
+	done
+
+	# Set a few hsm parameters
+	stack_trap \
+		"set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+	set_hsm_param loop_period 1
+	stack_trap \
+		"set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+	set_hsm_param max_requests 3
+
+	# Release one file
+	# A second archive ID is used so the restore targets a different
+	# copytool than the bulk of the archive requests below
+	copytool setup --archive-id 2
+	"$LFS" hsm_archive --archive 2 "${files[0]}"
+	wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+	"$LFS" hsm_release "${files[0]}"
+
+	# Stop the copytool
+	kill_copytools
+	wait_copytools || error "copytools failed to stop"
+
+	# Send several archive requests
+	for file in "${files[@]:1}"; do
+		"$LFS" hsm_archive "$file"
+	done
+
+	# Send one restore request
+	"$LFS" hsm_restore "${files[0]}"
+
+	# Launch a copytool
+	copytool setup
+	copytool setup --archive-id 2
+
+	# Wait for all the requests to complete
+	wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+	for file in "${files[@]:1}"; do
+		wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+	done
+
+	# Collect the actions in the order in which the copytool processed them
+	local -a actions=(
+		$(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+			"$(copytool_logfile "$SINGLEAGT")")
+	)
+
+	printf '%s\n' "${actions[@]}"
+
+	# The RESTORE must appear among the first max_requests (3) actions
+	# the copytool handled; otherwise it was not prioritised
+	local action
+	for action in "${actions[@]:0:3}"; do
+		[ "$action" == RESTORE ] && return
+	done
+
+	error "Too many ARCHIVE requests were run before the RESTORE request"
+}
+run_test 260b "Restore request have priority over other requests"
+
+# This test is very much tied to the implementation of the current prioritisation
+# mechanism in the coordinator. It might not make sense to keep it in the future
+test_260c()
+{
+	local -a files=("$DIR/$tdir/$tfile".{0..15})
+	local file
+
+	for file in "${files[@]}"; do
+		create_small_file "$file"
+	done
+
+	# Set a few hsm parameters
+	stack_trap \
+		"set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+	# NOTE(review): unlike 260a/260b, loop_period is raised to 10s —
+	# presumably to keep the coordinator out of its housekeeping rescan
+	# for the duration of the test; confirm against the coordinator code
+	set_hsm_param loop_period 10
+	stack_trap \
+		"set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+	set_hsm_param max_requests 3
+
+	# Release one file
+	copytool setup --archive-id 2
+	"$LFS" hsm_archive --archive 2 "${files[0]}"
+	wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+	"$LFS" hsm_release "${files[0]}"
+
+	# Stop the copytool
+	kill_copytools
+	wait_copytools || error "copytools failed to stop"
+
+	"$LFS" hsm_archive "${files[1]}"
+
+	# Launch a copytool
+	copytool setup
+	copytool setup --archive-id 2
+
+	wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED
+
+	# Send several archive requests
+	# Requests below are issued while the copytools are already running,
+	# so (per the error message asserted at the end) the coordinator is
+	# not doing housekeeping and no restore prioritisation is expected
+	for file in "${files[@]:2}"; do
+		"$LFS" hsm_archive "$file"
+	done
+
+	# Send one restore request
+	"$LFS" hsm_restore "${files[0]}"
+
+	# Wait for all the requests to complete
+	wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+	for file in "${files[@]:2}"; do
+		wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+	done
+
+	# Collect the actions in the order in which the copytool processed them
+	local -a actions=(
+		$(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+			"$(copytool_logfile "$SINGLEAGT")")
+	)
+
+	printf '%s\n' "${actions[@]}"
+
+	# The RESTORE must NOT be among the first 3 processed actions here
+	local action
+	for action in "${actions[@]:0:3}"; do
+		[ "$action" == RESTORE ] &&
+			error "Restore requests should not be prioritised" \
+			      "unless the coordinator is doing housekeeping"
+	done
+	return 0
+}
+run_test 260c "Restore request have priority over other requests"
+
test_300() {
[ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return