}
if (update_idx) {
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- updates, update_idx);
+ rc = mdt_agent_record_update(mti, updates, update_idx);
if (rc)
CERROR("%s: mdt_agent_record_update() failed, "
"rc=%d, cannot update records "
.status = ARS_CANCELED,
};
- rc = mdt_agent_record_update(mti->mti_env, mti->mti_mdt,
- &update, 1);
+ rc = mdt_agent_record_update(mti, &update, 1);
if (rc) {
CERROR("%s: mdt_agent_record_update() failed, "
"rc=%d, cannot update status to %s "
update.cookie = pgs->hpk_cookie;
update.status = status;
- rc1 = mdt_agent_record_update(mti->mti_env, mdt,
- &update, 1);
+ rc1 = mdt_agent_record_update(mti, &update, 1);
if (rc1)
CERROR("%s: mdt_agent_record_update() failed,"
" rc=%d, cannot update status to %s"
/**
- * data passed to llog_cat_process() callback
- * to cancel requests
- */
-struct hsm_cancel_all_data {
- struct mdt_device *mdt;
-};
-
-/**
* llog_cat_process() callback, used to:
* - purge all requests
* \param env [IN] environment
* \param llh [IN] llog handle
* \param hdr [IN] llog record
- * \param data [IN] cb data = struct hsm_cancel_all_data
+ * \param data [IN] cb data = struct mdt_thread_info
* \retval 0 success
* \retval -ve failure
*/
struct llog_handle *llh,
struct llog_rec_hdr *hdr, void *data)
{
- struct llog_agent_req_rec *larr;
- struct hsm_cancel_all_data *hcad;
- int rc = 0;
+ struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
+ struct hsm_action_item *hai = &larr->arr_hai;
+ struct mdt_thread_info *mti = data;
+ struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+ int rc;
ENTRY;
- larr = (struct llog_agent_req_rec *)hdr;
- hcad = data;
- if (larr->arr_status == ARS_WAITING ||
- larr->arr_status == ARS_STARTED) {
- larr->arr_status = ARS_CANCELED;
- larr->arr_req_change = ktime_get_real_seconds();
- rc = llog_write(env, llh, hdr, hdr->lrh_index);
+ if (larr->arr_status != ARS_WAITING &&
+ larr->arr_status != ARS_STARTED)
+ RETURN(0);
+
+ /* Unlock the EX layout lock */
+ if (hai->hai_action == HSMA_RESTORE)
+ cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
+
+ larr->arr_status = ARS_CANCELED;
+ larr->arr_req_change = ktime_get_real_seconds();
+ rc = llog_write(env, llh, hdr, hdr->lrh_index);
+ if (rc < 0) {
+ CERROR("%s: cannot update agent log: rc = %d\n",
+ mdt_obd_name(mti->mti_mdt), rc);
+ rc = LLOG_DEL_RECORD;
}
RETURN(rc);
struct cdt_agent_req *car;
struct hsm_action_list *hal = NULL;
struct hsm_action_item *hai;
- struct hsm_cancel_all_data hcad;
int hal_sz = 0, hal_len, rc;
enum cdt_states old_state;
ENTRY;
OBD_FREE(hal, hal_sz);
/* cancel all on-disk records */
- hcad.mdt = mdt;
-
rc = cdt_llog_process(mti->mti_env, mti->mti_mdt, mdt_cancel_all_cb,
- &hcad, 0, 0, WRITE);
+ (void *)mti, 0, 0, WRITE);
out_cdt_state:
/* Enable coordinator, unless the coordinator was stopping. */
set_cdt_state_locked(cdt, old_state);
* to find requests
*/
struct data_update_cb {
- struct mdt_device *mdt;
+ struct mdt_thread_info *mti; /* thread info; the callback reads mti->mti_mdt and hands mti to cdt_restore_handle_del() */
struct hsm_record_update *updates;
- unsigned int updates_count;
- unsigned int updates_done;
- time64_t change_time;
+ unsigned int updates_count; /* number of requested updates; the scan breaks once updates_done reaches it */
+ unsigned int updates_done; /* starts at 0, incremented after each successful llog_write() of a matched record */
+ time64_t change_time; /* value stored into arr_req_change of every record that gets updated */
};
/**
struct llog_rec_hdr *hdr,
void *data)
{
- struct llog_agent_req_rec *larr;
- struct data_update_cb *ducb;
- int rc, i;
+ struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
+ struct hsm_action_item *hai = &larr->arr_hai;
+ struct data_update_cb *ducb = data;
+ struct mdt_thread_info *mti = ducb->mti;
+ struct mdt_device *mdt = ducb->mti->mti_mdt;
+ struct coordinator *cdt = &mdt->mdt_coordinator;
+ int rc, i;
ENTRY;
- larr = (struct llog_agent_req_rec *)hdr;
- ducb = data;
-
/* check if all done */
if (ducb->updates_count == ducb->updates_done)
RETURN(LLOG_PROC_BREAK);
struct hsm_record_update *update = &ducb->updates[i];
CDEBUG(D_HSM, "%s: search %#llx, found %#llx\n",
- mdt_obd_name(ducb->mdt), update->cookie,
- larr->arr_hai.hai_cookie);
- if (larr->arr_hai.hai_cookie == update->cookie) {
+ mdt_obd_name(mdt), update->cookie,
+ hai->hai_cookie);
+ if (hai->hai_cookie == update->cookie) {
/* If record is a cancel request, it cannot be
* canceled. This is to manage the following
* has to be set to ARS_CANCELED and the 2nd
* to ARS_SUCCEED
*/
- if (larr->arr_hai.hai_action == HSMA_CANCEL &&
+ if (hai->hai_action == HSMA_CANCEL &&
update->status == ARS_CANCELED)
RETURN(0);
larr->arr_status = update->status;
larr->arr_req_change = ducb->change_time;
rc = llog_write(env, llh, hdr, hdr->lrh_index);
+ if (rc < 0)
+ break;
+
ducb->updates_done++;
+
+ /* Unlock the EX layout lock */
+ if (hai->hai_action == HSMA_RESTORE &&
+ update->status == ARS_CANCELED)
+ cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
+
break;
}
}
if (rc < 0)
CERROR("%s: mdt_agent_llog_update_rec() failed, rc = %d\n",
- mdt_obd_name(ducb->mdt), rc);
+ mdt_obd_name(mdt), rc);
RETURN(rc);
}
* \retval 0 on success
* \retval negative on failure
*/
-int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
+int mdt_agent_record_update(struct mdt_thread_info *mti,
struct hsm_record_update *updates,
unsigned int updates_count)
{
+ const struct lu_env *env = mti->mti_env;
+ struct mdt_device *mdt = mti->mti_mdt;
struct data_update_cb ducb;
u32 start_cat_idx = -1;
u32 start_rec_idx = -1;
if (start_rec_idx != 0)
start_rec_idx -= 1;
- ducb.mdt = mdt;
+ ducb.mti = mti;
ducb.updates = updates;
ducb.updates_count = updates_count;
ducb.updates_done = 0;
update.cookie = hai->hai_cookie;
update.status = ARS_SUCCEED;
- rc2 = mdt_agent_record_update(mti->mti_env, mdt,
- &update, 1);
+ rc2 = mdt_agent_record_update(mti, &update, 1);
if (rc2) {
CERROR("%s: mdt_agent_record_update() "
"failed, cannot update "
continue;
fail_request = true;
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- &update, 1);
+ rc = mdt_agent_record_update(mti, &update, 1);
if (rc < 0) {
CERROR("%s: mdt_agent_record_update() failed, "
"cannot update status to %s for cookie "
* make the same HAL with valid only
* records */
fail_request = true;
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- &update, 1);
+ rc = mdt_agent_record_update(mti, &update, 1);
if (rc) {
CERROR("%s: mdt_agent_record_update() failed, "
"cannot update status to %s for cookie "
int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt,
__u32 archive_id, __u64 flags,
struct hsm_action_item *hai);
-int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
+int mdt_agent_record_update(struct mdt_thread_info *mti,
struct hsm_record_update *updates,
unsigned int updates_count);
void cdt_agent_record_hash_add(struct coordinator *cdt, u64 cookie, u32 cat_idt,
}
run_test 103 "Purge all requests"
+test_103a() { # checks that cdt_purge cancels pending RESTORE requests and that the blocked readers fail instead of hanging
+ cdt_clear_non_blocking_restore
+
+ # test needs a running copytool
+ copytool setup
+
+ local -a fids=()
+ local i
+
+ mkdir_on_mdt0 $DIR/$tdir
+ for i in {0..9}; do
+ fids+=( $(copy_file /etc/passwd $DIR/$tdir/${tfile}_$i) ) # output is kept as the file's FID (used under .lustre/fid below); presumably copy_file prints it - confirm
+ done
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/*
+
+ local time=0
+ local cnt=0
+ local grep_regex="($(tr ' ' '|' <<< "${fids[*]}")).*action=ARCHIVE.*status=SUCCEED" # alternation over all FIDs: (fid0|fid1|...)
+ echo $grep_regex
+ while [[ $time -lt 5 ]] && [[ $cnt -ne ${#fids[@]} ]]; do # poll at most 5 times, one second apart, until every file is archived
+ cnt=$(do_facet mds1 "$LCTL get_param -n $HSM_PARAM.actions |
+ grep -c -E '$grep_regex'")
+ sleep 1
+ ((++time))
+ done
+ [[ $cnt -eq ${#fids[@]} ]] || error "Fail to archive files $cnt/${#fids[@]}"
+
+ $LFS hsm_release $DIR/$tdir/* # release the files so that the reads below show up as RESTORE actions
+
+ kill_copytools # NOTE(review): presumably leaves no copytool to serve the restores, so they stay pending - confirm
+ wait_copytools || error "Copytool failed to stop"
+
+ local -a pids=()
+ for i in "${fids[@]}"; do
+ cat $DIR/.lustre/fid/$i > /dev/null & pids+=($!) # background reader per FID; PID saved for the wait loop at the end
+ done
+
+ cdt_purge # purge the coordinator's requests; the check below expects every RESTORE to end up CANCELED
+ grep_regex="($(tr ' ' '|' <<< "${fids[*]}")).*action=RESTORE.*status=CANCELED" # same FID alternation, now matching canceled restores
+ cnt=$(do_facet mds1 "$LCTL get_param -n $HSM_PARAM.actions |
+ grep -cE '$grep_regex'")
+
+ [[ "$cnt" -eq ${#fids[@]} ]] ||
+ error "Some request have not been canceled ($cnt/${#fids[@]} canceled)"
+
+ # cat cmds should not hang and should fail
+ for i in "${!pids[@]}"; do # iterate over indices: pids[] and the ${tfile}_N files were created in the same 0..9 order
+ wait ${pids[$i]} &&
+ error "Restore for ${tfile}_$i (${pids[$i]}) should fail" ||
+ true
+ done
+}
+run_test 103a "Purge pending restore requests"
+
DATA=CEA
DATAHEX='[434541]'
test_104() {