* error may happen if coordinator crashes or stopped
* with running request
*/
- car = mdt_cdt_find_request(cdt, larr->arr_hai.hai_cookie, NULL);
+ car = mdt_cdt_find_request(cdt, larr->arr_hai.hai_cookie);
if (car == NULL) {
- last = larr->arr_req_create;
+ last = larr->arr_req_change;
} else {
last = car->car_req_update;
mdt_cdt_put_request(car);
int request_sz;
ENTRY;
- cdt->cdt_thread.t_flags = SVC_RUNNING;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_RUNNING;
+ wake_up(&cdt->cdt_waitq);
CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
mdt_obd_name(mdt), current_pid());
lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period),
NULL, NULL);
- l_wait_event(cdt->cdt_thread.t_ctl_waitq,
- (cdt->cdt_thread.t_flags &
- (SVC_STOPPING|SVC_EVENT)),
+ l_wait_event(cdt->cdt_waitq,
+ cdt->cdt_flags & (SVC_STOPPING|SVC_EVENT),
&lwi);
CDEBUG(D_HSM, "coordinator resumes\n");
- if (cdt->cdt_thread.t_flags & SVC_STOPPING ||
+ if (cdt->cdt_flags & SVC_STOPPING ||
cdt->cdt_state == CDT_STOPPING) {
- cdt->cdt_thread.t_flags &= ~SVC_STOPPING;
+ cdt->cdt_flags &= ~SVC_STOPPING;
rc = 0;
break;
}
/* wake up before timeout, new work arrives */
- if (cdt->cdt_thread.t_flags & SVC_EVENT)
- cdt->cdt_thread.t_flags &= ~SVC_EVENT;
+ if (cdt->cdt_flags & SVC_EVENT)
+ cdt->cdt_flags &= ~SVC_EVENT;
/* if coordinator is suspended continue to wait */
if (cdt->cdt_state == CDT_DISABLE) {
* by mdt_stop_coordinator(), we have to ack
* and cdt cleaning will be done by event sender
*/
- cdt->cdt_thread.t_flags = SVC_STOPPED;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_STOPPED;
+ wake_up(&cdt->cdt_waitq);
}
if (rc != 0)
RETURN(-ESRCH);
/* wake up coordinator */
- cdt->cdt_thread.t_flags = SVC_EVENT;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_EVENT;
+ wake_up(&cdt->cdt_waitq);
RETURN(0);
}
cdt->cdt_state = CDT_STOPPED;
- init_waitqueue_head(&cdt->cdt_thread.t_ctl_waitq);
+ init_waitqueue_head(&cdt->cdt_waitq);
mutex_init(&cdt->cdt_llog_lock);
init_rwsem(&cdt->cdt_agent_lock);
init_rwsem(&cdt->cdt_request_lock);
mutex_init(&cdt->cdt_restore_lock);
- INIT_LIST_HEAD(&cdt->cdt_requests);
+ INIT_LIST_HEAD(&cdt->cdt_request_list);
INIT_LIST_HEAD(&cdt->cdt_agents);
INIT_LIST_HEAD(&cdt->cdt_restore_hdl);
+ cdt->cdt_request_cookie_hash = cfs_hash_create("REQUEST_COOKIE_HASH",
+ CFS_HASH_BITS_MIN,
+ CFS_HASH_BITS_MAX,
+ CFS_HASH_BKT_BITS,
+ 0 /* extra bytes */,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &cdt_request_cookie_hash_ops,
+ CFS_HASH_DEFAULT);
+ if (cdt->cdt_request_cookie_hash == NULL)
+ RETURN(-ENOMEM);
+
rc = lu_env_init(&cdt->cdt_env, LCT_MD_THREAD);
if (rc < 0)
- RETURN(rc);
+ GOTO(out_request_cookie_hash, rc);
/* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
rc = lu_context_init(&cdt->cdt_session, LCT_SERVER_SESSION);
- if (rc == 0) {
- lu_context_enter(&cdt->cdt_session);
- cdt->cdt_env.le_ses = &cdt->cdt_session;
- } else {
- lu_env_fini(&cdt->cdt_env);
- RETURN(rc);
- }
+ if (rc < 0)
+ GOTO(out_env, rc);
+
+ lu_context_enter(&cdt->cdt_session);
+ cdt->cdt_env.le_ses = &cdt->cdt_session;
cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
LASSERT(cdt_mti != NULL);
cdt->cdt_active_req_timeout = 3600;
RETURN(0);
+
+out_env:
+ lu_env_fini(&cdt->cdt_env);
+out_request_cookie_hash:
+ cfs_hash_putref(cdt->cdt_request_cookie_hash);
+ cdt->cdt_request_cookie_hash = NULL;
+
+ return rc;
}
/**
lu_env_fini(&cdt->cdt_env);
+ cfs_hash_putref(cdt->cdt_request_cookie_hash);
+ cdt->cdt_request_cookie_hash = NULL;
+
RETURN(0);
}
" for registered restore: %d\n",
mdt_obd_name(mdt), rc);
+ if (mdt->mdt_bottom->dd_rdonly)
+ RETURN(0);
+
task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
rc = 0;
}
- wait_event(cdt->cdt_thread.t_ctl_waitq,
- (cdt->cdt_thread.t_flags & SVC_RUNNING));
+ wait_event(cdt->cdt_waitq,
+ (cdt->cdt_flags & SVC_RUNNING));
cdt->cdt_state = CDT_RUNNING;
mdt->mdt_opts.mo_coordinator = 1;
if (cdt->cdt_state != CDT_STOPPING) {
/* stop coordinator thread before cleaning */
- cdt->cdt_thread.t_flags = SVC_STOPPING;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
- wait_event(cdt->cdt_thread.t_ctl_waitq,
- cdt->cdt_thread.t_flags & SVC_STOPPED);
+ cdt->cdt_flags = SVC_STOPPING;
+ wake_up(&cdt->cdt_waitq);
+ wait_event(cdt->cdt_waitq,
+ cdt->cdt_flags & SVC_STOPPED);
}
cdt->cdt_state = CDT_STOPPED;
/* start cleaning */
down_write(&cdt->cdt_request_lock);
- list_for_each_entry_safe(car, tmp1, &cdt->cdt_requests,
+ list_for_each_entry_safe(car, tmp1, &cdt->cdt_request_list,
car_request_list) {
+ cfs_hash_del(cdt->cdt_request_cookie_hash,
+ &car->car_hai->hai_cookie,
+ &car->car_cookie_hash);
list_del(&car->car_request_list);
- mdt_cdt_free_request(car);
+ mdt_cdt_put_request(car);
}
up_write(&cdt->cdt_request_lock);
}
/* find the running request to set it canceled */
- car = mdt_cdt_find_request(cdt, hai->hai_cookie, NULL);
+ car = mdt_cdt_find_request(cdt, hai->hai_cookie);
if (car != NULL) {
car->car_canceled = 1;
/* uuid has to be changed to the one running the
int cl_flags = 0, rc = 0;
struct md_hsm mh;
bool is_mh_changed;
+ bool need_changelog = true;
ENTRY;
/* default is to retry */
*status = ARS_WAITING;
- /* find object by FID
+ /* find object by FID, mdt_hsm_get_md_hsm() returns obj or err
* if error/removed continue anyway to get correct reporting done */
obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh);
/* we will update MD HSM only if needed */
/* restore in data FID done, we swap the layouts
* only if restore is successful */
- if (pgs->hpk_errval == 0 && !IS_ERR_OR_NULL(obj)) {
+ if (pgs->hpk_errval == 0 && !IS_ERR(obj)) {
rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid,
&mh);
if (rc) {
if (*status == ARS_WAITING)
GOTO(out, rc);
+ /* restore special case: the ChangeLog record must be created
+ * before giving back the layout lock, so that a concurrent file
+ * updater cannot post an out-of-order ChangeLog */
+ mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ &car->car_hai->hai_fid);
+ need_changelog = false;
+
/* give back layout lock */
mutex_lock(&cdt->cdt_restore_lock);
crh = mdt_hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid);
out:
- /* always add a ChangeLog record */
+ /* add a ChangeLog record unless the restore path already did */
- mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
- &car->car_hai->hai_fid);
+ if (need_changelog)
+ mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ &car->car_hai->hai_fid);
if (!IS_ERR(obj))
mdt_object_put(mti->mti_env, obj);
rc = hsm_cdt_request_completed(mti, pgs, car, &status);
- /* remove request from memory list */
- mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
-
- CDEBUG(D_HSM, "Updating record: fid="DFID" cookie=%#llx"
- " action=%s status=%s\n", PFID(&pgs->hpk_fid),
- pgs->hpk_cookie,
+ CDEBUG(D_HSM, "%s record: fid="DFID" cookie=%#llx action=%s "
+ "status=%s\n",
+ update_record ? "Updating" : "Not updating",
+ PFID(&pgs->hpk_fid), pgs->hpk_cookie,
hsm_copytool_action2name(car->car_hai->hai_action),
agent_req_status2name(status));
+ /* update record first (LU-9075) */
if (update_record) {
int rc1;
pgs->hpk_cookie);
rc = (rc != 0 ? rc : rc1);
}
+
+ /* then remove request from memory list (LU-9075) */
+ mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
+
/* ct has completed a request, so a slot is available, wakeup
* cdt to find new work */
mdt_hsm_cdt_wakeup(mdt);
*/
static int hsm_cancel_all_actions(struct mdt_device *mdt)
{
+ struct lu_env env;
+ struct lu_context session;
struct mdt_thread_info *mti;
struct coordinator *cdt = &mdt->mdt_coordinator;
struct cdt_agent_req *car;
enum cdt_states save_state;
ENTRY;
- /* retrieve coordinator context */
- mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
+ rc = lu_env_init(&env, LCT_MD_THREAD);
+ if (rc < 0)
+ RETURN(rc);
+
+ /* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
+ rc = lu_context_init(&session, LCT_SERVER_SESSION);
+ if (rc < 0)
+ GOTO(out_env, rc);
+
+ lu_context_enter(&session);
+ env.le_ses = &session;
+
+ mti = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
+ LASSERT(mti != NULL);
+
+ mti->mti_env = &env;
+ mti->mti_mdt = mdt;
+
+ hsm_init_ucred(mdt_ucred(mti));
/* disable coordinator */
save_state = cdt->cdt_state;
/* send cancel to all running requests */
down_read(&cdt->cdt_request_lock);
- list_for_each_entry(car, &cdt->cdt_requests, car_request_list) {
+ list_for_each_entry(car, &cdt->cdt_request_list, car_request_list) {
mdt_cdt_get_request(car);
/* request is not yet removed from list, it will be done
* when copytool will return progress
if (hal == NULL) {
mdt_cdt_put_request(car);
up_read(&cdt->cdt_request_lock);
- GOTO(out, rc = -ENOMEM);
+ GOTO(out_cdt_state, rc = -ENOMEM);
}
}
rc = cdt_llog_process(mti->mti_env, mti->mti_mdt,
mdt_cancel_all_cb, &hcad);
-out:
+out_cdt_state:
/* enable coordinator */
cdt->cdt_state = save_state;
+ lu_context_exit(&session);
+ lu_context_fini(&session);
+out_env:
+ lu_env_fini(&env);
RETURN(rc);
}
}
/* remove last ' ' */
m->count--;
- seq_putc(m, '\0');
+ seq_putc(m, '\n');
}
/* methods to read/write HSM policy flags */
rc = -EALREADY;
} else {
cdt->cdt_state = CDT_STOPPING;
+ mdt_hsm_cdt_wakeup(mdt);
}
} else if (strcmp(kernbuf, CDT_DISABLE_CMD) == 0) {
if ((cdt->cdt_state == CDT_STOPPING) ||