* Copyright (c) 2011, 2012 Commissariat a l'energie atomique et aux energies
* alternatives
*
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2013, 2016, Intel Corporation.
* Use is subject to license terms.
*/
/*
* error may happen if coordinator crashes or stopped
* with running request
*/
- car = mdt_cdt_find_request(cdt, larr->arr_hai.hai_cookie, NULL);
+ car = mdt_cdt_find_request(cdt, larr->arr_hai.hai_cookie);
if (car == NULL) {
- last = larr->arr_req_create;
+ last = larr->arr_req_change;
} else {
last = car->car_req_update;
mdt_cdt_put_request(car);
int request_sz;
ENTRY;
- cdt->cdt_thread.t_flags = SVC_RUNNING;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_RUNNING;
+ wake_up(&cdt->cdt_waitq);
CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
mdt_obd_name(mdt), current_pid());
*/
hsd.max_requests = cdt->cdt_max_requests;
request_sz = hsd.max_requests * sizeof(*hsd.request);
- OBD_ALLOC(hsd.request, request_sz);
+ OBD_ALLOC_LARGE(hsd.request, request_sz);
if (!hsd.request)
GOTO(out, rc = -ENOMEM);
lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period),
NULL, NULL);
- l_wait_event(cdt->cdt_thread.t_ctl_waitq,
- (cdt->cdt_thread.t_flags &
- (SVC_STOPPING|SVC_EVENT)),
+ l_wait_event(cdt->cdt_waitq,
+ cdt->cdt_flags & (SVC_STOPPING|SVC_EVENT),
&lwi);
CDEBUG(D_HSM, "coordinator resumes\n");
- if (cdt->cdt_thread.t_flags & SVC_STOPPING ||
+ if (cdt->cdt_flags & SVC_STOPPING ||
cdt->cdt_state == CDT_STOPPING) {
- cdt->cdt_thread.t_flags &= ~SVC_STOPPING;
+ cdt->cdt_flags &= ~SVC_STOPPING;
rc = 0;
break;
}
/* wake up before timeout, new work arrives */
- if (cdt->cdt_thread.t_flags & SVC_EVENT)
- cdt->cdt_thread.t_flags &= ~SVC_EVENT;
+ if (cdt->cdt_flags & SVC_EVENT)
+ cdt->cdt_flags &= ~SVC_EVENT;
/* if coordinator is suspended continue to wait */
if (cdt->cdt_state == CDT_DISABLE) {
/* cdt_max_requests has changed,
* we need to allocate a new buffer
*/
- OBD_FREE(hsd.request, request_sz);
+ OBD_FREE_LARGE(hsd.request, request_sz);
hsd.max_requests = cdt->cdt_max_requests;
request_sz = hsd.max_requests * sizeof(*hsd.request);
- OBD_ALLOC(hsd.request, request_sz);
+ OBD_ALLOC_LARGE(hsd.request, request_sz);
if (!hsd.request) {
rc = -ENOMEM;
break;
EXIT;
out:
if (hsd.request)
- OBD_FREE(hsd.request, request_sz);
+ OBD_FREE_LARGE(hsd.request, request_sz);
if (cdt->cdt_state == CDT_STOPPING) {
/* request comes from /proc path, so we need to clean cdt
* by mdt_stop_coordinator(), we have to ack
* and cdt cleaning will be done by event sender
*/
- cdt->cdt_thread.t_flags = SVC_STOPPED;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_STOPPED;
+ wake_up(&cdt->cdt_waitq);
}
if (rc != 0)
RETURN(-ESRCH);
/* wake up coordinator */
- cdt->cdt_thread.t_flags = SVC_EVENT;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
+ cdt->cdt_flags = SVC_EVENT;
+ wake_up(&cdt->cdt_waitq);
RETURN(0);
}
cdt->cdt_state = CDT_STOPPED;
- init_waitqueue_head(&cdt->cdt_thread.t_ctl_waitq);
+ init_waitqueue_head(&cdt->cdt_waitq);
mutex_init(&cdt->cdt_llog_lock);
init_rwsem(&cdt->cdt_agent_lock);
init_rwsem(&cdt->cdt_request_lock);
mutex_init(&cdt->cdt_restore_lock);
- INIT_LIST_HEAD(&cdt->cdt_requests);
+ INIT_LIST_HEAD(&cdt->cdt_request_list);
INIT_LIST_HEAD(&cdt->cdt_agents);
INIT_LIST_HEAD(&cdt->cdt_restore_hdl);
+ cdt->cdt_request_cookie_hash = cfs_hash_create("REQUEST_COOKIE_HASH",
+ CFS_HASH_BITS_MIN,
+ CFS_HASH_BITS_MAX,
+ CFS_HASH_BKT_BITS,
+ 0 /* extra bytes */,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &cdt_request_cookie_hash_ops,
+ CFS_HASH_DEFAULT);
+ if (cdt->cdt_request_cookie_hash == NULL)
+ RETURN(-ENOMEM);
+
rc = lu_env_init(&cdt->cdt_env, LCT_MD_THREAD);
if (rc < 0)
- RETURN(rc);
+ GOTO(out_request_cookie_hash, rc);
/* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
rc = lu_context_init(&cdt->cdt_session, LCT_SERVER_SESSION);
- if (rc == 0) {
- lu_context_enter(&cdt->cdt_session);
- cdt->cdt_env.le_ses = &cdt->cdt_session;
- } else {
- lu_env_fini(&cdt->cdt_env);
- RETURN(rc);
- }
+ if (rc < 0)
+ GOTO(out_env, rc);
+
+ lu_context_enter(&cdt->cdt_session);
+ cdt->cdt_env.le_ses = &cdt->cdt_session;
cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
LASSERT(cdt_mti != NULL);
cdt->cdt_active_req_timeout = 3600;
RETURN(0);
+
+out_env:
+ lu_env_fini(&cdt->cdt_env);
+out_request_cookie_hash:
+ cfs_hash_putref(cdt->cdt_request_cookie_hash);
+ cdt->cdt_request_cookie_hash = NULL;
+
+ return rc;
}
/**
lu_env_fini(&cdt->cdt_env);
+ cfs_hash_putref(cdt->cdt_request_cookie_hash);
+ cdt->cdt_request_cookie_hash = NULL;
+
RETURN(0);
}
" for registered restore: %d\n",
mdt_obd_name(mdt), rc);
+ if (mdt->mdt_bottom->dd_rdonly)
+ RETURN(0);
+
task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
rc = 0;
}
- wait_event(cdt->cdt_thread.t_ctl_waitq,
- (cdt->cdt_thread.t_flags & SVC_RUNNING));
+ wait_event(cdt->cdt_waitq,
+ (cdt->cdt_flags & SVC_RUNNING));
cdt->cdt_state = CDT_RUNNING;
mdt->mdt_opts.mo_coordinator = 1;
if (cdt->cdt_state != CDT_STOPPING) {
/* stop coordinator thread before cleaning */
- cdt->cdt_thread.t_flags = SVC_STOPPING;
- wake_up(&cdt->cdt_thread.t_ctl_waitq);
- wait_event(cdt->cdt_thread.t_ctl_waitq,
- cdt->cdt_thread.t_flags & SVC_STOPPED);
+ cdt->cdt_flags = SVC_STOPPING;
+ wake_up(&cdt->cdt_waitq);
+ wait_event(cdt->cdt_waitq,
+ cdt->cdt_flags & SVC_STOPPED);
}
cdt->cdt_state = CDT_STOPPED;
/* start cleaning */
down_write(&cdt->cdt_request_lock);
- list_for_each_entry_safe(car, tmp1, &cdt->cdt_requests,
+ list_for_each_entry_safe(car, tmp1, &cdt->cdt_request_list,
car_request_list) {
+ cfs_hash_del(cdt->cdt_request_cookie_hash,
+ &car->car_hai->hai_cookie,
+ &car->car_cookie_hash);
list_del(&car->car_request_list);
- mdt_cdt_free_request(car);
+ mdt_cdt_put_request(car);
}
up_write(&cdt->cdt_request_lock);
RETURN(0);
}
/**
+ * Record in a file's HSM metadata that a copy exists in an archive.
+ *
+ * Fetches the object and its HSM metadata with mdt_hsm_get_md_hsm(). If
+ * HS_EXISTS is already set and the stored archive id equals \a archive_id
+ * nothing is written; otherwise HS_EXISTS and the archive id are set and
+ * stored with mdt_hsm_attr_set().
+ *
+ * \param mti [IN] context
+ * \param fid [IN] FID of the file to update
+ * \param archive_id [IN] archive id to record in the HSM metadata
+ * \retval 0 flag already set for this archive, or update succeeded
+ * \retval -ve error from mdt_hsm_get_md_hsm() or mdt_hsm_attr_set()
+ */
+static int mdt_hsm_set_exists(struct mdt_thread_info *mti,
+ const struct lu_fid *fid,
+ u32 archive_id)
+{
+ struct mdt_object *obj;
+ struct md_hsm mh;
+ int rc;
+
+ obj = mdt_hsm_get_md_hsm(mti, fid, &mh);
+ if (IS_ERR(obj))
+ GOTO(out, rc = PTR_ERR(obj));
+
+ if (mh.mh_flags & HS_EXISTS &&
+ mh.mh_arch_id == archive_id)
+ GOTO(out_obj, rc = 0); /* already recorded for this archive: skip the write */
+
+ mh.mh_flags |= HS_EXISTS;
+ mh.mh_arch_id = archive_id;
+ rc = mdt_hsm_attr_set(mti, obj, &mh);
+
+out_obj:
+ mdt_object_put(mti->mti_env, obj); /* give back the object obtained above */
+out:
+ return rc;
+}
+
/**
* register all requests from an hal in the memory list
* \param mti [IN] context
}
/* find the running request to set it canceled */
- car = mdt_cdt_find_request(cdt, hai->hai_cookie, NULL);
+ car = mdt_cdt_find_request(cdt, hai->hai_cookie);
if (car != NULL) {
car->car_canceled = 1;
/* uuid has to be changed to the one running the
}
if (hai->hai_action == HSMA_ARCHIVE) {
- struct mdt_object *obj;
- struct md_hsm hsm;
-
- obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
- if (IS_ERR(obj) && (PTR_ERR(obj) == -ENOENT))
+ rc = mdt_hsm_set_exists(mti, &hai->hai_fid,
+ hal->hal_archive_id);
+ if (rc == -ENOENT)
continue;
- if (IS_ERR(obj))
- GOTO(out, rc = PTR_ERR(obj));
-
- hsm.mh_flags |= HS_EXISTS;
- hsm.mh_arch_id = hal->hal_archive_id;
- rc = mdt_hsm_attr_set(mti, obj, &hsm);
- mdt_object_put(mti->mti_env, obj);
- if (rc)
+ else if (rc < 0)
GOTO(out, rc);
}
/**
* swap layouts between 2 fids
* \param mti [IN] context
- * \param fid1 [IN]
- * \param fid2 [IN]
+ * \param obj [IN] object whose layout is swapped with the one of dfid;
+ *                 the caller already holds the layout lock on it
+ * \param dfid [IN] FID of the data file; it is looked up and its layout
+ *                  lock is taken (LCK_EX) by this function
* \param mh_common [IN] MD HSM
+ *                  (HS_RELEASED and HS_DIRTY are cleared in place and the
+ *                  result is set on the dfid object before the swap)
+ * \retval -ENOENT if obj or the dfid object does not exist
+ * \retval otherwise the error from the dfid lookup or from
+ *         mdt_hsm_attr_set(), else the result of mo_swap_layouts()
*/
static int hsm_swap_layouts(struct mdt_thread_info *mti,
- const lustre_fid *fid, const lustre_fid *dfid,
+ struct mdt_object *obj, const struct lu_fid *dfid,
struct md_hsm *mh_common)
{
- struct mdt_device *mdt = mti->mti_mdt;
- struct mdt_object *child1, *child2;
- struct mdt_lock_handle *lh2;
+ struct mdt_object *dobj;
+ struct mdt_lock_handle *dlh;
int rc;
ENTRY;
- child1 = mdt_object_find(mti->mti_env, mdt, fid);
- if (IS_ERR(child1))
- GOTO(out, rc = PTR_ERR(child1));
+ if (!mdt_object_exists(obj))
+ GOTO(out, rc = -ENOENT); /* nothing acquired yet: "out" only returns rc */
- /* we already have layout lock on FID so take only
+ /* we already have layout lock on obj so take only
* on dfid */
- lh2 = &mti->mti_lh[MDT_LH_OLD];
- mdt_lock_reg_init(lh2, LCK_EX);
- child2 = mdt_object_find_lock(mti, dfid, lh2, MDS_INODELOCK_LAYOUT);
- if (IS_ERR(child2))
- GOTO(out_child1, rc = PTR_ERR(child2));
+ dlh = &mti->mti_lh[MDT_LH_OLD];
+ mdt_lock_reg_init(dlh, LCK_EX);
+ dobj = mdt_object_find_lock(mti, dfid, dlh, MDS_INODELOCK_LAYOUT);
+ if (IS_ERR(dobj))
+ GOTO(out, rc = PTR_ERR(dobj));
/* if copy tool closes the volatile before sending the final
* progress through llapi_hsm_copy_end(), all the objects
* are removed and mdd_swap_layout LBUG */
- if (!mdt_object_exists(child2)) {
+ if (!mdt_object_exists(dobj)) {
CERROR("%s: Copytool has closed volatile file "DFID"\n",
mdt_obd_name(mti->mti_mdt), PFID(dfid));
- GOTO(out_child2, rc = -ENOENT);
+ GOTO(out_dobj, rc = -ENOENT);
}
/* Since we only handle restores here, unconditionally use
* SWAP_LAYOUTS_MDS_HSM flag to ensure original layout will
* only need to clear RELEASED and DIRTY.
*/
mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY);
- rc = mdt_hsm_attr_set(mti, child2, mh_common);
+ rc = mdt_hsm_attr_set(mti, dobj, mh_common);
if (rc == 0)
rc = mo_swap_layouts(mti->mti_env,
- mdt_object_child(child1),
- mdt_object_child(child2),
+ mdt_object_child(obj),
+ mdt_object_child(dobj),
SWAP_LAYOUTS_MDS_HSM);
-out_child2:
- mdt_object_unlock_put(mti, child2, lh2, 1);
-out_child1:
- mdt_object_put(mti->mti_env, child1);
+out_dobj:
+ mdt_object_unlock_put(mti, dobj, dlh, 1);
out:
RETURN(rc);
}
int cl_flags = 0, rc = 0;
struct md_hsm mh;
bool is_mh_changed;
+ bool need_changelog = true;
ENTRY;
/* default is to retry */
*status = ARS_WAITING;
- /* find object by FID */
+ /* find object by FID; mdt_hsm_get_md_hsm() returns obj or an error
+ * pointer. On error (or if the object was removed) continue anyway
+ * so that correct reporting is still done */
obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh);
/* we will update MD HSM only if needed */
is_mh_changed = false;
- if (IS_ERR(obj)) {
- /* object removed */
- *status = ARS_SUCCEED;
- goto unlock;
- }
/* no need to change mh->mh_arch_id
* mdt_hsm_get_md_hsm() got it from disk and it is still valid
*status = ARS_SUCCEED;
break;
default:
+ /* retry only if the current policy allows it, a retry was
+ * requested (HP_FLAG_RETRY), and the object is not in
+ * error/removed */
*status = (cdt->cdt_policy & CDT_NORETRY_ACTION ||
- !(pgs->hpk_flags & HP_FLAG_RETRY) ?
- ARS_FAILED : ARS_WAITING);
+ !(pgs->hpk_flags & HP_FLAG_RETRY) ||
+ IS_ERR(obj)) ? ARS_FAILED : ARS_WAITING;
break;
}
mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
/* unlock is done later, after layout lock management */
- if (is_mh_changed)
+ if (is_mh_changed && !IS_ERR(obj))
rc = mdt_hsm_attr_set(mti, obj, &mh);
-unlock:
/* we give back layout lock only if restore was successful or
- * if restore was canceled or if policy is to not retry
+ * if no retry will be attempted and if object is still alive,
* in other cases we just unlock the object */
- if (car->car_hai->hai_action == HSMA_RESTORE &&
- (pgs->hpk_errval == 0 || pgs->hpk_errval == ECANCELED ||
- cdt->cdt_policy & CDT_NORETRY_ACTION)) {
+ if (car->car_hai->hai_action == HSMA_RESTORE) {
struct cdt_restore_handle *crh;
/* restore in data FID done, we swap the layouts
* only if restore is successful */
- if (pgs->hpk_errval == 0) {
- rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid,
- &car->car_hai->hai_dfid, &mh);
+ if (pgs->hpk_errval == 0 && !IS_ERR(obj)) {
+ rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid,
+ &mh);
if (rc) {
if (cdt->cdt_policy & CDT_NORETRY_ACTION)
*status = ARS_FAILED;
if (*status == ARS_WAITING)
GOTO(out, rc);
+ /* restore special case: the ChangeLog record must be created
+ * before giving back the layout lock, otherwise a concurrent
+ * file updater could post an out-of-order ChangeLog record */
+ mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ &car->car_hai->hai_fid);
+ need_changelog = false;
+
/* give back layout lock */
mutex_lock(&cdt->cdt_restore_lock);
crh = mdt_hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid);
if (crh != NULL)
list_del(&crh->crh_list);
mutex_unlock(&cdt->cdt_restore_lock);
- /* just give back layout lock, we keep
- * the reference which is given back
- * later with the lock for HSM flags */
- if (!IS_ERR(obj) && crh != NULL)
- mdt_object_unlock(mti, obj, &crh->crh_lh, 1);
+ /* Just give back layout lock, we keep the reference
+ * which is given back later with the lock for HSM
+ * flags.
+ * XXX obj may be invalid so we do not pass it. */
+ if (crh != NULL)
+ mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
if (crh != NULL)
OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
GOTO(out, rc);
out:
- if (obj != NULL && !IS_ERR(obj)) {
- mo_changelog(env, CL_HSM, cl_flags,
- mdt_object_child(obj));
+ /* always add a ChangeLog record */
+ if (need_changelog)
+ mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ &car->car_hai->hai_fid);
+
+ if (!IS_ERR(obj))
mdt_object_put(mti->mti_env, obj);
- }
RETURN(rc);
}
rc = hsm_cdt_request_completed(mti, pgs, car, &status);
- /* remove request from memory list */
- mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
-
- CDEBUG(D_HSM, "Updating record: fid="DFID" cookie=%#llx"
- " action=%s status=%s\n", PFID(&pgs->hpk_fid),
- pgs->hpk_cookie,
+ CDEBUG(D_HSM, "%s record: fid="DFID" cookie=%#llx action=%s "
+ "status=%s\n",
+ update_record ? "Updating" : "Not updating",
+ PFID(&pgs->hpk_fid), pgs->hpk_cookie,
hsm_copytool_action2name(car->car_hai->hai_action),
agent_req_status2name(status));
+ /* update record first (LU-9075) */
if (update_record) {
int rc1;
pgs->hpk_cookie);
rc = (rc != 0 ? rc : rc1);
}
+
+ /* then remove request from memory list (LU-9075) */
+ mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
+
/* ct has completed a request, so a slot is available, wakeup
* cdt to find new work */
mdt_hsm_cdt_wakeup(mdt);
*/
static int hsm_cancel_all_actions(struct mdt_device *mdt)
{
+ struct lu_env env;
+ struct lu_context session;
struct mdt_thread_info *mti;
struct coordinator *cdt = &mdt->mdt_coordinator;
struct cdt_agent_req *car;
enum cdt_states save_state;
ENTRY;
- /* retrieve coordinator context */
- mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
+ rc = lu_env_init(&env, LCT_MD_THREAD);
+ if (rc < 0)
+ RETURN(rc);
+
+ /* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
+ rc = lu_context_init(&session, LCT_SERVER_SESSION);
+ if (rc < 0)
+ GOTO(out_env, rc);
+
+ lu_context_enter(&session);
+ env.le_ses = &session;
+
+ mti = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
+ LASSERT(mti != NULL);
+
+ mti->mti_env = &env;
+ mti->mti_mdt = mdt;
+
+ hsm_init_ucred(mdt_ucred(mti));
/* disable coordinator */
save_state = cdt->cdt_state;
/* send cancel to all running requests */
down_read(&cdt->cdt_request_lock);
- list_for_each_entry(car, &cdt->cdt_requests, car_request_list) {
+ list_for_each_entry(car, &cdt->cdt_request_list, car_request_list) {
mdt_cdt_get_request(car);
/* request is not yet removed from list, it will be done
* when copytool will return progress
if (hal == NULL) {
mdt_cdt_put_request(car);
up_read(&cdt->cdt_request_lock);
- GOTO(out, rc = -ENOMEM);
+ GOTO(out_cdt_state, rc = -ENOMEM);
}
}
rc = cdt_llog_process(mti->mti_env, mti->mti_mdt,
mdt_cancel_all_cb, &hcad);
-out:
+out_cdt_state:
/* enable coordinator */
cdt->cdt_state = save_state;
+ lu_context_exit(&session);
+ lu_context_fini(&session);
+out_env:
+ lu_env_fini(&env);
RETURN(rc);
}
/**
* check if a request is compatible with file status
* \param hai [IN] request description
- * \param hal_an [IN] request archive number (not used)
+ * \param archive_id [IN] request archive id
* \param rq_flags [IN] request flags
* \param hsm [IN] file HSM metadata
* \retval boolean
*/
bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai,
- const int hal_an, const __u64 rq_flags,
+ u32 archive_id, u64 rq_flags,
const struct md_hsm *hsm)
{
int is_compat = false;
if (!(hsm_flags & HS_NOARCHIVE) &&
(hsm_flags & HS_DIRTY || !(hsm_flags & HS_ARCHIVED)))
is_compat = true;
+
+ if (hsm_flags & HS_EXISTS &&
+ archive_id != 0 &&
+ archive_id != hsm->mh_arch_id)
+ is_compat = false;
+
break;
case HSMA_RESTORE:
if (!(hsm_flags & HS_DIRTY) && (hsm_flags & HS_RELEASED) &&
}
/* remove last ' ' */
m->count--;
- seq_putc(m, '\0');
+ seq_putc(m, '\n');
}
/* methods to read/write HSM policy flags */
rc = -EALREADY;
} else {
cdt->cdt_state = CDT_STOPPING;
+ mdt_hsm_cdt_wakeup(mdt);
}
} else if (strcmp(kernbuf, CDT_DISABLE_CMD) == 0) {
if ((cdt->cdt_state == CDT_STOPPING) ||