#include <lustre_net.h>
#include <lustre_export.h>
#include <obd.h>
-#include <obd_lov.h>
#include <lprocfs_status.h>
#include <lustre_log.h>
#include "mdt_internal.h"
char fs_name[MTI_NAME_MAXLEN+1];
/* request to be send to agents */
int request_sz; /** allocated size */
- int max_request; /** vector size */
+ int max_requests; /** vector size */
int request_cnt; /** used count */
struct {
int hal_sz;
/* Are agents full? */
if (atomic_read(&cdt->cdt_request_count) ==
- cdt->cdt_max_request)
+ cdt->cdt_max_requests)
break;
/* first search if the request if known in the list we have
* build and if there is room in the request vector */
empty_slot = -1;
found = -1;
- for (i = 0; i < hsd->max_request &&
+ for (i = 0; i < hsd->max_requests &&
(empty_slot == -1 || found == -1); i++) {
if (hsd->request[i].hal == NULL) {
empty_slot = i;
/* test if request too long, if yes cancel it
* the same way the copy tool acknowledge a cancel request */
- if ((last + cdt->cdt_timeout) < cfs_time_current_sec()) {
+ if ((last + cdt->cdt_active_req_timeout)
+ < cfs_time_current_sec()) {
struct hsm_progress_kernel pgs;
dump_llog_agent_req_rec("mdt_coordinator_cb(): "
case ARS_FAILED:
case ARS_CANCELED:
case ARS_SUCCEED:
- if ((larr->arr_req_change + cdt->cdt_delay) <
+ if ((larr->arr_req_change + cdt->cdt_grace_delay) <
cfs_time_current_sec())
RETURN(LLOG_DEL_RECORD);
break;
hsd.max_cookie = 0;
hsd.cookie_cnt = 0;
hsd.cookies = NULL;
- /* we use a copy of cdt_max_request in the cb, so if cdt_max_request
+ /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests
* increases due to a change from /proc we do not overflow the
* hsd.request[] vector
*/
- hsd.max_request = cdt->cdt_max_request;
- hsd.request_sz = hsd.max_request * sizeof(*hsd.request);
+ hsd.max_requests = cdt->cdt_max_requests;
+ hsd.request_sz = hsd.max_requests * sizeof(*hsd.request);
OBD_ALLOC(hsd.request, hsd.request_sz);
if (!hsd.request)
GOTO(out, rc = -ENOMEM);
CDEBUG(D_HSM, "coordinator starts reading llog\n");
- if (hsd.max_request != cdt->cdt_max_request) {
- /* cdt_max_request has changed,
+ if (hsd.max_requests != cdt->cdt_max_requests) {
+ /* cdt_max_requests has changed,
* we need to allocate a new buffer
*/
OBD_FREE(hsd.request, hsd.request_sz);
- hsd.max_request = cdt->cdt_max_request;
+ hsd.max_requests = cdt->cdt_max_requests;
hsd.request_sz =
- hsd.max_request * sizeof(*hsd.request);
+ hsd.max_requests * sizeof(*hsd.request);
OBD_ALLOC(hsd.request, hsd.request_sz);
if (!hsd.request) {
rc = -ENOMEM;
}
/* here hsd contains a list of requests to be started */
- for (i = 0; i < hsd.max_request; i++) {
+ for (i = 0; i < hsd.max_requests; i++) {
struct hsm_action_list *hal;
struct hsm_action_item *hai;
__u64 *cookies;
/* still room for work ? */
if (atomic_read(&cdt->cdt_request_count) ==
- cdt->cdt_max_request)
+ cdt->cdt_max_requests)
break;
if (hsd.request[i].hal == NULL)
}
/* free hal allocated by callback */
- for (i = 0; i < hsd.max_request; i++) {
+ for (i = 0; i < hsd.max_requests; i++) {
if (hsd.request[i].hal) {
OBD_FREE(hsd.request[i].hal,
hsd.request[i].hal_sz);
RETURN(rc);
/* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
- rc = lu_context_init(&cdt->cdt_session, LCT_SESSION);
+ rc = lu_context_init(&cdt->cdt_session, LCT_SERVER_SESSION);
if (rc == 0) {
lu_context_enter(&cdt->cdt_session);
cdt->cdt_env.le_ses = &cdt->cdt_session;
/* default values for /proc tunnables
* can be override by MGS conf */
cdt->cdt_default_archive_id = 1;
- cdt->cdt_delay = 60;
+ cdt->cdt_grace_delay = 60;
cdt->cdt_loop_period = 10;
- cdt->cdt_max_request = 3;
+ cdt->cdt_max_requests = 3;
cdt->cdt_policy = CDT_DEFAULT_POLICY;
- cdt->cdt_timeout = 3600;
+ cdt->cdt_active_req_timeout = 3600;
RETURN(0);
}
/* just need to be larger than previous one */
/* cdt_last_cookie is protected by cdt_llog_lock */
cdt->cdt_last_cookie = cfs_time_current_sec();
-
atomic_set(&cdt->cdt_request_count, 0);
cdt->cdt_user_request_mask = (1UL << HSMA_RESTORE);
cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE);
rc = mdt_hsm_pending_restore(cdt_mti);
if (rc)
CERROR("%s: cannot take the layout locks needed"
- " for registered restore: %d",
+ " for registered restore: %d\n",
mdt_obd_name(mdt), rc);
task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
* \param mti [IN] context
* \param fid1 [IN]
* \param fid2 [IN]
+ * \param mh_common [IN] MD HSM attributes to set on the data FID object;
+ *                       RELEASED and DIRTY flags are cleared in place
*/
static int hsm_swap_layouts(struct mdt_thread_info *mti,
- const lustre_fid *fid, const lustre_fid *dfid)
+ const lustre_fid *fid, const lustre_fid *dfid,
+ struct md_hsm *mh_common)
{
struct mdt_device *mdt = mti->mti_mdt;
struct mdt_object *child1, *child2;
/* if copy tool closes the volatile before sending the final
* progress through llapi_hsm_copy_end(), all the objects
* are removed and mdd_swap_layout LBUG */
- if (mdt_object_exists(child2)) {
- rc = mo_swap_layouts(mti->mti_env, mdt_object_child(child1),
- mdt_object_child(child2), 0);
- } else {
+ if (!mdt_object_exists(child2)) {
CERROR("%s: Copytool has closed volatile file "DFID"\n",
mdt_obd_name(mti->mti_mdt), PFID(dfid));
- rc = -ENOENT;
+ GOTO(out_child2, rc = -ENOENT);
}
+ /* Since we only handle restores here, unconditionally use the
+ * SWAP_LAYOUTS_MDS_HSM flag so that the original layout is
+ * preserved if swap_layout fails, instead of leaving the file
+ * in an intermediate, incoherent state.
+ * The HSM xattr of the data FID must be set up first: reuse
+ * the mti and mh values preset for the FID in
+ * hsm_cdt_request_completed(), only clearing RELEASED and DIRTY.
+ */
+ mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY);
+ rc = mdt_hsm_attr_set(mti, child2, mh_common);
+ if (rc == 0)
+ rc = mo_swap_layouts(mti->mti_env,
+ mdt_object_child(child1),
+ mdt_object_child(child2),
+ SWAP_LAYOUTS_MDS_HSM);
+out_child2:
mdt_object_unlock_put(mti, child2, lh2, 1);
out_child1:
mdt_object_put(mti->mti_env, child1);
case HSMA_RESTORE:
hsm_set_cl_event(&cl_flags, HE_RESTORE);
- /* clear RELEASED and DIRTY */
- mh.mh_flags &= ~(HS_RELEASED | HS_DIRTY);
+ /* do not clear RELEASED and DIRTY here:
+ * hsm_swap_layouts() clears them before swapping the layouts
+ */
+
/* Restoring has changed the file version on
* disk. */
mh.mh_arch_ver = pgs->hpk_data_version;
* only if restore is successfull */
if (pgs->hpk_errval == 0) {
rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid,
- &car->car_hai->hai_dfid);
+ &car->car_hai->hai_dfid, &mh);
if (rc) {
if (cdt->cdt_policy & CDT_NORETRY_ACTION)
*status = ARS_FAILED;
RETURN(-ENOMEM);
if (copy_from_user(buf, buffer, count))
- RETURN(-EFAULT);
+ GOTO(out, rc = -EFAULT);
+
buf[count] = '\0';
start = buf;
sz = PAGE_SIZE;
OBD_ALLOC(msg, sz);
if (!msg)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
hsm_policy_bit2str(0, false, msg, sz);
CWARN("%s: '%s' is unknown, "
}
GENERATE_PROC_METHOD(cdt_loop_period)
-GENERATE_PROC_METHOD(cdt_delay)
-GENERATE_PROC_METHOD(cdt_timeout)
-GENERATE_PROC_METHOD(cdt_max_request)
+GENERATE_PROC_METHOD(cdt_grace_delay)
+GENERATE_PROC_METHOD(cdt_active_req_timeout)
+GENERATE_PROC_METHOD(cdt_max_requests)
GENERATE_PROC_METHOD(cdt_default_archive_id)
/*
#define CDT_DISABLE_CMD "disabled"
#define CDT_PURGE_CMD "purge"
#define CDT_HELP_CMD "help"
+#define CDT_MAX_CMD_LEN 10
-int lprocfs_wr_hsm_cdt_control(struct file *file, const char *buffer,
+int lprocfs_wr_hsm_cdt_control(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
struct obd_device *obd = data;
struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
struct coordinator *cdt = &(mdt->mdt_coordinator);
int rc, usage = 0;
+ char kernbuf[CDT_MAX_CMD_LEN];
ENTRY;
+ if (count == 0 || count >= sizeof(kernbuf))
+ RETURN(-EINVAL);
+
+ if (copy_from_user(kernbuf, buffer, count))
+ RETURN(-EFAULT);
+ kernbuf[count] = 0;
+
+ if (kernbuf[count - 1] == '\n')
+ kernbuf[count - 1] = 0;
+
rc = 0;
- if (strncmp(buffer, CDT_ENABLE_CMD, strlen(CDT_ENABLE_CMD)) == 0) {
+ if (strcmp(kernbuf, CDT_ENABLE_CMD) == 0) {
if (cdt->cdt_state == CDT_DISABLE) {
cdt->cdt_state = CDT_RUNNING;
mdt_hsm_cdt_wakeup(mdt);
} else {
rc = mdt_hsm_cdt_start(mdt);
}
- } else if (strncmp(buffer, CDT_STOP_CMD, strlen(CDT_STOP_CMD)) == 0) {
- cdt->cdt_state = CDT_STOPPING;
- } else if (strncmp(buffer, CDT_DISABLE_CMD,
- strlen(CDT_DISABLE_CMD)) == 0) {
- cdt->cdt_state = CDT_DISABLE;
- } else if (strncmp(buffer, CDT_PURGE_CMD, strlen(CDT_PURGE_CMD)) == 0) {
+ } else if (strcmp(kernbuf, CDT_STOP_CMD) == 0) {
+ if ((cdt->cdt_state == CDT_STOPPING) ||
+ (cdt->cdt_state == CDT_STOPPED)) {
+ CERROR("%s: Coordinator already stopped\n",
+ mdt_obd_name(mdt));
+ rc = -EALREADY;
+ } else {
+ cdt->cdt_state = CDT_STOPPING;
+ }
+ } else if (strcmp(kernbuf, CDT_DISABLE_CMD) == 0) {
+ if ((cdt->cdt_state == CDT_STOPPING) ||
+ (cdt->cdt_state == CDT_STOPPED)) {
+ CERROR("%s: Coordinator is stopped\n",
+ mdt_obd_name(mdt));
+ rc = -EINVAL;
+ } else {
+ cdt->cdt_state = CDT_DISABLE;
+ }
+ } else if (strcmp(kernbuf, CDT_PURGE_CMD) == 0) {
rc = hsm_cancel_all_actions(mdt);
- } else if (strncmp(buffer, CDT_HELP_CMD, strlen(CDT_HELP_CMD)) == 0) {
+ } else if (strcmp(kernbuf, CDT_HELP_CMD) == 0) {
usage = 1;
} else {
usage = 1;
}
static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = {
- { "agents", NULL, NULL, NULL, &mdt_hsm_agent_fops, 0 },
- { "agent_actions", NULL, NULL, NULL,
- &mdt_agent_actions_fops, 0444 },
- { "default_archive_id", lprocfs_rd_hsm_cdt_default_archive_id,
- lprocfs_wr_hsm_cdt_default_archive_id,
- NULL, NULL, 0 },
- { "grace_delay", lprocfs_rd_hsm_cdt_delay,
- lprocfs_wr_hsm_cdt_delay,
- NULL, NULL, 0 },
- { "loop_period", lprocfs_rd_hsm_cdt_loop_period,
- lprocfs_wr_hsm_cdt_loop_period,
- NULL, NULL, 0 },
- { "max_requests", lprocfs_rd_hsm_cdt_max_request,
- lprocfs_wr_hsm_cdt_max_request,
- NULL, NULL, 0 },
- { "policy", lprocfs_rd_hsm_policy, lprocfs_wr_hsm_policy,
- NULL, NULL, 0 },
- { "request_timeout", lprocfs_rd_hsm_cdt_timeout,
- lprocfs_wr_hsm_cdt_timeout,
- NULL, NULL, 0 },
- { "requests", NULL, NULL, NULL, &mdt_hsm_request_fops, 0 },
- { "user_request_mask", lprocfs_rd_hsm_user_request_mask,
- lprocfs_wr_hsm_user_request_mask, },
- { "group_request_mask", lprocfs_rd_hsm_group_request_mask,
- lprocfs_wr_hsm_group_request_mask, },
- { "other_request_mask", lprocfs_rd_hsm_other_request_mask,
- lprocfs_wr_hsm_other_request_mask, },
- { NULL }
+ { "agents", NULL, NULL, NULL, &mdt_hsm_agent_fops,
+ 0 },
+ { "actions", NULL, NULL, NULL, &mdt_hsm_actions_fops,
+ 0444 },
+ { "default_archive_id", lprocfs_rd_hsm_cdt_default_archive_id,
+ lprocfs_wr_hsm_cdt_default_archive_id,
+ NULL, NULL, 0 },
+ { "grace_delay", lprocfs_rd_hsm_cdt_grace_delay,
+ lprocfs_wr_hsm_cdt_grace_delay,
+ NULL, NULL, 0 },
+ { "loop_period", lprocfs_rd_hsm_cdt_loop_period,
+ lprocfs_wr_hsm_cdt_loop_period,
+ NULL, NULL, 0 },
+ { "max_requests", lprocfs_rd_hsm_cdt_max_requests,
+ lprocfs_wr_hsm_cdt_max_requests,
+ NULL, NULL, 0 },
+ { "policy", lprocfs_rd_hsm_policy,
+ lprocfs_wr_hsm_policy,
+ NULL, NULL, 0 },
+ { "active_request_timeout", lprocfs_rd_hsm_cdt_active_req_timeout,
+ lprocfs_wr_hsm_cdt_active_req_timeout,
+ NULL, NULL, 0 },
+ { "active_requests", NULL, NULL, NULL,
+ &mdt_hsm_active_requests_fops, 0 },
+ { "user_request_mask", lprocfs_rd_hsm_user_request_mask,
+ lprocfs_wr_hsm_user_request_mask, },
+ { "group_request_mask", lprocfs_rd_hsm_group_request_mask,
+ lprocfs_wr_hsm_group_request_mask, },
+ { "other_request_mask", lprocfs_rd_hsm_other_request_mask,
+ lprocfs_wr_hsm_other_request_mask, },
+ { 0 }
};