X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_coordinator.c;h=536320ececb6645e4be141821ffcb2c546e0db04;hp=a48bc3682f0d87a6424ed6154627f6468bfa2d48;hb=9811edb26d3bff66c61acc39a72554461445298f;hpb=1294081248bbd3e9dc23eb1ffb275dc59a4278db diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index a48bc36..536320e 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -20,10 +20,11 @@ * GPL HEADER END */ /* - * Copyright (c) 2012, 2013, Intel Corporation. - * Use is subject to license terms. * Copyright (c) 2011, 2012 Commissariat a l'energie atomique et aux energies * alternatives + * + * Copyright (c) 2013, 2016, Intel Corporation. + * Use is subject to license terms. */ /* * lustre/mdt/mdt_coordinator.c @@ -37,13 +38,14 @@ #define DEBUG_SUBSYSTEM S_MDS +#include #include #include #include #include -#include #include #include +#include #include "mdt_internal.h" static struct lprocfs_vars lprocfs_mdt_hsm_vars[]; @@ -53,7 +55,7 @@ static struct lprocfs_vars lprocfs_mdt_hsm_vars[]; * \param mti [IN] context * \param fid [IN] object fid * \param hsm [OUT] HSM meta data - * \retval obj + * \retval obj or error (-ENOENT if not found) */ struct mdt_object *mdt_hsm_get_md_hsm(struct mdt_thread_info *mti, const struct lu_fid *fid, @@ -100,17 +102,17 @@ void mdt_hsm_dump_hal(int level, const char *prefix, struct hsm_action_item *hai; char buf[12]; - CDEBUG(level, "%s: HAL header: version %X count %d compound "LPX64 - " archive_id %d flags "LPX64"\n", + CDEBUG(level, "%s: HAL header: version %X count %d compound %#llx" + " archive_id %d flags %#llx\n", prefix, hal->hal_version, hal->hal_count, hal->hal_compound_id, hal->hal_archive_id, hal->hal_flags); - hai = hai_zero(hal); + hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++) { sz = hai->hai_len - sizeof(*hai); CDEBUG(level, "%s %d: fid="DFID" dfid="DFID - " compound/cookie="LPX64"/"LPX64 - " action=%s extent="LPX64"-"LPX64" gid="LPX64 + " compound/cookie=%#llx/%#llx" + " action=%s extent=%#llx-%#llx gid=%#llx" " datalen=%d data=[%s]\n", prefix, i, PFID(&hai->hai_fid), PFID(&hai->hai_dfid), @@ -128,22 +130,19 @@ void mdt_hsm_dump_hal(int level, const char *prefix, * data passed to llog_cat_process() callback * to scan requests and take actions */ +struct hsm_scan_request { + int hal_sz; + int hal_used_sz; + struct hsm_action_list *hal; +}; + struct hsm_scan_data { struct mdt_thread_info *mti; char fs_name[MTI_NAME_MAXLEN+1]; /* request to be send to agents */ - int request_sz; /** allocated size */ - int max_request; /** vector size */ + int max_requests; /** vector size */ int request_cnt; /** used count */ - struct { - int hal_sz; - int hal_used_sz; - struct hsm_action_list *hal; - } *request; - /* records to be canceled */ - int max_cookie; /** vector size */ - int cookie_cnt; /** used count */ - __u64 *cookies; + struct hsm_scan_request *request; }; /** @@ -162,7 +161,7 @@ static int mdt_coordinator_cb(const struct lu_env *env, struct llog_rec_hdr *hdr, void *data) { - const struct llog_agent_req_rec *larr; + struct llog_agent_req_rec *larr; struct hsm_scan_data *hsd; struct hsm_action_item *hai; struct mdt_device *mdt; @@ -178,68 +177,58 @@ static int mdt_coordinator_cb(const struct lu_env *env, dump_llog_agent_req_rec("mdt_coordinator_cb(): ", larr); switch (larr->arr_status) { case ARS_WAITING: { - int i, empty_slot, found; + int i; + struct hsm_scan_request *request; /* Are agents full? */ - if (atomic_read(&cdt->cdt_request_count) == - cdt->cdt_max_request) + if (atomic_read(&cdt->cdt_request_count) >= + cdt->cdt_max_requests) break; - /* first search if the request if known in the list we have - * build and if there is room in the request vector */ - empty_slot = -1; - found = -1; - for (i = 0; i < hsd->max_request && - (empty_slot == -1 || found == -1); i++) { - if (hsd->request[i].hal == NULL) { - empty_slot = i; - continue; - } + /* first search whether the request is found in the + * list we have built. */ + request = NULL; + for (i = 0; i < hsd->request_cnt; i++) { if (hsd->request[i].hal->hal_compound_id == - larr->arr_compound_id) { - found = i; - continue; + larr->arr_compound_id) { + request = &hsd->request[i]; + break; } } - if ((found == -1) && (empty_slot == -1)) - /* unknown request and no more room for new request, - * continue scan for to find other entries for - * already found request - */ - RETURN(0); - if (found == -1) { + if (!request) { struct hsm_action_list *hal; - /* request is not already known */ + if (hsd->request_cnt == hsd->max_requests) + /* Unknown request and no more room + * for a new request. Continue to scan + * to find other entries for already + * existing requests. + */ + RETURN(0); + + request = &hsd->request[hsd->request_cnt]; + /* allocates hai vector size just needs to be large * enough */ - hsd->request[empty_slot].hal_sz = - sizeof(*hsd->request[empty_slot].hal) + - cfs_size_round(MTI_NAME_MAXLEN+1) + - 2 * cfs_size_round(larr->arr_hai.hai_len); - OBD_ALLOC(hal, hsd->request[empty_slot].hal_sz); - if (!hal) { - CERROR("%s: Cannot allocate memory (%d o)" - "for compound "LPX64"\n", - mdt_obd_name(mdt), - hsd->request[i].hal_sz, - larr->arr_compound_id); + request->hal_sz = + sizeof(*request->hal) + + cfs_size_round(MTI_NAME_MAXLEN+1) + + 2 * cfs_size_round(larr->arr_hai.hai_len); + OBD_ALLOC(hal, request->hal_sz); + if (!hal) RETURN(-ENOMEM); - } hal->hal_version = HAL_VERSION; - strncpy(hal->hal_fsname, hsd->fs_name, - MTI_NAME_MAXLEN); - hal->hal_fsname[MTI_NAME_MAXLEN] = '\0'; + strlcpy(hal->hal_fsname, hsd->fs_name, + MTI_NAME_MAXLEN + 1); hal->hal_compound_id = larr->arr_compound_id; hal->hal_archive_id = larr->arr_archive_id; hal->hal_flags = larr->arr_flags; hal->hal_count = 0; - hsd->request[empty_slot].hal_used_sz = hal_size(hal); - hsd->request[empty_slot].hal = hal; + request->hal_used_sz = hal_size(hal); + request->hal = hal; hsd->request_cnt++; - found = empty_slot; - hai = hai_zero(hal); + hai = hai_first(hal); } else { /* request is known */ /* we check if record archive num is the same as the @@ -250,48 +239,43 @@ static int mdt_coordinator_cb(const struct lu_env *env, * where the files are not archived in the same backend */ if (larr->arr_archive_id != - hsd->request[found].hal->hal_archive_id) + request->hal->hal_archive_id) RETURN(0); - if (hsd->request[found].hal_sz < - hsd->request[found].hal_used_sz + - cfs_size_round(larr->arr_hai.hai_len)) { + if (request->hal_sz < + request->hal_used_sz + + cfs_size_round(larr->arr_hai.hai_len)) { /* Not enough room, need an extension */ void *hal_buffer; int sz; - sz = 2 * hsd->request[found].hal_sz; + sz = 2 * request->hal_sz; OBD_ALLOC(hal_buffer, sz); - if (!hal_buffer) { - CERROR("%s: Cannot allocate memory " - "(%d o) for compound "LPX64"\n", - mdt_obd_name(mdt), sz, - larr->arr_compound_id); + if (!hal_buffer) RETURN(-ENOMEM); - } - memcpy(hal_buffer, hsd->request[found].hal, - hsd->request[found].hal_used_sz); - OBD_FREE(hsd->request[found].hal, - hsd->request[found].hal_sz); - hsd->request[found].hal = hal_buffer; - hsd->request[found].hal_sz = sz; + memcpy(hal_buffer, request->hal, + request->hal_used_sz); + OBD_FREE(request->hal, + request->hal_sz); + request->hal = hal_buffer; + request->hal_sz = sz; } - hai = hai_zero(hsd->request[found].hal); - for (i = 0; i < hsd->request[found].hal->hal_count; - i++) + hai = hai_first(request->hal); + for (i = 0; i < request->hal->hal_count; i++) hai = hai_next(hai); } memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len); hai->hai_cookie = larr->arr_hai.hai_cookie; hai->hai_gid = larr->arr_hai.hai_gid; - hsd->request[found].hal_used_sz += - cfs_size_round(hai->hai_len); - hsd->request[found].hal->hal_count++; + request->hal_used_sz += cfs_size_round(hai->hai_len); + request->hal->hal_count++; break; } case ARS_STARTED: { + struct hsm_progress_kernel pgs; struct cdt_agent_req *car; + cfs_time_t now = cfs_time_current_sec(); cfs_time_t last; /* we search for a running request @@ -308,70 +292,58 @@ static int mdt_coordinator_cb(const struct lu_env *env, /* test if request too long, if yes cancel it * the same way the copy tool acknowledge a cancel request */ - if ((last + cdt->cdt_timeout) < cfs_time_current_sec()) { - struct hsm_progress_kernel pgs; - - dump_llog_agent_req_rec("mdt_coordinator_cb(): " - "request timeouted, start " - "cleaning", larr); - /* a too old cancel request just needs to be removed - * this can happen, if copy tool does not support cancel - * for other requests, we have to remove the running - * request and notify the copytool - */ - pgs.hpk_fid = larr->arr_hai.hai_fid; - pgs.hpk_cookie = larr->arr_hai.hai_cookie; - pgs.hpk_extent = larr->arr_hai.hai_extent; - pgs.hpk_flags = HP_FLAG_COMPLETED; - pgs.hpk_errval = ENOSYS; - pgs.hpk_data_version = 0; - /* update request state, but do not record in llog, to - * avoid deadlock on cdt_llog_lock - */ - rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); - if (rc) - CERROR("%s: Cannot cleanup timeouted request: " - DFID" for cookie "LPX64" action=%s\n", - mdt_obd_name(mdt), - PFID(&pgs.hpk_fid), pgs.hpk_cookie, - hsm_copytool_action2name( - larr->arr_hai.hai_action)); - - /* add the cookie to the list of record to be - * canceled by caller */ - if (hsd->max_cookie == (hsd->cookie_cnt - 1)) { - __u64 *ptr, *old_ptr; - int old_sz, new_sz, new_cnt; - - /* need to increase vector size */ - old_sz = sizeof(__u64) * hsd->max_cookie; - old_ptr = hsd->cookies; - - new_cnt = 2 * hsd->max_cookie; - new_sz = sizeof(__u64) * new_cnt; - - OBD_ALLOC(ptr, new_sz); - if (!ptr) { - CERROR("%s: Cannot allocate memory " - "(%d o) for cookie vector\n", - mdt_obd_name(mdt), new_sz); - RETURN(-ENOMEM); - } - memcpy(ptr, hsd->cookies, old_sz); - hsd->cookies = ptr; - hsd->max_cookie = new_cnt; - OBD_FREE(old_ptr, old_sz); - } - hsd->cookies[hsd->cookie_cnt] = - larr->arr_hai.hai_cookie; - hsd->cookie_cnt++; + if (now <= last + cdt->cdt_active_req_timeout) + RETURN(0); + + dump_llog_agent_req_rec("request timed out, start cleaning", + larr); + /* a too old cancel request just needs to be removed + * this can happen, if copy tool does not support + * cancel for other requests, we have to remove the + * running request and notify the copytool */ + pgs.hpk_fid = larr->arr_hai.hai_fid; + pgs.hpk_cookie = larr->arr_hai.hai_cookie; + pgs.hpk_extent = larr->arr_hai.hai_extent; + pgs.hpk_flags = HP_FLAG_COMPLETED; + pgs.hpk_errval = ENOSYS; + pgs.hpk_data_version = 0; + + /* update request state, but do not record in llog, to + * avoid deadlock on cdt_llog_lock */ + rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); + if (rc) + CERROR("%s: cannot cleanup timed out request: " + DFID" for cookie %#llx action=%s\n", + mdt_obd_name(mdt), + PFID(&pgs.hpk_fid), pgs.hpk_cookie, + hsm_copytool_action2name( + larr->arr_hai.hai_action)); + + if (rc == -ENOENT) { + /* The request no longer exists, forget + * about it, and do not send a cancel request + * to the client, for which an error will be + * sent back, leading to an endless cycle of + * cancellation. */ + RETURN(LLOG_DEL_RECORD); } + + /* XXX A cancel request cannot be cancelled. */ + if (larr->arr_hai.hai_action == HSMA_CANCEL) + RETURN(0); + + larr->arr_status = ARS_CANCELED; + larr->arr_req_change = now; + rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index); + if (rc < 0) + CERROR("%s: cannot update agent log: rc = %d\n", + mdt_obd_name(mdt), rc); break; } case ARS_FAILED: case ARS_CANCELED: case ARS_SUCCEED: - if ((larr->arr_req_change + cdt->cdt_delay) < + if ((larr->arr_req_change + cdt->cdt_grace_delay) < cfs_time_current_sec()) RETURN(LLOG_DEL_RECORD); break; @@ -385,7 +357,7 @@ static int mdt_coordinator_cb(const struct lu_env *env, * \retval 0 success * \retval -ve failure */ -static int hsm_cdt_procfs_init(struct mdt_device *mdt) +int hsm_cdt_procfs_init(struct mdt_device *mdt) { struct coordinator *cdt = &mdt->mdt_coordinator; int rc = 0; @@ -407,6 +379,29 @@ static int hsm_cdt_procfs_init(struct mdt_device *mdt) } /** + * remove /proc entries for coordinator + * \param mdt [IN] + */ +void hsm_cdt_procfs_fini(struct mdt_device *mdt) +{ + struct coordinator *cdt = &mdt->mdt_coordinator; + + LASSERT(cdt->cdt_state == CDT_STOPPED); + if (cdt->cdt_proc_dir != NULL) + lprocfs_remove(&cdt->cdt_proc_dir); +} + +/** + * get vector of hsm cdt /proc vars + * \param none + * \retval var vector + */ +struct lprocfs_vars *hsm_cdt_get_proc_vars(void) +{ + return lprocfs_mdt_hsm_vars; +} + +/** * coordinator thread * \param data [IN] obd device * \retval 0 success @@ -417,31 +412,24 @@ static int mdt_coordinator(void *data) struct mdt_thread_info *mti = data; struct mdt_device *mdt = mti->mti_mdt; struct coordinator *cdt = &mdt->mdt_coordinator; - struct hsm_scan_data hsd = { 0 }; + struct hsm_scan_data hsd = { NULL }; int rc = 0; + int request_sz; ENTRY; - cdt->cdt_thread.t_flags = SVC_RUNNING; - cfs_waitq_signal(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_RUNNING; + wake_up(&cdt->cdt_waitq); CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n", - mdt_obd_name(mdt), cfs_curproc_pid()); + mdt_obd_name(mdt), current_pid()); - /* - * create /proc entries for coordinator - */ - hsm_cdt_procfs_init(mdt); - /* timeouted cookie vector initialization */ - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; - /* we use a copy of cdt_max_request in the cb, so if cdt_max_request + /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests * increases due to a change from /proc we do not overflow the * hsd.request[] vector */ - hsd.max_request = cdt->cdt_max_request; - hsd.request_sz = hsd.max_request * sizeof(*hsd.request); - OBD_ALLOC(hsd.request, hsd.request_sz); + hsd.max_requests = cdt->cdt_max_requests; + request_sz = hsd.max_requests * sizeof(*hsd.request); + OBD_ALLOC(hsd.request, request_sz); if (!hsd.request) GOTO(out, rc = -ENOMEM); @@ -454,23 +442,22 @@ static int mdt_coordinator(void *data) lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period), NULL, NULL); - l_wait_event(cdt->cdt_thread.t_ctl_waitq, - (cdt->cdt_thread.t_flags & - (SVC_STOPPING|SVC_EVENT)), + l_wait_event(cdt->cdt_waitq, + cdt->cdt_flags & (SVC_STOPPING|SVC_EVENT), &lwi); CDEBUG(D_HSM, "coordinator resumes\n"); - if ((cdt->cdt_thread.t_flags & SVC_STOPPING) || - (cdt->cdt_state == CDT_STOPPING)) { - cdt->cdt_thread.t_flags &= ~SVC_STOPPING; + if (cdt->cdt_flags & SVC_STOPPING || + cdt->cdt_state == CDT_STOPPING) { + cdt->cdt_flags &= ~SVC_STOPPING; rc = 0; break; } /* wake up before timeout, new work arrives */ - if (cdt->cdt_thread.t_flags & SVC_EVENT) - cdt->cdt_thread.t_flags &= ~SVC_EVENT; + if (cdt->cdt_flags & SVC_EVENT) + cdt->cdt_flags &= ~SVC_EVENT; /* if coordinator is suspended continue to wait */ if (cdt->cdt_state == CDT_DISABLE) { @@ -480,31 +467,20 @@ static int mdt_coordinator(void *data) CDEBUG(D_HSM, "coordinator starts reading llog\n"); - if (hsd.max_request != cdt->cdt_max_request) { - /* cdt_max_request has changed, + if (hsd.max_requests != cdt->cdt_max_requests) { + /* cdt_max_requests has changed, * we need to allocate a new buffer */ - OBD_FREE(hsd.request, hsd.request_sz); - hsd.max_request = cdt->cdt_max_request; - hsd.request_sz = - hsd.max_request * sizeof(*hsd.request); - OBD_ALLOC(hsd.request, hsd.request_sz); + OBD_FREE(hsd.request, request_sz); + hsd.max_requests = cdt->cdt_max_requests; + request_sz = hsd.max_requests * sizeof(*hsd.request); + OBD_ALLOC(hsd.request, request_sz); if (!hsd.request) { rc = -ENOMEM; break; } } - /* create canceled cookie vector for an arbitrary size - * if needed, vector will grow during llog scan - */ - hsd.max_cookie = 10; - hsd.cookie_cnt = 0; - OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - if (!hsd.cookies) { - rc = -ENOMEM; - goto clean_cb_alloc; - } hsd.request_cnt = 0; rc = cdt_llog_process(mti->mti_env, mdt, @@ -512,23 +488,7 @@ static int mdt_coordinator(void *data) if (rc < 0) goto clean_cb_alloc; - CDEBUG(D_HSM, "Found %d requests to send and %d" - " requests to cancel\n", - hsd.request_cnt, hsd.cookie_cnt); - /* first we cancel llog records of the timeouted requests */ - if (hsd.cookie_cnt > 0) { - rc = mdt_agent_record_update(mti->mti_env, mdt, - hsd.cookies, - hsd.cookie_cnt, - ARS_CANCELED); - if (rc) - CERROR("%s: mdt_agent_record_update() failed, " - "rc=%d, cannot update status to %s " - "for %d cookies\n", - mdt_obd_name(mdt), rc, - agent_req_status2name(ARS_CANCELED), - hsd.cookie_cnt); - } + CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt); if (list_empty(&cdt->cdt_agents)) { CDEBUG(D_HSM, "no agent available, " @@ -537,38 +497,19 @@ static int mdt_coordinator(void *data) } /* here hsd contains a list of requests to be started */ - for (i = 0; i < hsd.max_request; i++) { - struct hsm_action_list *hal; + for (i = 0; i < hsd.request_cnt; i++) { + struct hsm_scan_request *request = &hsd.request[i]; + struct hsm_action_list *hal = request->hal; struct hsm_action_item *hai; __u64 *cookies; int sz, j; enum agent_req_status status; /* still room for work ? */ - if (atomic_read(&cdt->cdt_request_count) == - cdt->cdt_max_request) + if (atomic_read(&cdt->cdt_request_count) >= + cdt->cdt_max_requests) break; - if (hsd.request[i].hal == NULL) - continue; - - /* found a request, we start it */ - /* kuc payload allocation so we avoid an additionnal - * allocation in mdt_hsm_agent_send() - */ - hal = kuc_alloc(hsd.request[i].hal_used_sz, - KUC_TRANSPORT_HSM, HMT_ACTION_LIST); - if (IS_ERR(hal)) { - CERROR("%s: Cannot allocate memory (%d o) " - "for compound "LPX64"\n", - mdt_obd_name(mdt), - hsd.request[i].hal_used_sz, - hsd.request[i].hal->hal_compound_id); - continue; - } - memcpy(hal, hsd.request[i].hal, - hsd.request[i].hal_used_sz); - rc = mdt_hsm_agent_send(mti, hal, 0); /* if failure, we suppose it is temporary * if the copy tool failed to do the request @@ -579,67 +520,41 @@ static int mdt_coordinator(void *data) /* set up cookie vector to set records status * after copy tools start or failed */ - sz = hsd.request[i].hal->hal_count * sizeof(__u64); + sz = hal->hal_count * sizeof(__u64); OBD_ALLOC(cookies, sz); - if (cookies == NULL) { - CERROR("%s: Cannot allocate memory (%d o) " - "for cookies vector "LPX64"\n", - mdt_obd_name(mdt), sz, - hsd.request[i].hal->hal_compound_id); - kuc_free(hal, hsd.request[i].hal_used_sz); + if (cookies == NULL) continue; - } - hai = hai_zero(hal); - for (j = 0; j < hsd.request[i].hal->hal_count; j++) { + + hai = hai_first(hal); + for (j = 0; j < hal->hal_count; j++) { cookies[j] = hai->hai_cookie; hai = hai_next(hai); } rc = mdt_agent_record_update(mti->mti_env, mdt, cookies, - hsd.request[i].hal->hal_count, - status); + hal->hal_count, status); if (rc) CERROR("%s: mdt_agent_record_update() failed, " "rc=%d, cannot update status to %s " "for %d cookies\n", mdt_obd_name(mdt), rc, agent_req_status2name(status), - hsd.request[i].hal->hal_count); + hal->hal_count); OBD_FREE(cookies, sz); - kuc_free(hal, hsd.request[i].hal_used_sz); } clean_cb_alloc: - /* free cookie vector allocated for/by callback */ - if (hsd.cookies) { - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; - } - /* free hal allocated by callback */ - for (i = 0; i < hsd.max_request; i++) { - if (hsd.request[i].hal) { - OBD_FREE(hsd.request[i].hal, - hsd.request[i].hal_sz); - hsd.request[i].hal_sz = 0; - hsd.request[i].hal = NULL; - hsd.request_cnt--; - } - } - LASSERT(hsd.request_cnt == 0); + for (i = 0; i < hsd.request_cnt; i++) { + struct hsm_scan_request *request = &hsd.request[i]; - /* reset callback data */ - memset(hsd.request, 0, hsd.request_sz); + OBD_FREE(request->hal, request->hal_sz); + } } EXIT; out: if (hsd.request) - OBD_FREE(hsd.request, hsd.request_sz); - - if (hsd.cookies) - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); + OBD_FREE(hsd.request, request_sz); if (cdt->cdt_state == CDT_STOPPING) { /* request comes from /proc path, so we need to clean cdt @@ -651,17 +566,17 @@ out: * by mdt_stop_coordinator(), we have to ack * and cdt cleaning will be done by event sender */ - cdt->cdt_thread.t_flags = SVC_STOPPED; - cfs_waitq_signal(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_STOPPED; + wake_up(&cdt->cdt_waitq); } if (rc != 0) CERROR("%s: coordinator thread exiting, process=%d, rc=%d\n", - mdt_obd_name(mdt), cfs_curproc_pid(), rc); + mdt_obd_name(mdt), current_pid(), rc); else CDEBUG(D_HSM, "%s: coordinator thread exiting, process=%d," " no error\n", - mdt_obd_name(mdt), cfs_curproc_pid()); + mdt_obd_name(mdt), current_pid()); return rc; } @@ -674,7 +589,7 @@ out: * \retval cdt_restore_handle found * \retval NULL not found */ -static struct cdt_restore_handle *hsm_restore_hdl_find(struct coordinator *cdt, +struct cdt_restore_handle *mdt_hsm_restore_hdl_find(struct coordinator *cdt, const struct lu_fid *fid) { struct cdt_restore_handle *crh; @@ -725,12 +640,27 @@ static int hsm_restore_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hai = &larr->arr_hai; - if ((hai->hai_action != HSMA_RESTORE) || - agent_req_in_final_state(larr->arr_status)) + if (hai->hai_cookie > cdt->cdt_last_cookie) + /* update the cookie to avoid collision */ + cdt->cdt_last_cookie = hai->hai_cookie + 1; + + if (hai->hai_action != HSMA_RESTORE || + agent_req_in_final_state(larr->arr_status)) RETURN(0); /* restore request not in a final state */ + /* force replay of restore requests left in started state from previous + * CDT context, to be canceled later if finally found to be incompatible + * when being re-started */ + if (larr->arr_status == ARS_STARTED) { + larr->arr_status = ARS_WAITING; + larr->arr_req_change = cfs_time_current_sec(); + rc = llog_write(env, llh, hdr, hdr->lrh_index); + if (rc != 0) + GOTO(out, rc); + } + OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem); if (crh == NULL) RETURN(-ENOMEM); @@ -741,7 +671,7 @@ static int hsm_restore_cb(const struct lu_env *env, crh->extent.end = hai->hai_extent.offset + hai->hai_extent.length; */ crh->crh_extent.start = 0; - crh->crh_extent.end = OBD_OBJECT_EOF; + crh->crh_extent.end = hai->hai_extent.length; /* get the layout lock */ mdt_lock_reg_init(&crh->crh_lh, LCK_EX); child = mdt_object_find_lock(mti, &crh->crh_fid, &crh->crh_lh, @@ -796,7 +726,7 @@ static int hsm_init_ucred(struct lu_ucred *uc) uc->uc_fsgid = 0; uc->uc_suppgids[0] = -1; uc->uc_suppgids[1] = -1; - uc->uc_cap = 0; + uc->uc_cap = CFS_CAP_FS_MASK; uc->uc_umask = 0777; uc->uc_ginfo = NULL; uc->uc_identity = NULL; @@ -819,8 +749,8 @@ int mdt_hsm_cdt_wakeup(struct mdt_device *mdt) RETURN(-ESRCH); /* wake up coordinator */ - cdt->cdt_thread.t_flags = SVC_EVENT; - cfs_waitq_signal(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_EVENT; + wake_up(&cdt->cdt_waitq); RETURN(0); } @@ -840,22 +770,22 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt) cdt->cdt_state = CDT_STOPPED; - cfs_waitq_init(&cdt->cdt_thread.t_ctl_waitq); + init_waitqueue_head(&cdt->cdt_waitq); mutex_init(&cdt->cdt_llog_lock); init_rwsem(&cdt->cdt_agent_lock); init_rwsem(&cdt->cdt_request_lock); mutex_init(&cdt->cdt_restore_lock); - CFS_INIT_LIST_HEAD(&cdt->cdt_requests); - CFS_INIT_LIST_HEAD(&cdt->cdt_agents); - CFS_INIT_LIST_HEAD(&cdt->cdt_restore_hdl); + INIT_LIST_HEAD(&cdt->cdt_requests); + INIT_LIST_HEAD(&cdt->cdt_agents); + INIT_LIST_HEAD(&cdt->cdt_restore_hdl); rc = lu_env_init(&cdt->cdt_env, LCT_MD_THREAD); if (rc < 0) RETURN(rc); /* for mdt_ucred(), lu_ucred stored in lu_ucred_key */ - rc = lu_context_init(&cdt->cdt_session, LCT_SESSION); + rc = lu_context_init(&cdt->cdt_session, LCT_SERVER_SESSION); if (rc == 0) { lu_context_enter(&cdt->cdt_session); cdt->cdt_env.le_ses = &cdt->cdt_session; @@ -872,6 +802,15 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt) hsm_init_ucred(mdt_ucred(cdt_mti)); + /* default values for /proc tunnables + * can be override by MGS conf */ + cdt->cdt_default_archive_id = 1; + cdt->cdt_grace_delay = 60; + cdt->cdt_loop_period = 10; + cdt->cdt_max_requests = 3; + cdt->cdt_policy = CDT_DEFAULT_POLICY; + cdt->cdt_active_req_timeout = 3600; + RETURN(0); } @@ -898,13 +837,13 @@ int mdt_hsm_cdt_fini(struct mdt_device *mdt) * \retval 0 success * \retval -ve failure */ -int mdt_hsm_cdt_start(struct mdt_device *mdt) +static int mdt_hsm_cdt_start(struct mdt_device *mdt) { struct coordinator *cdt = &mdt->mdt_coordinator; int rc; void *ptr; struct mdt_thread_info *cdt_mti; - cfs_task_t *task; + struct task_struct *task; ENTRY; /* functions defined but not yet used @@ -918,18 +857,19 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt) RETURN(-EALREADY); } + CLASSERT(1 << (CDT_POLICY_SHIFT_COUNT - 1) == CDT_POLICY_LAST); cdt->cdt_policy = CDT_DEFAULT_POLICY; + cdt->cdt_state = CDT_INIT; - cfs_atomic_set(&cdt->cdt_compound_id, cfs_time_current_sec()); + atomic_set(&cdt->cdt_compound_id, cfs_time_current_sec()); /* just need to be larger than previous one */ /* cdt_last_cookie is protected by cdt_llog_lock */ cdt->cdt_last_cookie = cfs_time_current_sec(); - cdt->cdt_loop_period = 10; - cdt->cdt_delay = 60; - cdt->cdt_timeout = 3600; - cdt->cdt_max_request = 3; atomic_set(&cdt->cdt_request_count, 0); + cdt->cdt_user_request_mask = (1UL << HSMA_RESTORE); + cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE); + cdt->cdt_other_request_mask = (1UL << HSMA_RESTORE); /* to avoid deadlock when start is made through /proc * /proc entries are created by the coordinator thread */ @@ -939,7 +879,7 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt) rc = mdt_hsm_pending_restore(cdt_mti); if (rc) CERROR("%s: cannot take the layout locks needed" - " for registered restore: %d", + " for registered restore: %d\n", mdt_obd_name(mdt), rc); task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr"); @@ -955,8 +895,8 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt) rc = 0; } - cfs_wait_event(cdt->cdt_thread.t_ctl_waitq, - (cdt->cdt_thread.t_flags & SVC_RUNNING)); + wait_event(cdt->cdt_waitq, + (cdt->cdt_flags & SVC_RUNNING)); cdt->cdt_state = CDT_RUNNING; mdt->mdt_opts.mo_coordinator = 1; @@ -982,16 +922,12 @@ int mdt_hsm_cdt_stop(struct mdt_device *mdt) RETURN(-EALREADY); } - /* remove proc entries */ - if (cdt->cdt_proc_dir != NULL) - lprocfs_remove(&cdt->cdt_proc_dir); - if (cdt->cdt_state != CDT_STOPPING) { /* stop coordinator thread before cleaning */ - cdt->cdt_thread.t_flags = SVC_STOPPING; - cfs_waitq_signal(&cdt->cdt_thread.t_ctl_waitq); - cfs_wait_event(cdt->cdt_thread.t_ctl_waitq, - cdt->cdt_thread.t_flags & SVC_STOPPED); + cdt->cdt_flags = SVC_STOPPING; + wake_up(&cdt->cdt_waitq); + wait_event(cdt->cdt_waitq, + cdt->cdt_flags & SVC_STOPPED); } cdt->cdt_state = CDT_STOPPED; @@ -1051,7 +987,7 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti, ENTRY; /* register request in memory list */ - hai = hai_zero(hal); + hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) { struct cdt_agent_req *car; @@ -1067,7 +1003,7 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti, if (rc) { CERROR("%s: mdt_agent_record_update() failed, " "rc=%d, cannot update status to %s " - "for cookie "LPX64"\n", + "for cookie %#llx\n", mdt_obd_name(mdt), rc, agent_req_status2name(ARS_CANCELED), hai->hai_cookie); @@ -1125,45 +1061,56 @@ out: /** * swap layouts between 2 fids * \param mti [IN] context - * \param fid1 [IN] - * \param fid2 [IN] + * \param obj [IN] + * \param dfid [IN] + * \param mh_common [IN] MD HSM */ static int hsm_swap_layouts(struct mdt_thread_info *mti, - const lustre_fid *fid, const lustre_fid *dfid) + struct mdt_object *obj, const struct lu_fid *dfid, + struct md_hsm *mh_common) { - struct mdt_device *mdt = mti->mti_mdt; - struct mdt_object *child1, *child2; - struct mdt_lock_handle *lh2; + struct mdt_object *dobj; + struct mdt_lock_handle *dlh; int rc; ENTRY; - child1 = mdt_object_find(mti->mti_env, mdt, fid); - if (IS_ERR(child1)) - GOTO(out, rc = PTR_ERR(child1)); + if (!mdt_object_exists(obj)) + GOTO(out, rc = -ENOENT); - /* we already have layout lock on FID so take only + /* we already have layout lock on obj so take only * on dfid */ - lh2 = &mti->mti_lh[MDT_LH_OLD]; - mdt_lock_reg_init(lh2, LCK_EX); - child2 = mdt_object_find_lock(mti, dfid, lh2, MDS_INODELOCK_LAYOUT); - if (IS_ERR(child2)) - GOTO(out_child1, rc = PTR_ERR(child2)); + dlh = &mti->mti_lh[MDT_LH_OLD]; + mdt_lock_reg_init(dlh, LCK_EX); + dobj = mdt_object_find_lock(mti, dfid, dlh, MDS_INODELOCK_LAYOUT); + if (IS_ERR(dobj)) + GOTO(out, rc = PTR_ERR(dobj)); /* if copy tool closes the volatile before sending the final * progress through llapi_hsm_copy_end(), all the objects * are removed and mdd_swap_layout LBUG */ - if (mdt_object_exists(child2)) { - rc = mo_swap_layouts(mti->mti_env, mdt_object_child(child1), - mdt_object_child(child2), 0); - } else { + if (!mdt_object_exists(dobj)) { CERROR("%s: Copytool has closed volatile file "DFID"\n", mdt_obd_name(mti->mti_mdt), PFID(dfid)); - rc = -ENOENT; + GOTO(out_dobj, rc = -ENOENT); } + /* Since we only handle restores here, unconditionally use + * SWAP_LAYOUTS_MDS_HSM flag to ensure original layout will + * be preserved in case of failure during swap_layout and not + * leave a file in an intermediate but incoherent state. + * But need to setup HSM xattr of data FID before, reuse + * mti and mh presets for FID in hsm_cdt_request_completed(), + * only need to clear RELEASED and DIRTY. + */ + mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY); + rc = mdt_hsm_attr_set(mti, dobj, mh_common); + if (rc == 0) + rc = mo_swap_layouts(mti->mti_env, + mdt_object_child(obj), + mdt_object_child(dobj), + SWAP_LAYOUTS_MDS_HSM); - mdt_object_unlock_put(mti, child2, lh2, 1); -out_child1: - mdt_object_put(mti->mti_env, child1); +out_dobj: + mdt_object_unlock_put(mti, dobj, dlh, 1); out: RETURN(rc); } @@ -1193,15 +1140,11 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, /* default is to retry */ *status = ARS_WAITING; - /* find object by FID */ + /* find object by FID + * if error/removed continue anyway to get correct reporting done */ obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh); /* we will update MD HSM only if needed */ is_mh_changed = false; - if (IS_ERR(obj)) { - /* object removed */ - *status = ARS_SUCCEED; - goto unlock; - } /* no need to change mh->mh_arch_id * mdt_hsm_get_md_hsm() got it from disk and it is still valid @@ -1217,7 +1160,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, * ENOSYS only if does not support cancel */ /* this can also happen when cdt calls it to - * for a timeouted request */ + * for a timed out request */ *status = ARS_FAILED; /* to have a cancel event in changelog */ pgs->hpk_errval = ECANCELED; @@ -1229,15 +1172,16 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, *status = ARS_SUCCEED; break; default: - *status = (((cdt->cdt_policy & - CDT_NORETRY_ACTION) || - !(pgs->hpk_flags & HP_FLAG_RETRY)) ? - ARS_FAILED : ARS_WAITING); + /* retry only if current policy or requested, and + * object is not on error/removed */ + *status = (cdt->cdt_policy & CDT_NORETRY_ACTION || + !(pgs->hpk_flags & HP_FLAG_RETRY) || + IS_ERR(obj)) ? ARS_FAILED : ARS_WAITING; break; } if (pgs->hpk_errval > CLF_HSM_MAXERROR) { - CERROR("%s: Request "LPX64" on "DFID + CERROR("%s: Request %#llx on "DFID " failed, error code %d too large\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), @@ -1261,14 +1205,14 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, break; case HSMA_CANCEL: hsm_set_cl_event(&cl_flags, HE_CANCEL); - CERROR("%s: Failed request "LPX64" on "DFID + CERROR("%s: Failed request %#llx on "DFID " cannot be a CANCEL\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); break; default: - CERROR("%s: Failed request "LPX64" on "DFID + CERROR("%s: Failed request %#llx on "DFID " %d is an unknown action\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), @@ -1291,8 +1235,10 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, case HSMA_RESTORE: hsm_set_cl_event(&cl_flags, HE_RESTORE); - /* clear RELEASED and DIRTY */ - mh.mh_flags &= ~(HS_RELEASED | HS_DIRTY); + /* do not clear RELEASED and DIRTY here + * this will occur in hsm_swap_layouts() + */ + /* Restoring has changed the file version on * disk. */ mh.mh_arch_ver = pgs->hpk_data_version; @@ -1306,17 +1252,13 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, break; case HSMA_CANCEL: hsm_set_cl_event(&cl_flags, HE_CANCEL); - CERROR("%s: Successful request "LPX64 - " on "DFID - " cannot be a CANCEL\n", + CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); break; default: - CERROR("%s: Successful request "LPX64 - " on "DFID - " %d is an unknown action\n", + CERROR("%s: Successful request %#llx on "DFID" %d is an unknown action\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), car->car_hai->hai_action); @@ -1329,27 +1271,24 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, * a crasy CT no need to manage DIRTY */ if (rc == 0) - hsm_set_cl_flags(&cl_flags, ((mh.mh_flags & HS_DIRTY) ? - CLF_HSM_DIRTY : 0)); + hsm_set_cl_flags(&cl_flags, + mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0); /* unlock is done later, after layout lock management */ - if (is_mh_changed) + if (is_mh_changed && !IS_ERR(obj)) rc = mdt_hsm_attr_set(mti, obj, &mh); -unlock: /* we give back layout lock only if restore was successful or - * if restore was canceled or if policy is to not retry + * if no retry will be attempted and if object is still alive, * in other cases we just unlock the object */ - if ((car->car_hai->hai_action == HSMA_RESTORE) && - ((pgs->hpk_errval == 0) || (pgs->hpk_errval == ECANCELED) || - (cdt->cdt_policy & CDT_NORETRY_ACTION))) { + if (car->car_hai->hai_action == HSMA_RESTORE) { struct cdt_restore_handle *crh; /* restore in data FID done, we swap the layouts - * only if restore is successfull */ - if (pgs->hpk_errval == 0) { - rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid, - &car->car_hai->hai_dfid); + * only if restore is successful */ + if (pgs->hpk_errval == 0 && !IS_ERR_OR_NULL(obj)) { + rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid, + &mh); if (rc) { if (cdt->cdt_policy & CDT_NORETRY_ACTION) *status = ARS_FAILED; @@ -1362,15 +1301,17 @@ unlock: /* give back layout lock */ mutex_lock(&cdt->cdt_restore_lock); - crh = hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid); + crh = mdt_hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid); if (crh != NULL) list_del(&crh->crh_list); mutex_unlock(&cdt->cdt_restore_lock); - /* just give back layout lock, we keep - * the reference which is given back - * later with the lock for HSM flags */ - if (!IS_ERR(obj)) - mdt_object_unlock(mti, obj, &crh->crh_lh, 1); + /* Just give back layout lock, we keep the reference + * which is given back later with the lock for HSM + * flags. + * XXX obj may be invalid so we do not pass it. */ + if (crh != NULL) + mdt_object_unlock(mti, NULL, &crh->crh_lh, 1); + if (crh != NULL) OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem); } @@ -1378,11 +1319,12 @@ unlock: GOTO(out, rc); out: - if ((obj != NULL) && !IS_ERR(obj)) { - mo_changelog(env, CL_HSM, cl_flags, - mdt_object_child(obj)); + /* always add a ChangeLog record */ + mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child, + &car->car_hai->hai_fid); + + if (!IS_ERR(obj)) mdt_object_put(mti->mti_env, obj); - } RETURN(rc); } @@ -1412,14 +1354,15 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* first do sanity checks */ car = mdt_cdt_update_request(cdt, pgs); if (IS_ERR(car)) { - CERROR("%s: Cannot find running request for cookie "LPX64 + CERROR("%s: Cannot find running request for cookie %#llx" " on fid="DFID"\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); + RETURN(PTR_ERR(car)); } - CDEBUG(D_HSM, "Progress received for fid="DFID" cookie="LPX64 + CDEBUG(D_HSM, "Progress received for fid="DFID" cookie=%#llx" " action=%s flags=%d err=%d fid="DFID" dfid="DFID"\n", PFID(&pgs->hpk_fid), pgs->hpk_cookie, hsm_copytool_action2name(car->car_hai->hai_action), @@ -1430,15 +1373,15 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* progress is done on FID or data FID depending of the action and * of the copy progress */ /* for restore progress is used to send back the data FID to cdt */ - if ((car->car_hai->hai_action == HSMA_RESTORE) && - (lu_fid_eq(&car->car_hai->hai_fid, &car->car_hai->hai_dfid))) + if (car->car_hai->hai_action == HSMA_RESTORE && + lu_fid_eq(&car->car_hai->hai_fid, &car->car_hai->hai_dfid)) car->car_hai->hai_dfid = pgs->hpk_fid; - if (((car->car_hai->hai_action == HSMA_RESTORE) || - (car->car_hai->hai_action == HSMA_ARCHIVE)) && + if ((car->car_hai->hai_action == HSMA_RESTORE || + car->car_hai->hai_action == HSMA_ARCHIVE) && (!lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_dfid) && !lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_fid))) { - CERROR("%s: Progress on "DFID" for cookie "LPX64 + CERROR("%s: Progress on "DFID" for cookie %#llx" " does not match request FID "DFID" nor data FID " DFID"\n", mdt_obd_name(mdt), @@ -1449,7 +1392,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, } if (pgs->hpk_errval != 0 && !(pgs->hpk_flags & HP_FLAG_COMPLETED)) { - CERROR("%s: Progress on "DFID" for cookie "LPX64" action=%s" + CERROR("%s: Progress on "DFID" for cookie %#llx action=%s" " is not coherent (err=%d and not completed" " (flags=%d))\n", mdt_obd_name(mdt), @@ -1472,7 +1415,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* remove request from memory list */ mdt_cdt_remove_request(cdt, pgs->hpk_cookie); - CDEBUG(D_HSM, "Updating record: fid="DFID" cookie="LPX64 + CDEBUG(D_HSM, "Updating record: fid="DFID" cookie=%#llx" " action=%s status=%s\n", PFID(&pgs->hpk_fid), pgs->hpk_cookie, hsm_copytool_action2name(car->car_hai->hai_action), @@ -1487,7 +1430,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, if (rc1) CERROR("%s: mdt_agent_record_update() failed," " rc=%d, cannot update status to %s" - " for cookie "LPX64"\n", + " for cookie %#llx\n", mdt_obd_name(mdt), rc1, agent_req_status2name(status), pgs->hpk_cookie); @@ -1542,14 +1485,13 @@ static int mdt_cancel_all_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hcad = data; - if ((larr->arr_status == ARS_WAITING) || - (larr->arr_status == ARS_STARTED)) { + if (larr->arr_status == ARS_WAITING || + larr->arr_status == ARS_STARTED) { larr->arr_status = ARS_CANCELED; larr->arr_req_change = cfs_time_current_sec(); - rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr); - if (rc == 0) - RETURN(LLOG_DEL_RECORD); + rc = llog_write(env, llh, hdr, hdr->lrh_index); } + RETURN(rc); } @@ -1593,7 +1535,7 @@ static int hsm_cancel_all_actions(struct mdt_device *mdt) hal_len = sizeof(*hal) + cfs_size_round(MTI_NAME_MAXLEN + 1) + cfs_size_round(car->car_hai->hai_len); - if ((hal_len > hal_sz) && (hal_sz > 0)) { + if (hal_len > hal_sz && hal_sz > 0) { /* not enough room, free old buffer */ OBD_FREE(hal, hal_sz); hal = NULL; @@ -1619,7 +1561,7 @@ static int hsm_cancel_all_actions(struct mdt_device *mdt) hal->hal_flags = car->car_flags; hal->hal_count = 0; - hai = hai_zero(hal); + hai = hai_first(hal); memcpy(hai, car->car_hai, car->car_hai->hai_len); hai->hai_action = HSMA_CANCEL; hal->hal_count = 1; @@ -1654,7 +1596,7 @@ out: } /** - * check if a request is comptaible with file status + * check if a request is compatible with file status * \param hai [IN] request description * \param hal_an [IN] request archive number (not used) * \param rq_flags [IN] request flags @@ -1673,12 +1615,12 @@ bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai, switch (hai->hai_action) { case HSMA_ARCHIVE: if (!(hsm_flags & HS_NOARCHIVE) && - ((hsm_flags & HS_DIRTY) || !(hsm_flags & HS_ARCHIVED))) + (hsm_flags & HS_DIRTY || !(hsm_flags & HS_ARCHIVED))) is_compat = true; break; case HSMA_RESTORE: if (!(hsm_flags & HS_DIRTY) && (hsm_flags & HS_RELEASED) && - (hsm_flags & HS_ARCHIVED) && !(hsm_flags & HS_LOST)) + hsm_flags & HS_ARCHIVED && !(hsm_flags & HS_LOST)) is_compat = true; break; case HSMA_REMOVE: @@ -1690,8 +1632,8 @@ bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai, is_compat = true; break; } - CDEBUG(D_HSM, "fid="DFID" action=%s flags="LPX64 - " extent="LPX64"-"LPX64" hsm_flags=%.8X %s\n", + CDEBUG(D_HSM, "fid="DFID" action=%s flags=%#llx" + " extent=%#llx-%#llx hsm_flags=%.8X %s\n", PFID(&hai->hai_fid), hsm_copytool_action2name(hai->hai_action), rq_flags, hai->hai_extent.offset, hai->hai_extent.length, @@ -1709,8 +1651,8 @@ static const struct { char *name; char *nickname; } hsm_policy_names[] = { - { CDT_NONBLOCKING_RESTORE, "non_blocking_restore", "nbr"}, - { CDT_NORETRY_ACTION, "no_retry_action", "nra"}, + { CDT_NONBLOCKING_RESTORE, "NonBlockingRestore", "NBR"}, + { CDT_NORETRY_ACTION, "NoRetryAction", "NRA"}, { 0 }, }; @@ -1725,7 +1667,8 @@ static __u64 hsm_policy_str2bit(const char *name) int i; for (i = 0; hsm_policy_names[i].bit != 0; i++) - if (strcmp(hsm_policy_names[i].nickname, name) == 0) + if (strcmp(hsm_policy_names[i].nickname, name) == 0 || + strcmp(hsm_policy_names[i].name, name) == 0) return hsm_policy_names[i].bit; return 0; } @@ -1733,168 +1676,172 @@ static __u64 hsm_policy_str2bit(const char *name) /** * convert a policy bit field to a string * \param mask [IN] policy bit field + * \param hexa [IN] print mask before bit names * \param buffer [OUT] string * \param count [IN] size of buffer - * \retval size filled in buffer */ -static int hsm_policy_bit2str(const __u64 mask, char *buffer, int count) +static void hsm_policy_bit2str(struct seq_file *m, const __u64 mask, + const bool hexa) { - int i, j, sz; - char *ptr; + int i, j; __u64 bit; ENTRY; - ptr = buffer; - sz = snprintf(buffer, count, "("LPX64") ", mask); - ptr += sz; - count -= sz; - for (i = 0; i < (sizeof(mask) * 8); i++) { + if (hexa) + seq_printf(m, "(%#llx) ", mask); + + for (i = 0; i < CDT_POLICY_SHIFT_COUNT; i++) { bit = (1ULL << i); - if (!(bit & mask)) - continue; for (j = 0; hsm_policy_names[j].bit != 0; j++) { - if (hsm_policy_names[j].bit == bit) { - sz = snprintf(ptr, count, "%s(%s) ", - hsm_policy_names[j].name, - hsm_policy_names[j].nickname); - ptr += sz; - count -= sz; + if (hsm_policy_names[j].bit == bit) break; - } } + if (bit & mask) + seq_printf(m, "[%s] ", hsm_policy_names[j].name); + else + seq_printf(m, "%s ", hsm_policy_names[j].name); } - RETURN(ptr - buffer); + /* remove last ' ' */ + m->count--; + seq_putc(m, '\0'); } /* methods to read/write HSM policy flags */ -static int lprocfs_rd_hsm_policy(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int mdt_hsm_policy_seq_show(struct seq_file *m, void *data) { - struct mdt_device *mdt = data; + struct mdt_device *mdt = m->private; struct coordinator *cdt = &mdt->mdt_coordinator; - int sz; ENTRY; - sz = hsm_policy_bit2str(cdt->cdt_policy, page, count); - page[sz] = '\n'; - sz++; - page[sz] = '\0'; - *eof = 1; - RETURN(sz); + hsm_policy_bit2str(m, cdt->cdt_policy, false); + RETURN(0); } -static int lprocfs_wr_hsm_policy(struct file *file, const char *buffer, - unsigned long count, void *data) +static ssize_t +mdt_hsm_policy_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) { - struct mdt_device *mdt = data; + struct seq_file *m = file->private_data; + struct mdt_device *mdt = m->private; struct coordinator *cdt = &mdt->mdt_coordinator; - int sz; - char *start, *end; - __u64 policy; - int set; + char *start, *token, sign; char *buf; + __u64 policy; + __u64 add_mask, remove_mask, set_mask; + int rc; ENTRY; - if (strncmp(buffer, "help", 4) == 0) { - sz = PAGE_SIZE; - OBD_ALLOC(buf, sz); - if (!buf) - RETURN(-ENOMEM); - - hsm_policy_bit2str(CDT_POLICY_MASK, buf, sz); - CWARN("Supported policies are: %s\n", buf); - OBD_FREE(buf, sz); - RETURN(count); - } + if (count + 1 > PAGE_SIZE) + RETURN(-EINVAL); OBD_ALLOC(buf, count + 1); if (buf == NULL) RETURN(-ENOMEM); if (copy_from_user(buf, buffer, count)) - RETURN(-EFAULT); + GOTO(out, rc = -EFAULT); buf[count] = '\0'; + start = buf; + CDEBUG(D_HSM, "%s: receive new policy: '%s'\n", mdt_obd_name(mdt), + start); - policy = 0; + add_mask = remove_mask = set_mask = 0; do { - end = strchr(start, ' '); - if (end != NULL) - *end = '\0'; - switch (*start) { + token = strsep(&start, "\n "); + sign = *token; + + if (sign == '\0') + continue; + + if (sign == '-' || sign == '+') + token++; + + policy = hsm_policy_str2bit(token); + if (policy == 0) { + CWARN("%s: '%s' is unknown, " + "supported policies are:\n", mdt_obd_name(mdt), + token); + hsm_policy_bit2str(m, 0, false); + GOTO(out, rc = -EINVAL); + } + switch (sign) { case '-': - start++; - set = 0; + remove_mask |= policy; break; case '+': - start++; - set = 1; + add_mask |= policy; break; default: - set = 2; + set_mask |= policy; break; } - policy = hsm_policy_str2bit(start); - if (!policy) - break; - switch (set) { - case 0: - cdt->cdt_policy &= ~policy; - break; - case 1: - cdt->cdt_policy |= policy; - break; - case 2: - cdt->cdt_policy = policy; - break; - } + } while (start != NULL); - start = end + 1; - } while (end != NULL); + CDEBUG(D_HSM, "%s: new policy: rm=%#llx add=%#llx set=%#llx\n", + mdt_obd_name(mdt), remove_mask, add_mask, set_mask); + + /* if no sign in all string, it is a clear and set + * if some sign found, all unsigned are converted + * to add + * P1 P2 = set to P1 and P2 + * P1 -P2 = add P1 clear P2 same as +P1 -P2 + */ + if (remove_mask == 0 && add_mask == 0) { + cdt->cdt_policy = set_mask; + } else { + cdt->cdt_policy |= set_mask | add_mask; + cdt->cdt_policy &= ~remove_mask; + } + + GOTO(out, rc = count); + +out: OBD_FREE(buf, count + 1); - RETURN(count); + RETURN(rc); } +LPROC_SEQ_FOPS(mdt_hsm_policy); #define GENERATE_PROC_METHOD(VAR) \ -static int lprocfs_rd_hsm_##VAR(char *page, char **start, off_t off, \ - int count, int *eof, void *data) \ +static int mdt_hsm_##VAR##_seq_show(struct seq_file *m, void *data) \ { \ - struct mdt_device *mdt = data; \ + struct mdt_device *mdt = m->private; \ struct coordinator *cdt = &mdt->mdt_coordinator; \ - int sz; \ ENTRY; \ \ - sz = snprintf(page, count, LPU64"\n", (__u64)cdt->VAR); \ - *eof = 1; \ - RETURN(sz); \ + seq_printf(m, "%llu\n", (__u64)cdt->VAR); \ + RETURN(0); \ } \ -static int lprocfs_wr_hsm_##VAR(struct file *file, const char *buffer, \ - unsigned long count, void *data) \ +static ssize_t \ +mdt_hsm_##VAR##_seq_write(struct file *file, const char __user *buffer, \ + size_t count, loff_t *off) \ \ { \ - struct mdt_device *mdt = data; \ + struct seq_file *m = file->private_data; \ + struct mdt_device *mdt = m->private; \ struct coordinator *cdt = &mdt->mdt_coordinator; \ - int val; \ + __s64 val; \ int rc; \ ENTRY; \ \ - rc = lprocfs_write_helper(buffer, count, &val); \ + rc = lprocfs_str_to_s64(buffer, count, &val); \ if (rc) \ RETURN(rc); \ - if (val > 0) { \ + if (val > 0 && val < INT_MAX) { \ cdt->VAR = val; \ RETURN(count); \ } \ RETURN(-EINVAL); \ -} +} \ GENERATE_PROC_METHOD(cdt_loop_period) -GENERATE_PROC_METHOD(cdt_delay) -GENERATE_PROC_METHOD(cdt_timeout) -GENERATE_PROC_METHOD(cdt_max_request) +GENERATE_PROC_METHOD(cdt_grace_delay) +GENERATE_PROC_METHOD(cdt_active_req_timeout) +GENERATE_PROC_METHOD(cdt_max_requests) +GENERATE_PROC_METHOD(cdt_default_archive_id) /* * procfs write method for MDT/hsm_control @@ -1905,32 +1852,59 @@ GENERATE_PROC_METHOD(cdt_max_request) #define CDT_DISABLE_CMD "disabled" #define CDT_PURGE_CMD "purge" #define CDT_HELP_CMD "help" +#define CDT_MAX_CMD_LEN 10 -int lprocfs_wr_hsm_cdt_control(struct file *file, const char *buffer, - unsigned long count, void *data) +ssize_t +mdt_hsm_cdt_control_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) { - struct obd_device *obd = data; + struct seq_file *m = file->private_data; + struct obd_device *obd = m->private; struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); struct coordinator *cdt = &(mdt->mdt_coordinator); int rc, usage = 0; + char kernbuf[CDT_MAX_CMD_LEN]; ENTRY; + if (count == 0 || count >= sizeof(kernbuf)) + RETURN(-EINVAL); + + if (copy_from_user(kernbuf, buffer, count)) + RETURN(-EFAULT); + kernbuf[count] = 0; + + if (kernbuf[count - 1] == '\n') + kernbuf[count - 1] = 0; + rc = 0; - if (strncmp(buffer, CDT_ENABLE_CMD, strlen(CDT_ENABLE_CMD)) == 0) { + if (strcmp(kernbuf, CDT_ENABLE_CMD) == 0) { if (cdt->cdt_state == CDT_DISABLE) { cdt->cdt_state = CDT_RUNNING; mdt_hsm_cdt_wakeup(mdt); } else { rc = mdt_hsm_cdt_start(mdt); } - } else if (strncmp(buffer, CDT_STOP_CMD, strlen(CDT_STOP_CMD)) == 0) { - cdt->cdt_state = CDT_STOPPING; - } else if (strncmp(buffer, CDT_DISABLE_CMD, - strlen(CDT_DISABLE_CMD)) == 0) { - cdt->cdt_state = CDT_DISABLE; - } else if (strncmp(buffer, CDT_PURGE_CMD, strlen(CDT_PURGE_CMD)) == 0) { + } else if (strcmp(kernbuf, CDT_STOP_CMD) == 0) { + if ((cdt->cdt_state == CDT_STOPPING) || + (cdt->cdt_state == CDT_STOPPED)) { + CERROR("%s: Coordinator already stopped\n", + mdt_obd_name(mdt)); + rc = -EALREADY; + } else { + cdt->cdt_state = CDT_STOPPING; + } + } else if (strcmp(kernbuf, CDT_DISABLE_CMD) == 0) { + if ((cdt->cdt_state == CDT_STOPPING) || + (cdt->cdt_state == CDT_STOPPED)) { + CERROR("%s: Coordinator is stopped\n", + mdt_obd_name(mdt)); + rc = -EINVAL; + } else { + cdt->cdt_state = CDT_DISABLE; + } + } else if (strcmp(kernbuf, CDT_PURGE_CMD) == 0) { rc = hsm_cancel_all_actions(mdt); - } else if (strncmp(buffer, CDT_HELP_CMD, strlen(CDT_HELP_CMD)) == 0) { + } else if (strcmp(kernbuf, CDT_HELP_CMD) == 0) { usage = 1; } else { usage = 1; @@ -1949,51 +1923,209 @@ int lprocfs_wr_hsm_cdt_control(struct file *file, const char *buffer, RETURN(count); } -int lprocfs_rd_hsm_cdt_control(char *page, char **start, off_t off, - int count, int *eof, void *data) +int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data) { - struct obd_device *obd = data; + struct obd_device *obd = m->private; struct coordinator *cdt; - int sz; ENTRY; cdt = &(mdt_dev(obd->obd_lu_dev)->mdt_coordinator); - *eof = 1; if (cdt->cdt_state == CDT_INIT) - sz = snprintf(page, count, "init\n"); + seq_printf(m, "init\n"); else if (cdt->cdt_state == CDT_RUNNING) - sz = snprintf(page, count, "enabled\n"); + seq_printf(m, "enabled\n"); else if (cdt->cdt_state == CDT_STOPPING) - sz = snprintf(page, count, "stopping\n"); + seq_printf(m, "stopping\n"); else if (cdt->cdt_state == CDT_STOPPED) - sz = snprintf(page, count, "stopped\n"); + seq_printf(m, "stopped\n"); else if (cdt->cdt_state == CDT_DISABLE) - sz = snprintf(page, count, "disabled\n"); + seq_printf(m, "disabled\n"); + else + seq_printf(m, "unknown\n"); + + RETURN(0); +} + +static int +mdt_hsm_request_mask_show(struct seq_file *m, __u64 mask) +{ + bool first = true; + int i; + ENTRY; + + for (i = 0; i < 8 * sizeof(mask); i++) { + if (mask & (1UL << i)) { + seq_printf(m, "%s%s", first ? "" : " ", + hsm_copytool_action2name(i)); + first = false; + } + } + seq_putc(m, '\n'); + + RETURN(0); +} + +static int +mdt_hsm_user_request_mask_seq_show(struct seq_file *m, void *data) +{ + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_hsm_request_mask_show(m, cdt->cdt_user_request_mask); +} + +static int +mdt_hsm_group_request_mask_seq_show(struct seq_file *m, void *data) +{ + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_hsm_request_mask_show(m, cdt->cdt_group_request_mask); +} + +static int +mdt_hsm_other_request_mask_seq_show(struct seq_file *m, void *data) +{ + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_hsm_request_mask_show(m, cdt->cdt_other_request_mask); +} + +static inline enum hsm_copytool_action +hsm_copytool_name2action(const char *name) +{ + if (strcasecmp(name, "NOOP") == 0) + return HSMA_NONE; + else if (strcasecmp(name, "ARCHIVE") == 0) + return HSMA_ARCHIVE; + else if (strcasecmp(name, "RESTORE") == 0) + return HSMA_RESTORE; + else if (strcasecmp(name, "REMOVE") == 0) + return HSMA_REMOVE; + else if (strcasecmp(name, "CANCEL") == 0) + return HSMA_CANCEL; else - sz = snprintf(page, count, "unknown\n"); + return -1; +} + +static ssize_t +mdt_write_hsm_request_mask(struct file *file, const char __user *user_buf, + size_t user_count, __u64 *mask) +{ + char *buf, *pos, *name; + size_t buf_size; + __u64 new_mask = 0; + int rc; + ENTRY; + + if (!(user_count < 4096)) + RETURN(-ENOMEM); + + buf_size = user_count + 1; + + OBD_ALLOC(buf, buf_size); + if (buf == NULL) + RETURN(-ENOMEM); + + if (copy_from_user(buf, user_buf, buf_size - 1)) + GOTO(out, rc = -EFAULT); - RETURN(sz); + buf[buf_size - 1] = '\0'; + + pos = buf; + while ((name = strsep(&pos, " \t\v\n")) != NULL) { + int action; + + if (*name == '\0') + continue; + + action = hsm_copytool_name2action(name); + if (action < 0) + GOTO(out, rc = -EINVAL); + + new_mask |= (1UL << action); + } + + *mask = new_mask; + rc = user_count; +out: + OBD_FREE(buf, buf_size); + + RETURN(rc); +} + +static ssize_t +mdt_hsm_user_request_mask_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_write_hsm_request_mask(file, buf, count, + &cdt->cdt_user_request_mask); +} + +static ssize_t +mdt_hsm_group_request_mask_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_write_hsm_request_mask(file, buf, count, + &cdt->cdt_group_request_mask); } +static ssize_t +mdt_hsm_other_request_mask_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct mdt_device *mdt = m->private; + struct coordinator *cdt = &mdt->mdt_coordinator; + + return mdt_write_hsm_request_mask(file, buf, count, + &cdt->cdt_other_request_mask); +} + +LPROC_SEQ_FOPS(mdt_hsm_cdt_loop_period); +LPROC_SEQ_FOPS(mdt_hsm_cdt_grace_delay); +LPROC_SEQ_FOPS(mdt_hsm_cdt_active_req_timeout); +LPROC_SEQ_FOPS(mdt_hsm_cdt_max_requests); +LPROC_SEQ_FOPS(mdt_hsm_cdt_default_archive_id); +LPROC_SEQ_FOPS(mdt_hsm_user_request_mask); +LPROC_SEQ_FOPS(mdt_hsm_group_request_mask); +LPROC_SEQ_FOPS(mdt_hsm_other_request_mask); + static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = { - { "agents", NULL, NULL, NULL, &mdt_hsm_agent_fops, 0 }, - { "agent_actions", NULL, NULL, NULL, - &mdt_agent_actions_fops, 0444 }, - { "grace_delay", lprocfs_rd_hsm_cdt_delay, - lprocfs_wr_hsm_cdt_delay, - NULL, NULL, 0 }, - { "loop_period", lprocfs_rd_hsm_cdt_loop_period, - lprocfs_wr_hsm_cdt_loop_period, - NULL, NULL, 0 }, - { "max_requests", lprocfs_rd_hsm_cdt_max_request, - lprocfs_wr_hsm_cdt_max_request, - NULL, NULL, 0 }, - { "policy", lprocfs_rd_hsm_policy, lprocfs_wr_hsm_policy, - NULL, NULL, 0 }, - { "request_timeout", lprocfs_rd_hsm_cdt_timeout, - lprocfs_wr_hsm_cdt_timeout, - NULL, NULL, 0 }, - { "requests", NULL, NULL, NULL, &mdt_hsm_request_fops, 0 }, + { .name = "agents", + .fops = &mdt_hsm_agent_fops }, + { .name = "actions", + .fops = &mdt_hsm_actions_fops, + .proc_mode = 0444 }, + { .name = "default_archive_id", + .fops = &mdt_hsm_cdt_default_archive_id_fops }, + { .name = "grace_delay", + .fops = &mdt_hsm_cdt_grace_delay_fops }, + { .name = "loop_period", + .fops = &mdt_hsm_cdt_loop_period_fops }, + { .name = "max_requests", + .fops = &mdt_hsm_cdt_max_requests_fops }, + { .name = "policy", + .fops = &mdt_hsm_policy_fops }, + { .name = "active_request_timeout", + .fops = &mdt_hsm_cdt_active_req_timeout_fops }, + { .name = "active_requests", + .fops = &mdt_hsm_active_requests_fops }, + { .name = "user_request_mask", + .fops = &mdt_hsm_user_request_mask_fops, }, + { .name = "group_request_mask", + .fops = &mdt_hsm_group_request_mask_fops, }, + { .name = "other_request_mask", + .fops = &mdt_hsm_other_request_mask_fops, }, { 0 } };