X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_coordinator.c;h=c74cf10ebca9d9dbd4dd8384e497975677eb2369;hb=91144acb3dc1120c00797269afa621c94cb64e1e;hp=2a6bb0edcb5ca31a487a49aa98ad29a550cf62d5;hpb=e49995acfd026f3ca85d05dc1b91d97a8743fe72;p=fs%2Flustre-release.git diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 2a6bb0e..c74cf10 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -38,15 +38,17 @@ #define DEBUG_SUBSYSTEM S_MDS +#include #include #include #include #include #include #include +#include #include "mdt_internal.h" -static struct lprocfs_seq_vars lprocfs_mdt_hsm_vars[]; +static struct lprocfs_vars lprocfs_mdt_hsm_vars[]; /** * get obj and HSM attributes on a fid @@ -140,10 +142,6 @@ struct hsm_scan_data { int hal_used_sz; struct hsm_action_list *hal; } *request; - /* records to be canceled */ - int max_cookie; /** vector size */ - int cookie_cnt; /** used count */ - __u64 *cookies; }; /** @@ -162,7 +160,7 @@ static int mdt_coordinator_cb(const struct lu_env *env, struct llog_rec_hdr *hdr, void *data) { - const struct llog_agent_req_rec *larr; + struct llog_agent_req_rec *larr; struct hsm_scan_data *hsd; struct hsm_action_item *hai; struct mdt_device *mdt; @@ -185,8 +183,8 @@ static int mdt_coordinator_cb(const struct lu_env *env, cdt->cdt_max_requests) break; - /* first search if the request if known in the list we have - * build and if there is room in the request vector */ + /* first search whether the request is found in the list we + * have built and if there is room in the request vector */ empty_slot = -1; found = -1; for (i = 0; i < hsd->max_requests && @@ -290,7 +288,9 @@ static int mdt_coordinator_cb(const struct lu_env *env, break; } case ARS_STARTED: { + struct hsm_progress_kernel pgs; struct cdt_agent_req *car; + cfs_time_t now = cfs_time_current_sec(); cfs_time_t last; /* we search for a running request @@ -307,74 +307,52 @@ static int mdt_coordinator_cb(const struct lu_env *env, /* test if request too long, if yes cancel it * the same way the copy tool acknowledge a cancel request */ - if ((last + cdt->cdt_active_req_timeout) - < cfs_time_current_sec()) { - struct hsm_progress_kernel pgs; - - dump_llog_agent_req_rec("mdt_coordinator_cb(): " - "request timeouted, start " - "cleaning", larr); - /* a too old cancel request just needs to be removed - * this can happen, if copy tool does not support cancel - * for other requests, we have to remove the running - * request and notify the copytool - */ - pgs.hpk_fid = larr->arr_hai.hai_fid; - pgs.hpk_cookie = larr->arr_hai.hai_cookie; - pgs.hpk_extent = larr->arr_hai.hai_extent; - pgs.hpk_flags = HP_FLAG_COMPLETED; - pgs.hpk_errval = ENOSYS; - pgs.hpk_data_version = 0; - /* update request state, but do not record in llog, to - * avoid deadlock on cdt_llog_lock - */ - rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); - if (rc) - CERROR("%s: Cannot cleanup timeouted request: " - DFID" for cookie "LPX64" action=%s\n", - mdt_obd_name(mdt), - PFID(&pgs.hpk_fid), pgs.hpk_cookie, - hsm_copytool_action2name( - larr->arr_hai.hai_action)); - - if (rc == -ENOENT) { - /* The request no longer exists, forget - * about it, and do not send a cancel request - * to the client, for which an error will be - * sent back, leading to an endless cycle of - * cancellation. */ - RETURN(LLOG_DEL_RECORD); - } - - /* add the cookie to the list of record to be - * canceled by caller */ - if (hsd->max_cookie == (hsd->cookie_cnt - 1)) { - __u64 *ptr, *old_ptr; - int old_sz, new_sz, new_cnt; + if (now <= last + cdt->cdt_active_req_timeout) + RETURN(0); - /* need to increase vector size */ - old_sz = sizeof(__u64) * hsd->max_cookie; - old_ptr = hsd->cookies; + dump_llog_agent_req_rec("request timed out, start cleaning", + larr); + /* a too old cancel request just needs to be removed + * this can happen, if copy tool does not support + * cancel for other requests, we have to remove the + * running request and notify the copytool */ + pgs.hpk_fid = larr->arr_hai.hai_fid; + pgs.hpk_cookie = larr->arr_hai.hai_cookie; + pgs.hpk_extent = larr->arr_hai.hai_extent; + pgs.hpk_flags = HP_FLAG_COMPLETED; + pgs.hpk_errval = ENOSYS; + pgs.hpk_data_version = 0; + + /* update request state, but do not record in llog, to + * avoid deadlock on cdt_llog_lock */ + rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); + if (rc) + CERROR("%s: cannot cleanup timed out request: " + DFID" for cookie "LPX64" action=%s\n", + mdt_obd_name(mdt), + PFID(&pgs.hpk_fid), pgs.hpk_cookie, + hsm_copytool_action2name( + larr->arr_hai.hai_action)); + + if (rc == -ENOENT) { + /* The request no longer exists, forget + * about it, and do not send a cancel request + * to the client, for which an error will be + * sent back, leading to an endless cycle of + * cancellation. */ + RETURN(LLOG_DEL_RECORD); + } - new_cnt = 2 * hsd->max_cookie; - new_sz = sizeof(__u64) * new_cnt; + /* XXX A cancel request cannot be cancelled. */ + if (larr->arr_hai.hai_action == HSMA_CANCEL) + RETURN(0); - OBD_ALLOC(ptr, new_sz); - if (!ptr) { - CERROR("%s: Cannot allocate memory " - "(%d o) for cookie vector\n", - mdt_obd_name(mdt), new_sz); - RETURN(-ENOMEM); - } - memcpy(ptr, hsd->cookies, old_sz); - hsd->cookies = ptr; - hsd->max_cookie = new_cnt; - OBD_FREE(old_ptr, old_sz); - } - hsd->cookies[hsd->cookie_cnt] = - larr->arr_hai.hai_cookie; - hsd->cookie_cnt++; - } + larr->arr_status = ARS_CANCELED; + larr->arr_req_change = now; + rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index); + if (rc < 0) + CERROR("%s: cannot update agent log: rc = %d\n", + mdt_obd_name(mdt), rc); break; } case ARS_FAILED: @@ -401,7 +379,7 @@ int hsm_cdt_procfs_init(struct mdt_device *mdt) ENTRY; /* init /proc entries, failure is not critical */ - cdt->cdt_proc_dir = lprocfs_seq_register("hsm", + cdt->cdt_proc_dir = lprocfs_register("hsm", mdt2obd_dev(mdt)->obd_proc_entry, lprocfs_mdt_hsm_vars, mdt); if (IS_ERR(cdt->cdt_proc_dir)) { @@ -433,7 +411,7 @@ void hsm_cdt_procfs_fini(struct mdt_device *mdt) * \param none * \retval var vector */ -struct lprocfs_seq_vars *hsm_cdt_get_proc_vars(void) +struct lprocfs_vars *hsm_cdt_get_proc_vars(void) { return lprocfs_mdt_hsm_vars; } @@ -459,10 +437,6 @@ static int mdt_coordinator(void *data) CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n", mdt_obd_name(mdt), current_pid()); - /* timeouted cookie vector initialization */ - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests * increases due to a change from /proc we do not overflow the * hsd.request[] vector @@ -523,16 +497,6 @@ static int mdt_coordinator(void *data) } } - /* create canceled cookie vector for an arbitrary size - * if needed, vector will grow during llog scan - */ - hsd.max_cookie = 10; - hsd.cookie_cnt = 0; - OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - if (!hsd.cookies) { - rc = -ENOMEM; - goto clean_cb_alloc; - } hsd.request_cnt = 0; rc = cdt_llog_process(mti->mti_env, mdt, @@ -540,23 +504,7 @@ static int mdt_coordinator(void *data) if (rc < 0) goto clean_cb_alloc; - CDEBUG(D_HSM, "Found %d requests to send and %d" - " requests to cancel\n", - hsd.request_cnt, hsd.cookie_cnt); - /* first we cancel llog records of the timeouted requests */ - if (hsd.cookie_cnt > 0) { - rc = mdt_agent_record_update(mti->mti_env, mdt, - hsd.cookies, - hsd.cookie_cnt, - ARS_CANCELED); - if (rc) - CERROR("%s: mdt_agent_record_update() failed, " - "rc=%d, cannot update status to %s " - "for %d cookies\n", - mdt_obd_name(mdt), rc, - agent_req_status2name(ARS_CANCELED), - hsd.cookie_cnt); - } + CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt); if (list_empty(&cdt->cdt_agents)) { CDEBUG(D_HSM, "no agent available, " @@ -638,14 +586,6 @@ static int mdt_coordinator(void *data) kuc_free(hal, hsd.request[i].hal_used_sz); } clean_cb_alloc: - /* free cookie vector allocated for/by callback */ - if (hsd.cookies) { - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; - } - /* free hal allocated by callback */ for (i = 0; i < hsd.max_requests; i++) { if (hsd.request[i].hal) { @@ -666,9 +606,6 @@ out: if (hsd.request) OBD_FREE(hsd.request, hsd.request_sz); - if (hsd.cookies) - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - if (cdt->cdt_state == CDT_STOPPING) { /* request comes from /proc path, so we need to clean cdt * struct */ @@ -1270,7 +1207,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, * ENOSYS only if does not support cancel */ /* this can also happen when cdt calls it to - * for a timeouted request */ + * for a timed out request */ *status = ARS_FAILED; /* to have a cancel event in changelog */ pgs->hpk_errval = ECANCELED; @@ -1400,7 +1337,7 @@ unlock: struct cdt_restore_handle *crh; /* restore in data FID done, we swap the layouts - * only if restore is successfull */ + * only if restore is successful */ if (pgs->hpk_errval == 0) { rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid, &car->car_hai->hai_dfid, &mh); @@ -1471,8 +1408,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, " on fid="DFID"\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); - if (car == NULL) - RETURN(-ENOENT); + RETURN(PTR_ERR(car)); } @@ -1603,10 +1539,9 @@ static int mdt_cancel_all_cb(const struct lu_env *env, larr->arr_status == ARS_STARTED) { larr->arr_status = ARS_CANCELED; larr->arr_req_change = cfs_time_current_sec(); - rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr); - if (rc == 0) - RETURN(LLOG_DEL_RECORD); + rc = llog_write(env, llh, hdr, hdr->lrh_index); } + RETURN(rc); } @@ -1711,7 +1646,7 @@ out: } /** - * check if a request is comptaible with file status + * check if a request is compatible with file status * \param hai [IN] request description * \param hal_an [IN] request archive number (not used) * \param rq_flags [IN] request flags @@ -2065,17 +2000,20 @@ int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data) static int mdt_hsm_request_mask_show(struct seq_file *m, __u64 mask) { - int i, rc = 0; + bool first = true; + int i; ENTRY; for (i = 0; i < 8 * sizeof(mask); i++) { - if (mask & (1UL << i)) - rc += seq_printf(m, "%s%s", rc == 0 ? "" : " ", - hsm_copytool_action2name(i)); + if (mask & (1UL << i)) { + seq_printf(m, "%s%s", first ? "" : " ", + hsm_copytool_action2name(i)); + first = false; + } } - rc += seq_printf(m, "\n"); + seq_putc(m, '\n'); - RETURN(rc); + RETURN(0); } static int @@ -2213,7 +2151,7 @@ LPROC_SEQ_FOPS(mdt_hsm_user_request_mask); LPROC_SEQ_FOPS(mdt_hsm_group_request_mask); LPROC_SEQ_FOPS(mdt_hsm_other_request_mask); -static struct lprocfs_seq_vars lprocfs_mdt_hsm_vars[] = { +static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = { { .name = "agents", .fops = &mdt_hsm_agent_fops }, { .name = "actions",