From 91144acb3dc1120c00797269afa621c94cb64e1e Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Fri, 5 Jun 2015 16:39:23 -0500 Subject: [PATCH] LU-7986 hsm: update actions llog in place Update HSM actions llog entries in place rather than doing an append and cancel. Signed-off-by: John L. Hammond Change-Id: I5c5282d199e43fbc6709d5dc9bc050e0b98fb889 Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin Reviewed-on: http://review.whamcloud.com/19447 Tested-by: Jenkins Reviewed-by: Frank Zago Reviewed-by: jacques-Charles Lafoucriere Tested-by: Maloo --- lustre/mdt/mdt_coordinator.c | 167 ++++++++++++--------------------------- lustre/mdt/mdt_hsm_cdt_actions.c | 38 +-------- lustre/mdt/mdt_internal.h | 3 - 3 files changed, 51 insertions(+), 157 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 9f371bc..c74cf10 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -142,10 +142,6 @@ struct hsm_scan_data { int hal_used_sz; struct hsm_action_list *hal; } *request; - /* records to be canceled */ - int max_cookie; /** vector size */ - int cookie_cnt; /** used count */ - __u64 *cookies; }; /** @@ -164,7 +160,7 @@ static int mdt_coordinator_cb(const struct lu_env *env, struct llog_rec_hdr *hdr, void *data) { - const struct llog_agent_req_rec *larr; + struct llog_agent_req_rec *larr; struct hsm_scan_data *hsd; struct hsm_action_item *hai; struct mdt_device *mdt; @@ -292,7 +288,9 @@ static int mdt_coordinator_cb(const struct lu_env *env, break; } case ARS_STARTED: { + struct hsm_progress_kernel pgs; struct cdt_agent_req *car; + cfs_time_t now = cfs_time_current_sec(); cfs_time_t last; /* we search for a running request @@ -309,74 +307,52 @@ static int mdt_coordinator_cb(const struct lu_env *env, /* test if request too long, if yes cancel it * the same way the copy tool acknowledge a cancel request */ - if ((last + cdt->cdt_active_req_timeout) - < cfs_time_current_sec()) { - struct hsm_progress_kernel pgs; - - dump_llog_agent_req_rec("mdt_coordinator_cb(): " - "request timed out, start " - "cleaning", larr); - /* a too old cancel request just needs to be removed - * this can happen, if copy tool does not support cancel - * for other requests, we have to remove the running - * request and notify the copytool - */ - pgs.hpk_fid = larr->arr_hai.hai_fid; - pgs.hpk_cookie = larr->arr_hai.hai_cookie; - pgs.hpk_extent = larr->arr_hai.hai_extent; - pgs.hpk_flags = HP_FLAG_COMPLETED; - pgs.hpk_errval = ENOSYS; - pgs.hpk_data_version = 0; - /* update request state, but do not record in llog, to - * avoid deadlock on cdt_llog_lock - */ - rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); - if (rc) - CERROR("%s: Cannot cleanup timed out request: " - DFID" for cookie "LPX64" action=%s\n", - mdt_obd_name(mdt), - PFID(&pgs.hpk_fid), pgs.hpk_cookie, - hsm_copytool_action2name( - larr->arr_hai.hai_action)); - - if (rc == -ENOENT) { - /* The request no longer exists, forget - * about it, and do not send a cancel request - * to the client, for which an error will be - * sent back, leading to an endless cycle of - * cancellation. */ - RETURN(LLOG_DEL_RECORD); - } - - /* add the cookie to the list of record to be - * canceled by caller */ - if (hsd->max_cookie == (hsd->cookie_cnt - 1)) { - __u64 *ptr, *old_ptr; - int old_sz, new_sz, new_cnt; + if (now <= last + cdt->cdt_active_req_timeout) + RETURN(0); - /* need to increase vector size */ - old_sz = sizeof(__u64) * hsd->max_cookie; - old_ptr = hsd->cookies; + dump_llog_agent_req_rec("request timed out, start cleaning", + larr); + /* a too old cancel request just needs to be removed + * this can happen, if copy tool does not support + * cancel for other requests, we have to remove the + * running request and notify the copytool */ + pgs.hpk_fid = larr->arr_hai.hai_fid; + pgs.hpk_cookie = larr->arr_hai.hai_cookie; + pgs.hpk_extent = larr->arr_hai.hai_extent; + pgs.hpk_flags = HP_FLAG_COMPLETED; + pgs.hpk_errval = ENOSYS; + pgs.hpk_data_version = 0; + + /* update request state, but do not record in llog, to + * avoid deadlock on cdt_llog_lock */ + rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); + if (rc) + CERROR("%s: cannot cleanup timed out request: " + DFID" for cookie "LPX64" action=%s\n", + mdt_obd_name(mdt), + PFID(&pgs.hpk_fid), pgs.hpk_cookie, + hsm_copytool_action2name( + larr->arr_hai.hai_action)); + + if (rc == -ENOENT) { + /* The request no longer exists, forget + * about it, and do not send a cancel request + * to the client, for which an error will be + * sent back, leading to an endless cycle of + * cancellation. */ + RETURN(LLOG_DEL_RECORD); + } - new_cnt = 2 * hsd->max_cookie; - new_sz = sizeof(__u64) * new_cnt; + /* XXX A cancel request cannot be cancelled. */ + if (larr->arr_hai.hai_action == HSMA_CANCEL) + RETURN(0); - OBD_ALLOC(ptr, new_sz); - if (!ptr) { - CERROR("%s: Cannot allocate memory " - "(%d o) for cookie vector\n", - mdt_obd_name(mdt), new_sz); - RETURN(-ENOMEM); - } - memcpy(ptr, hsd->cookies, old_sz); - hsd->cookies = ptr; - hsd->max_cookie = new_cnt; - OBD_FREE(old_ptr, old_sz); - } - hsd->cookies[hsd->cookie_cnt] = - larr->arr_hai.hai_cookie; - hsd->cookie_cnt++; - } + larr->arr_status = ARS_CANCELED; + larr->arr_req_change = now; + rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index); + if (rc < 0) + CERROR("%s: cannot update agent log: rc = %d\n", + mdt_obd_name(mdt), rc); break; } case ARS_FAILED: @@ -461,10 +437,6 @@ static int mdt_coordinator(void *data) CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n", mdt_obd_name(mdt), current_pid()); - /* timeouted cookie vector initialization */ - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests * increases due to a change from /proc we do not overflow the * hsd.request[] vector @@ -525,16 +497,6 @@ static int mdt_coordinator(void *data) } } - /* create canceled cookie vector for an arbitrary size - * if needed, vector will grow during llog scan - */ - hsd.max_cookie = 10; - hsd.cookie_cnt = 0; - OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - if (!hsd.cookies) { - rc = -ENOMEM; - goto clean_cb_alloc; - } hsd.request_cnt = 0; rc = cdt_llog_process(mti->mti_env, mdt, @@ -542,23 +504,7 @@ static int mdt_coordinator(void *data) if (rc < 0) goto clean_cb_alloc; - CDEBUG(D_HSM, "Found %d requests to send and %d" - " requests to cancel\n", - hsd.request_cnt, hsd.cookie_cnt); - /* first we cancel llog records of the timed out requests */ - if (hsd.cookie_cnt > 0) { - rc = mdt_agent_record_update(mti->mti_env, mdt, - hsd.cookies, - hsd.cookie_cnt, - ARS_CANCELED); - if (rc) - CERROR("%s: mdt_agent_record_update() failed, " - "rc=%d, cannot update status to %s " - "for %d cookies\n", - mdt_obd_name(mdt), rc, - agent_req_status2name(ARS_CANCELED), - hsd.cookie_cnt); - } + CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt); if (list_empty(&cdt->cdt_agents)) { CDEBUG(D_HSM, "no agent available, " @@ -640,14 +586,6 @@ static int mdt_coordinator(void *data) kuc_free(hal, hsd.request[i].hal_used_sz); } clean_cb_alloc: - /* free cookie vector allocated for/by callback */ - if (hsd.cookies) { - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - hsd.max_cookie = 0; - hsd.cookie_cnt = 0; - hsd.cookies = NULL; - } - /* free hal allocated by callback */ for (i = 0; i < hsd.max_requests; i++) { if (hsd.request[i].hal) { @@ -668,9 +606,6 @@ out: if (hsd.request) OBD_FREE(hsd.request, hsd.request_sz); - if (hsd.cookies) - OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64)); - if (cdt->cdt_state == CDT_STOPPING) { /* request comes from /proc path, so we need to clean cdt * struct */ @@ -1473,8 +1408,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, " on fid="DFID"\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); - if (car == NULL) - RETURN(-ENOENT); + RETURN(PTR_ERR(car)); } @@ -1605,10 +1539,9 @@ static int mdt_cancel_all_cb(const struct lu_env *env, larr->arr_status == ARS_STARTED) { larr->arr_status = ARS_CANCELED; larr->arr_req_change = cfs_time_current_sec(); - rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr); - if (rc == 0) - RETURN(LLOG_DEL_RECORD); + rc = llog_write(env, llh, hdr, hdr->lrh_index); } + RETURN(rc); } diff --git a/lustre/mdt/mdt_hsm_cdt_actions.c b/lustre/mdt/mdt_hsm_cdt_actions.c index 5c5f254..137cb4a 100644 --- a/lustre/mdt/mdt_hsm_cdt_actions.c +++ b/lustre/mdt/mdt_hsm_cdt_actions.c @@ -205,12 +205,10 @@ static int mdt_agent_record_update_cb(const struct lu_env *env, struct llog_agent_req_rec *larr; struct data_update_cb *ducb; int rc, i; - int found; ENTRY; larr = (struct llog_agent_req_rec *)hdr; ducb = data; - found = 0; /* check if all done */ if (ducb->cookies_count == ducb->cookies_done) @@ -237,10 +235,8 @@ static int mdt_agent_record_update_cb(const struct lu_env *env, larr->arr_status = ducb->status; larr->arr_req_change = ducb->change_time; - rc = mdt_agent_llog_update_rec(env, ducb->mdt, llh, - larr); + rc = llog_write(env, llh, hdr, hdr->lrh_index); ducb->cookies_done++; - found = 1; break; } } @@ -249,9 +245,6 @@ static int mdt_agent_record_update_cb(const struct lu_env *env, CERROR("%s: mdt_agent_llog_update_rec() failed, rc = %d\n", mdt_obd_name(ducb->mdt), rc); - if (found == 1) - RETURN(LLOG_DEL_RECORD); - RETURN(rc); } @@ -290,35 +283,6 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, RETURN(rc); } -/** - * update a llog record - * cdt_llog_lock must be hold - * \param env [IN] environment - * \param mdt [IN] mdt device - * \param llh [IN] llog handle, must be a catalog handle - * \param larr [IN] record - * \retval 0 success - * \retval -ve failure - */ -int mdt_agent_llog_update_rec(const struct lu_env *env, - struct mdt_device *mdt, struct llog_handle *llh, - struct llog_agent_req_rec *larr) -{ - struct llog_rec_hdr saved_hdr; - int rc; - ENTRY; - - /* saved old record info */ - saved_hdr = larr->arr_hdr; - /* add new record with updated values */ - larr->arr_hdr.lrh_id = 0; - larr->arr_hdr.lrh_index = 0; - rc = llog_cat_add(env, llh->u.phd.phd_cat_handle, &larr->arr_hdr, - NULL); - larr->arr_hdr = saved_hdr; - RETURN(rc); -} - /* * Agent actions /proc seq_file methods * As llog processing uses a callback for each entry, we cannot do a sequential diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index d6256fd..8829027 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -807,9 +807,6 @@ int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt, int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, __u64 *cookies, int cookies_count, enum agent_req_status status); -int mdt_agent_llog_update_rec(const struct lu_env *env, struct mdt_device *mdt, - struct llog_handle *llh, - struct llog_agent_req_rec *larr); /* mdt/mdt_hsm_cdt_agent.c */ extern const struct file_operations mdt_hsm_agent_fops; -- 1.8.3.1