From f3a415289b560b5f422efe2bd08b3b7cff113cf0 Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Tue, 18 Jul 2017 10:21:53 +0200 Subject: [PATCH] LU-7988 hsm: update many cookie status at once Instead of calling mdt_agent_record_update, which calls cdt_llog_process, once for every HAL, build a list of the cookies to update with their status and call mdt_agent_record_update at most once per second. Update mdt_agent_record_update to take a status for every cookie. Test-Parameters: trivial testlist=sanity-hsm Signed-off-by: frank zago Change-Id: Ie4afd667727e07570ed6a2d51e8dfaea8302b97b Signed-off-by: Ben Evans Signed-off-by: Bruno Faccini Reviewed-on: https://review.whamcloud.com/19584 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Quentin Bouget Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_coordinator.c | 71 ++++++++++++++++++++++++------------ lustre/mdt/mdt_hsm_cdt_actions.c | 78 +++++++++++++++++++++------------------- lustre/mdt/mdt_hsm_cdt_agent.c | 25 +++++++++---- lustre/mdt/mdt_internal.h | 11 ++++-- 4 files changed, 116 insertions(+), 69 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index a579734..1372c42 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -560,6 +560,10 @@ static int mdt_coordinator(void *data) while (1) { int i; + int update_idx = 0; + int updates_sz; + int updates_cnt; + struct hsm_record_update *updates; /* Limit execution of the expensive requests traversal * to at most every "wait_event_time" jiffies. This prevents @@ -636,14 +640,32 @@ static int mdt_coordinator(void *data) goto clean_cb_alloc; } + /* Compute how many HAI we have in all the requests */ + updates_cnt = 0; + for (i = 0; i < hsd.request_cnt; i++) { + const struct hsm_scan_request *request = + &hsd.request[i]; + + updates_cnt += request->hal->hal_count; + } + + /* Allocate a temporary array to store the cookies to + * update, and their status.
*/ + updates_sz = updates_cnt * sizeof(*updates); + OBD_ALLOC(updates, updates_sz); + if (updates == NULL) { + CERROR("%s: Cannot allocate memory (%d o) " + "for %d updates\n", + mdt_obd_name(mdt), updates_sz, updates_cnt); + continue; + } + /* here hsd contains a list of requests to be started */ for (i = 0; i < hsd.request_cnt; i++) { struct hsm_scan_request *request = &hsd.request[i]; struct hsm_action_list *hal = request->hal; struct hsm_action_item *hai; - __u64 *cookies; - int sz, j; - enum agent_req_status status; + int j; /* still room for work ? */ if (atomic_read(&cdt->cdt_request_count) >= @@ -655,34 +677,32 @@ static int mdt_coordinator(void *data) * if the copy tool failed to do the request * it has to use hsm_progress */ - status = (rc ? ARS_WAITING : ARS_STARTED); /* set up cookie vector to set records status * after copy tools start or failed */ - sz = hal->hal_count * sizeof(__u64); - OBD_ALLOC(cookies, sz); - if (cookies == NULL) - continue; - hai = hai_first(hal); for (j = 0; j < hal->hal_count; j++) { - cookies[j] = hai->hai_cookie; + updates[update_idx].cookie = hai->hai_cookie; + updates[update_idx].status = + (rc ? 
ARS_WAITING : ARS_STARTED); hai = hai_next(hai); + update_idx++; } + } - rc = mdt_agent_record_update(mti->mti_env, mdt, cookies, - hal->hal_count, status); + if (update_idx) { + rc = mdt_agent_record_update(mti->mti_env, mdt, + updates, update_idx); if (rc) CERROR("%s: mdt_agent_record_update() failed, " - "rc=%d, cannot update status to %s " + "rc=%d, cannot update records " "for %d cookies\n", - mdt_obd_name(mdt), rc, - agent_req_status2name(status), - hal->hal_count); - - OBD_FREE(cookies, sz); + mdt_obd_name(mdt), rc, update_idx); } + + OBD_FREE(updates, updates_sz); + clean_cb_alloc: /* free hal allocated by callback */ for (i = 0; i < hsd.request_cnt; i++) { @@ -1140,9 +1160,13 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti, * it will be done when updating the request status */ if (hai->hai_action == HSMA_CANCEL) { + struct hsm_record_update update = { + .cookie = hai->hai_cookie, + .status = ARS_CANCELED, + }; + rc = mdt_agent_record_update(mti->mti_env, mti->mti_mdt, - &hai->hai_cookie, - 1, ARS_CANCELED); + &update, 1); if (rc) { CERROR("%s: mdt_agent_record_update() failed, " "rc=%d, cannot update status to %s " @@ -1567,10 +1591,13 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* update record first (LU-9075) */ if (update_record) { int rc1; + struct hsm_record_update update = { + .cookie = pgs->hpk_cookie, + .status = status, + }; rc1 = mdt_agent_record_update(mti->mti_env, mdt, - &pgs->hpk_cookie, 1, - status); + &update, 1); if (rc1) CERROR("%s: mdt_agent_record_update() failed," " rc=%d, cannot update status to %s" diff --git a/lustre/mdt/mdt_hsm_cdt_actions.c b/lustre/mdt/mdt_hsm_cdt_actions.c index abb3e6a..b17aa17 100644 --- a/lustre/mdt/mdt_hsm_cdt_actions.c +++ b/lustre/mdt/mdt_hsm_cdt_actions.c @@ -318,10 +318,9 @@ free: */ struct data_update_cb { struct mdt_device *mdt; - __u64 *cookies; - int cookies_count; - int cookies_done; - enum agent_req_status status; + struct hsm_record_update *updates; + unsigned int 
updates_count; + unsigned int updates_done; cfs_time_t change_time; }; @@ -348,32 +347,38 @@ static int mdt_agent_record_update_cb(const struct lu_env *env, ducb = data; /* check if all done */ - if (ducb->cookies_count == ducb->cookies_done) + if (ducb->updates_count == ducb->updates_done) RETURN(LLOG_PROC_BREAK); /* if record is in final state, never change */ - /* if record is a cancel request, it cannot be canceled - * this is to manage the following case: - * when a request is canceled, we have 2 records with the - * the same cookie : the one to cancel and the cancel request - * the 1st has to be set to ARS_CANCELED and the 2nd to ARS_SUCCEED - */ - if (agent_req_in_final_state(larr->arr_status) || - (larr->arr_hai.hai_action == HSMA_CANCEL && - ducb->status == ARS_CANCELED)) + if (agent_req_in_final_state(larr->arr_status)) RETURN(0); rc = 0; - for (i = 0 ; i < ducb->cookies_count ; i++) { + for (i = 0 ; i < ducb->updates_count ; i++) { + struct hsm_record_update *update = &ducb->updates[i]; + CDEBUG(D_HSM, "%s: search %#llx, found %#llx\n", - mdt_obd_name(ducb->mdt), ducb->cookies[i], + mdt_obd_name(ducb->mdt), update->cookie, larr->arr_hai.hai_cookie); - if (larr->arr_hai.hai_cookie == ducb->cookies[i]) { - - larr->arr_status = ducb->status; + if (larr->arr_hai.hai_cookie == update->cookie) { + + /* If record is a cancel request, it cannot be + * canceled. 
This is to manage the following + * case: when a request is canceled, we have 2 + * records with the same cookie: the one + * to cancel and the cancel request; the 1st + * has to be set to ARS_CANCELED and the 2nd + * to ARS_SUCCEED + */ + if (larr->arr_hai.hai_action == HSMA_CANCEL && + update->status == ARS_CANCELED) + RETURN(0); + + larr->arr_status = update->status; larr->arr_req_change = ducb->change_time; rc = llog_write(env, llh, hdr, hdr->lrh_index); - ducb->cookies_done++; + ducb->updates_done++; break; } } @@ -387,17 +392,18 @@ static int mdt_agent_record_update_cb(const struct lu_env *env, /** * update an entry in agent llog + * * \param env [IN] environment * \param mdt [IN] MDT device - * \param cookie [IN] entries to update - * log cookie are returned by register - * \param status [IN] new status of the request - * \retval 0 success - * \retval -ve failure + * \param updates [IN] array of entries to update + * \param updates_count [IN] number of entries in updates + * + * \retval 0 on success + * \retval negative on failure */ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, - __u64 *cookies, int cookies_count, - enum agent_req_status status) + struct hsm_record_update *updates, + unsigned int updates_count) { struct data_update_cb ducb; u32 start_cat_idx = -1; @@ -410,13 +416,14 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, /* Find the first location (start_cat_idx, start_rec_idx) * among the records corresponding to cookies. */ - for (i = 0; i < cookies_count; i++) { + for (i = 0; i < updates_count; i++) { /* If we cannot find a cached location for a cookie * (perhaps because the MDT was restart then we must * start from the beginning. In this case * mdt_agent_record_hash_get() sets both of cat_idx and * rec_idx to 0.
*/ - cdt_agent_record_hash_lookup(&mdt->mdt_coordinator, cookies[i], + cdt_agent_record_hash_lookup(&mdt->mdt_coordinator, + updates[i].cookie, &cat_idx, &rec_idx); if (cat_idx < start_cat_idx) { start_cat_idx = cat_idx; @@ -432,20 +439,18 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, start_rec_idx -= 1; ducb.mdt = mdt; - ducb.cookies = cookies; - ducb.cookies_count = cookies_count; - ducb.cookies_done = 0; - ducb.status = status; + ducb.updates = updates; + ducb.updates_count = updates_count; + ducb.updates_done = 0; ducb.change_time = cfs_time_current_sec(); rc = cdt_llog_process(env, mdt, mdt_agent_record_update_cb, &ducb, start_cat_idx, start_rec_idx, WRITE); if (rc < 0) CERROR("%s: cdt_llog_process() failed, rc=%d, cannot update " - "status to %s for %d cookies, done %d\n", + "status for %u cookies, done %u\n", mdt_obd_name(mdt), rc, - agent_req_status2name(status), - cookies_count, ducb.cookies_done); + updates_count, ducb.updates_done); RETURN(rc); } @@ -694,4 +699,3 @@ const struct file_operations mdt_hsm_actions_fops = { .llseek = seq_lseek, .release = lprocfs_release_hsm_actions, }; - diff --git a/lustre/mdt/mdt_hsm_cdt_agent.c b/lustre/mdt/mdt_hsm_cdt_agent.c index 4947d28..579c309 100644 --- a/lustre/mdt/mdt_hsm_cdt_agent.c +++ b/lustre/mdt/mdt_hsm_cdt_agent.c @@ -404,6 +404,8 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) { + struct hsm_record_update update; + /* only removes are concerned */ if (hai->hai_action != HSMA_REMOVE) { /* count if other actions than HSMA_REMOVE, @@ -423,9 +425,11 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, * XXX: this should only cause duplicates to be sent, * unless a method to record already successfully * reached archive_ids is implemented */ + + update.cookie = hai->hai_cookie; + update.status = ARS_SUCCEED; rc2 = mdt_agent_record_update(mti->mti_env, mdt, - &hai->hai_cookie, - 1, 
ARS_SUCCEED); + &update, 1); if (rc2) { CERROR("%s: mdt_agent_record_update() " "failed, cannot update " @@ -472,13 +476,17 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, if (!IS_ERR(obj)) { mdt_object_put(mti->mti_env, obj); } else if (PTR_ERR(obj) == -ENOENT) { + struct hsm_record_update update = { + .cookie = hai->hai_cookie, + .status = ARS_FAILED, + }; + if (hai->hai_action == HSMA_REMOVE) continue; fail_request = true; rc = mdt_agent_record_update(mti->mti_env, mdt, - &hai->hai_cookie, - 1, ARS_FAILED); + &update, 1); if (rc < 0) { CERROR("%s: mdt_agent_record_update() failed, " "cannot update status to %s for cookie " @@ -496,14 +504,18 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id, hal->hal_flags, &hsm)) { + struct hsm_record_update update = { + .cookie = hai->hai_cookie, + .status = ARS_FAILED, + }; + /* incompatible request, we abort the request */ /* next time coordinator will wake up, it will * make the same compound with valid only * records */ fail_request = true; rc = mdt_agent_record_update(mti->mti_env, mdt, - &hai->hai_cookie, - 1, ARS_FAILED); + &update, 1); if (rc) { CERROR("%s: mdt_agent_record_update() failed, " "cannot update status to %s for cookie " @@ -758,4 +770,3 @@ const struct file_operations mdt_hsm_agent_fops = { .llseek = seq_lseek, .release = lprocfs_seq_release, }; - diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 3dc5943..23c993c 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -526,6 +526,11 @@ struct cdt_restore_handle { }; extern struct kmem_cache *mdt_hsm_cdt_kmem; /** restore handle slab cache */ +struct hsm_record_update { + __u64 cookie; + enum agent_req_status status; +}; + static inline const struct md_device_operations * mdt_child_ops(struct mdt_device * m) { @@ -832,9 +837,9 @@ int cdt_llog_process(const struct lu_env *env, struct mdt_device *mdt, int mdt_agent_record_add(const struct lu_env *env, 
struct mdt_device *mdt, __u64 compound_id, __u32 archive_id, __u64 flags, struct hsm_action_item *hai); -int mdt_agent_record_update(const struct lu_env *env, - struct mdt_device *mdt, __u64 *cookies, - int cookies_count, enum agent_req_status status); +int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt, + struct hsm_record_update *updates, + unsigned int updates_count); void cdt_agent_record_hash_add(struct coordinator *cdt, u64 cookie, u32 cat_idt, u32 rec_idx); void cdt_agent_record_hash_lookup(struct coordinator *cdt, u64 cookie, -- 1.8.3.1