X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_coordinator.c;h=536320ececb6645e4be141821ffcb2c546e0db04;hp=c74cf10ebca9d9dbd4dd8384e497975677eb2369;hb=9811edb26d3bff66c61acc39a72554461445298f;hpb=91144acb3dc1120c00797269afa621c94cb64e1e diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index c74cf10..536320e 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -23,7 +23,7 @@ * Copyright (c) 2011, 2012 Commissariat a l'energie atomique et aux energies * alternatives * - * Copyright (c) 2013, 2014, Intel Corporation. + * Copyright (c) 2013, 2016, Intel Corporation. * Use is subject to license terms. */ /* @@ -102,8 +102,8 @@ void mdt_hsm_dump_hal(int level, const char *prefix, struct hsm_action_item *hai; char buf[12]; - CDEBUG(level, "%s: HAL header: version %X count %d compound "LPX64 - " archive_id %d flags "LPX64"\n", + CDEBUG(level, "%s: HAL header: version %X count %d compound %#llx" + " archive_id %d flags %#llx\n", prefix, hal->hal_version, hal->hal_count, hal->hal_compound_id, hal->hal_archive_id, hal->hal_flags); @@ -111,8 +111,8 @@ void mdt_hsm_dump_hal(int level, const char *prefix, for (i = 0; i < hal->hal_count; i++) { sz = hai->hai_len - sizeof(*hai); CDEBUG(level, "%s %d: fid="DFID" dfid="DFID - " compound/cookie="LPX64"/"LPX64 - " action=%s extent="LPX64"-"LPX64" gid="LPX64 + " compound/cookie=%#llx/%#llx" + " action=%s extent=%#llx-%#llx gid=%#llx" " datalen=%d data=[%s]\n", prefix, i, PFID(&hai->hai_fid), PFID(&hai->hai_dfid), @@ -130,18 +130,19 @@ void mdt_hsm_dump_hal(int level, const char *prefix, * data passed to llog_cat_process() callback * to scan requests and take actions */ +struct hsm_scan_request { + int hal_sz; + int hal_used_sz; + struct hsm_action_list *hal; +}; + struct hsm_scan_data { struct mdt_thread_info *mti; char fs_name[MTI_NAME_MAXLEN+1]; /* request to be send to agents */ - int request_sz; /** allocated size */ int max_requests; /** vector size */ int request_cnt; /** used count */ - struct { - int hal_sz; - int hal_used_sz; - struct hsm_action_list *hal; - } *request; + struct hsm_scan_request *request; }; /** @@ -176,55 +177,47 @@ static int mdt_coordinator_cb(const struct lu_env *env, dump_llog_agent_req_rec("mdt_coordinator_cb(): ", larr); switch (larr->arr_status) { case ARS_WAITING: { - int i, empty_slot, found; + int i; + struct hsm_scan_request *request; /* Are agents full? */ - if (atomic_read(&cdt->cdt_request_count) == + if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests) break; - /* first search whether the request is found in the list we - * have built and if there is room in the request vector */ - empty_slot = -1; - found = -1; - for (i = 0; i < hsd->max_requests && - (empty_slot == -1 || found == -1); i++) { - if (hsd->request[i].hal == NULL) { - empty_slot = i; - continue; - } + /* first search whether the request is found in the + * list we have built. */ + request = NULL; + for (i = 0; i < hsd->request_cnt; i++) { if (hsd->request[i].hal->hal_compound_id == - larr->arr_compound_id) { - found = i; - continue; + larr->arr_compound_id) { + request = &hsd->request[i]; + break; } } - if (found == -1 && empty_slot == -1) - /* unknown request and no more room for new request, - * continue scan for to find other entries for - * already found request - */ - RETURN(0); - if (found == -1) { + if (!request) { struct hsm_action_list *hal; - /* request is not already known */ + if (hsd->request_cnt == hsd->max_requests) + /* Unknown request and no more room + * for a new request. Continue to scan + * to find other entries for already + * existing requests. + */ + RETURN(0); + + request = &hsd->request[hsd->request_cnt]; + /* allocates hai vector size just needs to be large * enough */ - hsd->request[empty_slot].hal_sz = - sizeof(*hsd->request[empty_slot].hal) + - cfs_size_round(MTI_NAME_MAXLEN+1) + - 2 * cfs_size_round(larr->arr_hai.hai_len); - OBD_ALLOC(hal, hsd->request[empty_slot].hal_sz); - if (!hal) { - CERROR("%s: Cannot allocate memory (%d o)" - "for compound "LPX64"\n", - mdt_obd_name(mdt), - hsd->request[i].hal_sz, - larr->arr_compound_id); + request->hal_sz = + sizeof(*request->hal) + + cfs_size_round(MTI_NAME_MAXLEN+1) + + 2 * cfs_size_round(larr->arr_hai.hai_len); + OBD_ALLOC(hal, request->hal_sz); + if (!hal) RETURN(-ENOMEM); - } hal->hal_version = HAL_VERSION; strlcpy(hal->hal_fsname, hsd->fs_name, MTI_NAME_MAXLEN + 1); @@ -232,10 +225,9 @@ static int mdt_coordinator_cb(const struct lu_env *env, hal->hal_archive_id = larr->arr_archive_id; hal->hal_flags = larr->arr_flags; hal->hal_count = 0; - hsd->request[empty_slot].hal_used_sz = hal_size(hal); - hsd->request[empty_slot].hal = hal; + request->hal_used_sz = hal_size(hal); + request->hal = hal; hsd->request_cnt++; - found = empty_slot; hai = hai_first(hal); } else { /* request is known */ @@ -247,44 +239,37 @@ static int mdt_coordinator_cb(const struct lu_env *env, * where the files are not archived in the same backend */ if (larr->arr_archive_id != - hsd->request[found].hal->hal_archive_id) + request->hal->hal_archive_id) RETURN(0); - if (hsd->request[found].hal_sz < - hsd->request[found].hal_used_sz + - cfs_size_round(larr->arr_hai.hai_len)) { + if (request->hal_sz < + request->hal_used_sz + + cfs_size_round(larr->arr_hai.hai_len)) { /* Not enough room, need an extension */ void *hal_buffer; int sz; - sz = 2 * hsd->request[found].hal_sz; + sz = 2 * request->hal_sz; OBD_ALLOC(hal_buffer, sz); - if (!hal_buffer) { - CERROR("%s: Cannot allocate memory " - "(%d o) for compound "LPX64"\n", - mdt_obd_name(mdt), sz, - larr->arr_compound_id); + if (!hal_buffer) RETURN(-ENOMEM); - } - memcpy(hal_buffer, hsd->request[found].hal, - hsd->request[found].hal_used_sz); - OBD_FREE(hsd->request[found].hal, - hsd->request[found].hal_sz); - hsd->request[found].hal = hal_buffer; - hsd->request[found].hal_sz = sz; + memcpy(hal_buffer, request->hal, + request->hal_used_sz); + OBD_FREE(request->hal, + request->hal_sz); + request->hal = hal_buffer; + request->hal_sz = sz; } - hai = hai_first(hsd->request[found].hal); - for (i = 0; i < hsd->request[found].hal->hal_count; - i++) + hai = hai_first(request->hal); + for (i = 0; i < request->hal->hal_count; i++) hai = hai_next(hai); } memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len); hai->hai_cookie = larr->arr_hai.hai_cookie; hai->hai_gid = larr->arr_hai.hai_gid; - hsd->request[found].hal_used_sz += - cfs_size_round(hai->hai_len); - hsd->request[found].hal->hal_count++; + request->hal_used_sz += cfs_size_round(hai->hai_len); + request->hal->hal_count++; break; } case ARS_STARTED: { @@ -328,7 +313,7 @@ static int mdt_coordinator_cb(const struct lu_env *env, rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0); if (rc) CERROR("%s: cannot cleanup timed out request: " - DFID" for cookie "LPX64" action=%s\n", + DFID" for cookie %#llx action=%s\n", mdt_obd_name(mdt), PFID(&pgs.hpk_fid), pgs.hpk_cookie, hsm_copytool_action2name( @@ -429,10 +414,11 @@ static int mdt_coordinator(void *data) struct coordinator *cdt = &mdt->mdt_coordinator; struct hsm_scan_data hsd = { NULL }; int rc = 0; + int request_sz; ENTRY; - cdt->cdt_thread.t_flags = SVC_RUNNING; - wake_up(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_RUNNING; + wake_up(&cdt->cdt_waitq); CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n", mdt_obd_name(mdt), current_pid()); @@ -442,8 +428,8 @@ static int mdt_coordinator(void *data) * hsd.request[] vector */ hsd.max_requests = cdt->cdt_max_requests; - hsd.request_sz = hsd.max_requests * sizeof(*hsd.request); - OBD_ALLOC(hsd.request, hsd.request_sz); + request_sz = hsd.max_requests * sizeof(*hsd.request); + OBD_ALLOC(hsd.request, request_sz); if (!hsd.request) GOTO(out, rc = -ENOMEM); @@ -456,23 +442,22 @@ static int mdt_coordinator(void *data) lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period), NULL, NULL); - l_wait_event(cdt->cdt_thread.t_ctl_waitq, - (cdt->cdt_thread.t_flags & - (SVC_STOPPING|SVC_EVENT)), + l_wait_event(cdt->cdt_waitq, + cdt->cdt_flags & (SVC_STOPPING|SVC_EVENT), &lwi); CDEBUG(D_HSM, "coordinator resumes\n"); - if (cdt->cdt_thread.t_flags & SVC_STOPPING || + if (cdt->cdt_flags & SVC_STOPPING || cdt->cdt_state == CDT_STOPPING) { - cdt->cdt_thread.t_flags &= ~SVC_STOPPING; + cdt->cdt_flags &= ~SVC_STOPPING; rc = 0; break; } /* wake up before timeout, new work arrives */ - if (cdt->cdt_thread.t_flags & SVC_EVENT) - cdt->cdt_thread.t_flags &= ~SVC_EVENT; + if (cdt->cdt_flags & SVC_EVENT) + cdt->cdt_flags &= ~SVC_EVENT; /* if coordinator is suspended continue to wait */ if (cdt->cdt_state == CDT_DISABLE) { @@ -486,11 +471,10 @@ static int mdt_coordinator(void *data) /* cdt_max_requests has changed, * we need to allocate a new buffer */ - OBD_FREE(hsd.request, hsd.request_sz); + OBD_FREE(hsd.request, request_sz); hsd.max_requests = cdt->cdt_max_requests; - hsd.request_sz = - hsd.max_requests * sizeof(*hsd.request); - OBD_ALLOC(hsd.request, hsd.request_sz); + request_sz = hsd.max_requests * sizeof(*hsd.request); + OBD_ALLOC(hsd.request, request_sz); if (!hsd.request) { rc = -ENOMEM; break; @@ -513,38 +497,19 @@ static int mdt_coordinator(void *data) } /* here hsd contains a list of requests to be started */ - for (i = 0; i < hsd.max_requests; i++) { - struct hsm_action_list *hal; + for (i = 0; i < hsd.request_cnt; i++) { + struct hsm_scan_request *request = &hsd.request[i]; + struct hsm_action_list *hal = request->hal; struct hsm_action_item *hai; __u64 *cookies; int sz, j; enum agent_req_status status; /* still room for work ? */ - if (atomic_read(&cdt->cdt_request_count) == + if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests) break; - if (hsd.request[i].hal == NULL) - continue; - - /* found a request, we start it */ - /* kuc payload allocation so we avoid an additionnal - * allocation in mdt_hsm_agent_send() - */ - hal = kuc_alloc(hsd.request[i].hal_used_sz, - KUC_TRANSPORT_HSM, HMT_ACTION_LIST); - if (IS_ERR(hal)) { - CERROR("%s: Cannot allocate memory (%d o) " - "for compound "LPX64"\n", - mdt_obd_name(mdt), - hsd.request[i].hal_used_sz, - hsd.request[i].hal->hal_compound_id); - continue; - } - memcpy(hal, hsd.request[i].hal, - hsd.request[i].hal_used_sz); - rc = mdt_hsm_agent_send(mti, hal, 0); /* if failure, we suppose it is temporary * if the copy tool failed to do the request @@ -555,56 +520,41 @@ static int mdt_coordinator(void *data) /* set up cookie vector to set records status * after copy tools start or failed */ - sz = hsd.request[i].hal->hal_count * sizeof(__u64); + sz = hal->hal_count * sizeof(__u64); OBD_ALLOC(cookies, sz); - if (cookies == NULL) { - CERROR("%s: Cannot allocate memory (%d o) " - "for cookies vector "LPX64"\n", - mdt_obd_name(mdt), sz, - hsd.request[i].hal->hal_compound_id); - kuc_free(hal, hsd.request[i].hal_used_sz); + if (cookies == NULL) continue; - } + hai = hai_first(hal); - for (j = 0; j < hsd.request[i].hal->hal_count; j++) { + for (j = 0; j < hal->hal_count; j++) { cookies[j] = hai->hai_cookie; hai = hai_next(hai); } rc = mdt_agent_record_update(mti->mti_env, mdt, cookies, - hsd.request[i].hal->hal_count, - status); + hal->hal_count, status); if (rc) CERROR("%s: mdt_agent_record_update() failed, " "rc=%d, cannot update status to %s " "for %d cookies\n", mdt_obd_name(mdt), rc, agent_req_status2name(status), - hsd.request[i].hal->hal_count); + hal->hal_count); OBD_FREE(cookies, sz); - kuc_free(hal, hsd.request[i].hal_used_sz); } clean_cb_alloc: /* free hal allocated by callback */ - for (i = 0; i < hsd.max_requests; i++) { - if (hsd.request[i].hal) { - OBD_FREE(hsd.request[i].hal, - hsd.request[i].hal_sz); - hsd.request[i].hal_sz = 0; - hsd.request[i].hal = NULL; - hsd.request_cnt--; - } - } - LASSERT(hsd.request_cnt == 0); + for (i = 0; i < hsd.request_cnt; i++) { + struct hsm_scan_request *request = &hsd.request[i]; - /* reset callback data */ - memset(hsd.request, 0, hsd.request_sz); + OBD_FREE(request->hal, request->hal_sz); + } } EXIT; out: if (hsd.request) - OBD_FREE(hsd.request, hsd.request_sz); + OBD_FREE(hsd.request, request_sz); if (cdt->cdt_state == CDT_STOPPING) { /* request comes from /proc path, so we need to clean cdt @@ -616,8 +566,8 @@ out: * by mdt_stop_coordinator(), we have to ack * and cdt cleaning will be done by event sender */ - cdt->cdt_thread.t_flags = SVC_STOPPED; - wake_up(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_STOPPED; + wake_up(&cdt->cdt_waitq); } if (rc != 0) @@ -639,7 +589,7 @@ out: * \retval cdt_restore_handle found * \retval NULL not found */ -static struct cdt_restore_handle *hsm_restore_hdl_find(struct coordinator *cdt, +struct cdt_restore_handle *mdt_hsm_restore_hdl_find(struct coordinator *cdt, const struct lu_fid *fid) { struct cdt_restore_handle *crh; @@ -700,6 +650,17 @@ static int hsm_restore_cb(const struct lu_env *env, /* restore request not in a final state */ + /* force replay of restore requests left in started state from previous + * CDT context, to be canceled later if finally found to be incompatible + * when being re-started */ + if (larr->arr_status == ARS_STARTED) { + larr->arr_status = ARS_WAITING; + larr->arr_req_change = cfs_time_current_sec(); + rc = llog_write(env, llh, hdr, hdr->lrh_index); + if (rc != 0) + GOTO(out, rc); + } + OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem); if (crh == NULL) RETURN(-ENOMEM); @@ -788,8 +749,8 @@ int mdt_hsm_cdt_wakeup(struct mdt_device *mdt) RETURN(-ESRCH); /* wake up coordinator */ - cdt->cdt_thread.t_flags = SVC_EVENT; - wake_up(&cdt->cdt_thread.t_ctl_waitq); + cdt->cdt_flags = SVC_EVENT; + wake_up(&cdt->cdt_waitq); RETURN(0); } @@ -809,7 +770,7 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt) cdt->cdt_state = CDT_STOPPED; - init_waitqueue_head(&cdt->cdt_thread.t_ctl_waitq); + init_waitqueue_head(&cdt->cdt_waitq); mutex_init(&cdt->cdt_llog_lock); init_rwsem(&cdt->cdt_agent_lock); init_rwsem(&cdt->cdt_request_lock); @@ -876,7 +837,7 @@ int mdt_hsm_cdt_fini(struct mdt_device *mdt) * \retval 0 success * \retval -ve failure */ -int mdt_hsm_cdt_start(struct mdt_device *mdt) +static int mdt_hsm_cdt_start(struct mdt_device *mdt) { struct coordinator *cdt = &mdt->mdt_coordinator; int rc; @@ -934,8 +895,8 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt) rc = 0; } - wait_event(cdt->cdt_thread.t_ctl_waitq, - (cdt->cdt_thread.t_flags & SVC_RUNNING)); + wait_event(cdt->cdt_waitq, + (cdt->cdt_flags & SVC_RUNNING)); cdt->cdt_state = CDT_RUNNING; mdt->mdt_opts.mo_coordinator = 1; @@ -963,10 +924,10 @@ int mdt_hsm_cdt_stop(struct mdt_device *mdt) if (cdt->cdt_state != CDT_STOPPING) { /* stop coordinator thread before cleaning */ - cdt->cdt_thread.t_flags = SVC_STOPPING; - wake_up(&cdt->cdt_thread.t_ctl_waitq); - wait_event(cdt->cdt_thread.t_ctl_waitq, - cdt->cdt_thread.t_flags & SVC_STOPPED); + cdt->cdt_flags = SVC_STOPPING; + wake_up(&cdt->cdt_waitq); + wait_event(cdt->cdt_waitq, + cdt->cdt_flags & SVC_STOPPED); } cdt->cdt_state = CDT_STOPPED; @@ -1042,7 +1003,7 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti, if (rc) { CERROR("%s: mdt_agent_record_update() failed, " "rc=%d, cannot update status to %s " - "for cookie "LPX64"\n", + "for cookie %#llx\n", mdt_obd_name(mdt), rc, agent_req_status2name(ARS_CANCELED), hai->hai_cookie); @@ -1100,39 +1061,37 @@ out: /** * swap layouts between 2 fids * \param mti [IN] context - * \param fid1 [IN] - * \param fid2 [IN] + * \param obj [IN] + * \param dfid [IN] * \param mh_common [IN] MD HSM */ static int hsm_swap_layouts(struct mdt_thread_info *mti, - const lustre_fid *fid, const lustre_fid *dfid, + struct mdt_object *obj, const struct lu_fid *dfid, struct md_hsm *mh_common) { - struct mdt_device *mdt = mti->mti_mdt; - struct mdt_object *child1, *child2; - struct mdt_lock_handle *lh2; + struct mdt_object *dobj; + struct mdt_lock_handle *dlh; int rc; ENTRY; - child1 = mdt_object_find(mti->mti_env, mdt, fid); - if (IS_ERR(child1)) - GOTO(out, rc = PTR_ERR(child1)); + if (!mdt_object_exists(obj)) + GOTO(out, rc = -ENOENT); - /* we already have layout lock on FID so take only + /* we already have layout lock on obj so take only * on dfid */ - lh2 = &mti->mti_lh[MDT_LH_OLD]; - mdt_lock_reg_init(lh2, LCK_EX); - child2 = mdt_object_find_lock(mti, dfid, lh2, MDS_INODELOCK_LAYOUT); - if (IS_ERR(child2)) - GOTO(out_child1, rc = PTR_ERR(child2)); + dlh = &mti->mti_lh[MDT_LH_OLD]; + mdt_lock_reg_init(dlh, LCK_EX); + dobj = mdt_object_find_lock(mti, dfid, dlh, MDS_INODELOCK_LAYOUT); + if (IS_ERR(dobj)) + GOTO(out, rc = PTR_ERR(dobj)); /* if copy tool closes the volatile before sending the final * progress through llapi_hsm_copy_end(), all the objects * are removed and mdd_swap_layout LBUG */ - if (!mdt_object_exists(child2)) { + if (!mdt_object_exists(dobj)) { CERROR("%s: Copytool has closed volatile file "DFID"\n", mdt_obd_name(mti->mti_mdt), PFID(dfid)); - GOTO(out_child2, rc = -ENOENT); + GOTO(out_dobj, rc = -ENOENT); } /* Since we only handle restores here, unconditionally use * SWAP_LAYOUTS_MDS_HSM flag to ensure original layout will @@ -1143,17 +1102,15 @@ static int hsm_swap_layouts(struct mdt_thread_info *mti, * only need to clear RELEASED and DIRTY. */ mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY); - rc = mdt_hsm_attr_set(mti, child2, mh_common); + rc = mdt_hsm_attr_set(mti, dobj, mh_common); if (rc == 0) rc = mo_swap_layouts(mti->mti_env, - mdt_object_child(child1), - mdt_object_child(child2), + mdt_object_child(obj), + mdt_object_child(dobj), SWAP_LAYOUTS_MDS_HSM); -out_child2: - mdt_object_unlock_put(mti, child2, lh2, 1); -out_child1: - mdt_object_put(mti->mti_env, child1); +out_dobj: + mdt_object_unlock_put(mti, dobj, dlh, 1); out: RETURN(rc); } @@ -1183,15 +1140,11 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, /* default is to retry */ *status = ARS_WAITING; - /* find object by FID */ + /* find object by FID + * if error/removed continue anyway to get correct reporting done */ obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh); /* we will update MD HSM only if needed */ is_mh_changed = false; - if (IS_ERR(obj)) { - /* object removed */ - *status = ARS_SUCCEED; - goto unlock; - } /* no need to change mh->mh_arch_id * mdt_hsm_get_md_hsm() got it from disk and it is still valid @@ -1219,14 +1172,16 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, *status = ARS_SUCCEED; break; default: + /* retry only if current policy or requested, and + * object is not on error/removed */ *status = (cdt->cdt_policy & CDT_NORETRY_ACTION || - !(pgs->hpk_flags & HP_FLAG_RETRY) ? - ARS_FAILED : ARS_WAITING); + !(pgs->hpk_flags & HP_FLAG_RETRY) || + IS_ERR(obj)) ? ARS_FAILED : ARS_WAITING; break; } if (pgs->hpk_errval > CLF_HSM_MAXERROR) { - CERROR("%s: Request "LPX64" on "DFID + CERROR("%s: Request %#llx on "DFID " failed, error code %d too large\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), @@ -1250,14 +1205,14 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, break; case HSMA_CANCEL: hsm_set_cl_event(&cl_flags, HE_CANCEL); - CERROR("%s: Failed request "LPX64" on "DFID + CERROR("%s: Failed request %#llx on "DFID " cannot be a CANCEL\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); break; default: - CERROR("%s: Failed request "LPX64" on "DFID + CERROR("%s: Failed request %#llx on "DFID " %d is an unknown action\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), @@ -1297,17 +1252,13 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, break; case HSMA_CANCEL: hsm_set_cl_event(&cl_flags, HE_CANCEL); - CERROR("%s: Successful request "LPX64 - " on "DFID - " cannot be a CANCEL\n", + CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); break; default: - CERROR("%s: Successful request "LPX64 - " on "DFID - " %d is an unknown action\n", + CERROR("%s: Successful request %#llx on "DFID" %d is an unknown action\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid), car->car_hai->hai_action); @@ -1324,23 +1275,20 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti, mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0); /* unlock is done later, after layout lock management */ - if (is_mh_changed) + if (is_mh_changed && !IS_ERR(obj)) rc = mdt_hsm_attr_set(mti, obj, &mh); -unlock: /* we give back layout lock only if restore was successful or - * if restore was canceled or if policy is to not retry + * if no retry will be attempted and if object is still alive, * in other cases we just unlock the object */ - if (car->car_hai->hai_action == HSMA_RESTORE && - (pgs->hpk_errval == 0 || pgs->hpk_errval == ECANCELED || - cdt->cdt_policy & CDT_NORETRY_ACTION)) { + if (car->car_hai->hai_action == HSMA_RESTORE) { struct cdt_restore_handle *crh; /* restore in data FID done, we swap the layouts * only if restore is successful */ - if (pgs->hpk_errval == 0) { - rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid, - &car->car_hai->hai_dfid, &mh); + if (pgs->hpk_errval == 0 && !IS_ERR_OR_NULL(obj)) { + rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid, + &mh); if (rc) { if (cdt->cdt_policy & CDT_NORETRY_ACTION) *status = ARS_FAILED; @@ -1353,15 +1301,16 @@ unlock: /* give back layout lock */ mutex_lock(&cdt->cdt_restore_lock); - crh = hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid); + crh = mdt_hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid); if (crh != NULL) list_del(&crh->crh_list); mutex_unlock(&cdt->cdt_restore_lock); - /* just give back layout lock, we keep - * the reference which is given back - * later with the lock for HSM flags */ - if (!IS_ERR(obj) && crh != NULL) - mdt_object_unlock(mti, obj, &crh->crh_lh, 1); + /* Just give back layout lock, we keep the reference + * which is given back later with the lock for HSM + * flags. + * XXX obj may be invalid so we do not pass it. */ + if (crh != NULL) + mdt_object_unlock(mti, NULL, &crh->crh_lh, 1); if (crh != NULL) OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem); @@ -1370,11 +1319,12 @@ unlock: GOTO(out, rc); out: - if (obj != NULL && !IS_ERR(obj)) { - mo_changelog(env, CL_HSM, cl_flags, - mdt_object_child(obj)); + /* always add a ChangeLog record */ + mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child, + &car->car_hai->hai_fid); + + if (!IS_ERR(obj)) mdt_object_put(mti->mti_env, obj); - } RETURN(rc); } @@ -1404,7 +1354,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* first do sanity checks */ car = mdt_cdt_update_request(cdt, pgs); if (IS_ERR(car)) { - CERROR("%s: Cannot find running request for cookie "LPX64 + CERROR("%s: Cannot find running request for cookie %#llx" " on fid="DFID"\n", mdt_obd_name(mdt), pgs->hpk_cookie, PFID(&pgs->hpk_fid)); @@ -1412,7 +1362,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, RETURN(PTR_ERR(car)); } - CDEBUG(D_HSM, "Progress received for fid="DFID" cookie="LPX64 + CDEBUG(D_HSM, "Progress received for fid="DFID" cookie=%#llx" " action=%s flags=%d err=%d fid="DFID" dfid="DFID"\n", PFID(&pgs->hpk_fid), pgs->hpk_cookie, hsm_copytool_action2name(car->car_hai->hai_action), @@ -1431,7 +1381,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, car->car_hai->hai_action == HSMA_ARCHIVE) && (!lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_dfid) && !lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_fid))) { - CERROR("%s: Progress on "DFID" for cookie "LPX64 + CERROR("%s: Progress on "DFID" for cookie %#llx" " does not match request FID "DFID" nor data FID " DFID"\n", mdt_obd_name(mdt), @@ -1442,7 +1392,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, } if (pgs->hpk_errval != 0 && !(pgs->hpk_flags & HP_FLAG_COMPLETED)) { - CERROR("%s: Progress on "DFID" for cookie "LPX64" action=%s" + CERROR("%s: Progress on "DFID" for cookie %#llx action=%s" " is not coherent (err=%d and not completed" " (flags=%d))\n", mdt_obd_name(mdt), @@ -1465,7 +1415,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, /* remove request from memory list */ mdt_cdt_remove_request(cdt, pgs->hpk_cookie); - CDEBUG(D_HSM, "Updating record: fid="DFID" cookie="LPX64 + CDEBUG(D_HSM, "Updating record: fid="DFID" cookie=%#llx" " action=%s status=%s\n", PFID(&pgs->hpk_fid), pgs->hpk_cookie, hsm_copytool_action2name(car->car_hai->hai_action), @@ -1480,7 +1430,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti, if (rc1) CERROR("%s: mdt_agent_record_update() failed," " rc=%d, cannot update status to %s" - " for cookie "LPX64"\n", + " for cookie %#llx\n", mdt_obd_name(mdt), rc1, agent_req_status2name(status), pgs->hpk_cookie); @@ -1682,8 +1632,8 @@ bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai, is_compat = true; break; } - CDEBUG(D_HSM, "fid="DFID" action=%s flags="LPX64 - " extent="LPX64"-"LPX64" hsm_flags=%.8X %s\n", + CDEBUG(D_HSM, "fid="DFID" action=%s flags=%#llx" + " extent=%#llx-%#llx hsm_flags=%.8X %s\n", PFID(&hai->hai_fid), hsm_copytool_action2name(hai->hai_action), rq_flags, hai->hai_extent.offset, hai->hai_extent.length, @@ -1738,7 +1688,7 @@ static void hsm_policy_bit2str(struct seq_file *m, const __u64 mask, ENTRY; if (hexa) - seq_printf(m, "("LPX64") ", mask); + seq_printf(m, "(%#llx) ", mask); for (i = 0; i < CDT_POLICY_SHIFT_COUNT; i++) { bit = (1ULL << i); @@ -1831,7 +1781,7 @@ mdt_hsm_policy_seq_write(struct file *file, const char __user *buffer, } while (start != NULL); - CDEBUG(D_HSM, "%s: new policy: rm="LPX64" add="LPX64" set="LPX64"\n", + CDEBUG(D_HSM, "%s: new policy: rm=%#llx add=%#llx set=%#llx\n", mdt_obd_name(mdt), remove_mask, add_mask, set_mask); /* if no sign in all string, it is a clear and set @@ -1862,7 +1812,7 @@ static int mdt_hsm_##VAR##_seq_show(struct seq_file *m, void *data) \ struct coordinator *cdt = &mdt->mdt_coordinator; \ ENTRY; \ \ - seq_printf(m, LPU64"\n", (__u64)cdt->VAR); \ + seq_printf(m, "%llu\n", (__u64)cdt->VAR); \ RETURN(0); \ } \ static ssize_t \ @@ -1873,14 +1823,14 @@ mdt_hsm_##VAR##_seq_write(struct file *file, const char __user *buffer, \ struct seq_file *m = file->private_data; \ struct mdt_device *mdt = m->private; \ struct coordinator *cdt = &mdt->mdt_coordinator; \ - int val; \ + __s64 val; \ int rc; \ ENTRY; \ \ - rc = lprocfs_write_helper(buffer, count, &val); \ + rc = lprocfs_str_to_s64(buffer, count, &val); \ if (rc) \ RETURN(rc); \ - if (val > 0) { \ + if (val > 0 && val < INT_MAX) { \ cdt->VAR = val; \ RETURN(count); \ } \