X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_hsm_cdt_agent.c;h=1767e112bdf11a1b931a6d837dd2ac1ff919ad6c;hb=2eaa49ef0f16798d564883b16cea9e96fad52495;hp=9a9ce6d7b8cdddeb78c30ef6a88fc4b67805b807;hpb=c54e61cef76eb0e00c9e69729cf4ac8578c90f47;p=fs%2Flustre-release.git diff --git a/lustre/mdt/mdt_hsm_cdt_agent.c b/lustre/mdt/mdt_hsm_cdt_agent.c index 9a9ce6d..1767e11 100644 --- a/lustre/mdt/mdt_hsm_cdt_agent.c +++ b/lustre/mdt/mdt_hsm_cdt_agent.c @@ -21,6 +21,8 @@ */ /* * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies + * + * Copyright (c) 2016, 2017, Intel Corporation. * alternatives * */ @@ -38,8 +40,8 @@ #include #include #include -#include #include +#include #include "mdt_internal.h" /* @@ -48,7 +50,7 @@ /* * find a hsm_agent by uuid - * lock cdt_agent_lock needs to be hold by caller + * lock cdt_agent_lock needs to be held by caller * \param cdt [IN] coordinator * \param uuid [IN] agent UUID * \retval hsm_agent pointer or NULL if not found @@ -133,11 +135,14 @@ int mdt_hsm_agent_register(struct mdt_thread_info *mti, out_free: if (ha != NULL && ha->ha_archive_id != NULL) - OBD_FREE(ha->ha_archive_id, - ha->ha_archive_cnt * sizeof(*ha->ha_archive_id)); + OBD_FREE_PTR_ARRAY(ha->ha_archive_id, ha->ha_archive_cnt); if (ha != NULL) OBD_FREE_PTR(ha); out: + /* wake the coordinator to potentially schedule requests */ + if (rc == -EEXIST || rc == 0) + mdt_hsm_cdt_event(cdt); + return rc; } @@ -159,13 +164,13 @@ int mdt_hsm_agent_register_mask(struct mdt_thread_info *mti, nr_archives = hweight32(archive_mask); if (nr_archives != 0) { - OBD_ALLOC(archive_id, nr_archives * sizeof(*archive_id)); + OBD_ALLOC_PTR_ARRAY(archive_id, nr_archives); if (!archive_id) RETURN(-ENOMEM); nr_archives = 0; for (i = 0; i < sizeof(archive_mask) * 8; i++) { - if ((1 << i) & archive_mask) { + if (BIT(i) & archive_mask) { archive_id[nr_archives] = i + 1; nr_archives++; } @@ -175,7 +180,7 @@ int mdt_hsm_agent_register_mask(struct mdt_thread_info *mti, rc = mdt_hsm_agent_register(mti, uuid, nr_archives, archive_id); if (archive_id != NULL) - OBD_FREE(archive_id, nr_archives * sizeof(*archive_id)); + OBD_FREE_PTR_ARRAY(archive_id, nr_archives); RETURN(rc); } @@ -211,8 +216,7 @@ int mdt_hsm_agent_unregister(struct mdt_thread_info *mti, GOTO(out, rc = -ENOENT); if (ha->ha_archive_cnt != 0) - OBD_FREE(ha->ha_archive_id, - ha->ha_archive_cnt * sizeof(*ha->ha_archive_id)); + OBD_FREE_PTR_ARRAY(ha->ha_archive_id, ha->ha_archive_cnt); OBD_FREE_PTR(ha); GOTO(out, rc = 0); @@ -305,8 +309,57 @@ int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive, RETURN(rc); } +int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti, + struct hsm_action_item *hai) +{ + struct hsm_agent *ha; + __u32 archive_mask = 0; + struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator; + int i; + /* return error by default in case all archive_ids have unregistered */ + int rc = -EAGAIN; + ENTRY; + + /* send action to all registered archive_ids */ + down_read(&cdt->cdt_agent_lock); + list_for_each_entry(ha, &cdt->cdt_agents, ha_list) { + for (i = 0; (i < ha->ha_archive_cnt); i++) { + /* only send once for each archive_id */ + if (BIT(ha->ha_archive_id[i]) & archive_mask) + continue; + archive_mask |= BIT(ha->ha_archive_id[i]); + + /* XXX: it could make sense to gather all + * actions for the same archive_id like in + * mdt_hsm_add_actions() ?? */ + rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt, + ha->ha_archive_id[i], 0, + hai); + if (rc) { + CERROR("%s: unable to add HSM remove request " + "for "DFID": rc=%d\n", + mdt_obd_name(mti->mti_mdt), + PFID(&hai->hai_fid), rc); + break; + } else { + CDEBUG(D_HSM, "%s: added HSM remove request " + "for "DFID", archive_id=%d\n", + mdt_obd_name(mti->mti_mdt), + PFID(&hai->hai_fid), + ha->ha_archive_id[i]); + } + } + /* early exit from loop due to error? */ + if (i != ha->ha_archive_cnt) + break; + } + up_read(&cdt->cdt_agent_lock); + + RETURN(rc); +} + /** - * send a compound request to the agent + * send a HAL to the agent * \param mti [IN] context * \param hal [IN] request (can be a kuc payload) * \param purge [IN] purge mode (no record) @@ -333,6 +386,65 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, ENTRY; rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid); + if (rc && hal->hal_archive_id == 0) { + uint notrmcount = 0; + int rc2 = 0; + + /* special case of remove requests with no archive_id specified, + * and no agent registered to serve all archives, then create a + * set of new requests, each to be sent to each registered + * archives. + * Todo so, find all HSMA_REMOVE entries, and then : + * _ set completed status as SUCCESS (or FAIL?) + * _ create a new LLOG record for each archive_id + * presently being served by any CT + */ + hai = hai_first(hal); + for (i = 0; i < hal->hal_count; i++, + hai = hai_next(hai)) { + struct hsm_record_update update; + + /* only removes are concerned */ + if (hai->hai_action != HSMA_REMOVE) { + /* count if other actions than HSMA_REMOVE, + * to return original error/rc */ + notrmcount++; + continue; + } + + /* send remove request to all registered archive_ids */ + rc2 = mdt_hsm_send_action_to_each_archive(mti, hai); + if (rc2) + break; + + /* only update original request as SUCCEED if it has + * been successfully broadcasted to all available + * archive_ids + * XXX: this should only cause duplicates to be sent, + * unless a method to record already successfully + * reached archive_ids is implemented */ + + update.cookie = hai->hai_cookie; + update.status = ARS_SUCCEED; + rc2 = mdt_agent_record_update(mti->mti_env, mdt, + &update, 1); + if (rc2) { + CERROR("%s: mdt_agent_record_update() " + "failed, cannot update " + "status to %s for cookie " + "%#llx: rc = %d\n", + mdt_obd_name(mdt), + agent_req_status2name(ARS_SUCCEED), + hai->hai_cookie, rc2); + break; + } + } + /* only remove requests with archive_id=0 */ + if (notrmcount == 0) + RETURN(rc2); + + } + if (rc) { CERROR("%s: Cannot find agent for archive %d: rc = %d\n", mdt_obd_name(mdt), hal->hal_archive_id, rc); @@ -343,84 +455,86 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, hal->hal_archive_id); len = hal_size(hal); - if (kuc_ispayload(hal)) { - /* hal is already a kuc payload - * we do not need to alloc a new one - * this avoid a alloc/memcpy/free - */ - buf = hal; - } else { - buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST); - if (IS_ERR(buf)) - RETURN(PTR_ERR(buf)); - memcpy(buf, hal, len); - } + buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST); + if (IS_ERR(buf)) + RETURN(PTR_ERR(buf)); + memcpy(buf, hal, len); /* Check if request is still valid (cf file hsm flags) */ fail_request = false; hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) { - if (hai->hai_action != HSMA_CANCEL) { - struct mdt_object *obj; - struct md_hsm hsm; + struct mdt_object *obj; + struct md_hsm hsm; - obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm); - if (!IS_ERR(obj) && obj != NULL) { - mdt_object_put(mti->mti_env, obj); - } else { - if (hai->hai_action == HSMA_REMOVE) - continue; - - if (obj == NULL) { - fail_request = true; - rc = mdt_agent_record_update( - mti->mti_env, mdt, - &hai->hai_cookie, - 1, ARS_FAILED); - if (rc) { - CERROR( - "%s: mdt_agent_record_update() " - "failed, cannot update " - "status to %s for cookie " - LPX64": rc = %d\n", - mdt_obd_name(mdt), - agent_req_status2name(ARS_FAILED), - hai->hai_cookie, rc); - GOTO(out_buf, rc); - } - continue; - } - GOTO(out_buf, rc = PTR_ERR(obj)); + if (hai->hai_action == HSMA_CANCEL) + continue; + + obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm); + if (!IS_ERR(obj)) { + mdt_object_put(mti->mti_env, obj); + } else if (PTR_ERR(obj) == -ENOENT) { + struct hsm_record_update update = { + .cookie = hai->hai_cookie, + .status = ARS_FAILED, + }; + + if (hai->hai_action == HSMA_REMOVE) + continue; + + fail_request = true; + rc = mdt_agent_record_update(mti->mti_env, mdt, + &update, 1); + if (rc < 0) { + CERROR("%s: mdt_agent_record_update() failed, " + "cannot update status to %s for cookie " + "%#llx: rc = %d\n", + mdt_obd_name(mdt), + agent_req_status2name(ARS_FAILED), + hai->hai_cookie, rc); + GOTO(out_buf, rc); } - if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id, - hal->hal_flags, &hsm)) { - /* incompatible request, we abort the request */ - /* next time coordinator will wake up, it will - * make the same compound with valid only - * records */ - fail_request = true; - rc = mdt_agent_record_update(mti->mti_env, mdt, - &hai->hai_cookie, - 1, ARS_FAILED); - if (rc) { - CERROR("%s: mdt_agent_record_update() " - "failed, cannot update " - "status to %s for cookie " - LPX64": rc = %d\n", - mdt_obd_name(mdt), - agent_req_status2name(ARS_FAILED), - hai->hai_cookie, rc); - GOTO(out_buf, rc); - } + continue; + } else { + GOTO(out_buf, rc = PTR_ERR(obj)); + } + + if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id, + hal->hal_flags, &hsm)) { + struct hsm_record_update update = { + .cookie = hai->hai_cookie, + .status = ARS_FAILED, + }; + + /* incompatible request, we abort the request */ + /* next time coordinator will wake up, it will + * make the same HAL with valid only + * records */ + fail_request = true; + rc = mdt_agent_record_update(mti->mti_env, mdt, + &update, 1); + if (rc) { + CERROR("%s: mdt_agent_record_update() failed, " + "cannot update status to %s for cookie " + "%#llx: rc = %d\n", + mdt_obd_name(mdt), + agent_req_status2name(ARS_FAILED), + hai->hai_cookie, rc); + GOTO(out_buf, rc); } + + /* if restore and record status updated, give + * back granted layout lock */ + if (hai->hai_action == HSMA_RESTORE) + cdt_restore_handle_del(mti, cdt, &hai->hai_fid); } } - /* we found incompatible requests, so the compound cannot be send + /* we found incompatible requests, so the HAL cannot be sent * as is. Bad records have been invalidated in llog. * Valid one will be reschedule next time coordinator will wake up - * So no need the rebuild a full valid compound request now + * So no need the rebuild a full valid HAL now */ if (fail_request) GOTO(out_buf, rc = 0); @@ -444,8 +558,10 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, * the ldlm_callback_handler. Note this sends a request RPC * from a server (MDT) to a client (MDC), backwards of normal comms. */ - exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid); + exp = obd_uuid_lookup(mdt2obd_dev(mdt), &uuid); if (exp == NULL || exp->exp_disconnected) { + if (exp != NULL) + class_export_put(exp); /* This should clean up agents on evicted exports */ rc = -ENOENT; CERROR("%s: agent uuid (%s) not found, unregistering:" @@ -486,35 +602,15 @@ out: } out_buf: - if (buf != hal) - kuc_free(buf, len); + kuc_free(buf, len); RETURN(rc); } /** - * update status of a request - * \param mti [IN] - * \param pgs [IN] progress of the copy tool - * \retval 0 success - * \retval -ve failure + * seq_file method called to start access to debugfs file */ -int mdt_hsm_coordinator_update(struct mdt_thread_info *mti, - struct hsm_progress_kernel *pgs) -{ - int rc; - - ENTRY; - /* ask to coodinator to update request state and - * to record on disk the result */ - rc = mdt_hsm_update_request_state(mti, pgs, 1); - RETURN(rc); -} - -/** - * seq_file method called to start access to /proc file - */ -static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off) +static void *mdt_hsm_agent_debugfs_start(struct seq_file *s, loff_t *off) { struct mdt_device *mdt = s->private; struct coordinator *cdt = &mdt->mdt_coordinator; @@ -544,7 +640,7 @@ static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off) * seq_file method called to get next item * just returns NULL at eof */ -static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p) +static void *mdt_hsm_agent_debugfs_next(struct seq_file *s, void *v, loff_t *p) { struct mdt_device *mdt = s->private; struct coordinator *cdt = &mdt->mdt_coordinator; @@ -565,7 +661,7 @@ static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p) /** */ -static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v) +static int mdt_hsm_agent_debugfs_show(struct seq_file *s, void *v) { struct list_head *pos = v; struct hsm_agent *ha; @@ -593,9 +689,9 @@ static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v) } /** - * seq_file method called to stop access to /proc file + * seq_file method called to stop access to debugfs file */ -static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v) +static void mdt_hsm_agent_debugfs_stop(struct seq_file *s, void *v) { struct mdt_device *mdt = s->private; struct coordinator *cdt = &mdt->mdt_coordinator; @@ -603,33 +699,30 @@ static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v) up_read(&cdt->cdt_agent_lock); } -/* hsm agent list proc functions */ -static const struct seq_operations mdt_hsm_agent_proc_ops = { - .start = mdt_hsm_agent_proc_start, - .next = mdt_hsm_agent_proc_next, - .show = mdt_hsm_agent_proc_show, - .stop = mdt_hsm_agent_proc_stop, +/* hsm agent list debugfs functions */ +static const struct seq_operations mdt_hsm_agent_debugfs_ops = { + .start = mdt_hsm_agent_debugfs_start, + .next = mdt_hsm_agent_debugfs_next, + .show = mdt_hsm_agent_debugfs_show, + .stop = mdt_hsm_agent_debugfs_stop, }; /** - * public function called at open of /proc file to get + * public function called at open of debugfs file to get * list of agents */ -static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file) +static int ldebugfs_open_hsm_agent(struct inode *inode, struct file *file) { struct seq_file *s; int rc; ENTRY; - if (LPROCFS_ENTRY_CHECK(PDE(inode))) - RETURN(-ENOENT); - - rc = seq_open(file, &mdt_hsm_agent_proc_ops); + rc = seq_open(file, &mdt_hsm_agent_debugfs_ops); if (rc) RETURN(rc); s = file->private_data; - s->private = PDE(inode)->data; + s->private = inode->i_private; RETURN(rc); } @@ -637,9 +730,8 @@ static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file) /* methods to access hsm agent list */ const struct file_operations mdt_hsm_agent_fops = { .owner = THIS_MODULE, - .open = lprocfs_open_hsm_agent, + .open = ldebugfs_open_hsm_agent, .read = seq_read, .llseek = seq_lseek, - .release = lprocfs_seq_release, + .release = seq_release, }; -