*/
/*
* (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
+ *
+ * Copyright (c) 2016, 2017, Intel Corporation.
* alternatives
*
*/
#include <obd.h>
#include <obd_support.h>
#include <lustre_export.h>
-#include <lustre/lustre_user.h>
#include <lprocfs_status.h>
+#include <lustre_kernelcomm.h>
#include "mdt_internal.h"
/*
/*
* find a hsm_agent by uuid
- * lock cdt_agent_lock needs to be hold by caller
+ * lock cdt_agent_lock needs to be held by caller
* \param cdt [IN] coordinator
* \param uuid [IN] agent UUID
* \retval hsm_agent pointer or NULL if not found
if (ha != NULL)
OBD_FREE_PTR(ha);
out:
+ /* wake the coordinator to potentially schedule requests */
+ if (rc == -EEXIST || rc == 0)
+ mdt_hsm_cdt_event(cdt);
+
return rc;
}
RETURN(rc);
}
+/**
+ * record one HSM action once for every archive_id found in the agent list
+ *
+ * Walks the coordinator's agent list with cdt_agent_lock held for read and
+ * calls mdt_agent_record_add() for each distinct archive_id. Processing
+ * stops at the first failure.
+ * \param mti [IN] context
+ * \param hai [IN] action item to record for each archive_id
+ * \retval 0 success
+ * \retval -EAGAIN no archive_id was found in the agent list
+ * \retval -ve failure returned by mdt_agent_record_add()
+ */
+int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
+					struct hsm_action_item *hai)
+{
+	struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+	struct hsm_agent *ha;
+	/* one bit per archive_id already handled */
+	__u64 archive_mask = 0;
+	int i;
+	/* return error by default in case all archive_ids have unregistered */
+	int rc = -EAGAIN;
+	ENTRY;
+
+	/* send action to all registered archive_ids */
+	down_read(&cdt->cdt_agent_lock);
+	list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
+		for (i = 0; i < ha->ha_archive_cnt; i++) {
+			unsigned int id = ha->ha_archive_id[i];
+			__u64 bit = 0;
+
+			/* shifting by the width of the type (or more) is
+			 * undefined, so an id that does not fit in the mask
+			 * is left untracked: it is still sent, but cannot be
+			 * de-duplicated */
+			if (id < sizeof(archive_mask) * 8)
+				bit = 1ULL << id;
+
+			/* only send once for each archive_id */
+			if (archive_mask & bit)
+				continue;
+			archive_mask |= bit;
+
+			/* XXX: it could make sense to gather all
+			 * actions for the same archive_id like in
+			 * mdt_hsm_add_actions() ?? */
+			rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt,
+						  ha->ha_archive_id[i], 0,
+						  hai);
+			if (rc) {
+				CERROR("%s: unable to add HSM remove request "
+				       "for "DFID": rc=%d\n",
+				       mdt_obd_name(mti->mti_mdt),
+				       PFID(&hai->hai_fid), rc);
+				/* give up on the first failure; the lock is
+				 * dropped at the common exit below */
+				GOTO(out_unlock, rc);
+			}
+
+			CDEBUG(D_HSM, "%s: added HSM remove request "
+			       "for "DFID", archive_id=%d\n",
+			       mdt_obd_name(mti->mti_mdt),
+			       PFID(&hai->hai_fid),
+			       ha->ha_archive_id[i]);
+		}
+	}
+out_unlock:
+	up_read(&cdt->cdt_agent_lock);
+
+	RETURN(rc);
+}
+
/**
- * send a compound request to the agent
+ * send a HAL to the agent
* \param mti [IN] context
* \param hal [IN] request (can be a kuc payload)
* \param purge [IN] purge mode (no record)
ENTRY;
rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid);
+ if (rc && hal->hal_archive_id == 0) {
+ uint notrmcount = 0;
+ int rc2 = 0;
+
+ /* special case of remove requests with no archive_id specified,
+ * and no agent registered to serve all archives, then create a
+ * set of new requests, one to be sent to each registered
+ * archive.
+ * To do so, find all HSMA_REMOVE entries, and then:
+ * _ set completed status as SUCCESS (or FAIL?)
+ * _ create a new LLOG record for each archive_id
+ * presently being served by any CT
+ */
+ hai = hai_first(hal);
+ for (i = 0; i < hal->hal_count; i++,
+ hai = hai_next(hai)) {
+ struct hsm_record_update update;
+
+ /* only removes are concerned */
+ if (hai->hai_action != HSMA_REMOVE) {
+ /* count if other actions than HSMA_REMOVE,
+ * to return original error/rc */
+ notrmcount++;
+ continue;
+ }
+
+ /* send remove request to all registered archive_ids */
+ rc2 = mdt_hsm_send_action_to_each_archive(mti, hai);
+ if (rc2)
+ break;
+
+ /* only update original request as SUCCEED if it has
+ * been successfully broadcasted to all available
+ * archive_ids
+ * XXX: this should only cause duplicates to be sent,
+ * unless a method to record already successfully
+ * reached archive_ids is implemented */
+
+ update.cookie = hai->hai_cookie;
+ update.status = ARS_SUCCEED;
+ rc2 = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc2) {
+ CERROR("%s: mdt_agent_record_update() "
+ "failed, cannot update "
+ "status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_SUCCEED),
+ hai->hai_cookie, rc2);
+ break;
+ }
+ }
+ /* only remove requests with archive_id=0 */
+ if (notrmcount == 0)
+ RETURN(rc2);
+
+ }
+
if (rc) {
CERROR("%s: Cannot find agent for archive %d: rc = %d\n",
mdt_obd_name(mdt), hal->hal_archive_id, rc);
hal->hal_archive_id);
len = hal_size(hal);
- if (kuc_ispayload(hal)) {
- /* hal is already a kuc payload
- * we do not need to alloc a new one
- * this avoid a alloc/memcpy/free
- */
- buf = hal;
- } else {
- buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
- if (IS_ERR(buf))
- RETURN(PTR_ERR(buf));
- memcpy(buf, hal, len);
- }
+ buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
+ if (IS_ERR(buf))
+ RETURN(PTR_ERR(buf));
+ memcpy(buf, hal, len);
/* Check if request is still valid (cf file hsm flags) */
fail_request = false;
hai = hai_first(hal);
for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
- if (hai->hai_action != HSMA_CANCEL) {
- struct mdt_object *obj;
- struct md_hsm hsm;
+ struct mdt_object *obj;
+ struct md_hsm hsm;
- obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
- if (!IS_ERR(obj) && obj != NULL) {
- mdt_object_put(mti->mti_env, obj);
- } else {
- if (hai->hai_action == HSMA_REMOVE)
- continue;
-
- if (obj == NULL) {
- fail_request = true;
- rc = mdt_agent_record_update(
- mti->mti_env, mdt,
- &hai->hai_cookie,
- 1, ARS_FAILED);
- if (rc) {
- CERROR(
- "%s: mdt_agent_record_update() "
- "failed, cannot update "
- "status to %s for cookie "
- LPX64": rc = %d\n",
- mdt_obd_name(mdt),
- agent_req_status2name(ARS_FAILED),
- hai->hai_cookie, rc);
- GOTO(out_buf, rc);
- }
- continue;
- }
- GOTO(out_buf, rc = PTR_ERR(obj));
+ if (hai->hai_action == HSMA_CANCEL)
+ continue;
+
+ obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
+ if (!IS_ERR(obj)) {
+ mdt_object_put(mti->mti_env, obj);
+ } else if (PTR_ERR(obj) == -ENOENT) {
+ struct hsm_record_update update = {
+ .cookie = hai->hai_cookie,
+ .status = ARS_FAILED,
+ };
+
+ if (hai->hai_action == HSMA_REMOVE)
+ continue;
+
+ fail_request = true;
+ rc = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc < 0) {
+ CERROR("%s: mdt_agent_record_update() failed, "
+ "cannot update status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_FAILED),
+ hai->hai_cookie, rc);
+ GOTO(out_buf, rc);
}
- if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
- hal->hal_flags, &hsm)) {
- /* incompatible request, we abort the request */
- /* next time coordinator will wake up, it will
- * make the same compound with valid only
- * records */
- fail_request = true;
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- &hai->hai_cookie,
- 1, ARS_FAILED);
- if (rc) {
- CERROR("%s: mdt_agent_record_update() "
- "failed, cannot update "
- "status to %s for cookie "
- LPX64": rc = %d\n",
- mdt_obd_name(mdt),
- agent_req_status2name(ARS_FAILED),
- hai->hai_cookie, rc);
- GOTO(out_buf, rc);
- }
+ continue;
+ } else {
+ GOTO(out_buf, rc = PTR_ERR(obj));
+ }
+
+ if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
+ hal->hal_flags, &hsm)) {
+ struct hsm_record_update update = {
+ .cookie = hai->hai_cookie,
+ .status = ARS_FAILED,
+ };
+
+ /* incompatible request, we abort the request */
+ /* next time coordinator will wake up, it will
+ * make the same HAL with valid only
+ * records */
+ fail_request = true;
+ rc = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc) {
+ CERROR("%s: mdt_agent_record_update() failed, "
+ "cannot update status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_FAILED),
+ hai->hai_cookie, rc);
+ GOTO(out_buf, rc);
}
+
+ /* if restore and record status updated, give
+ * back granted layout lock */
+ if (hai->hai_action == HSMA_RESTORE)
+ cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
}
}
- /* we found incompatible requests, so the compound cannot be send
+ /* we found incompatible requests, so the HAL cannot be sent
* as is. Bad records have been invalidated in llog.
* Valid one will be reschedule next time coordinator will wake up
- * So no need the rebuild a full valid compound request now
+ * So no need to rebuild a full valid HAL now
*/
if (fail_request)
GOTO(out_buf, rc = 0);
* the ldlm_callback_handler. Note this sends a request RPC
* from a server (MDT) to a client (MDC), backwards of normal comms.
*/
- exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid);
+ exp = obd_uuid_lookup(mdt2obd_dev(mdt), &uuid);
if (exp == NULL || exp->exp_disconnected) {
+ if (exp != NULL)
+ class_export_put(exp);
/* This should clean up agents on evicted exports */
rc = -ENOENT;
CERROR("%s: agent uuid (%s) not found, unregistering:"
}
out_buf:
- if (buf != hal)
- kuc_free(buf, len);
-
- RETURN(rc);
-}
-
-/**
- * update status of a request
- * \param mti [IN]
- * \param pgs [IN] progress of the copy tool
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
- struct hsm_progress_kernel *pgs)
-{
- int rc;
+ kuc_free(buf, len);
- ENTRY;
- /* ask to coodinator to update request state and
- * to record on disk the result */
- rc = mdt_hsm_update_request_state(mti, pgs, 1);
RETURN(rc);
}
/**
- * seq_file method called to start access to /proc file
+ * seq_file method called to start access to debugfs file
*/
-static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
+static void *mdt_hsm_agent_debugfs_start(struct seq_file *s, loff_t *off)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
* seq_file method called to get next item
* just returns NULL at eof
*/
-static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
+static void *mdt_hsm_agent_debugfs_next(struct seq_file *s, void *v, loff_t *p)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
/**
*/
-static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
+static int mdt_hsm_agent_debugfs_show(struct seq_file *s, void *v)
{
struct list_head *pos = v;
struct hsm_agent *ha;
RETURN(0);
ha = list_entry(pos, struct hsm_agent, ha_list);
- seq_printf(s, "uuid=%s archive#=%d (", ha->ha_uuid.uuid,
- ha->ha_archive_cnt);
- if (ha->ha_archive_cnt == 0)
- seq_printf(s, "all");
- else
- for (i = 0; i < ha->ha_archive_cnt; i++)
- seq_printf(s, "%d ", ha->ha_archive_id[i]);
+ seq_printf(s, "uuid=%s archive_id=", ha->ha_uuid.uuid);
+ if (ha->ha_archive_cnt == 0) {
+ seq_printf(s, "ANY");
+ } else {
+ seq_printf(s, "%d", ha->ha_archive_id[0]);
+ for (i = 1; i < ha->ha_archive_cnt; i++)
+ seq_printf(s, ",%d", ha->ha_archive_id[i]);
+ }
- seq_printf(s, ") r=%d s=%d f=%d\n",
+ seq_printf(s, " requests=[current:%d ok:%d errors:%d]\n",
atomic_read(&ha->ha_requests),
atomic_read(&ha->ha_success),
atomic_read(&ha->ha_failure));
}
/**
- * seq_file method called to stop access to /proc file
+ * seq_file method called to stop access to debugfs file
*/
-static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
+static void mdt_hsm_agent_debugfs_stop(struct seq_file *s, void *v)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
up_read(&cdt->cdt_agent_lock);
}
-/* hsm agent list proc functions */
-static const struct seq_operations mdt_hsm_agent_proc_ops = {
- .start = mdt_hsm_agent_proc_start,
- .next = mdt_hsm_agent_proc_next,
- .show = mdt_hsm_agent_proc_show,
- .stop = mdt_hsm_agent_proc_stop,
+/* hsm agent list debugfs functions */
+static const struct seq_operations mdt_hsm_agent_debugfs_ops = {
+ .start = mdt_hsm_agent_debugfs_start,
+ .next = mdt_hsm_agent_debugfs_next,
+ .show = mdt_hsm_agent_debugfs_show,
+ .stop = mdt_hsm_agent_debugfs_stop,
};
/**
- * public function called at open of /proc file to get
+ * public function called at open of debugfs file to get
* list of agents
*/
-static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
+static int ldebugfs_open_hsm_agent(struct inode *inode, struct file *file)
{
struct seq_file *s;
int rc;
ENTRY;
- if (LPROCFS_ENTRY_CHECK(PDE(inode)))
- RETURN(-ENOENT);
-
- rc = seq_open(file, &mdt_hsm_agent_proc_ops);
+ rc = seq_open(file, &mdt_hsm_agent_debugfs_ops);
if (rc)
RETURN(rc);
s = file->private_data;
- s->private = PDE(inode)->data;
+ s->private = inode->i_private;
RETURN(rc);
}
/* methods to access hsm agent list */
const struct file_operations mdt_hsm_agent_fops = {
.owner = THIS_MODULE,
- .open = lprocfs_open_hsm_agent,
+ .open = ldebugfs_open_hsm_agent,
.read = seq_read,
.llseek = seq_lseek,
- .release = lprocfs_seq_release,
+ .release = seq_release,
};
-