*/
/*
* (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
+ *
+ * Copyright (c) 2016, 2017, Intel Corporation.
* alternatives
*
*/
#include <obd.h>
#include <obd_support.h>
#include <lustre_export.h>
-#include <lustre/lustre_user.h>
#include <lprocfs_status.h>
+#include <lustre_kernelcomm.h>
#include "mdt_internal.h"
/*
/*
* find a hsm_agent by uuid
- * lock cdt_agent_lock needs to be hold by caller
+ * lock cdt_agent_lock needs to be held by caller
* \param cdt [IN] coordinator
* \param uuid [IN] agent UUID
* \retval hsm_agent pointer or NULL if not found
const struct obd_uuid *uuid)
{
struct hsm_agent *ha;
- cfs_list_t *pos;
- cfs_list_for_each(pos, &cdt->cdt_agents) {
- ha = cfs_list_entry(pos, struct hsm_agent, ha_list);
+ list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
if (obd_uuid_equals(&ha->ha_uuid, uuid))
return ha;
}
GOTO(out_free, rc = -EEXIST);
}
- cfs_list_add_tail(&ha->ha_list, &cdt->cdt_agents);
+ list_add_tail(&ha->ha_list, &cdt->cdt_agents);
if (ha->ha_archive_cnt == 0)
CDEBUG(D_HSM, "agent %s registered for all archives\n",
out_free:
- if ((ha != NULL) && (ha->ha_archive_id != NULL))
+ if (ha != NULL && ha->ha_archive_id != NULL)
OBD_FREE(ha->ha_archive_id,
ha->ha_archive_cnt * sizeof(*ha->ha_archive_id));
if (ha != NULL)
OBD_FREE_PTR(ha);
out:
+ /* wake the coordinator to potentially schedule requests */
+ if (rc == -EEXIST || rc == 0)
+ mdt_hsm_cdt_event(cdt);
+
return rc;
}
ha = mdt_hsm_agent_lookup(cdt, uuid);
if (ha != NULL)
- cfs_list_del_init(&ha->ha_list);
+ list_del_init(&ha->ha_list);
up_write(&cdt->cdt_agent_lock);
int succ_rq, int fail_rq, int new_rq,
const struct obd_uuid *uuid)
{
- struct hsm_agent *ha, *tmp;
+ struct hsm_agent *ha;
int rc;
ENTRY;
down_read(&cdt->cdt_agent_lock);
- cfs_list_for_each_entry_safe(ha, tmp, &cdt->cdt_agents, ha_list) {
+ list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
if (obd_uuid_equals(&ha->ha_uuid, uuid)) {
- if ((succ_rq == 0) && (fail_rq == 0) && (new_rq == 0)) {
+ if (succ_rq == 0 && fail_rq == 0 && new_rq == 0) {
atomic_set(&ha->ha_success, 0);
atomic_set(&ha->ha_failure, 0);
atomic_set(&ha->ha_requests, 0);
struct obd_uuid *uuid)
{
int rc = -EAGAIN, i, load = -1;
- struct hsm_agent *ha, *tmp;
+ struct hsm_agent *ha;
ENTRY;
/* Choose an export to send a copytool req to */
down_read(&cdt->cdt_agent_lock);
- cfs_list_for_each_entry_safe(ha, tmp, &cdt->cdt_agents, ha_list) {
+ list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
for (i = 0; (i < ha->ha_archive_cnt) &&
(ha->ha_archive_id[i] != archive); i++) {
/* nothing to do, just skip unmatching records */
}
/* archive count == 0 means copy tool serves any backend */
- if ((ha->ha_archive_cnt != 0) && (i == ha->ha_archive_cnt))
+ if (ha->ha_archive_cnt != 0 && i == ha->ha_archive_cnt)
continue;
- if ((load == -1) ||
- (load > atomic_read(&ha->ha_requests))) {
+ if (load == -1 || load > atomic_read(&ha->ha_requests)) {
load = atomic_read(&ha->ha_requests);
*uuid = ha->ha_uuid;
rc = 0;
RETURN(rc);
}
+int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
+ struct hsm_action_item *hai)
+{
+ struct hsm_agent *ha;
+ __u32 archive_mask = 0; /* bit N is set once archive_id N was handled */
+ struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+ int i;
+ /* return error by default in case all archive_ids have unregistered */
+ int rc = -EAGAIN;
+ ENTRY;
+
+ /* send action to all registered archive_ids
+ * Walks every agent on cdt->cdt_agents while holding cdt_agent_lock
+ * for read and calls mdt_agent_record_add() with \a hai once per
+ * distinct archive_id found in the agents' ha_archive_id arrays.
+ * Agents whose ha_archive_cnt is 0 add nothing here, since the inner
+ * loop body never runs for them.
+ * Returns the last mdt_agent_record_add() result, or -EAGAIN when no
+ * record was added at all; the walk stops at the first failure. */
+ down_read(&cdt->cdt_agent_lock);
+ list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
+ for (i = 0; (i < ha->ha_archive_cnt); i++) {
+ /* only send once for each archive_id
+ * NOTE(review): "1 << id" shifts a signed int, which
+ * (assuming a 32-bit int) is undefined when id is
+ * negative or >= 31, and archive_mask only has 32
+ * bits; confirm the valid range of ha_archive_id[]
+ * values */
+ if ((1 << ha->ha_archive_id[i]) & archive_mask)
+ continue;
+ archive_mask |= (1 << ha->ha_archive_id[i]);
+
+ /* XXX: it could make sense to gather all
+ * actions for the same archive_id like in
+ * mdt_hsm_add_actions() ?? */
+ rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt,
+ ha->ha_archive_id[i], 0,
+ hai);
+ if (rc) {
+ CERROR("%s: unable to add HSM remove request "
+ "for "DFID": rc=%d\n",
+ mdt_obd_name(mti->mti_mdt),
+ PFID(&hai->hai_fid), rc);
+ break;
+ } else {
+ CDEBUG(D_HSM, "%s: added HSM remove request "
+ "for "DFID", archive_id=%d\n",
+ mdt_obd_name(mti->mti_mdt),
+ PFID(&hai->hai_fid),
+ ha->ha_archive_id[i]);
+ }
+ }
+ /* early exit from loop due to error? the inner loop only ends
+ * with i below ha_archive_cnt when it hit the break above, so
+ * stop walking the agent list as well */
+ if (i != ha->ha_archive_cnt)
+ break;
+ }
+ up_read(&cdt->cdt_agent_lock);
+
+ RETURN(rc);
+}
+
/**
- * send a compound request to the agent
+ * send a HAL to the agent
* \param mti [IN] context
* \param hal [IN] request (can be a kuc payload)
* \param purge [IN] purge mode (no record)
ENTRY;
rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid);
+ if (rc && hal->hal_archive_id == 0) {
+ uint notrmcount = 0;
+ int rc2 = 0;
+
+ /* special case of remove requests with no archive_id specified,
+ * and no agent registered to serve all archives: create a
+ * set of new requests, one to be sent to each registered
+ * archive.
+ * To do so, find all HSMA_REMOVE entries, and then:
+ * _ set completed status as SUCCESS (or FAIL?)
+ * _ create a new LLOG record for each archive_id
+ * presently being served by any CT
+ */
+ hai = hai_first(hal);
+ for (i = 0; i < hal->hal_count; i++,
+ hai = hai_next(hai)) {
+ struct hsm_record_update update;
+
+ /* only removes are concerned */
+ if (hai->hai_action != HSMA_REMOVE) {
+ /* count if other actions than HSMA_REMOVE,
+ * to return original error/rc */
+ notrmcount++;
+ continue;
+ }
+
+ /* send remove request to all registered archive_ids */
+ rc2 = mdt_hsm_send_action_to_each_archive(mti, hai);
+ if (rc2)
+ break;
+
+ /* only update original request as SUCCEED if it has
+ * been successfully broadcast to all available
+ * archive_ids
+ * XXX: this should only cause duplicates to be sent,
+ * unless a method to record already successfully
+ * reached archive_ids is implemented */
+
+ update.cookie = hai->hai_cookie;
+ update.status = ARS_SUCCEED;
+ rc2 = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc2) {
+ CERROR("%s: mdt_agent_record_update() "
+ "failed, cannot update "
+ "status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_SUCCEED),
+ hai->hai_cookie, rc2);
+ break;
+ }
+ }
+ /* only remove requests with archive_id=0 */
+ if (notrmcount == 0)
+ RETURN(rc2);
+
+ }
+
if (rc) {
CERROR("%s: Cannot find agent for archive %d: rc = %d\n",
mdt_obd_name(mdt), hal->hal_archive_id, rc);
hal->hal_archive_id);
len = hal_size(hal);
- if (kuc_ispayload(hal)) {
- /* hal is already a kuc payload
- * we do not need to alloc a new one
- * this avoid a alloc/memcpy/free
- */
- buf = hal;
- } else {
- buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
- if (IS_ERR(buf))
- RETURN(PTR_ERR(buf));
- memcpy(buf, hal, len);
- }
+ buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
+ if (IS_ERR(buf))
+ RETURN(PTR_ERR(buf));
+ memcpy(buf, hal, len);
/* Check if request is still valid (cf file hsm flags) */
fail_request = false;
- hai = hai_zero(hal);
+ hai = hai_first(hal);
for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
- if (hai->hai_action != HSMA_CANCEL) {
- struct mdt_object *obj;
- struct md_hsm hsm;
+ struct mdt_object *obj;
+ struct md_hsm hsm;
- obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm,
- NULL);
- if (IS_ERR(obj) && (hai->hai_action == HSMA_REMOVE))
- continue;
+ if (hai->hai_action == HSMA_CANCEL)
+ continue;
- if (IS_ERR(obj) && (PTR_ERR(obj) == -ENOENT)) {
- fail_request = true;
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- &hai->hai_cookie,
- 1, ARS_FAILED);
- if (rc) {
- CERROR("%s: mdt_agent_record_update() "
- "failed, rc=%d, cannot update "
- "status to %s for cookie "
- LPX64": rc = %d\n",
- mdt_obd_name(mdt), rc,
- agent_req_status2name(ARS_FAILED),
- hai->hai_cookie, rc);
- GOTO(out_buf, rc);
- }
+ obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
+ if (!IS_ERR(obj)) {
+ mdt_object_put(mti->mti_env, obj);
+ } else if (PTR_ERR(obj) == -ENOENT) {
+ struct hsm_record_update update = {
+ .cookie = hai->hai_cookie,
+ .status = ARS_FAILED,
+ };
+
+ if (hai->hai_action == HSMA_REMOVE)
continue;
+
+ fail_request = true;
+ rc = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc < 0) {
+ CERROR("%s: mdt_agent_record_update() failed, "
+ "cannot update status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_FAILED),
+ hai->hai_cookie, rc);
+ GOTO(out_buf, rc);
}
- if (IS_ERR(obj))
- GOTO(out_buf, rc = PTR_ERR(obj));
-
- if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
- hal->hal_flags, &hsm)) {
- /* incompatible request, we abort the request */
- /* next time coordinator will wake up, it will
- * make the same compound with valid only
- * records */
- fail_request = true;
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- &hai->hai_cookie,
- 1, ARS_FAILED);
- if (rc) {
- CERROR("%s: mdt_agent_record_update() "
- "failed, rc=%d, cannot update "
- "status to %s for cookie "
- LPX64": rc = %d\n",
- mdt_obd_name(mdt), rc,
- agent_req_status2name(ARS_FAILED),
- hai->hai_cookie, rc);
- GOTO(out_buf, rc);
- }
+
+ continue;
+ } else {
+ GOTO(out_buf, rc = PTR_ERR(obj));
+ }
+
+ if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
+ hal->hal_flags, &hsm)) {
+ struct hsm_record_update update = {
+ .cookie = hai->hai_cookie,
+ .status = ARS_FAILED,
+ };
+
+ /* incompatible request, we abort the request */
+ /* next time coordinator will wake up, it will
+ * make the same HAL with valid only
+ * records */
+ fail_request = true;
+ rc = mdt_agent_record_update(mti->mti_env, mdt,
+ &update, 1);
+ if (rc) {
+ CERROR("%s: mdt_agent_record_update() failed, "
+ "cannot update status to %s for cookie "
+ "%#llx: rc = %d\n",
+ mdt_obd_name(mdt),
+ agent_req_status2name(ARS_FAILED),
+ hai->hai_cookie, rc);
+ GOTO(out_buf, rc);
}
+
+ /* if restore and record status updated, give
+ * back granted layout lock */
+ if (hai->hai_action == HSMA_RESTORE)
+ cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
}
}
- /* we found incompatible requests, so the compound cannot be send
+ /* we found incompatible requests, so the HAL cannot be sent
* as is. Bad records have been invalidated in llog.
* Valid one will be reschedule next time coordinator will wake up
- * So no need the rebuild a full valid compound request now
+ * So there is no need to rebuild a full valid HAL now
*/
if (fail_request)
GOTO(out_buf, rc = 0);
* the ldlm_callback_handler. Note this sends a request RPC
* from a server (MDT) to a client (MDC), backwards of normal comms.
*/
- exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid);
- if ((exp == NULL) || (exp->exp_disconnected)) {
+ exp = obd_uuid_lookup(mdt2obd_dev(mdt), &uuid);
+ if (exp == NULL || exp->exp_disconnected) {
+ if (exp != NULL)
+ class_export_put(exp);
/* This should clean up agents on evicted exports */
rc = -ENOENT;
CERROR("%s: agent uuid (%s) not found, unregistering:"
out:
if (rc != 0 && is_registered) {
/* in case of error, we have to unregister requests */
- hai = hai_zero(hal);
+ hai = hai_first(hal);
for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
if (hai->hai_action == HSMA_CANCEL)
continue;
}
out_buf:
- if (buf != hal)
- kuc_free(buf, len);
+ kuc_free(buf, len);
RETURN(rc);
}
/**
- * update status of a request
- * \param mti [IN]
- * \param pgs [IN] progress of the copy tool
- * \retval 0 success
- * \retval -ve failure
+ * seq_file method called to start access to debugfs file
*/
-int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
- struct hsm_progress_kernel *pgs)
-{
- int rc;
-
- ENTRY;
- /* ask to coodinator to update request state and
- * to record on disk the result */
- rc = mdt_hsm_update_request_state(mti, pgs, 1);
- RETURN(rc);
-}
-
-/**
- * seq_file method called to start access to /proc file
- */
-static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
+static void *mdt_hsm_agent_debugfs_start(struct seq_file *s, loff_t *off)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
- cfs_list_t *pos;
+ struct list_head *pos;
loff_t i;
ENTRY;
down_read(&cdt->cdt_agent_lock);
- if (cfs_list_empty(&cdt->cdt_agents))
+ if (list_empty(&cdt->cdt_agents))
RETURN(NULL);
if (*off == 0)
RETURN(SEQ_START_TOKEN);
i = 0;
- cfs_list_for_each(pos, &cdt->cdt_agents) {
+ list_for_each(pos, &cdt->cdt_agents) {
i++;
if (i >= *off)
RETURN(pos);
* seq_file method called to get next item
* just returns NULL at eof
*/
-static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
+static void *mdt_hsm_agent_debugfs_next(struct seq_file *s, void *v, loff_t *p)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
- cfs_list_t *pos = v;
+ struct list_head *pos = v;
ENTRY;
if (pos == SEQ_START_TOKEN)
/**
*/
-static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
+static int mdt_hsm_agent_debugfs_show(struct seq_file *s, void *v)
{
- cfs_list_t *pos = v;
+ struct list_head *pos = v;
struct hsm_agent *ha;
int i;
ENTRY;
if (pos == SEQ_START_TOKEN)
RETURN(0);
- ha = cfs_list_entry(pos, struct hsm_agent, ha_list);
- seq_printf(s, "uuid=%s archive#=%d (", ha->ha_uuid.uuid,
- ha->ha_archive_cnt);
- if (ha->ha_archive_cnt == 0)
- seq_printf(s, "all");
- else
- for (i = 0; i < ha->ha_archive_cnt; i++)
- seq_printf(s, "%d ", ha->ha_archive_id[i]);
+ ha = list_entry(pos, struct hsm_agent, ha_list);
+ seq_printf(s, "uuid=%s archive_id=", ha->ha_uuid.uuid);
+ if (ha->ha_archive_cnt == 0) {
+ seq_printf(s, "ANY");
+ } else {
+ seq_printf(s, "%d", ha->ha_archive_id[0]);
+ for (i = 1; i < ha->ha_archive_cnt; i++)
+ seq_printf(s, ",%d", ha->ha_archive_id[i]);
+ }
- seq_printf(s, ") r=%d s=%d f=%d\n",
+ seq_printf(s, " requests=[current:%d ok:%d errors:%d]\n",
atomic_read(&ha->ha_requests),
atomic_read(&ha->ha_success),
atomic_read(&ha->ha_failure));
}
/**
- * seq_file method called to stop access to /proc file
+ * seq_file method called to stop access to debugfs file
+ * releases the read side of cdt_agent_lock, which is taken with
+ * down_read() in mdt_hsm_agent_debugfs_start()
+ * \param s [IN] seq_file whose private field holds the mdt_device
+ * \param v [IN] current iterator position, not used here
*/
-static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
+static void mdt_hsm_agent_debugfs_stop(struct seq_file *s, void *v)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
up_read(&cdt->cdt_agent_lock);
}
-/* hsm agent list proc functions */
-static const struct seq_operations mdt_hsm_agent_proc_ops = {
- .start = mdt_hsm_agent_proc_start,
- .next = mdt_hsm_agent_proc_next,
- .show = mdt_hsm_agent_proc_show,
- .stop = mdt_hsm_agent_proc_stop,
+/* hsm agent list debugfs functions
+ * seq_file iterator callbacks, handed to seq_open() by
+ * ldebugfs_open_hsm_agent() */
+static const struct seq_operations mdt_hsm_agent_debugfs_ops = {
+ .start = mdt_hsm_agent_debugfs_start,
+ .next = mdt_hsm_agent_debugfs_next,
+ .show = mdt_hsm_agent_debugfs_show,
+ .stop = mdt_hsm_agent_debugfs_stop,
};
/**
- * public function called at open of /proc file to get
+ * public function called at open of debugfs file to get
* list of agents
+ * opens a seq_file bound to mdt_hsm_agent_debugfs_ops and copies
+ * inode->i_private into its private field
+ * \param inode [IN] inode of the opened file
+ * \param file [IN] file being opened
+ * \retval 0 success
+ * \retval non-zero value returned by seq_open() on failure
*/
-static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
+static int ldebugfs_open_hsm_agent(struct inode *inode, struct file *file)
{
struct seq_file *s;
int rc;
ENTRY;
- if (LPROCFS_ENTRY_AND_CHECK(PDE(inode)))
- RETURN(-ENOENT);
-
- rc = seq_open(file, &mdt_hsm_agent_proc_ops);
- if (rc) {
- LPROCFS_EXIT();
+ rc = seq_open(file, &mdt_hsm_agent_debugfs_ops);
+ if (rc)
RETURN(rc);
- }
+
s = file->private_data;
- s->private = PDE(inode)->data;
+ s->private = inode->i_private; /* read back as the mdt_device by the seq_file methods */
RETURN(rc);
}
/* methods to access hsm agent list */
const struct file_operations mdt_hsm_agent_fops = {
.owner = THIS_MODULE,
- .open = lprocfs_open_hsm_agent,
+ .open = ldebugfs_open_hsm_agent, /* sets up the seq_file iterator */
.read = seq_read,
.llseek = seq_lseek,
- .release = lprocfs_seq_release,
+ .release = seq_release, /* plain seq_file release replaces lprocfs_seq_release */
};
-