* (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
* alternatives
*
+ * Copyright (c) 2014, 2017, Intel Corporation.
*/
/*
* lustre/mdt/mdt_hsm_cdt_requests.c
#define DEBUG_SUBSYSTEM S_MDS
+#include <libcfs/libcfs.h>
+#include <libcfs/libcfs_hash.h>
#include <obd_support.h>
-#include <lustre/lustre_user.h>
#include <lprocfs_status.h>
+#include <linux/interval_tree_generic.h>
#include "mdt_internal.h"
+/*
+ * cfs_hash "hs_hash" callback for the request cookie hash.
+ * \a key points at a 64-bit cookie; its sizeof(u64) bytes are passed to
+ * cfs_hash_djb2_hash() together with \a mask. \a hs is unused.
+ */
+static unsigned int
+cdt_request_cookie_hash(struct cfs_hash *hs, const void *key, unsigned int mask)
+{
+ const unsigned int cookie_len = sizeof(u64);
+ unsigned int hashed;
+
+ hashed = cfs_hash_djb2_hash(key, cookie_len, mask);
+ return hashed;
+}
+
+/*
+ * cfs_hash "hs_object" callback: map the hash linkage \a hnode back to the
+ * struct cdt_agent_req that embeds it as car_cookie_hash.
+ */
+static void *cdt_request_cookie_object(struct hlist_node *hnode)
+{
+ struct cdt_agent_req *req;
+
+ req = hlist_entry(hnode, struct cdt_agent_req, car_cookie_hash);
+ return req;
+}
+
+/*
+ * cfs_hash "hs_key" callback: return a pointer to the cookie kept in the
+ * action item (car_hai->hai_cookie) of the request that owns \a hnode.
+ */
+static void *cdt_request_cookie_key(struct hlist_node *hnode)
+{
+ struct cdt_agent_req *req;
+
+ req = hlist_entry(hnode, struct cdt_agent_req, car_cookie_hash);
+ return &req->car_hai->hai_cookie;
+}
+
+/*
+ * cfs_hash "hs_keycmp" callback.
+ *
+ * \param key [IN] pointer to the 64-bit cookie being looked up
+ * \param hnode [IN] hash linkage of a stored request
+ * \retval 1 the stored request's cookie equals *key
+ * \retval 0 the cookies differ
+ */
+static int cdt_request_cookie_keycmp(const void *key, struct hlist_node *hnode)
+{
+ /* Read both cookies through const pointers: nothing is modified
+ * here, so the const qualifier of 'key' must not be cast away.
+ */
+ const u64 *wanted = key;
+ const u64 *stored = cdt_request_cookie_key(hnode);
+
+ return *wanted == *stored;
+}
+
+/*
+ * cfs_hash "hs_get" callback: take one reference, through
+ * mdt_cdt_get_request(), on the request that owns \a hnode.
+ * \a hs is unused.
+ */
+static void
+cdt_request_cookie_get(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+ struct cdt_agent_req *req;
+
+ req = cdt_request_cookie_object(hnode);
+ mdt_cdt_get_request(req);
+}
+
+/*
+ * cfs_hash put callback: drop one reference, through
+ * mdt_cdt_put_request(), on the request that owns \a hnode.
+ * \a hs is unused.
+ */
+static void
+cdt_request_cookie_put(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+ struct cdt_agent_req *req;
+
+ req = cdt_request_cookie_object(hnode);
+ mdt_cdt_put_request(req);
+}
+
+/*
+ * Callback table for the hash that indexes agent requests by cookie.
+ * The get/put entries forward to mdt_cdt_get_request() and
+ * mdt_cdt_put_request() respectively.
+ */
+struct cfs_hash_ops cdt_request_cookie_hash_ops = {
+ /* hlist_node -> request, and request -> key */
+ .hs_object = cdt_request_cookie_object,
+ .hs_key = cdt_request_cookie_key,
+ /* key hashing and comparison */
+ .hs_hash = cdt_request_cookie_hash,
+ .hs_keycmp = cdt_request_cookie_keycmp,
+ /* reference counting */
+ .hs_get = cdt_request_cookie_get,
+ .hs_put_locked = cdt_request_cookie_put,
+};
+
/**
* dump requests list
* \param cdt [IN] coordinator
struct cdt_agent_req *car;
down_read(&cdt->cdt_request_lock);
- list_for_each_entry(car, &cdt->cdt_requests, car_request_list) {
+ list_for_each_entry(car, &cdt->cdt_request_list, car_request_list) {
CDEBUG(D_HSM, "%s fid="DFID" dfid="DFID
- " compound/cookie="LPX64"/"LPX64
- " action=%s archive#=%d flags="LPX64
- " extent="LPX64"-"LPX64
- " gid="LPX64" refcount=%d canceled=%d\n",
+ " cookie=%#llx"
+ " action=%s archive#=%d flags=%#llx"
+ " extent=%#llx-%#llx"
+ " gid=%#llx refcount=%d canceled=%d\n",
prefix, PFID(&car->car_hai->hai_fid),
PFID(&car->car_hai->hai_dfid),
- car->car_compound_id, car->car_hai->hai_cookie,
+ car->car_hai->hai_cookie,
hsm_copytool_action2name(car->car_hai->hai_action),
car->car_archive_id, car->car_flags,
car->car_hai->hai_extent.offset,
up_read(&cdt->cdt_request_lock);
}
-struct req_interval_data {
- struct cdt_req_progress *crp;
- __u64 done_sz;
-};
-
-/**
- * interval tree cb, used to go through all the tree of extent done
- */
-static enum interval_iter req_interval_cb(struct interval_node *node,
- void *args)
-{
- struct req_interval_data *data;
- ENTRY;
-
- data = args;
- data->done_sz += node->in_extent.end - node->in_extent.start;
- RETURN(INTERVAL_ITER_CONT);
-}
-
-/**
- * scan the interval tree associated to a request
- * to compute the amount of work done
- * \param car [IN] request
- * \param done_sz [OUT] will be set to the size of work done
+/* Interval tree to track reported progress.
+ * Intervals stored are non-overlapping and non-adjacent.
+ * When a new interval is added, all intervals that might overlap
+ * or be adjacent are first removed, with any extra length added to
+ * the new interval.
*/
-void mdt_cdt_get_work_done(struct cdt_agent_req *car, __u64 *done_sz)
-{
- struct req_interval_data rid;
- struct cdt_req_progress *crp = &car->car_progress;
+struct progress_node {
+ __u64 pn_offset; /* first byte of the interval (inclusive) */
+ __u64 pn_end; /* last byte of the interval (inclusive) */
+ __u64 pn_subtree_last; /* subtree field named in INTERVAL_TREE_DEFINE */
+ struct rb_node pn_rb; /* rb-tree linkage named in INTERVAL_TREE_DEFINE */
+};
- mutex_lock(&crp->crp_lock);
+#define START(node) ((node)->pn_offset) /* interval start accessor given to INTERVAL_TREE_DEFINE */
+#define LAST(node) ((node)->pn_end) /* inclusive interval end accessor given to INTERVAL_TREE_DEFINE */
- rid.crp = crp;
- rid.done_sz = 0;
- interval_iterate(crp->crp_root, req_interval_cb, &rid);
- *done_sz = rid.done_sz;
+INTERVAL_TREE_DEFINE(struct progress_node, pn_rb, __u64, pn_subtree_last,
+ START, LAST, static, progress) /* supplies the static progress_insert/remove/iter_first/iter_next helpers used in this file */
- mutex_unlock(&crp->crp_lock);
-}
+#define progress_first(root) rb_entry_safe(interval_tree_first(root), \
+ struct progress_node, pn_rb) /* node returned by interval_tree_first() as a progress_node; callers test the result against NULL */
-#define NODE_VECTOR_SZ 256
-/**
+/*
* free the interval tree associated to a request
+ *
+ * Repeatedly takes the first node of crp->crp_root, removes it from the
+ * tree and frees it, until the tree is empty. crp_lock is not taken and
+ * crp_total is left untouched.
+ * NOTE(review): presumably the request is no longer shared when this
+ * runs - confirm against the caller.
+ *
+ * \param crp [IN] progress tracker whose tree is emptied
*/
static void mdt_cdt_free_request_tree(struct cdt_req_progress *crp)
{
- struct interval_node *node, *vn;
- int i;
+ struct progress_node *node; /* node currently being detached */
ENTRY;
- mutex_lock(&crp->crp_lock);
-
- if (crp->crp_max == 0)
- goto out;
-
- /* remove all nodes from tree */
- for (i = 0 ; i < crp->crp_cnt ; i++) {
- vn = crp->crp_node[i / NODE_VECTOR_SZ];
- node = &vn[i % NODE_VECTOR_SZ];
- interval_erase(node, &crp->crp_root);
+ while ((node = progress_first(&crp->crp_root)) != NULL) { /* loop until no node is left */
+ progress_remove(node, &crp->crp_root); /* unlink before freeing */
+ OBD_FREE_PTR(node); /* each node was allocated individually in hsm_update_work() */
}
- /* free all sub vectors */
- for (i = 0 ; i <= crp->crp_max / NODE_VECTOR_SZ ; i++)
- OBD_FREE(crp->crp_node[i],
- NODE_VECTOR_SZ * sizeof(crp->crp_node[i][0]));
-
- /* free main vector */
- OBD_FREE(crp->crp_node,
- sizeof(crp->crp_node[0]) *
- (crp->crp_max / NODE_VECTOR_SZ + 1));
-
- crp->crp_cnt = 0;
- crp->crp_max = 0;
-out:
- mutex_unlock(&crp->crp_lock);
+
EXIT;
}
static int hsm_update_work(struct cdt_req_progress *crp,
const struct hsm_extent *extent)
{
- int rc, osz, nsz;
- struct interval_node **new_vv;
- struct interval_node *v, *node;
+ struct progress_node *node; /* New interval built from 'extent'.
+ * Overview: merge it with every stored interval it overlaps or
+ * abuts, insert the result and update crp->crp_total.
+ * Returns 0 on success, -EINVAL if the extent end wraps below
+ * its offset, -ENOMEM if the node cannot be allocated.
+ */
+ struct progress_node *overlap; /* stored interval that touches 'node' */
+ __u64 end; /* inclusive last byte of 'extent' */
+ __u64 total; /* working copy of crp->crp_total */
ENTRY;
- mutex_lock(&crp->crp_lock);
- /* new node index */
-
- if (crp->crp_cnt >= crp->crp_max) {
- /* no more room */
- /* allocate a new vector */
- OBD_ALLOC(v, NODE_VECTOR_SZ * sizeof(v[0]));
- if (v == NULL)
- GOTO(out, rc = -ENOMEM);
-
- if (crp->crp_max == 0)
- osz = 0;
- else
- osz = sizeof(new_vv[0]) *
- (crp->crp_max / NODE_VECTOR_SZ + 1);
-
- nsz = osz + sizeof(new_vv[0]);
- /* increase main vector size */
- OBD_ALLOC(new_vv, nsz);
- if (new_vv == NULL) {
- OBD_FREE(v, NODE_VECTOR_SZ * sizeof(v[0]));
- GOTO(out, rc = -ENOMEM);
- }
-
- if (osz == 0) {
- crp->crp_max = NODE_VECTOR_SZ - 1;
- } else {
- memcpy(new_vv, crp->crp_node, osz);
- OBD_FREE(crp->crp_node, osz);
- crp->crp_max += NODE_VECTOR_SZ;
- }
-
- crp->crp_node = new_vv;
- crp->crp_node[crp->crp_max / NODE_VECTOR_SZ] = v;
+ end = extent->offset + extent->length - 1;
+ if (end < extent->offset) /* end wrapped; also true for length == 0 when offset > 0 */
+ RETURN(-EINVAL);
+ /* The node is allocated before crp_lock (a spinlock) is taken. */
+ OBD_ALLOC_PTR(node);
+ if (!node)
+ RETURN(-ENOMEM);
+ node->pn_offset = extent->offset;
+ node->pn_end = end;
+ /* Tree changes and the crp_total update below happen under crp_lock. */
+ spin_lock(&crp->crp_lock);
+ total = crp->crp_total;
+ /* Search just before and just after the target interval
+ * to find intervals that would be adjacent. Remove them
+ * too and add their extra length to 'node'.
+ *
+ * The search window is node's range widened by one byte on
+ * each side, clamped at 0 and LUSTRE_EOF. Each match is merged
+ * into 'node', removed from the tree, subtracted from 'total'
+ * and freed; the loop ends once nothing stored touches 'node'.
+ */
+ while ((overlap = progress_iter_first(&crp->crp_root,
+ (node->pn_offset == 0 ?
+ 0 : node->pn_offset - 1),
+ (node->pn_end == LUSTRE_EOF ?
+ LUSTRE_EOF : node->pn_end + 1)))
+ != NULL) {
+ node->pn_offset = min(node->pn_offset, overlap->pn_offset);
+ node->pn_end = max(node->pn_end, overlap->pn_end);
+ progress_remove(overlap, &crp->crp_root);
+ total -= overlap->pn_end - overlap->pn_offset + 1; /* pn_end is inclusive, hence + 1 */
+ OBD_FREE_PTR(overlap);
}
-
- v = crp->crp_node[crp->crp_cnt / NODE_VECTOR_SZ];
- node = &v[crp->crp_cnt % NODE_VECTOR_SZ];
- interval_set(node, extent->offset, extent->offset + extent->length);
- /* try to insert, if entry already exist ignore the new one
- * it can happen if ct sends 2 times the same progress */
- if (interval_insert(node, &crp->crp_root) == NULL)
- crp->crp_cnt++;
-
- rc = 0;
-out:
- mutex_unlock(&crp->crp_lock);
- return rc;
+ progress_insert(node, &crp->crp_root); /* 'node' now covers everything it absorbed */
+ total += node->pn_end - node->pn_offset + 1; /* length of the merged interval */
+ crp->crp_total = total;
+ spin_unlock(&crp->crp_lock);
+ RETURN(0);
}
/**
*/
static void mdt_cdt_init_request_tree(struct cdt_req_progress *crp)
{
- mutex_init(&crp->crp_lock);
- crp->crp_root = NULL;
- crp->crp_cnt = 0;
- crp->crp_max = 0;
+ spin_lock_init(&crp->crp_lock); /* lock held by hsm_update_work() around tree and crp_total updates */
+ crp->crp_root = INTERVAL_TREE_ROOT; /* presumably the empty-tree initializer - TODO confirm; NOTE(review): crp_total is not set here, presumably the enclosing request is zero-allocated - confirm */
+ if (0)
+ /* Never executed: the call only references the generated
+ * static progress_iter_next() so the compiler does not warn
+ * that it is unused.
+ */
+ progress_iter_next(NULL, 0, 0);
}
-/** Allocate/init a agent request and its sub-structures.
+/** Allocate/init an agent request and its sub-structures.
*
- * \param compound_id [IN]
* \param archive_id [IN]
* \param flags [IN]
* \param uuid [IN]
* \retval car [OUT] success valid structure
* \retval car [OUT]
*/
-struct cdt_agent_req *mdt_cdt_alloc_request(__u64 compound_id, __u32 archive_id,
- __u64 flags, struct obd_uuid *uuid,
+struct cdt_agent_req *mdt_cdt_alloc_request(__u32 archive_id, __u64 flags,
+ struct obd_uuid *uuid,
struct hsm_action_item *hai)
{
struct cdt_agent_req *car;
RETURN(ERR_PTR(-ENOMEM));
atomic_set(&car->car_refcount, 1);
- car->car_compound_id = compound_id;
car->car_archive_id = archive_id;
car->car_flags = flags;
car->car_canceled = 0;
- car->car_req_start = cfs_time_current_sec();
+ car->car_req_start = ktime_get_real_seconds();
car->car_req_update = car->car_req_start;
car->car_uuid = *uuid;
OBD_ALLOC(car->car_hai, hai->hai_len);
}
/**
- * Free a agent request and its sub-structures.
+ * Free an agent request and its sub-structures.
*
* \param car [IN] Request to be freed.
*/
}
/**
- * find request in the list by cookie or by fid
- * lock cdt_request_lock needs to be hold by caller
- * \param cdt [IN] coordinator
- * \param cookie [IN] request cookie
- * \param fid [IN] fid
- * \retval request pointer or NULL if not found
- */
-static struct cdt_agent_req *cdt_find_request_nolock(struct coordinator *cdt,
- __u64 cookie,
- const struct lu_fid *fid)
-{
- struct cdt_agent_req *car;
- struct cdt_agent_req *found = NULL;
- ENTRY;
-
- list_for_each_entry(car, &cdt->cdt_requests, car_request_list) {
- if (car->car_hai->hai_cookie == cookie ||
- (fid != NULL && lu_fid_eq(fid, &car->car_hai->hai_fid))) {
- mdt_cdt_get_request(car);
- found = car;
- break;
- }
- }
-
- RETURN(found);
-}
-
-/**
* add a request to the list
* \param cdt [IN] coordinator
* \param car [IN] request
* \retval 0 success
* \retval -ve failure
*/
-int mdt_cdt_add_request(struct coordinator *cdt, struct cdt_agent_req *new_car)
+int mdt_cdt_add_request(struct coordinator *cdt, struct cdt_agent_req *car)
{
- struct cdt_agent_req *car;
+ int rc;
ENTRY;
/* cancel requests are not kept in memory */
- LASSERT(new_car->car_hai->hai_action != HSMA_CANCEL);
+ LASSERT(car->car_hai->hai_action != HSMA_CANCEL);
down_write(&cdt->cdt_request_lock);
- car = cdt_find_request_nolock(cdt, new_car->car_hai->hai_cookie, NULL);
- if (car != NULL) {
- mdt_cdt_put_request(car);
+
+ rc = cfs_hash_add_unique(cdt->cdt_request_cookie_hash,
+ &car->car_hai->hai_cookie,
+ &car->car_cookie_hash);
+ if (rc < 0) {
up_write(&cdt->cdt_request_lock);
RETURN(-EEXIST);
}
- list_add_tail(&new_car->car_request_list, &cdt->cdt_requests);
- up_write(&cdt->cdt_request_lock);
+ list_add_tail(&car->car_request_list, &cdt->cdt_request_list);
- mdt_hsm_agent_update_statistics(cdt, 0, 0, 1, &new_car->car_uuid);
+ up_write(&cdt->cdt_request_lock);
+ mdt_hsm_agent_update_statistics(cdt, 0, 0, 1, &car->car_uuid);
+
+ switch (car->car_hai->hai_action) {
+ case HSMA_ARCHIVE:
+ atomic_inc(&cdt->cdt_archive_count);
+ break;
+ case HSMA_RESTORE:
+ atomic_inc(&cdt->cdt_restore_count);
+ break;
+ case HSMA_REMOVE:
+ atomic_inc(&cdt->cdt_remove_count);
+ break;
+ }
atomic_inc(&cdt->cdt_request_count);
RETURN(0);
* \param fid [IN] fid
* \retval request pointer or NULL if not found
*/
-struct cdt_agent_req *mdt_cdt_find_request(struct coordinator *cdt,
- const __u64 cookie,
- const struct lu_fid *fid)
+struct cdt_agent_req *mdt_cdt_find_request(struct coordinator *cdt, u64 cookie)
{
struct cdt_agent_req *car;
ENTRY;
down_read(&cdt->cdt_request_lock);
- car = cdt_find_request_nolock(cdt, cookie, fid);
+ car = cfs_hash_lookup(cdt->cdt_request_cookie_hash, &cookie);
up_read(&cdt->cdt_request_lock);
RETURN(car);
ENTRY;
down_write(&cdt->cdt_request_lock);
- car = cdt_find_request_nolock(cdt, cookie, NULL);
- if (car != NULL) {
- list_del(&car->car_request_list);
+ car = cfs_hash_del_key(cdt->cdt_request_cookie_hash, &cookie);
+ if (car == NULL) {
up_write(&cdt->cdt_request_lock);
+ RETURN(-ENOENT);
+ }
- /* reference from cdt_requests list */
- mdt_cdt_put_request(car);
+ list_del(&car->car_request_list);
+ up_write(&cdt->cdt_request_lock);
- /* reference from cdt_find_request_nolock() */
- mdt_cdt_put_request(car);
+ switch (car->car_hai->hai_action) {
+ case HSMA_ARCHIVE:
+ atomic_dec(&cdt->cdt_archive_count);
+ break;
+ case HSMA_RESTORE:
+ atomic_dec(&cdt->cdt_restore_count);
+ break;
+ case HSMA_REMOVE:
+ atomic_dec(&cdt->cdt_remove_count);
+ break;
+ }
- LASSERT(atomic_read(&cdt->cdt_request_count) >= 1);
- atomic_dec(&cdt->cdt_request_count);
+ /* Drop reference from cdt_request_list. */
+ mdt_cdt_put_request(car);
- RETURN(0);
+ LASSERT(atomic_read(&cdt->cdt_request_count) >= 1);
+ if (atomic_dec_and_test(&cdt->cdt_request_count)) {
+ /* no requests remain, nudge coordinator for more work */
+ cdt->cdt_wakeup_coordinator = true;
+ wake_up_interruptible(&cdt->cdt_waitq);
}
- up_write(&cdt->cdt_request_lock);
- RETURN(-ENOENT);
+ RETURN(0);
}
/**
int rc;
ENTRY;
- car = mdt_cdt_find_request(cdt, pgs->hpk_cookie, NULL);
+ car = mdt_cdt_find_request(cdt, pgs->hpk_cookie);
if (car == NULL)
RETURN(ERR_PTR(-ENOENT));
- car->car_req_update = cfs_time_current_sec();
+ car->car_req_update = ktime_get_real_seconds();
- /* update progress done by copy tool */
- if (pgs->hpk_errval == 0 && pgs->hpk_extent.length != 0) {
+ /* update data move progress done by copy tool */
+ if (car->car_hai->hai_action != HSMA_REMOVE && pgs->hpk_errval == 0 &&
+ pgs->hpk_extent.length != 0) {
rc = hsm_update_work(&car->car_progress, &pgs->hpk_extent);
if (rc) {
mdt_cdt_put_request(car);
/**
* seq_file method called to start access to /proc file
*/
-static void *mdt_hsm_request_proc_start(struct seq_file *s, loff_t *p)
+static void *mdt_hsm_active_requests_proc_start(struct seq_file *s, loff_t *p)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
down_read(&cdt->cdt_request_lock);
- if (list_empty(&cdt->cdt_requests))
+ if (list_empty(&cdt->cdt_request_list))
RETURN(NULL);
if (*p == 0)
RETURN(SEQ_START_TOKEN);
i = 0;
- list_for_each(pos, &cdt->cdt_requests) {
+ list_for_each(pos, &cdt->cdt_request_list) {
i++;
if (i >= *p)
RETURN(pos);
* seq_file method called to get next item
* just returns NULL at eof
*/
-static void *mdt_hsm_request_proc_next(struct seq_file *s, void *v, loff_t *p)
+static void *mdt_hsm_active_requests_proc_next(struct seq_file *s, void *v,
+ loff_t *p)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
ENTRY;
if (pos == SEQ_START_TOKEN)
- pos = cdt->cdt_requests.next;
+ pos = cdt->cdt_request_list.next;
else
pos = pos->next;
(*p)++;
- if (pos != &cdt->cdt_requests)
+ if (pos != &cdt->cdt_request_list)
RETURN(pos);
else
RETURN(NULL);
/**
* display request data
*/
-static int mdt_hsm_request_proc_show(struct seq_file *s, void *v)
+static int mdt_hsm_active_requests_proc_show(struct seq_file *s, void *v)
{
struct list_head *pos = v;
struct cdt_agent_req *car;
char buf[12];
- __u64 data_moved;
ENTRY;
if (pos == SEQ_START_TOKEN)
RETURN(0);
car = list_entry(pos, struct cdt_agent_req, car_request_list);
- mdt_cdt_get_work_done(car, &data_moved);
seq_printf(s, "fid="DFID" dfid="DFID
- " compound/cookie="LPX64"/"LPX64
- " action=%s archive#=%d flags="LPX64
- " extent="LPX64"-"LPX64" gid="LPX64
- " data=[%s] canceled=%d uuid=%s done="LPU64"%%\n",
+ " compound/cookie=%#llx/%#llx"
+ " action=%s archive#=%d flags=%#llx"
+ " extent=%#llx-%#llx gid=%#llx"
+ " data=[%s] canceled=%d uuid=%s done=%llu\n",
PFID(&car->car_hai->hai_fid),
PFID(&car->car_hai->hai_dfid),
- car->car_compound_id, car->car_hai->hai_cookie,
+ 0ULL /* compound_id */, car->car_hai->hai_cookie,
hsm_copytool_action2name(car->car_hai->hai_action),
car->car_archive_id, car->car_flags,
car->car_hai->hai_extent.offset,
car->car_hai->hai_gid,
hai_dump_data_field(car->car_hai, buf, sizeof(buf)),
car->car_canceled, obd_uuid2str(&car->car_uuid),
- data_moved);
+ car->car_progress.crp_total);
RETURN(0);
}
/**
* seq_file method called to stop access to /proc file
*/
-static void mdt_hsm_request_proc_stop(struct seq_file *s, void *v)
+static void mdt_hsm_active_requests_proc_stop(struct seq_file *s, void *v)
{
struct mdt_device *mdt = s->private;
struct coordinator *cdt = &mdt->mdt_coordinator;
}
/* hsm agent list proc functions */
-static const struct seq_operations mdt_hsm_request_proc_ops = {
- .start = mdt_hsm_request_proc_start,
- .next = mdt_hsm_request_proc_next,
- .show = mdt_hsm_request_proc_show,
- .stop = mdt_hsm_request_proc_stop,
+static const struct seq_operations mdt_hsm_active_requests_proc_ops = { /* seq_file iterator over cdt_request_list */
+ .start = mdt_hsm_active_requests_proc_start, /* takes cdt_request_lock for reading and seeks to *p */
+ .next = mdt_hsm_active_requests_proc_next, /* steps to the following list entry, NULL at the end */
+ .show = mdt_hsm_active_requests_proc_show, /* prints one request */
+ .stop = mdt_hsm_active_requests_proc_stop, /* NOTE(review): expected to release the lock taken by .start - confirm */
};
/**
* public function called at open of /proc file to get
* list of agents
*/
-static int lprocfs_open_hsm_request(struct inode *inode, struct file *file)
+static int ldebugfs_open_hsm_active_requests(struct inode *inode,
+ struct file *file) /* open handler; returns the result of seq_open() */
{
struct seq_file *s;
int rc;
ENTRY;
- if (LPROCFS_ENTRY_AND_CHECK(PDE(inode)))
- RETURN(-ENOENT);
-
- rc = seq_open(file, &mdt_hsm_request_proc_ops);
+ rc = seq_open(file, &mdt_hsm_active_requests_proc_ops); /* attach the active-requests iterator to this file */
if (rc) {
- LPROCFS_EXIT();
RETURN(rc);
}
s = file->private_data;
- s->private = PDE(inode)->data;
+ s->private = inode->i_private; /* the seq callbacks read s->private back as a struct mdt_device * */
RETURN(rc);
}
/* methods to access hsm request list */
-const struct file_operations mdt_hsm_request_fops = {
+const struct file_operations mdt_hsm_active_requests_fops = { /* file operations for the active requests listing */
.owner = THIS_MODULE,
- .open = lprocfs_open_hsm_request,
+ .open = ldebugfs_open_hsm_active_requests, /* sets up the seq_file and its private pointer */
.read = seq_read,
.llseek = seq_lseek,
- .release = lprocfs_seq_release,
+ .release = seq_release, /* counterpart of the seq_open() call made in .open */
};