From: Mr NeilBrown
Date: Mon, 24 Aug 2020 22:28:18 +0000 (+1000)
Subject: LU-11085 mdt: revise recording of hsm progress updates.
X-Git-Tag: 2.14.51~62
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f684172237e99b8064a68bd6e53bcbb915c663c5

LU-11085 mdt: revise recording of hsm progress updates.

When a copy tool is migrating a file for HSM purposes, it can report
progress as individual intervals, and the total covered by all the
intervals can be requested.  This patch makes various changes to the
code for recording the intervals.

- switch to the Linux interval-tree implementation rather than the
  lustre one.
- detect overlapping intervals as well as duplicates.  Any overlapping
  or adjacent intervals are removed and the space which they covered is
  added to the new interval.
- keep track of the total of all current intervals, so that it can be
  returned on request without needing to examine the interval tree.
- use a spinlock rather than a mutex to protect against parallel
  updates, as all operations are non-blocking.

Also add a test to llapi_hsm_test.c to send overlapping intervals and
check that the result is as expected.

Signed-off-by: Mr NeilBrown
Change-Id: Id300b64949a3416ee3282c5e4ce82122c9e4e2f0
Reviewed-on: https://review.whamcloud.com/39725
Reviewed-by: Quentin Bouget
Reviewed-by: James Simmons
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h
index 5fa92df..82a3920 100644
--- a/libcfs/include/libcfs/libcfs.h
+++ b/libcfs/include/libcfs/libcfs.h
@@ -132,9 +132,11 @@ void libcfs_vfree_atomic(const void *addr);
 
 #ifdef HAVE_INTERVAL_TREE_CACHED
 #define interval_tree_root rb_root_cached
+#define interval_tree_first rb_first_cached
 #define INTERVAL_TREE_ROOT RB_ROOT_CACHED
 #else
 #define interval_tree_root rb_root
+#define interval_tree_first rb_first
 #define INTERVAL_TREE_ROOT RB_ROOT
 #endif /* HAVE_INTERVAL_TREE_CACHED */
 
diff --git a/lustre/mdt/mdt_hsm_cdt_client.c b/lustre/mdt/mdt_hsm_cdt_client.c
index 12ec150..ac5ff93 100644
--- a/lustre/mdt/mdt_hsm_cdt_client.c
+++ b/lustre/mdt/mdt_hsm_cdt_client.c
@@ -533,15 +533,12 @@ int mdt_hsm_get_action(struct mdt_thread_info *mti,
 		RETURN(0);
 
 	car = mdt_cdt_find_request(cdt, hgad.hgad_hai.hai_cookie);
-	if (car != NULL) {
-		__u64 data_moved;
-
-		mdt_cdt_get_work_done(car, &data_moved);
-		/* this is just to give the volume of data moved
-		 * it means data_moved data have been moved from the
-		 * original request but we do not know which one
+	if (car) {
+		/* This is just to give the volume of data moved.
+		 * It means 'car_progress' data have been moved from the
+		 * original request but we do not know which one.
 		 */
-		extent->length = data_moved;
+		extent->length = car->car_progress.crp_total;
 		mdt_cdt_put_request(car);
 	}
 
diff --git a/lustre/mdt/mdt_hsm_cdt_requests.c b/lustre/mdt/mdt_hsm_cdt_requests.c
index 7135334..9452638 100644
--- a/lustre/mdt/mdt_hsm_cdt_requests.c
+++ b/lustre/mdt/mdt_hsm_cdt_requests.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include
 #include "mdt_internal.h"
 
 static unsigned int
@@ -121,79 +122,41 @@ void dump_requests(char *prefix, struct coordinator *cdt)
 	up_read(&cdt->cdt_request_lock);
 }
 
-struct req_interval_data {
-	struct cdt_req_progress	*crp;
-	__u64			 done_sz;
-};
-
-/**
- * interval tree cb, used to go through all the tree of extent done
+/* Interval tree to track reported progress.
+ * Intervals stored are non-overlapping and non-adjacent.
+ * When a new interval is added, all intervals that might overlap + * or be adjacent are first removed, with any extra length added to + * the new interval. */ -static enum interval_iter req_interval_cb(struct interval_node *node, - void *args) -{ - struct req_interval_data *data; - ENTRY; - - data = args; - data->done_sz += node->in_extent.end - node->in_extent.start; - RETURN(INTERVAL_ITER_CONT); -} - -/** - * scan the interval tree associated to a request - * to compute the amount of work done - * \param car [IN] request - * \param done_sz [OUT] will be set to the size of work done - */ -void mdt_cdt_get_work_done(struct cdt_agent_req *car, __u64 *done_sz) -{ - struct req_interval_data rid; - struct cdt_req_progress *crp = &car->car_progress; +struct progress_node { + __u64 pn_offset; + __u64 pn_end; + __u64 pn_subtree_last; + struct rb_node pn_rb; +}; - mutex_lock(&crp->crp_lock); +#define START(node) ((node)->pn_offset) +#define LAST(node) ((node)->pn_end) - rid.crp = crp; - rid.done_sz = 0; - interval_iterate(crp->crp_root, req_interval_cb, &rid); - *done_sz = rid.done_sz; +INTERVAL_TREE_DEFINE(struct progress_node, pn_rb, __u64, pn_subtree_last, + START, LAST, static, progress) - mutex_unlock(&crp->crp_lock); -} +#define progress_first(root) rb_entry_safe(interval_tree_first(root), \ + struct progress_node, pn_rb) -#define NODE_VECTOR_SZ 256 -/** +/* * free the interval tree associated to a request */ static void mdt_cdt_free_request_tree(struct cdt_req_progress *crp) { - struct interval_node *node, *vn; - int i; + struct progress_node *node; ENTRY; - mutex_lock(&crp->crp_lock); - - if (crp->crp_max == 0) - goto out; - - /* remove all nodes from tree */ - for (i = 0 ; i < crp->crp_cnt ; i++) { - vn = crp->crp_node[i / NODE_VECTOR_SZ]; - node = &vn[i % NODE_VECTOR_SZ]; - interval_erase(node, &crp->crp_root); + while ((node = progress_first(&crp->crp_root)) != NULL) { + progress_remove(node, &crp->crp_root); + OBD_FREE_PTR(node); } - /* free all sub vectors */ - for (i = 0 ; i <= crp->crp_max / NODE_VECTOR_SZ ; i++) - OBD_FREE_PTR_ARRAY(crp->crp_node[i], NODE_VECTOR_SZ); - - /* free main vector */ - OBD_FREE_PTR_ARRAY(crp->crp_node, - (crp->crp_max / NODE_VECTOR_SZ + 1)); - - crp->crp_cnt = 0; - crp->crp_max = 0; -out: - mutex_unlock(&crp->crp_lock); + EXIT; } @@ -203,66 +166,45 @@ out: static int hsm_update_work(struct cdt_req_progress *crp, const struct hsm_extent *extent) { - int rc, osz, nsz; - struct interval_node **new_vv; - struct interval_node *v, *node; - __u64 end; + struct progress_node *node; + struct progress_node *overlap; + __u64 end; + __u64 total; ENTRY; - end = extent->offset + extent->length; - if (end <= extent->offset) + end = extent->offset + extent->length - 1; + if (end < extent->offset) RETURN(-EINVAL); - mutex_lock(&crp->crp_lock); - /* new node index */ - - if (crp->crp_cnt >= crp->crp_max) { - /* no more room */ - /* allocate a new vector */ - OBD_ALLOC_PTR_ARRAY(v, NODE_VECTOR_SZ); - if (v == NULL) - GOTO(out, rc = -ENOMEM); - - if (crp->crp_max == 0) - osz = 0; - else - osz = sizeof(new_vv[0]) * - (crp->crp_max / NODE_VECTOR_SZ + 1); - - nsz = osz + sizeof(new_vv[0]); - /* increase main vector size */ - OBD_ALLOC(new_vv, nsz); - if (new_vv == NULL) { - OBD_FREE_PTR_ARRAY(v, NODE_VECTOR_SZ); - GOTO(out, rc = -ENOMEM); - } - - if (osz == 0) { - crp->crp_max = NODE_VECTOR_SZ - 1; - } else { - memcpy(new_vv, crp->crp_node, osz); - OBD_FREE(crp->crp_node, osz); - crp->crp_max += NODE_VECTOR_SZ; - } - - crp->crp_node = new_vv; - crp->crp_node[crp->crp_max / 
NODE_VECTOR_SZ] = v; + OBD_ALLOC_PTR(node); + if (!node) + RETURN(-ENOMEM); + node->pn_offset = extent->offset; + node->pn_end = end; + + spin_lock(&crp->crp_lock); + total = crp->crp_total; + /* Search just before and just after the target interval + * to find intervals that would be adjacent. Remove them + * too and add their extra length to 'node'. + */ + while ((overlap = progress_iter_first(&crp->crp_root, + (node->pn_offset == 0 ? + 0 : node->pn_offset - 1), + (node->pn_end == LUSTRE_EOF ? + LUSTRE_EOF : node->pn_end + 1))) + != NULL) { + node->pn_offset = min(node->pn_offset, overlap->pn_offset); + node->pn_end = max(node->pn_end, overlap->pn_end); + progress_remove(overlap, &crp->crp_root); + total -= overlap->pn_end - overlap->pn_offset + 1; + OBD_FREE_PTR(overlap); } - - v = crp->crp_node[crp->crp_cnt / NODE_VECTOR_SZ]; - node = &v[crp->crp_cnt % NODE_VECTOR_SZ]; - rc = interval_set(node, extent->offset, end); - if (rc) - GOTO(out, rc); - /* try to insert, if entry already exist ignore the new one - * it can happen if ct sends 2 times the same progress */ - if (interval_insert(node, &crp->crp_root) == NULL) - crp->crp_cnt++; - - rc = 0; -out: - mutex_unlock(&crp->crp_lock); - RETURN(rc); + progress_insert(node, &crp->crp_root); + total += node->pn_end - node->pn_offset + 1; + crp->crp_total = total; + spin_unlock(&crp->crp_lock); + RETURN(0); } /** @@ -270,10 +212,11 @@ out: */ static void mdt_cdt_init_request_tree(struct cdt_req_progress *crp) { - mutex_init(&crp->crp_lock); - crp->crp_root = NULL; - crp->crp_cnt = 0; - crp->crp_max = 0; + spin_lock_init(&crp->crp_lock); + crp->crp_root = INTERVAL_TREE_ROOT; + if (0) + /* Silence a warning about unused function */ + progress_iter_next(NULL, 0, 0); } /** Allocate/init an agent request and its sub-structures. 
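
The new hsm_update_work() above merges every stored interval that overlaps or is
directly adjacent to the reported extent, subtracts the absorbed lengths from the
running total, and then inserts one enlarged interval and adds its length back.
A minimal userspace sketch of that merge-and-total logic follows; it keeps the
intervals in a sorted singly-linked list instead of the kernel interval tree, and
the prog_* names are illustrative only, not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct prog_ival {
	uint64_t start;			/* first byte covered */
	uint64_t last;			/* last byte covered (inclusive) */
	struct prog_ival *next;
};

struct prog_state {
	struct prog_ival *head;		/* sorted by 'start' */
	uint64_t total;			/* sum of (last - start + 1) */
};

/* Record [offset, offset + length - 1]; overlapping or adjacent intervals
 * are absorbed so duplicates and partial overlaps are only counted once.
 * Returns the updated total.
 */
static uint64_t prog_update(struct prog_state *st, uint64_t offset,
			    uint64_t length)
{
	uint64_t start = offset;
	uint64_t last;
	struct prog_ival **pp = &st->head;
	struct prog_ival *cur, *node;

	if (length == 0)
		return st->total;	/* nothing reported */
	last = offset + length - 1;

	while ((cur = *pp) != NULL) {
		if (cur->last + 1 < start) {	/* ends before, with a gap */
			pp = &cur->next;
			continue;
		}
		if (last + 1 < cur->start)	/* starts after, with a gap */
			break;
		/* Overlapping or adjacent: absorb it into the new interval. */
		if (cur->start < start)
			start = cur->start;
		if (cur->last > last)
			last = cur->last;
		st->total -= cur->last - cur->start + 1;
		*pp = cur->next;
		free(cur);
	}

	node = malloc(sizeof(*node));
	if (!node)
		exit(1);
	node->start = start;
	node->last = last;
	node->next = *pp;
	*pp = node;
	st->total += last - start + 1;
	return st->total;
}

int main(void)
{
	struct prog_state st = { NULL, 0 };

	/* Two overlapping reports plus an exact duplicate: total stays 300. */
	printf("%llu\n", (unsigned long long)prog_update(&st, 0, 200));
	printf("%llu\n", (unsigned long long)prog_update(&st, 100, 200));
	printf("%llu\n", (unsigned long long)prog_update(&st, 100, 200));
	return 0;
}

Because duplicates and partial overlaps are absorbed before the total is updated,
a copy tool that reports the same range twice does not inflate the total, which is
the property the crp_total field relies on.
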
@@ -561,14 +504,12 @@ static int mdt_hsm_active_requests_proc_show(struct seq_file *s, void *v) struct list_head *pos = v; struct cdt_agent_req *car; char buf[12]; - __u64 data_moved; ENTRY; if (pos == SEQ_START_TOKEN) RETURN(0); car = list_entry(pos, struct cdt_agent_req, car_request_list); - mdt_cdt_get_work_done(car, &data_moved); seq_printf(s, "fid="DFID" dfid="DFID " compound/cookie=%#llx/%#llx" @@ -585,7 +526,7 @@ static int mdt_hsm_active_requests_proc_show(struct seq_file *s, void *v) car->car_hai->hai_gid, hai_dump_data_field(car->car_hai, buf, sizeof(buf)), car->car_canceled, obd_uuid2str(&car->car_uuid), - data_moved); + car->car_progress.crp_total); RETURN(0); } diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 98cbfee..58aadaa 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -573,14 +573,10 @@ static inline struct mdt_thread_info *mdt_th_info(const struct lu_env *env) } struct cdt_req_progress { - struct mutex crp_lock; /**< protect tree */ - struct interval_node *crp_root; /**< tree to track extent + spinlock_t crp_lock; /**< protect tree */ + struct interval_tree_root crp_root; /**< tree to track extent * moved */ - struct interval_node **crp_node; /**< buffer for tree nodes - * vector of fixed size - * vectors */ - int crp_cnt; /**< # of used nodes */ - int crp_max; /**< # of allocated nodes */ + __u64 crp_total; }; struct cdt_agent_req { @@ -1083,7 +1079,6 @@ struct cdt_agent_req *mdt_cdt_alloc_request(__u32 archive_id, __u64 flags, void mdt_cdt_free_request(struct cdt_agent_req *car); int mdt_cdt_add_request(struct coordinator *cdt, struct cdt_agent_req *new_car); struct cdt_agent_req *mdt_cdt_find_request(struct coordinator *cdt, u64 cookie); -void mdt_cdt_get_work_done(struct cdt_agent_req *car, __u64 *done_sz); void mdt_cdt_get_request(struct cdt_agent_req *car); void mdt_cdt_put_request(struct cdt_agent_req *car); struct cdt_agent_req *mdt_cdt_update_request(struct coordinator *cdt, diff --git a/lustre/tests/llapi_hsm_test.c b/lustre/tests/llapi_hsm_test.c index 32998af..2d4ab13 100644 --- a/lustre/tests/llapi_hsm_test.c +++ b/lustre/tests/llapi_hsm_test.c @@ -1021,6 +1021,41 @@ void test112(void) helper_archiving(test112_progress, length); } +/* Archive, with 9 reports, each covering 20%, so many overlap. */ +static void test113_progress(struct hsm_copyaction_private *hcp, size_t length) +{ + int rc; + int i; + struct hsm_extent he; + struct hsm_current_action hca; + + for (i = 0; i < 9; i++) { + he.offset = i*length/10; + he.length = 2*length/10; + rc = llapi_hsm_action_progress(hcp, &he, length, 0); + ASSERTF(rc == 0, "llapi_hsm_action_progress failed: %s", + strerror(-rc)); + + rc = llapi_hsm_current_action(testfile, &hca); + ASSERTF(rc == 0, "llapi_hsm_current_action failed: %s", + strerror(-rc)); + ASSERTF(hca.hca_state == HPS_RUNNING, + "hca_state=%u", hca.hca_state); + ASSERTF(hca.hca_action == HUA_ARCHIVE, + "hca_state=%u", hca.hca_action); + ASSERTF(hca.hca_location.length == (i+2)*length/10, + "i=%d, length=%llu", + i, (unsigned long long)hca.hca_location.length); + } +} + +void test113(void) +{ + const size_t length = 1000; + + helper_archiving(test113_progress, length); +} + static void usage(char *prog) { fprintf(stderr, "Usage: %s [-d lustre_dir]\n", prog); @@ -1091,6 +1126,7 @@ int main(int argc, char *argv[]) PERFORM(test110); PERFORM(test111); PERFORM(test112); + PERFORM(test113); return EXIT_SUCCESS; }
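
test113 above reports nine extents of 2*length/10 bytes starting at offsets
i*length/10, so each report after the first overlaps the previous one by half.
Once overlaps are merged, the union after report i is the single extent
[0, (i+2)*length/10), which is why the test expects hca_location.length to equal
(i+2)*length/10 and to reach the full file length after the last report.  A
standalone model of that accounting (plain C, no llapi calls, assuming
length = 1000 as in the test) is:

#include <stdio.h>
#include <string.h>

int main(void)
{
	enum { LENGTH = 1000 };
	static char covered[LENGTH];	/* one flag per byte of the file */
	size_t total, j;
	int i;

	for (i = 0; i < 9; i++) {
		size_t offset = (size_t)i * LENGTH / 10;
		size_t len = 2 * LENGTH / 10;

		/* Mark the reported extent, then count the union so far. */
		memset(covered + offset, 1, len);
		for (total = 0, j = 0; j < LENGTH; j++)
			total += covered[j];
		printf("after report %d: %zu (expected %d)\n",
		       i, total, (i + 2) * LENGTH / 10);
	}
	return 0;
}
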