From: Frank Zago
Date: Wed, 6 Apr 2016 21:03:14 +0000 (-0500)
Subject: LU-7988 hsm: run HSM coordinator once per second at most
X-Git-Tag: 2.10.51~11
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=cc6ef11d2f972ebc440013bddda87a536a09750c;hp=2deec088aa4e1153bc00730c2b1b4bf5d40d275a

LU-7988 hsm: run HSM coordinator once per second at most

When there is heavy HSM usage, each new HSM request can trigger the
HSM coordinator, which may then run many times per second. While it
runs, it holds the HSM catalog lock (cdt_llog_lock), preventing any
other HSM operation, such as insertion, removal or dumping of
requests, from happening.

Limit the coordinator to running at most once per second, and only
when there is work to do. It will still execute its loop once every
10 seconds (or as defined by the procfs loop_period parameter) to do
housekeeping.

Signed-off-by: frank zago
Change-Id: Ide3f061f8943a3088ea713993521897fb74e5d99
Reviewed-on: https://review.whamcloud.com/19341
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Quentin Bouget
Reviewed-by: Faccini Bruno
Reviewed-by: Oleg Drokin
---
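The shape of the new loop is easier to see outside the kernel. The
following userspace sketch mirrors the logic of the patch below, with
POSIX threads standing in for the kernel wait-queue API; the struct
layout and the coordinator_loop() name are simplified illustrations,
not the actual Lustre types:

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

struct coordinator {
        pthread_mutex_t lock;
        pthread_cond_t  waitq;
        bool            event;       /* set when a request is queued */
        bool            should_stop;
        time_t          loop_period; /* housekeeping interval, 10s default */
};

static void coordinator_loop(struct coordinator *cdt)
{
        time_t last_housekeeping = 0;

        pthread_mutex_lock(&cdt->lock);
        while (!cdt->should_stop) {
                struct timespec deadline;

                /* Wake up at most once per second, no matter how many
                 * requests arrive in the meantime. */
                clock_gettime(CLOCK_REALTIME, &deadline);
                deadline.tv_sec += 1;
                pthread_cond_timedwait(&cdt->waitq, &cdt->lock, &deadline);

                if (cdt->should_stop)
                        break;

                /* Run housekeeping every loop_period seconds even when
                 * idle; otherwise skip the expensive catalog scan if no
                 * event is pending. */
                if (last_housekeeping + cdt->loop_period <= time(NULL))
                        last_housekeeping = time(NULL);
                else if (!cdt->event)
                        continue;

                cdt->event = false;
                /* ... scan the HSM llog and dispatch requests, taking
                 * the catalog lock only for this bounded pass ... */
        }
        pthread_mutex_unlock(&cdt->lock);
}

The point of the pattern is that pthread_cond_timedwait(), like
wait_event_interruptible_timeout() in the patch, returns after at most
one second whether or not it was signaled, which bounds how often the
catalog can be locked.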
diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c
index a85e2af..bf04033 100644
--- a/lustre/mdt/mdt_coordinator.c
+++ b/lustre/mdt/mdt_coordinator.c
@@ -514,6 +514,8 @@ static int mdt_coordinator(void *data)
 	struct mdt_device *mdt = mti->mti_mdt;
 	struct coordinator *cdt = &mdt->mdt_coordinator;
 	struct hsm_scan_data hsd = { NULL };
+	time64_t wait_event_time = 1 * HZ;
+	time64_t last_housekeeping = 0;
 	int rc = 0;
 	int request_sz;
 	ENTRY;
@@ -535,30 +537,39 @@ static int mdt_coordinator(void *data)
 	wake_up_all(&cdt->cdt_waitq);
 
 	while (1) {
-		struct l_wait_info lwi;
 		int i;
 
-		lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period),
-				  NULL, NULL);
-		l_wait_event(cdt->cdt_waitq,
-			     cdt->cdt_event || kthread_should_stop(),
-			     &lwi);
+		/* Limit execution of the expensive requests traversal
+		 * to at most every "wait_event_time" jiffies. This
+		 * prevents repeatedly locking/unlocking the catalog for
+		 * each request, which would block other HSM operations. */
+		wait_event_interruptible_timeout(cdt->cdt_waitq,
+						 kthread_should_stop(),
+						 wait_event_time);
 
 		CDEBUG(D_HSM, "coordinator resumes\n");
 
 		if (kthread_should_stop()) {
+			CDEBUG(D_HSM, "Coordinator stops\n");
 			rc = 0;
 			break;
 		}
 
-		cdt->cdt_event = false;
-
 		/* if coordinator is suspended continue to wait */
 		if (cdt->cdt_state == CDT_DISABLE) {
 			CDEBUG(D_HSM, "disable state, coordinator sleeps\n");
 			continue;
 		}
 
+		/* If no event, and no housekeeping to do, continue to
+		 * wait. */
+		if (last_housekeeping + cdt->cdt_loop_period <= get_seconds())
+			last_housekeeping = get_seconds();
+		else if (!cdt->cdt_event)
+			continue;
+
+		cdt->cdt_event = false;
+
 		CDEBUG(D_HSM, "coordinator starts reading llog\n");
 
 		if (hsd.max_requests != cdt->cdt_max_requests) {
@@ -824,27 +835,6 @@ static int hsm_init_ucred(struct lu_ucred *uc)
 }
 
 /**
- * wake up coordinator thread
- * \param mdt [IN] device
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_hsm_cdt_wakeup(struct mdt_device *mdt)
-{
-	struct coordinator *cdt = &mdt->mdt_coordinator;
-	ENTRY;
-
-	if (cdt->cdt_state == CDT_STOPPED)
-		RETURN(-ESRCH);
-
-	/* wake up coordinator */
-	cdt->cdt_event = true;
-	wake_up_all(&cdt->cdt_waitq);
-
-	RETURN(0);
-}
-
-/**
  * initialize coordinator struct
  * \param mdt [IN] device
  * \retval 0 success
@@ -1565,9 +1555,9 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 		/* then remove request from memory list (LU-9075) */
 		mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
 
-		/* ct has completed a request, so a slot is available, wakeup
-		 * cdt to find new work */
-		mdt_hsm_cdt_wakeup(mdt);
+		/* ct has completed a request, so a slot is available,
+		 * signal the coordinator to find new work */
+		mdt_hsm_cdt_event(cdt);
 	} else {
 		/* if copytool send a progress on a canceled request
 		 * we inform copytool it should stop
@@ -2039,7 +2029,8 @@ mdt_hsm_cdt_control_seq_write(struct file *file, const char __user *buffer,
 	if (strcmp(kernbuf, CDT_ENABLE_CMD) == 0) {
 		if (cdt->cdt_state == CDT_DISABLE) {
 			rc = set_cdt_state(cdt, CDT_RUNNING, NULL);
-			mdt_hsm_cdt_wakeup(mdt);
+			mdt_hsm_cdt_event(cdt);
+			wake_up(&cdt->cdt_waitq);
 		} else {
 			rc = mdt_hsm_cdt_start(mdt);
 		}
diff --git a/lustre/mdt/mdt_hsm_cdt_client.c b/lustre/mdt/mdt_hsm_cdt_client.c
index a2be524..e1f1673 100644
--- a/lustre/mdt/mdt_hsm_cdt_client.c
+++ b/lustre/mdt/mdt_hsm_cdt_client.c
@@ -464,9 +464,9 @@ record:
 	GOTO(out, rc);
 
 out:
-	/* if work has been added, wake up coordinator */
+	/* if work has been added, signal the coordinator */
 	if (rc == 0 || rc == -ENODATA)
-		mdt_hsm_cdt_wakeup(mdt);
+		mdt_hsm_cdt_event(cdt);
 
 	return rc;
 }
diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h
index 86a87b2..c67d090 100644
--- a/lustre/mdt/mdt_internal.h
+++ b/lustre/mdt/mdt_internal.h
@@ -894,7 +894,15 @@ struct cdt_restore_handle *mdt_hsm_restore_hdl_find(struct coordinator *cdt,
 int mdt_hsm_cdt_init(struct mdt_device *mdt);
 int mdt_hsm_cdt_stop(struct mdt_device *mdt);
 int mdt_hsm_cdt_fini(struct mdt_device *mdt);
-int mdt_hsm_cdt_wakeup(struct mdt_device *mdt);
+
+/*
+ * Signal that the coordinator has work to do
+ * \param cdt [IN] coordinator
+ */
+static inline void mdt_hsm_cdt_event(struct coordinator *cdt)
+{
+	cdt->cdt_event = true;
+}
 
 /* coordinator control /proc interface */
 ssize_t mdt_hsm_cdt_control_seq_write(struct file *file,
diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh
index a69302a..25c9cf8 100755
--- a/lustre/tests/sanity-hsm.sh
+++ b/lustre/tests/sanity-hsm.sh
@@ -3198,6 +3198,12 @@ multi_archive() {
 	echo "$count archive requests submitted"
 }
 
+cleanup_test_40() {
+	trap 0
+	set_hsm_param max_requests $max_requests
+	copytool_cleanup
+}
+
 test_40() {
 	local stream_count=4
 	local file_count=100
@@ -3206,6 +3212,17 @@ test_40() {
 	local i=""
 	local p=""
 	local fid=""
+	local max_requests=$(get_hsm_param max_requests)
+
+	# Increase the number of HSM requests that can be performed in
+	# parallel. With the coordinator running once per second, this
+	# also limits the number of requests per second that can be
+	# performed, so we pick a decent number. But we also need to keep
+	# that number low because the copytool has no rate limit and will
+	# fail some requests if it gets too many at once.
+	set_hsm_param max_requests 300
+
+	trap cleanup_test_40 EXIT
 
 	for i in $(seq 1 $file_count); do
 		for p in $(seq 1 $stream_count); do
@@ -3232,7 +3249,8 @@ test_40() {
 	wait ${pids[*]}
 	echo OK
 	wait_all_done 100
-	copytool_cleanup
+
+	cleanup_test_40
 }
 run_test 40 "Parallel archive requests"
@@ -3907,8 +3925,6 @@ run_test 103 "Purge all requests"
 DATA=CEA
 DATAHEX='[434541]'
 test_104() {
-	# test needs a running copytool
-	copytool_setup
 
 	mkdir -p $DIR/$tdir
 	local f=$DIR/$tdir/$tfile
@@ -3916,25 +3932,38 @@ test_104() {
 	fid=$(make_custom_file_for_progress $f 39 1000000)
 	[ $? != 0 ] && skip "not enough free space" && return
 
-	# if cdt is on, it can serve too quickly the request
-	cdt_disable
 	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
 	local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
 			$HSM_PARAM.actions |\
 			grep $fid | cut -f16 -d=")
-	cdt_enable
 	[[ "$data1" == "$DATAHEX" ]] ||
 		error "Data field in records is ($data1) and not ($DATAHEX)"
+	# archive the file
+	copytool_setup
+
+	wait_request_state $fid ARCHIVE SUCCEED
+	copytool_cleanup
 }
 run_test 104 "Copy tool data field"
 
+cleanup_test_105() {
+	trap 0
+	set_hsm_param max_requests $max_requests
+	copytool_cleanup
+}
+
 test_105() {
+	local max_requests=$(get_hsm_param max_requests)
 	mkdir -p $DIR/$tdir
 	local i=""
 
+	set_hsm_param max_requests 300
+
+	trap cleanup_test_105 EXIT
+
 	cdt_disable
 	for i in $(seq -w 1 10); do
 		cp /etc/passwd $DIR/$tdir/$i
@@ -3944,6 +3973,7 @@ test_105() {
 			$HSM_PARAM.actions |\
 			grep WAITING | wc -l")
 	cdt_restart
+	cdt_disable
 
 	local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\
 			$HSM_PARAM.actions |\
 			grep WAITING | wc -l")
@@ -3953,6 +3983,8 @@ test_105() {
 	[[ "$reqcnt1" == "$reqcnt2" ]] ||
 		error "Requests count after shutdown $reqcnt2 != "\
 		      "before shutdown $reqcnt1"
+
+	cleanup_test_105
 }
 run_test 105 "Restart of coordinator"
@@ -4288,6 +4320,9 @@ test_220a() {
 
 	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
 
+	# wait for request to reach CT
+	wait_request_state $fid ARCHIVE STARTED
+
 	rm -f $f
 
 	copytool_continue
@@ -4402,6 +4437,10 @@ test_222c() {
 	copytool_suspend
 	$LFS hsm_restore $f
+
+	# wait for request to reach CT
+	wait_request_state $fid RESTORE STARTED
+
 	rm -f $f
 	copytool_continue
@@ -4553,6 +4592,10 @@ test_224a() {
 	copytool_suspend
 	$LFS hsm_remove $f
+
+	# wait for request to reach CT
+	wait_request_state $fid REMOVE STARTED
+
 	rm -f $f
 	copytool_continue
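A closing note on the producer side of this change: mdt_hsm_cdt_event()
only sets cdt_event and deliberately does not wake the coordinator,
which now polls the flag on its one-second tick, so queuing work stays
cheap. The procfs enable path is the one exception and pairs the flag
with an explicit wake_up(), so the transition takes effect immediately
rather than on the next tick. Both call patterns appear verbatim in the
diff above:

	/* request submission (mdt_hsm_cdt_client.c): set the flag only;
	 * the coordinator notices it within one second */
	mdt_hsm_cdt_event(cdt);

	/* procfs enable (mdt_coordinator.c): set the flag and wake the
	 * thread so re-enabling is not delayed */
	mdt_hsm_cdt_event(cdt);
	wake_up(&cdt->cdt_waitq);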