From: Frank Zago <fzago@cray.com>
Date: Fri, 8 Apr 2016 17:59:06 +0000 (-0400)
Subject: LU-7988 hsm: added coordinator housekeeping flag
X-Git-Tag: 2.10.53~40
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=afc9ff6caff7d572041cabf0a957dc8749fce49d;p=fs%2Flustre-release.git

LU-7988 hsm: added coordinator housekeeping flag

When the coordinator is not performing housekeeping, only the requests
in the ARS_WAITING state will be processed as they are new
requests. The other requests, in states ARS_FAILED, ARS_CANCELED,
ARS_SUCCEED and ARS_STARTED can wait a few more seconds until the
housekeeping starts.

Also, when not performing housekeeping, as soon as hsd.request is
full, exit from the loop as there is enough potential work queued;
there's no need to examine all the HSM records, thus shortening the
time spent in cdt_llog_process() holding the critical lock
cdt_llog_lock.

Test-Parameters: trivial testlist=sanity-hsm
Signed-off-by: frank zago <fzago@cray.com>
Change-Id: Ib73c97d29ca2f86b912aeb8d055c004cff14d5cf
Reviewed-on: https://review.whamcloud.com/19582
Tested-by: Jenkins
Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Patrick Farrell <paf@cray.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---

diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c
index e59452e..a579734 100644
--- a/lustre/mdt/mdt_coordinator.c
+++ b/lustre/mdt/mdt_coordinator.c
@@ -138,6 +138,10 @@ struct hsm_scan_request {
 struct hsm_scan_data {
 	struct mdt_thread_info		*mti;
 	char				 fs_name[MTI_NAME_MAXLEN+1];
+	/* are we scanning the logs for housekeeping, or just looking
+	 * for new work?
+	 */
+	bool				 housekeeping;
 	/* request to be send to agents */
 	int				 max_requests;	/** vector size */
 	int				 request_cnt;	/** used count */
@@ -202,13 +206,25 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 		if (!request) {
 			struct hsm_action_list *hal;
 
-			if (hsd->request_cnt == hsd->max_requests)
-				/* Unknown request and no more room
-				 * for a new request. Continue to scan
-				 * to find other entries for already
-				 * existing requests.
-				 */
-				RETURN(0);
+			if (hsd->request_cnt == hsd->max_requests) {
+				if (!hsd->housekeeping) {
+					/* The request array is full,
+					 * stop here. There might be
+					 * more known requests that
+					 * could be merged, but this
+					 * avoids analyzing too many
+					 * llogs for minor gains.
+					 */
+					RETURN(LLOG_PROC_BREAK);
+				} else {
+					/* Unknown request and no more room
+					 * for a new request. Continue to scan
+					 * to find other entries for already
+					 * existing requests.
+					 */
+					RETURN(0);
+				}
+			}
 
 			request = &hsd->request[hsd->request_cnt];
 
@@ -286,6 +302,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 		cfs_time_t now = cfs_time_current_sec();
 		cfs_time_t last;
 
+		if (!hsd->housekeeping)
+			break;
+
 		/* we search for a running request
 		 * error may happen if coordinator crashes or stopped
 		 * with running request
@@ -353,6 +372,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 	case ARS_FAILED:
 	case ARS_CANCELED:
 	case ARS_SUCCEED:
+		if (!hsd->housekeeping)
+			break;
+
 		if ((larr->arr_req_change + cdt->cdt_grace_delay) <
 		    cfs_time_current_sec()) {
 			cdt_agent_record_hash_del(cdt,
@@ -563,10 +585,15 @@ static int mdt_coordinator(void *data)
 
 		/* If no event, and no housekeeping to do, continue to
 		 * wait. */
-		if (last_housekeeping + cdt->cdt_loop_period <= get_seconds())
+		if (last_housekeeping + cdt->cdt_loop_period <=
+		    get_seconds()) {
 			last_housekeeping = get_seconds();
-		else if (!cdt->cdt_event)
+			hsd.housekeeping = true;
+		} else if (cdt->cdt_event) {
+			hsd.housekeeping = false;
+		} else {
 			continue;
+		}
 
 		cdt->cdt_event = false;