Whamcloud - gitweb
LU-7988 hsm: added coordinator housekeeping flag 82/19582/38
author Frank Zago <fzago@cray.com>
Fri, 8 Apr 2016 17:59:06 +0000 (13:59 -0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 31 Aug 2017 19:15:34 +0000 (19:15 +0000)
When the coordinator is not performing housekeeping, only requests in
the ARS_WAITING state are processed, as they are new requests. Requests
in the other states (ARS_FAILED, ARS_CANCELED, ARS_SUCCEED and
ARS_STARTED) can wait a few more seconds until housekeeping starts.

Also, when not performing housekeeping, exit from the loop as soon as
hsd.request is full, since enough potential work is already queued.
There is no need to examine all the HSM records; stopping early
shortens the time spent in cdt_llog_process() holding the critical
lock cdt_llog_lock.

Test-Parameters: trivial testlist=sanity-hsm
Signed-off-by: frank zago <fzago@cray.com>
Change-Id: Ib73c97d29ca2f86b912aeb8d055c004cff14d5cf
Reviewed-on: https://review.whamcloud.com/19582
Tested-by: Jenkins
Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Patrick Farrell <paf@cray.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/mdt/mdt_coordinator.c

index e59452e..a579734 100644 (file)
@@ -138,6 +138,10 @@ struct hsm_scan_request {
 struct hsm_scan_data {
        struct mdt_thread_info          *mti;
        char                             fs_name[MTI_NAME_MAXLEN+1];
+       /* are we scanning the logs for housekeeping, or just looking
+        * for new work?
+        */
+       bool                             housekeeping;
         /* requests to be sent to agents */
        int                              max_requests;  /** vector size */
        int                              request_cnt;   /** used count */
@@ -202,13 +206,25 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                if (!request) {
                        struct hsm_action_list *hal;
 
-                       if (hsd->request_cnt == hsd->max_requests)
-                               /* Unknown request and no more room
-                                * for a new request. Continue to scan
-                                * to find other entries for already
-                                * existing requests.
-                                */
-                               RETURN(0);
+                       if (hsd->request_cnt == hsd->max_requests) {
+                               if (!hsd->housekeeping) {
+                                       /* The request array is full,
+                                        * stop here. There might be
+                                        * more known requests that
+                                        * could be merged, but this
+                                        * avoids analyzing too many
+                                        * llogs for minor gains.
+                                        */
+                                       RETURN(LLOG_PROC_BREAK);
+                               } else {
+                                       /* Unknown request and no more room
+                                        * for a new request. Continue to scan
+                                        * to find other entries for already
+                                        * existing requests.
+                                        */
+                                       RETURN(0);
+                               }
+                       }
 
                        request = &hsd->request[hsd->request_cnt];
 
@@ -286,6 +302,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                cfs_time_t now = cfs_time_current_sec();
                cfs_time_t last;
 
+               if (!hsd->housekeeping)
+                       break;
+
                /* we search for a running request
                 * error may happen if coordinator crashes or stopped
                 * with running request
@@ -353,6 +372,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
        case ARS_FAILED:
        case ARS_CANCELED:
        case ARS_SUCCEED:
+               if (!hsd->housekeeping)
+                       break;
+
                if ((larr->arr_req_change + cdt->cdt_grace_delay) <
                    cfs_time_current_sec()) {
                        cdt_agent_record_hash_del(cdt,
@@ -563,10 +585,15 @@ static int mdt_coordinator(void *data)
 
                /* If no event, and no housekeeping to do, continue to
                 * wait. */
-               if (last_housekeeping + cdt->cdt_loop_period <= get_seconds())
+               if (last_housekeeping + cdt->cdt_loop_period <=
+                   get_seconds()) {
                        last_housekeeping = get_seconds();
-               else if (!cdt->cdt_event)
+                       hsd.housekeeping = true;
+               } else if (cdt->cdt_event) {
+                       hsd.housekeeping = false;
+               } else {
                        continue;
+               }
 
                cdt->cdt_event = false;