Whamcloud - gitweb
LU-5380 at: net AT after connect 55/11155/2
authorAlexander.Boyko <alexander_boyko@xyratex.com>
Mon, 21 Jul 2014 10:18:23 +0000 (14:18 +0400)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 4 Nov 2014 17:53:49 +0000 (17:53 +0000)
Once connected, the previously gathered AT statistics are no longer
valid because they may reflect different routing, etc. The connect
itself could take a long time for various reasons (e.g. the server was
not ready), driving the net latency estimate very high (see
import_select_connection()), which does not reflect the current situation.

Take into account only the latency of the current (re-)CONNECT RPC.

Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com>
Xyratex-bug-id: MRP-1285
Change-Id: I6edc0e232a92319e7c8535aced28fe1ad3436c54
Reviewed-on: http://review.whamcloud.com/11155
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/ptlrpc_internal.h

index 505998c..1f9452d 100644 (file)
@@ -281,8 +281,8 @@ int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
 }
 
 /* Adjust expected network latency */
 }
 
 /* Adjust expected network latency */
-static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
-                                      unsigned int service_time)
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+                              unsigned int service_time)
 {
         unsigned int nl, oldnl;
         struct imp_at *at;
 {
         unsigned int nl, oldnl;
         struct imp_at *at;
index f7e200d..80ad967 100644 (file)
@@ -868,6 +868,12 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
        class_export_put(exp);
 
        imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
        class_export_put(exp);
 
+       /* The net statistics after (re-)connect is not valid anymore,
+        * because may reflect other routing, etc. */
+       at_init(&imp->imp_at.iat_net_latency, 0, 0);
+       ptlrpc_at_adj_net_latency(request,
+                       lustre_msg_get_service_time(request->rq_repmsg));
+
        obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
 
        if (aa->pcaa_initial_connect) {
        obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
 
        if (aa->pcaa_initial_connect) {
index 09f7b47..53f14dc 100644 (file)
@@ -54,6 +54,8 @@ int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
 int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
 
 /* client.c */
 int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
 
 /* client.c */
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+                              unsigned int service_time);
 struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
                                         unsigned type, unsigned portal);
 int ptlrpc_request_cache_init(void);
 struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
                                         unsigned type, unsigned portal);
 int ptlrpc_request_cache_init(void);