Whamcloud - gitweb
LU-2433 lnet: enable asymmetric router failure detection
author Emoly Liu <emoly.liu@intel.com>
Fri, 7 Dec 2012 01:56:34 +0000 (09:56 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 22 Jan 2013 20:13:06 +0000 (15:13 -0500)
Asymmetric router failure detection is disabled by default;
it is helpful to enable it by default.
The original change is at
https://github.com/chaos/lustre/commit/872bfc41cc858a6c01cae3648e0ca329a414a436

Signed-off-by: Christopher J. Morrone <morrone2@llnl.gov>
Signed-off-by: Emoly Liu <emoly.liu@intel.com>
Change-Id: I259e00911cdf3d7caecb98c9832d40a91b571596
Reviewed-on: http://review.whamcloud.com/4753
Reviewed-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/router.c

index 5bb07dd..da0373b 100644 (file)
@@ -82,23 +82,23 @@ lnet_peer_buffer_credits(lnet_ni_t *ni)
 
 static int check_routers_before_use = 0;
 CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444,
-                "Assume routers are down and ping them before use");
+               "Assume routers are down and ping them before use");
 
-static int avoid_asym_router_failure = 0;
+static int avoid_asym_router_failure = 1;
 CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644,
-                "Avoid asymmetrical failures: reserved, use at your own risk");
+               "Avoid asymmetrical router failures (0 to disable)");
 
-static int dead_router_check_interval = 0;
+static int dead_router_check_interval = 60;
 CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0644,
-                "Seconds between dead router health checks (<= 0 to disable)");
+               "Seconds between dead router health checks (<= 0 to disable)");
 
-static int live_router_check_interval = 0;
+static int live_router_check_interval = 60;
 CFS_MODULE_PARM(live_router_check_interval, "i", int, 0644,
-                "Seconds between live router health checks (<= 0 to disable)");
+               "Seconds between live router health checks (<= 0 to disable)");
 
 static int router_ping_timeout = 50;
 CFS_MODULE_PARM(router_ping_timeout, "i", int, 0644,
-                "Seconds to wait for the reply to a router health query");
+               "Seconds to wait for the reply to a router health query");
 
 int
 lnet_peers_start_down(void)