Whamcloud - gitweb
LU-10185 gnilnd: Add mod param to adjust vmalloc's retry flag 94/29894/4
author James Shimek <jshimek@cray.com>
Wed, 31 May 2017 20:15:36 +0000 (15:15 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 22 Dec 2017 07:04:07 +0000 (07:04 +0000)
In kernel 3.12 gnilnd had issues with the memory allocator
getting stuck trying to find pages for long periods of time,
so __GFP_NORETRY was added to fail fast. The memory subsystem
has changed enough that it is believed that vmalloc will fail
if it can't get pages.

Add modparam to allow no_retry flag to be enabled or disabled
dynamically.

Test-parameters: trivial

Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: I067a8d7b52d237e464fceae7e2220f2488b68957
Reviewed-on: https://review.whamcloud.com/29894
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Shimek <knathrak@gmail.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/klnds/gnilnd/gnilnd.h
lnet/klnds/gnilnd/gnilnd_modparams.c

index 6a762a6..b0d0f89 100644 (file)
@@ -87,6 +87,9 @@
        (cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * \
         *kgnilnd_tunables.kgn_timeout))
 
+/* Should we use the no_retry flag with vzalloc */
+#define GNILND_VZALLOC_RETRY 0
+
 /* reaper thread wakup interval */
 #define GNILND_REAPER_THREAD_WAKE  1
 /* reaper thread checks each conn NCHECKS time every kgnilnd_data.kgn_new_min_timeout */
@@ -490,6 +493,7 @@ typedef struct kgn_tunables {
        int     *kgn_thread_affinity;  /* bind scheduler threads to cpus */
        int     *kgn_to_reconn_disable;/* disable reconnect after timeout */
        int     *kgn_thread_safe;      /* use thread safe kgni API */
+       int     *kgn_vzalloc_noretry;  /* Should we pass the noretry flag */
 } kgn_tunables_t;
 
 typedef struct kgn_mbox_info {
@@ -982,8 +986,15 @@ static inline int kgnilnd_trylock(struct mutex *cq_lock,
 
 static inline void *kgnilnd_vzalloc(int size)
 {
-       void *ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY | __GFP_ZERO,
-                             PAGE_KERNEL);
+       void *ret;
+       if (*kgnilnd_tunables.kgn_vzalloc_noretry)
+               ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY |
+                                     __GFP_ZERO,
+                               PAGE_KERNEL);
+       else
+               ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO,
+                               PAGE_KERNEL);
+
        LIBCFS_ALLOC_POST(ret, size);
        return ret;
 }
index 68659d1..3f042fa 100644 (file)
@@ -212,6 +212,11 @@ module_param(to_reconn_disable, int, 0644);
 MODULE_PARM_DESC(to_reconn_disable,
                  "Timed out connection waits for peer before reconnecting");
 
+static int vzalloc_no_retry = GNILND_VZALLOC_RETRY;
+module_param(vzalloc_no_retry, int, 0644);
+MODULE_PARM_DESC(vzalloc_no_retry,
+                "Should we pass the no_retry flag to vmalloc 1: no_retry 0: normal");
+
 kgn_tunables_t kgnilnd_tunables = {
        .kgn_min_reconnect_interval = &min_reconnect_interval,
        .kgn_max_reconnect_interval = &max_reconnect_interval,
@@ -255,7 +260,8 @@ kgn_tunables_t kgnilnd_tunables = {
        .kgn_thread_safe            = &thread_safe,
        .kgn_reg_fail_timeout       = &reg_fail_timeout,
        .kgn_to_reconn_disable      = &to_reconn_disable,
-       .kgn_max_purgatory          = &max_conn_purg
+       .kgn_max_purgatory          = &max_conn_purg,
+       .kgn_vzalloc_noretry        = &vzalloc_no_retry
 };
 
 int