Whamcloud - gitweb
b=15332,i=liang:
authorisaac <isaac>
Tue, 5 May 2009 20:46:42 +0000 (20:46 +0000)
committerisaac <isaac>
Tue, 5 May 2009 20:46:42 +0000 (20:46 +0000)
- add a new LND option to control peer buffer credits on routers.

21 files changed:
lnet/ChangeLog
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_modparams.c
lnet/klnds/ptllnd/ptllnd.c
lnet/klnds/ptllnd/ptllnd.h
lnet/klnds/ptllnd/ptllnd_modparams.c
lnet/klnds/ptllnd/ptllnd_peer.c
lnet/klnds/ptllnd/ptllnd_rx_buf.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_lib-darwin.c
lnet/klnds/socklnd/socklnd_lib-linux.c
lnet/klnds/socklnd/socklnd_lib-winnt.c
lnet/klnds/socklnd/socklnd_modparams.c
lnet/lnet/api-ni.c
lnet/lnet/peer.c
lnet/lnet/router.c
lnet/lnet/router_proc.c

index 578ab40..f682366 100644 (file)
@@ -17,6 +17,10 @@ Bugzilla   :
 Description: 
 Details    : 
 
 Description: 
 Details    : 
 
+Severity   : enhancement
+Bugzilla   : 15332
+Description: add a new LND option to control peer buffer credits on routers
+
 Severity   : normal
 Bugzilla   : 18844
 Description: Fixing deadlock in usocklnd
 Severity   : normal
 Bugzilla   : 18844
 Description: Fixing deadlock in usocklnd
index 452a325..cf04b09 100644 (file)
@@ -669,6 +669,7 @@ int lnet_acceptor_start(void);
 void lnet_acceptor_stop(void);
 
 int lnet_peers_start_down(void);
 void lnet_acceptor_stop(void);
 
 int lnet_peers_start_down(void);
+int lnet_peer_buffer_credits(lnet_ni_t *ni);
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
 
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
 
index 6b2652a..bba2881 100644 (file)
@@ -383,6 +383,7 @@ typedef struct lnet_ni {
         int               ni_txcredits;         /* # tx credits free */
         int               ni_mintxcredits;      /* lowest it's been */
         int               ni_peertxcredits;     /* # per-peer send credits */
         int               ni_txcredits;         /* # tx credits free */
         int               ni_mintxcredits;      /* lowest it's been */
         int               ni_peertxcredits;     /* # per-peer send credits */
+        int               ni_peerrtrcredits;    /* # per-peer router buffer credits */
         int               ni_peertimeout;       /* seconds to consider peer dead */
         lnet_nid_t        ni_nid;               /* interface's NID */
         void             *ni_data;              /* instance-specific data */
         int               ni_peertimeout;       /* seconds to consider peer dead */
         lnet_nid_t        ni_nid;               /* interface's NID */
         void             *ni_data;              /* instance-specific data */
index 9a5cd02..9549b50 100644 (file)
@@ -2037,9 +2037,10 @@ kiblnd_startup (lnet_ni_t *ni)
         do_gettimeofday(&tv);
         net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 
         do_gettimeofday(&tv);
         net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 
-        ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
-        ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits;
-        ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
+        ni->ni_peertimeout    = *kiblnd_tunables.kib_peertimeout;
+        ni->ni_maxtxcredits   = *kiblnd_tunables.kib_credits;
+        ni->ni_peertxcredits  = *kiblnd_tunables.kib_peertxcredits;
+        ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
 
         spin_lock_init(&net->ibn_tx_lock);
         INIT_LIST_HEAD(&net->ibn_idle_txs);
 
         spin_lock_init(&net->ibn_tx_lock);
         INIT_LIST_HEAD(&net->ibn_idle_txs);
index ed5bacf..4e270df 100644 (file)
@@ -107,7 +107,8 @@ typedef struct
         int              *kib_keepalive;        /* keepalive timeout (seconds) */
         int              *kib_ntx;              /* # tx descs */
         int              *kib_credits;          /* # concurrent sends */
         int              *kib_keepalive;        /* keepalive timeout (seconds) */
         int              *kib_ntx;              /* # tx descs */
         int              *kib_credits;          /* # concurrent sends */
-        int              *kib_peercredits;      /* # concurrent sends to 1 peer */
+        int              *kib_peertxcredits;    /* # concurrent sends to 1 peer */
+        int              *kib_peerrtrcredits;   /* # per-peer router buffer credits */
         int              *kib_peercredits_hiw;  /* # when eagerly to return credits */
         int              *kib_peertimeout;      /* seconds to consider peer dead */
         char            **kib_default_ipif;     /* default IPoIB interface */
         int              *kib_peercredits_hiw;  /* # when eagerly to return credits */
         int              *kib_peertimeout;      /* seconds to consider peer dead */
         char            **kib_default_ipif;     /* default IPoIB interface */
@@ -136,7 +137,7 @@ extern kib_tunables_t  kiblnd_tunables;
 
 #define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
                                      IBLND_MSG_QUEUE_SIZE_V1 :   \
 
 #define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
                                      IBLND_MSG_QUEUE_SIZE_V1 :   \
-                                     *kiblnd_tunables.kib_peercredits) /* # messages/RDMAs in-flight */
+                                     *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
 #define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
                                      IBLND_CREDIT_HIGHWATER_V1 : \
                                      *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
 #define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
                                      IBLND_CREDIT_HIGHWATER_V1 : \
                                      *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
index 45a8efb..61cc0c8 100644 (file)
@@ -68,6 +68,10 @@ static int peer_credits_hiw = 0;
 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
                 "when eagerly to return credits");
 
 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
                 "when eagerly to return credits");
 
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+                "# per-peer router buffer credits");
+
 static int peer_timeout = 0;
 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
                 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
 static int peer_timeout = 0;
 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
                 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
@@ -123,8 +127,9 @@ kib_tunables_t kiblnd_tunables = {
         .kib_keepalive              = &keepalive,
         .kib_ntx                    = &ntx,
         .kib_credits                = &credits,
         .kib_keepalive              = &keepalive,
         .kib_ntx                    = &ntx,
         .kib_credits                = &credits,
-        .kib_peercredits            = &peer_credits,
+        .kib_peertxcredits          = &peer_credits,
         .kib_peercredits_hiw        = &peer_credits_hiw,
         .kib_peercredits_hiw        = &peer_credits_hiw,
+        .kib_peerrtrcredits         = &peer_buffer_credits,
         .kib_peertimeout            = &peer_timeout,
         .kib_default_ipif           = &ipif_name,
         .kib_retry_count            = &retry_count,
         .kib_peertimeout            = &peer_timeout,
         .kib_default_ipif           = &ipif_name,
         .kib_retry_count            = &retry_count,
@@ -150,8 +155,9 @@ enum {
         O2IBLND_TIMEOUT,
         O2IBLND_NTX,
         O2IBLND_CREDITS,
         O2IBLND_TIMEOUT,
         O2IBLND_NTX,
         O2IBLND_CREDITS,
-        O2IBLND_PEER_CREDITS,
+        O2IBLND_PEER_TXCREDITS,
         O2IBLND_PEER_CREDITS_HIW,
         O2IBLND_PEER_CREDITS_HIW,
+        O2IBLND_PEER_RTRCREDITS,
         O2IBLND_PEER_TIMEOUT,
         O2IBLND_IPIF_BASENAME,
         O2IBLND_RETRY_COUNT,
         O2IBLND_PEER_TIMEOUT,
         O2IBLND_IPIF_BASENAME,
         O2IBLND_RETRY_COUNT,
@@ -172,8 +178,9 @@ enum {
 #define O2IBLND_TIMEOUT          CTL_UNNUMBERED
 #define O2IBLND_NTX              CTL_UNNUMBERED
 #define O2IBLND_CREDITS          CTL_UNNUMBERED
 #define O2IBLND_TIMEOUT          CTL_UNNUMBERED
 #define O2IBLND_NTX              CTL_UNNUMBERED
 #define O2IBLND_CREDITS          CTL_UNNUMBERED
-#define O2IBLND_PEER_CREDITS     CTL_UNNUMBERED
+#define O2IBLND_PEER_TXCREDITS   CTL_UNNUMBERED
 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
+#define O2IBLND_PEER_RTRCREDITS  CTL_UNNUMBERED
 #define O2IBLND_PEER_TIMEOUT     CTL_UNNUMBERED
 #define O2IBLND_IPIF_BASENAME    CTL_UNNUMBERED
 #define O2IBLND_RETRY_COUNT      CTL_UNNUMBERED
 #define O2IBLND_PEER_TIMEOUT     CTL_UNNUMBERED
 #define O2IBLND_IPIF_BASENAME    CTL_UNNUMBERED
 #define O2IBLND_RETRY_COUNT      CTL_UNNUMBERED
@@ -231,7 +238,7 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = {
                 .proc_handler = &proc_dointvec
         },
         {
                 .proc_handler = &proc_dointvec
         },
         {
-                .ctl_name = O2IBLND_PEER_CREDITS,
+                .ctl_name = O2IBLND_PEER_TXCREDITS,
                 .procname = "peer_credits",
                 .data     = &peer_credits,
                 .maxlen   = sizeof(int),
                 .procname = "peer_credits",
                 .data     = &peer_credits,
                 .maxlen   = sizeof(int),
@@ -247,6 +254,14 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = {
                 .proc_handler = &proc_dointvec
         },
         {
                 .proc_handler = &proc_dointvec
         },
         {
+                .ctl_name = O2IBLND_PEER_RTRCREDITS,
+                .procname = "peer_buffer_credits",
+                .data     = &peer_buffer_credits,
+                .maxlen   = sizeof(int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        },
+        {
                 .ctl_name = O2IBLND_PEER_TIMEOUT,
                 .procname = "peer_timeout",
                 .data     = &peer_timeout,
                 .ctl_name = O2IBLND_PEER_TIMEOUT,
                 .procname = "peer_timeout",
                 .data     = &peer_timeout,
@@ -415,17 +430,17 @@ kiblnd_tunables_init (void)
                 return -EINVAL;
         }
 
                 return -EINVAL;
         }
 
-        if (*kiblnd_tunables.kib_peercredits < IBLND_CREDITS_DEFAULT)
-                *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_DEFAULT;
+        if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
+                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
 
 
-        if (*kiblnd_tunables.kib_peercredits > IBLND_CREDITS_MAX)
-                *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_MAX;
+        if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
+                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
 
 
-        if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peercredits / 2)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits / 2;
+        if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
+                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
 
 
-        if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peercredits)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits - 1;
+        if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
+                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
 
         if (*kiblnd_tunables.kib_map_on_demand < 0 ||
             *kiblnd_tunables.kib_map_on_demand >= IBLND_MAX_RDMA_FRAGS)
 
         if (*kiblnd_tunables.kib_map_on_demand < 0 ||
             *kiblnd_tunables.kib_map_on_demand >= IBLND_MAX_RDMA_FRAGS)
@@ -434,21 +449,21 @@ kiblnd_tunables_init (void)
         if (*kiblnd_tunables.kib_concurrent_sends == 0) {
                 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
                     *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
         if (*kiblnd_tunables.kib_concurrent_sends == 0) {
                 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
                     *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits) * 2;
+                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
                 else
                 else
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits);
+                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
         }
 
         }
 
-        if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peercredits * 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits * 2;
+        if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
+                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
 
 
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits / 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits / 2;
+        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
+                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
 
 
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits) {
+        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
                 CWARN("Concurrent sends %d is lower than message queue size: %d, "
                       "performance may drop slightly.\n",
                 CWARN("Concurrent sends %d is lower than message queue size: %d, "
                       "performance may drop slightly.\n",
-                      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peercredits);
+                      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
         }
 
         kiblnd_sysctl_init();
         }
 
         kiblnd_sysctl_init();
index b148fe5..770bfeb 100755 (executable)
@@ -519,7 +519,7 @@ kptllnd_startup (lnet_ni_t *ni)
                 return -EINVAL;
         }
 
                 return -EINVAL;
         }
 
-        if (*kptllnd_tunables.kptl_peercredits > PTLLND_MSG_MAX_CREDITS) {
+        if (*kptllnd_tunables.kptl_peertxcredits > PTLLND_MSG_MAX_CREDITS) {
                 CERROR("peercredits must be <= %d\n", PTLLND_MSG_MAX_CREDITS);
                 return -EINVAL;
         }
                 CERROR("peercredits must be <= %d\n", PTLLND_MSG_MAX_CREDITS);
                 return -EINVAL;
         }
@@ -572,8 +572,9 @@ kptllnd_startup (lnet_ni_t *ni)
         /*
          * Setup Credits
          */
         /*
          * Setup Credits
          */
-        ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits;
-        ni->ni_peertxcredits = *kptllnd_tunables.kptl_peercredits;
+        ni->ni_maxtxcredits   = *kptllnd_tunables.kptl_credits;
+        ni->ni_peertxcredits  = *kptllnd_tunables.kptl_peertxcredits;
+        ni->ni_peerrtrcredits = *kptllnd_tunables.kptl_peerrtrcredits;
 
         kptllnd_data.kptl_expected_peers =
                 *kptllnd_tunables.kptl_max_nodes *
 
         kptllnd_data.kptl_expected_peers =
                 *kptllnd_tunables.kptl_max_nodes *
index 96bae2d..58a87b0 100755 (executable)
@@ -95,7 +95,7 @@
 # define PTLLND_N_SCHED         1                   /* # schedulers */
 #endif
 
 # define PTLLND_N_SCHED         1                   /* # schedulers */
 #endif
 
-#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1)
+#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1)
   /* when eagerly to return credits */
 
 typedef struct
   /* when eagerly to return credits */
 
 typedef struct
@@ -110,7 +110,8 @@ typedef struct
         int             *kptl_rxb_npages;       /* number of pages for rx buffer */
         int             *kptl_rxb_nspare;       /* number of spare rx buffers */
         int             *kptl_credits;          /* number of credits */
         int             *kptl_rxb_npages;       /* number of pages for rx buffer */
         int             *kptl_rxb_nspare;       /* number of spare rx buffers */
         int             *kptl_credits;          /* number of credits */
-        int             *kptl_peercredits;      /* number of credits */
+        int             *kptl_peertxcredits;    /* number of peer tx credits */
+        int             *kptl_peerrtrcredits;   /* number of peer router credits */
         int             *kptl_max_msg_size;     /* max immd message size*/
         int             *kptl_peer_hash_table_size; /* # slots in peer hash table */
         int             *kptl_reschedule_loops; /* scheduler yield loops */
         int             *kptl_max_msg_size;     /* max immd message size*/
         int             *kptl_peer_hash_table_size; /* # slots in peer hash table */
         int             *kptl_reschedule_loops; /* scheduler yield loops */
@@ -525,14 +526,14 @@ kptllnd_reserve_buffers(int n)
 static inline int
 kptllnd_peer_reserve_buffers(void)
 {
 static inline int
 kptllnd_peer_reserve_buffers(void)
 {
-        return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits);
+        return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peertxcredits);
 }
 
 static inline void
 kptllnd_peer_unreserve_buffers(void)
 {
         kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool,
 }
 
 static inline void
 kptllnd_peer_unreserve_buffers(void)
 {
         kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool,
-                                         *kptllnd_tunables.kptl_peercredits);
+                                         *kptllnd_tunables.kptl_peertxcredits);
 }
 
 /*
 }
 
 /*
index bb54d32..1c8faac 100644 (file)
@@ -86,6 +86,10 @@ static int peercredits = PTLLND_PEERCREDITS;    /* <lnet/ptllnd_wire.h> */
 CFS_MODULE_PARM(peercredits, "i", int, 0444,
                 "concurrent sends to 1 peer");
 
 CFS_MODULE_PARM(peercredits, "i", int, 0444,
                 "concurrent sends to 1 peer");
 
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+                "# per-peer router buffer credits");
+
 static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE;  /* <lnet/ptllnd_wire.h> */
 CFS_MODULE_PARM(max_msg_size, "i", int, 0444,
                 "max size of immediate message");
 static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE;  /* <lnet/ptllnd_wire.h> */
 CFS_MODULE_PARM(max_msg_size, "i", int, 0444,
                 "max size of immediate message");
@@ -133,7 +137,8 @@ kptl_tunables_t kptllnd_tunables = {
         .kptl_rxb_npages             = &rxb_npages,
         .kptl_rxb_nspare             = &rxb_nspare,
         .kptl_credits                = &credits,
         .kptl_rxb_npages             = &rxb_npages,
         .kptl_rxb_nspare             = &rxb_nspare,
         .kptl_credits                = &credits,
-        .kptl_peercredits            = &peercredits,
+        .kptl_peertxcredits          = &peercredits,
+        .kptl_peerrtrcredits         = &peer_buffer_credits,
         .kptl_max_msg_size           = &max_msg_size,
         .kptl_peer_hash_table_size   = &peer_hash_table_size,
         .kptl_reschedule_loops       = &reschedule_loops,
         .kptl_max_msg_size           = &max_msg_size,
         .kptl_peer_hash_table_size   = &peer_hash_table_size,
         .kptl_reschedule_loops       = &reschedule_loops,
@@ -174,7 +179,8 @@ enum {
         KPTLLND_PID,
         KPTLLND_RXB_PAGES,
         KPTLLND_CREDITS,
         KPTLLND_PID,
         KPTLLND_RXB_PAGES,
         KPTLLND_CREDITS,
-        KPTLLND_PEERCREDITS,
+        KPTLLND_PEERTXCREDITS,
+        KPTLLND_PEERRTRCREDITS,
         KPTLLND_MAX_MSG_SIZE,
         KPTLLND_PEER_HASH_SIZE,
         KPTLLND_RESHEDULE_LOOPS,
         KPTLLND_MAX_MSG_SIZE,
         KPTLLND_PEER_HASH_SIZE,
         KPTLLND_RESHEDULE_LOOPS,
@@ -195,7 +201,8 @@ enum {
 #define KPTLLND_PID             CTL_UNNUMBERED
 #define KPTLLND_RXB_PAGES       CTL_UNNUMBERED
 #define KPTLLND_CREDITS         CTL_UNNUMBERED
 #define KPTLLND_PID             CTL_UNNUMBERED
 #define KPTLLND_RXB_PAGES       CTL_UNNUMBERED
 #define KPTLLND_CREDITS         CTL_UNNUMBERED
-#define KPTLLND_PEERCREDITS     CTL_UNNUMBERED
+#define KPTLLND_PEERTXCREDITS   CTL_UNNUMBERED
+#define KPTLLND_PEERRTRCREDITS  CTL_UNNUMBERED
 #define KPTLLND_MAX_MSG_SIZE    CTL_UNNUMBERED
 #define KPTLLND_PEER_HASH_SIZE  CTL_UNNUMBERED
 #define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
 #define KPTLLND_MAX_MSG_SIZE    CTL_UNNUMBERED
 #define KPTLLND_PEER_HASH_SIZE  CTL_UNNUMBERED
 #define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
@@ -280,7 +287,7 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = {
                 .proc_handler = &proc_dointvec
         },
         {
                 .proc_handler = &proc_dointvec
         },
         {
-                .ctl_name = KPTLLND_PEERCREDITS,
+                .ctl_name = KPTLLND_PEERTXCREDITS,
                 .procname = "peercredits",
                 .data     = &peercredits,
                 .maxlen   = sizeof(int),
                 .procname = "peercredits",
                 .data     = &peercredits,
                 .maxlen   = sizeof(int),
@@ -288,6 +295,14 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = {
                 .proc_handler = &proc_dointvec
         },
         {
                 .proc_handler = &proc_dointvec
         },
         {
+                .ctl_name = KPTLLND_PEERRTRCREDITS,
+                .procname = "peer_buffer_credits",
+                .data     = &peer_buffer_credits,
+                .maxlen   = sizeof(int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        },
+        {
                 .ctl_name = KPTLLND_MAX_MSG_SIZE,
                 .procname = "max_msg_size",
                 .data     = &max_msg_size,
                 .ctl_name = KPTLLND_MAX_MSG_SIZE,
                 .procname = "max_msg_size",
                 .data     = &max_msg_size,
index c92fe6c..2afede2 100644 (file)
@@ -192,7 +192,7 @@ kptllnd_peer_allocate (lnet_process_id_t lpid, ptl_process_id_t ppid)
         peer->peer_ptlid = ppid;
         peer->peer_credits = 1;                 /* enough for HELLO */
         peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
         peer->peer_ptlid = ppid;
         peer->peer_credits = 1;                 /* enough for HELLO */
         peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
-        peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peercredits - 1;
+        peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peertxcredits - 1;
         peer->peer_sent_credits = 1;           /* HELLO credit is implicit */
         peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */
 
         peer->peer_sent_credits = 1;           /* HELLO credit is implicit */
         peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */
 
@@ -620,7 +620,7 @@ kptllnd_peer_check_sends (kptl_peer_t *peer)
                 LASSERT (peer->peer_sent_credits >= 0);
                 LASSERT (peer->peer_sent_credits +
                          peer->peer_outstanding_credits <=
                 LASSERT (peer->peer_sent_credits >= 0);
                 LASSERT (peer->peer_sent_credits +
                          peer->peer_outstanding_credits <=
-                         *kptllnd_tunables.kptl_peercredits);
+                         *kptllnd_tunables.kptl_peertxcredits);
                 LASSERT (peer->peer_credits >= 0);
 
                 msg_type = tx->tx_msg->ptlm_type;
                 LASSERT (peer->peer_credits >= 0);
 
                 msg_type = tx->tx_msg->ptlm_type;
index 5c28881..149e726 100644 (file)
@@ -376,7 +376,7 @@ kptllnd_rx_done(kptl_rx_t *rx, int post_credit)
 
                 LASSERT (peer->peer_outstanding_credits +
                          peer->peer_sent_credits <=
 
                 LASSERT (peer->peer_outstanding_credits +
                          peer->peer_sent_credits <=
-                         *kptllnd_tunables.kptl_peercredits);
+                         *kptllnd_tunables.kptl_peertxcredits);
 
                 CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n",
                        libcfs_id2str(peer->peer_id), peer->peer_credits,
 
                 CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n",
                        libcfs_id2str(peer->peer_id), peer->peer_credits,
index d68d919..3841b08 100644 (file)
@@ -2600,9 +2600,10 @@ ksocknal_startup (lnet_ni_t *ni)
         cfs_spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
         cfs_spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
-        ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
-        ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
-        ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
+        ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
+        ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
+        ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
+        ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
 
         if (ni->ni_interfaces[0] == NULL) {
                 rc = ksocknal_enumerate_interfaces(net);
 
         if (ni->ni_interfaces[0] == NULL) {
                 rc = ksocknal_enumerate_interfaces(net);
index 3ad8f75..e4386ab 100644 (file)
@@ -115,7 +115,8 @@ typedef struct
         int              *ksnd_keepalive_count; /* # probes */
         int              *ksnd_keepalive_intvl; /* time between probes */
         int              *ksnd_credits;         /* # concurrent sends */
         int              *ksnd_keepalive_count; /* # probes */
         int              *ksnd_keepalive_intvl; /* time between probes */
         int              *ksnd_credits;         /* # concurrent sends */
-        int              *ksnd_peercredits;     /* # concurrent sends to 1 peer */
+        int              *ksnd_peertxcredits;   /* # concurrent sends to 1 peer */
+        int              *ksnd_peerrtrcredits;  /* # per-peer router buffer credits */
         int              *ksnd_peertimeout;     /* seconds to consider peer dead */
         int              *ksnd_enable_csum;     /* enable check sum */
         int              *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
         int              *ksnd_peertimeout;     /* seconds to consider peer dead */
         int              *ksnd_enable_csum;     /* enable check sum */
         int              *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
index 70e4294..8c7010d 100644 (file)
@@ -62,7 +62,7 @@ SYSCTL_INT(_lnet_ksocknal,    OID_AUTO,         credits,
            CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_credits, 
            0,                                   "credits");
 SYSCTL_INT(_lnet_ksocknal,    OID_AUTO,         peer_credits, 
            CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_credits, 
            0,                                   "credits");
 SYSCTL_INT(_lnet_ksocknal,    OID_AUTO,         peer_credits, 
-           CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_peercredits, 
+           CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_peertxcredits, 
            0,                                   "peer_credits");
 SYSCTL_INT(_lnet_ksocknal,    OID_AUTO,         nconnds, 
            CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_nconnds, 
            0,                                   "peer_credits");
 SYSCTL_INT(_lnet_ksocknal,    OID_AUTO,         nconnds, 
            CTLTYPE_INT | CTLFLAG_RW ,           &ksocknal_tunables.ksnd_nconnds, 
index 7ad0db5..73c1958 100644 (file)
@@ -43,7 +43,8 @@
 enum {
         SOCKLND_TIMEOUT = 1,
         SOCKLND_CREDITS,
 enum {
         SOCKLND_TIMEOUT = 1,
         SOCKLND_CREDITS,
-        SOCKLND_PEER_CREDITS,
+        SOCKLND_PEER_TXCREDITS,
+        SOCKLND_PEER_RTRCREDITS,
         SOCKLND_PEER_TIMEOUT,
         SOCKLND_NCONNDS,
         SOCKLND_RECONNECTS_MIN,
         SOCKLND_PEER_TIMEOUT,
         SOCKLND_NCONNDS,
         SOCKLND_RECONNECTS_MIN,
@@ -71,7 +72,8 @@ enum {
 
 #define SOCKLND_TIMEOUT         CTL_UNNUMBERED
 #define SOCKLND_CREDITS         CTL_UNNUMBERED
 
 #define SOCKLND_TIMEOUT         CTL_UNNUMBERED
 #define SOCKLND_CREDITS         CTL_UNNUMBERED
-#define SOCKLND_PEER_CREDITS    CTL_UNNUMBERED
+#define SOCKLND_PEER_TXCREDITS  CTL_UNNUMBERED
+#define SOCKLND_PEER_RTRCREDITS  CTL_UNNUMBERED
 #define SOCKLND_PEER_TIMEOUT    CTL_UNNUMBERED
 #define SOCKLND_NCONNDS         CTL_UNNUMBERED
 #define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
 #define SOCKLND_PEER_TIMEOUT    CTL_UNNUMBERED
 #define SOCKLND_NCONNDS         CTL_UNNUMBERED
 #define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
@@ -116,9 +118,18 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
                 .strategy = &sysctl_intvec,
         },
          {
                 .strategy = &sysctl_intvec,
         },
          {
-                .ctl_name = SOCKLND_PEER_CREDITS,
+                .ctl_name = SOCKLND_PEER_TXCREDITS,
                 .procname = "peer_credits",
                 .procname = "peer_credits",
-                .data     = &ksocknal_tunables.ksnd_peercredits,
+                .data     = &ksocknal_tunables.ksnd_peertxcredits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+         {
+                .ctl_name = SOCKLND_PEER_RTRCREDITS,
+                .procname = "peer_buffer_credits",
+                .data     = &ksocknal_tunables.ksnd_peerrtrcredits,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
                 .proc_handler = &proc_dointvec,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
                 .proc_handler = &proc_dointvec,
index 06ef0d6..6b88bac 100755 (executable)
@@ -79,7 +79,15 @@ ksocknal_lib_tunables_init ()
 
         ksocknal_ctl_table[i].ctl_name = j++;
         ksocknal_ctl_table[i].procname = "peer_credits";
 
         ksocknal_ctl_table[i].ctl_name = j++;
         ksocknal_ctl_table[i].procname = "peer_credits";
-        ksocknal_ctl_table[i].data     = ksocknal_tunables.ksnd_peercredits;
+        ksocknal_ctl_table[i].data     = ksocknal_tunables.ksnd_peertxcredits;
+        ksocknal_ctl_table[i].maxlen   = sizeof (int);
+        ksocknal_ctl_table[i].mode     = 0444;
+        ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
+        i++;
+
+        ksocknal_ctl_table[i].ctl_name = j++;
+        ksocknal_ctl_table[i].procname = "peer_buffer_credits";
+        ksocknal_ctl_table[i].data     = ksocknal_tunables.ksnd_peerrtrcredits;
         ksocknal_ctl_table[i].maxlen   = sizeof (int);
         ksocknal_ctl_table[i].mode     = 0444;
         ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
         ksocknal_ctl_table[i].maxlen   = sizeof (int);
         ksocknal_ctl_table[i].mode     = 0444;
         ksocknal_ctl_table[i].proc_handler = &proc_dointvec;
index 3089808..6fa44f3 100644 (file)
@@ -33,6 +33,10 @@ static int peer_credits = 8;
 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
                 "# concurrent sends to 1 peer");
 
 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
                 "# concurrent sends to 1 peer");
 
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+                "# per-peer router buffer credits");
+
 static int peer_timeout = 0;
 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
                 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
 static int peer_timeout = 0;
 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
                 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
@@ -175,7 +179,8 @@ int ksocknal_tunables_init(void)
         ksocknal_tunables.ksnd_keepalive_count    = &keepalive_count;
         ksocknal_tunables.ksnd_keepalive_intvl    = &keepalive_intvl;
         ksocknal_tunables.ksnd_credits            = &credits;
         ksocknal_tunables.ksnd_keepalive_count    = &keepalive_count;
         ksocknal_tunables.ksnd_keepalive_intvl    = &keepalive_intvl;
         ksocknal_tunables.ksnd_credits            = &credits;
-        ksocknal_tunables.ksnd_peercredits        = &peer_credits;
+        ksocknal_tunables.ksnd_peertxcredits      = &peer_credits;
+        ksocknal_tunables.ksnd_peerrtrcredits     = &peer_buffer_credits;
         ksocknal_tunables.ksnd_peertimeout        = &peer_timeout;
         ksocknal_tunables.ksnd_enable_csum        = &enable_csum;
         ksocknal_tunables.ksnd_inject_csum_error  = &inject_csum_error;
         ksocknal_tunables.ksnd_peertimeout        = &peer_timeout;
         ksocknal_tunables.ksnd_enable_csum        = &enable_csum;
         ksocknal_tunables.ksnd_inject_csum_error  = &inject_csum_error;
index 58839c7..fac8296 100644 (file)
@@ -1081,10 +1081,10 @@ lnet_startup_lndnis (void)
 
                 ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
 
 
                 ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
 
-                CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d]\n",
+                CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
                        libcfs_nid2str(ni->ni_nid),
                        ni->ni_peertxcredits, ni->ni_txcredits,
                        libcfs_nid2str(ni->ni_nid),
                        ni->ni_peertxcredits, ni->ni_txcredits,
-                       ni->ni_peertimeout);
+                       ni->ni_peerrtrcredits, ni->ni_peertimeout);
 
                 nicount++;
         }
 
                 nicount++;
         }
index c8ad591..d39507b 100644 (file)
@@ -221,12 +221,10 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid)
                 return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH;
         }
 
                 return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH;
         }
 
-       lp->lp_txcredits = 
-                lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
-
-        /* As a first approximation; allow this peer the same number of router
-         * buffers as it is allowed outstanding sends */
-        lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits;
+        lp->lp_txcredits    =
+        lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+        lp->lp_rtrcredits    =
+        lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
 
         LASSERT (!the_lnet.ln_shutdown);
         /* can't add peers after shutdown starts */
 
         LASSERT (!the_lnet.ln_shutdown);
         /* can't add peers after shutdown starts */
index 2f9cc01..42df3a2 100644 (file)
@@ -39,6 +39,9 @@ CFS_MODULE_PARM(small_router_buffers, "i", int, 0444,
 static int large_router_buffers = 512;
 CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
                 "# of large messages to buffer in the router");
 static int large_router_buffers = 512;
 CFS_MODULE_PARM(large_router_buffers, "i", int, 0444,
                 "# of large messages to buffer in the router");
+static int peer_buffer_credits = 0;
+CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
+                "# router buffer credits per peer");
 
 static int auto_down = 1;
 CFS_MODULE_PARM(auto_down, "i", int, 0444,
 
 static int auto_down = 1;
 CFS_MODULE_PARM(auto_down, "i", int, 0444,
@@ -66,6 +69,20 @@ lnet_peers_start_down(void)
         return check_routers_before_use;
 }
 
         return check_routers_before_use;
 }
 
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+        /* NI option overrides LNet default */
+        if (ni->ni_peerrtrcredits > 0)
+                return ni->ni_peerrtrcredits;
+        if (peer_buffer_credits > 0)
+                return peer_buffer_credits;
+
+        /* As an approximation, allow this peer the same number of router
+         * buffers as it is allowed outstanding sends */
+        return ni->ni_peertxcredits;
+}
+
 void
 lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when)
 {
 void
 lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when)
 {
@@ -1060,6 +1077,12 @@ lnet_peers_start_down(void)
         return 0;
 }
 
         return 0;
 }
 
+int
+lnet_peer_buffer_credits(lnet_ni_t *ni)
+{
+        return 0;
+}
+
 void
 lnet_router_checker_stop(void)
 {
 void
 lnet_router_checker_stop(void)
 {
index 77571e5..6247c53 100644 (file)
@@ -958,12 +958,13 @@ lnet_ni_seq_show (struct seq_file *s, void *iter)
         int                     txcr;
         int                     mintxcr;
         int                     npeertxcr;
         int                     txcr;
         int                     mintxcr;
         int                     npeertxcr;
+        int                     npeerrtrcr;
         lnet_nid_t              nid;
         int                     nref;
 
         if (lnsi->lnsi_off == 0) {
         lnet_nid_t              nid;
         int                     nref;
 
         if (lnsi->lnsi_off == 0) {
-                seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n",
-                           "nid", "refs", "peer", "max", "tx", "min");
+                seq_printf(s, "%-24s %4s %4s %4s %5s %5s %5s\n",
+                           "nid", "refs", "peer", "rtr", "max", "tx", "min");
                 return 0;
         }
 
                 return 0;
         }
 
@@ -973,18 +974,19 @@ lnet_ni_seq_show (struct seq_file *s, void *iter)
 
         ni = lnsi->lnsi_ni;
 
 
         ni = lnsi->lnsi_ni;
 
-        maxtxcr   = ni->ni_maxtxcredits;
-        txcr      = ni->ni_txcredits;
-        mintxcr   = ni->ni_mintxcredits;
-        npeertxcr = ni->ni_peertxcredits;
-        nid       = ni->ni_nid;
-        nref      = ni->ni_refcount;
+        maxtxcr    = ni->ni_maxtxcredits;
+        txcr       = ni->ni_txcredits;
+        mintxcr    = ni->ni_mintxcredits;
+        npeertxcr  = ni->ni_peertxcredits;
+        npeerrtrcr = ni->ni_peerrtrcredits;
+        nid        = ni->ni_nid;
+        nref       = ni->ni_refcount;
 
         LNET_UNLOCK();
 
 
         LNET_UNLOCK();
 
-        seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n",
+        seq_printf(s, "%-24s %4d %4d %4d %5d %5d %5d\n",
                    libcfs_nid2str(nid), nref,
                    libcfs_nid2str(nid), nref,
-                   npeertxcr, maxtxcr, txcr, mintxcr);
+                   npeertxcr, npeerrtrcr, maxtxcr, txcr, mintxcr);
         return 0;
 }
 
         return 0;
 }