From: isaac Date: Thu, 23 Apr 2009 04:29:19 +0000 (+0000) Subject: b=15332,i=liang: X-Git-Tag: v1_9_170~29 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b68358a8ad39e9a0c39383db4ac34de9a4fd04b0 b=15332,i=liang: - add a new LND option to control peer buffer credits on routers. --- diff --git a/lnet/ChangeLog b/lnet/ChangeLog index 578ab40..f682366 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -17,6 +17,10 @@ Bugzilla : Description: Details : +Severity : enhancement +Bugzilla : 15332 +Description: add a new LND option to control peer buffer credits on routers + Severity : normal Bugzilla : 18844 Description: Fixing deadlock in usocklnd diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 452a325..cf04b09 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -669,6 +669,7 @@ int lnet_acceptor_start(void); void lnet_acceptor_stop(void); int lnet_peers_start_down(void); +int lnet_peer_buffer_credits(lnet_ni_t *ni); int lnet_router_checker_start(void); void lnet_router_checker_stop(void); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 6b2652a..bba2881 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -383,6 +383,7 @@ typedef struct lnet_ni { int ni_txcredits; /* # tx credits free */ int ni_mintxcredits; /* lowest it's been */ int ni_peertxcredits; /* # per-peer send credits */ + int ni_peerrtrcredits; /* # per-peer router buffer credits */ int ni_peertimeout; /* seconds to consider peer dead */ lnet_nid_t ni_nid; /* interface's NID */ void *ni_data; /* instance-specific data */ diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 9a5cd02..9549b50 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2037,9 +2037,10 @@ kiblnd_startup (lnet_ni_t *ni) do_gettimeofday(&tv); net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - 
ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; - ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits; - ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout; + ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout; + ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; + ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits; + ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits; spin_lock_init(&net->ibn_tx_lock); INIT_LIST_HEAD(&net->ibn_idle_txs); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index ed5bacf..4e270df 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -107,7 +107,8 @@ typedef struct int *kib_keepalive; /* keepalive timeout (seconds) */ int *kib_ntx; /* # tx descs */ int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ + int *kib_peertxcredits; /* # concurrent sends to 1 peer */ + int *kib_peerrtrcredits; /* # per-peer router buffer credits */ int *kib_peercredits_hiw; /* # when eagerly to return credits */ int *kib_peertimeout; /* seconds to consider peer dead */ char **kib_default_ipif; /* default IPoIB interface */ @@ -136,7 +137,7 @@ extern kib_tunables_t kiblnd_tunables; #define IBLND_MSG_QUEUE_SIZE(v) ((v) == IBLND_MSG_VERSION_1 ? \ IBLND_MSG_QUEUE_SIZE_V1 : \ - *kiblnd_tunables.kib_peercredits) /* # messages/RDMAs in-flight */ + *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */ #define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? 
\ IBLND_CREDIT_HIGHWATER_V1 : \ *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 45a8efb..61cc0c8 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -68,6 +68,10 @@ static int peer_credits_hiw = 0; CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444, "when eagerly to return credits"); +static int peer_buffer_credits = 0; +CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, + "# per-peer router buffer credits"); + static int peer_timeout = 0; CFS_MODULE_PARM(peer_timeout, "i", int, 0444, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); @@ -123,8 +127,9 @@ kib_tunables_t kiblnd_tunables = { .kib_keepalive = &keepalive, .kib_ntx = &ntx, .kib_credits = &credits, - .kib_peercredits = &peer_credits, + .kib_peertxcredits = &peer_credits, .kib_peercredits_hiw = &peer_credits_hiw, + .kib_peerrtrcredits = &peer_buffer_credits, .kib_peertimeout = &peer_timeout, .kib_default_ipif = &ipif_name, .kib_retry_count = &retry_count, @@ -150,8 +155,9 @@ enum { O2IBLND_TIMEOUT, O2IBLND_NTX, O2IBLND_CREDITS, - O2IBLND_PEER_CREDITS, + O2IBLND_PEER_TXCREDITS, O2IBLND_PEER_CREDITS_HIW, + O2IBLND_PEER_RTRCREDITS, O2IBLND_PEER_TIMEOUT, O2IBLND_IPIF_BASENAME, O2IBLND_RETRY_COUNT, @@ -172,8 +178,9 @@ enum { #define O2IBLND_TIMEOUT CTL_UNNUMBERED #define O2IBLND_NTX CTL_UNNUMBERED #define O2IBLND_CREDITS CTL_UNNUMBERED -#define O2IBLND_PEER_CREDITS CTL_UNNUMBERED +#define O2IBLND_PEER_TXCREDITS CTL_UNNUMBERED #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED +#define O2IBLND_PEER_RTRCREDITS CTL_UNNUMBERED #define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED #define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED #define O2IBLND_RETRY_COUNT CTL_UNNUMBERED @@ -231,7 +238,7 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = O2IBLND_PEER_CREDITS, + .ctl_name = 
O2IBLND_PEER_TXCREDITS, .procname = "peer_credits", .data = &peer_credits, .maxlen = sizeof(int), @@ -247,6 +254,14 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = { .proc_handler = &proc_dointvec }, { + .ctl_name = O2IBLND_PEER_RTRCREDITS, + .procname = "peer_buffer_credits", + .data = &peer_buffer_credits, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { .ctl_name = O2IBLND_PEER_TIMEOUT, .procname = "peer_timeout", .data = &peer_timeout, @@ -415,17 +430,17 @@ kiblnd_tunables_init (void) return -EINVAL; } - if (*kiblnd_tunables.kib_peercredits < IBLND_CREDITS_DEFAULT) - *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_DEFAULT; + if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT) + *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT; - if (*kiblnd_tunables.kib_peercredits > IBLND_CREDITS_MAX) - *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_MAX; + if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX) + *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX; - if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peercredits / 2) - *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits / 2; + if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2) + *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2; - if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peercredits) - *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits - 1; + if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits) + *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1; if (*kiblnd_tunables.kib_map_on_demand < 0 || *kiblnd_tunables.kib_map_on_demand >= IBLND_MAX_RDMA_FRAGS) @@ -434,21 +449,21 @@ kiblnd_tunables_init (void) if (*kiblnd_tunables.kib_concurrent_sends == 0) { if (*kiblnd_tunables.kib_map_on_demand > 0 && *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 
8) - *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits) * 2; + *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2; else - *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits); + *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits); } - if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peercredits * 2) - *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits * 2; + if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2) + *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2; - if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits / 2) - *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits / 2; + if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2) + *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2; - if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits) { + if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) { CWARN("Concurrent sends %d is lower than message queue size: %d, " "performance may drop slightly.\n", - *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peercredits); + *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits); } kiblnd_sysctl_init(); diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c index b148fe5..770bfeb 100755 --- a/lnet/klnds/ptllnd/ptllnd.c +++ b/lnet/klnds/ptllnd/ptllnd.c @@ -519,7 +519,7 @@ kptllnd_startup (lnet_ni_t *ni) return -EINVAL; } - if (*kptllnd_tunables.kptl_peercredits > PTLLND_MSG_MAX_CREDITS) { + if (*kptllnd_tunables.kptl_peertxcredits > PTLLND_MSG_MAX_CREDITS) { CERROR("peercredits must be <= %d\n", PTLLND_MSG_MAX_CREDITS); return -EINVAL; } @@ -572,8 +572,9 @@ kptllnd_startup (lnet_ni_t *ni) /* * Setup Credits */ - ni->ni_maxtxcredits = 
*kptllnd_tunables.kptl_credits; - ni->ni_peertxcredits = *kptllnd_tunables.kptl_peercredits; + ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits; + ni->ni_peertxcredits = *kptllnd_tunables.kptl_peertxcredits; + ni->ni_peerrtrcredits = *kptllnd_tunables.kptl_peerrtrcredits; kptllnd_data.kptl_expected_peers = *kptllnd_tunables.kptl_max_nodes * diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h index 96bae2d..58a87b0 100755 --- a/lnet/klnds/ptllnd/ptllnd.h +++ b/lnet/klnds/ptllnd/ptllnd.h @@ -95,7 +95,7 @@ # define PTLLND_N_SCHED 1 /* # schedulers */ #endif -#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1) +#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1) /* when eagerly to return credits */ typedef struct @@ -110,7 +110,8 @@ typedef struct int *kptl_rxb_npages; /* number of pages for rx buffer */ int *kptl_rxb_nspare; /* number of spare rx buffers */ int *kptl_credits; /* number of credits */ - int *kptl_peercredits; /* number of credits */ + int *kptl_peertxcredits; /* number of peer tx credits */ + int *kptl_peerrtrcredits; /* number of peer router credits */ int *kptl_max_msg_size; /* max immd message size*/ int *kptl_peer_hash_table_size; /* # slots in peer hash table */ int *kptl_reschedule_loops; /* scheduler yield loops */ @@ -525,14 +526,14 @@ kptllnd_reserve_buffers(int n) static inline int kptllnd_peer_reserve_buffers(void) { - return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits); + return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peertxcredits); } static inline void kptllnd_peer_unreserve_buffers(void) { kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool, - *kptllnd_tunables.kptl_peercredits); + *kptllnd_tunables.kptl_peertxcredits); } /* diff --git a/lnet/klnds/ptllnd/ptllnd_modparams.c b/lnet/klnds/ptllnd/ptllnd_modparams.c index bb54d32..1c8faac 100644 --- a/lnet/klnds/ptllnd/ptllnd_modparams.c +++ b/lnet/klnds/ptllnd/ptllnd_modparams.c @@ -86,6 
+86,10 @@ static int peercredits = PTLLND_PEERCREDITS; /* */ CFS_MODULE_PARM(peercredits, "i", int, 0444, "concurrent sends to 1 peer"); +static int peer_buffer_credits = 0; +CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, + "# per-peer router buffer credits"); + static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* */ CFS_MODULE_PARM(max_msg_size, "i", int, 0444, "max size of immediate message"); @@ -133,7 +137,8 @@ kptl_tunables_t kptllnd_tunables = { .kptl_rxb_npages = &rxb_npages, .kptl_rxb_nspare = &rxb_nspare, .kptl_credits = &credits, - .kptl_peercredits = &peercredits, + .kptl_peertxcredits = &peercredits, + .kptl_peerrtrcredits = &peer_buffer_credits, .kptl_max_msg_size = &max_msg_size, .kptl_peer_hash_table_size = &peer_hash_table_size, .kptl_reschedule_loops = &reschedule_loops, @@ -174,7 +179,8 @@ enum { KPTLLND_PID, KPTLLND_RXB_PAGES, KPTLLND_CREDITS, - KPTLLND_PEERCREDITS, + KPTLLND_PEERTXCREDITS, + KPTLLND_PEERRTRCREDITS, KPTLLND_MAX_MSG_SIZE, KPTLLND_PEER_HASH_SIZE, KPTLLND_RESHEDULE_LOOPS, @@ -195,7 +201,8 @@ enum { #define KPTLLND_PID CTL_UNNUMBERED #define KPTLLND_RXB_PAGES CTL_UNNUMBERED #define KPTLLND_CREDITS CTL_UNNUMBERED -#define KPTLLND_PEERCREDITS CTL_UNNUMBERED +#define KPTLLND_PEERTXCREDITS CTL_UNNUMBERED +#define KPTLLND_PEERRTRCREDITS CTL_UNNUMBERED #define KPTLLND_MAX_MSG_SIZE CTL_UNNUMBERED #define KPTLLND_PEER_HASH_SIZE CTL_UNNUMBERED #define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED @@ -280,7 +287,7 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = { .proc_handler = &proc_dointvec }, { - .ctl_name = KPTLLND_PEERCREDITS, + .ctl_name = KPTLLND_PEERTXCREDITS, .procname = "peercredits", .data = &peercredits, .maxlen = sizeof(int), @@ -288,6 +295,14 @@ static cfs_sysctl_table_t kptllnd_ctl_table[] = { .proc_handler = &proc_dointvec }, { + .ctl_name = KPTLLND_PEERRTRCREDITS, + .procname = "peer_buffer_credits", + .data = &peer_buffer_credits, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { 
.ctl_name = KPTLLND_MAX_MSG_SIZE, .procname = "max_msg_size", .data = &max_msg_size, diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c index c92fe6c..2afede2 100644 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ b/lnet/klnds/ptllnd/ptllnd_peer.c @@ -192,7 +192,7 @@ kptllnd_peer_allocate (lnet_process_id_t lpid, ptl_process_id_t ppid) peer->peer_ptlid = ppid; peer->peer_credits = 1; /* enough for HELLO */ peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS; - peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peercredits - 1; + peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peertxcredits - 1; peer->peer_sent_credits = 1; /* HELLO credit is implicit */ peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */ @@ -620,7 +620,7 @@ kptllnd_peer_check_sends (kptl_peer_t *peer) LASSERT (peer->peer_sent_credits >= 0); LASSERT (peer->peer_sent_credits + peer->peer_outstanding_credits <= - *kptllnd_tunables.kptl_peercredits); + *kptllnd_tunables.kptl_peertxcredits); LASSERT (peer->peer_credits >= 0); msg_type = tx->tx_msg->ptlm_type; diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c index 5c28881..149e726 100644 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ b/lnet/klnds/ptllnd/ptllnd_rx_buf.c @@ -376,7 +376,7 @@ kptllnd_rx_done(kptl_rx_t *rx, int post_credit) LASSERT (peer->peer_outstanding_credits + peer->peer_sent_credits <= - *kptllnd_tunables.kptl_peercredits); + *kptllnd_tunables.kptl_peertxcredits); CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n", libcfs_id2str(peer->peer_id), peer->peer_credits, diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index d68d919..3841b08 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -2600,9 +2600,10 @@ ksocknal_startup (lnet_ni_t *ni) cfs_spin_lock_init(&net->ksnn_lock); net->ksnn_incarnation = ksocknal_new_incarnation(); ni->ni_data = net; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; 
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits; - ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; + ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; + ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; + ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; + ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; if (ni->ni_interfaces[0] == NULL) { rc = ksocknal_enumerate_interfaces(net); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 3ad8f75..e4386ab 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -115,7 +115,8 @@ typedef struct int *ksnd_keepalive_count; /* # probes */ int *ksnd_keepalive_intvl; /* time between probes */ int *ksnd_credits; /* # concurrent sends */ - int *ksnd_peercredits; /* # concurrent sends to 1 peer */ + int *ksnd_peertxcredits; /* # concurrent sends to 1 peer */ + int *ksnd_peerrtrcredits; /* # per-peer router buffer credits */ int *ksnd_peertimeout; /* seconds to consider peer dead */ int *ksnd_enable_csum; /* enable check sum */ int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */ diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c index 70e4294..8c7010d 100644 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.c @@ -62,7 +62,7 @@ SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits, CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits, 0, "credits"); SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peercredits, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peertxcredits, 0, "peer_credits"); SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds, CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds, diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c index 7ad0db5..73c1958 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ 
b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -43,7 +43,8 @@ enum { SOCKLND_TIMEOUT = 1, SOCKLND_CREDITS, - SOCKLND_PEER_CREDITS, + SOCKLND_PEER_TXCREDITS, + SOCKLND_PEER_RTRCREDITS, SOCKLND_PEER_TIMEOUT, SOCKLND_NCONNDS, SOCKLND_RECONNECTS_MIN, @@ -71,7 +72,8 @@ enum { #define SOCKLND_TIMEOUT CTL_UNNUMBERED #define SOCKLND_CREDITS CTL_UNNUMBERED -#define SOCKLND_PEER_CREDITS CTL_UNNUMBERED +#define SOCKLND_PEER_TXCREDITS CTL_UNNUMBERED +#define SOCKLND_PEER_RTRCREDITS CTL_UNNUMBERED #define SOCKLND_PEER_TIMEOUT CTL_UNNUMBERED #define SOCKLND_NCONNDS CTL_UNNUMBERED #define SOCKLND_RECONNECTS_MIN CTL_UNNUMBERED @@ -116,9 +118,18 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = { .strategy = &sysctl_intvec, }, { - .ctl_name = SOCKLND_PEER_CREDITS, + .ctl_name = SOCKLND_PEER_TXCREDITS, .procname = "peer_credits", - .data = &ksocknal_tunables.ksnd_peercredits, + .data = &ksocknal_tunables.ksnd_peertxcredits, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { + .ctl_name = SOCKLND_PEER_RTRCREDITS, + .procname = "peer_buffer_credits", + .data = &ksocknal_tunables.ksnd_peerrtrcredits, .maxlen = sizeof (int), .mode = 0444, .proc_handler = &proc_dointvec, diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.c b/lnet/klnds/socklnd/socklnd_lib-winnt.c index 06ef0d6..6b88bac 100755 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.c +++ b/lnet/klnds/socklnd/socklnd_lib-winnt.c @@ -79,7 +79,15 @@ ksocknal_lib_tunables_init () ksocknal_ctl_table[i].ctl_name = j++; ksocknal_ctl_table[i].procname = "peer_credits"; - ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peercredits; + ksocknal_ctl_table[i].data = ksocknal_tunables.ksnd_peertxcredits; + ksocknal_ctl_table[i].maxlen = sizeof (int); + ksocknal_ctl_table[i].mode = 0444; + ksocknal_ctl_table[i].proc_handler = &proc_dointvec; + i++; + + ksocknal_ctl_table[i].ctl_name = j++; + ksocknal_ctl_table[i].procname = "peer_buffer_credits"; + ksocknal_ctl_table[i].data = 
ksocknal_tunables.ksnd_peerrtrcredits; ksocknal_ctl_table[i].maxlen = sizeof (int); ksocknal_ctl_table[i].mode = 0444; ksocknal_ctl_table[i].proc_handler = &proc_dointvec; diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 3089808..6fa44f3 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -33,6 +33,10 @@ static int peer_credits = 8; CFS_MODULE_PARM(peer_credits, "i", int, 0444, "# concurrent sends to 1 peer"); +static int peer_buffer_credits = 0; +CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, + "# per-peer router buffer credits"); + static int peer_timeout = 0; CFS_MODULE_PARM(peer_timeout, "i", int, 0444, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); @@ -175,7 +179,8 @@ int ksocknal_tunables_init(void) ksocknal_tunables.ksnd_keepalive_count = &keepalive_count; ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl; ksocknal_tunables.ksnd_credits = &credits; - ksocknal_tunables.ksnd_peercredits = &peer_credits; + ksocknal_tunables.ksnd_peertxcredits = &peer_credits; + ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits; ksocknal_tunables.ksnd_peertimeout = &peer_timeout; ksocknal_tunables.ksnd_enable_csum = &enable_csum; ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 58839c7..fac8296 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1081,10 +1081,10 @@ lnet_startup_lndnis (void) ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits; - CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d]\n", + CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, ni->ni_txcredits, - ni->ni_peertimeout); + ni->ni_peerrtrcredits, ni->ni_peertimeout); nicount++; } diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index c8ad591..d39507b 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -221,12 +221,10 @@ 
lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH; } - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - - /* As a first approximation; allow this peer the same number of router - * buffers as it is allowed outstanding sends */ - lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits; + lp->lp_txcredits = + lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; + lp->lp_rtrcredits = + lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); LASSERT (!the_lnet.ln_shutdown); /* can't add peers after shutdown starts */ diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 2f9cc01..42df3a2 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -39,6 +39,9 @@ CFS_MODULE_PARM(small_router_buffers, "i", int, 0444, static int large_router_buffers = 512; CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, "# of large messages to buffer in the router"); +static int peer_buffer_credits = 0; +CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, + "# router buffer credits per peer"); static int auto_down = 1; CFS_MODULE_PARM(auto_down, "i", int, 0444, @@ -66,6 +69,20 @@ lnet_peers_start_down(void) return check_routers_before_use; } +int +lnet_peer_buffer_credits(lnet_ni_t *ni) +{ + /* NI option overrides LNet default */ + if (ni->ni_peerrtrcredits > 0) + return ni->ni_peerrtrcredits; + if (peer_buffer_credits > 0) + return peer_buffer_credits; + + /* As an approximation, allow this peer the same number of router + * buffers as it is allowed outstanding sends */ + return ni->ni_peertxcredits; +} + void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when) { @@ -1060,6 +1077,12 @@ lnet_peers_start_down(void) return 0; } +int +lnet_peer_buffer_credits(lnet_ni_t *ni) +{ + return 0; +} + void lnet_router_checker_stop(void) { diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 77571e5..6247c53 100644 --- a/lnet/lnet/router_proc.c +++ 
b/lnet/lnet/router_proc.c @@ -958,12 +958,13 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) int txcr; int mintxcr; int npeertxcr; + int npeerrtrcr; lnet_nid_t nid; int nref; if (lnsi->lnsi_off == 0) { - seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n", - "nid", "refs", "peer", "max", "tx", "min"); + seq_printf(s, "%-24s %4s %4s %4s %5s %5s %5s\n", + "nid", "refs", "peer", "rtr", "max", "tx", "min"); return 0; } @@ -973,18 +974,19 @@ lnet_ni_seq_show (struct seq_file *s, void *iter) ni = lnsi->lnsi_ni; - maxtxcr = ni->ni_maxtxcredits; - txcr = ni->ni_txcredits; - mintxcr = ni->ni_mintxcredits; - npeertxcr = ni->ni_peertxcredits; - nid = ni->ni_nid; - nref = ni->ni_refcount; + maxtxcr = ni->ni_maxtxcredits; + txcr = ni->ni_txcredits; + mintxcr = ni->ni_mintxcredits; + npeertxcr = ni->ni_peertxcredits; + npeerrtrcr = ni->ni_peerrtrcredits; + nid = ni->ni_nid; + nref = ni->ni_refcount; LNET_UNLOCK(); - seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n", + seq_printf(s, "%-24s %4d %4d %4d %5d %5d %5d\n", libcfs_nid2str(nid), nref, - npeertxcr, maxtxcr, txcr, mintxcr); + npeertxcr, npeerrtrcr, maxtxcr, txcr, mintxcr); return 0; }