#include <linux/sysctl.h>
#include <linux/uio.h>
#include <linux/unistd.h>
+#include <linux/hashtable.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <lnet/lib-lnet.h>
#include <lnet/socklnd.h>
-#ifdef HAVE_TCP_SENDPAGE_USE_SOCKET
-# define cfs_tcp_sendpage(sk, page, offset, size, flags) \
- tcp_sendpage((sk)->sk_socket, page, offset, size, flags)
-#else /* !HAVE_TCP_SENDPAGE_USE_SOCKET */
-# define cfs_tcp_sendpage(sk, page, offset, size, flags) \
- tcp_sendpage(sk, page, offset, size, flags)
-#endif /* HAVE_TCP_SENDPAGE_USE_SOCKET */
-
#ifndef NETIF_F_CSUM_MASK
# define NETIF_F_CSUM_MASK NETIF_F_ALL_CSUM
#endif
#define SOCKNAL_NSCHEDS 3
#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1)
-#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer_ni lists */
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */
-#define SOCKNAL_ENOMEM_RETRY 1 /* seconds between retries */
+#define SOCKNAL_PEER_HASH_BITS 7 /* log2 of # peer_ni lists */
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+#define SOCKNAL_INSANITY_RECONN 5000 /* connd is retrying reconnection endlessly */
+#define SOCKNAL_ENOMEM_RETRY 1 /* seconds between retries */
-#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
+#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
+#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
-#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
+#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
* no risk if we're not running on a CONFIG_HIGHMEM platform. */
# define SOCKNAL_RISK_KMAP_DEADLOCK 1
#endif
-struct ksock_sched_info;
-
-struct ksock_sched { /* per scheduler state */
- spinlock_t kss_lock; /* serialise */
- struct list_head kss_rx_conns; /* conn waiting to be read */
+/* per scheduler state */
+struct ksock_sched {
+ /* serialise */
+ spinlock_t kss_lock;
- /* conn waiting to be written */
- struct list_head kss_tx_conns;
+ /* conn waiting to be read */
+ struct list_head kss_rx_conns;
+ /* conn waiting to be written */
+ struct list_head kss_tx_conns;
/* zombie noop tx list */
- struct list_head kss_zombie_noop_txs;
- wait_queue_head_t kss_waitq; /* where scheduler sleeps */
+ struct list_head kss_zombie_noop_txs;
+ /* where scheduler sleeps */
+ wait_queue_head_t kss_waitq;
/* # connections assigned to this scheduler */
- int kss_nconns;
- struct ksock_sched_info *kss_info; /* owner of it */
-#if !SOCKNAL_SINGLE_FRAG_RX
- struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_SINGLE_FRAG_RX
- struct kvec kss_scratch_iov[LNET_MAX_IOV];
-#endif
-};
-
-struct ksock_sched_info {
- int ksi_nthreads_max; /* max allowed threads */
- int ksi_nthreads; /* number of threads */
- int ksi_cpt; /* CPT id */
- struct ksock_sched *ksi_scheds; /* array of schedulers */
+ int kss_nconns;
+ /* max allowed threads */
+ int kss_nthreads_max;
+ /* number of threads */
+ int kss_nthreads;
+ /* CPT id */
+ int kss_cpt;
};
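+
+/* The fields of the old ksock_sched_info (thread limits, CPT id) now live
+ * directly in struct ksock_sched, so the kss_info back-pointer is gone and a
+ * scheduler is reached straight from the ksnd_schedulers array declared
+ * further down, e.g. (illustrative sketch, assuming one entry per CPT):
+ *
+ *	struct ksock_sched *sched = ksocknal_data.ksnd_schedulers[cpt];
+ */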
#define KSOCK_CPT_SHIFT 16
unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload size */
int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */
int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
-#ifdef CPU_AFFINITY
int *ksnd_irq_affinity; /* enable IRQ affinity? */
-#endif
#ifdef SOCKNAL_BACKOFF
int *ksnd_backoff_init; /* initial TCP backoff */
int *ksnd_backoff_max; /* maximum TCP backoff */
struct ksock_net {
__u64 ksnn_incarnation; /* my epoch */
- spinlock_t ksnn_lock; /* serialise */
struct list_head ksnn_list; /* chain on global list */
- int ksnn_npeers; /* # peers */
- int ksnn_shutdown; /* shutting down? */
+ atomic_t ksnn_npeers; /* # peers */
int ksnn_ninterfaces; /* IP interfaces */
struct ksock_interface ksnn_interfaces[LNET_INTERFACES_NUM];
};
+/* When the ksock_net is shut down, this (negative) bias is added to
+ * ksnn_npeers, which prevents new peers from being added.
+ */
+#define SOCKNAL_SHUTDOWN_BIAS (INT_MIN+1)
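+/* A peer-creation path can then refuse new peers once shutdown has begun by
+ * checking the sign of the counter, e.g. (illustrative sketch only, not
+ * necessarily the exact check used by the callers):
+ *
+ *	if (atomic_inc_return(&net->ksnn_npeers) <= 0) {
+ *		atomic_dec(&net->ksnn_npeers);
+ *		return -ESHUTDOWN;
+ *	}
+ */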
/** connd timeout */
#define SOCKNAL_CONND_TIMEOUT 120
/* stabilize peer_ni/conn ops */
rwlock_t ksnd_global_lock;
/* hash table of all my known peers */
- struct list_head *ksnd_peers;
- int ksnd_peer_hash_size; /* size of ksnd_peers */
+ DECLARE_HASHTABLE(ksnd_peers, SOCKNAL_PEER_HASH_BITS);
int ksnd_nthreads; /* # live threads */
int ksnd_shuttingdown; /* tell threads to exit */
/* schedulers information */
- struct ksock_sched_info **ksnd_sched_info;
+ struct ksock_sched **ksnd_schedulers;
atomic_t ksnd_nactive_txs; /* #active txs */
* received into either struct kvec or lnet_kiov_t fragments, depending on
* what the header matched or whether the message needs forwarding. */
-struct ksock_conn; /* forward ref */
-struct ksock_peer; /* forward ref */
-struct ksock_route; /* forward ref */
-struct ksock_proto; /* forward ref */
+struct ksock_conn; /* forward ref */
+struct ksock_route; /* forward ref */
+struct ksock_proto; /* forward ref */
struct ksock_tx { /* transmit packet */
struct list_head tx_list; /* queue on conn for transmission etc */
unsigned int ksnc_closing:1; /* being shut down */
unsigned int ksnc_flip:1; /* flip or not, only for V2.x */
unsigned int ksnc_zc_capable:1; /* enable to ZC */
- struct ksock_proto *ksnc_proto; /* protocol for the connection */
+ const struct ksock_proto *ksnc_proto; /* protocol for the connection */
/* READER */
#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */
struct ksock_peer_ni {
- struct list_head ksnp_list; /* stash on global peer_ni list */
+ struct hlist_node ksnp_list; /* stash on global peer_ni hash table */
time64_t ksnp_last_alive;/* when (in seconds) I was last alive */
struct lnet_process_id ksnp_id; /* who's on the other end(s) */
- atomic_t ksnp_refcount; /* # users */
- int ksnp_sharecount; /* lconf usage counter */
- int ksnp_closing; /* being closed */
- int ksnp_accepting;/* # passive connections pending */
- int ksnp_error; /* errno on closing last conn */
- __u64 ksnp_zc_next_cookie;/* ZC completion cookie */
- __u64 ksnp_incarnation; /* latest known peer_ni incarnation */
- struct ksock_proto *ksnp_proto; /* latest known peer_ni protocol */
+ atomic_t ksnp_refcount; /* # users */
+ int ksnp_closing; /* being closed */
+ int ksnp_accepting; /* # passive connections pending */
+ int ksnp_error; /* errno on closing last conn */
+ __u64 ksnp_zc_next_cookie;/* ZC completion cookie */
+ __u64 ksnp_incarnation; /* latest known peer_ni incarnation */
+ const struct ksock_proto *ksnp_proto; /* latest known protocol */
struct list_head ksnp_conns; /* all active connections */
struct list_head ksnp_routes; /* routes */
struct list_head ksnp_tx_queue; /* waiting packets */
- spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
+ spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
/* zero copy requests wait for ACK */
struct list_head ksnp_zc_req_list;
time64_t ksnp_send_keepalive; /* time to send keepalive */
- struct lnet_ni *ksnp_ni; /* which network */
- int ksnp_n_passive_ips; /* # of... */
- __u32 ksnp_passive_ips[LNET_INTERFACES_NUM]; /* preferred local interfaces */
+ struct lnet_ni *ksnp_ni; /* which network */
+ int ksnp_n_passive_ips; /* # of entries in ksnp_passive_ips */
+ __u32 ksnp_passive_ips[LNET_INTERFACES_NUM]; /* preferred local interfaces */
};
struct ksock_connreq {
* return MATCH_MAY : can be backup */
};
-extern struct ksock_proto ksocknal_protocol_v1x;
-extern struct ksock_proto ksocknal_protocol_v2x;
-extern struct ksock_proto ksocknal_protocol_v3x;
+extern const struct ksock_proto ksocknal_protocol_v1x;
+extern const struct ksock_proto ksocknal_protocol_v2x;
+extern const struct ksock_proto ksocknal_protocol_v3x;
#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
(1 << SOCKLND_CONN_BULK_OUT));
}
-static inline struct list_head *
-ksocknal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
- return (&ksocknal_data.ksnd_peers [hash]);
-}
-
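+/* Peer lookup now goes through the kernel hashtable helpers instead of a
+ * hand-rolled modulo hash.  A bucket walk keyed on the nid would look
+ * roughly like this (illustrative sketch only):
+ *
+ *	struct ksock_peer_ni *peer_ni;
+ *
+ *	hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni,
+ *			       ksnp_list, nid)
+ *		if (peer_ni->ksnp_id.nid == nid)
+ *			return peer_ni;
+ */
+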
static inline void
ksocknal_conn_addref(struct ksock_conn *conn)
{
extern void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
extern void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
int error);
-extern void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
+extern void ksocknal_notify(lnet_nid_t gw_nid);
extern void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when);
extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
extern void ksocknal_thread_fini(void);
extern void ksocknal_lib_push_conn(struct ksock_conn *conn);
extern int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn);
extern int ksocknal_lib_setup_sock(struct socket *so);
-extern int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx);
-extern int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx);
+extern int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx,
+ struct kvec *scratch_iov);
+extern int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx,
+ struct kvec *scratch_iov);
extern void ksocknal_lib_eager_ack(struct ksock_conn *conn);
-extern int ksocknal_lib_recv_iov(struct ksock_conn *conn);
-extern int ksocknal_lib_recv_kiov(struct ksock_conn *conn);
+extern int ksocknal_lib_recv_iov(struct ksock_conn *conn,
+ struct kvec *scratchiov);
+extern int ksocknal_lib_recv_kiov(struct ksock_conn *conn, struct page **pages,
+ struct kvec *scratchiov);
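+/* Note: the per-scheduler scratch kvec/page arrays were dropped from struct
+ * ksock_sched above, so the caller (presumably the scheduler thread) now owns
+ * the scratch buffers and passes them in on every send/receive call.
+ */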
extern int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem,
int *rxmem, int *nagle);