*/
#define DEBUG_PORTAL_ALLOC
-#define EXPORT_SYMTAB
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
#include <linux/config.h>
#include <linux/module.h>
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <linux/init.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/kp30.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
+#include <portals/socknal.h>
#if CONFIG_SMP
-# define SOCKNAL_N_SCHED smp_num_cpus /* # socknal schedulers */
+# define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */
#else
# define SOCKNAL_N_SCHED 1 /* # socknal schedulers */
#endif
/* default vals for runtime tunables */
#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
-#define SOCKNAL_EAGER_ACK 1 /* default eager ack (boolean) */
+#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */
+#define SOCKNAL_TYPED_CONNS 1 /* default typed conns (boolean): unidirectional large, bidirectional small? */
#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
-
+#define SOCKNAL_MIN_BULK (1<<10) /* default smallest "large" message */
#define SOCKNAL_USE_KEEPALIVES 0 /* use tcp/ip keepalive? */
#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#if PTL_LARGE_MTU
-# define SOCKNAL_MAX_FWD_PAYLOAD (256<<10) /* biggest payload I can forward */
-#else
-# define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */
-#endif
-
-#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
-
#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT)
+#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
/* # pages in a large message fwd buffer */
#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */
#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
# define sk_socket socket
#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+# define sk_wmem_queued wmem_queued
+#endif
+
typedef struct /* pool of forwarding buffers */
{
spinlock_t fmp_lock; /* serialise */
- struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_idle_fmbs; /* free buffers */
struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+ int fmp_nactive_fmbs; /* # buffers currently in use (i.e. not free) */
+ int fmp_buff_pages; /* # pages per buffer; one value for the whole pool */
} ksock_fmb_pool_t;
int ksnd_init; /* initialisation state */
int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */
int ksnd_eager_ack; /* make TCP ack eagerly? */
+ int ksnd_typed_conns; /* drive sockets by type? */
+ int ksnd_min_bulk; /* smallest "large" message */
#if SOCKNAL_ZC
unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */
#endif
struct ctl_table_header *ksnd_sysctl; /* sysctl interface */
+ __u64 ksnd_incarnation; /* my epoch */
rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */
struct list_head *ksnd_peers; /* hash table of all my known peers */
kpr_router_t ksnd_router; /* THE router */
- void *ksnd_fmbs; /* all the pre-allocated FMBs */
ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
- void *ksnd_ltxs; /* all the pre-allocated LTXs */
- spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */
- struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
- struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
- wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
- int ksnd_active_ltxs; /* #active ltxs */
+ atomic_t ksnd_nactive_ltxs; /* #active ltxs */
struct list_head ksnd_deathrow_conns; /* conns to be closed */
struct list_head ksnd_zombie_conns; /* conns to be freed */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleep here */
+ struct list_head ksnd_enomem_conns; /* conns to be retried */
+ wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
+ unsigned long ksnd_reaper_waketime; /* when reaper will wake */
spinlock_t ksnd_reaper_lock; /* serialise */
+ int ksnd_enomem_tx; /* test ENOMEM sender */
int ksnd_stall_tx; /* test sluggish sender */
int ksnd_stall_rx; /* test sluggish receiver */
#define SOCKNAL_INIT_ALL 3
/* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded
- * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0
- * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1
- * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t
- * fragments.
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more ptl_kiov_t fragments.
*
* On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header). Once the header has been received, if the message
- * requires forwarding or will be received into mapped memory, up to
- * PTL_MD_MAX_IOV struct iovec fragments describe the target memory.
- * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used.
- */
+ * receive (the header). Once the header has been received, the payload is
+ * received into either struct iovec or ptl_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
struct ksock_conn; /* forward ref */
struct ksock_peer; /* forward ref */
#endif
} ksock_tx_t;
+typedef struct /* forwarded packet: tx descriptor plus its header iovec */
+{
+ ksock_tx_t ftx_tx; /* send info */
+ struct iovec ftx_iov; /* single iovec describing the header */
+} ksock_ftx_t;
+
#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
/* network zero copy callback descriptor embedded in ksock_tx_t */
-/* space for the tx frag descriptors: hdr is always 1 iovec
- * and payload is PTL_MD_MAX of either type. */
-typedef struct
-{
- struct iovec hdr;
- union {
- struct iovec iov[PTL_MD_MAX_IOV];
- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
- } payload;
-} ksock_txiovspace_t;
-
typedef struct /* locally transmitted packet */
{
ksock_tx_t ltx_tx; /* send info */
- struct list_head *ltx_idle; /* where to put when idle */
void *ltx_private; /* lib_finalize() callback arg */
void *ltx_cookie; /* lib_finalize() callback arg */
- ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */
ptl_hdr_t ltx_hdr; /* buffer for packet header */
+ int ltx_desc_size; /* bytes allocated for this desc; NOTE(review): presumably includes the trailing frag arrays -- confirm */
+ struct iovec ltx_iov[1]; /* iovec frags: first is the hdr, any others are payload */
+ ptl_kiov_t ltx_kiov[0]; /* kiov frags for payload (0 or more) */
} ksock_ltx_t;
#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
{ /* (socknal->router) */
struct list_head fmb_list; /* queue idle */
kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
- int fmb_npages; /* # pages allocated */
ksock_fmb_pool_t *fmb_pool; /* owning pool */
struct ksock_peer *fmb_peer; /* peer received from */
- struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
- struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
+ ptl_hdr_t fmb_hdr; /* message header */
+ ptl_kiov_t fmb_kiov[0]; /* payload frags */
} ksock_fmb_t;
/* space for the rx frag descriptors; we either read a single contiguous
- * header, or PTL_MD_MAX_IOV frags of payload of either type. */
+ * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
typedef union {
struct iovec iov[PTL_MD_MAX_IOV];
ptl_kiov_t kiov[PTL_MD_MAX_IOV];
__u32 ksnc_ipaddr; /* peer's IP */
int ksnc_port; /* peer's port */
int ksnc_closing; /* being shut down */
+ int ksnc_type; /* type of connection */
+ __u64 ksnc_incarnation; /* peer's incarnation */
- /* READER */
+ /* reader */
struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */
int ksnc_rx_started; /* started receiving a message */
int ksnc_tx_scheduled; /* being progressed */
} ksock_conn_t;
+#define KSNR_TYPED_ROUTES ((1 << SOCKNAL_CONN_CONTROL) | \
+ (1 << SOCKNAL_CONN_BULK_IN) | \
+ (1 << SOCKNAL_CONN_BULK_OUT)) /* mask with one bit set per typed conn: control, bulk in, bulk out */
+
typedef struct ksock_route
{
struct list_head ksnr_list; /* chain on peer route list */
int ksnr_port; /* port to connect to */
int ksnr_buffer_size; /* size of socket buffers */
unsigned int ksnr_irq_affinity:1; /* set affinity? */
- unsigned int ksnr_xchange_nids:1; /* do hello protocol? */
- unsigned int ksnr_nonagel:1; /* disable nagle? */
unsigned int ksnr_eager:1; /* connect eagery? */
- unsigned int ksnr_connecting:1; /* autoconnect in progress? */
+ unsigned int ksnr_connecting:4; /* autoconnects in progress by type; NOTE(review): presumably a (1 << type) bitmask, cf. KSNR_TYPED_ROUTES -- confirm */
+ unsigned int ksnr_connected:4; /* connections established by type; NOTE(review): presumably the same bitmask encoding -- confirm */
unsigned int ksnr_deleted:1; /* been removed from peer? */
- int ksnr_generation; /* connection incarnation # */
- ksock_conn_t *ksnr_conn; /* NULL/active connection */
+ int ksnr_conn_count; /* # conns established by this route (replaces the single ksnr_conn pointer) */
} ksock_route_t;
typedef struct ksock_peer
extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid);
extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr,
int single, int keep_conn);
-extern int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route,
- struct socket *sock, int bind_irq);
+extern int ksocknal_create_conn (ksock_route_t *route,
+ struct socket *sock, int bind_irq, int type); /* NOTE(review): 'type' is presumably a connection type as stored in ksnc_type -- confirm */
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
-extern int ksocknal_close_conn_unlocked (ksock_conn_t *conn, int why);
extern void ksocknal_terminate_conn (ksock_conn_t *conn);
extern void ksocknal_destroy_conn (ksock_conn_t *conn);
extern void ksocknal_put_conn (ksock_conn_t *conn);
-extern int ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr);
+extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); /* NOTE(review): 'incarnation' presumably compared against ksnc_incarnation -- confirm */
+extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
+extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr);
extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch);
extern int ksocknal_autoconnectd (void *arg);
extern int ksocknal_reaper (void *arg);
extern int ksocknal_setup_sock (struct socket *sock);
+extern int ksocknal_hello (struct socket *sock,
+ ptl_nid_t *nid, int *type, __u64 *incarnation); /* NOTE(review): pointer args look like in/out values of the hello exchange -- confirm against the definition */