+/* Backing storage for the rx fragment descriptors.  We either read a single
+ * contiguous header, or up to LNET_MAX_IOV fragments of payload of either
+ * type, so the two descriptor arrays overlay each other in this union. */
+union ksock_rxiovspace {
+ struct kvec iov[LNET_MAX_IOV]; /* kvec fragment descriptors */
+ lnet_kiov_t kiov[LNET_MAX_IOV]; /* lnet_kiov_t fragment descriptors */
+};
+
+#define SOCKNAL_RX_KSM_HEADER 1 /* reading the ksock message header */
+#define SOCKNAL_RX_LNET_HEADER 2 /* reading the lnet message header */
+#define SOCKNAL_RX_PARSE 3 /* calling lnet_parse() */
+#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */
+#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading the lnet payload (to deliver here) */
+#define SOCKNAL_RX_SLOP 6 /* skipping the body */
+
+struct ksock_conn { /* state of one socket connection to a peer_ni */
+ struct ksock_peer_ni *ksnc_peer; /* owning peer_ni */
+ struct ksock_route *ksnc_route; /* owning route */
+ struct list_head ksnc_list; /* link on peer_ni's conn list */
+ struct socket *ksnc_sock; /* actual socket */
+ void *ksnc_saved_data_ready; /* socket's original data_ready() callback (kept as an untyped pointer) */
+ void *ksnc_saved_write_space; /* socket's original write_space() callback (kept as an untyped pointer) */
+ atomic_t ksnc_conn_refcount; /* conn refcount */
+ atomic_t ksnc_sock_refcount; /* sock refcount */
+ struct ksock_sched *ksnc_scheduler; /* who schedules this connection */
+ __u32 ksnc_myipaddr; /* my IP */
+ __u32 ksnc_ipaddr; /* peer_ni's IP */
+ int ksnc_port; /* peer_ni's port */
+ signed int ksnc_type:3; /* type of connection;
+ * should be a signed value */
+ unsigned int ksnc_closing:1; /* being shut down */
+ unsigned int ksnc_flip:1; /* flip or not, only for V2.x (presumably byte order -- confirm) */
+ unsigned int ksnc_zc_capable:1; /* capable of zero-copy (ZC) */
+ struct ksock_proto *ksnc_proto; /* protocol for the connection */
+
+ /* -- READER -- */
+
+ /* where I enqueue while waiting for input or a forwarding descriptor */
+ struct list_head ksnc_rx_list;
+ time64_t ksnc_rx_deadline; /* when (in seconds) receive times out */
+ __u8 ksnc_rx_started; /* started receiving a message */
+ __u8 ksnc_rx_ready; /* data ready to read */
+ __u8 ksnc_rx_scheduled;/* being progressed */
+ __u8 ksnc_rx_state; /* what is being read (presumably a SOCKNAL_RX_* value -- confirm) */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* # bytes actually wanted */
+ int ksnc_rx_niov; /* # kvec frags */
+ struct kvec *ksnc_rx_iov; /* the kvec frags */
+ int ksnc_rx_nkiov; /* # page frags */
+ lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
+ union ksock_rxiovspace ksnc_rx_iov_space;/* space for frag descriptors */
+ __u32 ksnc_rx_csum; /* partial checksum for incoming data */
+ struct lnet_msg *ksnc_lnet_msg; /* rx lnet_finalize arg */
+ struct ksock_msg ksnc_msg; /* incoming message buffer:
+ * a V2.x message takes the
+ * whole struct;
+ * a V1.x message is a bare
+ * struct lnet_hdr, stored
+ * in ksnc_msg.ksm_u.lnetmsg
+ */
+ /* -- WRITER -- */
+ /* where I enqueue while waiting for output space */
+ struct list_head ksnc_tx_list;
+ /* packets waiting to be sent */
+ struct list_head ksnc_tx_queue;
+ /* next TX that can carry a LNet message or ZC-ACK */
+ struct ksock_tx *ksnc_tx_carrier;
+ /* when (in seconds) tx times out */
+ time64_t ksnc_tx_deadline;
+ /* send buffer marker */
+ int ksnc_tx_bufnob;
+ /* # bytes queued */
+ atomic_t ksnc_tx_nob;
+ /* write space */
+ int ksnc_tx_ready;
+ /* being progressed */
+ int ksnc_tx_scheduled;
+ /* time stamp of the last posted TX */
+ time64_t ksnc_tx_last_post;
+};
+
+struct ksock_route { /* an address/port to connect to, owned by a peer_ni */
+ struct list_head ksnr_list; /* chain on peer_ni route list */
+ struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes
+ * NOTE(review): probably meant
+ * ksnd_connd_routes (cf.
+ * ksnd_connd_connreqs) -- confirm */
+ struct ksock_peer_ni *ksnr_peer; /* owning peer_ni */
+ atomic_t ksnr_refcount; /* # users */
+ time64_t ksnr_timeout; /* when (in secs) reconnection can happen next */
+ time64_t ksnr_retry_interval; /* how long between retries */
+ __u32 ksnr_myipaddr; /* my IP */
+ __u32 ksnr_ipaddr; /* IP address to connect to */
+ int ksnr_port; /* port to connect to */
+ unsigned int ksnr_scheduled:1; /* scheduled for attention */
+ unsigned int ksnr_connecting:1;/* connection establishment in progress */
+ unsigned int ksnr_connected:4; /* connections established, by type */
+ unsigned int ksnr_deleted:1; /* been removed from peer_ni? */
+ unsigned int ksnr_share_count; /* created explicitly? */
+ int ksnr_conn_count; /* # conns established by this route */
+};
+
+#define SOCKNAL_KEEPALIVE_PING 1 /* cookie value for a keepalive ping */
+
+struct ksock_peer_ni { /* per-peer state: its connections, routes and waiting packets */
+ struct list_head ksnp_list; /* stash on global peer_ni list */
+ time64_t ksnp_last_alive;/* when (in seconds) I was last alive */
+ struct lnet_process_id ksnp_id; /* who's on the other end(s) */
+ atomic_t ksnp_refcount; /* # users */
+ int ksnp_closing; /* being closed */
+ int ksnp_accepting;/* # passive connections pending */
+ int ksnp_error; /* errno on closing last conn */
+ __u64 ksnp_zc_next_cookie;/* ZC completion cookie */
+ __u64 ksnp_incarnation; /* latest known peer_ni incarnation */
+ struct ksock_proto *ksnp_proto; /* latest known peer_ni protocol */
+ struct list_head ksnp_conns; /* all active connections */
+ struct list_head ksnp_routes; /* routes */
+ struct list_head ksnp_tx_queue; /* waiting packets */
+ spinlock_t ksnp_lock; /* serialize, g_lock unsafe */
+ /* zero-copy requests waiting for an ACK */
+ struct list_head ksnp_zc_req_list;
+ time64_t ksnp_send_keepalive; /* time to send keepalive */
+ struct lnet_ni *ksnp_ni; /* which network */
+ int ksnp_n_passive_ips; /* # of preferred local interfaces (presumably the entries in use in ksnp_passive_ips[] -- confirm) */
+ __u32 ksnp_passive_ips[LNET_INTERFACES_NUM]; /* preferred local interfaces */
+};
+
+struct ksock_connreq { /* pairs an accepted socket with the NI chosen for it */
+ /* stash on ksnd_connd_connreqs */
+ struct list_head ksncr_list;
+ /* chosen NI */
+ struct lnet_ni *ksncr_ni;
+ /* accepted socket */
+ struct socket *ksncr_sock;
+};
+
+extern struct ksock_nal_data ksocknal_data; /* declaration only; defined elsewhere */
+extern struct ksock_tunables ksocknal_tunables; /* declaration only; defined elsewhere */
+
+#define SOCKNAL_MATCH_NO 0 /* TX can't match the type of the connection */
+#define SOCKNAL_MATCH_YES 1 /* TX matches the type of the connection */
+#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but it is not preferred */
+
+struct ksock_proto { /* table of operations for one protocol version */
+ int pro_version; /* version number of protocol */
+ int (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *); /* handshake function: send hello */
+ int (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int);/* handshake function: receive hello */
+ void (*pro_pack)(struct ksock_tx *); /* message pack */
+ void (*pro_unpack)(struct ksock_msg *); /* message unpack */
+ struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *); /* queue tx on the connection */
+ int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64); /* queue ZC ack on the connection */
+ int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int); /* handle ZC request */
+ int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64); /* handle ZC ACK */
+ int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int); /* does the msg type match the connection type?
+ * return value:
+ * SOCKNAL_MATCH_NO : no
+ * SOCKNAL_MATCH_YES : matching type
+ * SOCKNAL_MATCH_MAY : can be backup */
+};
+
+extern struct ksock_proto ksocknal_protocol_v1x; /* protocol table, v1.x per its name */
+extern struct ksock_proto ksocknal_protocol_v2x; /* protocol table, v2.x per its name */
+extern struct ksock_proto ksocknal_protocol_v3x; /* protocol table, v3.x per its name */
+
+#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR /* alias of the LNet TCP major version */
+#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR /* alias of the LNet TCP minor version */
+#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR /* V1 is identified by its major number */
+
+#ifndef CPU_MASK_NONE
+#define CPU_MASK_NONE 0UL