X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fgmlnd%2Fgmlnd.h;h=6936737557e342e363d073dd00c3fb2ac8651eae;hb=ed88907a96ba81d3558e71ade9def98bdc785169;hp=47d71eb5bfa8cfacc377cd0370da315809691dcd;hpb=8483950e2edce4eee194f44f99da09fc839de3a2;p=fs%2Flustre-release.git diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index 47d71eb..6936737 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -60,16 +60,11 @@ #include "linux/vmalloc.h" #include "linux/sysctl.h" -#define DEBUG_SUBSYSTEM S_NAL +#define DEBUG_SUBSYSTEM S_LND -#include "portals/nal.h" -#include "portals/api.h" -#include "portals/errno.h" #include "libcfs/kp30.h" -#include "portals/p30.h" - -#include "portals/nal.h" -#include "portals/lib-p30.h" +#include "lnet/lnet.h" +#include "lnet/lib-lnet.h" /* undefine these before including the GM headers which clash */ #undef PACKAGE_BUGREPORT @@ -85,14 +80,15 @@ #include "gm.h" #include "gm_internal.h" -/* - * Defines for the API NAL - */ +/* Fixed tunables */ +#define GMNAL_RESCHED 100 /* # busy loops to force scheduler to yield */ +#define GMNAL_NETADDR_BASE 0x10000000 /* where we start in network VM */ +#define GMNAL_LARGE_PRIORITY GM_LOW_PRIORITY /* large message GM priority */ +#define GMNAL_SMALL_PRIORITY GM_LOW_PRIORITY /* small message GM priority */ /* Wire protocol */ - typedef struct { - ptl_hdr_t gmim_hdr; /* portals header */ + lnet_hdr_t gmim_hdr; /* portals header */ char gmim_payload[0]; /* payload */ } gmnal_immediate_msg_t; @@ -109,129 +105,141 @@ typedef struct { } gmm_u; } WIRE_ATTR gmnal_msg_t; -#define GMNAL_MSG_MAGIC 0x6d797269 /* 'myri'! */ +#define GMNAL_MSG_MAGIC LNET_PROTO_GM_MAGIC #define GMNAL_MSG_VERSION 1 #define GMNAL_MSG_IMMEDIATE 1 +typedef struct netbuf { + __u64 nb_netaddr; /* network VM address */ + lnet_kiov_t nb_kiov[1]; /* the pages (at least 1) */ +} gmnal_netbuf_t; + +#define GMNAL_NETBUF_MSG(nb) ((gmnal_msg_t *)page_address((nb)->nb_kiov[0].kiov_page)) +#define GMNAL_NETBUF_LOCAL_NETADDR(nb) ((void *)((unsigned long)(nb)->nb_netaddr)) + +typedef struct gmnal_txbuf { + struct list_head txb_list; /* queue on gmni_idle_ltxbs */ + struct gmnal_txbuf *txb_next; /* stash on gmni_ltxs */ + gmnal_netbuf_t txb_buf; /* space */ +} gmnal_txbuf_t; + typedef struct gmnal_tx { - struct gmnal_tx *tx_next; - gmnal_msg_t *tx_msg; - int tx_buffer_size; - gm_size_t tx_gm_size; - int tx_msg_size; - int tx_gmlid; - int tx_gm_priority; - ptl_nid_t tx_nid; - struct gmnal_ni *tx_gmni; - lib_msg_t *tx_libmsg; - int tx_rxt; + struct list_head tx_list; /* queue */ + int tx_credit:1; /* consumed a credit? */ + int tx_large_iskiov:1; /* large is in kiovs? */ + struct gmnal_ni *tx_gmni; /* owning NI */ + lnet_nid_t tx_nid; /* destination NID */ + int tx_gmlid; /* destination GM local ID */ + lnet_msg_t *tx_lntmsg; /* lntmsg to finalize on completion */ + + gmnal_netbuf_t tx_buf; /* small tx buffer */ + gmnal_txbuf_t *tx_ltxb; /* large buffer (to free on completion) */ + int tx_msgnob; /* message size (so far) */ + + int tx_large_nob; /* # bytes large buffer payload */ + int tx_large_offset; /* offset within frags */ + int tx_large_niov; /* # VM frags */ + union { + struct iovec *iov; /* mapped frags */ + lnet_kiov_t *kiov; /* page frags */ + } tx_large_frags; + unsigned long tx_launchtime; /* when (in jiffies) the transmit was launched */ + struct gmnal_tx *tx_next; /* stash on gmni_txs */ } gmnal_tx_t; -/* - * as for gmnal_tx_t - * a hash table in nal_data find rxs from - * the rx buffer address. hash table populated at init time - */ typedef struct gmnal_rx { - struct list_head rx_list; - gmnal_msg_t *rx_msg; - int rx_size; - gm_size_t rx_gmsize; - unsigned int rx_recv_nob; - __u16 rx_recv_gmid; - __u8 rx_recv_port; - __u8 rx_recv_type; - struct gmnal_rx *rx_next; + struct list_head rx_list; /* enqueue on gmni_rxq for handling */ + int rx_islarge:1; /* large receive buffer? */ + unsigned int rx_recv_nob; /* bytes received */ + __u16 rx_recv_gmid; /* sender */ + __u8 rx_recv_port; /* sender's port */ + __u8 rx_recv_type; /* ?? */ + struct gmnal_rx *rx_next; /* stash on gmni_rxs */ + gmnal_netbuf_t rx_buf; /* the buffer */ } gmnal_rx_t; - -/* - * 1 receive thread started on each CPU - */ -#define NRXTHREADS 10 /* max number of receiver threads */ - typedef struct gmnal_ni { - spinlock_t gmni_tx_lock; - struct semaphore gmni_tx_token; - gmnal_tx_t *gmni_tx; - spinlock_t gmni_rxt_tx_lock; - struct semaphore gmni_rxt_tx_token; - gmnal_tx_t *gmni_rxt_tx; - gmnal_rx_t *gmni_rx; - struct gm_hash *gmni_rx_hash; - lib_nal_t *gmni_libnal; - struct gm_port *gmni_port; - spinlock_t gmni_gm_lock; /* serialise GM calls */ - atomic_t gmni_nthreads; - int gmni_nrxthreads; - long gmni_rxthread_pid[NRXTHREADS]; - gm_alarm_t gmni_ctthread_alarm; - int gmni_thread_shutdown; - int gmni_msg_size; - struct list_head gmni_rxq; - spinlock_t gmni_rxq_lock; - struct semaphore gmni_rxq_wait; + lnet_ni_t *gmni_ni; /* generic NI */ + struct gm_port *gmni_port; /* GM port */ + spinlock_t gmni_gm_lock; /* serialise GM calls */ + int gmni_large_pages; /* # pages in a large message buffer */ + int gmni_large_msgsize; /* nob in large message buffers */ + int gmni_large_gmsize; /* large message GM bucket */ + int gmni_small_msgsize; /* nob in small message buffers */ + int gmni_small_gmsize; /* small message GM bucket */ + __u64 gmni_netaddr_base; /* base of mapped network VM */ + int gmni_netaddr_size; /* # bytes of mapped network VM */ + + gmnal_tx_t *gmni_txs; /* all txs */ + gmnal_rx_t *gmni_rxs; /* all rx descs */ + gmnal_txbuf_t *gmni_ltxbs; /* all large tx bufs */ + + atomic_t gmni_nthreads; /* total # threads */ + gm_alarm_t gmni_alarm; /* alarm to wake caretaker */ + int gmni_shutdown; /* tell all threads to exit */ + + struct list_head gmni_idle_txs; /* idle tx's */ + int gmni_tx_credits; /* # transmits still possible */ + struct list_head gmni_idle_ltxbs; /* idle large tx buffers */ + struct list_head gmni_buf_txq; /* tx's waiting for buffers */ + struct list_head gmni_cred_txq; /* tx's waiting for credits */ + spinlock_t gmni_tx_lock; /* serialise */ + + struct gm_hash *gmni_rx_hash; /* buffer->rx lookup */ + struct semaphore gmni_rx_mutex; /* serialise blocking on GM */ } gmnal_ni_t; - -/* - * for ioctl get pid - */ -#define GMNAL_IOC_GET_GNID 1 +typedef struct { + int *gm_port; + int *gm_ntx; + int *gm_credits; + int *gm_peer_credits; + int *gm_nlarge_tx_bufs; + int *gm_nrx_small; + int *gm_nrx_large; + +#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM + struct ctl_table_header *gm_sysctl; /* sysctl interface */ +#endif +} gmnal_tunables_t; /* gmnal_api.c */ int gmnal_init(void); -void gmnal_fini(void); +void gmnal_fini(void); +int gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); +int gmnal_startup(lnet_ni_t *ni); +void gmnal_shutdown(lnet_ni_t *ni); /* gmnal_cb.c */ -ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, - lib_msg_t *libmsg, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen); -ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, - lib_msg_t *libmsg, - unsigned int nkiov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen); -ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, - lib_msg_t *libmsg, ptl_hdr_t *hdr, int type, - ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, - size_t offset, size_t len); -ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, - lib_msg_t *libmsg, ptl_hdr_t *hdr, int type, - ptl_nid_t nid, ptl_pid_t pid, - unsigned int nkiov, ptl_kiov_t *kiov, - size_t offset, size_t len); -int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist); +int gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, + int delayed, unsigned int niov, + struct iovec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen); +int gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); /* gmnal_util.c */ -int gmnal_is_rxthread(gmnal_ni_t *gmnalni); -int gmnal_alloc_txs(gmnal_ni_t *gmnalni); -void gmnal_free_txs(gmnal_ni_t *gmnalni); -gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmnalni, int block); -void gmnal_return_tx(gmnal_ni_t *gmnalni, gmnal_tx_t *tx); -int gmnal_alloc_rxs(gmnal_ni_t *gmnalni); -void gmnal_free_rxs(gmnal_ni_t *gmnalni); +void gmnal_free_ltxbufs(gmnal_ni_t *gmni); +int gmnal_alloc_ltxbufs(gmnal_ni_t *gmni); +void gmnal_free_txs(gmnal_ni_t *gmni); +int gmnal_alloc_txs(gmnal_ni_t *gmni); +void gmnal_free_rxs(gmnal_ni_t *gmni); +int gmnal_alloc_rxs(gmnal_ni_t *gmni); char *gmnal_gmstatus2str(gm_status_t status); char *gmnal_rxevent2str(gm_recv_event_t *ev); void gmnal_yield(int delay); -int gmnal_enqueue_rx(gmnal_ni_t *gmnalni, gm_recv_t *recv); -gmnal_rx_t *gmnal_dequeue_rx(gmnal_ni_t *gmnalni); -void gmnal_stop_threads(gmnal_ni_t *gmnalni); -int gmnal_start_threads(gmnal_ni_t *gmnalni); /* gmnal_comm.c */ -void gmnal_pack_msg(gmnal_ni_t *gmnalni, gmnal_tx_t *tx, - ptl_nid_t dstnid, int type); -int gmnal_ct_thread(void *arg); -int gmnal_rx_thread(void *arg); -void gmnal_post_rx(gmnal_ni_t *gmnalni, gmnal_rx_t *rx); -ptl_err_t gmnal_post_tx(gmnal_ni_t *gmnalni, gmnal_tx_t *tx, - lib_msg_t *libmsg, ptl_nid_t nid, int nob); +void gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx); +gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmni); +void gmnal_tx_done(gmnal_tx_t *tx, int rc); +void gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg, + lnet_nid_t dstnid, int type); +void gmnal_stop_threads(gmnal_ni_t *gmni); +int gmnal_start_threads(gmnal_ni_t *gmni); +void gmnal_check_txqueues_locked (gmnal_ni_t *gmni); /* Module Parameters */ -extern int num_txds; -extern int gm_port_id; +extern gmnal_tunables_t gmnal_tunables; #endif /*__INCLUDE_GMNAL_H__*/