Whamcloud - gitweb
i=liangzhen
[fs/lustre-release.git] / lnet / klnds / gmlnd / gmlnd.h
index 4c7b1b8..be0f404 100644 (file)
@@ -1,7 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
+ * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
+ * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
  *
  *   This file is part of Lustre, http://www.lustre.org/
  *
@@ -38,8 +39,9 @@
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-
-#include "linux/config.h"
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
 #include "linux/module.h"
 #include "linux/tty.h"
 #include "linux/kernel.h"
 #include "linux/vmalloc.h"
 #include "linux/sysctl.h"
 
-#define DEBUG_SUBSYSTEM S_NAL
+#define DEBUG_SUBSYSTEM S_LND
 
-#include "portals/nal.h"
-#include "portals/api.h"
-#include "portals/errno.h"
 #include "libcfs/kp30.h"
-#include "portals/p30.h"
-
-#include "portals/nal.h"
-#include "portals/lib-p30.h"
+#include "lnet/lnet.h"
+#include "lnet/lib-lnet.h"
 
 /* undefine these before including the GM headers which clash */
 #undef PACKAGE_BUGREPORT
 #include "gm.h"
 #include "gm_internal.h"
 
-/*
- *      Defines for the API NAL
- */
+/* Fixed tunables */
+#define GMNAL_RESCHED              100          /* # busy loops to force scheduler to yield */
+#define GMNAL_NETADDR_BASE         0x10000000   /* where we start in network VM */
+#define GMNAL_LARGE_PRIORITY       GM_LOW_PRIORITY /* large message GM priority */
+#define GMNAL_SMALL_PRIORITY       GM_LOW_PRIORITY /* small message GM priority */
 
 /* Wire protocol */
-
 typedef struct {
-        ptl_hdr_t       gmim_hdr;               /* portals header */
+        lnet_hdr_t      gmim_hdr;               /* LNET header */
         char            gmim_payload[0];        /* payload (zero-length array: bytes follow the header) */
 } gmnal_immediate_msg_t;
 
@@ -109,143 +107,141 @@ typedef struct {
         }               gmm_u;
 } WIRE_ATTR gmnal_msg_t;
 
-#define GMNAL_MSG_MAGIC                 0x6d797269 /* 'myri'! */
+#define GMNAL_MSG_MAGIC                 LNET_PROTO_GM_MAGIC
 #define GMNAL_MSG_VERSION               1
 #define GMNAL_MSG_IMMEDIATE             1
 
+typedef struct netbuf {                 /* a buffer: its network VM address plus the pages behind it */
+        __u64                    nb_netaddr;    /* network VM address */
+        lnet_kiov_t              nb_kiov[1];    /* the pages (at least 1); only entry [0] is declared here */
+} gmnal_netbuf_t;
+
+#define GMNAL_NETBUF_MSG(nb)            ((gmnal_msg_t *)page_address((nb)->nb_kiov[0].kiov_page)) /* first page of the buffer viewed as a gmnal_msg_t */
+#define GMNAL_NETBUF_LOCAL_NETADDR(nb)  ((void *)((unsigned long)(nb)->nb_netaddr)) /* nb_netaddr converted to a pointer via unsigned long */
+
+typedef struct gmnal_txbuf {            /* large tx buffer descriptor */
+        struct list_head         txb_list;      /* queue on gmni_idle_ltxbs */
+        struct gmnal_txbuf      *txb_next;      /* stash on gmni_ltxbs */
+        gmnal_netbuf_t           txb_buf;       /* space */
+} gmnal_txbuf_t;
+
 typedef struct gmnal_tx {
-        struct gmnal_tx         *tx_next;
-        gmnal_msg_t             *tx_msg;
-        int                      tx_buffer_size;
-        gm_size_t                tx_gm_size;
-        int                      tx_msg_size;
-        int                      tx_gmlid;
-        int                      tx_gm_priority;
-        ptl_nid_t                tx_nid;
-        struct gmnal_ni         *tx_gmni;
-        lib_msg_t               *tx_libmsg;
-        int                      tx_rxt; 
+        struct list_head         tx_list;       /* queue */
+        int                      tx_credit:1;   /* consumed a credit? */
+        int                      tx_large_iskiov:1; /* large is in kiovs? */
+        struct gmnal_ni         *tx_gmni;       /* owning NI */
+        lnet_nid_t               tx_nid;        /* destination NID */
+        int                      tx_gmlid;      /* destination GM local ID */
+        lnet_msg_t              *tx_lntmsg;     /* lntmsg to finalize on completion */
+
+        gmnal_netbuf_t           tx_buf;        /* small tx buffer */
+        gmnal_txbuf_t           *tx_ltxb;       /* large buffer (to free on completion) */
+        int                      tx_msgnob;     /* message size (so far) */
+
+        int                      tx_large_nob;  /* # bytes large buffer payload */
+        int                      tx_large_offset; /* offset within frags */
+        int                      tx_large_niov; /* # VM frags */
+        union {
+                struct iovec    *iov;           /* mapped frags */
+                lnet_kiov_t     *kiov;          /* page frags */
+        }                        tx_large_frags;
+        unsigned long            tx_launchtime; /* when (in jiffies) the transmit was launched */
+        struct gmnal_tx         *tx_next;       /* stash on gmni_txs */
 } gmnal_tx_t;
 
-/*
- *      as for gmnal_tx_t 
- *      a hash table in nal_data find rxs from
- *      the rx buffer address. hash table populated at init time
- */
 typedef struct gmnal_rx {
-        struct list_head         rx_list;
-        gmnal_msg_t             *rx_msg;
-        int                      rx_size;
-        gm_size_t                rx_gmsize;
-        unsigned int             rx_recv_nob;
-        __u16                    rx_recv_gmid;
-        __u8                     rx_recv_port;
-        __u8                     rx_recv_type;
-        struct gmnal_rx         *rx_next;
+        struct list_head         rx_list;      /* enqueue on gmni_rxq for handling */
+        int                      rx_islarge:1;  /* large receive buffer? */
+        unsigned int             rx_recv_nob;  /* bytes received */
+        __u16                    rx_recv_gmid; /* sender */
+        __u8                     rx_recv_port; /* sender's port */
+        __u8                     rx_recv_type; /* ?? */
+        struct gmnal_rx         *rx_next;      /* stash on gmni_rxs */
+        gmnal_netbuf_t           rx_buf;        /* the buffer */
 } gmnal_rx_t;
 
-
-/*
- *      1 receive thread started on each CPU
- */
-#define NRXTHREADS 10 /* max number of receiver threads */
-
 typedef struct gmnal_ni {               /* state for one GM network interface (NI) */
-        spinlock_t       gmni_tx_lock;
-        struct semaphore gmni_tx_token;
-        gmnal_tx_t      *gmni_tx;
-        spinlock_t       gmni_rxt_tx_lock;
-        struct semaphore gmni_rxt_tx_token;
-        gmnal_tx_t      *gmni_rxt_tx;
-        gmnal_rx_t      *gmni_rx;
-        struct gm_hash  *gmni_rx_hash;
-        lib_nal_t       *gmni_libnal;
-        struct gm_port  *gmni_port;
-        spinlock_t       gmni_gm_lock;          /* serialise GM calls */
-        long             gmni_rxthread_pid[NRXTHREADS];
-        int              gmni_rxthread_stop_flag;
-        spinlock_t       gmni_rxthread_flag_lock;
-        long             gmni_rxthread_flag;
-        long             gmni_ctthread_pid;
-        int              gmni_ctthread_flag;
-        gm_alarm_t       gmni_ctthread_alarm;
-        int              gmni_msg_size;
-        struct list_head gmni_rxq;
-        spinlock_t       gmni_rxq_lock;
-        struct semaphore gmni_rxq_wait;
+        lnet_ni_t        *gmni_ni;              /* generic NI */
+        struct gm_port   *gmni_port;            /* GM port */
+        spinlock_t        gmni_gm_lock;         /* serialise GM calls */
+        int               gmni_large_pages;     /* # pages in a large message buffer */
+        int               gmni_large_msgsize;   /* nob in large message buffers */
+        int               gmni_large_gmsize;    /* large message GM bucket */
+        int               gmni_small_msgsize;   /* nob in small message buffers */
+        int               gmni_small_gmsize;    /* small message GM bucket */
+        __u64             gmni_netaddr_base;    /* base of mapped network VM */
+        int               gmni_netaddr_size;    /* # bytes of mapped network VM */
+
+        gmnal_tx_t       *gmni_txs;             /* all txs (chained through tx_next) */
+        gmnal_rx_t       *gmni_rxs;            /* all rx descs (chained through rx_next) */
+        gmnal_txbuf_t    *gmni_ltxbs;           /* all large tx bufs (chained through txb_next) */
+        
+        atomic_t          gmni_nthreads;        /* total # threads */
+        gm_alarm_t        gmni_alarm;           /* alarm to wake caretaker */
+        int               gmni_shutdown;       /* tell all threads to exit */
+
+        struct list_head  gmni_idle_txs;        /* idle tx's */
+        int               gmni_tx_credits;      /* # transmits still possible */
+        struct list_head  gmni_idle_ltxbs;      /* idle large tx buffers */
+        struct list_head  gmni_buf_txq;         /* tx's waiting for buffers */
+        struct list_head  gmni_cred_txq;        /* tx's waiting for credits */
+        spinlock_t        gmni_tx_lock;         /* serialise; NOTE(review): presumably guards the tx lists and credit count above - confirm */
+
+        struct gm_hash   *gmni_rx_hash;                /* buffer->rx lookup */
+        struct semaphore  gmni_rx_mutex;        /* serialise blocking on GM */
 } gmnal_ni_t;
 
-/*
- *      Flags to start/stop and check status of threads
- *      each rxthread sets 1 bit (any bit) of the flag on startup
- *      and clears 1 bit when exiting
- */
-#define GMNAL_THREAD_RESET      0
-#define GMNAL_THREAD_STOP       666
-#define GMNAL_CTTHREAD_STARTED  333
-#define GMNAL_RXTHREADS_STARTED ( (1<<num_rx_threads)-1)
-
-
-/*
- * for ioctl get pid
- */
-#define GMNAL_IOC_GET_GNID 1    
+typedef struct {                        /* tunables; declared below as the global gmnal_tunables */
+        int              *gm_port;              /* NOTE(review): field meanings here are inferred from names - confirm; GM port */
+        int              *gm_ntx;               /* presumably # tx descriptors */
+        int              *gm_credits;           /* presumably # send credits */
+        int              *gm_peer_credits;      /* presumably # send credits per peer */
+        int              *gm_nlarge_tx_bufs;    /* presumably # large tx buffers */
+        int              *gm_nrx_small;         /* presumably # small rx buffers */
+        int              *gm_nrx_large;         /* presumably # large rx buffers */
+
+#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
+        cfs_sysctl_table_header_t *gm_sysctl;   /* sysctl interface */
+#endif
+} gmnal_tunables_t;
 
 
 /* gmnal_api.c */
 int gmnal_init(void);
-void  gmnal_fini(void);
+void gmnal_fini(void);
+int gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+int gmnal_startup(lnet_ni_t *ni);
+void gmnal_shutdown(lnet_ni_t *ni);
 
 /* gmnal_cb.c */
-ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, 
-                        lib_msg_t *libmsg,
-                        unsigned int niov, struct iovec *iov, 
-                        size_t offset, size_t mlen, size_t rlen);
-ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, 
-                              lib_msg_t *libmsg, 
-                              unsigned int nkiov, ptl_kiov_t *kiov, 
-                              size_t offset, size_t mlen, size_t rlen);
-ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, 
-                        lib_msg_t *libmsg, ptl_hdr_t *hdr, int type, 
-                        ptl_nid_t nid, ptl_pid_t pid,
-                        unsigned int niov, struct iovec *iov, 
-                        size_t offset, size_t len);
-ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private,
-                              lib_msg_t *libmsg, ptl_hdr_t *hdr, int type,
-                              ptl_nid_t nid, ptl_pid_t pid, 
-                              unsigned int nkiov, ptl_kiov_t *kiov, 
-                              size_t offset, size_t len);
-int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist);
+int gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+               int delayed, unsigned int niov, 
+               struct iovec *iov, lnet_kiov_t *kiov,
+               unsigned int offset, unsigned int mlen, unsigned int rlen);
+int gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
 
 /* gmnal_util.c */
-int gmnal_is_rxthread(gmnal_ni_t *gmnalni);
-int gmnal_alloc_txs(gmnal_ni_t *gmnalni);
-void gmnal_free_txs(gmnal_ni_t *gmnalni);
-gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmnalni, int block);
-void gmnal_return_tx(gmnal_ni_t *gmnalni, gmnal_tx_t *tx);
-int gmnal_alloc_rxs(gmnal_ni_t *gmnalni);
-void gmnal_free_rxs(gmnal_ni_t *gmnalni);
-void gmnal_stop_rxthread(gmnal_ni_t *gmnalni);
-void gmnal_stop_ctthread(gmnal_ni_t *gmnalni);
+void gmnal_free_ltxbufs(gmnal_ni_t *gmni);
+int gmnal_alloc_ltxbufs(gmnal_ni_t *gmni);
+void gmnal_free_txs(gmnal_ni_t *gmni);
+int gmnal_alloc_txs(gmnal_ni_t *gmni);
+void gmnal_free_rxs(gmnal_ni_t *gmni);
+int gmnal_alloc_rxs(gmnal_ni_t *gmni);
 char *gmnal_gmstatus2str(gm_status_t status);
 char *gmnal_rxevent2str(gm_recv_event_t *ev);
 void gmnal_yield(int delay);
-int gmnal_enqueue_rx(gmnal_ni_t *gmnalni, gm_recv_t *recv);
-gmnal_rx_t *gmnal_dequeue_rx(gmnal_ni_t *gmnalni);
-int gmnal_start_kernel_threads(gmnal_ni_t *gmnalni);
 
 /* gmnal_comm.c */
-void gmnal_pack_msg(gmnal_ni_t *gmnalni, gmnal_tx_t *tx,
-                    ptl_nid_t dstnid, int type);
-int gmnal_ct_thread(void *arg);
-int gmnal_rx_thread(void *arg);
-void gmnal_post_rx(gmnal_ni_t *gmnalni, gmnal_rx_t *rx);
-ptl_err_t gmnal_post_tx(gmnal_ni_t *gmnalni, gmnal_tx_t *tx, 
-                        lib_msg_t *libmsg, ptl_nid_t nid, int nob);
+void gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx);
+gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmni);
+void gmnal_tx_done(gmnal_tx_t *tx, int rc);
+void gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg,
+                    lnet_nid_t dstnid, int type);
+void gmnal_stop_threads(gmnal_ni_t *gmni);
+int gmnal_start_threads(gmnal_ni_t *gmni);
+void gmnal_check_txqueues_locked (gmnal_ni_t *gmni);
 
 /* Module Parameters */
-extern  int num_rx_threads;
-extern  int num_txds;
-extern  int gm_port_id;
+extern gmnal_tunables_t gmnal_tunables;
 
 #endif /*__INCLUDE_GMNAL_H__*/