Whamcloud - gitweb
LU-4588 code: replace semaphores with mutexes
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd.h
index 6bfbd9a..f433247 100644 (file)
@@ -26,9 +26,6 @@
 #ifndef _GNILND_GNILND_H_
 #define _GNILND_GNILND_H_
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -67,7 +64,6 @@
 #include <libcfs/libcfs.h>
 #include <lnet/lnet.h>
 #include <lnet/lib-lnet.h>
-#include <lnet/lnet-sysctl.h>
 
 #include <gni_pub.h>
 #include "gnilnd_version.h"
 /* tune down some COMPUTE options as they won't see the same number of connections and
  * don't need the throughput of multiple threads by default */
 #if defined(CONFIG_CRAY_COMPUTE)
+#ifdef CONFIG_MK1OM
+#define GNILND_SCHED_THREADS      2             /* default # of kgnilnd_scheduler threads */
+#else
 #define GNILND_SCHED_THREADS      1             /* default # of kgnilnd_scheduler threads */
+#endif
 #define GNILND_FMABLK             64            /* default number of mboxes per fmablk */
 #define GNILND_SCHED_NICE         0            /* default nice value for scheduler threads */
 #define GNILND_COMPUTE            1             /* compute image */
@@ -471,6 +471,8 @@ typedef struct kgn_tunables {
        int              *kgn_dgram_timeout;    /* max time for dgram mover to run before scheduling */
        int              *kgn_sched_nice;       /* nice value for kgnilnd scheduler threads */
        int              *kgn_reverse_rdma;     /* Reverse RDMA setting */
+       int              *kgn_eager_credits;    /* allocated eager buffers */
+       int              *kgn_efault_lbug;      /* Should we LBUG on receiving an EFAULT */
 #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
        cfs_sysctl_table_header_t *kgn_sysctl;  /* sysctl interface */
 #endif
@@ -518,7 +520,7 @@ typedef struct kgn_device {
        int                     gnd_id;           /* device id, also index in kgn_devices */
        __u32                   gnd_nid;          /* ph host ID translated to NID */
        struct list_head        gnd_fma_buffs;    /* list of FMA memory blocks */
-       struct semaphore        gnd_fmablk_sem;   /* semaphore for FMA block memory alloc/free */
+       struct mutex            gnd_fmablk_mutex; /* mutex for FMA block memory alloc/free */
        spinlock_t              gnd_fmablk_lock;  /* lock for mbox alloc/release */
        atomic_t                gnd_nfmablk;      /* # of fmablk live */
        atomic_t                gnd_fmablk_vers;  /* gnd_fma_bufs stamp */
@@ -757,7 +759,7 @@ typedef struct kgn_peer {
        unsigned long       gnp_last_alive;             /* last time I had valid comms */
        int                 gnp_last_dgram_errno;       /* last error dgrams saw */
        unsigned long       gnp_last_dgram_time;        /* last time I tried to connect */
-       unsigned long       gnp_reconnect_time;         /* CURRENT_SECONDS when reconnect OK */
+       unsigned long       gnp_reconnect_time;         /* get_seconds() when reconnect OK */
        unsigned long       gnp_reconnect_interval;     /* exponential backoff */
        atomic_t            gnp_dirty_eps;              /* # of old but yet to be destroyed EPs from conns */
        int                 gnp_down;                   /* rca says peer down */
@@ -794,7 +796,7 @@ typedef struct kgn_data {
        wait_queue_head_t       kgn_ruhroh_waitq;     /* ruhroh thread wakeup */
        int                     kgn_quiesce_trigger;  /* should we quiesce ? */
        atomic_t                kgn_nquiesce;         /* how many quiesced ? */
-       struct semaphore        kgn_quiesce_sem;      /* serialize ruhroh task, startup and shutdown */
+       struct mutex            kgn_quiesce_mutex;    /* serialize ruhroh task, startup and shutdown */
        int                     kgn_needs_reset;      /* we need stack reset */
 
        /* These next three members implement communication from gnilnd into
@@ -815,6 +817,7 @@ typedef struct kgn_data {
 
        struct list_head       *kgn_conns;            /* conns hashed by cqid */
        atomic_t                kgn_nconns;           /* # connections extant */
+       atomic_t                kgn_neager_allocs;    /* # of eager allocations */
        __u64                   kgn_peerstamp;        /* when I started up */
        __u64                   kgn_connstamp;        /* conn stamp generator */
        int                     kgn_conn_version;     /* version flag for conn tables */
@@ -824,11 +827,11 @@ typedef struct kgn_data {
        wait_queue_head_t       kgn_reaper_waitq;     /* reaper sleeps here */
        spinlock_t              kgn_reaper_lock;      /* serialise */
 
-       cfs_mem_cache_t        *kgn_rx_cache;         /* rx descriptor space */
-       cfs_mem_cache_t        *kgn_tx_cache;         /* tx descriptor memory */
-       cfs_mem_cache_t        *kgn_tx_phys_cache;    /* tx phys descriptor memory */
+       struct kmem_cache        *kgn_rx_cache;         /* rx descriptor space */
+       struct kmem_cache        *kgn_tx_cache;         /* tx descriptor memory */
+       struct kmem_cache        *kgn_tx_phys_cache;    /* tx phys descriptor memory */
        atomic_t                kgn_ntx;              /* # tx in use */
-       cfs_mem_cache_t        *kgn_dgram_cache;      /* outgoing datagrams */
+       struct kmem_cache        *kgn_dgram_cache;      /* outgoing datagrams */
 
        struct page          ***kgn_cksum_map_pages;  /* page arrays for mapping pages on checksum */
        __u64                   kgn_cksum_npages;     /* Number of pages allocated for checksumming */
@@ -848,6 +851,7 @@ typedef struct kgn_data {
        atomic_t                kgn_rev_offset;       /* number of time REV rdma have been misaligned offsets */
        atomic_t                kgn_rev_length;       /* Number of times REV rdma have been misaligned lengths */
        atomic_t                kgn_rev_copy_buff;    /* Number of times REV rdma have had to make a copy buffer */
+       struct socket          *kgn_sock;             /* for Apollo */
 } kgn_data_t;
 
 extern kgn_data_t         kgnilnd_data;
@@ -1032,19 +1036,18 @@ do {
        (atomic_read(&kgnilnd_data.kgn_nquiesce) ==                             \
                atomic_read(&kgnilnd_data.kgn_nthreads))
 
-#define KGNILND_SPIN_QUIESCE                                                 \
-do {                                                                         \
-       /* E.T phone home */                                                 \
-       atomic_inc(&kgnilnd_data.kgn_nquiesce);                              \
-       CDEBUG(D_NET, "Waiting for thread pause to be over...\n");           \
-       while (kgnilnd_data.kgn_quiesce_trigger) {                           \
-               set_current_state(TASK_INTERRUPTIBLE);                       \
-               cfs_schedule_timeout_and_set_state(TASK_INTERRUPTIBLE,       \
-                       cfs_time_seconds(1));                                \
-       }                                                                    \
-       /* Mom, my homework is done */                                       \
-       CDEBUG(D_NET, "Waking up from thread pause\n");                      \
-       atomic_dec(&kgnilnd_data.kgn_nquiesce);                              \
+#define KGNILND_SPIN_QUIESCE                                           \
+do {                                                                   \
+       /* E.T phone home: count this thread as quiesced */             \
+       atomic_inc(&kgnilnd_data.kgn_nquiesce);                         \
+       CDEBUG(D_NET, "Waiting for thread pause to be over...\n");      \
+       while (kgnilnd_data.kgn_quiesce_trigger) {                      \
+               set_current_state(TASK_INTERRUPTIBLE);                  \
+               schedule_timeout(HZ); /* re-poll after <= HZ jiffies */ \
+       }                                                               \
+       /* Mom, my homework is done: drop our quiesce count */          \
+       CDEBUG(D_NET, "Waking up from thread pause\n");                 \
+       atomic_dec(&kgnilnd_data.kgn_nquiesce);                         \
+} while(0)
 
 /* use macros for addref/decref to get the calling function name in the CDEBUG */
@@ -1671,7 +1674,7 @@ void kgnilnd_base_shutdown(void);
 
 int kgnilnd_allocate_phys_fmablk(kgn_device_t *device);
 int kgnilnd_map_phys_fmablk(kgn_device_t *device);
-void kgnilnd_unmap_phys_fmablk(kgn_device_t *device);
+void kgnilnd_unmap_fma_blocks(kgn_device_t *device);
 void kgnilnd_free_phys_fmablk(kgn_device_t *device);
 
 int kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
@@ -1702,7 +1705,7 @@ int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int r
 int kgnilnd_schedule_process_conn(kgn_conn_t *conn, int sched_intent);
 
 void kgnilnd_schedule_dgram(kgn_device_t *dev);
-int kgnilnd_create_peer_safe(kgn_peer_t **peerp, lnet_nid_t nid, kgn_net_t *net);
+int kgnilnd_create_peer_safe(kgn_peer_t **peerp, lnet_nid_t nid, kgn_net_t *net, int node_state);
 void kgnilnd_add_peer_locked(lnet_nid_t nid, kgn_peer_t *new_stub_peer, kgn_peer_t **peerp);
 int kgnilnd_add_peer(kgn_net_t *net, lnet_nid_t nid, kgn_peer_t **peerp);
 
@@ -1743,6 +1746,7 @@ int kgnilnd_close_peer_conns_locked(kgn_peer_t *peer, int why);
 int kgnilnd_report_node_state(lnet_nid_t nid, int down);
 void kgnilnd_wakeup_rca_thread(void);
 int kgnilnd_start_rca_thread(void);
+int kgnilnd_get_node_state(__u32 nid);
 
 int kgnilnd_tunables_init(void);
 void kgnilnd_tunables_fini(void);
@@ -1768,11 +1772,12 @@ void kgnilnd_release_mbox(kgn_conn_t *conn, int purgatory_hold);
 
 int kgnilnd_find_and_cancel_dgram(kgn_device_t *dev, lnet_nid_t dst_nid);
 void kgnilnd_cancel_dgram_locked(kgn_dgram_t *dgram);
-void kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram);
+void kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram, int shutdown);
 
 int kgnilnd_setup_wildcard_dgram(kgn_device_t *dev);
 int kgnilnd_cancel_net_dgrams(kgn_net_t *net);
 int kgnilnd_cancel_wc_dgrams(kgn_device_t *dev);
+int kgnilnd_cancel_dgrams(kgn_device_t *dev);
 void kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev);
 
 int kgnilnd_dgram_waitq(void *arg);