X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fgnilnd%2Fgnilnd.h;h=f7359a413e325ea8cad45a283e72f601e7ce4861;hb=57c03f3070753146a1d374939197e8c838a0bcc1;hp=b0d0f89a847f7218e1d9f5724e5731326d4921da;hpb=356ce9edd4941ddb495729e148b7bd791b8ff722;p=fs%2Flustre-release.git diff --git a/lnet/klnds/gnilnd/gnilnd.h b/lnet/klnds/gnilnd/gnilnd.h index b0d0f89..f7359a4 100644 --- a/lnet/klnds/gnilnd/gnilnd.h +++ b/lnet/klnds/gnilnd/gnilnd.h @@ -28,6 +28,8 @@ #ifndef _GNILND_GNILND_H_ #define _GNILND_GNILND_H_ +#define DEBUG_SUBSYSTEM S_LND + #include #include #include @@ -59,14 +61,23 @@ #include #include -#define DEBUG_SUBSYSTEM S_LND - -#include -#include #include #include -#include "gnilnd_version.h" + +static inline time_t cfs_duration_sec(long duration_jiffies) +{ + return jiffies_to_msecs(duration_jiffies) / MSEC_PER_SEC; +} + +#ifdef CONFIG_SLAB +#define GNILND_MBOX_SIZE KMALLOC_MAX_SIZE +#else +#define GNILND_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ + (MAX_ORDER + PAGE_SHIFT - 1) : 25) +#define GNILND_SHIFT_MAX GNILND_SHIFT_HIGH +#define GNILND_MBOX_SIZE (1UL << GNILND_SHIFT_MAX) +#endif /* tunables determined at compile time */ @@ -117,11 +128,13 @@ #define GNILND_SCHED_NICE 0 /* default nice value for scheduler threads */ #define GNILND_COMPUTE 1 /* compute image */ #define GNILND_FAST_RECONNECT 1 /* Fast Reconnect option */ +#define GNILND_DEFAULT_CREDITS 64 /* Default number of simultaneous transmits */ #else #define GNILND_FMABLK 1024 /* default number of mboxes per fmablk */ #define GNILND_SCHED_NICE -20 /* default nice value for scheduler threads */ #define GNILND_COMPUTE 0 /* service image */ #define GNILND_FAST_RECONNECT 0 /* Fast Reconnect option */ +#define GNILND_DEFAULT_CREDITS 256 /* Default number of simultaneous transmits */ #endif /* EXTRA_BITS are there to allow us to hide NOOP/CLOSE and anything else out of band */ @@ -135,6 +148,8 @@ /* need sane upper bound to limit copy overhead */ #define GNILND_MAX_IMMEDIATE (64<<10) +/* allow for 4M transfers over gni. Note 2.5M used by DVS */ +#define GNILND_MAX_IOV 1024 /* Max number of connections to keep in purgatory per peer */ #define GNILND_PURGATORY_MAX 5 @@ -180,8 +195,6 @@ #define GNILND_BUF_IMMEDIATE_KIOV 2 /* immediate data */ #define GNILND_BUF_PHYS_UNMAPPED 3 /* physical: not mapped yet */ #define GNILND_BUF_PHYS_MAPPED 4 /* physical: mapped already */ -#define GNILND_BUF_VIRT_UNMAPPED 5 /* virtual: not mapped yet */ -#define GNILND_BUF_VIRT_MAPPED 6 /* virtual: mapped already */ #define GNILND_TX_WAITING_REPLY (1<<1) /* expecting to receive reply */ #define GNILND_TX_WAITING_COMPLETION (1<<2) /* waiting for smsg_send to complete */ @@ -335,7 +348,7 @@ typedef enum kgn_dgram_type { v2: * - added checksum to FMA * moved seq before paylod - * WIRE_ATTR added for alignment + * __packed added for alignment v3: * added gnm_payload_len for FMA payload size v4: @@ -362,12 +375,12 @@ typedef struct kgn_gniparams { __u32 gnpr_host_id; /* ph. host ID of the NIC */ __u32 gnpr_cqid; /* cqid I want peer to use when sending events to me */ gni_smsg_attr_t gnpr_smsg_attr; /* my short msg. attributes */ -} WIRE_ATTR kgn_gniparams_t; +} __packed kgn_gniparams_t; typedef struct kgn_nak_data { __s32 gnnd_errno; /* errno reason for NAK */ -} WIRE_ATTR kgn_nak_data_t; +} __packed kgn_nak_data_t; /* the first bits of the connreq struct CANNOT CHANGE FORM EVER * without breaking the ability for us to properly NAK someone */ @@ -389,42 +402,42 @@ typedef struct kgn_connreq { /* connection request/response * kgn_gniparams_t gncr_gnparams; /* sender's endpoint info */ kgn_nak_data_t gncr_nakdata; /* data (rc, etc) for NAK */ }; -} WIRE_ATTR kgn_connreq_t; +} __packed kgn_connreq_t; typedef struct { gni_mem_handle_t gnrd_key; __u64 gnrd_addr; __u32 gnrd_nob; -} WIRE_ATTR kgn_rdma_desc_t; +} __packed kgn_rdma_desc_t; typedef struct { struct lnet_hdr gnim_hdr; /* LNet header */ /* LNet payload is in FMA "Message Data" */ -} WIRE_ATTR kgn_immediate_msg_t; +} __packed kgn_immediate_msg_t; typedef struct { struct lnet_hdr gnprm_hdr; /* LNet header */ __u64 gnprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kgn_putreq_msg_t; +} __packed kgn_putreq_msg_t; typedef struct { __u64 gnpam_src_cookie; /* reflected completion cookie */ __u64 gnpam_dst_cookie; /* opaque completion cookie */ __u16 gnpam_payload_cksum; /* checksum for get msg */ kgn_rdma_desc_t gnpam_desc; /* sender's sink buffer */ -} WIRE_ATTR kgn_putack_msg_t; +} __packed kgn_putack_msg_t; typedef struct { struct lnet_hdr gngm_hdr; /* LNet header */ __u64 gngm_cookie; /* opaque completion cookie */ __u16 gngm_payload_cksum; /* checksum for put msg */ kgn_rdma_desc_t gngm_desc; /* sender's sink buffer */ -} WIRE_ATTR kgn_get_msg_t; +} __packed kgn_get_msg_t; typedef struct { int gncm_retval; /* error on NAK, size on REQ */ __u64 gncm_cookie; /* reflected completion cookie */ -} WIRE_ATTR kgn_completion_msg_t; +} __packed kgn_completion_msg_t; typedef struct { /* NB must fit in FMA "Prefix" */ __u32 gnm_magic; /* I'm an gni message */ @@ -443,7 +456,7 @@ typedef struct { /* NB must fit in FMA "Prefix" * kgn_get_msg_t get; kgn_completion_msg_t completion; } gnm_u; -} WIRE_ATTR kgn_msg_t; +} __packed kgn_msg_t; /************************************************************************ * runtime tunable data @@ -567,8 +580,6 @@ typedef struct kgn_device { atomic64_t gnd_nbytes_map; /* bytes of total GART maps - fma, tx, etc */ __u32 gnd_map_nphys; /* # TX phys mappings */ __u32 gnd_map_physnop; /* # TX phys pages mapped */ - __u32 gnd_map_nvirt; /* # TX virt mappings */ - __u64 gnd_map_virtnob; /* # TX virt bytes mapped */ spinlock_t gnd_map_lock; /* serialize gnd_map_XXX */ unsigned long gnd_next_map; /* next mapping attempt in jiffies */ int gnd_map_attempt; /* last map attempt # */ @@ -603,7 +614,7 @@ typedef struct kgn_device { typedef struct kgn_net { struct list_head gnn_list; /* chain on kgni_data::kgn_nets */ kgn_device_t *gnn_dev; /* device for this net */ - lnet_ni_t *gnn_ni; /* network interface instance */ + struct lnet_ni *gnn_ni; /* network interface instance */ atomic_t gnn_refcount; /* # current references */ int gnn_shutdown; /* lnd_shutdown set */ __u16 gnn_netnum; /* stash netnum for quicker lookup */ @@ -671,7 +682,7 @@ typedef struct kgn_tx { /* message descriptor */ kgn_tx_list_state_t tx_list_state;/* where in state machine is this TX ? */ struct list_head *tx_list_p; /* pointer to current list */ struct kgn_conn *tx_conn; /* owning conn */ - lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ + struct lnet_msg *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */ unsigned long tx_cred_wait; /* time spend waiting for smsg creds */ struct list_head tx_map_list; /* list entry on device map list */ @@ -752,7 +763,7 @@ typedef struct kgn_conn { kgn_fma_memblock_t *gnc_fma_blk; /* pointer to fma block for our mailbox */ gni_smsg_attr_t gnpr_smsg_attr; /* my short msg. attributes */ spinlock_t gnc_tx_lock; /* protect tx alloc/free */ - __u8 gnc_tx_bits[GNILND_MAX_MSG_ID/8]; /* bit table for tx id */ + unsigned long gnc_tx_bits[(GNILND_MAX_MSG_ID/8)/sizeof(unsigned long)]; /* bit table for tx id */ int gnc_next_tx; /* next tx to use in tx_ref_table */ kgn_tx_t **gnc_tx_ref_table; /* table of TX descriptors for this conn */ int gnc_mbox_id; /* id of mbox in fma_blk */ @@ -780,7 +791,7 @@ typedef struct kgn_peer { short gnp_connecting; /* connection forming */ short gnp_pending_unlink; /* need last conn close to trigger unlink */ int gnp_last_errno; /* last error conn saw */ - unsigned long gnp_last_alive; /* last time I had valid comms */ + time64_t gnp_last_alive; /* last time I had valid comms */ int gnp_last_dgram_errno; /* last error dgrams saw */ unsigned long gnp_last_dgram_time; /* last time I tried to connect */ unsigned long gnp_reconnect_time; /* get_seconds() when reconnect OK */ @@ -797,9 +808,9 @@ typedef struct kgn_peer { typedef struct kgn_rx { kgn_conn_t *grx_conn; /* connection */ kgn_msg_t *grx_msg; /* message */ - lnet_msg_t *grx_lntmsg; /* lnet msg for this rx (eager only) */ + struct lnet_msg *grx_lntmsg; /* lnet msg for this rx (eager only) */ int grx_eager; /* if eager, we copied msg to somewhere */ - struct timespec grx_received; /* time this msg received */ + struct timespec64 grx_received; /* time this msg received */ } kgn_rx_t; typedef struct kgn_data { @@ -988,12 +999,10 @@ static inline void *kgnilnd_vzalloc(int size) { void *ret; if (*kgnilnd_tunables.kgn_vzalloc_noretry) - ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY | - __GFP_ZERO, - PAGE_KERNEL); + ret = __ll_vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO | + __GFP_NORETRY); else - ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO, - PAGE_KERNEL); + ret = __ll_vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO); LIBCFS_ALLOC_POST(ret, size); return ret; @@ -1146,34 +1155,36 @@ do { \ #error "this code uses actions inside LASSERT for ref counting" #endif -#define kgnilnd_admin_addref(atomic) \ -do { \ - int val = atomic_inc_return(&atomic); \ - LASSERTF(val > 0, #atomic " refcount %d\n", val); \ - CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \ +#define kgnilnd_admin_addref(atomic) \ +do { \ + int val = atomic_inc_return(&atomic); \ + LASSERTF(val > 0, #atomic " refcount %d\n", val); \ + CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \ } while (0) -#define kgnilnd_admin_decref(atomic) \ -do { \ - int val = atomic_dec_return(&atomic); \ - LASSERTF(val >=0, #atomic " refcount %d\n", val); \ - CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \ +#define kgnilnd_admin_decref(atomic) \ +do { \ + int val = atomic_dec_return(&atomic); \ + LASSERTF(val >= 0, #atomic " refcount %d\n", val); \ + CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \ + if (!val) \ + wake_up_var(&kgnilnd_data); \ }while (0) -#define kgnilnd_net_addref(net) \ -do { \ - int val = atomic_inc_return(&net->gnn_refcount); \ - LASSERTF(val > 1, "net %p refcount %d\n", net, val); \ - CDEBUG(D_NETTRACE, "net %p->%s++ (%d)\n", net, \ - libcfs_nid2str(net->gnn_ni->ni_nid), val); \ +#define kgnilnd_net_addref(net) \ +do { \ + int val = atomic_inc_return(&net->gnn_refcount); \ + LASSERTF(val > 1, "net %p refcount %d\n", net, val); \ + CDEBUG(D_NETTRACE, "net %p->%s++ (%d)\n", net, \ + libcfs_nidstr(&net->gnn_ni->ni_nid), val); \ } while (0) -#define kgnilnd_net_decref(net) \ -do { \ - int val = atomic_dec_return(&net->gnn_refcount); \ - LASSERTF(val >= 0, "net %p refcount %d\n", net, val); \ - CDEBUG(D_NETTRACE, "net %p->%s-- (%d)\n", net, \ - libcfs_nid2str(net->gnn_ni->ni_nid), val); \ +#define kgnilnd_net_decref(net) \ +do { \ + int val = atomic_dec_return(&net->gnn_refcount); \ + LASSERTF(val >= 0, "net %p refcount %d\n", net, val); \ + CDEBUG(D_NETTRACE, "net %p->%s-- (%d)\n", net, \ + libcfs_nidstr(&net->gnn_ni->ni_nid), val); \ } while (0) #define kgnilnd_peer_addref(peer) \ @@ -1367,7 +1378,7 @@ kgnilnd_check_purgatory_conn(kgn_conn_t *conn) if (conn->gnc_peer) { loopback = conn->gnc_peer->gnp_nid == - conn->gnc_peer->gnp_net->gnn_ni->ni_nid; + lnet_nid_to_nid4(&conn->gnc_peer->gnp_net->gnn_ni->ni_nid); } else { /* short circuit - a conn that didn't complete * setup never needs a purgatory hold */ @@ -1559,8 +1570,7 @@ kgnilnd_tx_del_state_locked(kgn_tx_t *tx, kgn_peer_t *peer, static inline int kgnilnd_tx_mapped(kgn_tx_t *tx) { - return (tx->tx_buftype == GNILND_BUF_VIRT_MAPPED || - tx->tx_buftype == GNILND_BUF_PHYS_MAPPED); + return tx->tx_buftype == GNILND_BUF_PHYS_MAPPED; } static inline struct list_head * @@ -1736,8 +1746,11 @@ kgnilnd_find_net(lnet_nid_t nid, kgn_net_t **netp) return -ESHUTDOWN; } - list_for_each_entry(net, kgnilnd_netnum2netlist(LNET_NETNUM(LNET_NIDNET(nid))), gnn_list) { - if (!net->gnn_shutdown && LNET_NIDNET(net->gnn_ni->ni_nid) == LNET_NIDNET(nid)) { + list_for_each_entry(net, + kgnilnd_netnum2netlist(LNET_NETNUM(LNET_NIDNET(nid))), + gnn_list) { + if (!net->gnn_shutdown && + LNET_NID_NET(&net->gnn_ni->ni_nid) == LNET_NIDNET(nid)) { kgnilnd_net_addref(net); up_read(&kgnilnd_data.kgn_net_rw_sem); *netp = net; @@ -1758,8 +1771,8 @@ kgnilnd_find_net(lnet_nid_t nid, kgn_net_t **netp) int kgnilnd_dev_init(kgn_device_t *dev); void kgnilnd_dev_fini(kgn_device_t *dev); -int kgnilnd_startup(lnet_ni_t *ni); -void kgnilnd_shutdown(lnet_ni_t *ni); +int kgnilnd_startup(struct lnet_ni *ni); +void kgnilnd_shutdown(struct lnet_ni *ni); int kgnilnd_base_startup(void); void kgnilnd_base_shutdown(void); @@ -1768,17 +1781,17 @@ int kgnilnd_map_phys_fmablk(kgn_device_t *device); void kgnilnd_unmap_fma_blocks(kgn_device_t *device); void kgnilnd_free_phys_fmablk(kgn_device_t *device); -int kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -void kgnilnd_query(lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when); -int kgnilnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kgnilnd_eager_recv(lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, +int kgnilnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg); +int kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg); +int kgnilnd_eager_recv(struct lnet_ni *ni, void *private, + struct lnet_msg *lntmsg, void **new_private); +int kgnilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, int delayed, unsigned int niov, - struct kvec *iov, lnet_kiov_t *kiov, + struct bio_vec *kiov, unsigned int offset, unsigned int mlen, unsigned int rlen); -__u16 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int offset, unsigned int nob, int dump_blob); +__u16 kgnilnd_cksum_kiov(unsigned int nkiov, struct bio_vec *kiov, + unsigned int offset, unsigned int nob, int dump_blob); /* purgatory functions */ void kgnilnd_add_purgatory_locked(kgn_conn_t *conn, kgn_peer_t *peer); @@ -1805,13 +1818,15 @@ int kgnilnd_del_conn_or_peer(kgn_net_t *net, lnet_nid_t nid, int command, int er void kgnilnd_peer_increase_reconnect_locked(kgn_peer_t *peer); void kgnilnd_queue_reply(kgn_conn_t *conn, kgn_tx_t *tx); void kgnilnd_queue_tx(kgn_conn_t *conn, kgn_tx_t *tx); -void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target); +void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, + struct lnet_processid *target); int kgnilnd_send_mapped_tx(kgn_tx_t *tx, int try_map_if_full); void kgnilnd_consume_rx(kgn_rx_t *rx); void kgnilnd_schedule_device(kgn_device_t *dev); void kgnilnd_device_callback(__u32 devid, __u64 arg); -void kgnilnd_schedule_device_timer(unsigned long arg); +void kgnilnd_schedule_device_timer(cfs_timer_cb_arg_t data); +void kgnilnd_schedule_device_timer_rd(cfs_timer_cb_arg_t data); int kgnilnd_reaper(void *arg); int kgnilnd_scheduler(void *arg);