/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <lnet/darwin/lib-types.h>
#elif defined(__WINNT__)
#include <lnet/winnt/lib-types.h>
+#include <libcfs/libcfs_pack.h>
+#include <libcfs/libcfs_unpack.h>
#else
#error Unsupported Operating System
#endif
#define WIRE_ATTR __attribute__((packed))
/* Packed version of lnet_process_id_t to transfer via network */
-#include <libcfs/libcfs_pack.h>
typedef struct {
lnet_nid_t nid;
lnet_pid_t pid; /* node id / process id */
__u64 wh_interface_cookie;
__u64 wh_object_cookie;
} WIRE_ATTR lnet_handle_wire_t;
-#include <libcfs/libcfs_unpack.h>
typedef enum {
LNET_MSG_ACK = 0,
* byte boundary in the message header. Note that all types used in these
* wire structs MUST be fixed size and the smaller types are placed at the
* end. */
-#include <libcfs/libcfs_pack.h>
typedef struct lnet_ack {
lnet_handle_wire_t dst_wmd;
__u64 match_bits;
__u32 acr_version; /* protocol version */
__u64 acr_nid; /* target NID */
} WIRE_ATTR lnet_acceptor_connreq_t;
-#include <libcfs/libcfs_unpack.h>
#define LNET_PROTO_ACCEPTOR_VERSION 1
/* commited for sending */
unsigned int msg_tx_committed:1;
- /* queued for tx credit */
- unsigned int msg_tx_delayed:1;
+ /* CPT # this message committed for sending */
+ unsigned int msg_tx_cpt:15;
/* commited for receiving */
unsigned int msg_rx_committed:1;
+ /* CPT # this message committed for receiving */
+ unsigned int msg_rx_cpt:15;
+ /* queued for tx credit */
+ unsigned int msg_tx_delayed:1;
/* queued for RX buffer */
unsigned int msg_rx_delayed:1;
/* ready for pending on RX delay list */
lnet_libhandle_t me_lh;
lnet_process_id_t me_match_id;
unsigned int me_portal;
+ unsigned int me_pos; /* hash offset in mt_hash */
__u64 me_match_bits;
__u64 me_ignore_bits;
lnet_unlink_t me_unlink;
#define LNET_COOKIE_TYPE_ME 2
#define LNET_COOKIE_TYPE_EQ 3
#define LNET_COOKIE_TYPE_BITS 2
-#define LNET_COOKIE_TYPES (1 << LNET_COOKIE_TYPE_BITS)
-/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be
- * extracted by masking with (LNET_COOKIE_TYPES - 1) */
+#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) /* low bits of a cookie: its LNET_COOKIE_TYPE_* */
struct lnet_ni; /* forward ref */
__u32 ns_unused;
} WIRE_ATTR lnet_ni_status_t;
+struct lnet_tx_queue { /* tx credit state; lnet_ni_t keeps per-CPT instances in ni_tx_queues */
+ int tq_credits; /* # tx credits currently free */
+ int tq_credits_min; /* lowest value tq_credits has reached */
+ int tq_credits_max; /* total # tx credits */
+ cfs_list_t tq_delayed; /* delayed TXs (presumably those waiting for a tx credit) */
+};
+
#define LNET_MAX_INTERFACES 16
typedef struct lnet_ni {
- cfs_list_t ni_list; /* chain on ln_nis */
- cfs_list_t ni_txq; /* messages waiting for tx credits */
- int ni_maxtxcredits; /* # tx credits */
- int ni_txcredits; /* # tx credits free */
- int ni_mintxcredits; /* lowest it's been */
- int ni_peertxcredits; /* # per-peer send credits */
- int ni_peerrtrcredits; /* # per-peer router buffer credits */
- int ni_peertimeout; /* seconds to consider peer dead */
- lnet_nid_t ni_nid; /* interface's NID */
- void *ni_data; /* instance-specific data */
- lnd_t *ni_lnd; /* procedural interface */
- int ni_refcount; /* reference count */
- /* when I was last alive */
- long ni_last_alive;
+#ifdef __KERNEL__
+ spinlock_t ni_lock;
+#else
+# ifndef HAVE_LIBPTHREAD
+ int ni_lock;
+# else
+ pthread_mutex_t ni_lock;
+# endif
+#endif
+ cfs_list_t ni_list; /* chain on ln_nis */
+ cfs_list_t ni_cptlist; /* chain on ln_nis_cpt */
+ int ni_maxtxcredits; /* # tx credits */
+ /* # per-peer send credits */
+ int ni_peertxcredits;
+ /* # per-peer router buffer credits */
+ int ni_peerrtrcredits;
+ /* seconds to consider peer dead */
+ int ni_peertimeout;
+ int ni_ncpts; /* number of CPTs */
+ __u32 *ni_cpts; /* CPTs this NI is bound to */
+ lnet_nid_t ni_nid; /* interface's NID */
+ void *ni_data; /* instance-specific data */
+ lnd_t *ni_lnd; /* procedural interface */
+ struct lnet_tx_queue **ni_tx_queues; /* per-CPT TX queues (struct lnet_tx_queue) */
+ int **ni_refs; /* per-CPT reference counts */
+ long ni_last_alive; /* when I was last alive */
lnet_ni_status_t *ni_status; /* my health status */
/* equivalent interfaces to use */
char *ni_interfaces[LNET_MAX_INTERFACES];
} lnet_ni_t;
-#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
-enum {
- LNET_PROTO_PING_UNKNOWN = 0, /* unknown */
- LNET_PROTO_PING_VERSION_1 = 1, /* old version */
- LNET_PROTO_PING_VERSION = 2, /* current version */
-};
+#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL
+
+/* NB: the values of these features equal the LNET_PROTO_PING_VERSION_x
+ * values of old LNet, so there shouldn't be any compatibility issue */
+#define LNET_PING_FEAT_INVAL (0) /* no feature */
+#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */
+#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */
+
+#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \
+ LNET_PING_FEAT_NI_STATUS)
typedef struct {
- __u32 pi_magic;
- __u32 pi_version;
- lnet_pid_t pi_pid;
- __u32 pi_nnis;
- lnet_ni_status_t pi_ni[0];
+ __u32 pi_magic;
+ __u32 pi_features; /* presumably LNET_PING_FEAT_* bits; occupies the slot of the old pi_version */
+ lnet_pid_t pi_pid;
+ __u32 pi_nnis; /* presumably # entries in pi_ni[] -- TODO confirm */
+ lnet_ni_status_t pi_ni[0]; /* zero-length trailing array */
} WIRE_ATTR lnet_ping_info_t;
/* router checker data, per router */
lnet_ni_t *lp_ni; /* interface peer is on */
lnet_nid_t lp_nid; /* peer's NID */
int lp_refcount; /* # refs */
- int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */
- /* returned RC ping version */
- unsigned int lp_ping_version;
+ int lp_cpt; /* CPT this peer attached on */
+ /* # refs from lnet_route_t::lr_gateway */
+ int lp_rtr_refcount;
+ /* returned RC ping features */
+ unsigned int lp_ping_feats;
cfs_list_t lp_routes; /* routers on this peer */
lnet_rc_data_t *lp_rcd; /* router checker state */
} lnet_peer_t;
cfs_list_t *pt_hash; /* NID->peer hash */
};
-#define lnet_peer_aliveness_enabled(lp) ((lp)->lp_ni->ni_peertimeout > 0)
+/* Peer aliveness is enabled only on routers (the_lnet.ln_routing != 0), and
+ * only for peers whose NI (lp->lp_ni) has lnet_ni_t::ni_peertimeout set to a
+ * positive value. 'lp' is evaluated at most once. */
+#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
+ (lp)->lp_ni->ni_peertimeout > 0)
typedef struct {
cfs_list_t lr_list; /* chain on net */
cfs_list_t lr_gwlist; /* chain on gateway */
lnet_peer_t *lr_gateway; /* router node */
__u32 lr_net; /* remote network number */
+ int lr_seq; /* sequence for round-robin */
unsigned int lr_downis; /* number of down NIs */
unsigned int lr_hops; /* how far I am */
} lnet_route_t;
+#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) /* 128 */
+#define LNET_REMOTE_NETS_HASH_MAX (1U << 16) /* 65536 */
+#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) /* 2^ln_remote_nets_hbits; a run-time value, not a constant */
+
typedef struct {
- cfs_list_t lrn_list; /* chain on ln_remote_nets */
+ cfs_list_t lrn_list; /* chain on ln_remote_nets_hash */
cfs_list_t lrn_routes; /* routes to me */
__u32 lrn_net; /* my net number */
} lnet_remotenet_t;
lnet_kiov_t rb_kiov[0]; /* the buffer space */
} lnet_rtrbuf_t;
-#include <libcfs/libcfs_pack.h>
typedef struct {
__u32 msgs_alloc;
__u32 msgs_max;
__u64 route_length;
__u64 drop_length;
} WIRE_ATTR lnet_counters_t;
-#include <libcfs/libcfs_unpack.h>
#define LNET_PEER_HASHSIZE 503 /* prime! */
/* ME hash of RDMA portal */
#define LNET_MT_HASH_BITS 8
#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS)
+#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1)
+/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash,
+ * the last entry is reserved for MEs with ignore-bits */
+#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE
+/* a __u64 has 2^6 bits, so the bitmap needs
+ * 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) = 4 __u64s, plus one extra __u64
+ * (of which only one bit is used) for the ME-list with ignore-bits, which is
+ * lnet_match_table::mt_mhash[LNET_MT_HASH_IGNORE] */
+#define LNET_MT_BITS_U64 6 /* 2^6 bits */
+#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
+#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1)
/* portal match table */
struct lnet_match_table {
unsigned int mt_cpt;
unsigned int mt_portal; /* portal index */
/* match table is set as "enabled" if there's non-exhausted MD
- * attached on mt_mlist, it's only valide for wildcard portal */
+ * attached on mt_mhash, it's only valid for wildcard portal */
unsigned int mt_enabled;
- cfs_list_t mt_mlist; /* matching list */
+ /* bitmap to flag whether MEs on mt_mhash are exhausted or not */
+ __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
cfs_list_t *mt_mhash; /* matching hash */
};
+/* these are only useful for wildcard portal */
+/* Turn off message rotor for wildcard portals */
+#define LNET_PTL_ROTOR_OFF 0
+/* round-robin dispatch all PUT messages for wildcard portals */
+#define LNET_PTL_ROTOR_ON 1
+/* round-robin dispatch routed PUT message for wildcard portals */
+#define LNET_PTL_ROTOR_RR_RT 2
+/* dispatch routed PUT message by hashing source NID for wildcard portals */
+#define LNET_PTL_ROTOR_HASH_RT 3
+
typedef struct lnet_portal {
#ifdef __KERNEL__
- cfs_spinlock_t ptl_lock;
+ spinlock_t ptl_lock;
#else
# ifndef HAVE_LIBPTHREAD
int ptl_lock;
unsigned int ln_cpt_number;
unsigned int ln_cpt_bits;
+ /* protect LNet resources (ME/MD/EQ) */
+ struct cfs_percpt_lock *ln_res_lock;
+ /* # portals */
+ int ln_nportals;
+ /* the vector of portals */
+ lnet_portal_t **ln_portals;
+ /* percpt ME containers */
+ struct lnet_res_container **ln_me_containers;
+ /* percpt MD container */
+ struct lnet_res_container **ln_md_containers;
+
+ /* Event Queue container */
+ struct lnet_res_container ln_eq_container;
#ifdef __KERNEL__
- cfs_spinlock_t ln_lock;
- cfs_mutex_t ln_api_mutex;
- cfs_mutex_t ln_lnd_mutex;
cfs_waitq_t ln_eq_waitq;
- cfs_spinlock_t ln_eq_wait_lock;
+ spinlock_t ln_eq_wait_lock;
#else
# ifndef HAVE_LIBPTHREAD
- int ln_lock;
- int ln_api_mutex;
- int ln_lnd_mutex;
int ln_eq_wait_lock;
# else
- pthread_mutex_t ln_lock;
- pthread_mutex_t ln_api_mutex;
- pthread_mutex_t ln_lnd_mutex;
pthread_cond_t ln_eq_cond;
pthread_mutex_t ln_eq_wait_lock;
# endif
#endif
- struct cfs_percpt_lock *ln_res_lock;
- /* ME container */
- struct lnet_res_container **ln_me_containers;
- /* MD container */
- struct lnet_res_container **ln_md_containers;
- /* Event Queue container */
- struct lnet_res_container ln_eq_container;
-
- /* # portals */
- int ln_nportals;
- /* the vector of portals */
- lnet_portal_t **ln_portals;
-
- int ln_init; /* LNetInit() called? */
- /* LNetNIInit/LNetNIFini counter */
- int ln_refcount;
- /* Have I called LNetNIInit myself? */
- int ln_niinit_self;
- /* shutdown in progress */
- int ln_shutdown;
- /* registered LNDs */
- cfs_list_t ln_lnds;
-
- lnet_pid_t ln_pid; /* requested pid */
-
- cfs_list_t ln_nis; /* LND instances */
- lnet_ni_t *ln_loni; /* the loopback NI */
+ unsigned int ln_remote_nets_hbits;
+
+ /* protect NI, peer table, credits, routers, rtrbuf... */
+ struct cfs_percpt_lock *ln_net_lock;
+ /* percpt message containers for active/finalizing/freed message */
+ struct lnet_msg_container **ln_msg_containers;
+ lnet_counters_t **ln_counters;
+ struct lnet_peer_table **ln_peer_tables;
+ /* failure simulation */
+ cfs_list_t ln_test_peers;
+
+ cfs_list_t ln_nis; /* LND instances */
+ /* NIs bond on specific CPT(s) */
+ cfs_list_t ln_nis_cpt;
+ /* dying LND instances */
+ cfs_list_t ln_nis_zombie;
+ lnet_ni_t *ln_loni; /* the loopback NI */
/* NI to wait for events in */
lnet_ni_t *ln_eq_waitni;
- cfs_list_t ln_zombie_nis; /* dying LND instances */
- int ln_nzombie_nis; /* # of NIs to wait for */
-
- cfs_list_t ln_remote_nets; /* remote networks with routes to them */
- __u64 ln_remote_nets_version; /* validity stamp */
-
- cfs_list_t ln_routers; /* list of all known routers */
- __u64 ln_routers_version; /* validity stamp */
-
- int ln_routing; /* am I a router? */
- lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */
-
- __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */
-
- char *ln_network_tokens; /* space for network names */
- int ln_network_tokens_nob;
-
- int ln_testprotocompat; /* test protocol compatibility flags */
-
- cfs_list_t ln_test_peers; /* failure simulation */
-
- /* message container */
- struct lnet_peer_table *ln_peer_table;
- struct lnet_msg_container ln_msg_container;
-
- lnet_handle_md_t ln_ping_target_md;
- lnet_handle_eq_t ln_ping_target_eq;
- lnet_ping_info_t *ln_ping_info;
+ /* remote networks with routes to them */
+ cfs_list_t *ln_remote_nets_hash;
+ /* validity stamp */
+ __u64 ln_remote_nets_version;
+ /* list of all known routers */
+ cfs_list_t ln_routers;
+ /* validity stamp */
+ __u64 ln_routers_version;
+ /* percpt router buffer pools */
+ lnet_rtrbufpool_t **ln_rtrpools;
+
+ lnet_handle_md_t ln_ping_target_md;
+ lnet_handle_eq_t ln_ping_target_eq;
+ lnet_ping_info_t *ln_ping_info;
-#ifdef __KERNEL__
- cfs_semaphore_t ln_rc_signal; /* serialise startup/shutdown */
-#endif
/* router checker startup/shutdown state */
int ln_rc_state;
/* router checker's event queue */
cfs_list_t ln_rcd_deathrow;
/* rcd ready for free */
cfs_list_t ln_rcd_zombie;
+#ifdef __KERNEL__
+ /* serialise startup/shutdown */
+ struct semaphore ln_rc_signal;
+
+ struct mutex ln_api_mutex;
+ struct mutex ln_lnd_mutex;
+#else
+# ifndef HAVE_LIBPTHREAD
+ int ln_api_mutex;
+ int ln_lnd_mutex;
+# else
+ pthread_mutex_t ln_api_mutex;
+ pthread_mutex_t ln_lnd_mutex;
+# endif
+#endif
+ int ln_init; /* LNetInit() called? */
+ /* Have I called LNetNIInit myself? */
+ int ln_niinit_self;
+ /* LNetNIInit/LNetNIFini counter */
+ int ln_refcount;
+ /* shutdown in progress */
+ int ln_shutdown;
+
+ int ln_routing; /* am I a router? */
+ lnet_pid_t ln_pid; /* requested pid */
+ /* uniquely identifies this ni in this epoch */
+ __u64 ln_interface_cookie;
+ /* registered LNDs */
+ cfs_list_t ln_lnds;
- lnet_counters_t ln_counters;
+ /* space for network names */
+ char *ln_network_tokens;
+ int ln_network_tokens_nob;
+ /* test protocol compatibility flags */
+ int ln_testprotocompat;
#ifndef __KERNEL__
- /* Temporary workaround to allow uOSS and test programs force
- * server mode in userspace. The only place where we use it is
- * lnet_prepare(). The only way to turn this flag on is to
- * call lnet_server_mode() */
-
- int ln_server_mode_flag;
+ /* Temporary workaround to allow uOSS and test programs force
+ * server mode in userspace. The only place where we use it is
+ * lnet_prepare(). The only way to turn this flag on is to
+ * call lnet_server_mode() */
+ int ln_server_mode_flag;
#endif
} lnet_t;