/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/** \addtogroup lnet
* @{ */
-#include <libcfs/libcfs.h>
+#include <libcfs/types.h>
/** \addtogroup lnet_addr
* @{ */
/** wildcard PID that matches any lnet_pid_t */
#define LNET_PID_ANY ((lnet_pid_t) -1)
-#ifdef CRAY_XT3
-typedef __u32 lnet_uid_t;
-#define LNET_UID_ANY ((lnet_uid_t) -1)
-#endif
-
#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */
#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */
+#define LNET_PID_LUSTRE 12345
#define LNET_TIME_FOREVER (-1)
+/* how an LNET NID encodes net:address */
+/** extract the address part of an lnet_nid_t */
+
+static inline __u32 LNET_NIDADDR(lnet_nid_t nid)
+{
+ return nid & 0xffffffff;
+}
+
+static inline __u32 LNET_NIDNET(lnet_nid_t nid)
+{
+ return (nid >> 32) & 0xffffffff;
+}
+
+static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr)
+{
+ return (((__u64)net) << 32) | addr;
+}
+
+static inline __u32 LNET_NETNUM(__u32 net)
+{
+ return net & 0xffff;
+}
+
+static inline __u32 LNET_NETTYP(__u32 net)
+{
+ return (net >> 16) & 0xffff;
+}
+
+static inline __u32 LNET_MKNET(__u32 type, __u32 num)
+{
+ return (type << 16) | num;
+}
+
+#define WIRE_ATTR __attribute__((packed))
+
+/* Packed version of lnet_process_id_t to transfer via network */
+typedef struct {
+ lnet_nid_t nid;
+ lnet_pid_t pid; /* node id / process id */
+} WIRE_ATTR lnet_process_id_packed_t;
+
+/* The wire handle's interface cookie only matches one network interface in
+ * one epoch (i.e. new cookie when the interface restarts or the node
+ * reboots). The object cookie only matches one object on that interface
+ * during that object's lifetime (i.e. no cookie re-use). */
+typedef struct {
+ __u64 wh_interface_cookie;
+ __u64 wh_object_cookie;
+} WIRE_ATTR lnet_handle_wire_t;
+
+typedef enum {
+ LNET_MSG_ACK = 0,
+ LNET_MSG_PUT,
+ LNET_MSG_GET,
+ LNET_MSG_REPLY,
+ LNET_MSG_HELLO,
+} lnet_msg_type_t;
+
+/* The variant fields of the portals message header are aligned on an 8
+ * byte boundary in the message header. Note that all types used in these
+ * wire structs MUST be fixed size and the smaller types are placed at the
+ * end. */
+typedef struct lnet_ack {
+ lnet_handle_wire_t dst_wmd;
+ __u64 match_bits;
+ __u32 mlength;
+} WIRE_ATTR lnet_ack_t;
+
+typedef struct lnet_put {
+ lnet_handle_wire_t ack_wmd;
+ __u64 match_bits;
+ __u64 hdr_data;
+ __u32 ptl_index;
+ __u32 offset;
+} WIRE_ATTR lnet_put_t;
+
+typedef struct lnet_get {
+ lnet_handle_wire_t return_wmd;
+ __u64 match_bits;
+ __u32 ptl_index;
+ __u32 src_offset;
+ __u32 sink_length;
+} WIRE_ATTR lnet_get_t;
+
+typedef struct lnet_reply {
+ lnet_handle_wire_t dst_wmd;
+} WIRE_ATTR lnet_reply_t;
+
+typedef struct lnet_hello {
+ __u64 incarnation;
+ __u32 type;
+} WIRE_ATTR lnet_hello_t;
+
+typedef struct {
+ lnet_nid_t dest_nid;
+ lnet_nid_t src_nid;
+ lnet_pid_t dest_pid;
+ lnet_pid_t src_pid;
+ __u32 type; /* lnet_msg_type_t */
+ __u32 payload_length; /* payload data to follow */
+ /*<------__u64 aligned------->*/
+ union {
+ lnet_ack_t ack;
+ lnet_put_t put;
+ lnet_get_t get;
+ lnet_reply_t reply;
+ lnet_hello_t hello;
+ } msg;
+} WIRE_ATTR lnet_hdr_t;
+
+/* A HELLO message contains a magic number and protocol version
+ * code in the header's dest_nid, the peer's NID in the src_nid, and
+ * LNET_MSG_HELLO in the type field. All other common fields are zero
+ * (including payload_size; i.e. no payload).
+ * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
+ * running the same protocol and to find out its NID. These LNDs should
+ * exchange HELLO messages when a connection is first established. Individual
+ * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
+ */
+typedef struct {
+ __u32 magic; /* LNET_PROTO_TCP_MAGIC */
+ __u16 version_major; /* increment on incompatible change */
+ __u16 version_minor; /* increment on compatible change */
+} WIRE_ATTR lnet_magicversion_t;
+
+/* PROTO MAGIC for LNDs */
+#define LNET_PROTO_IB_MAGIC 0x0be91b91
+#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */
+#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
+#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100
+#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */
+
+/* Placeholder for a future "unified" protocol across all LNDs */
+/* Current LNDs that receive a request with this magic will respond with a
+ * * * "stub" reply using their current protocol */
+#define LNET_PROTO_MAGIC 0x45726963 /* ! */
+
+#define LNET_PROTO_TCP_VERSION_MAJOR 1
+#define LNET_PROTO_TCP_VERSION_MINOR 0
+
+/* Acceptor connection request */
+typedef struct {
+ __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */
+ __u32 acr_version; /* protocol version */
+ __u64 acr_nid; /* target NID */
+} WIRE_ATTR lnet_acceptor_connreq_t;
+
+#define LNET_PROTO_ACCEPTOR_VERSION 1
+
+typedef struct lnet_counters {
+ __u32 msgs_alloc;
+ __u32 msgs_max;
+ __u32 errors;
+ __u32 send_count;
+ __u32 recv_count;
+ __u32 route_count;
+ __u32 drop_count;
+ __u64 send_length;
+ __u64 recv_length;
+ __u64 route_length;
+ __u64 drop_length;
+} WIRE_ATTR lnet_counters_t;
+
+#define LNET_NI_STATUS_UP 0x15aac0de
+#define LNET_NI_STATUS_DOWN 0xdeadface
+#define LNET_NI_STATUS_INVALID 0x00000000
+
+#define LNET_MAX_INTERFACES 16
+
/**
* Objects maintained by the LNet are accessed through handles. Handle types
* have names of the form lnet_handle_xx_t, where xx is one of the two letter
* or after the last item in the list.
*/
typedef enum {
- LNET_INS_BEFORE,
- LNET_INS_AFTER
+ /** insert ME before current position or head of the list */
+ LNET_INS_BEFORE,
+ /** insert ME after current position or tail of the list */
+ LNET_INS_AFTER,
+ /** attach ME at tail of local CPU partition ME list */
+ LNET_INS_LOCAL
} lnet_ins_pos_t;
/** @} lnet_me */
lnet_handle_eq_t eq_handle;
} lnet_md_t;
-/* Max Transfer Unit (minimum supported everywhere) */
-#define LNET_MTU_BITS 20
-#define LNET_MTU (1<<LNET_MTU_BITS)
+/* Max Transfer Unit (minimum supported everywhere).
+ * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
+ * these limits are system wide and not interface-local. */
+#define LNET_MTU_BITS 20
+#define LNET_MTU (1 << LNET_MTU_BITS)
/** limit on the number of fragments in discontiguous MDs */
#define LNET_MAX_IOV 256
-/* Max payload size */
-#ifndef LNET_MAX_PAYLOAD
-# error "LNET_MAX_PAYLOAD must be defined in config.h"
-#else
-# if (LNET_MAX_PAYLOAD < LNET_MTU)
-# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb"
-# elif defined(__KERNEL__)
-# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV))
-/* PAGE_SIZE is a constant: check with cpp! */
-# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb"
-# endif
-# endif
-#endif
-
/**
* Options for the MD structure. See lnet_md_t::options.
*/
* A page-based fragment of a MD.
*/
typedef struct {
- /** Pointer to the page where the fragment resides */
- cfs_page_t *kiov_page;
- /** Length in bytes of the fragment */
- unsigned int kiov_len;
- /**
- * Starting offset of the fragment within the page. Note that the
- * end of the fragment must not pass the end of the page; i.e.,
- * kiov_len + kiov_offset <= CFS_PAGE_SIZE.
- */
- unsigned int kiov_offset;
+ /** Pointer to the page where the fragment resides */
+ struct page *kiov_page;
+ /** Length in bytes of the fragment */
+ unsigned int kiov_len;
+ /**
+ * Starting offset of the fragment within the page. Note that the
+ * end of the fragment must not pass the end of the page; i.e.,
+ * kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
+ */
+ unsigned int kiov_offset;
} lnet_kiov_t;
/** @} lnet_md */
typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t;
#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0)
-/* XXX
- * cygwin need the pragma line, not clear if it's needed in other places.
- * checking!!!
- */
-#ifdef __CYGWIN__
-#pragma pack(push, 4)
-#endif
-
/**
* Information about an event on a MD.
*/
* \see lnet_md_t::options
*/
unsigned int offset;
-#ifdef CRAY_XT3
- lnet_uid_t uid;
-#endif
-
/**
* The sequence number for this event. Sequence numbers are unique
* to each event.
*/
volatile lnet_seq_t sequence;
} lnet_event_t;
-#ifdef __CYGWIN__
-#pragma pop
-#endif
/**
* Event queue handler function type.