1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
5 * Author: Eric Barton <eeb@bartonsoftware.com>
7 * This file is part of the Lustre file system, http://www.lustre.org
8 * Lustre is a trademark of Cluster File Systems, Inc.
10 * This file is confidential source code owned by Cluster File Systems.
11 * No viewing, modification, compilation, redistribution, or any other
12 * form of use is permitted except through a signed license agreement.
14 * If you have not signed such an agreement, then you have no rights to
15 * this file. Please destroy it immediately and contact CFS.
20 #define DEBUG_SUBSYSTEM S_LND
22 #include <lnet/lib-lnet.h>
23 #include <lnet/ptllnd_wire.h>
25 #include <portals/p30.h>
26 #include <lnet/ptllnd.h> /* Depends on portals/p30.h */
/* Compile-time switch: a non-zero value enables the timing instrumentation
 * guarded by "#if PTLLND_DEBUG_TIMING" in this header. */
29 #define PTLLND_DEBUG_TIMING 0
/* Built-in numeric settings.
 * NOTE(review): presumably the defaults for the similarly named plni_*
 * fields; confirm where they are consumed. */
31 #define PTLLND_MSGS_PER_BUFFER 64
32 #define PTLLND_MSGS_SPARE 256
33 #define PTLLND_PEER_HASH_SIZE 101
34 #define PTLLND_EQ_SIZE 1024
/* PTLLND_TX_HISTORY is 1024 when timing is enabled and 0 otherwise.
 * NOTE(review): the #else/#endif lines of this conditional are not visible
 * in this view. */
35 #if PTLLND_DEBUG_TIMING
36 # define PTLLND_TX_HISTORY 1024
38 # define PTLLND_TX_HISTORY 0
40 #define PTLLND_WARN_LONG_WAIT 5 /* seconds */
41 #define PTLLND_ABORT_ON_NAK 1 /* abort app on protocol version mismatch */
44 /* Hack to record history
45 * This should really be done by CDEBUG(D_NETTRACE... */
/* NOTE(review): the enclosing struct declaration and its remaining fields
 * are not visible in this view; only two members appear below. */
48 struct list_head he_list; /* list linkage; presumably chains the recorded entries */
49 struct timeval he_time; /* presumably the time the entry was recorded */
/* History recording interface (implementations live outside this header).
 *
 * ptllnd_dump_history() takes no arguments; it is declared with (void) so
 * the compiler rejects calls that pass any. */
void ptllnd_dump_history(void);

/* Record one history entry.  'fn', 'file' and 'line' identify the call
 * site; 'fmt' and the trailing arguments are a printf-style message. */
void ptllnd_history(const char *fn, const char *file, const int line,
                    const char *fmt, ...);

/* Wrapper that supplies the caller's function name, file and line number
 * automatically.  Uses the GNU named-variadic form; "## a" drops the
 * preceding comma when no extra arguments are given. */
#define PTLLND_HISTORY(fmt, a...) \
        ptllnd_history(__FUNCTION__, __FILE__, __LINE__, fmt, ## a)
/* Bitwise OR of two Portals memory-descriptor option flags.
 * NOTE(review): presumably applied to every MD this LND creates; confirm
 * at the point of use. */
64 #define PTLLND_MD_OPTIONS (PTL_MD_LUSTRE_COMPLETION_SEMANTICS |\
65 PTL_MD_EVENT_START_DISABLE)
/* Fields carrying the plni_ prefix (the ptllnd_ni_t type named in the
 * prototypes further down).
 * NOTE(review): the struct header, closing brace and some members are not
 * visible in this view; comments marked "presumably" are inferred from the
 * field names only. */
69 ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */
70 int plni_peer_credits; /* read by PTLLND_CREDIT_HIGHWATER() */
71 int plni_max_msg_size; /* presumably the largest message size, in bytes */
74 int plni_peer_hash_size; /* presumably the bucket count of plni_peer_hash */
77 int plni_max_tx_history; /* presumably the cap on plni_tx_history entries */
78 int plni_abort_on_nak; /* presumably the run-time form of PTLLND_ABORT_ON_NAK */
81 struct list_head plni_active_txs; /* list head; presumably txs in progress */
82 struct list_head plni_zombie_txs; /* list head; presumably txs awaiting cleanup */
86 ptl_handle_ni_t plni_nih; /* Portals network interface handle */
87 ptl_handle_eq_t plni_eqh; /* Portals event queue handle */
88 ptl_process_id_t plni_portals_id; /* Portals ID of interface */
90 struct list_head *plni_peer_hash; /* pointer to list heads; presumably the peer hash table */
93 struct list_head plni_tx_history; /* list head; presumably completed txs kept for debugging */
96 struct list_head plni_buffers; /* list head; presumably the receive buffers */
98 int plni_nposted_buffers; /* presumably the number of buffers currently posted */
/* Evaluates to one less than the interface's plni_peer_credits value.
 * 'plni' must be a pointer to a structure with a plni_peer_credits member. */
101 #define PTLLND_CREDIT_HIGHWATER(plni) ((plni)->plni_peer_credits - 1)
/* Fields carrying the plp_ prefix (ptllnd_peer_t).
 * NOTE(review): the struct header, closing brace and some members (for
 * example the plp_refcount used by ptllnd_peer_addref/decref) are not
 * visible in this view. */
105 struct list_head plp_list; /* list linkage; presumably the peer hash chain */
107 lnet_process_id_t plp_id; /* peer's LNET process id */
108 ptl_process_id_t plp_ptlid; /* peer's Portals process id */
111 int plp_outstanding_credits; /* presumably credits owed back to the peer */
112 int plp_max_msg_size; /* presumably the peer's maximum message size */
/* NOTE(review): a 1-bit bit-field of plain int has implementation-defined
 * signedness, so it may store 0/-1 rather than 0/1; only test it for
 * zero/non-zero, or consider declaring it unsigned int. */
114 int plp_recvd_hello:1;
118 struct list_head plp_txq; /* ptllnd_post_tx() appends txs here before calling ptllnd_check_sends() */
119 struct list_head plp_activeq; /* list head; presumably txs already sent and awaiting completion */
/* Fields carrying the plb_ prefix (ptllnd_buffer_t).
 * NOTE(review): the rest of the struct is not visible in this view. */
124 struct list_head plb_list; /* list linkage; presumably on plni_buffers */
127 ptl_handle_md_t plb_md; /* Portals memory descriptor handle for this buffer */
/* NOTE(review): lone visible member of a receive descriptor whose
 * declaration is otherwise outside this view. */
133 ptllnd_peer_t *rx_peer; /* presumably the peer the message came from */
/* Fields carrying the tx_ prefix (ptllnd_tx_t, the transmit descriptor).
 * NOTE(review): the struct header, closing brace, the #endif of the timing
 * section and possibly other members are not visible in this view. */
140 struct list_head tx_list; /* list linkage; ptllnd_post_tx() queues it on the peer's plp_txq */
143 ptllnd_peer_t *tx_peer; /* owning peer; ptllnd_post_tx() asserts it is non-NULL */
144 lnet_msg_t *tx_lnetmsg; /* presumably the LNET message being sent */
145 lnet_msg_t *tx_lnetreplymsg; /* presumably the LNET reply message, if any */
146 unsigned int tx_niov; /* presumably the number of entries in tx_iov */
147 ptl_md_iovec_t *tx_iov; /* Portals MD iovec array */
148 ptl_handle_md_t tx_bulkmdh; /* MD handle; presumably for the bulk transfer */
149 ptl_handle_md_t tx_reqmdh; /* MD handle; presumably for the request message */
/* Timestamps that exist only when timing instrumentation is compiled in. */
150 #if PTLLND_DEBUG_TIMING
151 struct timeval tx_bulk_posted;
152 struct timeval tx_bulk_done;
153 struct timeval tx_req_posted;
154 struct timeval tx_req_done;
156 int tx_completing; /* someone already completing */
157 int tx_msgsize; /* # bytes in tx_msg */
158 kptl_msg_t tx_msg; /* message to send */
/* Type codes for RDMA operations.  Per the comments below they are pseudo
 * message types: no message of these types is actually sent. */
161 #define PTLLND_RDMA_WRITE 0x100 /* pseudo message type */
162 #define PTLLND_RDMA_READ 0x101 /* (no msg actually sent) */
164 /* Hack to extract object type from event's user_ptr relies on (and checks)
165 * that structs are somewhat aligned. */
/* The type tag lives in the two low-order bits of the pointer value
 * (mask 0x3): ptllnd_obj2eventarg() ORs one of the TYPE_* values into the
 * object's address after asserting those bits are clear. */
166 #define PTLLND_EVENTARG_TYPE_TX 0x1
167 #define PTLLND_EVENTARG_TYPE_BUF 0x2
168 #define PTLLND_EVENTARG_TYPE_MASK 0x3
171 ptllnd_obj2eventarg (void *obj, int type)
173 unsigned long ptr = (unsigned long)obj;
175 LASSERT ((ptr & PTLLND_EVENTARG_TYPE_MASK) == 0);
176 LASSERT ((type & ~PTLLND_EVENTARG_TYPE_MASK) == 0);
178 return (void *)(ptr | type);
182 ptllnd_eventarg2type (void *arg)
184 unsigned long ptr = (unsigned long)arg;
186 return (ptr & PTLLND_EVENTARG_TYPE_MASK);
190 ptllnd_eventarg2obj (void *arg)
192 unsigned long ptr = (unsigned long)arg;
194 return (void *)(ptr & ~PTLLND_EVENTARG_TYPE_MASK);
/* Timing helpers.  When PTLLND_DEBUG_TIMING is non-zero:
 *   PTLLND_DBGT_INIT(tv)  zeroes the timestamp 'tv';
 *   PTLLND_DBGT_STAMP(tv) fills 'tv' using gettimeofday();
 *   DBGT_FMT / DBGT_ARGS(tv) format it as seconds.microseconds.
 * DBGT_ARGS(tv) begins with a comma, so it is written directly after the
 * preceding argument with no separating comma of its own. */
197 #if PTLLND_DEBUG_TIMING
198 # define PTLLND_DBGT_INIT(tv) memset(&(tv), 0, sizeof(tv))
199 # define PTLLND_DBGT_STAMP(tv) gettimeofday(&(tv), NULL)
200 # define DBGT_FMT "%ld.%06ld"
201 # define DBGT_ARGS(tv) , (long)((tv).tv_sec), (long)((tv).tv_usec)
/* Alternative definitions: INIT and STAMP expand to nothing, DBGT_FMT
 * prints "-" and DBGT_ARGS contributes no arguments.
 * NOTE(review): the #else/#endif lines of this conditional are not visible
 * in this view. */
203 # define PTLLND_DBGT_INIT(tv)
204 # define PTLLND_DBGT_STAMP(tv)
205 # define DBGT_FMT "-"
206 # define DBGT_ARGS(tv)
/* Prototypes for functions implemented outside this header. */
209 int ptllnd_parse_int_tunable(int *value, char *name, int dflt);
210 void ptllnd_cull_tx_history(ptllnd_ni_t *plni);
/* Entry points taking an lnet_ni_t.
 * NOTE(review): presumably the LND operations registered with LNET;
 * confirm against the LND table. */
211 int ptllnd_startup(lnet_ni_t *ni);
212 void ptllnd_shutdown(lnet_ni_t *ni);
213 int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
214 int ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg);
215 int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
216 int delayed, unsigned int niov,
217 struct iovec *iov, lnet_kiov_t *kiov,
218 unsigned int offset, unsigned int mlen, unsigned int rlen);
219 int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
220 void **new_privatep);
222 ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob);
223 void ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive);
224 void ptllnd_wait(lnet_ni_t *ni, int milliseconds);
/* Called by ptllnd_post_tx() after a tx has been appended to the peer's
 * plp_txq. */
225 void ptllnd_check_sends(ptllnd_peer_t *peer);
226 void ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id);
/* Called by ptllnd_peer_decref() when the peer's reference count reaches
 * zero. */
227 void ptllnd_destroy_peer(ptllnd_peer_t *peer);
228 void ptllnd_close_peer(ptllnd_peer_t *peer, int error);
229 int ptllnd_post_buffer(ptllnd_buffer_t *buf);
230 int ptllnd_grow_buffers (lnet_ni_t *ni);
/* Conversions to strings. */
231 const char *ptllnd_evtype2str(int type);
232 const char *ptllnd_msgtype2str(int type);
233 char *ptllnd_ptlid2str(ptl_process_id_t id);
236 ptllnd_peer_addref (ptllnd_peer_t *peer)
238 LASSERT (peer->plp_refcount > 0);
239 peer->plp_refcount++;
243 ptllnd_peer_decref (ptllnd_peer_t *peer)
245 LASSERT (peer->plp_refcount > 0);
246 peer->plp_refcount--;
247 if (peer->plp_refcount == 0)
248 ptllnd_destroy_peer(peer);
252 ptllnd_post_tx(ptllnd_tx_t *tx)
254 ptllnd_peer_t *peer = tx->tx_peer;
255 LASSERT(tx->tx_peer != NULL);
256 list_add_tail(&tx->tx_list, &peer->plp_txq);
257 ptllnd_check_sends(peer);
260 static inline lnet_nid_t
261 ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid)
263 return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
266 static inline ptl_nid_t
267 ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
269 return LNET_NIDADDR(lnet_nid);
273 * A note about lprintf():
274 * Normally printf() is redirected to stdout of the console
275 * from which yod launched the catamount application. However
276 * there is a lot of initialization code that runs before this
277 * redirection is hooked up, and printf() seems to go to the bit bucket
279 * To get any kind of debug output at init time lprintf() can
280 * be used to output to the console from which bookqk was used to
281 * boot the catamount node. This works for debugging some simple