1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/ulnds/ptllnd/ptllnd.h
38 * Author: Eric Barton <eeb@bartonsoftware.com>
42 #define DEBUG_SUBSYSTEM S_LND
44 #include <lnet/lib-lnet.h>
45 #include <lnet/ptllnd_wire.h>
47 #include <portals/p30.h>
48 #include <lnet/ptllnd.h> /* Depends on portals/p30.h */
51 /* Hack to record history
52 * This should really be done by CDEBUG(D_NETTRACE... */
55 struct list_head he_list;
56 struct timeval he_time;
/* Presumably prints the entries recorded via ptllnd_history() -- confirm
 * against the definition.
 * NOTE(review): '()' is an old-style declarator that leaves the parameters
 * unspecified; '(void)' would give callers a real prototype. */
64 void ptllnd_dump_history();
/* Record one history entry.  'fn', 'file' and 'line' identify the call site;
 * 'fmt' plus the variadic arguments are presumably printf-style -- confirm
 * against the definition. */
65 void ptllnd_history(const char *fn, const char *file, const int line,
66 const char *fmt, ...);
/* Call ptllnd_history() with the caller's __FUNCTION__, __FILE__ and __LINE__
 * filled in.  Uses the GNU named variadic parameter ('a...') and ', ## a' so
 * the macro may be invoked with a format string and no further arguments. */
67 #define PTLLND_HISTORY(fmt, a...) \
68 ptllnd_history(__FUNCTION__, __FILE__, __LINE__, fmt, ## a)
/* Bitwise OR of PTL_MD_LUSTRE_COMPLETION_SEMANTICS and
 * PTL_MD_EVENT_START_DISABLE.
 * NOTE(review): presumably the option word used for memory descriptors
 * created by this LND; the users are not visible here -- confirm. */
71 #define PTLLND_MD_OPTIONS (PTL_MD_LUSTRE_COMPLETION_SEMANTICS |\
72 PTL_MD_EVENT_START_DISABLE)
76 ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */
77 int plni_peer_credits;
78 int plni_max_msg_size;
81 int plni_peer_hash_size;
84 int plni_max_tx_history;
85 int plni_abort_on_protocol_mismatch;
86 int plni_abort_on_nak;
90 int plni_watchdog_interval;
94 struct list_head plni_active_txs;
95 struct list_head plni_zombie_txs;
99 ptl_handle_ni_t plni_nih;
100 ptl_handle_eq_t plni_eqh;
101 ptl_process_id_t plni_portals_id; /* Portals ID of interface */
103 struct list_head *plni_peer_hash;
106 int plni_watchdog_nextt;
107 int plni_watchdog_peeridx;
109 struct list_head plni_tx_history;
110 int plni_ntx_history;
112 struct list_head plni_buffers;
114 int plni_nposted_buffers;
/* Evaluates to one less than (plni)->plni_peer_credits.  'plni' is used as a
 * pointer and is evaluated exactly once.
 * NOTE(review): presumably the threshold of accumulated credits at which they
 * are returned to the peer; the comparison site is not visible here. */
118 #define PTLLND_CREDIT_HIGHWATER(plni) ((plni)->plni_peer_credits - 1)
122 struct list_head plp_list;
124 lnet_process_id_t plp_id;
125 ptl_process_id_t plp_ptlid;
126 int plp_credits; /* # msg buffers reserved for me at peer */
128 /* credits for msg buffers I've posted for this peer...
129 * outstanding - free buffers I've still to inform my peer about
130 * sent - free buffers I've told my peer about
131 * lazy - additional buffers (over and above plni_peer_credits)
132 * posted to prevent peer blocking on sending non-RDMA
133 * messages to me when LNET isn't eagerly responsive to
134 * the network (i.e. liblustre doesn't have control).
135 * extra_lazy - lazy credits not required any more. */
136 int plp_outstanding_credits;
137 int plp_sent_credits;
138 int plp_lazy_credits;
139 int plp_extra_lazy_credits;
141 int plp_max_msg_size;
143 int plp_sent_hello:1;
144 int plp_recvd_hello:1;
148 struct list_head plp_txq;
149 struct list_head plp_noopq;
150 struct list_head plp_activeq;
155 struct list_head plb_list;
158 ptl_handle_md_t plb_md;
164 ptllnd_peer_t *rx_peer;
171 struct list_head tx_list;
174 ptllnd_peer_t *tx_peer;
175 lnet_msg_t *tx_lnetmsg;
176 lnet_msg_t *tx_lnetreplymsg;
177 unsigned int tx_niov;
178 ptl_md_iovec_t *tx_iov;
179 ptl_handle_md_t tx_bulkmdh;
180 ptl_handle_md_t tx_reqmdh;
181 struct timeval tx_bulk_posted;
182 struct timeval tx_bulk_done;
183 struct timeval tx_req_posted;
184 struct timeval tx_req_done;
185 int tx_completing; /* someone already completing */
186 int tx_msgsize; /* # bytes in tx_msg */
187 time_t tx_deadline; /* time to complete by */
188 kptl_msg_t tx_msg; /* message to send */
191 #define PTLLND_RDMA_WRITE 0x100 /* pseudo message type */
192 #define PTLLND_RDMA_READ 0x101 /* (no msg actually sent) */
194 /* Hack to extract object type from event's user_ptr; relies on (and checks)
195 * that structs are somewhat aligned. */
/* The tag occupies the low two bits of the pointer: TX is 0x1, BUF is 0x2 and
 * the mask 0x3 covers both.  A tagged object must therefore have its low two
 * address bits clear (4-byte alignment); ptllnd_obj2eventarg() asserts this. */
196 #define PTLLND_EVENTARG_TYPE_TX 0x1
197 #define PTLLND_EVENTARG_TYPE_BUF 0x2
198 #define PTLLND_EVENTARG_TYPE_MASK 0x3
/* Combine an object pointer and a type tag into a single pointer value by
 * OR-ing 'type' into the low bits of 'obj'.
 *
 * Asserts that the PTLLND_EVENTARG_TYPE_MASK bits of 'obj' are clear (so the
 * tag cannot corrupt the address) and that 'type' has no bits outside the
 * mask.  Returns the tagged value as a void pointer.
 *
 * NOTE(review): the round-trip through 'unsigned long' assumes a pointer fits
 * in unsigned long, which does not hold on LLP64 targets; uintptr_t is the
 * portable type for this. */
201 ptllnd_obj2eventarg (void *obj, int type)
203 unsigned long ptr = (unsigned long)obj;
205 LASSERT ((ptr & PTLLND_EVENTARG_TYPE_MASK) == 0);
206 LASSERT ((type & ~PTLLND_EVENTARG_TYPE_MASK) == 0);
208 return (void *)(ptr | type);
/* Return the type tag carried in 'arg': the pointer value masked with
 * PTLLND_EVENTARG_TYPE_MASK.  No validation of the tag is done here. */
212 ptllnd_eventarg2type (void *arg)
214 unsigned long ptr = (unsigned long)arg;
216 return (ptr & PTLLND_EVENTARG_TYPE_MASK);
/* Recover the object pointer from a tagged 'arg' by clearing the
 * PTLLND_EVENTARG_TYPE_MASK bits.  The caller decides which object type the
 * result is (see ptllnd_eventarg2type). */
220 ptllnd_eventarg2obj (void *arg)
222 unsigned long ptr = (unsigned long)arg;
224 return (void *)(ptr & ~PTLLND_EVENTARG_TYPE_MASK);
/* Functions declared here; their definitions are not part of this header. */
227 int ptllnd_parse_int_tunable(int *value, char *name, int dflt);
228 void ptllnd_cull_tx_history(ptllnd_ni_t *plni);
/* NOTE(review): startup/shutdown/ctl/send/recv/eager_recv all take an
 * lnet_ni_t * first and look like the entry points this LND hands to LNET --
 * confirm against the table that registers them. */
229 int ptllnd_startup(lnet_ni_t *ni);
230 void ptllnd_shutdown(lnet_ni_t *ni);
231 int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
232 int ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg);
233 int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
234 int delayed, unsigned int niov,
235 struct iovec *iov, lnet_kiov_t *kiov,
236 unsigned int offset, unsigned int mlen, unsigned int rlen);
237 int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
238 void **new_privatep);
240 ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob);
241 int ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int n);
242 void ptllnd_wait(lnet_ni_t *ni, int milliseconds);
243 void ptllnd_check_sends(ptllnd_peer_t *peer);
244 void ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id);
/* ptllnd_destroy_peer() is what ptllnd_peer_decref() calls when a peer's
 * reference count reaches zero. */
245 void ptllnd_destroy_peer(ptllnd_peer_t *peer);
246 void ptllnd_close_peer(ptllnd_peer_t *peer, int error);
247 int ptllnd_post_buffer(ptllnd_buffer_t *buf);
248 int ptllnd_size_buffers (lnet_ni_t *ni, int delta);
/* Code-to-name lookups; each returns a const char *. */
249 const char *ptllnd_evtype2str(int type);
250 const char *ptllnd_msgtype2str(int type);
251 const char *ptllnd_errtype2str(int type);
/* NOTE(review): returns a non-const char *; who owns the returned storage is
 * not visible here -- check the definition before freeing or retaining it. */
252 char *ptllnd_ptlid2str(ptl_process_id_t id);
253 void ptllnd_dump_debug(lnet_ni_t *ni, lnet_process_id_t id);
/* Take one more reference on 'peer' by incrementing plp_refcount.
 * Asserts the count is already positive on entry, so this cannot be used on
 * a peer whose count has dropped to zero.
 * NOTE(review): the increment is a plain '++' with no locking in this
 * function -- confirm callers are single-threaded or otherwise serialized. */
257 ptllnd_peer_addref (ptllnd_peer_t *peer)
259 LASSERT (peer->plp_refcount > 0);
260 peer->plp_refcount++;
/* Drop one reference on 'peer'.  Asserts the count is positive, decrements
 * plp_refcount, and passes the peer to ptllnd_destroy_peer() when the count
 * reaches zero -- so 'peer' must not be touched after this call unless the
 * caller holds another reference. */
264 ptllnd_peer_decref (ptllnd_peer_t *peer)
266 LASSERT (peer->plp_refcount > 0);
267 peer->plp_refcount--;
268 if (peer->plp_refcount == 0)
269 ptllnd_destroy_peer(peer);
/* Convert a Portals NID into an LNET NID: LNET_MKNID() is given the network
 * part of this interface's own NID (LNET_NIDNET(ni->ni_nid)) together with
 * 'portals_nid'. */
272 static inline lnet_nid_t
273 ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid)
275 return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
/* Convert an LNET NID into a Portals NID by returning LNET_NIDADDR(lnet_nid);
 * whatever LNET_NIDNET() would yield for the same NID is not used. */
278 static inline ptl_nid_t
279 ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
281 return LNET_NIDADDR(lnet_nid);
285 * A note about lprintf():
286 * Normally printf() is redirected to stdout of the console
287 * from which yod launched the catamount application. However
288 * there is a lot of initialization code that runs before this
289 * redirection is hooked up, and printf() seems to go to the bit bucket.
291 * To get any kind of debug output at init time lprintf() can
292 * be used to output to the console from which bookqk was used to
293 * boot the catamount node. This works for debugging some simple