1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
6 * This file is part of Lustre, http://www.lustre.org/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 * Portals GM kernel NAL header file
25 * This file makes all declarations and prototypes
26 * for the API side and CB side of the NAL
28 #ifndef __INCLUDE_GMNAL_H__
29 #define __INCLUDE_GMNAL_H__
32 # define EXPORT_SYMTAB
35 #include "linux/config.h"
36 #include "linux/module.h"
37 #include "linux/tty.h"
38 #include "linux/kernel.h"
40 #include "linux/string.h"
41 #include "linux/stat.h"
42 #include "linux/errno.h"
43 #include "linux/locks.h"
44 #include "linux/unistd.h"
45 #include "linux/init.h"
46 #include "linux/sem.h"
47 #include "linux/vmalloc.h"
48 #include "linux/sysctl.h"
50 #define DEBUG_SUBSYSTEM S_GMNAL
52 #include "portals/nal.h"
53 #include "portals/api.h"
54 #include "portals/errno.h"
55 #include "linux/kp30.h"
56 #include "portals/p30.h"
58 #include "portals/lib-nal.h"
59 #include "portals/lib-p30.h"
61 #define GM_STRONG_TYPES 1
63 #include "gm_internal.h"
68 * Defines for the API NAL
72 * Small message size is configurable
73 * insmod can set small_msg_size
74 * which is used to populate nal_data.small_msg_size
/*
 * Message type codes: one for small messages and three for large
 * messages (INIT, ACK, FINI).
 */
#define GMNAL_SMALL_MESSAGE             1078
#define GMNAL_LARGE_MESSAGE_INIT        1079
#define GMNAL_LARGE_MESSAGE_ACK         1080
#define GMNAL_LARGE_MESSAGE_FINI        1081

/* Defined elsewhere in the module; only declared here. */
extern  int gmnal_small_msg_size;
extern  int num_rx_threads;

/*
 * GMNAL_SMALL_MSG_SIZE(a) reads the small_msg_size member through the
 * pointer expression a.  The argument and the whole expansion are
 * parenthesised so that the macro is safe with arguments such as
 * "p + 1" or "c ? x : y" and inside larger expressions.
 */
#define GMNAL_SMALL_MSG_SIZE(a)         ((a)->small_msg_size)
/* Thin wrapper that forwards all four arguments to gmnal_is_small_msg(). */
#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c)
#define GMNAL_MAGIC                     0x1234abcd

/*
 * The gm_port to use for gmnal
 */
#define GMNAL_GM_PORT                   gm_port
95 * Small Transmit Descriptor
96 * A structure to keep track of a small transmit operation
97 * This structure has a one-to-one relationship with a small
98 * transmit buffer (both created by gmnal_stxd_alloc).
99 * There are two free lists of stxd. One for use by clients of the NAL
100 * and the other by the NAL rxthreads when doing sends.
101 * This helps prevent deadlock caused by stxd starvation.
103 typedef struct _gmnal_stxd_t {
111 struct _gmnal_data_t *nal_data;
114 struct iovec iov[PTL_MD_MAX_IOV];
115 struct _gmnal_stxd_t *next;
118 struct iovec *iovec_dup;
122 * keeps a transmit token for large transmit (gm_get)
123 * and a pointer to rxd that is used as context for large receive
125 typedef struct _gmnal_ltxd_t {
126 struct _gmnal_ltxd_t *next;
127 struct _gmnal_srxd_t *srxd;
132 * as for gmnal_stxd_t
133 * a hash table in nal_data finds srxds from
134 * the rx buffer address. hash table populated at init time
136 typedef struct _gmnal_srxd_t {
140 unsigned int gm_source_node;
141 gmnal_stxd_t *source_stxd;
147 spinlock_t callback_lock;
150 struct _gmnal_srxd_t *next;
151 struct _gmnal_data_t *nal_data;
155 * Header which gmnal puts at the start of each message
157 typedef struct _gmnal_msghdr {
160 unsigned int sender_node_id;
164 #define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t)
167 * the caretaker thread (ct_thread) gets receive events
168 * (and other events) from the myrinet device via the GM2 API.
169 * caretaker thread populates one work entry for each receive event,
170 * puts it on a Q in nal_data and wakes a receive thread to
171 * process the receive.
172 * Processing a portals receive can involve a transmit operation.
173 * Because of this the caretaker thread cannot process receives
174 * as it may get deadlocked when supply of transmit descriptors
175 * is exhausted (as caretaker thread is responsible for replacing
176 * transmit descriptors on the free list)
178 typedef struct _gmnal_rxtwe {
184 struct _gmnal_rxtwe *next;
188 * 1 receive thread started on each CPU
/*
 * Max number of receiver threads; also the length of the
 * rxthread_pid[] array in gmnal_data_t.
 */
#define NRXTHREADS      10
192 typedef struct _gmnal_data_t {
195 spinlock_t stxd_lock;
196 struct semaphore stxd_token;
198 spinlock_t rxt_stxd_lock;
199 struct semaphore rxt_stxd_token;
200 gmnal_stxd_t *rxt_stxd;
201 spinlock_t ltxd_lock;
202 struct semaphore ltxd_token;
204 spinlock_t srxd_lock;
205 struct semaphore srxd_token;
207 struct gm_hash *srxd_hash;
210 struct gm_port *gm_port;
211 unsigned int gm_local_nid;
212 unsigned int gm_global_nid;
214 long rxthread_pid[NRXTHREADS];
215 int rxthread_stop_flag;
216 spinlock_t rxthread_flag_lock;
220 gm_alarm_t ctthread_alarm;
222 int small_msg_gmsize;
223 gmnal_rxtwe_t *rxtwe_head;
224 gmnal_rxtwe_t *rxtwe_tail;
225 spinlock_t rxtwe_lock;
226 struct semaphore rxtwe_wait;
227 struct ctl_table_header *sysctl;
231 * Flags to start/stop and check status of threads
232 * each rxthread sets 1 bit (any bit) of the flag on startup
233 * and clears 1 bit when exiting
/* Values for the thread start/stop flags. */
#define GMNAL_THREAD_RESET              0
#define GMNAL_THREAD_STOP               666
#define GMNAL_CTTHREAD_STARTED          333
/*
 * Mask with the low num_rx_threads bits set.  Evaluated at each use,
 * so it tracks the current value of num_rx_threads.
 */
#define GMNAL_RXTHREADS_STARTED         ((1 << num_rx_threads) - 1)
241 extern gmnal_data_t *global_nal_data;
/* ioctl command number (by name: get the global node id). */
#define GMNAL_IOC_GET_GNID      1

/* Status codes. */
#define GMNAL_STATUS_OK         0
#define GMNAL_STATUS_FAIL       1
#define GMNAL_STATUS_NOMEM      2
257 * FUNCTION PROTOTYPES
265 * For the Small tx and rx descriptor lists
/*
 * Lock and token helpers.  Each macro takes a pointer expression (a)
 * to a structure holding the named spinlock / semaphore members and
 * applies the corresponding kernel primitive to that member:
 *
 *   *_LOCK_INIT / *_LOCK / *_UNLOCK      -> spin_lock_init / spin_lock / spin_unlock
 *   *_TOKEN_INIT(a, n)                   -> sema_init with initial count n
 *   *_GETTOKEN / *_RETURNTOKEN           -> down / up
 *   *_TRYGETTOKEN                        -> down_trylock (an expression; its
 *                                           value is down_trylock's result)
 *
 * The argument is parenthesised in every expansion so that expressions
 * such as "p + 1" or "c ? x : y" bind correctly before "->".
 *
 * NOTE(review): the statement-style macros still end in their own ';'
 * (kept so existing call sites are unaffected).  As a result
 * "if (c) GMNAL_TXD_LOCK(a); else ..." does not compile; dropping the
 * trailing ';' needs an audit of all callers first.
 */

/* Small transmit descriptors (client list) */
#define GMNAL_TXD_LOCK_INIT(a)          spin_lock_init(&(a)->stxd_lock);
#define GMNAL_TXD_LOCK(a)               spin_lock(&(a)->stxd_lock);
#define GMNAL_TXD_UNLOCK(a)             spin_unlock(&(a)->stxd_lock);
#define GMNAL_TXD_TOKEN_INIT(a, n)      sema_init(&(a)->stxd_token, n);
#define GMNAL_TXD_GETTOKEN(a)           down(&(a)->stxd_token);
#define GMNAL_TXD_TRYGETTOKEN(a)        down_trylock(&(a)->stxd_token)
#define GMNAL_TXD_RETURNTOKEN(a)        up(&(a)->stxd_token);

/* Small transmit descriptors (rxt_stxd list) */
#define GMNAL_RXT_TXD_LOCK_INIT(a)      spin_lock_init(&(a)->rxt_stxd_lock);
#define GMNAL_RXT_TXD_LOCK(a)           spin_lock(&(a)->rxt_stxd_lock);
#define GMNAL_RXT_TXD_UNLOCK(a)         spin_unlock(&(a)->rxt_stxd_lock);
#define GMNAL_RXT_TXD_TOKEN_INIT(a, n)  sema_init(&(a)->rxt_stxd_token, n);
#define GMNAL_RXT_TXD_GETTOKEN(a)       down(&(a)->rxt_stxd_token);
#define GMNAL_RXT_TXD_TRYGETTOKEN(a)    down_trylock(&(a)->rxt_stxd_token)
#define GMNAL_RXT_TXD_RETURNTOKEN(a)    up(&(a)->rxt_stxd_token);

/* Large transmit descriptors */
#define GMNAL_LTXD_LOCK_INIT(a)         spin_lock_init(&(a)->ltxd_lock);
#define GMNAL_LTXD_LOCK(a)              spin_lock(&(a)->ltxd_lock);
#define GMNAL_LTXD_UNLOCK(a)            spin_unlock(&(a)->ltxd_lock);
#define GMNAL_LTXD_TOKEN_INIT(a, n)     sema_init(&(a)->ltxd_token, n);
#define GMNAL_LTXD_GETTOKEN(a)          down(&(a)->ltxd_token);
#define GMNAL_LTXD_TRYGETTOKEN(a)       down_trylock(&(a)->ltxd_token)
#define GMNAL_LTXD_RETURNTOKEN(a)       up(&(a)->ltxd_token);

/* Small receive descriptors */
#define GMNAL_RXD_LOCK_INIT(a)          spin_lock_init(&(a)->srxd_lock);
#define GMNAL_RXD_LOCK(a)               spin_lock(&(a)->srxd_lock);
#define GMNAL_RXD_UNLOCK(a)             spin_unlock(&(a)->srxd_lock);
#define GMNAL_RXD_TOKEN_INIT(a, n)      sema_init(&(a)->srxd_token, n);
#define GMNAL_RXD_GETTOKEN(a)           down(&(a)->srxd_token);
#define GMNAL_RXD_TRYGETTOKEN(a)        down_trylock(&(a)->srxd_token)
#define GMNAL_RXD_RETURNTOKEN(a)        up(&(a)->srxd_token);

/* gm_lock and cb_lock members */
#define GMNAL_GM_LOCK_INIT(a)           spin_lock_init(&(a)->gm_lock);
#define GMNAL_GM_LOCK(a)                spin_lock(&(a)->gm_lock);
#define GMNAL_GM_UNLOCK(a)              spin_unlock(&(a)->gm_lock);
#define GMNAL_CB_LOCK_INIT(a)           spin_lock_init(&(a)->cb_lock);
312 int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t);
314 int gmnal_api_shutdown(nal_t *, int);
316 int gmnal_api_validate(nal_t *, void *, size_t);
318 void gmnal_api_yield(nal_t *, unsigned long *, int);
320 void gmnal_api_lock(nal_t *, unsigned long *);
322 void gmnal_api_unlock(nal_t *, unsigned long *);
325 #define GMNAL_INIT_NAL(a) do { \
326 a->forward = gmnal_api_forward; \
327 a->shutdown = gmnal_api_shutdown; \
328 a->validate = NULL; \
329 a->yield = gmnal_api_yield; \
330 a->lock = gmnal_api_lock; \
331 a->unlock = gmnal_api_unlock; \
334 a->nal_data = NULL; \
342 int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
343 int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
345 int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
346 int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
348 int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *,
349 unsigned int, struct iovec *, size_t, size_t);
351 int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *,
352 unsigned int, ptl_kiov_t *, size_t, size_t);
354 int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
356 int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
358 int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
360 void *gmnal_cb_malloc(nal_cb_t *, size_t);
362 void gmnal_cb_free(nal_cb_t *, void *, size_t);
364 void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
366 int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **);
368 void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
370 void gmnal_cb_cli(nal_cb_t *, unsigned long *);
372 void gmnal_cb_sti(nal_cb_t *, unsigned long *);
374 int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
376 nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid);
378 void gmnal_fini(void);
382 #define GMNAL_INIT_NAL_CB(a) do { \
383 a->cb_send = gmnal_cb_send; \
384 a->cb_send_pages = gmnal_cb_send_pages; \
385 a->cb_recv = gmnal_cb_recv; \
386 a->cb_recv_pages = gmnal_cb_recv_pages; \
387 a->cb_read = gmnal_cb_read; \
388 a->cb_write = gmnal_cb_write; \
389 a->cb_callback = gmnal_cb_callback; \
390 a->cb_malloc = gmnal_cb_malloc; \
391 a->cb_free = gmnal_cb_free; \
393 a->cb_unmap = NULL; \
394 a->cb_printf = gmnal_cb_printf; \
395 a->cb_cli = gmnal_cb_cli; \
396 a->cb_sti = gmnal_cb_sti; \
397 a->cb_dist = gmnal_cb_dist; \
398 a->nal_data = NULL; \
403 * Small and Large Transmit and Receive Descriptor Functions
405 int gmnal_alloc_txd(gmnal_data_t *);
406 void gmnal_free_txd(gmnal_data_t *);
407 gmnal_stxd_t* gmnal_get_stxd(gmnal_data_t *, int);
408 void gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *);
409 gmnal_ltxd_t* gmnal_get_ltxd(gmnal_data_t *);
410 void gmnal_return_ltxd(gmnal_data_t *, gmnal_ltxd_t *);
412 int gmnal_alloc_srxd(gmnal_data_t *);
413 void gmnal_free_srxd(gmnal_data_t *);
414 gmnal_srxd_t* gmnal_get_srxd(gmnal_data_t *, int);
415 void gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *);
418 * general utility functions
420 gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*);
421 void gmnal_stop_rxthread(gmnal_data_t *);
422 void gmnal_stop_ctthread(gmnal_data_t *);
423 void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
424 void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
425 void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
426 char *gmnal_gm_error(gm_status_t);
427 char *gmnal_rxevent(gm_recv_event_t*);
428 int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
429 void gmnal_yield(int);
430 int gmnal_start_kernel_threads(gmnal_data_t *);
434 * Communication functions
440 int gmnal_ct_thread(void *); /* caretaker thread */
441 int gmnal_rx_thread(void *); /* receive thread */
442 int gmnal_pre_receive(gmnal_data_t*, gmnal_rxtwe_t*, int);
443 int gmnal_rx_bad(gmnal_data_t *, gmnal_rxtwe_t *, gmnal_srxd_t*);
444 int gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *);
445 int gmnal_add_rxtwe(gmnal_data_t *, gm_recv_t *);
446 gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *);
447 void gmnal_remove_rxtwe(gmnal_data_t *);
453 int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
454 struct iovec *, size_t, size_t);
455 int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
456 int, ptl_nid_t, ptl_pid_t,
457 unsigned int, struct iovec*, int);
458 void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
465 int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
466 struct iovec *, size_t, size_t);
468 int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
469 int, ptl_nid_t, ptl_pid_t, unsigned int,
472 void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t);
474 int gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int,
477 void gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t);
479 int gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int,
482 void gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *);
483 void gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t);
484 void gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *);
486 #endif /*__INCLUDE_GMNAL_H__*/