1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
6 * This file is part of Lustre, http://www.lustre.org/
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 * Portals GM kernel NAL header file
25 * This file contains all declarations and prototypes
26 * for the API side and CB side of the NAL
28 #ifndef __INCLUDE_GMNAL_H__
29 #define __INCLUDE_GMNAL_H__
31 /* XXX Lustre, as of the V1.2.2 drop, defines VERSION, which causes problems
32 * when including <GM>/include/gm_lanai.h, which defines a structure field
33 * with the name VERSION XXX */
39 # define EXPORT_SYMTAB
42 #include "linux/config.h"
43 #include "linux/module.h"
44 #include "linux/tty.h"
45 #include "linux/kernel.h"
47 #include "linux/string.h"
48 #include "linux/stat.h"
49 #include "linux/errno.h"
50 #include "linux/version.h"
51 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
52 #include "linux/buffer_head.h"
55 #include "linux/locks.h"
57 #include "linux/unistd.h"
58 #include "linux/init.h"
59 #include "linux/sem.h"
60 #include "linux/vmalloc.h"
61 #include "linux/sysctl.h"
63 #define DEBUG_SUBSYSTEM S_NAL
65 #include "portals/nal.h"
66 #include "portals/api.h"
67 #include "portals/errno.h"
68 #include "libcfs/kp30.h"
69 #include "portals/p30.h"
71 #include "portals/nal.h"
72 #include "portals/lib-p30.h"
74 /* undefine these before including the GM headers which clash */
75 #undef PACKAGE_BUGREPORT
78 #undef PACKAGE_TARNAME
79 #undef PACKAGE_VERSION
81 #define GM_STRONG_TYPES 1
86 #include "gm_internal.h"
91 * Defines for the API NAL
95 * Small message size is configurable
96 * insmod can set small_msg_size
97 * which is used to populate nal_data.small_msg_size
/* Magic number for the GM NAL.
 * NOTE(review): where it is written or checked is not visible in this header. */
99 #define GMNAL_MAGIC 0x1234abcd
/* NOTE(review): presumably the default small message size in bytes, which the
 * comment above says insmod can override through small_msg_size - confirm. */
101 #define GMNAL_SMALL_MESSAGE 1078
/* Global integers that are only declared here; their definitions live
 * outside this header. */
103 extern int num_rx_threads; /* read by the GMNAL_RXTHREADS_STARTED macro */
104 extern int num_stxds; /* NOTE(review): presumably how many stxds to allocate - confirm */
105 extern int gm_port_id; /* NOTE(review): presumably the GM port number to open - confirm */
108 * Small Transmit Descriptor
109 * A structure to keep track of a small transmit operation
110 * This structure has a one-to-one relationship with a small
111 * transmit buffer (both created by gmnal_stxd_alloc).
112 * There are two free lists of stxds. One for use by clients of the NAL
113 * and the other by the NAL rxthreads when doing sends.
114 * This helps prevent deadlock caused by stxd starvation.
/* Small transmit descriptor: one instance tracks one small transmit. */
116 typedef struct gmnal_stxd {
117 struct gmnal_stxd *tx_next; /* link to another stxd; NOTE(review): presumably the free-list chain - confirm */
120 gm_size_t tx_gm_size; /* a size expressed in GM's gm_size_t type */
126 struct gmnal_ni *tx_gmni; /* pointer to a gmnal_ni; NOTE(review): presumably the owning interface - confirm */
127 lib_msg_t *tx_cookie; /* lib_msg_t pointer kept with the descriptor */
131 struct iovec *tx_iovec_dup; /* NOTE(review): presumably a separately held copy of an iovec - confirm */
132 struct iovec tx_iov[PTL_MD_MAX_IOV]; /* embedded iovec array with PTL_MD_MAX_IOV entries */
136 * as for gmnal_stxd_t
137 * A hash table in nal_data is used to find srxds from
138 * the rx buffer address. The hash table is populated at init time
/* Small receive descriptor, the receive-side counterpart of gmnal_stxd. */
140 typedef struct gmnal_srxd {
144 unsigned int rx_sender_gmid; /* GM id associated with the sender */
145 __u64 rx_source_stxd; /* 64-bit value; NOTE(review): presumably the sender's stxd pointer carried in the message header - confirm */
149 struct iovec *rx_riov; /* iovec pointer; NOTE(review): meaning of the "r" prefix not visible here */
151 spinlock_t rx_callback_lock; /* NOTE(review): presumably guards rx_callback_status - confirm */
152 int rx_callback_status;
153 lib_msg_t *rx_cookie; /* lib_msg_t pointer kept with the descriptor */
154 struct gmnal_srxd *rx_next; /* link to another srxd */
155 struct gmnal_ni *rx_gmni; /* pointer to a gmnal_ni; NOTE(review): presumably the owning interface - confirm */
159 * Header which gmnal puts at the start of each message
160 * watch alignment for ia32/64 interaction
/* Per-message header type. Members use fixed-width __u32/__u64 types and the
 * struct carries WIRE_ATTR; the comment above warns about ia32/64 alignment. */
162 typedef struct gmnal_msghdr {
166 __u32 gmm_sender_gmid; /* 32-bit GM id of the sender */
167 __u64 gmm_stxd_remote_ptr; /* 64-bit field; NOTE(review): presumably the sender's stxd address, held as an integer so 32- and 64-bit peers agree on layout - confirm */
168 } WIRE_ATTR gmnal_msghdr_t;
171 * the caretaker thread (ct_thread) gets receive events
172 * (and other events) from the myrinet device via the GM2 API.
173 * caretaker thread populates one work entry for each receive event,
174 * puts it on a Q in nal_data and wakes a receive thread to
175 * process the receive.
176 * Processing a portals receive can involve a transmit operation.
177 * Because of this the caretaker thread cannot process receives
178 * as it may get deadlocked when supply of transmit descriptors
179 * is exhausted (as caretaker thread is responsible for replacing
180 * transmit descriptors on the free list)
/* Receive-thread work entry: per the comment above, the caretaker thread
 * fills in one of these for each receive event and queues it. */
182 typedef struct gmnal_rxtwe {
188 struct gmnal_rxtwe *next; /* link to another work entry */
192 * 1 receive thread started on each CPU
194 #define NRXTHREADS 10 /* max number of receiver threads; also the length of gmni_rxthread_pid[] */
/* State block for the GM NAL; stxd and srxd descriptors point back to it. */
196 typedef struct gmnal_ni {
/* Two stxd sets, each with its own spinlock and semaphore.
 * NOTE(review): presumably the client free list and the rxthread free list
 * described in the stxd comment - confirm. */
197 spinlock_t gmni_stxd_lock;
198 struct semaphore gmni_stxd_token;
199 gmnal_stxd_t *gmni_stxd;
200 spinlock_t gmni_rxt_stxd_lock;
201 struct semaphore gmni_rxt_stxd_token;
202 gmnal_stxd_t *gmni_rxt_stxd;
/* Receive descriptors and a GM hash table for them. */
203 gmnal_srxd_t *gmni_srxd;
204 struct gm_hash *gmni_srxd_hash;
/* Handles for the portals library NAL and the GM port, plus two GM ids. */
206 lib_nal_t *gmni_libnal;
207 struct gm_port *gmni_port;
208 __u32 gmni_local_gmid;
209 __u32 gmni_global_gmid;
210 spinlock_t gmni_gm_lock; /* serialise GM calls */
/* Receive thread bookkeeping: one pid slot per possible thread (NRXTHREADS). */
211 long gmni_rxthread_pid[NRXTHREADS];
212 int gmni_rxthread_stop_flag;
213 spinlock_t gmni_rxthread_flag_lock; /* NOTE(review): presumably guards gmni_rxthread_flag - confirm */
214 long gmni_rxthread_flag; /* NOTE(review): presumably the per-thread bit flag compared with GMNAL_RXTHREADS_STARTED - confirm */
/* Caretaker thread bookkeeping. */
215 long gmni_ctthread_pid;
216 int gmni_ctthread_flag;
217 gm_alarm_t gmni_ctthread_alarm;
/* Small message size, held in two forms. NOTE(review): the "gmsize" variant is
 * presumably the GM size class for that byte count - confirm. */
218 int gmni_small_msg_size;
219 int gmni_small_msg_gmsize;
/* Queue of receive work entries: head and tail pointers, a spinlock and a
 * semaphore. */
220 gmnal_rxtwe_t *gmni_rxtwe_head;
221 gmnal_rxtwe_t *gmni_rxtwe_tail;
222 spinlock_t gmni_rxtwe_lock;
223 struct semaphore gmni_rxtwe_wait;
227 * Flags to start/stop and check status of threads
228 * each rxthread sets 1 bit (any bit) of the flag on startup
229 * and clears 1 bit when exiting
/* Values for the thread control/status flags. */
231 #define GMNAL_THREAD_RESET 0
232 #define GMNAL_THREAD_STOP 666
233 #define GMNAL_CTTHREAD_STARTED 333
/* Mask with the low num_rx_threads bits set. It is not a constant: the global
 * num_rx_threads is read at every use.
 * NOTE(review): the shift is done in int, so the value is only defined while
 * num_rx_threads is smaller than the width of int; NRXTHREADS is 10, but the
 * check that enforces that limit is not visible in this header. */
234 #define GMNAL_RXTHREADS_STARTED ( (1<<num_rx_threads)-1)
/* NOTE(review): presumably an ioctl command number for fetching a GM node id;
 * its user is not visible in this header - confirm. */
240 #define GMNAL_IOC_GET_GNID 1
243 * FUNCTION PROTOTYPES
/* API-side entry points. Each takes the nal_t handle as its first argument;
 * parameter names are omitted in these prototypes. */
249 int gmnal_api_startup(nal_t *, ptl_pid_t,
250 ptl_ni_limits_t *, ptl_ni_limits_t *);
/* Takes an int plus two (pointer, size) pairs. */
252 int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t);
254 void gmnal_api_shutdown(nal_t *);
/* Takes a (pointer, size) pair. */
256 int gmnal_api_validate(nal_t *, void *, size_t);
258 void gmnal_api_yield(nal_t *, unsigned long *, int);
/* lock/unlock share one signature; NOTE(review): the unsigned long * is
 * presumably where saved interrupt flags are kept - confirm. */
260 void gmnal_api_lock(nal_t *, unsigned long *);
262 void gmnal_api_unlock(nal_t *, unsigned long *);
/* CB-side callbacks. Each takes the lib_nal_t handle as its first argument.
 * The plain and _pages variants differ only in the vector type they accept:
 * struct iovec * versus ptl_kiov_t *. */
269 ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
270 int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t);
272 ptl_err_t gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
273 int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t);
275 ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
276 unsigned int, struct iovec *, size_t, size_t, size_t);
278 ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
279 unsigned int, ptl_kiov_t *, size_t, size_t, size_t);
/* Takes a nid and an unsigned long out-pointer. */
281 int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
/* Argument-less init/fini pair; NOTE(review): presumably the module load and
 * unload hooks - confirm. */
283 int gmnal_init(void);
285 void gmnal_fini(void);
289 * Small and Large Transmit and Receive Descriptor Functions
/* Transmit descriptor management on a gmnal_ni: an alloc/free pair plus a
 * get/return pair for individual stxds. NOTE(review): the int argument to
 * gmnal_get_stxd presumably selects blocking behaviour or which free list
 * to use - confirm. */
291 int gmnal_alloc_txd(gmnal_ni_t *);
292 void gmnal_free_txd(gmnal_ni_t *);
293 gmnal_stxd_t* gmnal_get_stxd(gmnal_ni_t *, int);
294 void gmnal_return_stxd(gmnal_ni_t *, gmnal_stxd_t *);
/* Receive descriptor alloc/free pair on a gmnal_ni. */
296 int gmnal_alloc_srxd(gmnal_ni_t *);
297 void gmnal_free_srxd(gmnal_ni_t *);
300 * general utility functions
/* Returns the srxd for a buffer pointer; NOTE(review): presumably a lookup in
 * gmni_srxd_hash - confirm. */
302 gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_ni_t *, void*);
/* Stop requests for the receive threads and the caretaker thread. */
303 void gmnal_stop_rxthread(gmnal_ni_t *);
304 void gmnal_stop_ctthread(gmnal_ni_t *);
/* Two callbacks with the same (gm_port_t *, void *, gm_status_t) signature. */
305 void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
306 void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
/* Return a string for a GM status code and for a GM receive event. */
307 char *gmnal_gm_error(gm_status_t);
308 char *gmnal_rxevent(gm_recv_event_t*);
309 void gmnal_yield(int);
310 int gmnal_start_kernel_threads(gmnal_ni_t *);
314 * Communication functions
/* Thread entry points; each takes a single void * argument. */
320 int gmnal_ct_thread(void *); /* caretaker thread */
321 int gmnal_rx_thread(void *); /* receive thread */
/* Receive handling operating on a work entry or an srxd. */
322 void gmnal_pre_receive(gmnal_ni_t*, gmnal_rxtwe_t*, int);
323 void gmnal_rx_bad(gmnal_ni_t *, gmnal_rxtwe_t *);
324 void gmnal_rx_requeue_buffer(gmnal_ni_t *, gmnal_srxd_t *);
/* Work-entry queue operations: add from a gm_recv_t, get one entry, remove. */
325 int gmnal_add_rxtwe(gmnal_ni_t *, gm_recv_t *);
326 gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_ni_t *);
327 void gmnal_remove_rxtwe(gmnal_ni_t *);
/* Small-message transmit using the given stxd; returns a ptl_err_t. */
333 ptl_err_t gmnal_small_tx(lib_nal_t *libnal, void *private,
334 lib_msg_t *cookie, ptl_hdr_t *hdr,
335 int type, ptl_nid_t nid,
336 gmnal_stxd_t *stxd, int size);
/* Same signature as the other GM callbacks declared in this header.
 * NOTE(review): presumably the completion callback for gmnal_small_tx -
 * confirm. */
337 void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
339 #endif /*__INCLUDE_GMNAL_H__*/