Whamcloud - gitweb
Branch: b1_4
[fs/lustre-release.git] / lnet / klnds / gmlnd / gmlnd_api.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
5  *
6  *   This file is part of Lustre, http://www.lustre.org/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 /*
23  *      Implements the API NAL functions
24  */
25
26 #include "gmnal.h"
27
28
29
30 gmnal_data_t    *global_nal_data = NULL;
31 #define         GLOBAL_NID_STR_LEN      16
32 char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};
33 ptl_handle_ni_t kgmnal_ni;
34
35 extern int gmnal_cmd(struct portals_cfg *pcfg, void *private);
36
37 /*
38  *      Write the global nid /proc/sys/gmnal/globalnid
39  */
40 #define GMNAL_SYSCTL    201
41 #define GMNAL_SYSCTL_GLOBALNID  1
42
43 static ctl_table gmnal_sysctl_table[] = {
44         {GMNAL_SYSCTL_GLOBALNID, "globalnid",
45          global_nid_str, GLOBAL_NID_STR_LEN,
46          0444, NULL, &proc_dostring},
47         { 0 }
48 };
49
50
51 static ctl_table gmnalnal_top_sysctl_table[] = {
52         {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
53         { 0 }
54 };
55
56 /*
57  *      gmnal_api_shutdown
58  *      nal_refct == 0 => called on last matching PtlNIFini()
59  *      Close down this interface and free any resources associated with it
60  *      nal_t   nal     our nal to shutdown
61  */
62 void
63 gmnal_api_shutdown(nal_t *nal)
64 {
65         gmnal_data_t    *nal_data;
66         lib_nal_t       *libnal;
67
68         if (nal->nal_refct != 0) {
69                 /* This module got the first ref */
70                 PORTAL_MODULE_UNUSE;
71                 return;
72         }
73
74         LASSERT(nal == global_nal_data->nal);
75         libnal = (lib_nal_t *)nal->nal_data;
76         nal_data = (gmnal_data_t *)libnal->libnal_data;
77         LASSERT(nal_data == global_nal_data);
78         CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
79
80         /* Stop portals calling our ioctl handler */
81         libcfs_nal_cmd_unregister(GMNAL);
82
83         /* XXX for shutdown "under fire" we probably need to set a shutdown
84          * flag so when lib calls us we fail immediately and dont queue any
85          * more work but our threads can still call into lib OK.  THEN
86          * shutdown our threads, THEN lib_fini() */
87         lib_fini(libnal);
88
89         gmnal_stop_rxthread(nal_data);
90         gmnal_stop_ctthread(nal_data);
91         gmnal_free_txd(nal_data);
92         gmnal_free_srxd(nal_data);
93         GMNAL_GM_LOCK(nal_data);
94         gm_close(nal_data->gm_port);
95         gm_finalize();
96         GMNAL_GM_UNLOCK(nal_data);
97         if (nal_data->sysctl)
98                 unregister_sysctl_table (nal_data->sysctl);
99         /* Don't free 'nal'; it's a static struct */
100         PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
101         PORTAL_FREE(libnal, sizeof(lib_nal_t));
102
103         global_nal_data = NULL;
104 }
105
106
107 int
108 gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
109                   ptl_ni_limits_t *requested_limits,
110                   ptl_ni_limits_t *actual_limits)
111 {
112
113         lib_nal_t       *libnal = NULL;
114         gmnal_data_t    *nal_data = NULL;
115         gmnal_srxd_t    *srxd = NULL;
116         gm_status_t     gm_status;
117         unsigned int    local_nid = 0, global_nid = 0;
118         ptl_process_id_t process_id;
119
120         if (nal->nal_refct != 0) {
121                 if (actual_limits != NULL) {
122                         libnal = (lib_nal_t *)nal->nal_data;
123                         *actual_limits = libnal->libnal_ni.ni_actual_limits;
124                 }
125                 PORTAL_MODULE_USE;
126                 return (PTL_OK);
127         }
128
129         /* Called on first PtlNIInit() */
130
131         CDEBUG(D_TRACE, "startup\n");
132
133         LASSERT(global_nal_data == NULL);
134
135         PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
136         if (!nal_data) {
137                 CDEBUG(D_ERROR, "can't get memory\n");
138                 return(PTL_NO_SPACE);
139         }       
140         memset(nal_data, 0, sizeof(gmnal_data_t));
141         /*
142          *      set the small message buffer size 
143          */
144
145         CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
146         CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
147
148         PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
149         if (!libnal) {
150                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
151                 return(PTL_NO_SPACE);
152         }
153         memset(libnal, 0, sizeof(lib_nal_t));
154         CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
155
156         GMNAL_INIT_NAL_CB(libnal);
157         /*
158          *      String them all together
159          */
160         libnal->libnal_data = (void*)nal_data;
161         nal_data->nal = nal;
162         nal_data->libnal = libnal;
163
164         GMNAL_GM_LOCK_INIT(nal_data);
165
166
167         /*
168          *      initialise the interface,
169          */
170         CDEBUG(D_INFO, "Calling gm_init\n");
171         if (gm_init() != GM_SUCCESS) {
172                 CDEBUG(D_ERROR, "call to gm_init failed\n");
173                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
174                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
175                 return(PTL_FAIL);
176         }
177
178
179         CDEBUG(D_NET, "Calling gm_open with port [%d], "
180                "name [%s], version [%d]\n", GMNAL_GM_PORT_ID,
181                "gmnal", GM_API_VERSION);
182
183         GMNAL_GM_LOCK(nal_data);
184         gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT_ID, "gmnal",
185                             GM_API_VERSION);
186         GMNAL_GM_UNLOCK(nal_data);
187
188         CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status);
189         if (gm_status == GM_SUCCESS) {
190                 CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", 
191                        nal_data->gm_port);
192         } else {
193                 switch(gm_status) {
194                 case(GM_INVALID_PARAMETER):
195                         CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n");
196                         break;
197                 case(GM_BUSY):
198                         CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n");
199                         break;
200                 case(GM_NO_SUCH_DEVICE):
201                         CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
202                         break;
203                 case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
204                         CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
205                                "and driver\n");
206                         break;
207                 case(GM_OUT_OF_MEMORY):
208                         CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
209                         break;
210                 default:
211                         CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
212                                "code [%d]\n", gm_status);
213                         break;
214                 }       
215                 GMNAL_GM_LOCK(nal_data);
216                 gm_finalize();
217                 GMNAL_GM_UNLOCK(nal_data);
218                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
219                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
220                 return(PTL_FAIL);
221         }
222
223         nal_data->small_msg_size = gmnal_small_msg_size;
224         nal_data->small_msg_gmsize =
225                         gm_min_size_for_length(gmnal_small_msg_size);
226
227         if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) {
228                 CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n");
229                 gmnal_free_txd(nal_data);
230                 GMNAL_GM_LOCK(nal_data);
231                 gm_close(nal_data->gm_port);
232                 gm_finalize();
233                 GMNAL_GM_UNLOCK(nal_data);
234                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
235                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
236                 return(PTL_FAIL);
237         }
238
239
240         /*
241          *      Hang out a bunch of small receive buffers
242          *      In fact hang them all out
243          */
244         while((srxd = gmnal_get_srxd(nal_data, 0))) {
245                 CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n", 
246                        srxd->buffer);
247                 GMNAL_GM_LOCK(nal_data);
248                 gm_provide_receive_buffer_with_tag(nal_data->gm_port, 
249                                                    srxd->buffer, srxd->gmsize, 
250                                                    GM_LOW_PRIORITY, 0);
251                 GMNAL_GM_UNLOCK(nal_data);
252         }
253         
254         /*
255          *      Allocate pools of small tx buffers and descriptors
256          */
257         if (gmnal_alloc_txd(nal_data) != GMNAL_STATUS_OK) {
258                 CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n");
259                 GMNAL_GM_LOCK(nal_data);
260                 gm_close(nal_data->gm_port);
261                 gm_finalize();
262                 GMNAL_GM_UNLOCK(nal_data);
263                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
264                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
265                 return(PTL_FAIL);
266         }
267
268         gmnal_start_kernel_threads(nal_data);
269
270         while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) {
271                 gmnal_yield(1);
272                 CDEBUG(D_INFO, "Waiting for receive thread signs of life\n");
273         }
274
275         CDEBUG(D_INFO, "receive thread seems to have started\n");
276
277
278         /*
279          *      Initialise the portals library
280          */
281         CDEBUG(D_NET, "Getting node id\n");
282         GMNAL_GM_LOCK(nal_data);
283         gm_status = gm_get_node_id(nal_data->gm_port, &local_nid);
284         GMNAL_GM_UNLOCK(nal_data);
285         if (gm_status != GM_SUCCESS) {
286                 gmnal_stop_rxthread(nal_data);
287                 gmnal_stop_ctthread(nal_data);
288                 CDEBUG(D_ERROR, "can't determine node id\n");
289                 gmnal_free_txd(nal_data);
290                 gmnal_free_srxd(nal_data);
291                 GMNAL_GM_LOCK(nal_data);
292                 gm_close(nal_data->gm_port);
293                 gm_finalize();
294                 GMNAL_GM_UNLOCK(nal_data);
295                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
296                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
297                 return(PTL_FAIL);
298         }
299
300         nal_data->gm_local_nid = local_nid;
301         CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid);
302
303         GMNAL_GM_LOCK(nal_data);
304         gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, 
305                                             &global_nid);
306         GMNAL_GM_UNLOCK(nal_data);
307         if (gm_status != GM_SUCCESS) {
308                 CDEBUG(D_ERROR, "failed to obtain global id\n");
309                 gmnal_stop_rxthread(nal_data);
310                 gmnal_stop_ctthread(nal_data);
311                 gmnal_free_txd(nal_data);
312                 gmnal_free_srxd(nal_data);
313                 GMNAL_GM_LOCK(nal_data);
314                 gm_close(nal_data->gm_port);
315                 gm_finalize();
316                 GMNAL_GM_UNLOCK(nal_data);
317                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
318                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
319                 return(PTL_FAIL);
320         }
321         CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
322         nal_data->gm_global_nid = global_nid;
323         snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
324
325 /*
326         pid = gm_getpid();
327 */
328         process_id.pid = requested_pid;
329         process_id.nid = global_nid;
330         
331         CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
332         CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
333         
334         CDEBUG(D_PORTALS, "calling lib_init\n");
335         if (lib_init(libnal, nal, process_id, 
336                      requested_limits, actual_limits) != PTL_OK) {
337                 CDEBUG(D_ERROR, "lib_init failed\n");
338                 gmnal_stop_rxthread(nal_data);
339                 gmnal_stop_ctthread(nal_data);
340                 gmnal_free_txd(nal_data);
341                 gmnal_free_srxd(nal_data);
342                 GMNAL_GM_LOCK(nal_data);
343                 gm_close(nal_data->gm_port);
344                 gm_finalize();
345                 GMNAL_GM_UNLOCK(nal_data);
346                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
347                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
348                 return(PTL_FAIL);
349                 
350         }
351
352         if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) {
353                 CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n");
354
355                 /* XXX these cleanup cases should be restructured to
356                  * minimise duplication... */
357                 lib_fini(libnal);
358                 
359                 gmnal_stop_rxthread(nal_data);
360                 gmnal_stop_ctthread(nal_data);
361                 gmnal_free_txd(nal_data);
362                 gmnal_free_srxd(nal_data);
363                 GMNAL_GM_LOCK(nal_data);
364                 gm_close(nal_data->gm_port);
365                 gm_finalize();
366                 GMNAL_GM_UNLOCK(nal_data);
367                 PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
368                 PORTAL_FREE(libnal, sizeof(lib_nal_t));
369                 return(PTL_FAIL);
370         }
371
372         /* might be better to initialise this at module load rather than in
373          * NAL startup */
374         nal_data->sysctl = NULL;
375         nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
376
377         CDEBUG(D_INFO, "gmnal_init finished\n");
378
379         global_nal_data = libnal->libnal_data;
380
381         return(PTL_OK);
382 }
383
384 nal_t the_gm_nal;
385
386 /* 
387  *        Called when module loaded
388  */
389 int gmnal_init(void)
390 {
391         int    rc;
392
393         memset(&the_gm_nal, 0, sizeof(nal_t));
394         CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal);
395         GMNAL_INIT_NAL(&the_gm_nal);
396
397         rc = ptl_register_nal(GMNAL, &the_gm_nal);
398         if (rc != PTL_OK)
399                 CERROR("Can't register GMNAL: %d\n", rc);
400         rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni);
401         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
402                 ptl_unregister_nal(GMNAL);
403                 return (-ENODEV);
404         }
405
406         return (rc);
407 }
408
409
410 /*
411  *      Called when module removed
412  */
413 void gmnal_fini()
414 {
415         CDEBUG(D_TRACE, "gmnal_fini\n");
416
417         PtlNIFini(kgmnal_ni);
418
419         ptl_unregister_nal(GMNAL);
420         LASSERT(global_nal_data == NULL);
421 }