1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@zabbo.net>
6 * Author: Peter J. Braam <braam@clusterfs.com>
7 * Author: Phil Schwan <phil@clusterfs.com>
8 * Author: Eric Barton <eric@bartonsoftware.com>
9 * Author: Kedar Sovani <kedar@calsoftinc.com>
10 * Author: Amey Inamdar <amey@calsoftinc.com>
12 * This file is part of Portals, http://www.sf.net/projects/lustre/
14 * Portals is free software; you can redistribute it and/or
15 * modify it under the terms of version 2 of the GNU General Public
16 * License as published by the Free Software Foundation.
18 * Portals is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with Portals; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 #include <linux/poll.h>
/* Portals network-interface handle for this NAL: filled in by PtlNIInit()
 * in ktoenal_module_init() and exported with EXPORT_SYMBOL. */
31 ptl_handle_ni_t ktoenal_ni;
/* API-side NAL descriptor; its method pointers and nal_data are assigned in
 * ktoenal_module_init(). */
32 static nal_t ktoenal_api;
/* Module-wide state (socket list, locks, buffer pools, reaper/scheduler
 * queues); zeroed and initialised in ktoenal_module_init(). */
33 static ksock_nal_data_t ktoenal_data;
/* Table of the socket-management entry points of this NAL: register a socket
 * for a peer, close a peer's socket(s), and set the local NID.
 * NOTE(review): the closing lines of this initialiser are not visible in
 * this listing. */
36 ksocknal_interface_t ktoenal_interface = {
37 ksni_add_sock: ktoenal_add_sock,
38 ksni_close_sock: ktoenal_close_sock,
39 ksni_set_mynid: ktoenal_set_mynid,
/* Interface description passed to kpr_register() in ktoenal_module_init():
 * kprni_fwd names the packet-forwarding function and kprni_arg is set to
 * &ktoenal_data.
 * NOTE(review): further members may be set on lines missing from this
 * listing. */
43 kpr_nal_interface_t ktoenal_router_interface = {
45 kprni_arg: &ktoenal_data,
46 kprni_fwd: ktoenal_fwd_packet,
/* nal_t 'forward' method (installed in ktoenal_module_init()): passes an API
 * request to lib_dispatch() together with this NAL's callback table.
 * NOTE(review): the return type, braces and the declarations of 'k' and
 * 'nal_cb' are on lines missing from this listing; args_len and ret_len are
 * not used on any visible line. */
51 ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
52 void *ret, size_t ret_len)
58 nal_cb = k->ksnd_nal_cb;
60 lib_dispatch(nal_cb, k, id, args, ret); /* ktoenal_send needs k */
/* nal_t 'shutdown' method: closes every connection by passing nid 0 to
 * ktoenal_close_sock() and returns that call's result.  Neither 'nal' nor
 * 'ni' is used on any visible line. */
65 ktoenal_api_shutdown(nal_t *nal, int ni)
67 CDEBUG (D_NET, "closing all connections\n");
69 return ktoenal_close_sock(0); /* close all sockets */
/* nal_t 'yield' method (installed in ktoenal_module_init()).
 * NOTE(review): the body is not visible in this listing. */
73 ktoenal_api_yield(nal_t *nal)
/* nal_t 'lock' method: delegates to the cb_cli callback of this NAL's
 * callback table, passing 'flags' through unchanged.
 * NOTE(review): 'k' is presumably nal->nal_data; its declaration is on a
 * line missing from this listing. */
80 ktoenal_api_lock(nal_t *nal, unsigned long *flags)
86 nal_cb = k->ksnd_nal_cb;
87 nal_cb->cb_cli(nal_cb,flags);
/* nal_t 'unlock' method: counterpart of ktoenal_api_lock(); delegates to the
 * cb_sti callback of this NAL's callback table, passing 'flags' through.
 * NOTE(review): 'k' is presumably nal->nal_data; its declaration is on a
 * line missing from this listing. */
91 ktoenal_api_unlock(nal_t *nal, unsigned long *flags)
97 nal_cb = k->ksnd_nal_cb;
98 nal_cb->cb_sti(nal_cb,flags);
/* Interface-init routine handed to PtlNIInit() by ktoenal_module_init():
 * calls lib_init() on ktoenal_lib with the NID currently recorded in
 * ktoenal_data and returns the API-side NAL descriptor.
 * NOTE(review): the tail of the lib_init() argument list is on a line
 * missing from this listing; 'interface' and 'requested_pid' are not used on
 * any visible line. */
102 ktoenal_init(int interface, ptl_pt_index_t ptl_size,
103 ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
105 CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
106 ktoenal_data.ksnd_mynid);
107 lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size,
109 return (&ktoenal_api);
113 * EXTRA functions follow
/* Compatibility shim: on kernels older than 2.5.0, SOCKET_I() is defined
 * here as the address of the socket embedded in the inode (u.socket_i).
 * NOTE(review): the matching #endif is not visible in this listing. */
115 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
116 #define SOCKET_I(inode) (&(inode)->u.socket_i)
/* Return the struct socket that SOCKET_I() yields for 'inode'. */
118 static __inline__ struct socket *
119 socki_lookup(struct inode *inode)
121 return SOCKET_I(inode);
/* Record 'nid' as this node's NID in ktoenal_data, after logging the new
 * value alongside the NID currently held by the library NI.  Reached through
 * NAL_CMD_REGISTER_MYNID in ktoenal_cmd() and through ktoenal_interface.
 * NOTE(review): statements between and after the visible lines (e.g. any
 * update of ni->nid, and the return) are missing from this listing. */
125 ktoenal_set_mynid(ptl_nid_t nid)
127 lib_ni_t *ni = &ktoenal_lib.ni;
129 /* FIXME: we have to do this because we call lib_init() at module
130 * insertion time, which is before we have 'mynid' available. lib_init
131 * sets the NAL's nid, which it uses to tell other nodes where packets
132 * are coming from. This is not a very graceful solution to this
135 CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid);
137 ktoenal_data.ksnd_mynid = nid;
/* Register the socket behind file descriptor 'fd' as the connection to peer
 * 'nid': allocate a ksock_conn_t, initialise its rx and tx state, link it
 * onto ktoenal_data.ksnd_socklist under the socklist write lock, and wake
 * the poll thread so that it picks up the new socket.
 * NOTE(review): the fd-to-file lookup, the error paths and the return
 * statements are on lines missing from this listing. */
143 ktoenal_add_sock (ptl_nid_t nid, int fd)
147 struct file *file = NULL;
148 struct socket *sock = NULL;
157 sock = socki_lookup(file->f_dentry->d_inode);
162 PORTAL_ALLOC(conn, sizeof(*conn));
/* Clear the whole structure: sizeof (conn) would only cover the size of the
 * pointer, leaving most of the connection uninitialised. */
166 memset (conn, 0, sizeof (*conn)); /* zero for consistency */
167 file->f_flags |= O_NONBLOCK; /* Does this have any conflicts */
168 conn->ksnc_file = file;
169 conn->ksnc_sock = sock;
170 conn->ksnc_peernid = nid;
171 atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */
173 conn->ksnc_rx_ready = 0;
174 conn->ksnc_rx_scheduled = 0;
175 ktoenal_new_packet (conn, 0);
177 INIT_LIST_HEAD (&conn->ksnc_tx_queue);
178 conn->ksnc_tx_ready = 0;
179 conn->ksnc_tx_scheduled = 0;
181 LASSERT (!in_interrupt());
182 write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
184 list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
185 write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
187 ktoenal_data_ready(conn);
188 ktoenal_write_space(conn);
190 ktoenal_data.ksnd_slistchange = 1;
191 wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
192 /* Schedule pollthread so that it will poll
193 * for newly created socket
197 CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
198 conn, conn->ksnc_peernid);
200 /* Can't unload while connection active */
/* Close the connection(s) to peer 'nid'.  Under the socklist write lock the
 * affected conns are moved from ktoenal_data.ksnd_socklist onto the local
 * 'death_row' list; once the lock is dropped, each conn on death_row is
 * unlinked and has its socklist reference released via ktoenal_put_conn().
 * Finally ksnd_slistchange is set and the poll thread is woken.
 * NOTE(review): the return statements (including whatever is returned when
 * death_row ends up empty) are on lines missing from this listing. */
209 /* Passing in a zero nid will close all connections */
211 ktoenal_close_sock(ptl_nid_t nid)
215 LIST_HEAD (death_row);
216 struct list_head *tmp;
218 LASSERT (!in_interrupt());
219 write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
221 if (nid == 0) /* close ALL connections */
223 /* insert 'death row' into the socket list... */
224 list_add (&death_row, &ktoenal_data.ksnd_socklist);
225 /* ...extract and reinitialise the socket list itself... */
226 list_del_init (&ktoenal_data.ksnd_socklist);
227 /* ...and voila, death row is the proud owner of all conns */
/* NOTE(review): the walk below unlinks 'conn' while iterating with plain
 * list_for_each(); that is only safe if the loop is left immediately after
 * the move.  The lines following the list_add() are missing from this
 * listing -- confirm there is a break, or switch to list_for_each_safe(). */
228 } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
230 conn = list_entry (tmp, ksock_conn_t, ksnc_list);
232 if (conn->ksnc_peernid == nid)
234 list_del (&conn->ksnc_list);
235 list_add (&conn->ksnc_list, &death_row);
241 write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
243 if (list_empty (&death_row))
247 conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
248 list_del (&conn->ksnc_list);
249 ktoenal_put_conn (conn); /* drop ref for ksnd_socklist */
250 } while (!list_empty (&death_row));
252 ktoenal_data.ksnd_slistchange = 1;
253 wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
/* Search ktoenal_data.ksnd_socklist, under the socklist read lock, for the
 * connection whose peer NID equals 'nid'.  On a match the conn's refcount is
 * incremented on behalf of the caller before the lock is released.
 * NOTE(review): the return statements are on lines missing from this
 * listing; presumably the conn is returned on a match and NULL otherwise. */
260 ktoenal_get_conn (ptl_nid_t nid)
262 struct list_head *tmp;
265 PROF_START(conn_list_walk);
267 read_lock (&ktoenal_data.ksnd_socklist_lock);
269 list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
271 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
273 if (conn->ksnc_peernid == nid)
275 /* caller is referencing */
276 atomic_inc (&conn->ksnc_refcount);
278 read_unlock (&ktoenal_data.ksnd_socklist_lock);
280 CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
281 conn, nid, atomic_read (&conn->ksnc_refcount));
283 PROF_FINISH(conn_list_walk);
288 read_unlock (&ktoenal_data.ksnd_socklist_lock);
290 CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid);
291 PROF_FINISH(conn_list_walk);
/* Final teardown of a connection: calls fput() on the conn's struct file and
 * frees the ksock_conn_t itself.
 * NOTE(review): whatever follows the last comment below (presumably the
 * release of a module reference) is on a line missing from this listing. */
296 ktoenal_close_conn (ksock_conn_t *conn)
298 CDEBUG (D_NET, "connection [%p] closed \n", conn);
300 fput (conn->ksnc_file);
301 PORTAL_FREE (conn, sizeof (*conn));
302 /* One less connection keeping us hanging on */
/* Dispose of a connection whose refcount has dropped to zero (asserted
 * below, together with the conn not being scheduled for rx).  The visible
 * code shows two outcomes: closing it directly with ktoenal_close_conn(), or
 * queueing it on ktoenal_data.ksnd_reaper_list under the reaper lock and
 * waking the reaper.
 * NOTE(review): the condition that selects between those two paths is on
 * lines missing from this listing. */
307 _ktoenal_put_conn (ksock_conn_t *conn)
311 CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
313 /* "But what is the black spot, captain?" I asked.
314 * "That's a summons, mate..." */
316 LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
317 LASSERT (!conn->ksnc_rx_scheduled);
321 ktoenal_close_conn (conn);
325 spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
327 list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list);
328 wake_up (&ktoenal_data.ksnd_reaper_waitq);
330 spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
/* Release the buffers allocated by ktoenal_module_init(): every non-NULL
 * page of each forwarding-message buffer (fmb), then the fmb array itself,
 * and finally the ltx descriptor array.  Each array is skipped if its
 * pointer is NULL, so this is usable after a partial initialisation. */
334 ktoenal_free_buffers (void)
336 if (ktoenal_data.ksnd_fmbs != NULL)
338 ksock_fmb_t *fmb = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs;
/* Walk all small and large fmbs, freeing the pages that were allocated. */
342 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++, fmb++)
343 for (j = 0; j < fmb->fmb_npages; j++)
344 if (fmb->fmb_pages[j] != NULL)
345 __free_page (fmb->fmb_pages[j]);
347 PORTAL_FREE (ktoenal_data.ksnd_fmbs,
348 sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
351 if (ktoenal_data.ksnd_ltxs != NULL)
352 PORTAL_FREE (ktoenal_data.ksnd_ltxs,
353 sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
/* Command handler registered with kportal_nal_register() in
 * ktoenal_module_init().  Dispatches on data->ioc_nal_cmd to register a peer
 * socket (ioc_nid, ioc_fd), close a connection (ioc_nid) or set the local
 * NID (ioc_nid), storing the callee's result in 'rc'.  'private' is not used
 * on any visible line.
 * NOTE(review): the break/return statements and any default case are on
 * lines missing from this listing. */
357 ktoenal_cmd(struct portal_ioctl_data * data, void * private)
361 LASSERT (data != NULL);
363 switch(data->ioc_nal_cmd) {
364 case NAL_CMD_REGISTER_PEER_FD: {
365 rc = ktoenal_add_sock(data->ioc_nid, data->ioc_fd);
368 case NAL_CMD_CLOSE_CONNECTION: {
369 rc = ktoenal_close_sock(data->ioc_nid);
372 case NAL_CMD_REGISTER_MYNID: {
373 rc = ktoenal_set_mynid (data->ioc_nid);
/* Module teardown; also called from ktoenal_module_init() to unwind a
 * partially completed initialisation.  ktoenal_data.ksnd_init records how far
 * initialisation got and selects the case at which cleanup starts:
 * unregistering the command interface and exported symbol, shutting down the
 * Portals NI and library, then stopping the router callbacks and worker
 * threads and freeing the buffers.  Memory accounting is logged before and
 * after.
 * NOTE(review): the cases are ordered from most to least initialised, which
 * suggests fall-through, but the lines between the cases are missing from
 * this listing. */
383 ktoenal_module_fini (void)
385 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
386 atomic_read (&portal_kmemory));
388 switch (ktoenal_data.ksnd_init)
393 case SOCKNAL_INIT_ALL:
394 kportal_nal_unregister(TOENAL);
395 PORTAL_SYMBOL_UNREGISTER (ktoenal_ni);
398 case SOCKNAL_INIT_PTL:
399 PtlNIFini(ktoenal_ni);
400 lib_fini(&ktoenal_lib);
403 case SOCKNAL_INIT_DATA:
404 /* Module refcount only gets to zero when all connections
405 * have been closed so all lists must be empty */
406 LASSERT (list_empty (&ktoenal_data.ksnd_socklist));
407 LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list));
408 LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns));
409 LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns));
410 LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns));
411 LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns));
413 kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */
415 /* flag threads to terminate; wake and wait for them to die */
416 ktoenal_data.ksnd_shuttingdown = 1;
417 wake_up_all (&ktoenal_data.ksnd_reaper_waitq);
418 wake_up_all (&ktoenal_data.ksnd_sched_waitq);
419 wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
/* Poll once a second (HZ ticks) until every worker thread has exited. */
421 while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0)
423 CDEBUG (D_NET, "waiting for %d threads to terminate\n",
424 atomic_read (&ktoenal_data.ksnd_nthreads));
425 set_current_state (TASK_UNINTERRUPTIBLE);
426 schedule_timeout (HZ);
429 kpr_deregister (&ktoenal_data.ksnd_router);
431 ktoenal_free_buffers();
434 case SOCKNAL_INIT_NOTHING:
438 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
439 atomic_read (&portal_kmemory));
441 printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n",
442 atomic_read(&portal_kmemory));
/* Module initialisation.  In order: install the API method table, zero and
 * initialise ktoenal_data (lists, locks, wait queues), allocate the
 * forwarding-message buffers and their pages, allocate the ltx descriptors
 * and queue them on their idle lists, initialise the Portals NI, start the
 * scheduler, reaper and poll threads, register with the router and the
 * command interface, and finally mark everything initialised.
 * ktoenal_data.ksnd_init is advanced as stages complete so that
 * ktoenal_module_fini(), which the visible failure paths call, knows how
 * much to undo.
 * NOTE(review): most failure checks and all return statements are on lines
 * missing from this listing. */
446 ktoenal_module_init (void)
448 int pkmem = atomic_read(&portal_kmemory);
453 /* packet descriptor must fit in a router descriptor's scratchpad */
454 LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
456 LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
458 ktoenal_api.forward = ktoenal_api_forward;
459 ktoenal_api.shutdown = ktoenal_api_shutdown;
460 ktoenal_api.yield = ktoenal_api_yield;
461 ktoenal_api.validate = NULL; /* our api validate is a NOOP */
462 ktoenal_api.lock = ktoenal_api_lock;
463 ktoenal_api.unlock = ktoenal_api_unlock;
464 ktoenal_api.nal_data = &ktoenal_data;
466 ktoenal_lib.nal_data = &ktoenal_data;
468 memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */
470 INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist);
471 rwlock_init(&ktoenal_data.ksnd_socklist_lock);
473 ktoenal_data.ksnd_nal_cb = &ktoenal_lib;
474 spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock);
476 spin_lock_init (&ktoenal_data.ksnd_sched_lock);
478 init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq);
480 INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns);
481 INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns);
483 INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs);
484 INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns);
485 INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs);
486 INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns);
488 INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list);
489 INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list);
490 init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq);
492 INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list);
493 init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq);
494 spin_lock_init (&ktoenal_data.ksnd_reaper_lock);
496 ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */
498 PORTAL_ALLOC(ktoenal_data.ksnd_fmbs,
499 sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
500 if (ktoenal_data.ksnd_fmbs == NULL)
503 /* NULL out buffer pointers etc */
504 memset(ktoenal_data.ksnd_fmbs, 0,
505 sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
/* The first SOCKNAL_SMALL_FWD_NMSGS buffers go to the small pool, the rest
 * to the large pool; each gets its pool's page count. */
507 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++)
509 ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i];
511 if (i < SOCKNAL_SMALL_FWD_NMSGS)
513 fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
514 fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp;
518 fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
519 fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp;
522 LASSERT (fmb->fmb_npages > 0);
523 for (j = 0; j < fmb->fmb_npages; j++)
525 fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
527 if (fmb->fmb_pages[j] == NULL)
529 ktoenal_module_fini ();
533 LASSERT (page_address (fmb->fmb_pages[j]) != NULL);
536 list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
539 PORTAL_ALLOC(ktoenal_data.ksnd_ltxs,
540 sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
541 if (ktoenal_data.ksnd_ltxs == NULL)
543 ktoenal_module_fini ();
547 /* Deterministic bugs please */
548 memset (ktoenal_data.ksnd_ltxs, 0xeb,
549 sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
/* The first SOCKNAL_NLTXS descriptors belong to the normal idle list, the
 * remainder to the non-blocking idle list. */
551 for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++)
553 ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i];
555 ltx->ltx_idle = i < SOCKNAL_NLTXS ?
556 &ktoenal_data.ksnd_idle_ltx_list :
557 &ktoenal_data.ksnd_idle_nblk_ltx_list;
/* Queue the descriptor's tx list node on the idle list chosen above. */
558 list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
561 rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni);
564 CERROR("ktoenal: PtlNIInit failed: error %d\n", rc);
565 ktoenal_module_fini ();
568 PtlNIDebug(ktoenal_ni, ~0);
570 ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
572 ktoenal_data.ksnd_slistchange = 1;
573 for (i = 0; i < TOENAL_N_SCHED; i++)
575 rc = ktoenal_thread_start (ktoenal_scheduler, NULL);
578 CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc);
579 ktoenal_module_fini ();
584 rc = ktoenal_thread_start (ktoenal_reaper, NULL);
587 CERROR("Can't spawn socknal reaper: %d\n", rc);
588 ktoenal_module_fini ();
592 rc = ktoenal_thread_start (ktoenal_pollthread, NULL);
595 CERROR("Can't spawn socknal pollthread: %d\n", rc);
596 ktoenal_module_fini ();
600 rc = kpr_register(&ktoenal_data.ksnd_router,
601 &ktoenal_router_interface);
603 CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc);
605 rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL);
607 CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n",
610 PORTAL_SYMBOL_REGISTER(ktoenal_ni);
612 /* flag everything initialised */
613 ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL;
615 printk(KERN_INFO"Routing TOE NAL loaded (Routing %s, initial mem %d)\n",
616 kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled",
/* Module metadata. */
622 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
623 MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
624 MODULE_LICENSE("GPL");
/* Load and unload entry points. */
626 module_init(ktoenal_module_init);
627 module_exit(ktoenal_module_fini);
/* Make the NI handle available to other kernel modules. */
629 EXPORT_SYMBOL (ktoenal_ni);