/*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002-2004 Cluster File Systems, Inc.
* Author: Eric Barton <eric@bartonsoftware.com>
*
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * W. Marcus Miller - Based on ksocknal
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
+ * This file is part of Portals, http://www.lustre.org
*
* Portals is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
#define QSWNAL_SYSCTL 201
#define QSWNAL_SYSCTL_OPTIMIZED_GETS 1
-#define QSWNAL_SYSCTL_COPY_SMALL_FWD 2
+#define QSWNAL_SYSCTL_OPTIMIZED_PUTS 2
static ctl_table kqswnal_ctl_table[] = {
+ {QSWNAL_SYSCTL_OPTIMIZED_PUTS, "optimized_puts",
+ &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
+ 0644, NULL, &proc_dointvec},
{QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
&kqswnal_tunables.kqn_optimized_gets, sizeof (int),
0644, NULL, &proc_dointvec},
};
#endif
-static int
-kqswnal_forward(nal_t *nal,
- int id,
- void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
- return (PTL_OK);
-}
-
-static void
-kqswnal_lock (nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-static void
-kqswnal_unlock(nal_t *nal, unsigned long *flags)
-{
- kqswnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kqn_cb;
-
- LASSERT (nal == &kqswnal_api);
- LASSERT (k == &kqswnal_data);
- LASSERT (nal_cb == &kqswnal_lib);
-
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-static int
-kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
-{
- /* NB called holding statelock */
- wait_queue_t wait;
- unsigned long now = jiffies;
-
- CDEBUG (D_NET, "yield\n");
-
- if (milliseconds == 0) {
- if (need_resched())
- schedule();
- return 0;
- }
-
- init_waitqueue_entry(&wait, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- kqswnal_unlock(nal, flags);
-
- if (milliseconds < 0)
- schedule ();
- else
- schedule_timeout((milliseconds * HZ) / 1000);
-
- kqswnal_lock(nal, flags);
-
- remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
-
- if (milliseconds > 0) {
- milliseconds -= ((jiffies - now) * 1000) / HZ;
- if (milliseconds < 0)
- milliseconds = 0;
- }
-
- return (milliseconds);
-}
-
int
kqswnal_get_tx_desc (struct portals_cfg *pcfg)
{
unsigned long flags;
struct list_head *tmp;
kqswnal_tx_t *ktx;
+ ptl_hdr_t *hdr;
int index = pcfg->pcfg_count;
int rc = -ENOENT;
continue;
ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
+ hdr = (ptl_hdr_t *)ktx->ktx_buffer;
- pcfg->pcfg_pbuf1 = (char *)ktx;
- pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
- pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
- pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
+ memcpy(pcfg->pcfg_pbuf, ktx,
+ MIN(sizeof(*ktx), pcfg->pcfg_plen1));
+ pcfg->pcfg_count = le32_to_cpu(hdr->type);
+ pcfg->pcfg_size = le32_to_cpu(hdr->payload_length);
+ pcfg->pcfg_nid = le64_to_cpu(hdr->dest_nid);
pcfg->pcfg_nid2 = ktx->ktx_nid;
pcfg->pcfg_misc = ktx->ktx_launcher;
pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
kqswnal_data.kqn_nid_offset);
kqswnal_data.kqn_nid_offset =
pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
- kqswnal_lib.ni.nid = pcfg->pcfg_nid;
+ kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
return (0);
default:
kqswnal_shutdown(nal_t *nal)
{
unsigned long flags;
+ kqswnal_tx_t *ktx;
+ kqswnal_rx_t *krx;
int do_lib_fini = 0;
/* NB The first ref was this module! */
* ep_dvma_release() get fixed (and releases any mappings in the
* region), we can delete all the code from here --------> */
- if (kqswnal_data.kqn_txds != NULL) {
- int i;
+ for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) {
+ /* If ktx has a buffer, it got mapped; unmap now. NB only
+ * the pre-mapped stuff is still mapped since all tx descs
+ * must be idle */
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
-
- /* If ktx has a buffer, it got mapped; unmap now.
- * NB only the pre-mapped stuff is still mapped
- * since all tx descs must be idle */
-
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- }
+ if (ktx->ktx_buffer != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_tx_nmh,
+ &ktx->ktx_ebuffer);
}
- if (kqswnal_data.kqn_rxds != NULL) {
- int i;
-
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
+ /* If krx_kiov[0].kiov_page got allocated, it got mapped.
+ * NB subsequent pages get merged */
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- }
+ if (krx->krx_kiov[0].kiov_page != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_rx_nmh,
+ &krx->krx_elanbuffer);
}
/* <----------- to here */
}
#endif
- if (kqswnal_data.kqn_txds != NULL)
- {
- int i;
-
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
- {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+ while (kqswnal_data.kqn_txds != NULL) {
+ ktx = kqswnal_data.kqn_txds;
- if (ktx->ktx_buffer != NULL)
- PORTAL_FREE(ktx->ktx_buffer,
- KQSW_TX_BUFFER_SIZE);
- }
+ if (ktx->ktx_buffer != NULL)
+ PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
- PORTAL_FREE(kqswnal_data.kqn_txds,
- sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
- KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = ktx->ktx_alloclist;
+ PORTAL_FREE(ktx, sizeof(*ktx));
}
- if (kqswnal_data.kqn_rxds != NULL)
- {
- int i;
- int j;
+ while (kqswnal_data.kqn_rxds != NULL) {
+ int i;
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
- for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_kiov[j].kiov_page != NULL)
- __free_page (krx->krx_kiov[j].kiov_page);
- }
+ krx = kqswnal_data.kqn_rxds;
+ for (i = 0; i < krx->krx_npages; i++)
+ if (krx->krx_kiov[i].kiov_page != NULL)
+ __free_page (krx->krx_kiov[i].kiov_page);
- PORTAL_FREE(kqswnal_data.kqn_rxds,
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
- KQSW_NRXMSGS_LARGE));
+ kqswnal_data.kqn_rxds = krx->krx_alloclist;
+ PORTAL_FREE(krx, sizeof (*krx));
}
/* resets flags, pointers to NULL etc */
atomic_read(&portal_kmemory));
}
-static int __init
+static int
kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
ptl_ni_limits_t *requested_limits,
ptl_ni_limits_t *actual_limits)
#endif
int rc;
int i;
+ kqswnal_rx_t *krx;
+ kqswnal_tx_t *ktx;
int elan_page_idx;
ptl_process_id_t my_process_id;
int pkmem = atomic_read(&portal_kmemory);
+ LASSERT (nal == &kqswnal_api);
+
if (nal->nal_refct != 0) {
if (actual_limits != NULL)
- *actual_limits = kqswnal_lib.ni.actual_limits;
+ *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
/* This module got the first ref */
PORTAL_MODULE_USE;
return (PTL_OK);
CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
- memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
- memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
-#if MULTIRAIL_EKC
- kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
- kqswnal_rpc_failed.Status = -ECONNREFUSED;
-#endif
/* ensure all pointers NULL etc */
memset (&kqswnal_data, 0, sizeof (kqswnal_data));
- kqswnal_data.kqn_cb = &kqswnal_lib;
-
INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
spin_lock_init (&kqswnal_data.kqn_sched_lock);
init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
- spin_lock_init (&kqswnal_data.kqn_statelock);
- init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);
+ /* Leave kqn_rpc_success zeroed */
+#if MULTIRAIL_EKC
+ kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
+#else
+ kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
+#endif
/* pointers/lists/locks initialised */
kqswnal_data.kqn_init = KQN_INIT_DATA;
kqswnal_data.kqn_ep = ep_system();
if (kqswnal_data.kqn_ep == NULL) {
CERROR("Can't initialise EKC\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
CERROR("Can't get elan ID\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#else
if (kqswnal_data.kqn_ep == NULL)
{
CERROR ("Can't get elan device 0\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_IFACE_INVALID);
}
#endif
if (kqswnal_data.kqn_eptx == NULL)
{
CERROR ("Can't allocate transmitter\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_small == NULL)
{
CERROR ("Can't install small msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
if (kqswnal_data.kqn_eprx_large == NULL)
{
CERROR ("Can't install large msg receiver\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve tx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
EP_PERM_WRITE);
if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
CERROR("Can't reserve rx dma space\n");
- kqswnal_shutdown(&kqswnal_api);
+ kqswnal_shutdown(nal);
return (PTL_NO_SPACE);
}
#else
if (rc != DDI_SUCCESS)
{
CERROR ("Can't reserve rx dma space\n");
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
#endif
/**********************************************************************/
/* Allocate/Initialise transmit descriptors */
- PORTAL_ALLOC(kqswnal_data.kqn_txds,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
- if (kqswnal_data.kqn_txds == NULL)
- {
- kqswnal_shutdown (&kqswnal_api);
- return (PTL_NO_SPACE);
- }
-
- /* clear flags, null pointers etc */
- memset(kqswnal_data.kqn_txds, 0,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = NULL;
for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
{
int premapped_pages;
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
int basepage = i * KQSW_NTXMSGPAGES;
+ PORTAL_ALLOC (ktx, sizeof(*ktx));
+ if (ktx == NULL) {
+ kqswnal_shutdown (nal);
+ return (PTL_NO_SPACE);
+ }
+
+ memset(ktx, 0, sizeof(*ktx)); /* NULL pointers; zero flags */
+ ktx->ktx_alloclist = kqswnal_data.kqn_txds;
+ kqswnal_data.kqn_txds = ktx;
+
PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
if (ktx->ktx_buffer == NULL)
{
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
INIT_LIST_HEAD (&ktx->ktx_delayed_list);
ktx->ktx_state = KTX_IDLE;
+#if MULTIRAIL_EKC
+ ktx->ktx_rail = -1; /* unset rail */
+#endif
ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
list_add_tail (&ktx->ktx_list,
ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
/**********************************************************************/
/* Allocate/Initialise receive descriptors */
-
- PORTAL_ALLOC (kqswnal_data.kqn_rxds,
- sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
- if (kqswnal_data.kqn_rxds == NULL)
- {
- kqswnal_shutdown (&kqswnal_api);
- return (PTL_NO_SPACE);
- }
-
- memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
-
+ kqswnal_data.kqn_rxds = NULL;
elan_page_idx = 0;
for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
{
E3_Addr elanbuffer;
#endif
int j;
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+ PORTAL_ALLOC(krx, sizeof(*krx));
+ if (krx == NULL) {
+ kqswnal_shutdown(nal);
+ return (PTL_NO_SPACE);
+ }
+
+ memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */
+ krx->krx_alloclist = kqswnal_data.kqn_rxds;
+ kqswnal_data.kqn_rxds = krx;
if (i < KQSW_NRXMSGS_SMALL)
{
struct page *page = alloc_page(GFP_KERNEL);
if (page == NULL) {
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_NO_SPACE);
}
/* Network interface ready to initialise */
my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
- my_process_id.pid = 0;
+ my_process_id.pid = requested_pid;
- rc = lib_init(&kqswnal_lib, my_process_id,
+ rc = lib_init(&kqswnal_lib, nal, my_process_id,
requested_limits, actual_limits);
if (rc != PTL_OK)
{
CERROR ("lib_init failed %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (rc);
}
/**********************************************************************/
/* Queue receives, now that it's OK to run their completion callbacks */
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
/* NB this enqueue can allocate/sleep (attr == 0) */
+ krx->krx_state = KRX_POSTED;
#if MULTIRAIL_EKC
rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
&krx->krx_elanbuffer, 0);
if (rc != EP_SUCCESS)
{
CERROR ("failed ep_queue_receive %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
if (rc != 0)
{
CERROR ("failed to spawn scheduling thread: %d\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
}
rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
if (rc != 0) {
CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- kqswnal_shutdown (&kqswnal_api);
+ kqswnal_shutdown (nal);
return (PTL_FAIL);
}
{
int rc;
- kqswnal_api.startup = kqswnal_startup;
- kqswnal_api.shutdown = kqswnal_shutdown;
- kqswnal_api.forward = kqswnal_forward;
- kqswnal_api.yield = kqswnal_yield;
- kqswnal_api.lock = kqswnal_lock;
- kqswnal_api.unlock = kqswnal_unlock;
- kqswnal_api.nal_data = &kqswnal_data;
-
- kqswnal_lib.nal_data = &kqswnal_data;
+ kqswnal_api.nal_ni_init = kqswnal_startup;
+ kqswnal_api.nal_ni_fini = kqswnal_shutdown;
/* Initialise dynamic tunables to defaults once only */
+ kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
rc = ptl_register_nal(QSWNAL, &kqswnal_api);
/* Pure gateways, and the workaround for 'EKC blocks forever until
* the service is active' want the NAL started up at module load
* time... */
- rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni);
+ rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni);
if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
ptl_unregister_nal(QSWNAL);
return (-ENODEV);