-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
if (dev->rad_id != devid)
continue;
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
- if (!dev->rad_ready) {
- dev->rad_ready = 1;
- cfs_waitq_signal(&dev->rad_waitq);
- }
+ if (!dev->rad_ready) {
+ dev->rad_ready = 1;
+ wake_up(&dev->rad_waitq);
+ }
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
return;
}
kra_device_t *dev = conn->rac_device;
unsigned long flags;
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
- if (!conn->rac_scheduled) {
- kranal_conn_addref(conn); /* +1 ref for scheduler */
- conn->rac_scheduled = 1;
- cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns);
- cfs_waitq_signal(&dev->rad_waitq);
- }
+ if (!conn->rac_scheduled) {
+ kranal_conn_addref(conn); /* +1 ref for scheduler */
+ conn->rac_scheduled = 1;
+ cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns);
+ wake_up(&dev->rad_waitq);
+ }
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
}
kra_tx_t *
unsigned long flags;
kra_tx_t *tx;
- cfs_spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
+ spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
if (cfs_list_empty(&kranal_data.kra_idle_txs)) {
- cfs_spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
return NULL;
}
* got a lock right now... */
tx->tx_cookie = kranal_data.kra_next_tx_cookie++;
- cfs_spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE);
tx->tx_msg.ram_type = RANAL_MSG_NONE;
tx->tx_conn = NULL;
- cfs_spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
+ spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
cfs_list_add_tail(&tx->tx_list, &kranal_data.kra_idle_txs);
- cfs_spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
/* finalize AFTER freeing lnet msgs */
for (i = 0; i < 2; i++) {
tx->tx_conn = conn;
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
cfs_list_add_tail(&tx->tx_list, &conn->rac_fmaq);
tx->tx_qtime = jiffies;
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
kranal_schedule_conn(conn);
}
kra_conn_t *conn;
int rc;
int retry;
- cfs_rwlock_t *g_lock = &kranal_data.kra_global_lock;
+ rwlock_t *g_lock = &kranal_data.kra_global_lock;
/* If I get here, I've committed to send, so I complete the tx with
* failure on any problems */
for (retry = 0; ; retry = 1) {
- cfs_read_lock(g_lock);
+ read_lock(g_lock);
peer = kranal_find_peer_locked(nid);
if (peer != NULL) {
conn = kranal_find_conn_locked(peer);
if (conn != NULL) {
kranal_post_fma(conn, tx);
- cfs_read_unlock(g_lock);
+ read_unlock(g_lock);
return;
}
}
/* Making connections; I'll need a write lock... */
- cfs_read_unlock(g_lock);
- cfs_write_lock_irqsave(g_lock, flags);
+ read_unlock(g_lock);
+ write_lock_irqsave(g_lock, flags);
peer = kranal_find_peer_locked(nid);
if (peer != NULL)
break;
- cfs_write_unlock_irqrestore(g_lock, flags);
+ write_unlock_irqrestore(g_lock, flags);
if (retry) {
CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
if (conn != NULL) {
/* Connection exists; queue message on it */
kranal_post_fma(conn, tx);
- cfs_write_unlock_irqrestore(g_lock, flags);
+ write_unlock_irqrestore(g_lock, flags);
return;
}
if (!(peer->rap_reconnect_interval == 0 || /* first attempt */
cfs_time_aftereq(jiffies, peer->rap_reconnect_time))) {
- cfs_write_unlock_irqrestore(g_lock, flags);
+ write_unlock_irqrestore(g_lock, flags);
kranal_tx_done(tx, -EHOSTUNREACH);
return;
}
peer->rap_connecting = 1;
kranal_peer_addref(peer); /* extra ref for connd */
- cfs_spin_lock(&kranal_data.kra_connd_lock);
+ spin_lock(&kranal_data.kra_connd_lock);
- cfs_list_add_tail(&peer->rap_connd_list,
- &kranal_data.kra_connd_peers);
- cfs_waitq_signal(&kranal_data.kra_connd_waitq);
+ cfs_list_add_tail(&peer->rap_connd_list,
+ &kranal_data.kra_connd_peers);
+ wake_up(&kranal_data.kra_connd_waitq);
- cfs_spin_unlock(&kranal_data.kra_connd_lock);
+ spin_unlock(&kranal_data.kra_connd_lock);
}
/* A connection is being established; queue the message... */
cfs_list_add_tail(&tx->tx_list, &peer->rap_tx_queue);
- cfs_write_unlock_irqrestore(g_lock, flags);
+ write_unlock_irqrestore(g_lock, flags);
}
void
rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc);
LASSERT (rrc == RAP_SUCCESS);
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
cfs_list_add_tail(&tx->tx_list, &conn->rac_rdmaq);
tx->tx_qtime = jiffies;
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
}
int
}
int
-kranal_thread_start (int(*fn)(void *arg), void *arg)
+kranal_thread_start(int(*fn)(void *arg), void *arg, char *name)
{
- long pid = cfs_kernel_thread(fn, arg, 0);
+ struct task_struct *task = cfs_thread_run(fn, arg, name);
- if (pid < 0)
- return(int)pid;
-
- cfs_atomic_inc(&kranal_data.kra_nthreads);
- return 0;
+ if (!IS_ERR(task))
+ cfs_atomic_inc(&kranal_data.kra_nthreads);
+ return PTR_ERR(task);
}
void
LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED ||
conn->rac_state == RANAL_CONN_CLOSING);
- if (!conn->rac_close_sent &&
- cfs_time_aftereq(now, conn->rac_last_tx + conn->rac_keepalive *
- CFS_HZ)) {
- /* not sent in a while; schedule conn so scheduler sends a keepalive */
- CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
- conn, libcfs_nid2str(conn->rac_peer->rap_nid));
- kranal_schedule_conn(conn);
- }
-
- timeout = conn->rac_timeout * CFS_HZ;
-
- if (!conn->rac_close_recvd &&
- cfs_time_aftereq(now, conn->rac_last_rx + timeout)) {
- CERROR("%s received from %s within %lu seconds\n",
- (conn->rac_state == RANAL_CONN_ESTABLISHED) ?
- "Nothing" : "CLOSE not",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - conn->rac_last_rx)/CFS_HZ);
- return -ETIMEDOUT;
- }
+ if (!conn->rac_close_sent &&
+ cfs_time_aftereq(now, conn->rac_last_tx + conn->rac_keepalive *
+ HZ)) {
+ /* not sent in a while; schedule conn so scheduler sends a keepalive */
+ CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
+ conn, libcfs_nid2str(conn->rac_peer->rap_nid));
+ kranal_schedule_conn(conn);
+ }
+
+ timeout = conn->rac_timeout * HZ;
+
+ if (!conn->rac_close_recvd &&
+ cfs_time_aftereq(now, conn->rac_last_rx + timeout)) {
+ CERROR("%s received from %s within %lu seconds\n",
+ (conn->rac_state == RANAL_CONN_ESTABLISHED) ?
+ "Nothing" : "CLOSE not",
+ libcfs_nid2str(conn->rac_peer->rap_nid),
+ (now - conn->rac_last_rx)/HZ);
+ return -ETIMEDOUT;
+ }
if (conn->rac_state != RANAL_CONN_ESTABLISHED)
return 0;
* in case of hardware/software errors that make this conn seem
* responsive even though it isn't progressing its message queues. */
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
-
- cfs_list_for_each (ttmp, &conn->rac_fmaq) {
- tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
- if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on fmaq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/CFS_HZ);
- return -ETIMEDOUT;
- }
- }
-
- cfs_list_for_each (ttmp, &conn->rac_rdmaq) {
- tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
- if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on rdmaq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/CFS_HZ);
- return -ETIMEDOUT;
- }
- }
-
- cfs_list_for_each (ttmp, &conn->rac_replyq) {
- tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
- if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
- CERROR("tx on replyq for %s blocked %lu seconds\n",
- libcfs_nid2str(conn->rac_peer->rap_nid),
- (now - tx->tx_qtime)/CFS_HZ);
- return -ETIMEDOUT;
- }
- }
-
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
+
+ cfs_list_for_each (ttmp, &conn->rac_fmaq) {
+ tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
+
+ if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
+ CERROR("tx on fmaq for %s blocked %lu seconds\n",
+ libcfs_nid2str(conn->rac_peer->rap_nid),
+ (now - tx->tx_qtime)/HZ);
+ return -ETIMEDOUT;
+ }
+ }
+
+ cfs_list_for_each (ttmp, &conn->rac_rdmaq) {
+ tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
+
+ if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
+ CERROR("tx on rdmaq for %s blocked %lu seconds\n",
+ libcfs_nid2str(conn->rac_peer->rap_nid),
+ (now - tx->tx_qtime)/HZ);
+ return -ETIMEDOUT;
+ }
+ }
+
+ cfs_list_for_each (ttmp, &conn->rac_replyq) {
+ tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
+
+ if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
+ CERROR("tx on replyq for %s blocked %lu seconds\n",
+ libcfs_nid2str(conn->rac_peer->rap_nid),
+ (now - tx->tx_qtime)/HZ);
+ return -ETIMEDOUT;
+ }
+ }
+
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
return 0;
}
again:
/* NB. We expect to check all the conns and not find any problems, so
* we just use a shared lock while we take a look... */
- cfs_read_lock(&kranal_data.kra_global_lock);
+ read_lock(&kranal_data.kra_global_lock);
cfs_list_for_each (ctmp, conns) {
conn = cfs_list_entry(ctmp, kra_conn_t, rac_hashlist);
continue;
kranal_conn_addref(conn);
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
CERROR("Conn to %s, cqid %d timed out\n",
libcfs_nid2str(conn->rac_peer->rap_nid),
conn->rac_cqid);
- cfs_write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
switch (conn->rac_state) {
default:
break;
}
- cfs_write_unlock_irqrestore(&kranal_data.kra_global_lock,
+ write_unlock_irqrestore(&kranal_data.kra_global_lock,
flags);
kranal_conn_decref(conn);
goto again;
}
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
}
int
kranal_connd (void *arg)
{
- long id = (long)arg;
- char name[16];
- cfs_waitlink_t wait;
- unsigned long flags;
- kra_peer_t *peer;
- kra_acceptsock_t *ras;
- int did_something;
+ long id = (long)arg;
+ wait_queue_t wait;
+ unsigned long flags;
+ kra_peer_t *peer;
+ kra_acceptsock_t *ras;
+ int did_something;
- snprintf(name, sizeof(name), "kranal_connd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
+ cfs_block_allsigs();
- cfs_waitlink_init(&wait);
+ init_waitqueue_entry_current(&wait);
- cfs_spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
+ spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- while (!kranal_data.kra_shutdown) {
- did_something = 0;
+ while (!kranal_data.kra_shutdown) {
+ did_something = 0;
- if (!cfs_list_empty(&kranal_data.kra_connd_acceptq)) {
- ras = cfs_list_entry(kranal_data.kra_connd_acceptq.next,
- kra_acceptsock_t, ras_list);
- cfs_list_del(&ras->ras_list);
+ if (!cfs_list_empty(&kranal_data.kra_connd_acceptq)) {
+ ras = cfs_list_entry(kranal_data.kra_connd_acceptq.next,
+ kra_acceptsock_t, ras_list);
+ cfs_list_del(&ras->ras_list);
- cfs_spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
- flags);
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
+ flags);
- CDEBUG(D_NET,"About to handshake someone\n");
+ CDEBUG(D_NET,"About to handshake someone\n");
- kranal_conn_handshake(ras->ras_sock, NULL);
- kranal_free_acceptsock(ras);
+ kranal_conn_handshake(ras->ras_sock, NULL);
+ kranal_free_acceptsock(ras);
- CDEBUG(D_NET,"Finished handshaking someone\n");
+ CDEBUG(D_NET,"Finished handshaking someone\n");
- cfs_spin_lock_irqsave(&kranal_data.kra_connd_lock,
- flags);
- did_something = 1;
- }
+ spin_lock_irqsave(&kranal_data.kra_connd_lock,
+ flags);
+ did_something = 1;
+ }
- if (!cfs_list_empty(&kranal_data.kra_connd_peers)) {
- peer = cfs_list_entry(kranal_data.kra_connd_peers.next,
- kra_peer_t, rap_connd_list);
+ if (!cfs_list_empty(&kranal_data.kra_connd_peers)) {
+ peer = cfs_list_entry(kranal_data.kra_connd_peers.next,
+ kra_peer_t, rap_connd_list);
- cfs_list_del_init(&peer->rap_connd_list);
- cfs_spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
- flags);
+ cfs_list_del_init(&peer->rap_connd_list);
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
+ flags);
- kranal_connect(peer);
- kranal_peer_decref(peer);
+ kranal_connect(peer);
+ kranal_peer_decref(peer);
- cfs_spin_lock_irqsave(&kranal_data.kra_connd_lock,
- flags);
- did_something = 1;
- }
+ spin_lock_irqsave(&kranal_data.kra_connd_lock,
+ flags);
+ did_something = 1;
+ }
- if (did_something)
- continue;
+ if (did_something)
+ continue;
- cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
- cfs_waitq_add_exclusive(&kranal_data.kra_connd_waitq, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&kranal_data.kra_connd_waitq, &wait);
- cfs_spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
- cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
+ waitq_wait(&wait, TASK_INTERRUPTIBLE);
- cfs_set_current_state(CFS_TASK_RUNNING);
- cfs_waitq_del(&kranal_data.kra_connd_waitq, &wait);
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kranal_data.kra_connd_waitq, &wait);
- cfs_spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- }
+ spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
+ }
- cfs_spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
- kranal_thread_fini();
- return 0;
+ kranal_thread_fini();
+ return 0;
}
void
LASSERT (timeout > 0);
- cfs_spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
+ spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
if (timeout < kranal_data.kra_new_min_timeout)
kranal_data.kra_new_min_timeout = timeout;
- cfs_spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
+ spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
}
int
kranal_reaper (void *arg)
{
- cfs_waitlink_t wait;
- unsigned long flags;
- long timeout;
- int i;
- int conn_entries = kranal_data.kra_conn_hash_size;
- int conn_index = 0;
- int base_index = conn_entries - 1;
- unsigned long next_check_time = jiffies;
- long next_min_timeout = CFS_MAX_SCHEDULE_TIMEOUT;
- long current_min_timeout = 1;
-
- cfs_daemonize("kranal_reaper");
- cfs_block_allsigs();
-
- cfs_waitlink_init(&wait);
-
- cfs_spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- while (!kranal_data.kra_shutdown) {
- /* I wake up every 'p' seconds to check for timeouts on some
- * more peers. I try to check every connection 'n' times
- * within the global minimum of all keepalive and timeout
- * intervals, to ensure I attend to every connection within
- * (n+1)/n times its timeout intervals. */
- const int p = 1;
- const int n = 3;
- unsigned long min_timeout;
- int chunk;
-
- /* careful with the jiffy wrap... */
- timeout = (long)(next_check_time - jiffies);
- if (timeout > 0) {
- cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
- cfs_waitq_add(&kranal_data.kra_reaper_waitq, &wait);
-
- cfs_spin_unlock_irqrestore(&kranal_data.kra_reaper_lock,
- flags);
-
- cfs_waitq_timedwait(&wait, CFS_TASK_INTERRUPTIBLE,
- timeout);
-
- cfs_spin_lock_irqsave(&kranal_data.kra_reaper_lock,
- flags);
-
- cfs_set_current_state(CFS_TASK_RUNNING);
- cfs_waitq_del(&kranal_data.kra_reaper_waitq, &wait);
- continue;
- }
-
- if (kranal_data.kra_new_min_timeout !=
- CFS_MAX_SCHEDULE_TIMEOUT) {
- /* new min timeout set: restart min timeout scan */
- next_min_timeout = CFS_MAX_SCHEDULE_TIMEOUT;
- base_index = conn_index - 1;
- if (base_index < 0)
- base_index = conn_entries - 1;
-
- if (kranal_data.kra_new_min_timeout <
- current_min_timeout) {
- current_min_timeout =
- kranal_data.kra_new_min_timeout;
- CDEBUG(D_NET, "Set new min timeout %ld\n",
- current_min_timeout);
- }
-
- kranal_data.kra_new_min_timeout =
- CFS_MAX_SCHEDULE_TIMEOUT;
- }
- min_timeout = current_min_timeout;
-
- cfs_spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
- LASSERT (min_timeout > 0);
-
- /* Compute how many table entries to check now so I get round
- * the whole table fast enough given that I do this at fixed
- * intervals of 'p' seconds) */
- chunk = conn_entries;
- if (min_timeout > n * p)
- chunk = (chunk * n * p) / min_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kranal_reaper_check(conn_index,
- &next_min_timeout);
- conn_index = (conn_index + 1) % conn_entries;
- }
-
- next_check_time += p * CFS_HZ;
-
- cfs_spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
- if (((conn_index - chunk <= base_index &&
- base_index < conn_index) ||
- (conn_index - conn_entries - chunk <= base_index &&
- base_index < conn_index - conn_entries))) {
-
- /* Scanned all conns: set current_min_timeout... */
- if (current_min_timeout != next_min_timeout) {
- current_min_timeout = next_min_timeout;
- CDEBUG(D_NET, "Set new min timeout %ld\n",
- current_min_timeout);
- }
-
- /* ...and restart min timeout scan */
- next_min_timeout = CFS_MAX_SCHEDULE_TIMEOUT;
- base_index = conn_index - 1;
- if (base_index < 0)
- base_index = conn_entries - 1;
- }
- }
-
- kranal_thread_fini();
- return 0;
+ wait_queue_t wait;
+ unsigned long flags;
+ long timeout;
+ int i;
+ int conn_entries = kranal_data.kra_conn_hash_size;
+ int conn_index = 0;
+ int base_index = conn_entries - 1;
+ unsigned long next_check_time = jiffies;
+ long next_min_timeout = MAX_SCHEDULE_TIMEOUT;
+ long current_min_timeout = 1;
+
+ cfs_block_allsigs();
+
+ init_waitqueue_entry_current(&wait);
+
+ spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
+
+ while (!kranal_data.kra_shutdown) {
+ /* I wake up every 'p' seconds to check for timeouts on some
+ * more peers. I try to check every connection 'n' times
+ * within the global minimum of all keepalive and timeout
+ * intervals, to ensure I attend to every connection within
+ * (n+1)/n times its timeout intervals. */
+ const int p = 1;
+ const int n = 3;
+ unsigned long min_timeout;
+ int chunk;
+
+ /* careful with the jiffy wrap... */
+ timeout = (long)(next_check_time - jiffies);
+ if (timeout > 0) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
+
+ spin_unlock_irqrestore(&kranal_data.kra_reaper_lock,
+ flags);
+
+ waitq_timedwait(&wait, TASK_INTERRUPTIBLE,
+ timeout);
+
+ spin_lock_irqsave(&kranal_data.kra_reaper_lock,
+ flags);
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
+ continue;
+ }
+
+ if (kranal_data.kra_new_min_timeout !=
+ MAX_SCHEDULE_TIMEOUT) {
+ /* new min timeout set: restart min timeout scan */
+ next_min_timeout = MAX_SCHEDULE_TIMEOUT;
+ base_index = conn_index - 1;
+ if (base_index < 0)
+ base_index = conn_entries - 1;
+
+ if (kranal_data.kra_new_min_timeout <
+ current_min_timeout) {
+ current_min_timeout =
+ kranal_data.kra_new_min_timeout;
+ CDEBUG(D_NET, "Set new min timeout %ld\n",
+ current_min_timeout);
+ }
+
+ kranal_data.kra_new_min_timeout =
+ MAX_SCHEDULE_TIMEOUT;
+ }
+ min_timeout = current_min_timeout;
+
+ spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
+
+ LASSERT (min_timeout > 0);
+
+ /* Compute how many table entries to check now so I get round
+ * the whole table fast enough given that I do this at fixed
+ * intervals of 'p' seconds) */
+ chunk = conn_entries;
+ if (min_timeout > n * p)
+ chunk = (chunk * n * p) / min_timeout;
+ if (chunk == 0)
+ chunk = 1;
+
+ for (i = 0; i < chunk; i++) {
+ kranal_reaper_check(conn_index,
+ &next_min_timeout);
+ conn_index = (conn_index + 1) % conn_entries;
+ }
+
+ next_check_time += p * HZ;
+
+ spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
+
+ if (((conn_index - chunk <= base_index &&
+ base_index < conn_index) ||
+ (conn_index - conn_entries - chunk <= base_index &&
+ base_index < conn_index - conn_entries))) {
+
+ /* Scanned all conns: set current_min_timeout... */
+ if (current_min_timeout != next_min_timeout) {
+ current_min_timeout = next_min_timeout;
+ CDEBUG(D_NET, "Set new min timeout %ld\n",
+ current_min_timeout);
+ }
+
+ /* ...and restart min timeout scan */
+ next_min_timeout = MAX_SCHEDULE_TIMEOUT;
+ base_index = conn_index - 1;
+ if (base_index < 0)
+ base_index = conn_entries - 1;
+ }
+ }
+
+ kranal_thread_fini();
+ return 0;
}
void
LASSERT (rrc == RAP_SUCCESS);
LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0);
- cfs_read_lock(&kranal_data.kra_global_lock);
+ read_lock(&kranal_data.kra_global_lock);
conn = kranal_cqid2conn_locked(cqid);
if (conn == NULL) {
/* Conn was destroyed? */
CDEBUG(D_NET, "RDMA CQID lookup %d failed\n", cqid);
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
continue;
}
CDEBUG(D_NET, "Completed %p\n",
cfs_list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list));
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
LASSERT (!cfs_list_empty(&conn->rac_rdmaq));
tx = cfs_list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list);
cfs_list_add_tail(&tx->tx_list, &conn->rac_fmaq);
tx->tx_qtime = jiffies;
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
/* Get conn's fmaq processed, now I've just put something
* there */
kranal_schedule_conn(conn);
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
}
}
if ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0) {
- cfs_read_lock(&kranal_data.kra_global_lock);
+ read_lock(&kranal_data.kra_global_lock);
conn = kranal_cqid2conn_locked(cqid);
if (conn == NULL) {
kranal_schedule_conn(conn);
}
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
continue;
}
for (i = 0; i < kranal_data.kra_conn_hash_size; i++) {
- cfs_read_lock(&kranal_data.kra_global_lock);
+ read_lock(&kranal_data.kra_global_lock);
conns = &kranal_data.kra_conns[i];
}
/* don't block write lockers for too long... */
- cfs_read_unlock(&kranal_data.kra_global_lock);
+ read_unlock(&kranal_data.kra_global_lock);
}
}
}
return 0;
case RAP_NOT_DONE:
- if (cfs_time_aftereq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive *
- CFS_HZ))
- CWARN("EAGAIN sending %02x (idle %lu secs)\n",
- msg->ram_type,
- (jiffies - conn->rac_last_tx)/CFS_HZ);
- return -EAGAIN;
+ if (cfs_time_aftereq(jiffies,
+ conn->rac_last_tx + conn->rac_keepalive *
+ HZ))
+ CWARN("EAGAIN sending %02x (idle %lu secs)\n",
+ msg->ram_type,
+ (jiffies - conn->rac_last_tx)/HZ);
+ return -EAGAIN;
}
}
/* RDMAs in progress */
LASSERT (!conn->rac_close_sent);
- if (cfs_time_aftereq(jiffies,
- conn->rac_last_tx +
- conn->rac_keepalive * CFS_HZ)) {
- CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
- kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
- kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
- }
+ if (cfs_time_aftereq(jiffies,
+ conn->rac_last_tx +
+ conn->rac_keepalive * HZ)) {
+ CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
+ kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
+ kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
+ }
return;
}
if (!conn->rac_close_recvd)
return;
- cfs_write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
if (conn->rac_state == RANAL_CONN_CLOSING)
kranal_terminate_conn_locked(conn);
- cfs_write_unlock_irqrestore(&kranal_data.kra_global_lock,
+ write_unlock_irqrestore(&kranal_data.kra_global_lock,
flags);
return;
}
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
if (cfs_list_empty(&conn->rac_fmaq)) {
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
-
- if (cfs_time_aftereq(jiffies,
- conn->rac_last_tx + conn->rac_keepalive *
- CFS_HZ)) {
- CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
- libcfs_nid2str(conn->rac_peer->rap_nid), conn,
- (jiffies - conn->rac_last_tx)/CFS_HZ,
- conn->rac_keepalive);
- kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
- kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
- }
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
+
+ if (cfs_time_aftereq(jiffies,
+ conn->rac_last_tx + conn->rac_keepalive *
+ HZ)) {
+ CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
+ libcfs_nid2str(conn->rac_peer->rap_nid), conn,
+ (jiffies - conn->rac_last_tx)/HZ,
+ conn->rac_keepalive);
+ kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
+ kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
+ }
return;
}
cfs_list_del(&tx->tx_list);
more_to_do = !cfs_list_empty(&conn->rac_fmaq);
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
expect_reply = 0;
CDEBUG(D_NET, "sending regular msg: %p, type %02x, cookie "LPX64"\n",
/* I need credits to send this. Replace tx at the head of the
* fmaq and I'll get rescheduled when credits appear */
CDEBUG(D_NET, "EAGAIN on %p\n", conn);
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
cfs_list_add(&tx->tx_list, &conn->rac_fmaq);
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
return;
}
} else {
/* LASSERT(current) above ensures this doesn't race with reply
* processing */
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
cfs_list_add_tail(&tx->tx_list, &conn->rac_replyq);
tx->tx_qtime = jiffies;
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
}
if (more_to_do) {
kra_tx_t *tx;
unsigned long flags;
- cfs_spin_lock_irqsave(&conn->rac_lock, flags);
+ spin_lock_irqsave(&conn->rac_lock, flags);
cfs_list_for_each(ttmp, &conn->rac_replyq) {
tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
continue;
if (tx->tx_msg.ram_type != type) {
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
CWARN("Unexpected type %x (%x expected) "
"matched reply from %s\n",
tx->tx_msg.ram_type, type,
}
cfs_list_del(&tx->tx_list);
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
return tx;
}
- cfs_spin_unlock_irqrestore(&conn->rac_lock, flags);
+ spin_unlock_irqrestore(&conn->rac_lock, flags);
CWARN("Unmatched reply %02x/"LPX64" from %s\n",
type, cookie, libcfs_nid2str(conn->rac_peer->rap_nid));
return NULL;
if (msg->ram_type == RANAL_MSG_CLOSE) {
CWARN("RX CLOSE from %s\n", libcfs_nid2str(conn->rac_peer->rap_nid));
conn->rac_close_recvd = 1;
- cfs_write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
if (conn->rac_state == RANAL_CONN_ESTABLISHED)
kranal_close_conn_locked(conn, 0);
conn->rac_close_sent)
kranal_terminate_conn_locked(conn);
- cfs_write_unlock_irqrestore(&kranal_data.kra_global_lock,
+ write_unlock_irqrestore(&kranal_data.kra_global_lock,
flags);
goto out;
}
conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies);
}
/*
 * Poll RapkCompleteSync() for a new connection.
 *
 * Returns 0 once RapkCompleteSync() reports RAP_SUCCESS, -EAGAIN while it
 * reports RAP_NOT_DONE and rac_timeout seconds have not yet passed since
 * rac_last_tx, and -ETIMEDOUT once that interval has passed.
 */
-int
-kranal_process_new_conn (kra_conn_t *conn)
+int kranal_process_new_conn (kra_conn_t *conn)
{
- RAP_RETURN rrc;
+ RAP_RETURN rrc;
- rrc = RapkCompleteSync(conn->rac_rihandle, 1);
- if (rrc == RAP_SUCCESS)
- return 0;
+ rrc = RapkCompleteSync(conn->rac_rihandle, 1);
+ if (rrc == RAP_SUCCESS)
+ return 0;
- LASSERT (rrc == RAP_NOT_DONE);
- if (!cfs_time_aftereq(jiffies, conn->rac_last_tx +
- conn->rac_timeout * CFS_HZ))
- return -EAGAIN;
+ LASSERT (rrc == RAP_NOT_DONE); /* the only other result expected here */
+ if (!cfs_time_aftereq(jiffies, conn->rac_last_tx +
+ conn->rac_timeout * HZ))
+ return -EAGAIN; /* still inside the timeout window: caller retries */
- /* Too late */
- rrc = RapkCompleteSync(conn->rac_rihandle, 0);
- LASSERT (rrc == RAP_SUCCESS);
- return -ETIMEDOUT;
+ /* Too late */
+ rrc = RapkCompleteSync(conn->rac_rihandle, 0); /* NOTE(review): a second argument of 0 presumably abandons the sync - confirm against the RapK API */
+ LASSERT (rrc == RAP_SUCCESS);
+ return -ETIMEDOUT;
}
int
kranal_scheduler (void *arg)
{
- kra_device_t *dev = (kra_device_t *)arg;
- cfs_waitlink_t wait;
- char name[16];
- kra_conn_t *conn;
+ kra_device_t *dev = (kra_device_t *)arg;
+ wait_queue_t wait;
+ kra_conn_t *conn;
unsigned long flags;
unsigned long deadline;
unsigned long soonest;
int dropped_lock;
int busy_loops = 0;
- snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx);
- cfs_daemonize(name);
cfs_block_allsigs();
- dev->rad_scheduler = current;
- cfs_waitlink_init(&wait);
+ dev->rad_scheduler = current;
+ init_waitqueue_entry_current(&wait);
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
while (!kranal_data.kra_shutdown) {
/* Safe: kra_shutdown only set when quiescent */
if (busy_loops++ >= RANAL_RESCHED) {
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
- cfs_cond_resched();
- busy_loops = 0;
+ cond_resched();
+ busy_loops = 0;
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
}
dropped_lock = 0;
if (dev->rad_ready) {
/* Device callback fired since I last checked it */
dev->rad_ready = 0;
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
dropped_lock = 1;
kranal_check_rdma_cq(dev);
kranal_check_fma_cq(dev);
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
}
cfs_list_for_each_safe(tmp, nxt, &dev->rad_ready_conns) {
cfs_list_del_init(&conn->rac_schedlist);
LASSERT (conn->rac_scheduled);
conn->rac_scheduled = 0;
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
dropped_lock = 1;
kranal_check_fma_rx(conn);
kranal_complete_closed_conn(conn);
kranal_conn_decref(conn);
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ spin_lock_irqsave(&dev->rad_lock, flags);
}
nsoonest = 0;
deadline = conn->rac_last_tx + conn->rac_keepalive;
if (cfs_time_aftereq(jiffies, deadline)) {
/* Time to process this new conn */
- cfs_spin_unlock_irqrestore(&dev->rad_lock,
+ spin_unlock_irqrestore(&dev->rad_lock,
flags);
dropped_lock = 1;
rc = kranal_process_new_conn(conn);
if (rc != -EAGAIN) {
/* All done with this conn */
- cfs_spin_lock_irqsave(&dev->rad_lock,
+ spin_lock_irqsave(&dev->rad_lock,
flags);
cfs_list_del_init(&conn->rac_schedlist);
- cfs_spin_unlock_irqrestore(&dev-> \
+ spin_unlock_irqrestore(&dev-> \
rad_lock,
flags);
kranal_conn_decref(conn);
- cfs_spin_lock_irqsave(&dev->rad_lock,
+ spin_lock_irqsave(&dev->rad_lock,
flags);
continue;
}
- /* retry with exponential backoff until HZ */
- if (conn->rac_keepalive == 0)
- conn->rac_keepalive = 1;
- else if (conn->rac_keepalive <= CFS_HZ)
- conn->rac_keepalive *= 2;
- else
- conn->rac_keepalive += CFS_HZ;
-
- deadline = conn->rac_last_tx + conn->rac_keepalive;
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
+ /* retry with exponential backoff until HZ */
+ if (conn->rac_keepalive == 0)
+ conn->rac_keepalive = 1;
+ else if (conn->rac_keepalive <= HZ)
+ conn->rac_keepalive *= 2;
+ else
+ conn->rac_keepalive += HZ;
+
+ deadline = conn->rac_last_tx + conn->rac_keepalive;
+ spin_lock_irqsave(&dev->rad_lock, flags);
}
/* Does this conn need attention soonest? */
if (dropped_lock) /* may sleep iff I didn't drop the lock */
continue;
- cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
- cfs_waitq_add_exclusive(&dev->rad_waitq, &wait);
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
-
- if (nsoonest == 0) {
- busy_loops = 0;
- cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
- } else {
- timeout = (long)(soonest - jiffies);
- if (timeout > 0) {
- busy_loops = 0;
- cfs_waitq_timedwait(&wait,
- CFS_TASK_INTERRUPTIBLE,
- timeout);
- }
- }
-
- cfs_waitq_del(&dev->rad_waitq, &wait);
- cfs_set_current_state(CFS_TASK_RUNNING);
- cfs_spin_lock_irqsave(&dev->rad_lock, flags);
- }
-
- cfs_spin_unlock_irqrestore(&dev->rad_lock, flags);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue_exclusive(&dev->rad_waitq, &wait);
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
+
+ if (nsoonest == 0) {
+ busy_loops = 0;
+ waitq_wait(&wait, TASK_INTERRUPTIBLE);
+ } else {
+ timeout = (long)(soonest - jiffies);
+ if (timeout > 0) {
+ busy_loops = 0;
+ waitq_timedwait(&wait,
+ TASK_INTERRUPTIBLE,
+ timeout);
+ }
+ }
+
+ remove_wait_queue(&dev->rad_waitq, &wait);
+ set_current_state(TASK_RUNNING);
+ spin_lock_irqsave(&dev->rad_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&dev->rad_lock, flags);
dev->rad_scheduler = NULL;
kranal_thread_fini();