rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
- read_lock(glock);
+ cfs_read_lock(glock);
peer = ksocknal_find_peer_locked(ni, id);
if (peer != NULL) {
connect = 0;
}
- read_unlock(glock);
+ cfs_read_unlock(glock);
if (last_alive != 0)
*when = last_alive;
ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
- write_lock_bh(glock);
+ cfs_write_lock_bh(glock);
peer = ksocknal_find_peer_locked(ni, id);
if (peer != NULL)
ksocknal_launch_all_connections_locked(peer);
- write_unlock_bh(glock);
+ cfs_write_unlock_bh(glock);
return;
}
return 0;
}
-void
+int
ksocknal_connect (ksock_route_t *route)
{
CFS_LIST_HEAD (zombies);
/* re-queue for attention; this frees me up to handle
* the peer's incoming connection request */
- if (rc == EALREADY) {
+ if (rc == EALREADY ||
+ (rc == 0 && peer->ksnp_accepting > 0)) {
/* We want to introduce a delay before next
* attempt to connect if we lost conn race,
* but the race is resolved quickly usually,
}
cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
- return;
+ return retry_later;
failed:
cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
ksocknal_peer_failed(peer);
ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1);
+ return 0;
}
/* Go through connd_routes queue looking for a route that
ksock_route_t *route;
cfs_waitlink_t wait;
signed long timeout;
+ int nloops = 0;
+ int cons_retry = 0;
int dropped_lock;
snprintf (name, sizeof (name), "socknal_cd%02ld", id);
cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
dropped_lock = 1;
- ksocknal_connect (route);
+ if (ksocknal_connect(route)) {
+ /* consecutive retry */
+ if (cons_retry++ > 10000) {
+ CWARN("massive consecutive "
+ "retry-connecting\n");
+ cons_retry = 0;
+ }
+ } else {
+ cons_retry = 0;
+ }
+
ksocknal_route_decref(route);
cfs_spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
ksocknal_data.ksnd_connd_connecting--;
}
- if (dropped_lock)
+ if (dropped_lock) {
+ if (++nloops < SOCKNAL_RESCHED)
+ continue;
+ cfs_spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+ nloops = 0;
+ cfs_cond_resched();
+ cfs_spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
continue;
+ }
/* Nothing to do for 'timeout' */
cfs_set_current_state (CFS_TASK_INTERRUPTIBLE);
cfs_waitq_add_exclusive (&ksocknal_data.ksnd_connd_waitq, &wait);
cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
+ nloops = 0;
cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout);
cfs_set_current_state (CFS_TASK_RUNNING);