- l_wait_event returns -EINTR, not -ERESTARTSYS, so check its result against -EINTR.
LASSERT(desc);
LASSERT(desc->bd_connection);
+ CERROR("IO of %d pages to/from %s:%d (conn %p) timed out\n",
+ desc->bd_page_count, desc->bd_connection->c_remote_uuid,
+ desc->bd_portal, desc->bd_connection);
desc->bd_connection->c_level = LUSTRE_CONN_RECOVD;
desc->bd_flags |= PTL_RPC_FL_TIMEOUT;
if (desc->bd_connection && class_signal_connection_failure) {
ret = l_wait_event(data->waitq, data->complete, &lwi);
if (atomic_dec_and_test(&data->refcount))
OBD_FREE(data, sizeof(*data));
- if (ret == -ERESTARTSYS)
+ if (ret == -EINTR)
return ret;
} else if (phase == CB_PHASE_FINISH) {
data->err = err;
struct ptlrpc_request *req = data;
ENTRY;
- CERROR("req timeout on connid %d xid %Ld\n", req->rq_connid,
- (unsigned long long)req->rq_xid);
+ CERROR("req timeout on connid %d xid %Ld portal %d op %d\n",
+ req->rq_connid, (unsigned long long)req->rq_xid,
+ req->rq_import->imp_client->cli_request_portal,
+ req->rq_reqmsg->opc);
req->rq_flags |= PTL_RPC_FL_TIMEOUT;
if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
RETURN(1);
spin_lock(&recovd->recovd_lock);
if (rd->rd_phase != RD_IDLE) {
- CDEBUG(D_INFO, "connection %p to %s already in recovery\n",
+ CERROR("connection %p to %s already in recovery\n",
conn, conn->c_remote_uuid);
/* XXX need to distinguish from failure-in-recovery */
spin_unlock(&recovd->recovd_lock);
struct ptlrpc_connection *conn =
list_entry(tmp, struct ptlrpc_connection,
c_recovd_data.rd_managed_chain);
- CDEBUG(D_NET, " %p = %s\n", conn, conn->c_remote_uuid);
+ CERROR(" %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
+ conn->c_recovd_data.rd_phase,
+ conn->c_recovd_data.rd_next_phase);
}
}
spin_lock(&recovd->recovd_lock);
- CDEBUG(D_NET, "managed: \n");
+ CERROR("managed: \n");
dump_connection_list(&recovd->recovd_managed_items);
- CDEBUG(D_NET, "troubled: \n");
+ CERROR("troubled: \n");
dump_connection_list(&recovd->recovd_troubled_items);
/*
CERROR("Error invoking recovery upcall (%s): %d\n",
obd_recovery_upcall, rc);
CERROR("Check /proc/sys/lustre/recovery_upcall?\n");
+ } else {
+ CERROR("Invoked upcall %s for connection %s\n",
+ argv[0], argv[1]);
}
RETURN(rc);
}
conn->c_level = LUSTRE_CONN_FULL;
recovd_conn_fixed(conn);
- CDEBUG(D_NET, "recovery complete on conn %p(%s), waking delayed reqs\n",
+ CERROR("recovery complete on conn %p(%s), waking delayed reqs\n",
conn, conn->c_remote_uuid);
/* Finally, continue what we delayed since recovery started */
list_for_each_safe(tmp, pos, &conn->c_delayed_head) {
static int ll_retry_recovery(struct ptlrpc_connection *conn)
{
+ CERROR("Recovery has failed on conn %p\n", conn);
#if 0
/* XXX use a timer, sideshow bob */
recovd_conn_fail(conn);