spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_state == LUSTRE_IMP_FULL) {
+ CERROR("%s: connection lost to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc)
{
struct l_wait_info lwi;
+ unsigned long timeout;
int inflight = 0;
int rc;
if (in_rpc)
inflight = 1;
- /* wait for all requests to error out and call completion callbacks */
- lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL,
- NULL, NULL);
- rc = l_wait_event(imp->imp_recovery_waitq,
- (atomic_read(&imp->imp_inflight) == inflight),
+
+ /* wait for all requests to error out and call completion
+ callbacks */
+ if (imp->imp_server_timeout)
+ timeout = obd_timeout / 2;
+ else
+ timeout = obd_timeout;
+ timeout = MAX(timeout * HZ, 1);
+ lwi = LWI_TIMEOUT_INTR(timeout, NULL, NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inflight) == inflight),
&lwi);
if (rc)
obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
}
-static void ptlrpc_activate_import(struct obd_import *imp)
+void ptlrpc_activate_import(struct obd_import *imp)
{
struct obd_device *obd = imp->imp_obd;
unsigned long flags;
EXIT;
}
+/*
+ * True when @last is non-zero and no more than obd_timeout * 2 seconds'
+ * worth of jiffies have elapsed since it.  A zero @last is never "too
+ * soon".  The difference is evaluated as a signed long so the comparison
+ * keeps working when jiffies wraps.
+ */
+#define ATTEMPT_TOO_SOON(last)  \
+        ((last) && ((long)(jiffies - (last)) <= (long)(obd_timeout * 2 * HZ)))
+
+/*
+ * Pick the connection the import will use for its next connect attempt.
+ *
+ * Walks imp->imp_conn_list and takes the first entry that has not been
+ * attempted recently (see ATTEMPT_TOO_SOON); if every entry was tried
+ * recently, falls back to imp->imp_conn_current.  The chosen entry is
+ * stamped with the current jiffies, the list is rotated so that it becomes
+ * the head, and both imp->imp_connection and the DLM export's
+ * exp_connection are switched to it, each holding its own reference.
+ *
+ * Returns 0 on success, -EINVAL if the import has no connections at all.
+ *
+ * NOTE(review): imp_lock is taken with plain spin_lock() here while other
+ * paths in this file use spin_lock_irqsave() on the same lock -- confirm
+ * that is intentional.
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+        struct obd_import_conn *imp_conn, *tmp;
+        struct obd_export *dlmexp;
+        int found = 0;
+        ENTRY;
+
+        spin_lock(&imp->imp_lock);
+
+        if (list_empty(&imp->imp_conn_list)) {
+                CERROR("no available connections on imp %p@%s\n",
+                       imp, imp->imp_obd->obd_name);
+                spin_unlock(&imp->imp_lock);
+                RETURN(-EINVAL);
+        }
+
+        list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
+                if (!ATTEMPT_TOO_SOON(imp_conn->oic_last_attempt)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        /* if not found, simply choose the current one */
+        if (!found) {
+                CWARN("obd %s imp 0x%p: all connections have been "
+                      "tried recently\n", imp->imp_obd->obd_name, imp);
+                LASSERT(imp->imp_conn_current);
+                imp_conn = imp->imp_conn_current;
+        }
+        LASSERT(imp_conn->oic_conn);
+
+        imp_conn->oic_last_attempt = jiffies;
+
+        /* move the items ahead of the selected one to list tail */
+        while (1) {
+                tmp = list_entry(imp->imp_conn_list.next,
+                                 struct obd_import_conn, oic_item);
+                if (tmp == imp_conn)
+                        break;
+                list_del(&tmp->oic_item);
+                list_add_tail(&tmp->oic_item, &imp->imp_conn_list);
+        }
+
+        /* switch connection, don't mind if it's same as the current one */
+        if (imp->imp_connection)
+                ptlrpc_put_connection(imp->imp_connection);
+        imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+
+        dlmexp = class_conn2export(&imp->imp_dlm_handle);
+        LASSERT(dlmexp != NULL);
+        /* Drop the export's OWN old reference; putting imp->imp_connection
+         * here would release the ref just taken above and leak the
+         * export's previous connection. */
+        if (dlmexp->exp_connection)
+                ptlrpc_put_connection(dlmexp->exp_connection);
+        dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+        class_export_put(dlmexp);
+
+        imp->imp_conn_current = imp_conn;
+        CWARN("obd %s imp 0x%p: select conn %s\n",
+              imp->imp_obd->obd_name, imp,
+              imp_conn->oic_uuid.uuid);
+        spin_unlock(&imp->imp_lock);
+
+        RETURN(0);
+}
+
+
+
int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
{
struct obd_device *obd = imp->imp_obd;
struct ptlrpc_request *request;
int size[] = {sizeof(imp->imp_target_uuid),
sizeof(obd->obd_uuid),
- sizeof(imp->imp_dlm_handle)};
+ sizeof(imp->imp_dlm_handle),
+ sizeof(unsigned long)};
char *tmp[] = {imp->imp_target_uuid.uuid,
obd->obd_uuid.uuid,
- (char *)&imp->imp_dlm_handle};
+ (char *)&imp->imp_dlm_handle,
+ (char *)&imp->imp_connect_flags}; /* XXX: make this portable! */
struct ptlrpc_connect_async_args *aa;
unsigned long flags;
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
- imp->imp_conn_cnt++;
- imp->imp_last_replay_transno = 0;
+ imp->imp_resend_replay = 0;
if (imp->imp_remote_handle.cookie == 0) {
initial_connect = 1;
} else {
committed_before_reconnect = imp->imp_peer_committed_transno;;
-
+ imp->imp_conn_cnt++;
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (new_uuid) {
- struct ptlrpc_connection *conn;
struct obd_uuid uuid;
- struct obd_export *dlmexp;
obd_str2uuid(&uuid, new_uuid);
- conn = ptlrpc_uuid_to_connection(&uuid);
- if (!conn)
- GOTO(out, rc = -ENOENT);
-
- CDEBUG(D_HA, "switching import %s/%s from %s to %s\n",
- imp->imp_target_uuid.uuid, imp->imp_obd->obd_name,
- imp->imp_connection->c_remote_uuid.uuid,
- conn->c_remote_uuid.uuid);
-
- /* Switch the import's connection and the DLM export's
- * connection (which are almost certainly the same, but we
- * keep distinct refs just to make things clearer. I think. */
- if (imp->imp_connection)
- ptlrpc_put_connection(imp->imp_connection);
- /* We hand off the ref from ptlrpc_get_connection. */
- imp->imp_connection = conn;
-
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
-
- LASSERT(dlmexp != NULL);
-
- if (dlmexp->exp_connection)
- ptlrpc_put_connection(dlmexp->exp_connection);
- dlmexp->exp_connection = ptlrpc_connection_addref(conn);
- class_export_put(dlmexp);
-
+ rc = import_set_conn_priority(imp, &uuid);
+ if (rc)
+ GOTO(out, rc);
}
+ rc = import_select_connection(imp);
+ if (rc)
+ GOTO(out, rc);
- request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
+ request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION,
+ imp->imp_connect_op, 4, size, tmp);
if (!request)
GOTO(out, rc = -ENOMEM);
aa->pcaa_peer_committed = committed_before_reconnect;
aa->pcaa_initial_connect = initial_connect;
- if (aa->pcaa_initial_connect)
- imp->imp_replayable = 1;
+ if (aa->pcaa_initial_connect) {
+ lustre_msg_add_op_flags(request->rq_reqmsg,
+ MSG_CONNECT_INITIAL);
+ imp->imp_replayable = 1;
+ }
ptlrpcd_add_req(request);
rc = 0;
+ imp->imp_connect_start = jiffies;
out:
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
}
static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
- void * data, int rc)
+ void *data, int rc)
{
struct ptlrpc_connect_async_args *aa = data;
struct obd_import *imp = request->rq_import;
if (rc)
GOTO(out, rc);
+ LASSERT(imp->imp_conn_current);
+ imp->imp_conn_current->oic_last_attempt = 0;
msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
} else {
imp->imp_replayable = 0;
}
+ LASSERTF(imp->imp_conn_cnt < request->rq_repmsg->conn_cnt,
+ "imp conn_cnt %d req conn_cnt %d",
+ imp->imp_conn_cnt, request->rq_repmsg->conn_cnt);
+ imp->imp_conn_cnt = request->rq_repmsg->conn_cnt;
imp->imp_remote_handle = request->rq_repmsg->handle;
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
GOTO(finish, rc = 0);
request->rq_repmsg->handle.cookie);
imp->imp_remote_handle = request->rq_repmsg->handle;
} else {
- CERROR("reconnected to %s@%s after partition\n",
+ CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
}
- if (imp->imp_invalid)
+ if (imp->imp_invalid) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
- else
+ } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid);
+ imp->imp_resend_replay = 1;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+ } else {
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
} else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
LASSERT(imp->imp_replayable);
imp->imp_remote_handle = request->rq_repmsg->handle;
+ imp->imp_last_replay_transno = 0;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
+ CDEBUG(D_HA, "oops! we get evicted from %s\n", imp->imp_target_uuid.uuid);
imp->imp_remote_handle = request->rq_repmsg->handle;
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
}
out:
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
- if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
+ if (aa->pcaa_initial_connect && !imp->imp_initial_recov)
ptlrpc_deactivate_import(imp);
- }
- CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
+ CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
imp->imp_target_uuid.uuid,
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
}
}
static int completed_replay_interpret(struct ptlrpc_request *req,
- void * data, int rc)
+ void *data, int rc)
{
atomic_dec(&req->rq_import->imp_replay_inflight);
- ptlrpc_import_recovery_state_machine(req->rq_import);
+ /* A non-zero request status triggers a fresh connect on the import;
+ * otherwise the recovery state machine is advanced. */
+ if (req->rq_status != 0) {
+ CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
+ "reconnecting\n",
+ req->rq_import->imp_obd->obd_name, req->rq_status);
+ ptlrpc_connect_import(req->rq_import, NULL);
+ } else {
+ ptlrpc_import_recovery_state_machine(req->rq_import);
+ }
+
RETURN(0);
}
LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
atomic_inc(&imp->imp_replay_inflight);
- req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+ req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL);
if (!req)
RETURN(-ENOMEM);
GOTO(out, rc);
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
ptlrpc_activate_import(imp);
+ CERROR("%s: connection restored to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
}
if (imp->imp_state == LUSTRE_IMP_FULL) {
switch (imp->imp_connect_op) {
case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
- case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+ case MGMT_CONNECT: rq_opc = MGMT_DISCONNECT; break;
default:
CERROR("don't know how to disconnect from %s (connect_op %d)\n",
imp->imp_target_uuid.uuid, imp->imp_connect_op);
if (ptlrpc_import_in_recovery(imp)) {
struct l_wait_info lwi;
- lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep,
- NULL, NULL);
- rc = l_wait_event(imp->imp_recovery_waitq,
+ unsigned long timeout;
+ /* Wait for recovery to finish: half of obd_timeout when
+ * imp_server_timeout is set, the full obd_timeout otherwise,
+ * converted to jiffies and never less than one tick. */
+ if (imp->imp_server_timeout)
+ timeout = obd_timeout / 2;
+ else
+ timeout = obd_timeout;
+ timeout = MAX(timeout * HZ, 1);
+ /* Pass the computed jiffies value, not raw obd_timeout: the raw
+ * value is in seconds and ignores imp_server_timeout. */
+ lwi = LWI_TIMEOUT_INTR(timeout, back_to_sleep, NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
!ptlrpc_import_in_recovery(imp), &lwi);
}
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
- request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
+ request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, rq_opc,
+ 0, NULL, NULL);
if (request) {
/* For non-replayable connections, don't attempt
reconnect if this fails */
out:
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+ imp->imp_conn_cnt = 0;
spin_unlock_irqrestore(&imp->imp_lock, flags);
RETURN(rc);