*/
#define DEBUG_SUBSYSTEM S_RPC
-#ifdef __KERNEL__
-# include <linux/config.h>
-# include <linux/module.h>
-# include <linux/kmod.h>
-#else
+#ifndef __KERNEL__
# include <liblustre.h>
#endif
#include <linux/lustre_export.h>
#include <linux/obd.h>
#include <linux/obd_class.h>
+#include <linux/lustre_sec.h>
#include "ptlrpc_internal.h"
spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_state == LUSTRE_IMP_FULL) {
+ CWARN("%s: connection lost to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
+ ptlrpc_deactivate_timeouts();
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
if (rc)
CERROR("%s: rc = %d waiting for callback (%d != %d)\n",
imp->imp_target_uuid.uuid, rc,
- atomic_read(&imp->imp_inflight), inflight);
+ atomic_read(&imp->imp_inflight), !!in_rpc);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+ ptlrpc_activate_timeouts();
}
void ptlrpc_fail_import(struct obd_import *imp, int generation)
EXIT;
}
+/*
+ * Non-zero when @last (a jiffies timestamp of a previous connect attempt)
+ * is set and lies within obd_timeout * 2 * HZ jiffies of the current
+ * jiffies value.  A @last of 0 never counts as "too soon".  The elapsed
+ * time is compared as a signed long.
+ */
+#define ATTEMPT_TOO_SOON(last)                                          \
+        ((last) != 0 &&                                                 \
+         ((long)(jiffies - (last)) <= (long)(obd_timeout * 2 * HZ)))
+
+/*
+ * Choose the connection that the next connect attempt on @imp will use.
+ *
+ * Scans imp->imp_conn_list for the first entry whose last attempt is not
+ * inside the ATTEMPT_TOO_SOON() window; if every entry was tried recently
+ * the current connection (imp->imp_conn_current) is reused.  The chosen
+ * entry is stamped with the current jiffies and rotated to the head of
+ * imp_conn_list, then installed as the import's connection and as the DLM
+ * export's connection (each holding its own reference).
+ *
+ * imp->imp_lock is held for the whole selection.
+ * NOTE(review): other users of imp_lock visible in this file take it with
+ * spin_lock_irqsave(); confirm that plain spin_lock() is intended here.
+ *
+ * Returns 0 on success, or -EINVAL when the import has no connections.
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+        struct obd_import_conn *imp_conn, *tmp;
+        struct obd_export *dlmexp;
+        int found = 0;
+        ENTRY;
+
+        spin_lock(&imp->imp_lock);
+
+        if (list_empty(&imp->imp_conn_list)) {
+                CERROR("no available connections on imp %p@%s\n",
+                       imp, imp->imp_obd->obd_name);
+                spin_unlock(&imp->imp_lock);
+                RETURN(-EINVAL);
+        }
+
+        list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
+                if (!ATTEMPT_TOO_SOON(imp_conn->oic_last_attempt)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        /* if not found, simply choose the current one */
+        if (!found) {
+                CWARN("obd %s imp 0x%p: all connections have been "
+                      "tried recently\n", imp->imp_obd->obd_name, imp);
+                LASSERT(imp->imp_conn_current);
+                imp_conn = imp->imp_conn_current;
+        }
+        LASSERT(imp_conn->oic_conn);
+
+        imp_conn->oic_last_attempt = jiffies;
+
+        /* move the items ahead of the selected one to list tail, so the
+         * selected entry ends up first in imp_conn_list */
+        while (1) {
+                tmp = list_entry(imp->imp_conn_list.next,
+                                 struct obd_import_conn, oic_item);
+                if (tmp == imp_conn)
+                        break;
+                list_del(&tmp->oic_item);
+                list_add_tail(&tmp->oic_item, &imp->imp_conn_list);
+        }
+
+        /* switch connection if we chose a new one */
+        if (imp->imp_connection != imp_conn->oic_conn) {
+                if (imp->imp_connection) {
+                        ptlrpcs_sec_invalidate_cache(imp->imp_sec);
+                        ptlrpc_put_connection(imp->imp_connection);
+                }
+                imp->imp_connection =
+                        ptlrpc_connection_addref(imp_conn->oic_conn);
+        }
+
+        /* The DLM export keeps its own reference on the connection: release
+         * the export's previous reference (not the import's) before taking
+         * a new one on the selected connection. */
+        dlmexp = class_conn2export(&imp->imp_dlm_handle);
+        LASSERT(dlmexp != NULL);
+        if (dlmexp->exp_connection)
+                ptlrpc_put_connection(dlmexp->exp_connection);
+        dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+        class_export_put(dlmexp);
+
+        imp->imp_conn_current = imp_conn;
+        CWARN("obd %s imp 0x%p: select conn %s\n",
+              imp->imp_obd->obd_name, imp,
+              imp_conn->oic_uuid.uuid);
+        spin_unlock(&imp->imp_lock);
+
+        RETURN(0);
+}
+
+
+
int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
{
struct obd_device *obd = imp->imp_obd;
int rc;
__u64 committed_before_reconnect = 0;
struct ptlrpc_request *request;
- int size[] = {sizeof(imp->imp_target_uuid),
- sizeof(obd->obd_uuid),
- sizeof(imp->imp_dlm_handle)};
- char *tmp[] = {imp->imp_target_uuid.uuid,
+ int size[] = {0,
+ sizeof(imp->imp_target_uuid),
+ sizeof(obd->obd_uuid),
+ sizeof(imp->imp_dlm_handle),
+ sizeof(imp->imp_connect_flags),
+ sizeof(imp->imp_connect_data)};
+ char *tmp[] = {NULL,
+ imp->imp_target_uuid.uuid,
obd->obd_uuid.uuid,
- (char *)&imp->imp_dlm_handle};
+ (char *)&imp->imp_dlm_handle,
+ (char *)&imp->imp_connect_flags, /* XXX: make this portable! */
+ (char*) &imp->imp_connect_data};
+ int repsize = sizeof(struct obd_connect_data);
+
struct ptlrpc_connect_async_args *aa;
unsigned long flags;
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (new_uuid) {
- struct ptlrpc_connection *conn;
struct obd_uuid uuid;
- struct obd_export *dlmexp;
obd_str2uuid(&uuid, new_uuid);
- conn = ptlrpc_uuid_to_connection(&uuid);
- if (!conn)
- GOTO(out, rc = -ENOENT);
-
- CDEBUG(D_HA, "switching import %s/%s from %s to %s\n",
- imp->imp_target_uuid.uuid, imp->imp_obd->obd_name,
- imp->imp_connection->c_remote_uuid.uuid,
- conn->c_remote_uuid.uuid);
-
- /* Switch the import's connection and the DLM export's
- * connection (which are almost certainly the same, but we
- * keep distinct refs just to make things clearer. I think. */
- if (imp->imp_connection)
- ptlrpc_put_connection(imp->imp_connection);
- /* We hand off the ref from ptlrpc_get_connection. */
- imp->imp_connection = conn;
-
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
-
- LASSERT(dlmexp != NULL);
-
- if (dlmexp->exp_connection)
- ptlrpc_put_connection(dlmexp->exp_connection);
- dlmexp->exp_connection = ptlrpc_connection_addref(conn);
- class_export_put(dlmexp);
-
+ rc = import_set_conn_priority(imp, &uuid);
+ if (rc)
+ GOTO(out, rc);
}
+ rc = import_select_connection(imp);
+ if (rc)
+ GOTO(out, rc);
+
+ LASSERT(imp->imp_sec);
- request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
+ size[0] = lustre_secdesc_size();
+ request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION,
+ imp->imp_connect_op, 6, size, tmp);
if (!request)
GOTO(out, rc = -ENOMEM);
+ lustre_pack_secdesc(request, size[0]);
+
#ifndef __KERNEL__
lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_LIBCLIENT);
#endif
+ if (obd->u.cli.cl_async) {
+ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_ASYNC);
+ }
request->rq_send_state = LUSTRE_IMP_CONNECTING;
- request->rq_replen = lustre_msg_size(0, NULL);
+ request->rq_replen = lustre_msg_size(1, &repsize);
request->rq_interpret_reply = ptlrpc_connect_interpret;
LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
MSG_CONNECT_INITIAL);
imp->imp_replayable = 1;
}
+
+ imp->imp_reqs_replayed = imp->imp_locks_replayed = 0;
ptlrpcd_add_req(request);
rc = 0;
}
static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
- void * data, int rc)
+ void *data, int rc)
{
struct ptlrpc_connect_async_args *aa = data;
struct obd_import *imp = request->rq_import;
if (rc)
GOTO(out, rc);
-
+ LASSERT(imp->imp_conn_current);
+ imp->imp_conn_current->oic_last_attempt = 0;
+/*
+ remote_flag = lustre_msg_buf(request->rq_repmsg, 0, sizeof(int));
+ LASSERT(remote_flag != NULL);
+ imp->imp_obd->u.cli.cl_remote = *remote_flag;
+*/
msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
if (aa->pcaa_initial_connect) {
+ struct obd_connect_data *conn_data;
+
+ conn_data = lustre_swab_repbuf(request, 0, sizeof(*conn_data),
+ lustre_swab_connect);
+ LASSERT(conn_data);
+ imp->imp_connect_data.ocd_connect_flags =
+ conn_data->ocd_connect_flags;
+
if (msg_flags & MSG_CONNECT_REPLAYABLE) {
CDEBUG(D_HA, "connected to replayable target: %s\n",
imp->imp_target_uuid.uuid);
imp->imp_conn_cnt = request->rq_repmsg->conn_cnt;
imp->imp_remote_handle = request->rq_repmsg->handle;
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+ ptlrpc_pinger_sending_on_import(imp);
GOTO(finish, rc = 0);
}
request->rq_repmsg->handle.cookie);
imp->imp_remote_handle = request->rq_repmsg->handle;
} else {
- CERROR("reconnected to %s@%s after partition\n",
+ CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
}
if (imp->imp_invalid) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
} else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid);
imp->imp_resend_replay = 1;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
imp->imp_last_replay_transno = 0;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
- CWARN("oops! we get evicted from %s\n", imp->imp_target_uuid.uuid);
+ CDEBUG(D_HA, "oops! we get evicted from %s\n", imp->imp_target_uuid.uuid);
imp->imp_remote_handle = request->rq_repmsg->handle;
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
}
out:
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
- if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
+ if (aa->pcaa_initial_connect && !imp->imp_initial_recov)
ptlrpc_deactivate_import(imp);
- }
- if (rc == -ETIMEDOUT && (jiffies - imp->imp_connect_start) > HZ) {
- CDEBUG(D_ERROR, "recovery of %s on %s failed (timeout)\n",
- imp->imp_target_uuid.uuid,
- (char *)imp->imp_connection->c_remote_uuid.uuid);
- ptlrpc_connect_import(imp, NULL);
- RETURN(0);
- }
- CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n",
+ CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
imp->imp_target_uuid.uuid,
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
}
}
+/*
+ * Reply callback for the request that tells the target replay is finished.
+ * Drops the import's replay-inflight count, then either advances the
+ * recovery state machine (rq_status == 0) or logs the failure and starts a
+ * fresh connect on the same import.  @data and @rc are not used; always
+ * returns 0.
+ */
static int completed_replay_interpret(struct ptlrpc_request *req,
- void * data, int rc)
+                                      void *data, int rc)
{
+        struct obd_import *imp = req->rq_import;
+
 atomic_dec(&req->rq_import->imp_replay_inflight);
- ptlrpc_import_recovery_state_machine(req->rq_import);
+        if (req->rq_status != 0) {
+                CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
+                       "reconnecting\n",
+                       imp->imp_obd->obd_name, req->rq_status);
+                ptlrpc_connect_import(imp, NULL);
+        } else {
+                ptlrpc_import_recovery_state_machine(imp);
+        }
+
 RETURN(0);
}
LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
atomic_inc(&imp->imp_replay_inflight);
- req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
- if (!req)
+ req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL);
+ if (!req) {
+ atomic_dec(&imp->imp_replay_inflight);
RETURN(-ENOMEM);
+ }
req->rq_replen = lustre_msg_size(0, NULL);
req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
- req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
+ req->rq_reqmsg->flags |= MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE;
req->rq_timeout *= 3;
req->rq_interpret_reply = completed_replay_interpret;
RETURN(0);
}
+#ifdef __KERNEL__
+/*
+ * Thread function handed to kernel_thread() to invalidate an import
+ * outside the caller's context.  @data is the struct obd_import to work on.
+ *
+ * Under the big kernel lock the thread daemonizes itself, blocks all
+ * signals and names itself "ll_imp_inval".  It then invalidates the
+ * import, moves it to LUSTRE_IMP_RECOVER and re-enters the recovery state
+ * machine.  Always returns 0.
+ */
+static int ptlrpc_invalidate_import_thread(void *data)
+{
+        struct obd_import *imp = data;
+        unsigned long flags;
+
+        ENTRY;
+
+        lock_kernel();
+        ptlrpc_daemonize();
+
+        /* Block every signal for this thread. */
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+        THREAD_NAME(current->comm, sizeof(current->comm), "ll_imp_inval");
+        unlock_kernel();
+
+        CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
+               imp->imp_obd->obd_name, imp->imp_target_uuid.uuid,
+               imp->imp_connection->c_remote_uuid.uuid);
+
+        ptlrpc_invalidate_import(imp, 0);
+        IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+
+        ptlrpc_import_recovery_state_machine(imp);
+
+        RETURN(0);
+}
+#endif
+
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
int rc = 0;
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
+#ifdef __KERNEL__
+ rc = kernel_thread(ptlrpc_invalidate_import_thread, imp,
+ CLONE_VM | CLONE_FILES);
+ if (rc < 0)
+ CERROR("error starting invalidate thread: %d\n", rc);
+ RETURN(rc);
+#else
ptlrpc_invalidate_import(imp, 1);
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+#endif
}
if (imp->imp_state == LUSTRE_IMP_REPLAY) {
GOTO(out, rc);
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
ptlrpc_activate_import(imp);
+ CWARN("%s: connection restored to %s@%s, "
+ "%d/%d req/lock replayed\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_reqs_replayed,
+ imp->imp_locks_replayed);
}
if (imp->imp_state == LUSTRE_IMP_FULL) {
switch (imp->imp_connect_op) {
case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
- case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+ case MGMT_CONNECT: rq_opc = MGMT_DISCONNECT; break;
default:
CERROR("don't know how to disconnect from %s (connect_op %d)\n",
imp->imp_target_uuid.uuid, imp->imp_connect_op);
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
- request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
+ request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, rq_opc,
+ 0, NULL, NULL);
if (request) {
/* For non-replayable connections, don't attempt
reconnect if this fails */