* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
int pcaa_initial_connect;
};
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
static void __import_set_state(struct obd_import *imp,
enum lustre_imp_state state)
{
*uuid_len -= strlen(UUID_STR);
}
-/* Returns true if import was FULL, false if import was already not
+/**
+ * Returns true if import was FULL, false if import was already not
* connected.
* @imp - import to be disconnected
* @conn_cnt - connection count (epoch) of the request that timed out
return timeout;
}
-/*
+/**
* This function will invalidate the import, if necessary, then block
* for all the RPC completions, and finally notify the obd to
* invalidate its state (ie cancel locks, clear pending requests,
cfs_atomic_inc(&imp->imp_inval_count);
- /*
- * If this is an invalid MGC connection, then don't bother
- * waiting for imp_inflight to drop to 0.
- */
- if (imp->imp_invalid && imp->imp_recon_bk &&!imp->imp_obd->obd_no_recov)
- goto out;
-
if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
ptlrpc_deactivate_import(imp);
* "invalidate" state.
*/
LASSERT(cfs_atomic_read(&imp->imp_inflight) == 0);
-out:
obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
sptlrpc_import_flush_all_ctx(imp);
/* Remove 'invalid' flag */
ptlrpc_activate_import(imp);
/* Attempt a new connect */
- ptlrpc_recover_import(imp, NULL);
+ ptlrpc_recover_import(imp, NULL, 0);
return 0;
}
-
EXPORT_SYMBOL(ptlrpc_reconnect_import);
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically choose the connection that we have not tried to
+ * connect to for the longest time.
+ */
static int import_select_connection(struct obd_import *imp)
{
struct obd_import_conn *imp_conn = NULL, *conn;
imp->imp_obd->obd_name,
libcfs_nid2str(conn->oic_conn->c_peer.nid),
conn->oic_last_attempt);
- /* Don't thrash connections */
- if (cfs_time_before_64(cfs_time_current_64(),
- conn->oic_last_attempt +
- cfs_time_seconds(CONNECTION_SWITCH_MIN))) {
- continue;
- }
/* If we have not tried this connection since
the last successful attempt, go with this one */
we do finally connect. (FIXME: really we should wait for all network
state associated with the last connection attempt to drain before
trying to reconnect on it.) */
- if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item) &&
- !imp->imp_recon_bk /* not retrying */) {
+ if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
if (at_get(&imp->imp_at.iat_net_latency) <
CONNECTION_SWITCH_MAX) {
at_measured(&imp->imp_at.iat_net_latency,
return 1;
}
-int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_connect_import(struct obd_import *imp)
{
struct obd_device *obd = imp->imp_obd;
int initial_connect = 0;
&imp->imp_connect_data.ocd_transno);
cfs_spin_unlock(&imp->imp_lock);
- if (new_uuid) {
- struct obd_uuid uuid;
-
- obd_str2uuid(&uuid, new_uuid);
- rc = import_set_conn_priority(imp, &uuid);
- if (rc)
- GOTO(out, rc);
- }
-
rc = import_select_connection(imp);
if (rc)
GOTO(out, rc);
- /* last in connection list */
- if (imp->imp_conn_current->oic_item.next == &imp->imp_conn_list) {
- if (imp->imp_initial_recov_bk && initial_connect) {
- CDEBUG(D_HA, "Last connection attempt (%d) for %s\n",
- imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd));
- /* Don't retry if connect fails */
- rc = 0;
- obd_set_info_async(obd->obd_self_export,
- sizeof(KEY_INIT_RECOV),
- KEY_INIT_RECOV,
- sizeof(rc), &rc, NULL);
- }
- if (imp->imp_recon_bk) {
- CDEBUG(D_HA, "Last reconnection attempt (%d) for %s\n",
- imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd));
- cfs_spin_lock(&imp->imp_lock);
- imp->imp_last_recon = 1;
- cfs_spin_unlock(&imp->imp_lock);
- }
- }
-
rc = sptlrpc_import_sec_adapt(imp, NULL, 0);
if (rc)
GOTO(out, rc);
* the server is updated on-the-fly we will get the new features. */
imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
&obd->obd_uuid, &imp->imp_connect_data, NULL);
static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
{
#ifdef __KERNEL__
- struct obd_import_conn *imp_conn;
-#endif
- int wake_pinger = 0;
-
- ENTRY;
+ int force_verify;
cfs_spin_lock(&imp->imp_lock);
- if (cfs_list_empty(&imp->imp_conn_list))
- GOTO(unlock, 0);
+ force_verify = imp->imp_force_verify != 0;
+ cfs_spin_unlock(&imp->imp_lock);
-#ifdef __KERNEL__
- imp_conn = cfs_list_entry(imp->imp_conn_list.prev,
- struct obd_import_conn,
- oic_item);
-
- /* XXX: When the failover node is the primary node, it is possible
- * to have two identical connections in imp_conn_list. We must
- * compare not conn's pointers but NIDs, otherwise we can defeat
- * connection throttling. (See bug 14774.) */
- if (imp->imp_conn_current->oic_conn->c_peer.nid !=
- imp_conn->oic_conn->c_peer.nid) {
- ptlrpc_ping_import_soon(imp);
- wake_pinger = 1;
- }
+ if (force_verify)
+ ptlrpc_pinger_wake_up();
#else
/* liblustre has no pinger thread, so we wakeup pinger anyway */
- wake_pinger = 1;
+ ptlrpc_pinger_wake_up();
#endif
-
- unlock:
- cfs_spin_unlock(&imp->imp_lock);
-
- if (wake_pinger)
- ptlrpc_pinger_wake_up();
-
- EXIT;
}
static int ptlrpc_busy_reconnect(int rc)
return (rc == -EBUSY) || (rc == -EAGAIN);
}
-
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e. enter recovery, promote it to
+ * full state for normal operations, or disconnect it due to an error.
+ */
static int ptlrpc_connect_interpret(const struct lu_env *env,
struct ptlrpc_request *request,
void *data, int rc)
/* All imports are pingable */
imp->imp_pingable = 1;
imp->imp_force_reconnect = 0;
+ imp->imp_force_verify = 0;
if (aa->pcaa_initial_connect) {
if (msg_flags & MSG_CONNECT_REPLAYABLE) {
"after reconnect. We should LBUG right here.\n");
}
- if (lustre_msg_get_last_committed(request->rq_repmsg) <
+ if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
+ lustre_msg_get_last_committed(request->rq_repmsg) <
aa->pcaa_peer_committed) {
CERROR("%s went back in time (transno "LPD64
" was previously committed, server now claims "LPD64
"invalidating and reconnecting\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
- ptlrpc_connect_import(imp, NULL);
+ ptlrpc_connect_import(imp);
RETURN(0);
}
} else {
}
imp->imp_connect_data = *ocd;
+ CDEBUG(D_HA, "obd %s to target with inst %u\n",
+ imp->imp_obd->obd_name, ocd->ocd_instance);
exp = class_conn2export(&imp->imp_dlm_handle);
cfs_spin_unlock(&imp->imp_lock);
/* Sigh, some compilers do not like #ifdef in the middle
of macro arguments */
#ifdef __KERNEL__
- const char *older =
- "older. Consider upgrading this client";
+ const char *older = "older. Consider upgrading server "
+ "or downgrading client";
#else
- const char *older =
- "older. Consider recompiling this application";
+ const char *older = "older. Consider recompiling this "
+ "application";
#endif
- const char *newer = "newer than client version";
+ const char *newer = "newer than client version. "
+ "Consider upgrading client";
LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) "
"is much %s (%s)\n",
/* We sent to the server ocd_cksum_types with bits set
* for algorithms we understand. The server masked off
* the checksum types it doesn't support */
- if ((ocd->ocd_cksum_types & OBD_CKSUM_ALL) == 0) {
+ if ((ocd->ocd_cksum_types & cksum_types_supported()) == 0) {
LCONSOLE_WARN("The negotiation of the checksum "
"alogrithm to use with server %s "
"failed (%x/%x), disabling "
"checksums\n",
obd2cli_tgt(imp->imp_obd),
ocd->ocd_cksum_types,
- OBD_CKSUM_ALL);
+ cksum_types_supported());
cli->cl_checksum = 0;
cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
- cli->cl_cksum_type = OBD_CKSUM_CRC32;
} else {
cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
-
- if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM)
- cli->cl_cksum_type = OSC_DEFAULT_CKSUM;
- else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER)
- cli->cl_cksum_type = OBD_CKSUM_ADLER;
- else
- cli->cl_cksum_type = OBD_CKSUM_CRC32;
}
} else {
/* The server does not support OBD_CONNECT_CKSUM.
* Enforce CRC32 for backward compatibility*/
cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
- cli->cl_cksum_type = OBD_CKSUM_CRC32;
}
+ cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types);
- if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
+ if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
cli->cl_max_pages_per_rpc =
ocd->ocd_brw_size >> CFS_PAGE_SHIFT;
- }
+ else if (imp->imp_connect_op == MDS_CONNECT ||
+ imp->imp_connect_op == MGS_CONNECT)
+ cli->cl_max_pages_per_rpc = 1;
/* Reset ns_connect_flags only for initial connect. It might be
* changed in while using FS and if we reset it in reconnect
else
imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
(cli->cl_max_pages_per_rpc > 0));
}
out:
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
- cfs_spin_lock(&imp->imp_lock);
- if (aa->pcaa_initial_connect && !imp->imp_initial_recov &&
- (request->rq_import_generation == imp->imp_generation))
- ptlrpc_deactivate_and_unlock_import(imp);
- else
- cfs_spin_unlock(&imp->imp_lock);
-
- if ((imp->imp_recon_bk && imp->imp_last_recon) ||
- (rc == -EACCES)) {
+ if (rc == -EACCES) {
/*
* Give up trying to reconnect
* EACCES means client has no permission for connection
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
}
- cfs_spin_lock(&imp->imp_lock);
- imp->imp_last_recon = 0;
- cfs_spin_unlock(&imp->imp_lock);
-
cfs_waitq_broadcast(&imp->imp_recovery_waitq);
RETURN(rc);
}
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
static int completed_replay_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void * data, int rc)
CDEBUG(D_WARNING,
"%s: version recovery fails, reconnecting\n",
req->rq_import->imp_obd->obd_name);
- cfs_spin_lock(&req->rq_import->imp_lock);
- req->rq_import->imp_vbr_failed = 0;
- cfs_spin_unlock(&req->rq_import->imp_lock);
} else {
CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
"reconnecting\n",
req->rq_import->imp_obd->obd_name,
req->rq_status);
}
- ptlrpc_connect_import(req->rq_import, NULL);
+ ptlrpc_connect_import(req->rq_import);
}
RETURN(0);
}
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
static int signal_completed_replay(struct obd_import *imp)
{
struct ptlrpc_request *req;
ENTRY;
+ if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
+ RETURN(0);
+
LASSERT(cfs_atomic_read(&imp->imp_replay_inflight) == 0);
cfs_atomic_inc(&imp->imp_replay_inflight);
}
#ifdef __KERNEL__
+/**
+ * In kernel code all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem could still be killed or otherwise continue
+ */
static int ptlrpc_invalidate_import_thread(void *data)
{
struct obd_import *imp = data;
}
#endif
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typically we have two possible paths. If we came to a server and it is not
+ * in recovery, we just enter IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we came to server that is in recovery, we enter IMP_REPLAY import state.
+ * We go through our list of requests to replay and send them to server one by
+ * one.
+ * After sending all requests from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server
+ * and also all the locks we don't yet have and wait for server to grant us.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER
+ * state and resend all requests from sending list.
+ * After that we promote import to FULL state and send all delayed requests
+ * and import is fully operational after that.
+ *
+ */
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
int rc = 0;
CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
+ /* reset vbr_failed flag upon eviction */
+ cfs_spin_lock(&imp->imp_lock);
+ imp->imp_vbr_failed = 0;
+ cfs_spin_unlock(&imp->imp_lock);
#ifdef __KERNEL__
/* bug 17802: XXX client_disconnect_export vs connect request
* invalidate thread without reference to import and import can
* be freed at same time. */
class_import_get(imp);
- rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp,
- CLONE_VM | CLONE_FILES);
+ rc = cfs_create_thread(ptlrpc_invalidate_import_thread, imp,
+ CFS_DAEMON_FLAGS);
if (rc < 0) {
class_import_put(imp);
CERROR("error starting invalidate thread: %d\n", rc);
RETURN(rc);
}
-static int back_to_sleep(void *unused)
-{
- return 0;
-}
-
int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
{
struct ptlrpc_request *req;
else
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
- /* Try all connections in the future - bz 12758 */
- imp->imp_last_recon = 0;
cfs_spin_unlock(&imp->imp_lock);
RETURN(rc);