* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
int pcaa_initial_connect;
};
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
static void __import_set_state(struct obd_import *imp,
enum lustre_imp_state state)
{
*uuid_len -= strlen(UUID_STR);
}
-/* Returns true if import was FULL, false if import was already not
+/**
+ * Returns true if import was FULL, false if import was already not
* connected.
* @imp - import to be disconnected
* @conn_cnt - connection count (epoch) of the request that timed out
return timeout;
}
-/*
+/**
* This function will invalidate the import, if necessary, then block
* for all the RPC completions, and finally notify the obd to
* invalidate its state (ie cancel locks, clear pending requests,
EXPORT_SYMBOL(ptlrpc_reconnect_import);
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically choose the connection that we have not tried to
+ * connect to for the longest time.
+ */
static int import_select_connection(struct obd_import *imp)
{
struct obd_import_conn *imp_conn = NULL, *conn;
imp->imp_obd->obd_name,
libcfs_nid2str(conn->oic_conn->c_peer.nid),
conn->oic_last_attempt);
- /* Don't thrash connections */
- if (cfs_time_before_64(cfs_time_current_64(),
- conn->oic_last_attempt +
- cfs_time_seconds(CONNECTION_SWITCH_MIN))) {
- continue;
- }
/* If we have not tried this connection since
the last successful attempt, go with this one */
return 1;
}
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
{
struct obd_device *obd = imp->imp_obd;
static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
{
#ifdef __KERNEL__
- struct obd_import_conn *imp_conn;
-#endif
- int wake_pinger = 0;
-
- ENTRY;
-
- cfs_spin_lock(&imp->imp_lock);
- if (cfs_list_empty(&imp->imp_conn_list))
- GOTO(unlock, 0);
-
-#ifdef __KERNEL__
- imp_conn = cfs_list_entry(imp->imp_conn_list.prev,
- struct obd_import_conn,
- oic_item);
-
- /* XXX: When the failover node is the primary node, it is possible
- * to have two identical connections in imp_conn_list. We must
- * compare not conn's pointers but NIDs, otherwise we can defeat
- * connection throttling. (See bug 14774.) */
- if (imp->imp_conn_current->oic_conn->c_peer.nid !=
- imp_conn->oic_conn->c_peer.nid) {
- ptlrpc_ping_import_soon(imp);
- wake_pinger = 1;
- }
+ /* the pinger takes care of issuing the next reconnect request */
+ return;
#else
/* liblustre has no pinger thread, so we wakeup pinger anyway */
- wake_pinger = 1;
+ ptlrpc_pinger_wake_up();
#endif
-
- unlock:
- cfs_spin_unlock(&imp->imp_lock);
-
- if (wake_pinger)
- ptlrpc_pinger_wake_up();
-
- EXIT;
}
static int ptlrpc_busy_reconnect(int rc)
return (rc == -EBUSY) || (rc == -EAGAIN);
}
-
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e. enter recovery, promote it to
+ * full state for normal operations, or disconnect it due to an error.
+ */
static int ptlrpc_connect_interpret(const struct lu_env *env,
struct ptlrpc_request *request,
void *data, int rc)
/* Sigh, some compilers do not like #ifdef in the middle
of macro arguments */
#ifdef __KERNEL__
- const char *older =
- "older. Consider upgrading this client";
+ const char *older = "older. Consider upgrading server "
+ "or downgrading client";
#else
- const char *older =
- "older. Consider recompiling this application";
+ const char *older = "older. Consider recompiling this "
+ "application";
#endif
- const char *newer = "newer than client version";
+ const char *newer = "newer than client version. "
+ "Consider upgrading client";
LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) "
"is much %s (%s)\n",
cli->cl_cksum_type = OBD_CKSUM_CRC32;
}
- if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
+ if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
cli->cl_max_pages_per_rpc =
ocd->ocd_brw_size >> CFS_PAGE_SHIFT;
- }
+ else if (imp->imp_connect_op == MDS_CONNECT ||
+ imp->imp_connect_op == MGS_CONNECT)
+ cli->cl_max_pages_per_rpc = 1;
/* Reset ns_connect_flags only for initial connect. It might be
* changed in while using FS and if we reset it in reconnect
RETURN(rc);
}
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
static int completed_replay_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void * data, int rc)
RETURN(0);
}
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
static int signal_completed_replay(struct obd_import *imp)
{
struct ptlrpc_request *req;
ENTRY;
+ if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
+ RETURN(0);
+
LASSERT(cfs_atomic_read(&imp->imp_replay_inflight) == 0);
cfs_atomic_inc(&imp->imp_replay_inflight);
}
#ifdef __KERNEL__
+/**
+ * In kernel code all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem could still be killed or otherwise continue
+ */
static int ptlrpc_invalidate_import_thread(void *data)
{
struct obd_import *imp = data;
}
#endif
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typically we have two possible paths. If we came to server and it is not
+ * in recovery, we just enter IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we came to server that is in recovery, we enter IMP_REPLAY import state.
+ * We go through our list of requests to replay and send them to server one by
+ * one.
+ * After sending all requests from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server
+ * and also all the locks we don't yet have and wait for server to grant us.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER
+ * state and resend all requests from sending list.
+ * After that we promote import to FULL state and send all delayed requests
+ * and import is fully operational after that.
+ *
+ */
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
int rc = 0;
* invalidate thread without reference to import and import can
* be freed at same time. */
class_import_get(imp);
- rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp,
- CLONE_VM | CLONE_FILES);
+ rc = cfs_create_thread(ptlrpc_invalidate_import_thread, imp,
+ CFS_DAEMON_FLAGS);
if (rc < 0) {
class_import_put(imp);
CERROR("error starting invalidate thread: %d\n", rc);