+/* Argument block carried with a connect request.
+ * NOTE(review): presumably filled in by ptlrpc_connect_import() and consumed
+ * by ptlrpc_connect_interpret(); neither body is visible in this section --
+ * confirm against those functions. */
+struct ptlrpc_connect_async_args {
+ __u64 pcaa_peer_committed; /* presumably the peer's last committed transno -- TODO confirm */
+ int pcaa_initial_connect; /* presumably non-zero for a first-time connect -- TODO confirm */
+};
+
+/*
+ * Set the import state without touching imp_lock (see IMPORT_SET_STATE for
+ * the variant that takes the lock itself).
+ *
+ * A CLOSED import should remain so: when the current state is
+ * LUSTRE_IMP_CLOSED the transition is skipped and nothing is logged.
+ *
+ * Both macro arguments are parenthesized at every use so that argument
+ * expressions such as "&foo" or "a ? b : c" expand correctly.  They are
+ * still evaluated more than once, so they must be free of side effects.
+ */
+#define IMPORT_SET_STATE_NOLOCK(imp, state)                                    \
+do {                                                                           \
+        if ((imp)->imp_state != LUSTRE_IMP_CLOSED) {                           \
+                CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n",   \
+                       (imp), obd2cli_tgt((imp)->imp_obd),                     \
+                       ptlrpc_import_state_name((imp)->imp_state),             \
+                       ptlrpc_import_state_name(state));                       \
+                (imp)->imp_state = (state);                                    \
+        }                                                                      \
+} while (0)
+
+/*
+ * Locked state change: take imp_lock, apply IMPORT_SET_STATE_NOLOCK(), and
+ * release the lock again.
+ *
+ * The arguments are parenthesized at every use (including when forwarded)
+ * so that non-trivial argument expressions expand correctly.  "imp" is
+ * evaluated more than once and must be free of side effects.
+ */
+#define IMPORT_SET_STATE(imp, state)                                           \
+do {                                                                           \
+        spin_lock(&(imp)->imp_lock);                                           \
+        IMPORT_SET_STATE_NOLOCK((imp), (state));                               \
+        spin_unlock(&(imp)->imp_lock);                                         \
+} while (0)
+
+
+/* Forward declarations; the definitions are outside this section. */
+static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
+ void * data, int rc);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* Only this function is allowed to change the import state when it is
+ * CLOSED. I would rather refcount the import and free it after
+ * disconnection like we do with exports. To do that, the client_obd
+ * will need to save the peer info somewhere other than in the import,
+ * though.
+ *
+ * Under imp_lock, bumps imp_generation and sets imp_state directly to
+ * LUSTRE_IMP_NEW (bypassing the IMPORT_SET_STATE* macros, which refuse to
+ * leave the CLOSED state).  Always returns 0. */
+int ptlrpc_init_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+
+ imp->imp_generation++;
+ imp->imp_state = LUSTRE_IMP_NEW;
+
+ spin_unlock(&imp->imp_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_init_import);
+
+#define UUID_STR "_UUID"
+/*
+ * Locate the "bare" name inside a UUID string without modifying it.
+ *
+ * @uuid       - NUL-terminated input string
+ * @prefix     - optional prefix to skip; may be NULL
+ * @uuid_start - out: @uuid itself, or @uuid advanced past @prefix when
+ *               @uuid begins with @prefix
+ * @uuid_len   - out: length of the string at *@uuid_start, not counting a
+ *               trailing UUID_STR suffix when one is present
+ *
+ * The result is a (pointer, length) pair suitable for "%.*s"; the text is
+ * not re-terminated.
+ */
+static void deuuidify(char *uuid, const char *prefix, char **uuid_start,
+                      int *uuid_len)
+{
+        const size_t suffix_len = strlen(UUID_STR);
+        char *start = uuid;
+        int len;
+
+        /* Skip the prefix only when one was given and it really matches. */
+        if (prefix != NULL) {
+                size_t prefix_len = strlen(prefix);
+
+                if (strncmp(uuid, prefix, prefix_len) == 0)
+                        start = uuid + prefix_len;
+        }
+        *uuid_start = start;
+
+        len = strlen(start);
+        *uuid_len = len;
+
+        /* Too short to end in the suffix at all. */
+        if ((size_t)len < suffix_len)
+                return;
+
+        /* Drop the trailing suffix from the reported length. */
+        if (strncmp(start + len - suffix_len, UUID_STR, suffix_len) == 0)
+                *uuid_len = len - suffix_len;
+}
+
+/* Move a FULL import to DISCON.
+ *
+ * Returns 1 if the import was FULL (and @conn_cnt matched, see below) and
+ * has now been marked disconnected; returns 0 if nothing was changed.
+ *
+ * @imp      - import to be disconnected
+ * @conn_cnt - connection count (epoch) of the request that timed out
+ *             and caused the disconnection.  In some cases, multiple
+ *             inflight requests can fail to a single target (e.g. OST
+ *             bulk requests) and if one has already caused a reconnection
+ *             (increasing the import->conn_cnt) the older failure should
+ *             not also cause a reconnection.  If zero it forces a reconnect.
+ *
+ * On the disconnect path a console message is printed, timeouts are
+ * deactivated and the state is changed while imp_lock is held; the lock is
+ * dropped before the optional debug-log dump and the IMP_EVENT_DISCON
+ * notification.
+ */
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
+{
+        int rc = 0;
+
+        spin_lock(&imp->imp_lock);
+
+        if (imp->imp_state == LUSTRE_IMP_FULL &&
+            (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
+                char *target_start;
+                int   target_len;
+
+                /* Print the target name without its "_UUID" suffix. */
+                deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+                          &target_start, &target_len);
+
+                if (imp->imp_replayable) {
+                        LCONSOLE_WARN("%s: Connection to service %.*s via nid "
+                               "%s was lost; in progress operations using this "
+                               "service will wait for recovery to complete.\n",
+                               imp->imp_obd->obd_name, target_len, target_start,
+                               libcfs_nid2str(imp->imp_connection->c_peer.nid));
+                } else {
+                        /* Adjacent literals are concatenated verbatim, so
+                         * each fragment must carry its own trailing space. */
+                        LCONSOLE_ERROR_MSG(0x166, "%s: Connection to service "
+                               "%.*s via nid %s was lost; in progress "
+                               "operations using this service will "
+                               "fail.\n",
+                               imp->imp_obd->obd_name,
+                               target_len, target_start,
+                               libcfs_nid2str(imp->imp_connection->c_peer.nid));
+                }
+                ptlrpc_deactivate_timeouts(imp);
+                IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+                spin_unlock(&imp->imp_lock);
+
+                if (obd_dump_on_timeout)
+                        libcfs_debug_dumplog();
+
+                obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+                rc = 1;
+        } else {
+                spin_unlock(&imp->imp_lock);
+                /* Debug output only: the fields below are read after the
+                 * lock has been dropped. */
+                CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
+                       imp->imp_client->cli_name, imp,
+                       (imp->imp_state == LUSTRE_IMP_FULL &&
+                        imp->imp_conn_cnt > conn_cnt) ?
+                       "reconnected" : "not connected", imp->imp_conn_cnt,
+                       conn_cnt, ptlrpc_import_state_name(imp->imp_state));
+        }
+
+        return rc;
+}
+
+/*
+ * Mark the import invalid and release imp_lock.
+ *
+ * Must be called with imp_lock held; the lock is always dropped before
+ * returning.  If the import was already invalid nothing else happens.
+ * Otherwise imp_invalid is set and imp_generation is bumped under the lock,
+ * and after the lock is released the inflight requests are aborted and
+ * IMP_EVENT_INACTIVE is raised.
+ */
+static void ptlrpc_deactivate_and_unlock_import(struct obd_import *imp)
+{
+        int already_invalid;
+        ENTRY;
+
+        LASSERT_SPIN_LOCKED(&imp->imp_lock);
+
+        already_invalid = imp->imp_invalid;
+        if (!already_invalid) {
+                CDEBUG(D_HA, "setting import %s INVALID\n",
+                       obd2cli_tgt(imp->imp_obd));
+                imp->imp_invalid = 1;
+                imp->imp_generation++;
+        }
+        spin_unlock(&imp->imp_lock);
+
+        /* The follow-up work runs without the lock held. */
+        if (!already_invalid) {
+                ptlrpc_abort_inflight(imp);
+                obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+        }
+
+        EXIT;
+}
+
+/*
+ * This acts as a barrier; all existing requests are rejected, and
+ * no new requests will be accepted until the import is valid again.
+ *
+ * Takes imp_lock here; ptlrpc_deactivate_and_unlock_import() releases it
+ * on every path, so the lock is not held on return.
+ */
+void ptlrpc_deactivate_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+ ptlrpc_deactivate_and_unlock_import(imp);
+}
+
+/*
+ * Invalidate the import if that has not happened yet, wait for every
+ * outstanding RPC to complete, and then tell the obd to drop its own state
+ * (IMP_EVENT_INVALIDATE) and flush the security contexts.
+ *
+ * imp_inval_count is raised for the duration of the call; waiters on
+ * imp_recovery_waitq are signalled when it is dropped again.
+ *
+ * If the wait for imp_inflight to reach zero returns non-zero, the requests
+ * still on the sending and delayed lists are logged and the function then
+ * asserts that imp_inflight is zero.
+ */
+void ptlrpc_invalidate_import(struct obd_import *imp)
+{
+        struct l_wait_info lwi;
+        int rc;
+
+        atomic_inc(&imp->imp_inval_count);
+
+        /*
+         * If this is an invalid MGC connection, then don't bother
+         * waiting for imp_inflight to drop to 0.
+         */
+        if (imp->imp_invalid && imp->imp_recon_bk &&
+            !imp->imp_obd->obd_no_recov)
+                goto out;
+
+        if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
+                ptlrpc_deactivate_import(imp);
+
+        LASSERT(imp->imp_invalid);
+
+        /* Let all requests error out and run their completion callbacks. */
+        lwi = LWI_TIMEOUT_INTERVAL(cfs_timeout_cap(cfs_time_seconds(obd_timeout)),
+                                   HZ, NULL, NULL);
+        rc = l_wait_event(imp->imp_recovery_waitq,
+                          (atomic_read(&imp->imp_inflight) == 0), &lwi);
+
+        if (rc != 0) {
+                struct ptlrpc_request *req;
+
+                CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
+                       obd2cli_tgt(imp->imp_obd), rc,
+                       atomic_read(&imp->imp_inflight));
+
+                /* Nothing is unlinked while walking, so the plain entry
+                 * iterator is sufficient. */
+                spin_lock(&imp->imp_lock);
+                list_for_each_entry(req, &imp->imp_sending_list, rq_list)
+                        DEBUG_REQ(D_ERROR, req, "still on sending list");
+                list_for_each_entry(req, &imp->imp_delayed_list, rq_list)
+                        DEBUG_REQ(D_ERROR, req, "still on delayed list");
+                spin_unlock(&imp->imp_lock);
+
+                LASSERT(atomic_read(&imp->imp_inflight) == 0);
+        }
+
+out:
+        obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
+        sptlrpc_import_flush_all_ctx(imp);
+
+        atomic_dec(&imp->imp_inval_count);
+        cfs_waitq_signal(&imp->imp_recovery_waitq);
+}
+
+/* unset imp_invalid
+ *
+ * Under imp_lock, clears imp_invalid and calls ptlrpc_activate_timeouts();
+ * IMP_EVENT_ACTIVE is raised after the lock has been dropped. */
+void ptlrpc_activate_import(struct obd_import *imp)
+{
+ struct obd_device *obd = imp->imp_obd;
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_invalid = 0;
+ ptlrpc_activate_timeouts(imp);
+ spin_unlock(&imp->imp_lock);
+ obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+}
+
+/*
+ * React to a failed connection: try to move the import to DISCON and, if
+ * that transition actually happened, deactivate a non-replayable import,
+ * set imp_force_verify and wake the pinger.
+ *
+ * @imp      - import whose connection failed; must not be a DLM fake import
+ * @conn_cnt - connection count of the failing request, passed through to
+ *             ptlrpc_set_import_discon()
+ */
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
+{
+        ENTRY;
+
+        LASSERT(!imp->imp_dlm_fake);
+
+        /* Nothing to do when the import was not moved to DISCON. */
+        if (!ptlrpc_set_import_discon(imp, conn_cnt)) {
+                EXIT;
+                return;
+        }
+
+        if (!imp->imp_replayable) {
+                CDEBUG(D_HA, "import %s@%s for %s not replayable, "
+                       "auto-deactivating\n",
+                       obd2cli_tgt(imp->imp_obd),
+                       imp->imp_connection->c_remote_uuid.uuid,
+                       imp->imp_obd->obd_name);
+                ptlrpc_deactivate_import(imp);
+        }
+
+        CDEBUG(D_HA, "%s: waking up pinger\n",
+               obd2cli_tgt(imp->imp_obd));
+
+        spin_lock(&imp->imp_lock);
+        imp->imp_force_verify = 1;
+        spin_unlock(&imp->imp_lock);
+
+        ptlrpc_pinger_wake_up();
+
+        EXIT;
+}
+
+/* Push the import through a full disconnect / invalidate / reconnect cycle.
+ * Always returns 0: the return values of the helper calls below are not
+ * checked, and an interrupted wait is only logged. */
+int ptlrpc_reconnect_import(struct obd_import *imp)
+{
+
+ /* conn_cnt == 0 forces the FULL -> DISCON transition */
+ ptlrpc_set_import_discon(imp, 0);
+ /* Force a new connect attempt */
+ ptlrpc_invalidate_import(imp);
+ /* Do a fresh connect next time by zeroing the handle */
+ ptlrpc_disconnect_import(imp, 1);
+ /* Wait for all invalidate calls to finish */
+ if (atomic_read(&imp->imp_inval_count) > 0) {
+ int rc;
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inval_count) == 0),
+ &lwi);
+ /* execution continues below even if the wait was interrupted */
+ if (rc)
+ CERROR("Interrupted, inval=%d\n",
+ atomic_read(&imp->imp_inval_count));
+ }
+
+ /* Allow reconnect attempts */
+ imp->imp_obd->obd_no_recov = 0;
+ /* Remove 'invalid' flag */
+ ptlrpc_activate_import(imp);
+ /* Attempt a new connect */
+ ptlrpc_recover_import(imp, NULL);
+ return 0;
+}
+
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+
+/*
+ * Choose which entry of imp_conn_list the next connect attempt should use
+ * and install it as the import's (and the DLM export's) connection.
+ *
+ * Selection, all under imp_lock:
+ *  - entries whose last attempt is not yet older than RECONNECT_INTERVAL
+ *    are skipped;
+ *  - the first remaining entry not tried since the last successful connect
+ *    wins immediately;
+ *  - otherwise the remaining entry with the oldest last attempt is used;
+ *  - if no entry qualifies, the current connection is kept.
+ *
+ * Returns 0 on success, -EINVAL if the connection list is empty.
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+        struct obd_import_conn *pick = NULL, *conn;
+        struct obd_export *dlmexp;
+        ENTRY;
+
+        spin_lock(&imp->imp_lock);
+
+        if (list_empty(&imp->imp_conn_list)) {
+                CERROR("%s: no connections available\n",
+                       imp->imp_obd->obd_name);
+                spin_unlock(&imp->imp_lock);
+                RETURN(-EINVAL);
+        }
+
+        list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+                CDEBUG(D_HA, "%s: connect to NID %s last attempt "LPU64"\n",
+                       imp->imp_obd->obd_name,
+                       libcfs_nid2str(conn->oic_conn->c_peer.nid),
+                       conn->oic_last_attempt);
+
+                /* Throttle the reconnect rate to once per RECONNECT_INTERVAL */
+                if (!cfs_time_before_64(conn->oic_last_attempt +
+                                        RECONNECT_INTERVAL * HZ,
+                                        cfs_time_current_64()))
+                        continue;
+
+                /* If we have never tried this connection since the last
+                 * successful attempt, go with this one */
+                if (cfs_time_beforeq_64(conn->oic_last_attempt,
+                                        imp->imp_last_success_conn)) {
+                        pick = conn;
+                        break;
+                }
+
+                /* Every candidate so far has already been tried since the
+                 * last successful connection; remember the least recently
+                 * used one */
+                if (pick == NULL ||
+                    cfs_time_before_64(conn->oic_last_attempt,
+                                       pick->oic_last_attempt))
+                        pick = conn;
+        }
+
+        /* if not found, simply choose the current one */
+        if (pick == NULL) {
+                LASSERT(imp->imp_conn_current);
+                pick = imp->imp_conn_current;
+        }
+        LASSERT(pick->oic_conn);
+
+        pick->oic_last_attempt = cfs_time_current_64();
+
+        /* switch connection, don't mind if it's same as the current one */
+        if (imp->imp_connection)
+                ptlrpc_put_connection(imp->imp_connection);
+        imp->imp_connection = ptlrpc_connection_addref(pick->oic_conn);
+
+        /* keep the DLM export pointed at the same connection */
+        dlmexp = class_conn2export(&imp->imp_dlm_handle);
+        LASSERT(dlmexp != NULL);
+        if (dlmexp->exp_connection)
+                ptlrpc_put_connection(dlmexp->exp_connection);
+        dlmexp->exp_connection = ptlrpc_connection_addref(pick->oic_conn);
+        class_export_put(dlmexp);
+
+        if (imp->imp_conn_current != pick) {
+                /* stay quiet when there was no current connection before */
+                if (imp->imp_conn_current)
+                        LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
+                                      imp->imp_obd->obd_name,
+                                      pick->oic_uuid.uuid,
+                                      libcfs_nid2str(pick->oic_conn->c_peer.nid));
+                imp->imp_conn_current = pick;
+        }
+
+        CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+               imp->imp_obd->obd_name, imp, pick->oic_uuid.uuid,
+               libcfs_nid2str(pick->oic_conn->c_peer.nid));
+
+        spin_unlock(&imp->imp_lock);
+
+        RETURN(0);
+}
+
+/*
+ * Report the transno of the first request on the import's replay list.
+ *
+ * Must be called under imp_lock.
+ *
+ * Returns 0 (leaving *transno untouched) when the replay list is empty,
+ * 1 after storing the first request's rq_transno in *transno.  A zero
+ * transno on the replay list is treated as fatal (LBUG).
+ */
+int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
+{
+        struct ptlrpc_request *first;
+
+        if (list_empty(&imp->imp_replay_list))
+                return 0;
+
+        first = list_entry(imp->imp_replay_list.next, struct ptlrpc_request,
+                           rq_replay_list);
+        *transno = first->rq_transno;
+        if (first->rq_transno == 0) {
+                DEBUG_REQ(D_ERROR, first, "zero transno in replay");
+                LBUG();
+        }
+
+        return 1;
+}
+
+int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)