* Copyright (c) 2002, 2003 Cluster File Systems, Inc.
* Author: Mike Shaver <shaver@clusterfs.com>
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
*/
#define DEBUG_SUBSYSTEM S_RPC
return 0;
}
+EXPORT_SYMBOL(ptlrpc_init_import);
#define UUID_STR "_UUID"
static void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
if (*uuid_len < strlen(UUID_STR))
return;
-
+
if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
UUID_STR, strlen(UUID_STR)))
*uuid_len -= strlen(UUID_STR);
spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_state == LUSTRE_IMP_FULL) {
- char nidbuf[PTL_NALFMT_SIZE];
char *target_start;
int target_len;
"lost; in progress operations using this "
"service will %s.\n",
target_len, target_start,
- ptlrpc_peernid2str(&imp->imp_connection->c_peer,
- nidbuf),
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
imp->imp_replayable
? "wait for recovery to complete"
: "fail");
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
CWARN("%s: connection lost to %s@%s\n",
imp->imp_obd->obd_name,
imp->imp_target_uuid.uuid,
EXIT;
}
-#define ATTEMPT_TOO_SOON(last) \
- ((last) && ((long)(jiffies - (last)) <= (long)(obd_timeout * 2 * HZ)))
+/* still trying to connect
+ *
+ * Report whether the import is still in the middle of a connection attempt.
+ * The check is made while holding imp->imp_lock (irqsave variant).
+ *
+ * Returns 1 when the import has not been marked invalid and its state is
+ * LUSTRE_IMP_CONNECTING or LUSTRE_IMP_DISCON; returns 0 otherwise.
+ */
+static int ptlrpc_import_in_connect(struct obd_import *imp)
+{
+ unsigned long flags;
+ int in_connect = 0;
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ if (!imp->imp_invalid &&
+ (imp->imp_state == LUSTRE_IMP_CONNECTING ||
+ imp->imp_state == LUSTRE_IMP_DISCON))
+ in_connect = 1;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ return in_connect;
+}
+/* Block the caller until the import is no longer attempting to connect.
+ *
+ * Sleeps on imp->imp_recovery_waitq until ptlrpc_import_in_connect(imp)
+ * reports false. The value returned by l_wait_event() is only logged; it
+ * does not influence this function's return value.
+ *
+ * Returns -ETIMEDOUT if imp->imp_invalid is set when the wait ends, and 0
+ * otherwise.
+ *
+ * NOTE(review): the wait result is reported through CERROR on every call,
+ * including when err is 0 -- confirm that error-level logging is intended.
+ * NOTE(review): presumably LWI_INTR(NULL, NULL) makes the wait
+ * interruptible with no timeout; if so, an interrupted wait still returns
+ * 0 while the import is not invalid -- confirm against callers.
+ */
+int ptlrpc_wait_for_connect(struct obd_import *imp)
+{
+ struct l_wait_info lwi;
+ int err;
+
+ lwi = LWI_INTR(NULL, NULL);
+ err = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_connect(imp), &lwi);
+ CERROR("wait got %d (%s, %d)\n", err,
+ ptlrpc_import_state_name(imp->imp_state),
+ imp->imp_invalid);
+ return (imp->imp_invalid ? -ETIMEDOUT : 0);
+}
+EXPORT_SYMBOL(ptlrpc_wait_for_connect);
static int import_select_connection(struct obd_import *imp)
{
- struct obd_import_conn *imp_conn, *tmp;
+ struct obd_import_conn *imp_conn;
struct obd_export *dlmexp;
- int found = 0;
ENTRY;
spin_lock(&imp->imp_lock);
RETURN(-EINVAL);
}
- list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
- if (!imp_conn->oic_last_attempt ||
- time_after(jiffies, imp_conn->oic_last_attempt +
- obd_timeout * 2 * HZ)) {
- found = 1;
- break;
- }
- }
-
- /* if not found, simply choose the current one */
- if (!found) {
- CDEBUG(D_NET, "%s: continuing with current connection\n",
- imp->imp_obd->obd_name);
- LASSERT(imp->imp_conn_current);
- imp_conn = imp->imp_conn_current;
- }
- LASSERT(imp_conn->oic_conn);
-
- imp_conn->oic_last_attempt = jiffies;
-
- /* move the items ahead of the selected one to list tail */
- while (1) {
- tmp= list_entry(imp->imp_conn_list.next,
- struct obd_import_conn, oic_item);
- if (tmp == imp_conn)
- break;
- list_del(&tmp->oic_item);
- list_add_tail(&tmp->oic_item, &imp->imp_conn_list);
+ if (imp->imp_conn_current &&
+ imp->imp_conn_current->oic_item.next != &imp->imp_conn_list) {
+ imp_conn = list_entry(imp->imp_conn_current->oic_item.next,
+ struct obd_import_conn, oic_item);
+ } else {
+ imp_conn = list_entry(imp->imp_conn_list.next,
+ struct obd_import_conn, oic_item);
}
/* switch connection, don't mind if it's same as the current one */
dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
class_export_put(dlmexp);
+ if (imp->imp_conn_current && (imp->imp_conn_current != imp_conn)) {
+ LCONSOLE_WARN("Changing connection for %s to %s\n",
+ imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid);
+ }
imp->imp_conn_current = imp_conn;
- CDEBUG(D_NET, "%s: import %p using connection %s\n",
+ CDEBUG(D_HA, "%s: import %p using connection %s\n",
imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid);
spin_unlock(&imp->imp_lock);
int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
{
struct obd_device *obd = imp->imp_obd;
- int initial_connect = 0;
+ int initial_connect = 0, first_try;
int rc;
__u64 committed_before_reconnect = 0;
struct ptlrpc_request *request;
GOTO(out, rc);
}
+ first_try = (imp->imp_conn_current == NULL);
rc = import_select_connection(imp);
if (rc)
GOTO(out, rc);
+ if ((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_BLOCK) &&
+ initial_connect && !first_try &&
+ (imp->imp_conn_current == list_entry(imp->imp_conn_list.next,
+ struct obd_import_conn,
+ oic_item))) {
+ /* Never connected, tried everyone, and nobody answered.
+ Give up; in-progress ops will fail (probably EIO) */
+ LCONSOLE_ERROR("All %d connections for %s failed; I am "
+ "deactivating the import.\n",
+ imp->imp_conn_cnt - 1,
+ imp->imp_target_uuid.uuid);
+ ptlrpc_deactivate_import(imp);
+ /* for ptlrpc_wait_for_connect */
+ wake_up(&imp->imp_recovery_waitq);
+ GOTO(out, rc = -ETIMEDOUT);
+ }
+
request = ptlrpc_prep_req(imp, imp->imp_connect_op, 4, size, tmp);
if (!request)
GOTO(out, rc = -ENOMEM);
#endif
request->rq_send_state = LUSTRE_IMP_CONNECTING;
- size[0] = sizeof(struct obd_connect_data);
+ /* Allow a slightly larger reply for future growth compatibility */
+ size[0] = sizeof(struct obd_connect_data) + 16 * sizeof(__u64);
request->rq_replen = lustre_msg_size(1, size);
request->rq_interpret_reply = ptlrpc_connect_interpret;
aa->pcaa_peer_committed = committed_before_reconnect;
aa->pcaa_initial_connect = initial_connect;
- if (aa->pcaa_initial_connect)
+ if (aa->pcaa_initial_connect) {
imp->imp_replayable = 1;
+ /* On an initial connect, we don't know which one of a
+ failover server pair is up. Don't wait long. */
+ request->rq_timeout = max((int)(obd_timeout / 20), 5);
+ }
DEBUG_REQ(D_RPCTRACE, request, "(re)connect request");
ptlrpcd_add_req(request);
RETURN(rc);
}
+EXPORT_SYMBOL(ptlrpc_connect_import);
+/* Request an early ping of the import unless its current connection is the
+ * last entry of the import's connection list.
+ *
+ * With imp->imp_lock held: if imp_conn_list is empty nothing is done.
+ * Otherwise the tail entry of imp_conn_list is compared with
+ * imp_conn_current; when they differ, ptlrpc_ping_import_soon(imp) is
+ * called. ptlrpc_pinger_wake_up() is then called once the lock has been
+ * released.
+ *
+ * NOTE(review): presumably "current is not the tail" means there are still
+ * connections in the list that have not been tried in this round -- confirm
+ * against import_select_connection().
+ */
+static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
+{
+ struct obd_import_conn *imp_conn;
+ unsigned long flags;
+ int wake_pinger = 0;
+
+ ENTRY;
+
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ if (list_empty(&imp->imp_conn_list))
+ GOTO(unlock, 0);
+
+ imp_conn = list_entry(imp->imp_conn_list.prev,
+ struct obd_import_conn,
+ oic_item);
+
+ if (imp->imp_conn_current != imp_conn) {
+ ptlrpc_ping_import_soon(imp);
+ wake_pinger = 1;
+ }
+
+ unlock:
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+ if (wake_pinger) /* wake-up is issued only after imp_lock is dropped */
+ ptlrpc_pinger_wake_up();
+
+ EXIT;
+}
static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
void * data, int rc)
GOTO(out, rc);
LASSERT(imp->imp_conn_current);
- imp->imp_conn_current->oic_last_attempt = 0;
msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
/* All imports are pingable */
imp->imp_pingable = 1;
-
+
if (aa->pcaa_initial_connect) {
if (msg_flags & MSG_CONNECT_REPLAYABLE) {
CDEBUG(D_HA, "connected to replayable target: %s\n",
imp->imp_replayable = 0;
}
imp->imp_remote_handle = request->rq_repmsg->handle;
+
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
GOTO(finish, rc = 0);
}
ptlrpc_connect_import(imp, NULL);
RETURN(0);
}
+ } else {
+ struct obd_connect_data *ocd;
+
+ ocd = lustre_swab_repbuf(request, 0,
+ sizeof *ocd, lustre_swab_connect);
+ if (ocd == NULL) {
+ CERROR("Wrong connect data from server\n");
+ rc = -EPROTO;
+ GOTO(out, rc);
+ }
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ /*
+ * check that server granted subset of flags we asked for.
+ */
+ LASSERT((ocd->ocd_connect_flags &
+ imp->imp_connect_data.ocd_connect_flags) ==
+ ocd->ocd_connect_flags);
+ imp->imp_connect_data = *ocd;
+ if (imp->imp_conn_current != NULL) {
+ list_del(&imp->imp_conn_current->oic_item);
+ list_add(&imp->imp_conn_current->oic_item,
+ &imp->imp_conn_list);
+ imp->imp_conn_current = NULL;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ } else {
+ static int bug7269_dump = 0;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ CERROR("this is bug 7269 - please attach log there\n");
+ if (bug7269_dump == 0)
+ libcfs_debug_dumplog();
+ bug7269_dump = 1;
+ }
}
+
out:
if (rc != 0) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
if (aa->pcaa_initial_connect && !imp->imp_initial_recov) {
ptlrpc_deactivate_import(imp);
}
+
+ ptlrpc_maybe_ping_import_soon(imp);
+
CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
imp->imp_target_uuid.uuid,
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
ptlrpc_import_recovery_state_machine(req->rq_import);
} else {
CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
- "reconnecting\n",
+ "reconnecting\n",
req->rq_import->imp_obd->obd_name, req->rq_status);
ptlrpc_connect_import(req->rq_import, NULL);
}
RETURN(0);
}
+#ifdef __KERNEL__
static int ptlrpc_invalidate_import_thread(void *data)
{
struct obd_import *imp = data;
RETURN(0);
}
+#endif
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
deuuidify(imp->imp_target_uuid.uuid, NULL,
&target_start, &target_len);
LCONSOLE_ERROR("This client was evicted by %.*s; in progress "
- "operations using this service will %s.\n",
- target_len, target_start,
- imp->imp_replayable
- ? "be reattempted"
- : "fail");
+ "operations using this service will fail.\n",
+ target_len, target_start);
CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
+#ifdef __KERNEL__
rc = kernel_thread(ptlrpc_invalidate_import_thread, imp,
CLONE_VM | CLONE_FILES);
if (rc < 0)
CERROR("error starting invalidate thread: %d\n", rc);
+ else
+ rc = 0;
RETURN(rc);
+#else
+ ptlrpc_invalidate_import(imp);
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+#endif
}
if (imp->imp_state == LUSTRE_IMP_REPLAY) {
}
if (imp->imp_state == LUSTRE_IMP_RECOVER) {
- char nidbuf[PTL_NALFMT_SIZE];
+ char *nidstr;
CDEBUG(D_HA, "reconnected to %s@%s\n",
imp->imp_target_uuid.uuid,
deuuidify(imp->imp_target_uuid.uuid, NULL,
&target_start, &target_len);
- ptlrpc_peernid2str(&imp->imp_connection->c_peer,
- nidbuf);
+ nidstr = libcfs_nid2str(imp->imp_connection->c_peer.nid);
LCONSOLE_INFO("Connection restored to service %.*s using nid "
- "%s.\n",
- target_len, target_start, nidbuf);
+ "%s.\n", target_len, target_start, nidstr);
CWARN("%s: connection restored to %s@%s\n",
imp->imp_obd->obd_name,
switch (imp->imp_connect_op) {
case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
- case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
default:
CERROR("don't know how to disconnect from %s (connect_op %d)\n",
imp->imp_target_uuid.uuid, imp->imp_connect_op);
RETURN(-EINVAL);
}
-
if (ptlrpc_import_in_recovery(imp)) {
struct l_wait_info lwi;
lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep,
}
spin_lock_irqsave(&imp->imp_lock, flags);
- if (imp->imp_state != LUSTRE_IMP_FULL) {
+ if (imp->imp_state != LUSTRE_IMP_FULL)
GOTO(out, 0);
- }
+
spin_unlock_irqrestore(&imp->imp_lock, flags);
request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
if (request) {
- /* For non-replayable connections, don't attempt
- reconnect if this fails */
- if (!imp->imp_replayable) {
- request->rq_no_resend = 1;
- IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
- request->rq_send_state = LUSTRE_IMP_CONNECTING;
- }
+ /* We are disconnecting, so do not retry the DISCONNECT rpc if
+ * it fails. We can get through the above with a down server
+ * if the client doesn't know the server is gone yet. */
+ request->rq_no_resend = 1;
+ request->rq_timeout = 5;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
+ request->rq_send_state = LUSTRE_IMP_CONNECTING;
request->rq_replen = lustre_msg_size(0, NULL);
rc = ptlrpc_queue_wait(request);
ptlrpc_req_finished(request);