* 1. we previously got disconnected,
* 2. value > 1 (at the same node with MGS)
* */
+ if (value > 1) {
+ struct adaptive_timeout *at;
+
+ at = &imp->imp_at.iat_net_latency;
+ at_reset(at, INITIAL_CONNECT_TIMEOUT);
+ }
+
if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1)
ptlrpc_reconnect_import(imp);
ENTRY;
if (rc) {
- if (rc == -ENODEV) {
+ if (rc == -ENODEV || rc == -EINVAL) {
/* The lock is useless, cancel it. */
ldlm_lock_cancel(ca->ca_lock);
rc = 0;
int ptlrpc_reconnect_import(struct obd_import *imp)
{
-#ifdef CONFIG_LUSTRE_FS_PINGER
- long timeout_jiffies = cfs_time_seconds(obd_timeout);
- int rc;
-
- ptlrpc_pinger_force(imp);
-
- CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
- obd2cli_tgt(imp->imp_obd), obd_timeout);
+ int rc = 0; /* now carries the result of ptlrpc_recover_import() to the caller */
+ ENTRY; /*
+ * Flow after this change (the '+' and context lines): the
+ * CONFIG_LUSTRE_FS_PINGER-only "force pinger and wait" path is dropped, so
+ * there is a single code path: ptlrpc_set_import_discon(),
+ * ptlrpc_invalidate_import(), clear obd_no_recov, zero the remote handle
+ * cookie, then ptlrpc_recover_import(). The old non-pinger path returned 0
+ * unconditionally; the new code returns the recover call's rc.
+ */
- rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp),
- timeout_jiffies);
- if (rc == 0)
- rc = -ETIMEDOUT;
- else
- rc = 0;
- CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
- ptlrpc_import_state_name(imp->imp_state));
- return rc;
-#else
- ptlrpc_set_import_discon(imp, 0, false);
+ ptlrpc_set_import_discon(imp, 0, true); /* third argument is 'invalid' per the prototype; flipped from false to true */
/* Force a new connect attempt */
ptlrpc_invalidate_import(imp);
- /* Do a fresh connect next time by zeroing the handle */
- ptlrpc_disconnect_import(imp, 1);
/* Wait for all invalidate calls to finish */
if (atomic_read(&imp->imp_inval_count) > 0) {
int rc; /* NOTE(review): appears to shadow the function-scope rc added above (-Wshadow); its users are outside this hunk - confirm */
/* Allow reconnect attempts */
imp->imp_obd->obd_no_recov = 0;
- /* Remove 'invalid' flag */
- ptlrpc_activate_import(imp, false);
+ imp->imp_remote_handle.cookie = 0; /* takes over from the removed ptlrpc_disconnect_import(imp, 1), which was commented as zeroing the handle for a fresh connect */
/* Attempt a new connect */
- ptlrpc_recover_import(imp, NULL, 0);
- return 0;
-#endif
+ rc = ptlrpc_recover_import(imp, NULL, 0); /* async == 0: the callee waits on imp_recovery_waitq for up to obd_timeout seconds */
+
+ RETURN(rc);
}
EXPORT_SYMBOL(ptlrpc_reconnect_import);
if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
/* wait for a while before trying recovery again */
imp->imp_next_ping = ptlrpc_next_reconnect(imp);
- spin_unlock(&imp->imp_lock);
if (!imp->imp_no_pinger_recover ||
- imp->imp_connect_error == -EAGAIN)
- ptlrpc_initiate_recovery(imp);
+ imp->imp_connect_error == -EAGAIN) {
+ CDEBUG(D_HA, "%s: starting recovery\n",
+ obd2cli_tgt(imp->imp_obd));
+ ptlrpc_connect_import_locked(imp);
+ } else {
+ spin_unlock(&imp->imp_lock);
+ }
} else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov ||
imp_is_deactive(imp)) {
CDEBUG(D_HA,
void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
void lustre_assert_wire_constants(void);
-int ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery(struct obd_import *imp);
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp, bool d);
int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt,
bool invalid);
void ptlrpc_handle_failed_import(struct obd_import *imp);
int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
-void ptlrpc_initiate_recovery(struct obd_import *imp);
int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
#include "ptlrpc_internal.h"
/**
- * Start recovery on disconnected import.
- * This is done by just attempting a connect
- */
-void ptlrpc_initiate_recovery(struct obd_import *imp)
-{
- ENTRY;
-
- CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
- ptlrpc_connect_import(imp);
-
- EXIT;
-}
-
-/**
* Identify what request from replay list needs to be replayed next
* (based on what we have already replayed) and send it to server.
*/
}
EXPORT_SYMBOL(ptlrpc_set_import_active);
+bool ptlrpc_import_in_recovery_disconnect(struct obd_import *imp,
+ bool disconnect_is_recovery)
+{ /*
+ * Report whether @imp is currently considered to be in recovery.
+ *
+ * @imp: import whose imp_state and imp_obd->obd_no_recov are
+ * examined while holding imp->imp_lock.
+ * @disconnect_is_recovery: when true, an import sitting in LUSTRE_IMP_DISCON
+ * is reported as in recovery; when false it is not.
+ *
+ * Returns true unless one of the conditions below holds.
+ */
+ bool in_recovery = true;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state < LUSTRE_IMP_DISCON || /* state is below DISCON */
+ (!disconnect_is_recovery && imp->imp_state == LUSTRE_IMP_DISCON) || /* DISCON only counts when the caller asked for it */
+ imp->imp_state >= LUSTRE_IMP_FULL || /* state is FULL or beyond */
+ imp->imp_obd->obd_no_recov) /* obd_no_recov flag is set on the obd */
+ in_recovery = false;
+ spin_unlock(&imp->imp_lock);
+
+ return in_recovery;
+}
+
+
/* Attempt to reconnect an import */
int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
{
rc = -EALREADY;
}
spin_unlock(&imp->imp_lock);
- if (rc)
- GOTO(out, rc);
-
- CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
- rc = ptlrpc_connect_import(imp);
- if (rc)
- GOTO(out, rc);
+ if (!rc) { /* only start a connect when the checks above left rc at 0 */
+ CFS_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
+ rc = ptlrpc_connect_import(imp);
+ }
- if (!async) {
+ if (!async && (rc == -EALREADY || rc == 0)) { /* synchronous callers wait after a successful connect start and also on -EALREADY (presumably a connect already in progress - confirm); any other error skips the wait */
long timeout = cfs_time_seconds(obd_timeout);
- CDEBUG(D_HA, "%s: recovery started, waiting %u jiffies\n",
+ CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
obd2cli_tgt(imp->imp_obd), obd_timeout);
rc = wait_event_idle_timeout(imp->imp_recovery_waitq,
- !ptlrpc_import_in_recovery(imp),
- timeout);
+ !ptlrpc_import_in_recovery_disconnect(imp, true), /* true: an import in LUSTRE_IMP_DISCON still counts as recovering, so the wait continues */
+ timeout);
if (rc == 0) /* a zero result from the wait is reported as a timeout */
rc = -ETIMEDOUT;
else
rc = 0; /* any non-zero wait result is reported as success */
- CDEBUG(D_HA, "%s: recovery finished\n",
- obd2cli_tgt(imp->imp_obd));
+ CDEBUG(D_HA, "%s: recovery finished %s, rc = %d\n",
+ obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(imp->imp_state), rc);
}
EXIT;
}
EXPORT_SYMBOL(ptlrpc_recover_import);
-int ptlrpc_import_in_recovery(struct obd_import *imp)
+bool ptlrpc_import_in_recovery(struct obd_import *imp)
{
- int in_recovery = 1;
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_state <= LUSTRE_IMP_DISCON ||
- imp->imp_state >= LUSTRE_IMP_FULL ||
- imp->imp_obd->obd_no_recov)
- in_recovery = 0;
- spin_unlock(&imp->imp_lock);
-
- return in_recovery;
+ return ptlrpc_import_in_recovery_disconnect(imp, false); /*
+ * Passing false makes the helper treat LUSTRE_IMP_DISCON as "not in
+ * recovery", which matches the removed "imp_state <= LUSTRE_IMP_DISCON"
+ * test; the FULL and obd_no_recov checks and the imp_lock protection now
+ * live in the helper. Only the return type changes here, from int to bool.
+ */
}