/*
 * Scan imp->imp_sending_list and return the largest value that
 * ptlrpc_inflight_deadline() reports for any request on it, evaluated
 * against the current wall-clock second. The result starts at 0, so 0 is
 * returned when the list is empty or no request yields a positive value.
 */
static time64_t ptlrpc_inflight_timeout(struct obd_import *imp)
{
time64_t now = ktime_get_real_seconds();
- struct list_head *tmp, *n;
struct ptlrpc_request *req;
time64_t timeout = 0;
/* The sending list is walked with imp_lock held. */
spin_lock(&imp->imp_lock);
/* Nothing is unlinked in the loop body here, so the plain (non-"safe")
 * entry iterator replaces the old tmp/n cursor pair.
 */
- list_for_each_safe(tmp, n, &imp->imp_sending_list) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ list_for_each_entry(req, &imp->imp_sending_list, rq_list)
timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
- }
spin_unlock(&imp->imp_lock);
return timeout;
}
*/
void ptlrpc_invalidate_import(struct obd_import *imp)
{
- struct list_head *tmp, *n;
struct ptlrpc_request *req;
time64_t timeout;
int rc;
* this point. */
rc = 1;
} else {
- list_for_each_safe(tmp, n,
- &imp->imp_sending_list) {
- req = list_entry(tmp,
- struct ptlrpc_request,
- rq_list);
+ list_for_each_entry(req, &imp->imp_sending_list,
+ rq_list) {
DEBUG_REQ(D_ERROR, req,
"still on sending list");
}
- list_for_each_safe(tmp, n,
- &imp->imp_delayed_list) {
- req = list_entry(tmp,
- struct ptlrpc_request,
- rq_list);
+ list_for_each_entry(req, &imp->imp_delayed_list,
+ rq_list) {
DEBUG_REQ(D_ERROR, req,
"still on delayed list");
}
imp_conn->oic_last_attempt = ktime_get_seconds();
/* switch connection, don't mind if it's same as the current one */
- if (imp->imp_connection)
- ptlrpc_connection_put(imp->imp_connection);
+ ptlrpc_connection_put(imp->imp_connection);
imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
dlmexp = class_conn2export(&imp->imp_dlm_handle);
if (!dlmexp)
GOTO(out_unlock, rc = -EINVAL);
- if (dlmexp->exp_connection)
- ptlrpc_connection_put(dlmexp->exp_connection);
+ ptlrpc_connection_put(dlmexp->exp_connection);
dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
class_export_put(dlmexp);
*/
static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
{
- struct ptlrpc_request *req;
- struct list_head *tmp;
+ struct ptlrpc_request *req;
/* The requests in committed_list always have smaller transnos than
* the requests in replay_list */
if (!list_empty(&imp->imp_committed_list)) {
- tmp = imp->imp_committed_list.next;
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ req = list_first_entry(&imp->imp_committed_list,
+ struct ptlrpc_request, rq_replay_list);
*transno = req->rq_transno;
if (req->rq_transno == 0) {
DEBUG_REQ(D_ERROR, req,
return 1;
}
if (!list_empty(&imp->imp_replay_list)) {
- tmp = imp->imp_replay_list.next;
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ /* Take the head of the list that was just checked for
+ * emptiness; committed_list may be empty on this path.
+ */
+ req = list_first_entry(&imp->imp_replay_list,
+ struct ptlrpc_request, rq_replay_list);
*transno = req->rq_transno;
if (req->rq_transno == 0) {
DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
* for connecting*/
imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
spin_unlock(&imp->imp_lock);
- ptlrpc_maybe_ping_import_soon(imp);
GOTO(out, rc);
}
if (rc != 0) {
bool inact = false;
+ time64_t now = ktime_get_seconds();
+ time64_t next_connect;
import_set_state_nolock(imp, LUSTRE_IMP_DISCON);
if (rc == -EACCES) {
import_set_state_nolock(imp, LUSTRE_IMP_CLOSED);
inact = true;
}
+ } else if (rc == -ENODEV || rc == -ETIMEDOUT) {
+ /* ENODEV means there is no service: force reconnection
+ * to a pair if its last attempt happened at least
+ * INITIAL_CONNECT_TIMEOUT ago (see ptlrpc_next_reconnect).
+ * ETIMEDOUT can be set on a network error and does not
+ * guarantee that the request deadline has passed.
+ */
+ struct obd_import_conn *conn;
+ time64_t reconnect_time;
+
+ /* Same as ptlrpc_next_reconnect, but in past */
+ reconnect_time = now - INITIAL_CONNECT_TIMEOUT;
+ list_for_each_entry(conn, &imp->imp_conn_list,
+ oic_item) {
+ if (conn->oic_last_attempt <= reconnect_time) {
+ imp->imp_force_verify = 1;
+ break;
+ }
+ }
}
+
+ next_connect = imp->imp_conn_current->oic_last_attempt +
+ (request->rq_deadline - request->rq_sent);
spin_unlock(&imp->imp_lock);
if (inact)
if (rc == -EPROTO)
RETURN(rc);
+ /* adjust imp_next_ping to request deadline + 1 and reschedule
+ * a pinger if import lost processing during CONNECTING or far
+ * away from request deadline. It could happen when connection
+ * was initiated outside of pinger, like
+ * ptlrpc_set_import_discon().
+ */
+ if (!imp->imp_force_verify && (imp->imp_next_ping <= now ||
+ imp->imp_next_ping > next_connect)) {
+ imp->imp_next_ping = max(now, next_connect) + 1;
+ ptlrpc_pinger_wake_up();
+ }
+
ptlrpc_maybe_ping_import_soon(imp);
CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
struct obd_import *imp = data;
ENTRY;
-
- unshare_fs_struct();
-
CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
GOTO(out, rc);
ptlrpc_activate_import(imp, true);
- CDEBUG_LIMIT(imp->imp_was_idle ?
- imp->imp_idle_debug : D_CONSOLE,
- "%s: Connection restored to %s (at %s)\n",
- imp->imp_obd->obd_name,
- obd_uuid2str(&conn->c_remote_uuid),
- obd_import_nid2str(imp));
+ /* Reverse imports are flagged with imp_dlm_fake == 1.
+ * They do not do recovery and their connections are not "restored".
+ */
+ if (!imp->imp_dlm_fake)
+ CDEBUG_LIMIT(imp->imp_was_idle ?
+ imp->imp_idle_debug : D_CONSOLE,
+ "%s: Connection restored to %s (at %s)\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ obd_import_nid2str(imp));
spin_lock(&imp->imp_lock);
imp->imp_was_idle = 0;
spin_unlock(&imp->imp_lock);