static time64_t ptlrpc_inflight_timeout(struct obd_import *imp)
{
time64_t now = ktime_get_real_seconds();
- struct list_head *tmp, *n;
struct ptlrpc_request *req;
time64_t timeout = 0;
spin_lock(&imp->imp_lock);
- list_for_each_safe(tmp, n, &imp->imp_sending_list) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ list_for_each_entry(req, &imp->imp_sending_list, rq_list)
timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
- }
spin_unlock(&imp->imp_lock);
return timeout;
}
*/
void ptlrpc_invalidate_import(struct obd_import *imp)
{
- struct list_head *tmp, *n;
struct ptlrpc_request *req;
time64_t timeout;
int rc;
* this point. */
rc = 1;
} else {
- list_for_each_safe(tmp, n,
- &imp->imp_sending_list) {
- req = list_entry(tmp,
- struct ptlrpc_request,
- rq_list);
+ list_for_each_entry(req, &imp->imp_sending_list,
+ rq_list) {
DEBUG_REQ(D_ERROR, req,
"still on sending list");
}
- list_for_each_safe(tmp, n,
- &imp->imp_delayed_list) {
- req = list_entry(tmp,
- struct ptlrpc_request,
- rq_list);
+ list_for_each_entry(req, &imp->imp_delayed_list,
+ rq_list) {
DEBUG_REQ(D_ERROR, req,
"still on delayed list");
}
int ptlrpc_reconnect_import(struct obd_import *imp)
{
-#ifdef CONFIG_LUSTRE_PINGER
+#ifdef CONFIG_LUSTRE_FS_PINGER
long timeout_jiffies = cfs_time_seconds(obd_timeout);
int rc;
imp_conn->oic_last_attempt = ktime_get_seconds();
/* switch connection, don't mind if it's same as the current one */
- if (imp->imp_connection)
- ptlrpc_connection_put(imp->imp_connection);
+ ptlrpc_connection_put(imp->imp_connection);
imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
dlmexp = class_conn2export(&imp->imp_dlm_handle);
if (!dlmexp)
GOTO(out_unlock, rc = -EINVAL);
- if (dlmexp->exp_connection)
- ptlrpc_connection_put(dlmexp->exp_connection);
+ ptlrpc_connection_put(dlmexp->exp_connection);
dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
class_export_put(dlmexp);
*/
static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
{
- struct ptlrpc_request *req;
- struct list_head *tmp;
+ struct ptlrpc_request *req;
/* The requests in committed_list always have smaller transnos than
* the requests in replay_list */
if (!list_empty(&imp->imp_committed_list)) {
- tmp = imp->imp_committed_list.next;
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ req = list_first_entry(&imp->imp_committed_list,
+ struct ptlrpc_request, rq_replay_list);
*transno = req->rq_transno;
if (req->rq_transno == 0) {
DEBUG_REQ(D_ERROR, req,
return 1;
}
if (!list_empty(&imp->imp_replay_list)) {
- tmp = imp->imp_replay_list.next;
- req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ /* replay_list was just checked non-empty; read its head entry */
+ req = list_first_entry(&imp->imp_replay_list,
+ struct ptlrpc_request, rq_replay_list);
*transno = req->rq_transno;
if (req->rq_transno == 0) {
DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
* for connecting*/
imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
spin_unlock(&imp->imp_lock);
- ptlrpc_maybe_ping_import_soon(imp);
GOTO(out, rc);
}
* with server again
*/
if ((MSG_CONNECT_RECOVERING & msg_flags)) {
- CDEBUG(level,
+ CDEBUG_LIMIT(level,
"%s@%s changed server handle from "
"%#llx to %#llx"
" but is still in recovery\n",
if (rc != 0) {
bool inact = false;
+ time64_t now = ktime_get_seconds();
+ time64_t next_connect;
import_set_state_nolock(imp, LUSTRE_IMP_DISCON);
if (rc == -EACCES) {
import_set_state_nolock(imp, LUSTRE_IMP_CLOSED);
inact = true;
}
+ } else if (rc == -ENODEV || rc == -ETIMEDOUT) {
+ /* ENODEV means there is no service: force reconnection
+ * to a pair if any connection's last attempt happened at
+ * least INITIAL_CONNECT_TIMEOUT before now. ETIMEDOUT could
+ * be set during a network error and does not guarantee that
+ * the request deadline has passed.
+ */
+ struct obd_import_conn *conn;
+ time64_t reconnect_time;
+
+ /* Same as ptlrpc_next_reconnect, but in past */
+ reconnect_time = now - INITIAL_CONNECT_TIMEOUT;
+ list_for_each_entry(conn, &imp->imp_conn_list,
+ oic_item) {
+ if (conn->oic_last_attempt <= reconnect_time) {
+ imp->imp_force_verify = 1;
+ break;
+ }
+ }
}
+
+ next_connect = imp->imp_conn_current->oic_last_attempt +
+ (request->rq_deadline - request->rq_sent);
spin_unlock(&imp->imp_lock);
if (inact)
if (rc == -EPROTO)
RETURN(rc);
+ /* adjust imp_next_ping to request deadline + 1 and reschedule
+ * a pinger if import lost processing during CONNECTING or far
+ * away from request deadline. It could happen when connection
+ * was initiated outside of pinger, like
+ * ptlrpc_set_import_discon().
+ */
+ if (!imp->imp_force_verify && (imp->imp_next_ping <= now ||
+ imp->imp_next_ping > next_connect)) {
+ imp->imp_next_ping = max(now, next_connect) + 1;
+ ptlrpc_pinger_wake_up();
+ }
+
ptlrpc_maybe_ping_import_soon(imp);
CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
RETURN(0);
- LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
- atomic_inc(&imp->imp_replay_inflight);
+ if (!atomic_add_unless(&imp->imp_replay_inflight, 1, 1))
+ RETURN(0);
req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
OBD_PING);
struct obd_import *imp = data;
ENTRY;
-
- unshare_fs_struct();
-
CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
ENTRY;
if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+ struct task_struct *task;
+
deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
&target_start, &target_len);
/* Don't care about MGC eviction */
"using this service will fail.\n",
imp->imp_obd->obd_name, target_len,
target_start);
- LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction");
+ LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction\n");
}
CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_vbr_failed = 0;
spin_unlock(&imp->imp_lock);
- {
- struct task_struct *task;
/* bug 17802: XXX client_disconnect_export vs connect request
* race. if client is evicted at this time then we start
* invalidate thread without reference to import and import can
* be freed at same time. */
class_import_get(imp);
task = kthread_run(ptlrpc_invalidate_import_thread, imp,
- "ll_imp_inval");
+ "ll_imp_inval");
if (IS_ERR(task)) {
class_import_put(imp);
- CERROR("error starting invalidate thread: %d\n", rc);
rc = PTR_ERR(task);
+ CERROR("%s: can't start invalidate thread: rc = %d\n",
+ imp->imp_obd->obd_name, rc);
} else {
rc = 0;
}
RETURN(rc);
- }
}
if (imp->imp_state == LUSTRE_IMP_REPLAY) {
GOTO(out, rc);
ptlrpc_activate_import(imp, true);
- CDEBUG_LIMIT(imp->imp_was_idle ?
- imp->imp_idle_debug : D_CONSOLE,
- "%s: Connection restored to %s (at %s)\n",
- imp->imp_obd->obd_name,
- obd_uuid2str(&conn->c_remote_uuid),
- obd_import_nid2str(imp));
+ /* Reverse imports are flagged with dlm_fake == 1.
+ * They do not do recovery, so their connections are not "restored".
+ */
+ if (!imp->imp_dlm_fake)
+ CDEBUG_LIMIT(imp->imp_was_idle ?
+ imp->imp_idle_debug : D_CONSOLE,
+ "%s: Connection restored to %s (at %s)\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ obd_import_nid2str(imp));
spin_lock(&imp->imp_lock);
imp->imp_was_idle = 0;
spin_unlock(&imp->imp_lock);
memset(&imp->imp_remote_handle, 0,
sizeof(imp->imp_remote_handle));
/* take our DISCONNECT into account */
- if (atomic_read(&imp->imp_inflight) > 1) {
+ if (atomic_read(&imp->imp_reqs) > 1) {
imp->imp_generation++;
imp->imp_initiated_at = imp->imp_generation;
import_set_state_nolock(imp, LUSTRE_IMP_NEW);