}
/* A CLOSED import should remain so. */
- if (state == LUSTRE_IMP_CLOSED)
+ if (imp->imp_state == LUSTRE_IMP_CLOSED)
return;
if (imp->imp_state != LUSTRE_IMP_NEW) {
int ptlrpc_reconnect_import(struct obd_import *imp)
{
-#ifdef ENABLE_PINGER
+#ifdef CONFIG_LUSTRE_FS_PINGER
long timeout_jiffies = cfs_time_seconds(obd_timeout);
int rc;
*/
static int import_select_connection(struct obd_import *imp)
{
/* Choose which entry of imp->imp_conn_list this import uses next, point
 * imp->imp_connection and the DLM export connection at it, and record it
 * in imp->imp_conn_current. imp->imp_lock is taken below and released at
 * the end of the function. In the added (+) lines the result is rc:
 * 0 on success, -EINVAL when the connection list is empty or when
 * class_conn2export() yields no export (the removed lines asserted
 * on that case instead).
 */
- struct obd_import_conn *imp_conn = NULL, *conn;
- struct obd_export *dlmexp;
- char *target_start;
- int target_len, tried_all = 1;
- ENTRY;
+ struct obd_import_conn *imp_conn = NULL, *conn;
+ struct obd_export *dlmexp;
+ char *target_start;
+ int target_len, tried_all = 1;
+ int rc = 0;
+ ENTRY;
spin_lock(&imp->imp_lock);
if (list_empty(&imp->imp_conn_list)) {
- CERROR("%s: no connections available\n",
- imp->imp_obd->obd_name);
- spin_unlock(&imp->imp_lock);
- RETURN(-EINVAL);
+ rc = -EINVAL;
+ CERROR("%s: no connections available: rc = %d\n",
+ imp->imp_obd->obd_name, rc);
+ GOTO(out_unlock, rc);
}
/* Walk every candidate. The scan stops at the first entry whose
 * oic_last_attempt is zero or not later than imp_last_success_conn
 * (tried_all is cleared); otherwise imp_conn tracks the entry with
 * the smallest oic_last_attempt seen so far.
 */
list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
CDEBUG(D_HA, "%s: connect to NID %s last attempt %lld\n",
- imp->imp_obd->obd_name,
- libcfs_nid2str(conn->oic_conn->c_peer.nid),
- conn->oic_last_attempt);
+ imp->imp_obd->obd_name,
+ libcfs_nid2str(conn->oic_conn->c_peer.nid),
+ conn->oic_last_attempt);
- /* If we have not tried this connection since
- the last successful attempt, go with this one */
- if ((conn->oic_last_attempt == 0) ||
+ /* If we have not tried this connection since
+ * the last successful attempt, go with this one
+ */
+ if ((conn->oic_last_attempt == 0) ||
conn->oic_last_attempt <= imp->imp_last_success_conn) {
- imp_conn = conn;
- tried_all = 0;
- break;
- }
+ imp_conn = conn;
+ tried_all = 0;
+ break;
+ }
- /* If all of the connections have already been tried
- since the last successful connection; just choose the
- least recently used */
- if (!imp_conn)
- imp_conn = conn;
+ /* If all of the connections have already been tried
+ * since the last successful connection; just choose the
+ * least recently used
+ */
+ if (!imp_conn)
+ imp_conn = conn;
else if (imp_conn->oic_last_attempt > conn->oic_last_attempt)
- imp_conn = conn;
- }
+ imp_conn = conn;
+ }
- /* if not found, simply choose the current one */
- if (!imp_conn || imp->imp_force_reconnect) {
- LASSERT(imp->imp_conn_current);
- imp_conn = imp->imp_conn_current;
- tried_all = 0;
- }
- LASSERT(imp_conn->oic_conn);
-
- /* If we've tried everything, and we're back to the beginning of the
- list, increase our timeout and try again. It will be reset when
- we do finally connect. (FIXME: really we should wait for all network
- state associated with the last connection attempt to drain before
- trying to reconnect on it.) */
- if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
+ /* if not found, simply choose the current one */
+ if (!imp_conn || imp->imp_force_reconnect) {
+ LASSERT(imp->imp_conn_current);
+ imp_conn = imp->imp_conn_current;
+ tried_all = 0;
+ }
+ LASSERT(imp_conn->oic_conn);
+
+ /* If we've tried everything, and we're back to the beginning of the
+ * list, increase our timeout and try again. It will be reset when
+ * we do finally connect. (FIXME: really we should wait for all network
+ * state associated with the last connection attempt to drain before
+ * trying to reconnect on it.)
+ */
+ if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+
/* Raise the net latency estimate by CONNECTION_SWITCH_INC,
 * capped at CONNECTION_SWITCH_MAX.
 */
if (at_get(at) < CONNECTION_SWITCH_MAX) {
at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
if (at_get(at) > CONNECTION_SWITCH_MAX)
at_reset(at, CONNECTION_SWITCH_MAX);
}
LASSERT(imp_conn->oic_last_attempt);
- CDEBUG(D_HA, "%s: tried all connections, increasing latency "
- "to %ds\n", imp->imp_obd->obd_name, at_get(at));
+ CDEBUG(D_HA,
+ "%s: tried all connections, increasing latency to %ds\n",
+ imp->imp_obd->obd_name, at_get(at));
}
/* Stamp the chosen entry with the current time in seconds. */
imp_conn->oic_last_attempt = ktime_get_seconds();
- /* switch connection, don't mind if it's same as the current one */
- if (imp->imp_connection)
- ptlrpc_connection_put(imp->imp_connection);
- imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
-
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
- LASSERT(dlmexp != NULL);
- if (dlmexp->exp_connection)
- ptlrpc_connection_put(dlmexp->exp_connection);
- dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
- class_export_put(dlmexp);
-
- if (imp->imp_conn_current != imp_conn) {
- if (imp->imp_conn_current) {
- deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
- &target_start, &target_len);
-
- CDEBUG(D_HA, "%s: Connection changing to"
- " %.*s (at %s)\n",
- imp->imp_obd->obd_name,
- target_len, target_start,
- libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
- }
+ /* switch connection, don't mind if it's same as the current one */
+ if (imp->imp_connection)
+ ptlrpc_connection_put(imp->imp_connection);
+ imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+
+ dlmexp = class_conn2export(&imp->imp_dlm_handle);
/* NOTE(review): this early exit is reached after oic_last_attempt was
 * stamped and imp->imp_connection was already switched above, but before
 * imp->imp_conn_current is updated below - confirm that callers tolerate
 * this partially updated state on -EINVAL.
 */
+ if (!dlmexp)
+ GOTO(out_unlock, rc = -EINVAL);
+ if (dlmexp->exp_connection)
+ ptlrpc_connection_put(dlmexp->exp_connection);
+ dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+ class_export_put(dlmexp);
+
+ if (imp->imp_conn_current != imp_conn) {
+ if (imp->imp_conn_current) {
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+
+ CDEBUG(D_HA, "%s: Connection changing to"
+ " %.*s (at %s)\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+ }
- imp->imp_conn_current = imp_conn;
- }
+ imp->imp_conn_current = imp_conn;
+ }
/* The below message is checked in conf-sanity.sh test_35[ab] */
- CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
- imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
- libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+ CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+ imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
/* Common exit for the added lines: drop imp_lock and return rc. */
+out_unlock:
spin_unlock(&imp->imp_lock);
-
- RETURN(0);
+ RETURN(rc);
}
/*
/* Report the rpc service time to the server so that it knows how long
* to wait for clients to join recovery */
- lustre_msg_set_service_time(request->rq_reqmsg,
- at_timeout2est(request->rq_timeout));
+ lustre_msg_set_service_timeout(request->rq_reqmsg,
+ at_timeout2est(request->rq_timeout));
/* The amount of time we give the server to process the connect req.
* import_select_connection will increase the net latency on
lustre_msg_add_op_flags(request->rq_reqmsg,
MSG_CONNECT_TRANSNO);
- DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %ld)",
+ DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
request->rq_timeout);
ptlrpcd_add_req(request);
rc = 0;
struct obd_import *imp = request->rq_import;
struct lustre_handle old_hdl;
__u64 old_connect_flags;
+ timeout_t service_timeout;
int msg_flags;
struct obd_connect_data *ocd;
struct obd_export *exp = NULL;
imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
/* The net statistics after (re-)connect is not valid anymore,
- * because may reflect other routing, etc. */
+ * because may reflect other routing, etc.
+ */
+ service_timeout = lustre_msg_get_service_timeout(request->rq_repmsg);
at_reinit(&imp->imp_at.iat_net_latency, 0, 0);
- ptlrpc_at_adj_net_latency(request,
- lustre_msg_get_service_time(
- request->rq_repmsg));
+ ptlrpc_at_adj_net_latency(request, service_timeout);
/* Import flags should be updated before waking import at FULL state */
rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
ptlrpc_prepare_replay(imp);
rc = ptlrpc_import_recovery_state_machine(imp);
if (rc == -ENOTCONN) {
- CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
- "invalidating and reconnecting\n",
+ CDEBUG(D_HA,
+ "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_connection->c_remote_uuid.uuid);
ptlrpc_connect_import(imp);
ENTRY;
if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+ struct task_struct *task;
+
deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
&target_start, &target_len);
/* Don't care about MGC eviction */
"using this service will fail.\n",
imp->imp_obd->obd_name, target_len,
target_start);
- LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction");
+ LASSERTF(!obd_lbug_on_eviction, "LBUG upon eviction\n");
}
CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
obd2cli_tgt(imp->imp_obd),
imp->imp_vbr_failed = 0;
spin_unlock(&imp->imp_lock);
- {
- struct task_struct *task;
/* bug 17802: XXX client_disconnect_export vs connect request
* race. if client is evicted at this time then we start
* invalidate thread without reference to import and import can
* be freed at same time. */
class_import_get(imp);
task = kthread_run(ptlrpc_invalidate_import_thread, imp,
- "ll_imp_inval");
+ "ll_imp_inval");
if (IS_ERR(task)) {
class_import_put(imp);
- CERROR("error starting invalidate thread: %d\n", rc);
rc = PTR_ERR(task);
+ CERROR("%s: can't start invalidate thread: rc = %d\n",
+ imp->imp_obd->obd_name, rc);
} else {
rc = 0;
}
RETURN(rc);
- }
}
if (imp->imp_state == LUSTRE_IMP_REPLAY) {
/* We want client umounts to happen quickly, no matter the
server state... */
- req->rq_timeout = min_t(int, req->rq_timeout,
+ req->rq_timeout = min_t(timeout_t, req->rq_timeout,
INITIAL_CONNECT_TIMEOUT);
import_set_state(imp, LUSTRE_IMP_CONNECTING);
DEBUG_REQ(D_HA, req, "inflight=%d, refcount=%d: rc = %d",
atomic_read(&imp->imp_inflight),
- atomic_read(&imp->imp_refcount), rc);
+ refcount_read(&imp->imp_refcount), rc);
spin_lock(&imp->imp_lock);
/* DISCONNECT reply can be late and another connection can just
memset(&imp->imp_remote_handle, 0,
sizeof(imp->imp_remote_handle));
/* take our DISCONNECT into account */
- if (atomic_read(&imp->imp_inflight) > 1) {
+ if (atomic_read(&imp->imp_reqs) > 1) {
imp->imp_generation++;
imp->imp_initiated_at = imp->imp_generation;
import_set_state_nolock(imp, LUSTRE_IMP_NEW);
/* Adaptive Timeout utils */
-/* Update at_current with the specified value (bounded by at_min and at_max),
- * as well as the AT history "bins".
+/* Update at_current_timeout with the specified value (bounded by at_min and
+ * at_max), as well as the AT history "bins".
* - Bin into timeslices using AT_BINS bins.
* - This gives us a max of the last at_history seconds without the storage,
* but still smoothing out a return to normalcy from a slow response.
* - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
*
* Return value (added lines): 0 without modifying anything when
* timeout <= 0; otherwise the previous current timeout if this call
* changed it, or 0 if the current timeout is unchanged.
*/
-int at_measured(struct adaptive_timeout *at, unsigned int val)
+timeout_t at_measured(struct adaptive_timeout *at, timeout_t timeout)
{
- unsigned int old = at->at_current;
+ timeout_t old_timeout = at->at_current_timeout;
time64_t now = ktime_get_real_seconds();
/* Width of one history bin in seconds, never less than 1. */
long binlimit = max_t(long, at_history / AT_BINS, 1);
/* NOTE(review): at is already dereferenced by the initializer of
 * old/old_timeout above, so this assertion runs after the first use
 * of the pointer.
 */
LASSERT(at);
/* NOTE(review): the added lines print timeout with %u here but with %d
 * in the change message further down; the timeout <= 0 check below
 * suggests timeout_t is signed - confirm the intended specifier.
 */
- CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
- val, at, (long)(now - at->at_binstart), at->at_current,
+ CDEBUG(D_OTHER, "add %u to %p time=%lld v=%u (%u %u %u %u)\n",
+ timeout, at, now - at->at_binstart, at->at_current_timeout,
at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
- if (val == 0)
- /* 0's don't count, because we never want our timeout to
- drop to 0, and because 0 could mean an error */
+ if (timeout <= 0)
+ /* Negative timeouts and 0's don't count, because we never
+ * want our timeout to drop to 0 or below, and because 0 could
+ * mean an error
+ */
return 0;
/* Everything from here to the unlock near the end runs under at_lock. */
spin_lock(&at->at_lock);
if (unlikely(at->at_binstart == 0)) {
/* Special case to remove default from history */
- at->at_current = val;
- at->at_worst_ever = val;
- at->at_worst_time = now;
- at->at_hist[0] = val;
+ at->at_current_timeout = timeout;
+ at->at_worst_timeout_ever = timeout;
+ at->at_worst_timestamp = now;
+ at->at_hist[0] = timeout;
at->at_binstart = now;
} else if (now - at->at_binstart < binlimit ) {
/* in bin 0 */
- at->at_hist[0] = max(val, at->at_hist[0]);
- at->at_current = max(val, at->at_current);
+ at->at_hist[0] = max_t(timeout_t, timeout, at->at_hist[0]);
+ at->at_current_timeout = max_t(timeout_t, timeout,
+ at->at_current_timeout);
} else {
/* Bin 0 has expired: age the history by shift bins, zero the
 * bins that have no older data to take over, and recompute the
 * current value as the maximum of the new sample and the
 * surviving bins.
 */
int i, shift;
- unsigned int maxv = val;
+ timeout_t maxv = timeout;
/* move bins over */
shift = (u32)(now - at->at_binstart) / binlimit;
for(i = AT_BINS - 1; i >= 0; i--) {
if (i >= shift) {
at->at_hist[i] = at->at_hist[i - shift];
- maxv = max(maxv, at->at_hist[i]);
+ maxv = max_t(timeout_t, maxv, at->at_hist[i]);
} else {
at->at_hist[i] = 0;
}
}
- at->at_hist[0] = val;
- at->at_current = maxv;
+ at->at_hist[0] = timeout;
+ at->at_current_timeout = maxv;
at->at_binstart += shift * binlimit;
}
/* Track the largest current value ever seen and when it occurred. */
- if (at->at_current > at->at_worst_ever) {
- at->at_worst_ever = at->at_current;
- at->at_worst_time = now;
- }
+ if (at->at_current_timeout > at->at_worst_timeout_ever) {
+ at->at_worst_timeout_ever = at->at_current_timeout;
+ at->at_worst_timestamp = now;
+ }
- if (at->at_flags & AT_FLG_NOHIST)
+ if (at->at_flags & AT_FLG_NOHIST)
/* Only keep last reported val; keeping the rest of the history
- for proc only */
- at->at_current = val;
+ * for debugfs only
+ */
+ at->at_current_timeout = timeout;
/* Clamp from above by at_max (only when at_max is positive), then
 * from below by at_min.
 */
if (at_max > 0)
- at->at_current = min(at->at_current, at_max);
- at->at_current = max(at->at_current, at_min);
-
- if (at->at_current != old)
- CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d "
- "(val=%u) hist %u %u %u %u\n", at,
- old, at->at_current, at->at_current - old, val,
+ at->at_current_timeout = min_t(timeout_t,
+ at->at_current_timeout, at_max);
+ at->at_current_timeout = max_t(timeout_t, at->at_current_timeout,
+ at_min);
+ if (at->at_current_timeout != old_timeout)
+ CDEBUG(D_OTHER,
+ "AT %p change: old=%u new=%u delta=%d (val=%d) hist %u %u %u %u\n",
+ at, old_timeout, at->at_current_timeout,
+ at->at_current_timeout - old_timeout, timeout,
at->at_hist[0], at->at_hist[1], at->at_hist[2],
at->at_hist[3]);
- /* if we changed, report the old value */
- old = (at->at_current != old) ? old : 0;
+ /* if we changed, report the old timeout value */
+ old_timeout = (at->at_current_timeout != old_timeout) ? old_timeout : 0;
spin_unlock(&at->at_lock);
- return old;
+ return old_timeout;
}
/* Find the imp_at index for a given portal; assign if space available */