*/
cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
- cli->cl_max_short_io_bytes = OBD_MAX_SHORT_IO_BYTES;
+ cli->cl_max_short_io_bytes = OBD_DEF_SHORT_IO_BYTES;
/*
* set cl_chunkbits default value to PAGE_SHIFT,
target->obd_name,
obd_uuid2str(&exp->exp_client_uuid),
obd_export_nid2str(exp),
- target->obd_max_recoverable_clients,
+ atomic_read(&target->obd_max_recoverable_clients),
timeout / 60, timeout % 60);
} else {
struct target_distribute_txn_data *tdtd;
if (obd_uuid_equals(&cluuid, &target->obd_uuid))
goto dont_check_exports;
- export = cfs_hash_lookup(target->obd_uuid_hash, &cluuid);
+ export = obd_uuid_lookup(target, &cluuid);
if (!export)
goto no_export;
rc = -EALREADY;
class_export_put(export);
export = NULL;
+ } else if (OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT) &&
+ !lw_client) {
+ spin_unlock(&export->exp_lock);
+ rc = -EAGAIN;
} else {
export->exp_connecting = 1;
spin_unlock(&export->exp_lock);
LCONSOLE_WARN("%s: Client %s (at %s) refused connection, still busy with %d references\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid),
- atomic_read(&export->exp_refcount));
+ refcount_read(&export->exp_handle.h_ref));
GOTO(out, rc = -EBUSY);
} else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 &&
rc != EALREADY) {
CDEBUG(D_HA, "%s: connection from %s@%s %st%llu exp %p cur %lld last %lld\n",
target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid),
target->obd_recovering ? "recovering/" : "", data->ocd_transno,
- export, ktime_get_real_seconds(),
+ export, ktime_get_seconds(),
export ? export->exp_last_request_time : 0);
/*
connected = atomic_read(&target->obd_connected_clients);
in_progress = atomic_read(&target->obd_lock_replay_clients);
- known = target->obd_max_recoverable_clients;
+ known =
+ atomic_read(&target->obd_max_recoverable_clients);
stale = target->obd_stale_clients;
remaining = hrtimer_expires_remaining(timer);
left = ktime_divns(remaining, NSEC_PER_SEC);
* condition.
*/
if (new_mds_mds_conn)
- target->obd_max_recoverable_clients++;
+ atomic_inc(&target->obd_max_recoverable_clients);
+
if (atomic_inc_return(&target->obd_connected_clients) ==
- target->obd_max_recoverable_clients)
+ atomic_read(&target->obd_max_recoverable_clients))
wake_up(&target->obd_next_transno_waitq);
}
/* Only log a recovery message when recovery has occurred. */
if (obd->obd_recovery_start) {
- time64_t now = ktime_get_real_seconds();
+ time64_t now = ktime_get_seconds();
time64_t elapsed_time;
elapsed_time = max_t(time64_t, now - obd->obd_recovery_start,
LCONSOLE_INFO("%s: Recovery over after %lld:%.02lld, of %d clients %d recovered and %d %s evicted.\n",
obd->obd_name, (s64)elapsed_time / 60,
(s64)elapsed_time % 60,
- obd->obd_max_recoverable_clients,
+ atomic_read(&obd->obd_max_recoverable_clients),
atomic_read(&obd->obd_connected_clients),
obd->obd_stale_clients,
obd->obd_stale_clients == 1 ? "was" : "were");
}
spin_unlock(&obd->obd_recovery_task_lock);
- obd->obd_recovery_end = ktime_get_real_seconds();
+ obd->obd_recovery_end = ktime_get_seconds();
/* When recovery finished, cleanup orphans on MDS and OST. */
if (obd->obd_type && OBP(obd, postrecov)) {
static void abort_req_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head abort_list;
+ LIST_HEAD(abort_list);
- INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
list_splice_init(&obd->obd_req_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
static void abort_lock_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head abort_list;
+ LIST_HEAD(abort_list);
- INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
void target_cleanup_recovery(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head clean_list;
+ LIST_HEAD(clean_list);
- INIT_LIST_HEAD(&clean_list);
spin_lock(&obd->obd_dev_lock);
if (!obd->obd_recovering) {
spin_unlock(&obd->obd_dev_lock);
return;
}
- delay = ktime_set(obd->obd_recovery_timeout, 0);
- hrtimer_start(&obd->obd_recovery_timer, delay, HRTIMER_MODE_REL);
- obd->obd_recovery_start = ktime_get_real_seconds();
+ obd->obd_recovery_start = ktime_get_seconds();
+ delay = ktime_set(obd->obd_recovery_start +
+ obd->obd_recovery_timeout, 0);
+ hrtimer_start(&obd->obd_recovery_timer, delay, HRTIMER_MODE_ABS);
spin_unlock(&obd->obd_dev_lock);
LCONSOLE_WARN("%s: Will be in recovery for at least %lu:%02lu, or until %d client%s reconnect%s\n",
obd->obd_name,
obd->obd_recovery_timeout / 60,
obd->obd_recovery_timeout % 60,
- obd->obd_max_recoverable_clients,
- (obd->obd_max_recoverable_clients == 1) ? "" : "s",
- (obd->obd_max_recoverable_clients == 1) ? "s" : "");
+ atomic_read(&obd->obd_max_recoverable_clients),
+ (atomic_read(&obd->obd_max_recoverable_clients) == 1) ?
+ "" : "s",
+ (atomic_read(&obd->obd_max_recoverable_clients) == 1) ?
+ "s" : "");
}
/**
time_t left;
spin_lock(&obd->obd_dev_lock);
- if (!obd->obd_recovering || obd->obd_abort_recovery) {
+ if (!obd->obd_recovering || obd->obd_abort_recovery ||
+ obd->obd_stopping) {
spin_unlock(&obd->obd_dev_lock);
return;
}
obd->obd_name, timeout, extend);
if (obd->obd_recovery_timeout < timeout) {
- ktime_t now = ktime_get_real();
- ktime_t end;
+ ktime_t end, now;
obd->obd_recovery_timeout = timeout;
end = ktime_set(obd->obd_recovery_start + timeout, 0);
+ now = ktime_set(ktime_get_seconds(), 0);
left_ns = ktime_sub(end, now);
- hrtimer_forward_now(&obd->obd_recovery_timer, left_ns);
- left = ktime_divns(left_ns, NSEC_PER_MSEC);
+ hrtimer_start(&obd->obd_recovery_timer, end, HRTIMER_MODE_ABS);
+ left = ktime_divns(left_ns, NSEC_PER_SEC);
}
spin_unlock(&obd->obd_dev_lock);
CDEBUG(D_HA,
"max: %d, connected: %d, completed: %d, queue_len: %d, req_transno: %llu, next_transno: %llu\n",
- obd->obd_max_recoverable_clients, connected, completed,
+ atomic_read(&obd->obd_max_recoverable_clients),
+ connected, completed,
queue_len, req_transno, next_transno);
if (obd->obd_abort_recovery) {
last = now;
}
}
- if (obd->obd_recovery_start != 0 && ktime_get_real_seconds() >=
+ if (obd->obd_recovery_start != 0 && ktime_get_seconds() >=
(obd->obd_recovery_start + obd->obd_recovery_time_hard)) {
__u64 next_update_transno = 0;
* yet, let's wait until those threads have stopped
*/
if (next_update_transno == 0) {
- struct l_wait_info lwi = { 0 };
-
- l_wait_event(tdtd->tdtd_recovery_threads_waitq,
- atomic_read(
- &tdtd->tdtd_recovery_threads_count) == 0,
- &lwi);
+ spin_unlock(&obd->obd_recovery_task_lock);
+ wait_event_idle(
+ tdtd->tdtd_recovery_threads_waitq,
+ atomic_read(&tdtd->tdtd_recovery_threads_count)
+ == 0);
+ spin_lock(&obd->obd_recovery_task_lock);
next_update_transno =
distribute_txn_get_next_transno(
lut->lut_tdtd);
if (obd->obd_abort_recovery) {
CWARN("recovery is aborted, evict exports in recovery\n");
if (lut->lut_tdtd != NULL) {
- struct l_wait_info lwi = { 0 };
-
tdtd = lut->lut_tdtd;
/*
* Wait until all of the update log recovery threads have
* finished
*/
- l_wait_event(tdtd->tdtd_recovery_threads_waitq,
- atomic_read(&tdtd->tdtd_recovery_threads_count) == 0,
- &lwi);
+ wait_event_idle(
+ tdtd->tdtd_recovery_threads_waitq,
+ atomic_read(&tdtd->tdtd_recovery_threads_count)
+ == 0);
/* Then abort the update recovery list */
dtrq_list_destroy(lut->lut_tdtd);
}
/** evict exports which didn't finish recovery yet */
class_disconnect_stale_exports(obd, exp_finished);
return 1;
- } else if (obd->obd_recovery_expired) {
+ } else if (obd->obd_recovery_expired &&
+ obd->obd_recovery_timeout < obd->obd_recovery_time_hard) {
obd->obd_recovery_expired = 0;
+
/** If some clients died while being recovered, evict them */
LCONSOLE_WARN("%s: recovery is timed out, evict stale exports\n",
obd->obd_name);
(void)handler(req);
lu_context_exit(&thread->t_env->le_ctx);
+ req->rq_svc_thread->t_env->le_ses = NULL;
+
/* don't reset timer for final stage */
if (!exp_finished(req->rq_export)) {
time_t to = obd_timeout;
CDEBUG(D_HA,
"connected %d stale %d max_recoverable_clients %d abort %d expired %d\n",
- clnts, obd->obd_stale_clients, obd->obd_max_recoverable_clients,
+ clnts, obd->obd_stale_clients,
+ atomic_read(&obd->obd_max_recoverable_clients),
obd->obd_abort_recovery, obd->obd_recovery_expired);
if (!obd->obd_abort_recovery && !obd->obd_recovery_expired) {
- LASSERT(clnts <= obd->obd_max_recoverable_clients);
+ LASSERT(clnts <=
+ atomic_read(&obd->obd_max_recoverable_clients));
if (clnts + obd->obd_stale_clients <
- obd->obd_max_recoverable_clients)
+ atomic_read(&obd->obd_max_recoverable_clients))
return 0;
}
if (lut->lut_tdtd != NULL) {
if (!lut->lut_tdtd->tdtd_replay_ready &&
- !obd->obd_abort_recovery) {
+ !obd->obd_abort_recovery && !obd->obd_stopping) {
/*
* Let's extend recovery timer, in case the recovery
* timer expired, and some clients got evicted
struct ptlrpc_request *req)
{
DEBUG_REQ(D_HA, req,
- "remove t%lld from %s because of duplicate update records are found.\n",
+ "remove t%lld from %s because duplicate update records found",
lustre_msg_get_transno(req->rq_reqmsg),
libcfs_nid2str(req->rq_peer.nid));
lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
target_send_reply(req, req->rq_status, 0);
} else {
- DEBUG_REQ(D_ERROR, req, "wrong opc from %s\n",
+ DEBUG_REQ(D_ERROR, req, "wrong opc from %s",
libcfs_nid2str(req->rq_peer.nid));
}
target_exp_dequeue_req_replay(req);
atomic_read(&obd->obd_lock_replay_clients));
while ((req = target_next_replay_lock(lut))) {
LASSERT(trd->trd_processing_task == current_pid());
- DEBUG_REQ(D_HA, req, "processing lock from %s: ",
+ DEBUG_REQ(D_HA, req, "processing lock from %s:",
libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
spin_unlock(&obd->obd_recovery_task_lock);
while ((req = target_next_final_ping(obd))) {
LASSERT(trd->trd_processing_task == current_pid());
- DEBUG_REQ(D_HA, req, "processing final ping from %s: ",
+ DEBUG_REQ(D_HA, req, "processing final ping from %s:",
libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
if (lut->lut_bottom->dd_rdonly)
return;
- if (obd->obd_max_recoverable_clients == 0) {
+ if (atomic_read(&obd->obd_max_recoverable_clients) == 0) {
/** Update server last boot epoch */
tgt_boot_epoch_update(lut);
return;
CDEBUG(D_HA, "RECOVERY: service %s, %d recoverable clients, "
"last_transno %llu\n", obd->obd_name,
- obd->obd_max_recoverable_clients, obd->obd_last_committed);
+ atomic_read(&obd->obd_max_recoverable_clients),
+ obd->obd_last_committed);
LASSERT(obd->obd_stopping == 0);
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
obd->obd_recovery_start = 0;
obd->obd_recovery_end = 0;
- hrtimer_init(&obd->obd_recovery_timer, CLOCK_REALTIME,
- HRTIMER_MODE_REL);
+ hrtimer_init(&obd->obd_recovery_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS);
obd->obd_recovery_timer.function = &target_recovery_expired;
target_start_recovery_thread(lut, handler);
}
if (duplicate != NULL) {
DEBUG_REQ(D_HA, duplicate,
- "put prev final req\n");
+ "put prev final req");
target_request_copy_put(duplicate);
}
RETURN(0);
#endif
#ifdef HAVE_SERVER_SUPPORT
-static int target_bulk_timeout(void *data)
-{
- ENTRY;
- /*
- * We don't fail the connection here, because having the export
- * killed makes the (vital) call to commitrw very sad.
- */
- RETURN(1);
-}
-
static inline const char *bulk2type(struct ptlrpc_request *req)
{
if (req->rq_bulk_read)
return "UNKNOWN";
}
-int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
- struct l_wait_info *lwi)
+int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc)
{
struct ptlrpc_request *req = desc->bd_req;
- time64_t start = ktime_get_real_seconds();
+ time64_t start = ktime_get_seconds();
time64_t deadline;
int rc = 0;
ENTRY;
/* If there is eviction in progress, wait for it to finish. */
- if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
- *lwi = LWI_INTR(NULL, NULL);
- rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
- !atomic_read(&exp->exp_obd->obd_evict_inprogress),
- lwi);
- }
+ wait_event_idle(
+ exp->exp_obd->obd_evict_inprogress_waitq,
+ !atomic_read(&exp->exp_obd->obd_evict_inprogress));
/* Check if client was evicted or reconnected already. */
if (exp->exp_failed ||
}
if (rc < 0) {
- DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
+ DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc = %d",
bulk2type(req), rc);
RETURN(rc);
}
deadline = req->rq_deadline;
do {
- time64_t timeoutl = deadline - ktime_get_real_seconds();
- long timeout_jiffies = timeoutl <= 0 ?
- 1 : cfs_time_seconds(timeoutl);
+ time64_t timeoutl = deadline - ktime_get_seconds();
time64_t rq_deadline;
- *lwi = LWI_TIMEOUT_INTERVAL(timeout_jiffies,
- cfs_time_seconds(1),
- target_bulk_timeout, desc);
- rc = l_wait_event(desc->bd_waitq,
- !ptlrpc_server_bulk_active(desc) ||
- exp->exp_failed ||
- exp->exp_conn_cnt >
- lustre_msg_get_conn_cnt(req->rq_reqmsg),
- lwi);
- LASSERT(rc == 0 || rc == -ETIMEDOUT);
+ while (timeoutl >= 0 &&
+ wait_event_idle_timeout(
+ desc->bd_waitq,
+ !ptlrpc_server_bulk_active(desc) ||
+ exp->exp_failed ||
+ exp->exp_conn_cnt >
+ lustre_msg_get_conn_cnt(req->rq_reqmsg),
+ timeoutl ? cfs_time_seconds(1) : 1) == 0)
+ timeoutl -= 1;
+ rc = timeoutl < 0 ? -ETIMEDOUT : 0;
+
/* Wait again if we changed rq_deadline. */
rq_deadline = READ_ONCE(req->rq_deadline);
deadline = start + bulk_timeout;
if (deadline > rq_deadline)
deadline = rq_deadline;
} while (rc == -ETIMEDOUT &&
- deadline > ktime_get_real_seconds());
+ deadline > ktime_get_seconds());
if (rc == -ETIMEDOUT) {
DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %lld%+llds",