*/
cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
- cli->cl_max_short_io_bytes = OBD_MAX_SHORT_IO_BYTES;
+ cli->cl_max_short_io_bytes = OBD_DEF_SHORT_IO_BYTES;
/*
* set cl_chunkbits default value to PAGE_SHIFT,
if (obd_uuid_equals(&cluuid, &target->obd_uuid))
goto dont_check_exports;
- export = cfs_hash_lookup(target->obd_uuid_hash, &cluuid);
+ export = obd_uuid_lookup(target, &cluuid);
if (!export)
goto no_export;
rc = -EALREADY;
class_export_put(export);
export = NULL;
+ } else if (OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT) &&
+ !lw_client) {
+ spin_unlock(&export->exp_lock);
+ rc = -EAGAIN;
} else {
export->exp_connecting = 1;
spin_unlock(&export->exp_lock);
LCONSOLE_WARN("%s: Client %s (at %s) refused connection, still busy with %d references\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid),
- atomic_read(&export->exp_refcount));
+ refcount_read(&export->exp_handle.h_ref));
GOTO(out, rc = -EBUSY);
} else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 &&
rc != EALREADY) {
static void abort_req_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head abort_list;
+ LIST_HEAD(abort_list);
- INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
list_splice_init(&obd->obd_req_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
static void abort_lock_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head abort_list;
+ LIST_HEAD(abort_list);
- INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
void target_cleanup_recovery(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- struct list_head clean_list;
+ LIST_HEAD(clean_list);
- INIT_LIST_HEAD(&clean_list);
spin_lock(&obd->obd_dev_lock);
if (!obd->obd_recovering) {
spin_unlock(&obd->obd_dev_lock);
time_t left;
spin_lock(&obd->obd_dev_lock);
- if (!obd->obd_recovering || obd->obd_abort_recovery) {
+ if (!obd->obd_recovering || obd->obd_abort_recovery ||
+ obd->obd_stopping) {
spin_unlock(&obd->obd_dev_lock);
return;
}
* yet, let's wait those threads stopped
*/
if (next_update_transno == 0) {
+ spin_unlock(&obd->obd_recovery_task_lock);
wait_event_idle(
tdtd->tdtd_recovery_threads_waitq,
atomic_read(&tdtd->tdtd_recovery_threads_count)
== 0);
+ spin_lock(&obd->obd_recovery_task_lock);
next_update_transno =
distribute_txn_get_next_transno(
lut->lut_tdtd);
/** evict exports which didn't finish recovery yet */
class_disconnect_stale_exports(obd, exp_finished);
return 1;
- } else if (obd->obd_recovery_expired) {
+ } else if (obd->obd_recovery_expired &&
+ obd->obd_recovery_timeout < obd->obd_recovery_time_hard) {
obd->obd_recovery_expired = 0;
/** If some clients died being recovered, evict them */
if (lut->lut_tdtd != NULL) {
if (!lut->lut_tdtd->tdtd_replay_ready &&
- !obd->obd_abort_recovery) {
+ !obd->obd_abort_recovery && !obd->obd_stopping) {
/*
* Let's extend recovery timer, in case the recovery
* timer expired, and some clients got evicted
#endif
#ifdef HAVE_SERVER_SUPPORT
-static int target_bulk_timeout(void *data)
-{
- ENTRY;
- /*
- * We don't fail the connection here, because having the export
- * killed makes the (vital) call to commitrw very sad.
- */
- RETURN(1);
-}
-
static inline const char *bulk2type(struct ptlrpc_request *req)
{
if (req->rq_bulk_read)
struct ptlrpc_request *req = desc->bd_req;
time64_t start = ktime_get_seconds();
time64_t deadline;
- struct l_wait_info lwi;
int rc = 0;
ENTRY;
do {
time64_t timeoutl = deadline - ktime_get_seconds();
- long timeout_jiffies = timeoutl <= 0 ?
- 1 : cfs_time_seconds(timeoutl);
time64_t rq_deadline;
- lwi = LWI_TIMEOUT_INTERVAL(timeout_jiffies,
- cfs_time_seconds(1),
- target_bulk_timeout, desc);
- rc = l_wait_event(desc->bd_waitq,
- !ptlrpc_server_bulk_active(desc) ||
- exp->exp_failed ||
- exp->exp_conn_cnt >
- lustre_msg_get_conn_cnt(req->rq_reqmsg),
- &lwi);
- LASSERT(rc == 0 || rc == -ETIMEDOUT);
+ while (timeoutl >= 0 &&
+ wait_event_idle_timeout(
+ desc->bd_waitq,
+ !ptlrpc_server_bulk_active(desc) ||
+ exp->exp_failed ||
+ exp->exp_conn_cnt >
+ lustre_msg_get_conn_cnt(req->rq_reqmsg),
+ timeoutl ? cfs_time_seconds(1) : 1) == 0)
+ timeoutl -= 1;
+ rc = timeoutl < 0 ? -ETIMEDOUT : 0;
+
/* Wait again if we changed rq_deadline. */
rq_deadline = READ_ONCE(req->rq_deadline);
deadline = start + bulk_timeout;