X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fldlm%2Fldlm_lib.c;h=5086c28e488d0308012585b926d05338f0a09ce0;hp=0d95dd6302ee02a6d3d87adf54f2254649c88574;hb=5e30a2c06176f50f5e17aba68fdae7e38d922d33;hpb=06408a4ef381121fa58783026a0cf0a6b0fa479c diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0d95dd6..5086c28e 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -434,7 +434,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) */ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES; - cli->cl_max_short_io_bytes = OBD_MAX_SHORT_IO_BYTES; + cli->cl_max_short_io_bytes = OBD_DEF_SHORT_IO_BYTES; /* * set cl_chunkbits default value to PAGE_SHIFT, @@ -1176,7 +1176,7 @@ int target_handle_connect(struct ptlrpc_request *req) if (obd_uuid_equals(&cluuid, &target->obd_uuid)) goto dont_check_exports; - export = cfs_hash_lookup(target->obd_uuid_hash, &cluuid); + export = obd_uuid_lookup(target, &cluuid); if (!export) goto no_export; @@ -1244,6 +1244,10 @@ int target_handle_connect(struct ptlrpc_request *req) rc = -EALREADY; class_export_put(export); export = NULL; + } else if (OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT) && + !lw_client) { + spin_unlock(&export->exp_lock); + rc = -EAGAIN; } else { export->exp_connecting = 1; spin_unlock(&export->exp_lock); @@ -1261,7 +1265,7 @@ no_export: LCONSOLE_WARN("%s: Client %s (at %s) refused connection, still busy with %d references\n", target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), - atomic_read(&export->exp_refcount)); + refcount_read(&export->exp_handle.h_ref)); GOTO(out, rc = -EBUSY); } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 && rc != EALREADY) { @@ -1830,7 +1834,8 @@ static void extend_recovery_timer(struct obd_device *obd, time_t dr_timeout, time_t left; spin_lock(&obd->obd_dev_lock); - if (!obd->obd_recovering || obd->obd_abort_recovery) { + if (!obd->obd_recovering || obd->obd_abort_recovery || + obd->obd_stopping) { spin_unlock(&obd->obd_dev_lock); return; } @@ -2113,13 +2118,13 @@ repeat: * yet, let's wait those threads stopped */ if (next_update_transno == 0) { - struct l_wait_info lwi = { 0 }; - - l_wait_event(tdtd->tdtd_recovery_threads_waitq, - atomic_read( - &tdtd->tdtd_recovery_threads_count) == 0, - &lwi); + spin_unlock(&obd->obd_recovery_task_lock); + wait_event_idle( + tdtd->tdtd_recovery_threads_waitq, + atomic_read(&tdtd->tdtd_recovery_threads_count) + == 0); + spin_lock(&obd->obd_recovery_task_lock); next_update_transno = distribute_txn_get_next_transno( lut->lut_tdtd); @@ -2159,16 +2164,15 @@ repeat: if (obd->obd_abort_recovery) { CWARN("recovery is aborted, evict exports in recovery\n"); if (lut->lut_tdtd != NULL) { - struct l_wait_info lwi = { 0 }; - tdtd = lut->lut_tdtd; /* * Let's wait all of the update log recovery thread * finished */ - l_wait_event(tdtd->tdtd_recovery_threads_waitq, - atomic_read(&tdtd->tdtd_recovery_threads_count) == 0, - &lwi); + wait_event_idle( + tdtd->tdtd_recovery_threads_waitq, + atomic_read(&tdtd->tdtd_recovery_threads_count) + == 0); /* Then abort the update recovery list */ dtrq_list_destroy(lut->lut_tdtd); } @@ -2176,7 +2180,8 @@ repeat: /** evict exports which didn't finish recovery yet */ class_disconnect_stale_exports(obd, exp_finished); return 1; - } else if (obd->obd_recovery_expired) { + } else if (obd->obd_recovery_expired && + obd->obd_recovery_timeout < obd->obd_recovery_time_hard) { obd->obd_recovery_expired = 0; /** If some clients died being recovered, evict them */ @@ -2271,6 +2276,8 @@ static void handle_recovery_req(struct ptlrpc_thread *thread, (void)handler(req); lu_context_exit(&thread->t_env->le_ctx); + req->rq_svc_thread->t_env->le_ses = NULL; + /* don't reset timer for final stage */ if (!exp_finished(req->rq_export)) { time_t to = obd_timeout; @@ -2328,7 +2335,7 @@ static int check_for_recovery_ready(struct lu_target *lut) if (lut->lut_tdtd != NULL) { if (!lut->lut_tdtd->tdtd_replay_ready && - !obd->obd_abort_recovery) { + !obd->obd_abort_recovery && !obd->obd_stopping) { /* * Let's extend recovery timer, in case the recovery * timer expired, and some clients got evicted @@ -3299,16 +3306,6 @@ void ldlm_dump_export_locks(struct obd_export *exp) #endif #ifdef HAVE_SERVER_SUPPORT -static int target_bulk_timeout(void *data) -{ - ENTRY; - /* - * We don't fail the connection here, because having the export - * killed makes the (vital) call to commitrw very sad. - */ - RETURN(1); -} - static inline const char *bulk2type(struct ptlrpc_request *req) { if (req->rq_bulk_read) @@ -3323,18 +3320,14 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc) struct ptlrpc_request *req = desc->bd_req; time64_t start = ktime_get_seconds(); time64_t deadline; - struct l_wait_info lwi; int rc = 0; ENTRY; /* If there is eviction in progress, wait for it to finish. */ - if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) { - lwi = LWI_INTR(NULL, NULL); - rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq, - !atomic_read(&exp->exp_obd->obd_evict_inprogress), - &lwi); - } + wait_event_idle( + exp->exp_obd->obd_evict_inprogress_waitq, + !atomic_read(&exp->exp_obd->obd_evict_inprogress)); /* Check if client was evicted or reconnected already. */ if (exp->exp_failed || @@ -3371,20 +3364,19 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc) do { time64_t timeoutl = deadline - ktime_get_seconds(); - long timeout_jiffies = timeoutl <= 0 ? - 1 : cfs_time_seconds(timeoutl); time64_t rq_deadline; - lwi = LWI_TIMEOUT_INTERVAL(timeout_jiffies, - cfs_time_seconds(1), - target_bulk_timeout, desc); - rc = l_wait_event(desc->bd_waitq, - !ptlrpc_server_bulk_active(desc) || - exp->exp_failed || - exp->exp_conn_cnt > - lustre_msg_get_conn_cnt(req->rq_reqmsg), - &lwi); - LASSERT(rc == 0 || rc == -ETIMEDOUT); + while (timeoutl >= 0 && + wait_event_idle_timeout( + desc->bd_waitq, + !ptlrpc_server_bulk_active(desc) || + exp->exp_failed || + exp->exp_conn_cnt > + lustre_msg_get_conn_cnt(req->rq_reqmsg), + timeoutl ? cfs_time_seconds(1) : 1) == 0) + timeoutl -= 1; + rc = timeoutl < 0 ? -ETIMEDOUT : 0; + /* Wait again if we changed rq_deadline. */ rq_deadline = READ_ONCE(req->rq_deadline); deadline = start + bulk_timeout;