X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Fservice.c;h=6a68a19b839105fcee64e50e3ff351baee0635b8;hb=301d76a71176c186129231ddd1323bae21100165;hp=64ecbc1d7a6a4262694568efb1efc2a38c0c72d6;hpb=a7ff5d050ee7db0e80baac5fb3848ffcfa04dea6;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 64ecbc1..6a68a19 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. */ #define DEBUG_SUBSYSTEM S_RPC @@ -63,9 +62,11 @@ MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply"); static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt); static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req); static void ptlrpc_at_remove_timed(struct ptlrpc_request *req); +static int ptlrpc_start_threads(struct ptlrpc_service *svc); +static int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait); /** Holds a list of all PTLRPC services */ -struct list_head ptlrpc_all_services; +LIST_HEAD(ptlrpc_all_services); /** Used to protect the \e ptlrpc_all_services list */ struct mutex ptlrpc_all_services_mutex; @@ -229,7 +230,7 @@ struct ptlrpc_hr_partition { #define HRT_STOPPING 1 struct ptlrpc_hr_service { - /* CPU partition table, it's just cfs_cpt_table for now */ + /* CPU partition table, it's just cfs_cpt_tab for now */ struct cfs_cpt_table *hr_cpt_table; /** controller sleep waitq */ wait_queue_head_t hr_waitq; @@ -447,9 +448,9 @@ static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt) return posted; } - rqbd = list_entry(svcpt->scp_rqbd_idle.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); + rqbd = list_first_entry(&svcpt->scp_rqbd_idle, + struct ptlrpc_request_buffer_desc, + rqbd_list); /* assume we will post successfully */ svcpt->scp_nrqbds_posted++; @@ -684,13 +685,12 @@ static int ptlrpc_service_part_init(struct ptlrpc_service *svc, failed: if (array->paa_reqs_count != NULL) { - OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size); + OBD_FREE_PTR_ARRAY(array->paa_reqs_count, size); array->paa_reqs_count = NULL; } if (array->paa_reqs_array != NULL) { - OBD_FREE(array->paa_reqs_array, - sizeof(struct list_head) * array->paa_size); + OBD_FREE_PTR_ARRAY(array->paa_reqs_array, array->paa_size); array->paa_reqs_array = NULL; } @@ -725,7 +725,7 @@ struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, cptable = cconf->cc_cptable; if (cptable == NULL) - cptable = cfs_cpt_table; + cptable = cfs_cpt_tab; if (conf->psc_thr.tc_cpu_bind > 1) { CERROR("%s: Invalid cpu bind value %d, only 1 or 0 allowed\n", @@ -744,7 +744,7 @@ struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, strlen(cconf->cc_pattern), 0, ncpts - 1, &el); if (rc != 0) { - CERROR("%s: invalid CPT pattern string: %s", + CERROR("%s: invalid CPT pattern string: %s\n", conf->psc_name, cconf->cc_pattern); RETURN(ERR_PTR(-EINVAL)); } @@ -755,7 +755,7 @@ struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, CERROR("%s: failed to parse CPT array %s: %d\n", conf->psc_name, cconf->cc_pattern, rc); if (cpts != NULL) - OBD_FREE(cpts, sizeof(*cpts) * ncpts); + OBD_FREE_PTR_ARRAY(cpts, ncpts); RETURN(ERR_PTR(rc < 0 ? rc : -EINVAL)); } ncpts = rc; @@ -765,7 +765,7 @@ struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, OBD_ALLOC(service, offsetof(struct ptlrpc_service, srv_parts[ncpts])); if (service == NULL) { if (cpts != NULL) - OBD_FREE(cpts, sizeof(*cpts) * ncpts); + OBD_FREE_PTR_ARRAY(cpts, ncpts); RETURN(ERR_PTR(-ENOMEM)); } @@ -906,8 +906,6 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req) struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; struct ptlrpc_service *svc = svcpt->scp_service; int refcount; - struct list_head *tmp; - struct list_head *nxt; if (!atomic_dec_and_test(&req->rq_refcount)) return; @@ -951,9 +949,9 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req) * I expect only about 1 or 2 rqbds need to be recycled here */ while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) { - rqbd = list_entry(svcpt->scp_hist_rqbds.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); + rqbd = list_first_entry(&svcpt->scp_hist_rqbds, + struct ptlrpc_request_buffer_desc, + rqbd_list); list_del(&rqbd->rqbd_list); svcpt->scp_hist_nrqbds--; @@ -962,9 +960,7 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req) * remove rqbd's reqs from svc's req history while * I've got the service lock */ - list_for_each(tmp, &rqbd->rqbd_reqs) { - req = list_entry(tmp, struct ptlrpc_request, - rq_list); + list_for_each_entry(req, &rqbd->rqbd_reqs, rq_list) { /* Track the highest culled req seq */ if (req->rq_history_seq > svcpt->scp_hist_seq_culled) { @@ -976,10 +972,9 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req) spin_unlock(&svcpt->scp_lock); - list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) { - req = list_entry(rqbd->rqbd_reqs.next, - struct ptlrpc_request, - rq_list); + while ((req = list_first_entry_or_null( + &rqbd->rqbd_reqs, + struct ptlrpc_request, rq_list))) { list_del(&req->rq_list); ptlrpc_server_free_request(req); } @@ -1351,13 +1346,14 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; struct ptlrpc_request *reqcopy; struct lustre_msg *reqmsg; - time64_t olddl = req->rq_deadline - ktime_get_real_seconds(); + timeout_t olddl = req->rq_deadline - ktime_get_real_seconds(); time64_t newdl; int rc; ENTRY; - if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT)) { + if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT) || + CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_ENQ_RESEND)) { /* don't send early reply */ RETURN(1); } @@ -1367,9 +1363,9 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) * difference between clients' and servers' expectations */ DEBUG_REQ(D_ADAPTTO, req, - "%ssending early reply (deadline %+llds, margin %+llds) for %d+%d", + "%ssending early reply (deadline %+ds, margin %+ds) for %d+%d", AT_OFF ? "AT off - not " : "", - (s64)olddl, (s64)(olddl - at_get(&svcpt->scp_at_estimate)), + olddl, olddl - at_get(&svcpt->scp_at_estimate), at_get(&svcpt->scp_at_estimate), at_extra); if (AT_OFF) @@ -1378,8 +1374,8 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) if (olddl < 0) { /* below message is checked in replay-ost-single.sh test_9 */ DEBUG_REQ(D_WARNING, req, - "Already past deadline (%+llds), not sending early reply. Consider increasing at_early_margin (%d)?", - (s64)olddl, at_early_margin); + "Already past deadline (%+ds), not sending early reply. Consider increasing at_early_margin (%d)?", + olddl, at_early_margin); /* Return an error so we're not re-added to the timed list. */ RETURN(-ETIMEDOUT); @@ -1436,8 +1432,8 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) */ if (req->rq_deadline >= newdl) { DEBUG_REQ(D_WARNING, req, - "Could not add any time (%lld/%lld), not sending early reply", - (s64)olddl, (s64)(newdl - ktime_get_real_seconds())); + "Could not add any time (%d/%lld), not sending early reply", + olddl, newdl - ktime_get_real_seconds()); RETURN(-ETIMEDOUT); } @@ -1526,11 +1522,11 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) { struct ptlrpc_at_array *array = &svcpt->scp_at_array; struct ptlrpc_request *rq, *n; - struct list_head work_list; + LIST_HEAD(work_list); __u32 index, count; time64_t deadline; time64_t now = ktime_get_real_seconds(); - s64 delay; + s64 delay_ms; int first, counter = 0; ENTRY; @@ -1539,7 +1535,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) spin_unlock(&svcpt->scp_at_lock); RETURN(0); } - delay = ktime_ms_delta(ktime_get(), svcpt->scp_at_checktime); + delay_ms = ktime_ms_delta(ktime_get(), svcpt->scp_at_checktime); svcpt->scp_at_check = 0; if (array->paa_count == 0) { @@ -1560,7 +1556,6 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) * We're close to a timeout, and we don't know how much longer the * server will take. Send early replies to everyone expiring soon. */ - INIT_LIST_HEAD(&work_list); deadline = -1; div_u64_rem(array->paa_deadline, array->paa_size, &index); count = array->paa_count; @@ -1611,19 +1606,19 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) */ LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n", svcpt->scp_service->srv_name); - CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%lld\n", + CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%lldms\n", counter, svcpt->scp_nreqs_incoming, svcpt->scp_nreqs_active, - at_get(&svcpt->scp_at_estimate), delay); + at_get(&svcpt->scp_at_estimate), delay_ms); } /* * we took additional refcount so entries can't be deleted from list, no * locking is needed */ - while (!list_empty(&work_list)) { - rq = list_entry(work_list.next, struct ptlrpc_request, - rq_timed_list); + while ((rq = list_first_entry_or_null(&work_list, + struct ptlrpc_request, + rq_timed_list)) != NULL) { list_del_init(&rq->rq_timed_list); if (ptlrpc_at_send_early_reply(rq) == 0) @@ -1649,13 +1644,6 @@ ptlrpc_server_check_resend_in_progress(struct ptlrpc_request *req) return NULL; /* - * bulk request are aborted upon reconnect, don't try to - * find a match - */ - if (req->rq_bulk_write || req->rq_bulk_read) - return NULL; - - /* * This list should not be longer than max_requests in * flights on the client, so it is not all that long. * Also we only hit this codepath in case of a resent @@ -1864,6 +1852,7 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, ptlrpc_at_remove_timed(orig); spin_unlock(&orig->rq_rqbd->rqbd_svcpt->scp_at_lock); orig->rq_deadline = req->rq_deadline; + orig->rq_rep_mbits = req->rq_rep_mbits; if (likely(linked)) ptlrpc_at_add_timed(orig); ptlrpc_server_drop_request(orig); @@ -2057,6 +2046,7 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, struct ptlrpc_service *svc = svcpt->scp_service; struct ptlrpc_request *req; __u32 deadline; + __u32 opc; int rc; ENTRY; @@ -2067,8 +2057,8 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, RETURN(0); } - req = list_entry(svcpt->scp_req_incoming.next, - struct ptlrpc_request, rq_list); + req = list_first_entry(&svcpt->scp_req_incoming, + struct ptlrpc_request, rq_list); list_del_init(&req->rq_list); svcpt->scp_nreqs_incoming--; /* @@ -2113,8 +2103,9 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, goto err_req; } + opc = lustre_msg_get_opc(req->rq_reqmsg); if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) && - lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) { + opc == cfs_fail_val) { CERROR("drop incoming rpc opc %u, x%llu\n", cfs_fail_val, req->rq_xid); goto err_req; @@ -2128,7 +2119,7 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, goto err_req; } - switch (lustre_msg_get_opc(req->rq_reqmsg)) { + switch (opc) { case MDS_WRITEPAGE: case OST_WRITE: case OUT_UPDATE: @@ -2162,8 +2153,8 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, /* req_in handling should/must be fast */ if (ktime_get_real_seconds() - req->rq_arrival_time.tv_sec > 5) DEBUG_REQ(D_WARNING, req, "Slow req_in handling %llds", - (s64)(ktime_get_real_seconds() - - req->rq_arrival_time.tv_sec)); + ktime_get_real_seconds() - + req->rq_arrival_time.tv_sec); /* Set rpc server deadline and add it to the timed list */ deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) & @@ -2199,8 +2190,20 @@ static int ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, thread->t_env->le_ses = &req->rq_session; } + + if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_ENQ_RESEND) && + (opc == LDLM_ENQUEUE) && + (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) + OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_ENQ_RESEND, 6); + ptlrpc_at_add_timed(req); + if (opc != OST_CONNECT && opc != MDS_CONNECT && + opc != MGS_CONNECT && req->rq_export != NULL) { + if (exp_connect_flags2(req->rq_export) & OBD_CONNECT2_REP_MBITS) + req->rq_rep_mbits = lustre_msg_get_mbits(req->rq_reqmsg); + } + /* Move it over to the request processing queue */ rc = ptlrpc_server_request_add(svcpt, req); if (rc) @@ -2290,7 +2293,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, CDEBUG(D_RPCTRACE, "Handling RPC req@%p pname:cluuid+ref:pid:xid:nid:opc:job %s:%s+%d:%d:x%llu:%s:%d:%s\n", - request, current_comm(), + request, current->comm, (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? @@ -2330,7 +2333,7 @@ put_conn: arrived_usecs = ktime_us_delta(work_end, arrived); CDEBUG(D_RPCTRACE, "Handled RPC req@%p pname:cluuid+ref:pid:xid:nid:opc:job %s:%s+%d:%d:x%llu:%s:%d:%s Request processed in %lldus (%lldus total) trans %llu rc %d/%d\n", - request, current_comm(), + request, current->comm, (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? @@ -2554,14 +2557,6 @@ static void ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) } } -static int ptlrpc_retry_rqbds(void *arg) -{ - struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg; - - svcpt->scp_rqbd_timeout = 0; - return -ETIMEDOUT; -} - static inline int ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt) { return svcpt->scp_nreqs_active < @@ -2670,24 +2665,24 @@ static void ptlrpc_watchdog_fire(struct work_struct *w) } } -static void ptlrpc_watchdog_init(struct delayed_work *work, time_t time) +void ptlrpc_watchdog_init(struct delayed_work *work, timeout_t timeout) { INIT_DELAYED_WORK(work, ptlrpc_watchdog_fire); - schedule_delayed_work(work, cfs_time_seconds(time)); + schedule_delayed_work(work, cfs_time_seconds(timeout)); } -static void ptlrpc_watchdog_disable(struct delayed_work *work) +void ptlrpc_watchdog_disable(struct delayed_work *work) { cancel_delayed_work_sync(work); } -static void ptlrpc_watchdog_touch(struct delayed_work *work, time_t time) +void ptlrpc_watchdog_touch(struct delayed_work *work, timeout_t timeout) { struct ptlrpc_thread *thread = container_of(&work->work, struct ptlrpc_thread, t_watchdog.work); thread->t_touched = ktime_get(); - mod_delayed_work(system_wq, work, cfs_time_seconds(time)); + mod_delayed_work(system_wq, work, cfs_time_seconds(timeout)); } /** @@ -2705,20 +2700,28 @@ static __attribute__((__noinline__)) int ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, struct ptlrpc_thread *thread) { - /* Don't exit while there are replies to be handled */ - struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout, - ptlrpc_retry_rqbds, svcpt); - ptlrpc_watchdog_disable(&thread->t_watchdog); cond_resched(); - l_wait_event_exclusive_head(svcpt->scp_waitq, - ptlrpc_thread_stopping(thread) || - ptlrpc_server_request_incoming(svcpt) || - ptlrpc_server_request_pending(svcpt, false) || - ptlrpc_rqbd_pending(svcpt) || - ptlrpc_at_check(svcpt), &lwi); + if (svcpt->scp_rqbd_timeout == 0) + /* Don't exit while there are replies to be handled */ + wait_event_idle_exclusive_lifo( + svcpt->scp_waitq, + ptlrpc_thread_stopping(thread) || + ptlrpc_server_request_incoming(svcpt) || + ptlrpc_server_request_pending(svcpt, false) || + ptlrpc_rqbd_pending(svcpt) || + ptlrpc_at_check(svcpt)); + else if (wait_event_idle_exclusive_lifo_timeout( + svcpt->scp_waitq, + ptlrpc_thread_stopping(thread) || + ptlrpc_server_request_incoming(svcpt) || + ptlrpc_server_request_pending(svcpt, false) || + ptlrpc_rqbd_pending(svcpt) || + ptlrpc_at_check(svcpt), + svcpt->scp_rqbd_timeout) == 0) + svcpt->scp_rqbd_timeout = 0; if (ptlrpc_thread_stopping(thread)) return -EINTR; @@ -2747,8 +2750,7 @@ static int ptlrpc_main(void *arg) ENTRY; thread->t_task = current; - thread->t_pid = current_pid(); - unshare_fs_struct(); + thread->t_pid = current->pid; if (svc->srv_cpt_bind) { rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt); @@ -2948,7 +2950,7 @@ static int ptlrpc_hr_main(void *arg) { struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg; struct ptlrpc_hr_partition *hrp = hrt->hrt_partition; - struct list_head replies; + LIST_HEAD(replies); struct lu_env *env; int rc; @@ -2956,9 +2958,6 @@ static int ptlrpc_hr_main(void *arg) if (env == NULL) RETURN(-ENOMEM); - INIT_LIST_HEAD(&replies); - unshare_fs_struct(); - rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt); if (rc != 0) { char threadname[20]; @@ -2982,7 +2981,7 @@ static int ptlrpc_hr_main(void *arg) wake_up(&ptlrpc_hr.hr_waitq); while (!ptlrpc_hr.hr_stopping) { - l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); + wait_event_idle(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); while (!list_empty(&replies)) { struct ptlrpc_reply_state *rs; @@ -3022,7 +3021,7 @@ static void ptlrpc_stop_hr_threads(void) if (hrp->hrp_thrs == NULL) continue; /* uninitialized */ for (j = 0; j < hrp->hrp_nthrs; j++) - wake_up_all(&hrp->hrp_thrs[j].hrt_waitq); + wake_up(&hrp->hrp_thrs[j].hrt_waitq); } cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { @@ -3077,14 +3076,13 @@ static int ptlrpc_start_hr_threads(void) static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) { struct ptlrpc_thread *thread; - struct list_head zombie; + LIST_HEAD(zombie); ENTRY; CDEBUG(D_INFO, "Stopping threads for service %s\n", svcpt->scp_service->srv_name); - INIT_LIST_HEAD(&zombie); spin_lock(&svcpt->scp_lock); /* let the thread know that we would like it to stop asap */ list_for_each_entry(thread, &svcpt->scp_threads, t_link) @@ -3092,9 +3090,9 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) wake_up_all(&svcpt->scp_waitq); - while (!list_empty(&svcpt->scp_threads)) { - thread = list_entry(svcpt->scp_threads.next, - struct ptlrpc_thread, t_link); + while ((thread = list_first_entry_or_null(&svcpt->scp_threads, + struct ptlrpc_thread, + t_link)) != NULL) { if (thread_is_stopped(thread)) { list_move(&thread->t_link, &zombie); continue; @@ -3111,9 +3109,9 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) spin_unlock(&svcpt->scp_lock); - while (!list_empty(&zombie)) { - thread = list_entry(zombie.next, - struct ptlrpc_thread, t_link); + while ((thread = list_first_entry_or_null(&zombie, + struct ptlrpc_thread, + t_link)) != NULL) { list_del(&thread->t_link); OBD_FREE_PTR(thread); } @@ -3123,7 +3121,7 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) /** * Stops all threads of a particular service \a svc */ -void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) +static void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) { struct ptlrpc_service_part *svcpt; int i; @@ -3138,7 +3136,7 @@ void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) EXIT; } -int ptlrpc_start_threads(struct ptlrpc_service *svc) +static int ptlrpc_start_threads(struct ptlrpc_service *svc) { int rc = 0; int i; @@ -3170,7 +3168,7 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc) RETURN(rc); } -int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) +static int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) { struct ptlrpc_thread *thread; struct ptlrpc_service *svc; @@ -3290,7 +3288,7 @@ int ptlrpc_hr_init(void) ENTRY; memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr)); - ptlrpc_hr.hr_cpt_table = cfs_cpt_table; + ptlrpc_hr.hr_cpt_table = cfs_cpt_tab; ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table, sizeof(*hrp)); @@ -3351,10 +3349,8 @@ void ptlrpc_hr_fini(void) ptlrpc_stop_hr_threads(); cfs_percpt_for_each(hrp, cpt, ptlrpc_hr.hr_partitions) { - if (hrp->hrp_thrs != NULL) { - OBD_FREE(hrp->hrp_thrs, - hrp->hrp_nthrs * sizeof(hrp->hrp_thrs[0])); - } + if (hrp->hrp_thrs) + OBD_FREE_PTR_ARRAY(hrp->hrp_thrs, hrp->hrp_nthrs); } cfs_percpt_free(ptlrpc_hr.hr_partitions); @@ -3396,7 +3392,6 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) { struct ptlrpc_service_part *svcpt; struct ptlrpc_request_buffer_desc *rqbd; - struct l_wait_info lwi; int rc; int i; @@ -3434,18 +3429,21 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) */ spin_lock(&svcpt->scp_lock); while (svcpt->scp_nrqbds_posted != 0) { + int seconds = PTLRPC_REQ_LONG_UNLINK; + spin_unlock(&svcpt->scp_lock); /* * Network access will complete in finite time but * the HUGE timeout lets us CWARN for visibility * of sluggish NALs */ - lwi = LWI_TIMEOUT_INTERVAL( - cfs_time_seconds(LONG_UNLINK), - cfs_time_seconds(1), NULL, NULL); - rc = l_wait_event(svcpt->scp_waitq, - svcpt->scp_nrqbds_posted == 0, &lwi); - if (rc == -ETIMEDOUT) { + while (seconds > 0 && + wait_event_idle_timeout( + svcpt->scp_waitq, + svcpt->scp_nrqbds_posted == 0, + cfs_time_seconds(1)) == 0) + seconds -= 1; + if (seconds == 0) { CWARN("Service %s waiting for request buffers\n", svcpt->scp_service->srv_name); } @@ -3469,9 +3467,9 @@ ptlrpc_service_purge_all(struct ptlrpc_service *svc) break; spin_lock(&svcpt->scp_rep_lock); - while (!list_empty(&svcpt->scp_rep_active)) { - rs = list_entry(svcpt->scp_rep_active.next, - struct ptlrpc_reply_state, rs_list); + while ((rs = list_first_entry_or_null(&svcpt->scp_rep_active, + struct ptlrpc_reply_state, + rs_list)) != NULL) { spin_lock(&rs->rs_lock); ptlrpc_schedule_difficult_reply(rs); spin_unlock(&rs->rs_lock); @@ -3483,10 +3481,9 @@ ptlrpc_service_purge_all(struct ptlrpc_service *svc) * all unlinked) and no service threads, so I'm the only * thread noodling the request queue now */ - while (!list_empty(&svcpt->scp_req_incoming)) { - req = list_entry(svcpt->scp_req_incoming.next, - struct ptlrpc_request, rq_list); - + while ((req = list_first_entry_or_null(&svcpt->scp_req_incoming, + struct ptlrpc_request, + rq_list)) != NULL) { list_del(&req->rq_list); svcpt->scp_nreqs_incoming--; ptlrpc_server_finish_request(svcpt, req); @@ -3497,7 +3494,23 @@ ptlrpc_service_purge_all(struct ptlrpc_service *svc) ptlrpc_server_finish_active_request(svcpt, req); } - LASSERT(list_empty(&svcpt->scp_rqbd_posted)); + /* + * The portal may be shared by several services (eg:OUT_PORTAL). + * So the request could be referenced by other target. So we + * have to wait the ptlrpc_server_drop_request invoked. + * + * TODO: move the req_buffer as global rather than per service. + */ + spin_lock(&svcpt->scp_lock); + while (!list_empty(&svcpt->scp_rqbd_posted)) { + spin_unlock(&svcpt->scp_lock); + wait_event_idle_timeout(svcpt->scp_waitq, + list_empty(&svcpt->scp_rqbd_posted), + cfs_time_seconds(1)); + spin_lock(&svcpt->scp_lock); + } + spin_unlock(&svcpt->scp_lock); + LASSERT(svcpt->scp_nreqs_incoming == 0); LASSERT(svcpt->scp_nreqs_active == 0); /* @@ -3510,19 +3523,16 @@ ptlrpc_service_purge_all(struct ptlrpc_service *svc) * Now free all the request buffers since nothing * references them any more... */ - - while (!list_empty(&svcpt->scp_rqbd_idle)) { - rqbd = list_entry(svcpt->scp_rqbd_idle.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); + while ((rqbd = list_first_entry_or_null(&svcpt->scp_rqbd_idle, + struct ptlrpc_request_buffer_desc, + rqbd_list)) != NULL) ptlrpc_free_rqbd(rqbd); - } + ptlrpc_wait_replies(svcpt); - while (!list_empty(&svcpt->scp_rep_idle)) { - rs = list_entry(svcpt->scp_rep_idle.next, - struct ptlrpc_reply_state, - rs_list); + while ((rs = list_first_entry_or_null(&svcpt->scp_rep_idle, + struct ptlrpc_reply_state, + rs_list)) != NULL) { list_del(&rs->rs_list); OBD_FREE_LARGE(rs, svc->srv_max_reply_size); } @@ -3545,14 +3555,14 @@ ptlrpc_service_free(struct ptlrpc_service *svc) array = &svcpt->scp_at_array; if (array->paa_reqs_array != NULL) { - OBD_FREE(array->paa_reqs_array, - sizeof(struct list_head) * array->paa_size); + OBD_FREE_PTR_ARRAY(array->paa_reqs_array, + array->paa_size); array->paa_reqs_array = NULL; } if (array->paa_reqs_count != NULL) { - OBD_FREE(array->paa_reqs_count, - sizeof(__u32) * array->paa_size); + OBD_FREE_PTR_ARRAY(array->paa_reqs_count, + array->paa_size); array->paa_reqs_count = NULL; } }