LU-10467 ptlrpc: convert use of l_wait_event_exclusive_head()
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 1d39dd0..fefcb29 100644
@@ -41,6 +41,7 @@
 #include <lu_object.h>
 #include <uapi/linux/lnet/lnet-types.h>
 #include "ptlrpc_internal.h"
+#include <linux/delay.h>
 
 /* The following are visible and mutable through /sys/module/ptlrpc */
 int test_req_buffer_pressure = 0;
@@ -1024,6 +1025,30 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
        }
 }
 
+static void ptlrpc_add_exp_list_nolock(struct ptlrpc_request *req,
+                                      struct obd_export *export, bool hp)
+{
+       __u16 tag = lustre_msg_get_tag(req->rq_reqmsg);
+
+       if (hp)
+               list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
+       else
+               list_add(&req->rq_exp_list, &export->exp_reg_rpcs);
+       if (tag && export->exp_used_slots)
+               set_bit(tag - 1, export->exp_used_slots);
+}
+
+static void ptlrpc_del_exp_list(struct ptlrpc_request *req)
+{
+       __u16 tag = lustre_msg_get_tag(req->rq_reqmsg);
+
+       spin_lock(&req->rq_export->exp_rpc_lock);
+       list_del_init(&req->rq_exp_list);
+       if (tag && !req->rq_obsolete && req->rq_export->exp_used_slots)
+               clear_bit(tag - 1, req->rq_export->exp_used_slots);
+       spin_unlock(&req->rq_export->exp_rpc_lock);
+}
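
The two helpers above centralize the per-export slot bookkeeping: client tag N (1-based) owns bit N-1 of the exp_used_slots bitmap, set when a request is linked onto the export and cleared on removal unless the request was marked obsolete. A minimal userspace sketch of that mapping, with plain bit arithmetic standing in for the kernel's set_bit()/clear_bit()/test_bit() (all names here are hypothetical):

#include <stdio.h>

static unsigned long long used_slots;	/* stand-in for exp_used_slots, tags 1..64 */

static void slot_set(unsigned int tag)		/* mirrors set_bit(tag - 1, ...) */
{
	if (tag)
		used_slots |= 1ULL << (tag - 1);
}

static void slot_clear(unsigned int tag)	/* mirrors clear_bit(tag - 1, ...) */
{
	if (tag)
		used_slots &= ~(1ULL << (tag - 1));
}

static int slot_busy(unsigned int tag)		/* mirrors test_bit(tag - 1, ...) */
{
	return tag && (used_slots >> (tag - 1)) & 1;
}

int main(void)
{
	slot_set(3);				/* request with tag 3 arrives */
	printf("tag 3 busy: %d\n", slot_busy(3));	/* prints 1 */
	slot_clear(3);				/* request completes, slot freed */
	printf("tag 3 busy: %d\n", slot_busy(3));	/* prints 0 */
	return 0;
}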
+
 /** Change request export and move hp request from old export to new */
 void ptlrpc_request_change_export(struct ptlrpc_request *req,
                                  struct obd_export *export)
@@ -1031,19 +1056,13 @@ void ptlrpc_request_change_export(struct ptlrpc_request *req,
        if (req->rq_export != NULL) {
                LASSERT(!list_empty(&req->rq_exp_list));
                /* remove rq_exp_list from last export */
-               spin_lock(&req->rq_export->exp_rpc_lock);
-               list_del_init(&req->rq_exp_list);
-               spin_unlock(&req->rq_export->exp_rpc_lock);
-               /*
-                * export has one reference already, so it`s safe to
+               ptlrpc_del_exp_list(req);
+               /* export has one reference already, so it's safe to
                 * add req to export queue here and get another
                 * reference for request later
                 */
                spin_lock(&export->exp_rpc_lock);
-               if (req->rq_ops != NULL) /* hp request */
-                       list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
-               else
-                       list_add(&req->rq_exp_list, &export->exp_reg_rpcs);
+               ptlrpc_add_exp_list_nolock(req, export, req->rq_ops != NULL);
                spin_unlock(&export->exp_rpc_lock);
 
                class_export_rpc_dec(req->rq_export);
@@ -1053,8 +1072,6 @@ void ptlrpc_request_change_export(struct ptlrpc_request *req,
        /* request takes one export refcount */
        req->rq_export = class_export_get(export);
        class_export_rpc_inc(export);
-
-       return;
 }
 
 /**
@@ -1664,6 +1681,47 @@ found:
        return tmp;
 }
 
+#ifdef HAVE_SERVER_SUPPORT
+static void ptlrpc_server_mark_obsolete(struct ptlrpc_request *req)
+{
+       req->rq_obsolete = 1;
+}
+
+static void
+ptlrpc_server_mark_in_progress_obsolete(struct ptlrpc_request *req)
+{
+       struct ptlrpc_request   *tmp = NULL;
+       __u16                   tag;
+
+       if (!tgt_is_increasing_xid_client(req->rq_export) ||
+           req->rq_export->exp_used_slots == NULL)
+               return;
+
+       tag = lustre_msg_get_tag(req->rq_reqmsg);
+       if (tag == 0)
+               return;
+
+       if (!test_bit(tag - 1, req->rq_export->exp_used_slots))
+               return;
+
+       /* This list should not be longer than max_requests in
+        * flight on the client, so it is not all that long.
+        * Also we only hit this codepath for resent requests,
+        * which makes it rarer still. */
+       list_for_each_entry(tmp, &req->rq_export->exp_reg_rpcs, rq_exp_list) {
+               if (tag == lustre_msg_get_tag(tmp->rq_reqmsg) &&
+                   req->rq_xid > tmp->rq_xid)
+                       ptlrpc_server_mark_obsolete(tmp);
+       }
+       list_for_each_entry(tmp, &req->rq_export->exp_hp_rpcs, rq_exp_list) {
+               if (tag == lustre_msg_get_tag(tmp->rq_reqmsg) &&
+                   req->rq_xid > tmp->rq_xid)
+                       ptlrpc_server_mark_obsolete(tmp);
+       }
+}
+#endif
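
ptlrpc_server_mark_in_progress_obsolete() keys purely off the (tag, xid) pair: a resent request supersedes any in-flight request carrying the same tag with a smaller xid, and the superseded request is flagged so that completing it will not clear the still-busy slot bit. A hedged standalone sketch of that comparison, where struct fake_req and the flat array are stand-ins for struct ptlrpc_request and the export's two RPC lists:

#include <stdio.h>

struct fake_req {
	unsigned int tag;
	unsigned long long xid;
	int obsolete;			/* stand-in for rq_obsolete */
};

static void mark_in_progress_obsolete(struct fake_req *reqs, int n,
				      unsigned int tag,
				      unsigned long long xid)
{
	for (int i = 0; i < n; i++)
		if (reqs[i].tag == tag && xid > reqs[i].xid)
			reqs[i].obsolete = 1;	/* superseded by the resend */
}

int main(void)
{
	struct fake_req reqs[] = { { 3, 100, 0 }, { 3, 105, 0 }, { 4, 101, 0 } };

	/* a resend arrives carrying tag 3, xid 105 */
	mark_in_progress_obsolete(reqs, 3, 3, 105);
	for (int i = 0; i < 3; i++)	/* only tag 3 / xid 100 gets marked */
		printf("tag %u xid %llu obsolete %d\n",
		       reqs[i].tag, reqs[i].xid, reqs[i].obsolete);
	return 0;
}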
+
 /**
  * Check if a request should be assigned with a high priority.
  *
@@ -1721,9 +1779,7 @@ static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
                if (req->rq_ops && req->rq_ops->hpreq_fini)
                        req->rq_ops->hpreq_fini(req);
 
-               spin_lock(&req->rq_export->exp_rpc_lock);
-               list_del_init(&req->rq_exp_list);
-               spin_unlock(&req->rq_export->exp_rpc_lock);
+               ptlrpc_del_exp_list(req);
        }
        EXIT;
 }
@@ -1770,7 +1826,7 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
        hp = rc > 0;
        ptlrpc_nrs_req_initialize(svcpt, req, hp);
 
-       if (req->rq_export != NULL) {
+       while (req->rq_export != NULL) {
                struct obd_export *exp = req->rq_export;
 
                /*
@@ -1778,7 +1834,18 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
                 * atomically
                 */
                spin_lock_bh(&exp->exp_rpc_lock);
+#ifdef HAVE_SERVER_SUPPORT
+               ptlrpc_server_mark_in_progress_obsolete(req);
+#endif
                orig = ptlrpc_server_check_resend_in_progress(req);
+               if (orig && OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) {
+                       spin_unlock_bh(&exp->exp_rpc_lock);
+
+                       OBD_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
+                       msleep(4 * MSEC_PER_SEC);
+                       continue;
+               }
+
                if (orig && likely(atomic_inc_not_zero(&orig->rq_refcount))) {
                        bool linked;
 
@@ -1801,14 +1868,17 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
                                ptlrpc_at_add_timed(orig);
                        ptlrpc_server_drop_request(orig);
                        ptlrpc_nrs_req_finalize(req);
+
+                       /* keep the slot busy: a resend is in progress */
+                       req->rq_obsolete = 1;
+
                        RETURN(-EBUSY);
                }
 
-               if (hp || req->rq_ops != NULL)
-                       list_add(&req->rq_exp_list, &exp->exp_hp_rpcs);
-               else
-                       list_add(&req->rq_exp_list, &exp->exp_reg_rpcs);
+               ptlrpc_add_exp_list_nolock(req, exp, hp || req->rq_ops != NULL);
+
                spin_unlock_bh(&exp->exp_rpc_lock);
+               break;
        }
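
Turning `if (req->rq_export != NULL)` into a `while` loop lets the fault-injection path drop exp_rpc_lock, sleep, and redo the whole resend check from scratch rather than act on a stale answer. A hypothetical userspace sketch of that retry shape, with a pthread mutex standing in for exp_rpc_lock and resend_in_progress()/fail_precheck() standing in for ptlrpc_server_check_resend_in_progress() and OBD_FAIL_PRECHECK():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t exp_rpc_lock = PTHREAD_MUTEX_INITIALIZER;

static bool resend_in_progress(void) { return false; }	/* stand-in */
static bool fail_precheck(void) { return false; }	/* stand-in */

static void request_add(void)
{
	while (1) {
		pthread_mutex_lock(&exp_rpc_lock);
		if (resend_in_progress() && fail_precheck()) {
			pthread_mutex_unlock(&exp_rpc_lock);
			sleep(4);	/* mirrors msleep(4 * MSEC_PER_SEC) */
			continue;	/* re-run the check under the lock */
		}
		/* ... link the request onto the export under the lock ... */
		pthread_mutex_unlock(&exp_rpc_lock);
		break;
	}
}

int main(void)
{
	request_add();
	puts("request queued");
	return 0;
}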
 
        /*
@@ -2224,7 +2294,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
               (request->rq_export ?
                (char *)request->rq_export->exp_client_uuid.uuid : "0"),
               (request->rq_export ?
-               atomic_read(&request->rq_export->exp_refcount) : -99),
+               refcount_read(&request->rq_export->exp_handle.h_ref) : -99),
               lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
               libcfs_id2str(request->rq_peer),
               lustre_msg_get_opc(request->rq_reqmsg),
@@ -2264,7 +2334,7 @@ put_conn:
               (request->rq_export ?
               (char *)request->rq_export->exp_client_uuid.uuid : "0"),
               (request->rq_export ?
-              atomic_read(&request->rq_export->exp_refcount) : -99),
+               refcount_read(&request->rq_export->exp_handle.h_ref) : -99),
               lustre_msg_get_status(request->rq_reqmsg),
               request->rq_xid,
               libcfs_id2str(request->rq_peer),
@@ -2484,14 +2554,6 @@ static void ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
        }
 }
 
-static int ptlrpc_retry_rqbds(void *arg)
-{
-       struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg;
-
-       svcpt->scp_rqbd_timeout = 0;
-       return -ETIMEDOUT;
-}
-
 static inline int ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
 {
        return svcpt->scp_nreqs_active <
@@ -2584,7 +2646,8 @@ static void ptlrpc_watchdog_fire(struct work_struct *w)
        u64 ms_lapse = ktime_ms_delta(ktime_get(), thread->t_touched);
        u32 ms_frac = do_div(ms_lapse, MSEC_PER_SEC);
 
-       if (!__ratelimit(&watchdog_limit)) {
+       /* __ratelimit() returns true if the action is NOT rate-limited */
+       if (__ratelimit(&watchdog_limit)) {
                /* below message is checked in sanity-quota.sh test_6,18 */
                LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:\n",
                              thread->t_task->comm, thread->t_task->pid,
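
The old test was inverted: __ratelimit() returns nonzero when the event is allowed, i.e. NOT rate-limited, so the warning must fire on the true branch. A minimal kernel-style sketch of that convention (demo_rs and demo_warn() are hypothetical names):

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/ratelimit.h>

/* allow at most 3 events per 5 seconds */
static DEFINE_RATELIMIT_STATE(demo_rs, 5 * HZ, 3);

static void demo_warn(void)
{
	if (__ratelimit(&demo_rs))
		pr_warn("event allowed by the ratelimit\n");
	/* else: the event was rate-limited and nothing is printed */
}
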
@@ -2634,20 +2697,28 @@ static __attribute__((__noinline__)) int
 ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
                  struct ptlrpc_thread *thread)
 {
-       /* Don't exit while there are replies to be handled */
-       struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
-                                            ptlrpc_retry_rqbds, svcpt);
-
        ptlrpc_watchdog_disable(&thread->t_watchdog);
 
        cond_resched();
 
-       l_wait_event_exclusive_head(svcpt->scp_waitq,
-                               ptlrpc_thread_stopping(thread) ||
-                               ptlrpc_server_request_incoming(svcpt) ||
-                               ptlrpc_server_request_pending(svcpt, false) ||
-                               ptlrpc_rqbd_pending(svcpt) ||
-                               ptlrpc_at_check(svcpt), &lwi);
+       if (svcpt->scp_rqbd_timeout == 0)
+               /* Don't exit while there are replies to be handled */
+               wait_event_idle_exclusive_lifo(
+                       svcpt->scp_waitq,
+                       ptlrpc_thread_stopping(thread) ||
+                       ptlrpc_server_request_incoming(svcpt) ||
+                       ptlrpc_server_request_pending(svcpt, false) ||
+                       ptlrpc_rqbd_pending(svcpt) ||
+                       ptlrpc_at_check(svcpt));
+       else if (wait_event_idle_exclusive_lifo_timeout(
+                        svcpt->scp_waitq,
+                        ptlrpc_thread_stopping(thread) ||
+                        ptlrpc_server_request_incoming(svcpt) ||
+                        ptlrpc_server_request_pending(svcpt, false) ||
+                        ptlrpc_rqbd_pending(svcpt) ||
+                        ptlrpc_at_check(svcpt),
+                        svcpt->scp_rqbd_timeout) == 0)
+               svcpt->scp_rqbd_timeout = 0;
 
        if (ptlrpc_thread_stopping(thread))
                return -EINTR;
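
With the LWI machinery gone, the zero/non-zero value of scp_rqbd_timeout selects between an indefinite wait and a timed one, and expiry now resets the timeout inline, which is exactly what the removed ptlrpc_retry_rqbds() callback used to do through LWI_TIMEOUT. A hypothetical userspace sketch of that two-branch shape, with a pthread condition variable standing in for scp_waitq:

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static bool work_ready;		/* stand-in for the five wait conditions */
static long rqbd_timeout_sec;	/* stand-in for scp_rqbd_timeout */

static void service_wait(void)
{
	pthread_mutex_lock(&lock);
	if (rqbd_timeout_sec == 0) {
		while (!work_ready)	/* wait indefinitely */
			pthread_cond_wait(&waitq, &lock);
	} else {
		struct timespec ts;

		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_sec += rqbd_timeout_sec;
		while (!work_ready &&
		       pthread_cond_timedwait(&waitq, &lock, &ts) == 0)
			;	/* 0 means woken, not timed out: recheck */
		if (!work_ready)	/* timed out: next pass blocks */
			rqbd_timeout_sec = 0;
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	rqbd_timeout_sec = 1;	/* first pass times out and resets to 0 */
	service_wait();
	work_ready = true;	/* normally set by another thread */
	service_wait();		/* second pass returns immediately */
	return 0;
}
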
@@ -2778,6 +2849,9 @@ static int ptlrpc_main(void *arg)
 
                /* reset le_ses to initial state */
                env->le_ses = NULL;
+               /* Refill the context before execution to make sure
+                * all thread keys are allocated */
+               lu_env_refill(env);
                /* Process all incoming reqs before handling any */
                if (ptlrpc_server_request_incoming(svcpt)) {
                        lu_context_enter(&env->le_ctx);
@@ -3002,7 +3076,6 @@ static int ptlrpc_start_hr_threads(void)
 
 static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
 {
-       struct l_wait_info lwi = { 0 };
        struct ptlrpc_thread *thread;
        struct list_head zombie;
 
@@ -3030,8 +3103,8 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
 
                CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
                       svcpt->scp_service->srv_thread_name, thread->t_id);
-               l_wait_event(thread->t_ctl_waitq,
-                            thread_is_stopped(thread), &lwi);
+               wait_event_idle(thread->t_ctl_waitq,
+                               thread_is_stopped(thread));
 
                spin_lock(&svcpt->scp_lock);
        }
@@ -3099,7 +3172,6 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc)
 
 int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
 {
-       struct l_wait_info lwi = { 0 };
        struct ptlrpc_thread *thread;
        struct ptlrpc_service *svc;
        struct task_struct *task;
@@ -3199,9 +3271,8 @@ int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
        if (!wait)
                RETURN(0);
 
-       l_wait_event(thread->t_ctl_waitq,
-                    thread_is_running(thread) || thread_is_stopped(thread),
-                    &lwi);
+       wait_event_idle(thread->t_ctl_waitq,
+                       thread_is_running(thread) || thread_is_stopped(thread));
 
        rc = thread_is_stopped(thread) ? thread->t_id : 0;
        RETURN(rc);
@@ -3297,13 +3368,10 @@ void ptlrpc_hr_fini(void)
 static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
 {
        while (1) {
-               int rc;
-               struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
-                                                    NULL, NULL);
-
-               rc = l_wait_event(svcpt->scp_waitq,
-                    atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
-               if (rc == 0)
+               if (wait_event_idle_timeout(
+                       svcpt->scp_waitq,
+                       atomic_read(&svcpt->scp_nreps_difficult) == 0,
+                       cfs_time_seconds(10)) > 0)
                        break;
                CWARN("Unexpectedly long timeout %s %p\n",
                      svcpt->scp_service->srv_name, svcpt->scp_service);
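
Note the inverted return convention: wait_event_idle_timeout(), supplied by the kernel (or by Lustre's compat headers on older kernels), returns 0 when the timeout expired with the condition still false, and the remaining jiffies (at least 1) once the condition holds, whereas the old l_wait_event() signalled expiry with -ETIMEDOUT. A short kernel-style sketch of the same drain loop under that convention (demo_waitq and demo_count are hypothetical):

#include <linux/atomic.h>
#include <linux/printk.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static atomic_t demo_count = ATOMIC_INIT(1);	/* outstanding replies */

static void demo_drain(void)
{
	/* a return of 0 means ten more seconds passed without draining */
	while (wait_event_idle_timeout(demo_waitq,
				       atomic_read(&demo_count) == 0,
				       10 * HZ) == 0)
		pr_warn("still draining\n");
}
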
@@ -3328,7 +3396,6 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
 {
        struct ptlrpc_service_part *svcpt;
        struct ptlrpc_request_buffer_desc *rqbd;
-       struct l_wait_info lwi;
        int rc;
        int i;
 
@@ -3366,18 +3433,21 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
                 */
                spin_lock(&svcpt->scp_lock);
                while (svcpt->scp_nrqbds_posted != 0) {
+                       int seconds = LONG_UNLINK;
+
                        spin_unlock(&svcpt->scp_lock);
                        /*
                         * Network access will complete in finite time but
                         * the HUGE timeout lets us CWARN for visibility
                         * of sluggish NALs
                         */
-                       lwi = LWI_TIMEOUT_INTERVAL(
-                                       cfs_time_seconds(LONG_UNLINK),
-                                       cfs_time_seconds(1), NULL, NULL);
-                       rc = l_wait_event(svcpt->scp_waitq,
-                                         svcpt->scp_nrqbds_posted == 0, &lwi);
-                       if (rc == -ETIMEDOUT) {
+                       while (seconds > 0 &&
+                              wait_event_idle_timeout(
+                                      svcpt->scp_waitq,
+                                      svcpt->scp_nrqbds_posted == 0,
+                                      cfs_time_seconds(1)) == 0)
+                               seconds -= 1;
+                       if (seconds == 0) {
                                CWARN("Service %s waiting for request buffers\n",
                                      svcpt->scp_service->srv_name);
                        }
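
The LWI_TIMEOUT_INTERVAL pair becomes an explicit countdown of one-second waits, so the LONG_UNLINK budget is consumed one tick at a time and the CWARN fires only once the whole budget is spent. A hypothetical userspace sketch of that countdown, where posted() stands in for the scp_nrqbds_posted check:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int drain_secs = 2;	/* pretend the buffers drain after two seconds */
static bool posted(void) { return drain_secs > 0; }

int main(void)
{
	int seconds = 300;	/* stand-in for LONG_UNLINK */

	while (seconds > 0 && posted()) {	/* one-second polling ticks */
		sleep(1);
		drain_secs -= 1;
		seconds -= 1;
	}
	if (seconds == 0)	/* entire budget spent */
		puts("waiting for request buffers");	/* mirrors the CWARN */
	return 0;
}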