Whamcloud - gitweb
LU-56 ptlrpc: partition data for ptlrpc service
authorLiang Zhen <liang@whamcloud.com>
Wed, 23 May 2012 06:34:18 +0000 (14:34 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 22 Jun 2012 22:08:50 +0000 (18:08 -0400)
We will have multiple partition data & threads for each ptlrpc service.
This patch is the first step of that work: it moves quite a lot of members
of ptlrpc_service to a new structure, ptlrpc_service_part.
For now we only create one instance of ptlrpc_service_part for each
service, but we will have multiple instances for each service
very soon (one instance per CPT, CPU ParTition).

Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I63d816bdf44a22528c6097fe348060f57d862df3
Reviewed-on: http://review.whamcloud.com/2895
Tested-by: Hudson
Reviewed-by: wangdi <di.wang@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
16 files changed:
lustre/include/lustre_net.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_recovery.c
lustre/ost/ost_handler.c
lustre/ptlrpc/events.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/sec.c
lustre/ptlrpc/service.c
lustre/quota/quota_context.c
lustre/quota/quota_interface.c

index ff325fb..64b5878 100644 (file)
@@ -283,8 +283,6 @@ struct ptlrpc_request_set {
        set_interpreter_func  set_interpret;
        /** opaq argument passed to completion \a set_interpret callback. */
        void                 *set_arg;
-       /** rq_status of requests that have been freed already */
-       int                   set_rc;
        /**
         * Lock for \a set_new_requests manipulations
         * locked so that any old caller can communicate requests to
@@ -294,6 +292,8 @@ struct ptlrpc_request_set {
        /** List of new yet unsent requests. Only used with ptlrpcd now. */
        cfs_list_t            set_new_requests;
 
+       /** rq_status of requests that have been freed already */
+       int                   set_rc;
        /** Additional fields used by the flow control extension */
        /** Maximum number of RPCs in flight */
        int                   set_max_inflight;
@@ -316,6 +316,7 @@ struct ptlrpc_set_cbdata {
 };
 
 struct ptlrpc_bulk_desc;
+struct ptlrpc_service_part;
 
 /**
  * ptlrpc callback & work item stuff
@@ -371,7 +372,7 @@ struct ptlrpc_reply_state {
         /** xid */
         __u64                  rs_xid;
         struct obd_export     *rs_export;
-        struct ptlrpc_service *rs_service;
+       struct ptlrpc_service_part *rs_svcpt;
         /** Lnet metadata handle for the reply */
         lnet_handle_md_t       rs_md_h;
         cfs_atomic_t           rs_refcount;
@@ -994,9 +995,9 @@ struct ptlrpc_thread {
         /**
          * the svc this thread belonged to b=18582
          */
-        struct ptlrpc_service *t_svc;
-        cfs_waitq_t t_ctl_waitq;
-        struct lu_env *t_env;
+       struct ptlrpc_service_part      *t_svcpt;
+       cfs_waitq_t                     t_ctl_waitq;
+       struct lu_env                   *t_env;
 };
 
 static inline int thread_is_init(struct ptlrpc_thread *thread)
@@ -1072,7 +1073,7 @@ struct ptlrpc_request_buffer_desc {
         /** History of requests for this buffer */
         cfs_list_t             rqbd_reqs;
         /** Back pointer to service for which this buffer is registered */
-        struct ptlrpc_service *rqbd_service;
+       struct ptlrpc_service_part *rqbd_svcpt;
         /** LNet descriptor */
         lnet_handle_md_t       rqbd_md_h;
         int                    rqbd_refcount;
@@ -1130,21 +1131,10 @@ struct ptlrpc_service_ops {
  * The service is listening on a particular portal (like tcp port)
  * and perform actions for a specific server like IO service for OST
  * or general metadata service for MDS.
- *
- * ptlrpc service has four locks:
- * \a srv_lock
- *    serialize operations on rqbd and requests waiting for preprocess
- * \a srv_rq_lock
- *    serialize operations active requests sent to this portal
- * \a srv_at_lock
- *    serialize adaptive timeout stuff
- * \a srv_rs_lock
- *    serialize operations on RS list (reply states)
- *
- * We don't have any use-case to take two or more locks at the same time
- * for now, so there is no lock order issue.
  */
 struct ptlrpc_service {
+       /** serialize /proc operations */
+       cfs_spinlock_t                  srv_lock;
         /** most often accessed fields */
         /** chain thru all services */
         cfs_list_t                      srv_list;
@@ -1160,13 +1150,6 @@ struct ptlrpc_service {
         int                             srv_threads_min;
         /** thread upper limit */
         int                             srv_threads_max;
-        /** always increasing number */
-        unsigned                        srv_threads_next_id;
-        /** # of starting threads */
-        int                             srv_threads_starting;
-        /** # running threads */
-        int                             srv_threads_running;
-
         /** Root of /proc dir tree for this service */
         cfs_proc_dir_entry_t           *srv_procroot;
         /** Pointer to statistic data for this service */
@@ -1197,97 +1180,142 @@ struct ptlrpc_service {
         /** under unregister_service */
         unsigned                        srv_is_stopping:1;
 
-        /**
-         * serialize the following fields, used for protecting
-         * rqbd list and incoming requests waiting for preprocess
-         */
-        cfs_spinlock_t                  srv_lock  __cfs_cacheline_aligned;
-        /** incoming reqs */
-        cfs_list_t                      srv_req_in_queue;
-        /** total # req buffer descs allocated */
-        int                             srv_nbufs;
-        /** # posted request buffers */
-        int                             srv_nrqbd_receiving;
-        /** timeout before re-posting reqs, in tick */
-        cfs_duration_t                  srv_rqbd_timeout;
-        /** request buffers to be reposted */
-        cfs_list_t                      srv_idle_rqbds;
-        /** req buffers receiving */
-        cfs_list_t                      srv_active_rqbds;
-        /** request buffer history */
-        cfs_list_t                      srv_history_rqbds;
-        /** # request buffers in history */
-        int                             srv_n_history_rqbds;
-        /** max # request buffers in history */
-        int                             srv_max_history_rqbds;
-        /** request history */
-        cfs_list_t                      srv_request_history;
-        /** next request sequence # */
-        __u64                           srv_request_seq;
-        /** highest seq culled from history */
-        __u64                           srv_request_max_cull_seq;
-        /**
-         * all threads sleep on this. This wait-queue is signalled when new
-         * incoming request arrives and when difficult reply has to be handled.
-         */
-        cfs_waitq_t                     srv_waitq;
+       /**
+        * max # request buffers in history; it needs to be converted into
+        * a per-partition value when we have multiple partitions
+        */
+       int                             srv_max_history_rqbds;
+       /**
+        * partition data for ptlrpc service, only one instance so far,
+        * instance per CPT will come soon
+        */
+       struct ptlrpc_service_part      *srv_part;
+};
 
-        /**
-         * serialize the following fields, used for processing requests
-         * sent to this portal
-         */
-        cfs_spinlock_t                  srv_rq_lock __cfs_cacheline_aligned;
-        /** # reqs in either of the queues below */
-        /** reqs waiting for service */
-        cfs_list_t                      srv_request_queue;
-        /** high priority queue */
-        cfs_list_t                      srv_request_hpq;
-        /** # incoming reqs */
-        int                             srv_n_queued_reqs;
-        /** # reqs being served */
-        int                             srv_n_active_reqs;
-        /** # HPreqs being served */
-        int                             srv_n_active_hpreq;
-        /** # hp requests handled */
-        int                             srv_hpreq_count;
-
-        /** AT stuff */
-        /** @{ */
-        /**
-         * serialize the following fields, used for changes on
-         * adaptive timeout
-         */
-        cfs_spinlock_t                  srv_at_lock __cfs_cacheline_aligned;
-        /** estimated rpc service time */
-        struct adaptive_timeout         srv_at_estimate;
-        /** reqs waiting for replies */
-        struct ptlrpc_at_array          srv_at_array;
-        /** early reply timer */
-        cfs_timer_t                     srv_at_timer;
-        /** check early replies */
-        unsigned                        srv_at_check;
-        /** debug */
-        cfs_time_t                      srv_at_checktime;
-        /** @} */
+/**
+ * Definition of PortalRPC service partition data.
+ * Although a service only has one instance of it right now, we
+ * will have multiple instances very soon (one instance per CPT).
+ *
+ * it has four locks:
+ * \a scp_lock
+ *    serialize operations on rqbd and requests waiting for preprocess
+ * \a scp_req_lock
+ *    serialize operations on active requests sent to this portal
+ * \a scp_at_lock
+ *    serialize adaptive timeout stuff
+ * \a scp_rep_lock
+ *    serialize operations on RS list (reply states)
+ *
+ * We don't have any use-case to take two or more locks at the same time
+ * for now, so there is no lock order issue.
+ */
+struct ptlrpc_service_part {
+       /** back reference to owner */
+       struct ptlrpc_service           *scp_service __cfs_cacheline_aligned;
+       /* CPT id, reserved */
+       int                             scp_cpt;
+       /** always increasing number */
+       int                             scp_thr_nextid;
+       /** # of starting threads */
+       int                             scp_nthrs_starting;
+       /** # of stopping threads, reserved for shrinking threads */
+       int                             scp_nthrs_stopping;
+       /** # running threads */
+       int                             scp_nthrs_running;
+       /** service threads list */
+       cfs_list_t                      scp_threads;
 
-        /**
-         * serialize the following fields, used for processing
-         * replies for this portal
-         */
-        cfs_spinlock_t                  srv_rs_lock __cfs_cacheline_aligned;
-        /** all the active replies */
-        cfs_list_t                      srv_active_replies;
+       /**
+        * serialize the following fields, used for protecting
+        * rqbd list and incoming requests waiting for preprocess,
+        * threads starting & stopping are also protected by this lock.
+        */
+       cfs_spinlock_t                  scp_lock  __cfs_cacheline_aligned;
+       /** total # req buffer descs allocated */
+       int                             scp_nrqbds_total;
+       /** # posted request buffers for receiving */
+       int                             scp_nrqbds_posted;
+       /** # incoming reqs */
+       int                             scp_nreqs_incoming;
+       /** request buffers to be reposted */
+       cfs_list_t                      scp_rqbd_idle;
+       /** req buffers receiving */
+       cfs_list_t                      scp_rqbd_posted;
+       /** incoming reqs */
+       cfs_list_t                      scp_req_incoming;
+       /** timeout before re-posting reqs, in tick */
+       cfs_duration_t                  scp_rqbd_timeout;
+       /**
+        * all threads sleep on this. This wait-queue is signalled when new
+        * incoming request arrives and when difficult reply has to be handled.
+        */
+       cfs_waitq_t                     scp_waitq;
+
+       /** request history */
+       cfs_list_t                      scp_hist_reqs;
+       /** request buffer history */
+       cfs_list_t                      scp_hist_rqbds;
+       /** # request buffers in history */
+       int                             scp_hist_nrqbds;
+       /** sequence number for request */
+       __u64                           scp_hist_seq;
+       /** highest seq culled from history */
+       __u64                           scp_hist_seq_culled;
+
+       /**
+        * serialize the following fields, used for processing requests
+        * sent to this portal
+        */
+       cfs_spinlock_t                  scp_req_lock __cfs_cacheline_aligned;
+       /** # reqs in either of the queues below */
+       /** reqs waiting for service */
+       cfs_list_t                      scp_req_pending;
+       /** high priority queue */
+       cfs_list_t                      scp_hreq_pending;
+       /** # reqs being served */
+       int                             scp_nreqs_active;
+       /** # HPreqs being served */
+       int                             scp_nhreqs_active;
+       /** # hp requests handled */
+       int                             scp_hreq_count;
+
+       /** AT stuff */
+       /** @{ */
+       /**
+        * serialize the following fields, used for changes on
+        * adaptive timeout
+        */
+       cfs_spinlock_t                  scp_at_lock __cfs_cacheline_aligned;
+       /** estimated rpc service time */
+       struct adaptive_timeout         scp_at_estimate;
+       /** reqs waiting for replies */
+       struct ptlrpc_at_array          scp_at_array;
+       /** early reply timer */
+       cfs_timer_t                     scp_at_timer;
+       /** debug */
+       cfs_time_t                      scp_at_checktime;
+       /** check early replies */
+       unsigned                        scp_at_check;
+       /** @} */
+
+       /**
+        * serialize the following fields, used for processing
+        * replies for this portal
+        */
+       cfs_spinlock_t                  scp_rep_lock __cfs_cacheline_aligned;
+       /** all the active replies */
+       cfs_list_t                      scp_rep_active;
 #ifndef __KERNEL__
-        /** replies waiting for service */
-        cfs_list_t                      srv_reply_queue;
+       /** replies waiting for service */
+       cfs_list_t                      scp_rep_queue;
 #endif
-        /** List of free reply_states */
-        cfs_list_t                      srv_free_rs_list;
-        /** waitq to run, when adding stuff to srv_free_rs_list */
-        cfs_waitq_t                     srv_free_rs_waitq;
-        /** # 'difficult' replies */
-        cfs_atomic_t                    srv_n_difficult_replies;
-        //struct ptlrpc_srv_ni srv_interfaces[0];
+       /** List of free reply_states */
+       cfs_list_t                      scp_rep_idle;
+       /** waitq to run, when adding stuff to scp_rep_idle */
+       cfs_waitq_t                     scp_rep_waitq;
+       /** # 'difficult' replies */
+       cfs_atomic_t                    scp_nreps_difficult;
 };
 
 /**
@@ -1625,7 +1653,6 @@ struct ptlrpc_service *ptlrpc_register_service(
 void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
 
 int ptlrpc_start_threads(struct ptlrpc_service *svc);
-int ptlrpc_start_thread(struct ptlrpc_service *svc);
 int ptlrpc_unregister_service(struct ptlrpc_service *service);
 int liblustre_check_services(void *arg);
 void ptlrpc_daemonize(char *name);
@@ -1912,6 +1939,22 @@ static inline int ptlrpc_no_resend(struct ptlrpc_request *req)
         return req->rq_no_resend;
 }
 
+static inline int
+ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt)
+{      /* Returns the owning service's srv_watchdog_factor multiplied by
+        * the larger of obd_timeout and this partition's adaptive-timeout
+        * estimate; the estimate is taken as 0 when AT_OFF is set. */
+       int at = AT_OFF ? 0 : at_get(&svcpt->scp_at_estimate);
+
+       return svcpt->scp_service->srv_watchdog_factor *
+              max_t(int, at, obd_timeout);
+}
+
+static inline struct ptlrpc_service *
+ptlrpc_req2svc(struct ptlrpc_request *req)
+{      /* Map \a req to its service by following request -> request buffer
+        * descriptor -> service partition -> owning service.  Asserts that
+        * the request has a buffer descriptor (rq_rqbd) attached. */
+       LASSERT(req->rq_rqbd != NULL);
+       return req->rq_rqbd->rqbd_svcpt->scp_service;
+}
+
 /* ldlm/ldlm_lib.c */
 /**
  * Target client logic
index c6ba490..9802ba0 100644 (file)
@@ -595,18 +595,20 @@ int server_disconnect_export(struct obd_export *exp)
                 struct ptlrpc_reply_state *rs =
                         cfs_list_entry(exp->exp_outstanding_replies.next,
                                        struct ptlrpc_reply_state, rs_exp_list);
-                struct ptlrpc_service *svc = rs->rs_service;
+               struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
 
-                cfs_spin_lock(&svc->srv_rs_lock);
-                cfs_list_del_init(&rs->rs_exp_list);
-                cfs_spin_lock(&rs->rs_lock);
-                ptlrpc_schedule_difficult_reply(rs);
-                cfs_spin_unlock(&rs->rs_lock);
-                cfs_spin_unlock(&svc->srv_rs_lock);
-        }
-        cfs_spin_unlock(&exp->exp_lock);
+               cfs_spin_lock(&svcpt->scp_rep_lock);
 
-        RETURN(rc);
+               cfs_list_del_init(&rs->rs_exp_list);
+               cfs_spin_lock(&rs->rs_lock);
+               ptlrpc_schedule_difficult_reply(rs);
+               cfs_spin_unlock(&rs->rs_lock);
+
+               cfs_spin_unlock(&svcpt->scp_rep_lock);
+       }
+       cfs_spin_unlock(&exp->exp_lock);
+
+       RETURN(rc);
 }
 
 /* --------------------------------------------------------------------------
@@ -1517,7 +1519,7 @@ check_and_start_recovery_timer(struct obd_device *obd,
         if (!new_client && service_time)
                 /* Teach server about old server's estimates, as first guess
                  * at how long new requests will take. */
-                at_measured(&req->rq_rqbd->rqbd_service->srv_at_estimate,
+               at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
                             service_time);
 
         target_start_recovery_timer(obd);
@@ -1828,15 +1830,17 @@ static int handle_recovery_req(struct ptlrpc_thread *thread,
                  * this client may come in recovery time
                  */
                 if (!AT_OFF) {
-                        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
-                        /* If the server sent early reply for this request,
-                         * the client will recalculate the timeout according to
-                         * current server estimate service time, so we will
-                         * use the maxium timeout here for waiting the client
-                         * sending the next req */
-                        to = max((int)at_est2timeout(
-                                 at_get(&svc->srv_at_estimate)),
-                                 (int)lustre_msg_get_timeout(req->rq_reqmsg));
+                       struct ptlrpc_service_part *svcpt;
+
+                       svcpt = req->rq_rqbd->rqbd_svcpt;
+                       /* If the server sent early reply for this request,
+                        * the client will recalculate the timeout according to
+                        * current server estimate service time, so we will
+                        * use the maximum timeout here while waiting for the
+                        * client to send the next req */
+                       to = max((int)at_est2timeout(
+                                at_get(&svcpt->scp_at_estimate)),
+                                (int)lustre_msg_get_timeout(req->rq_reqmsg));
                         /* Add net_latency (see ptlrpc_replay_req) */
                         to += lustre_msg_get_service_time(req->rq_reqmsg);
                 }
@@ -2314,10 +2318,10 @@ int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
 
 void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 {
+       struct ptlrpc_service_part *svcpt;
         int                        netrc;
         struct ptlrpc_reply_state *rs;
         struct obd_export         *exp;
-        struct ptlrpc_service     *svc;
         ENTRY;
 
         if (req->rq_no_reply) {
@@ -2325,7 +2329,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
                 return;
         }
 
-        svc = req->rq_rqbd->rqbd_service;
+       svcpt = req->rq_rqbd->rqbd_svcpt;
         rs = req->rq_reply_state;
         if (rs == NULL || !rs->rs_difficult) {
                 /* no notifiers */
@@ -2337,7 +2341,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
         /* must be an export if locks saved */
         LASSERT (req->rq_export != NULL);
         /* req/reply consistent */
-        LASSERT (rs->rs_service == svc);
+       LASSERT(rs->rs_svcpt == svcpt);
 
         /* "fresh" reply */
         LASSERT (!rs->rs_scheduled);
@@ -2374,9 +2378,9 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 
         netrc = target_send_reply_msg (req, rc, fail_id);
 
-        cfs_spin_lock(&svc->srv_rs_lock);
+       cfs_spin_lock(&svcpt->scp_rep_lock);
 
-        cfs_atomic_inc(&svc->srv_n_difficult_replies);
+       cfs_atomic_inc(&svcpt->scp_nreps_difficult);
 
         if (netrc != 0) {
                 /* error sending: reply is off the net.  Also we need +1
@@ -2396,12 +2400,12 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
                 CDEBUG(D_HA, "Schedule reply immediately\n");
                 ptlrpc_dispatch_difficult_reply(rs);
         } else {
-                cfs_list_add (&rs->rs_list, &svc->srv_active_replies);
-                rs->rs_scheduled = 0;           /* allow notifier to schedule */
-        }
-        cfs_spin_unlock(&rs->rs_lock);
-        cfs_spin_unlock(&svc->srv_rs_lock);
-        EXIT;
+               cfs_list_add(&rs->rs_list, &svcpt->scp_rep_active);
+               rs->rs_scheduled = 0;   /* allow notifier to schedule */
+       }
+       cfs_spin_unlock(&rs->rs_lock);
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
+       EXIT;
 }
 
 int target_handle_qc_callback(struct ptlrpc_request *req)
index 4aad818..0883897 100644 (file)
@@ -1058,9 +1058,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
 
         LASSERT(req->rq_export);
 
-        if (req->rq_rqbd->rqbd_service->srv_stats)
-                ldlm_svc_get_eopc(dlm_req,
-                                  req->rq_rqbd->rqbd_service->srv_stats);
+       if (ptlrpc_req2svc(req)->srv_stats != NULL)
+               ldlm_svc_get_eopc(dlm_req, ptlrpc_req2svc(req)->srv_stats);
 
         if (req->rq_export && req->rq_export->exp_nid_stats &&
             req->rq_export->exp_nid_stats->nid_ldlm_stats)
index e58ac8b..ee01e1c 100644 (file)
@@ -336,20 +336,20 @@ static int mdt_getstatus(struct mdt_thread_info *info)
 
 static int mdt_statfs(struct mdt_thread_info *info)
 {
-        struct ptlrpc_request *req = mdt_info_req(info);
-        struct md_device      *next  = info->mti_mdt->mdt_child;
-        struct ptlrpc_service *svc;
-        struct obd_statfs     *osfs;
-        int                    rc;
+       struct ptlrpc_request           *req = mdt_info_req(info);
+       struct md_device                *next = info->mti_mdt->mdt_child;
+       struct ptlrpc_service_part      *svcpt;
+       struct obd_statfs               *osfs;
+       int                             rc;
 
-        ENTRY;
+       ENTRY;
 
-        svc = info->mti_pill->rc_req->rq_rqbd->rqbd_service;
+       svcpt = info->mti_pill->rc_req->rq_rqbd->rqbd_svcpt;
 
-        /* This will trigger a watchdog timeout */
-        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
-                         (MDT_SERVICE_WATCHDOG_FACTOR *
-                          at_get(&svc->srv_at_estimate)) + 1);
+       /* This will trigger a watchdog timeout */
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
+                        (MDT_SERVICE_WATCHDOG_FACTOR *
+                         at_get(&svcpt->scp_at_estimate)) + 1);
 
         rc = mdt_check_ucred(info);
         if (rc)
index 43b41d4..08ddc03 100644 (file)
@@ -589,10 +589,10 @@ void mdt_fs_cleanup(const struct lu_env *env, struct mdt_device *mdt)
 /* reconstruction code */
 static void mdt_steal_ack_locks(struct ptlrpc_request *req)
 {
+       struct ptlrpc_service_part *svcpt;
         struct obd_export         *exp = req->rq_export;
         cfs_list_t                *tmp;
         struct ptlrpc_reply_state *oldrep;
-        struct ptlrpc_service     *svc;
         int                        i;
 
         /* CAVEAT EMPTOR: spinlock order */
@@ -610,8 +610,8 @@ static void mdt_steal_ack_locks(struct ptlrpc_request *req)
                                 lustre_msg_get_opc(req->rq_reqmsg),
                                 oldrep->rs_opc);
 
-                svc = oldrep->rs_service;
-                cfs_spin_lock (&svc->srv_rs_lock);
+               svcpt = oldrep->rs_svcpt;
+               cfs_spin_lock(&svcpt->scp_rep_lock);
 
                 cfs_list_del_init (&oldrep->rs_exp_list);
 
@@ -631,7 +631,7 @@ static void mdt_steal_ack_locks(struct ptlrpc_request *req)
                 ptlrpc_schedule_difficult_reply (oldrep);
                 cfs_spin_unlock(&oldrep->rs_lock);
 
-                cfs_spin_unlock (&svc->srv_rs_lock);
+               cfs_spin_unlock(&svcpt->scp_rep_lock);
                 break;
         }
         cfs_spin_unlock(&exp->exp_lock);
index 05e51df..20cd111 100644 (file)
@@ -1689,12 +1689,13 @@ struct ost_prolong_data {
  */
 static inline int prolong_timeout(struct ptlrpc_request *req)
 {
-        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
+       struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
 
-        if (AT_OFF)
-                return obd_timeout / 2;
+       if (AT_OFF)
+               return obd_timeout / 2;
 
-        return max(at_est2timeout(at_get(&svc->srv_at_estimate)), ldlm_timeout);
+       return max(at_est2timeout(at_get(&svcpt->scp_at_estimate)),
+                  ldlm_timeout);
 }
 
 static void ost_prolong_lock_one(struct ost_prolong_data *opd,
@@ -2189,11 +2190,11 @@ int ost_handle(struct ptlrpc_request *req)
                 req_capsule_set(&req->rq_pill, &RQF_OST_BRW_WRITE);
                 CDEBUG(D_INODE, "write\n");
                 /* req->rq_request_portal would be nice, if it was set */
-                if (req->rq_rqbd->rqbd_service->srv_req_portal !=OST_IO_PORTAL){
-                        CERROR("%s: deny write request from %s to portal %u\n",
-                               req->rq_export->exp_obd->obd_name,
-                               obd_export_nid2str(req->rq_export),
-                               req->rq_rqbd->rqbd_service->srv_req_portal);
+               if (ptlrpc_req2svc(req)->srv_req_portal != OST_IO_PORTAL) {
+                       CERROR("%s: deny write request from %s to portal %u\n",
+                              req->rq_export->exp_obd->obd_name,
+                              obd_export_nid2str(req->rq_export),
+                              ptlrpc_req2svc(req)->srv_req_portal);
                         GOTO(out, rc = -EPROTO);
                 }
                 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_NET))
@@ -2210,11 +2211,11 @@ int ost_handle(struct ptlrpc_request *req)
                 req_capsule_set(&req->rq_pill, &RQF_OST_BRW_READ);
                 CDEBUG(D_INODE, "read\n");
                 /* req->rq_request_portal would be nice, if it was set */
-                if (req->rq_rqbd->rqbd_service->srv_req_portal !=OST_IO_PORTAL){
-                        CERROR("%s: deny read request from %s to portal %u\n",
-                               req->rq_export->exp_obd->obd_name,
-                               obd_export_nid2str(req->rq_export),
-                               req->rq_rqbd->rqbd_service->srv_req_portal);
+               if (ptlrpc_req2svc(req)->srv_req_portal != OST_IO_PORTAL) {
+                       CERROR("%s: deny read request from %s to portal %u\n",
+                              req->rq_export->exp_obd->obd_name,
+                              obd_export_nid2str(req->rq_export),
+                              ptlrpc_req2svc(req)->srv_req_portal);
                         GOTO(out, rc = -EPROTO);
                 }
                 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_NET))
index ab6da13..5565049 100644 (file)
@@ -221,13 +221,74 @@ void client_bulk_callback (lnet_event_t *ev)
 }
 
 /*
+ * Upcoming patches will give each ptlrpc service a per-CPT request history
+ * list, because we don't want to be serialized by the current per-service
+ * history operations. So the history ID must (somehow) reflect arrival order
+ * without grabbing a global lock, so that users can sort IDs in userspace.
+ *
+ * This is how we generate history ID for ptlrpc_request:
+ * ----------------------------------------------------
+ * |  32 bits  |  16 bits  | (16 - X)bits  |  X bits  |
+ * ----------------------------------------------------
+ * |  seconds  | usec / 16 |   sequence    | CPT id   |
+ * ----------------------------------------------------
+ *
+ * it might not be precise but should be good enough.
+ */
+#define REQS_ALL_BITS(svcpt)   ((int)(sizeof((svcpt)->scp_hist_seq) * 8))
+#define REQS_SEC_BITS          32      /* arrival time, seconds */
+#define REQS_USEC_BITS         16      /* arrival time, usec / 16 */
+/* no CPT bits for now; will soon be replaced by the bits needed for the
+ * total number of service partitions */
+#define REQS_CPT_BITS(svcpt)   0
+#define REQS_SEQ_BITS(svcpt)   (REQS_ALL_BITS(svcpt) - REQS_CPT_BITS(svcpt) -\
+                                REQS_SEC_BITS - REQS_USEC_BITS)
+
+#define REQS_SEQ_SHIFT(svcpt)  (REQS_CPT_BITS(svcpt))  /* just above CPT id */
+#define REQS_USEC_SHIFT(svcpt) (REQS_SEQ_SHIFT(svcpt) + REQS_SEQ_BITS(svcpt))
+#define REQS_SEC_SHIFT(svcpt)  (REQS_USEC_SHIFT(svcpt) + REQS_USEC_BITS)
+
+static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt,
+                                  struct ptlrpc_request *req)
+{
+       __u64   sec = req->rq_arrival_time.tv_sec;
+       __u32   usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */
+       __u64   new_seq;
+
+       /* Build the history sequence ID for \a req from its arrival time
+        * and append \a req to the tail of the history list.  The caller
+        * must hold svcpt::scp_lock.
+        *
+        * NOTE(review): scp_cpt is a signed int OR-ed into a 64-bit value;
+        * a negative scp_cpt would sign-extend over the whole ID.  Confirm
+        * it is always >= 0 when this runs. */
+
+       LASSERT(REQS_SEQ_BITS(svcpt) > 0);
+
+       new_seq = (sec << REQS_SEC_SHIFT(svcpt)) |
+                 (usec << REQS_USEC_SHIFT(svcpt)) | svcpt->scp_cpt;
+       if (new_seq > svcpt->scp_hist_seq) {
+               /* This handles the initial case of scp_hist_seq == 0, or
+                * the case where we just moved into a new time window */
+               svcpt->scp_hist_seq = new_seq;
+       } else {
+               /* NB: increase the sequence number within the current usec
+                * bucket.  It's possible that we use up all the bits for
+                * the sequence and spill into the next usec bucket (a future
+                * time); then we hope there will be fewer RPCs per bucket at
+                * some point, and the sequence will catch up again */
+               svcpt->scp_hist_seq += (1U << REQS_CPT_BITS(svcpt));
+               new_seq = svcpt->scp_hist_seq;
+       }
+
+       req->rq_history_seq = new_seq;
+
+       cfs_list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs);
+}
+
+/*
  * Server's incoming request callback
  */
 void request_in_callback(lnet_event_t *ev)
 {
-        struct ptlrpc_cb_id               *cbid = ev->md.user_ptr;
-        struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
-        struct ptlrpc_service             *service = rqbd->rqbd_service;
+       struct ptlrpc_cb_id               *cbid = ev->md.user_ptr;
+       struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
+       struct ptlrpc_service_part        *svcpt = rqbd->rqbd_svcpt;
+       struct ptlrpc_service             *service = svcpt->scp_service;
         struct ptlrpc_request             *req;
         ENTRY;
 
@@ -289,21 +350,20 @@ void request_in_callback(lnet_event_t *ev)
 
         CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
 
-        cfs_spin_lock(&service->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
 
-        req->rq_history_seq = service->srv_request_seq++;
-        cfs_list_add_tail(&req->rq_history_list, &service->srv_request_history);
+       ptlrpc_req_add_history(svcpt, req);
 
-        if (ev->unlinked) {
-                service->srv_nrqbd_receiving--;
-                CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
-                       service->srv_nrqbd_receiving);
-
-                /* Normally, don't complain about 0 buffers posted; LNET won't
-                 * drop incoming reqs since we set the portal lazy */
-                if (test_req_buffer_pressure &&
-                    ev->type != LNET_EVENT_UNLINK &&
-                    service->srv_nrqbd_receiving == 0)
+       if (ev->unlinked) {
+               svcpt->scp_nrqbds_posted--;
+               CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
+                      svcpt->scp_nrqbds_posted);
+
+               /* Normally, don't complain about 0 buffers posted; LNET won't
+                * drop incoming reqs since we set the portal lazy */
+               if (test_req_buffer_pressure &&
+                   ev->type != LNET_EVENT_UNLINK &&
+                   svcpt->scp_nrqbds_posted == 0)
                         CWARN("All %s request buffers busy\n",
                               service->srv_name);
 
@@ -313,15 +373,15 @@ void request_in_callback(lnet_event_t *ev)
                 rqbd->rqbd_refcount++;
         }
 
-        cfs_list_add_tail(&req->rq_list, &service->srv_req_in_queue);
-        service->srv_n_queued_reqs++;
+       cfs_list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
+       svcpt->scp_nreqs_incoming++;
 
-        /* NB everything can disappear under us once the request
-         * has been queued and we unlock, so do the wake now... */
-        cfs_waitq_signal(&service->srv_waitq);
+       /* NB everything can disappear under us once the request
+        * has been queued and we unlock, so do the wake now... */
+       cfs_waitq_signal(&svcpt->scp_waitq);
 
-        cfs_spin_unlock(&service->srv_lock);
-        EXIT;
+       cfs_spin_unlock(&svcpt->scp_lock);
+       EXIT;
 }
 
 /*
@@ -329,9 +389,9 @@ void request_in_callback(lnet_event_t *ev)
  */
 void reply_out_callback(lnet_event_t *ev)
 {
-        struct ptlrpc_cb_id       *cbid = ev->md.user_ptr;
-        struct ptlrpc_reply_state *rs = cbid->cbid_arg;
-        struct ptlrpc_service     *svc = rs->rs_service;
+       struct ptlrpc_cb_id       *cbid = ev->md.user_ptr;
+       struct ptlrpc_reply_state *rs = cbid->cbid_arg;
+       struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
         ENTRY;
 
         LASSERT (ev->type == LNET_EVENT_SEND ||
@@ -352,17 +412,19 @@ void reply_out_callback(lnet_event_t *ev)
         if (ev->unlinked) {
                 /* Last network callback. The net's ref on 'rs' stays put
                  * until ptlrpc_handle_rs() is done with it */
-                cfs_spin_lock(&svc->srv_rs_lock);
-                cfs_spin_lock(&rs->rs_lock);
-                rs->rs_on_net = 0;
-                if (!rs->rs_no_ack ||
-                    rs->rs_transno <= rs->rs_export->exp_obd->obd_last_committed)
-                        ptlrpc_schedule_difficult_reply (rs);
-                cfs_spin_unlock(&rs->rs_lock);
-                cfs_spin_unlock(&svc->srv_rs_lock);
-        }
-
-        EXIT;
+               cfs_spin_lock(&svcpt->scp_rep_lock);
+               cfs_spin_lock(&rs->rs_lock);
+
+               rs->rs_on_net = 0;
+               if (!rs->rs_no_ack ||
+                   rs->rs_transno <=
+                   rs->rs_export->exp_obd->obd_last_committed)
+                       ptlrpc_schedule_difficult_reply(rs);
+
+               cfs_spin_unlock(&rs->rs_lock);
+               cfs_spin_unlock(&svcpt->scp_rep_lock);
+       }
+       EXIT;
 }
 
 #ifdef HAVE_SERVER_SUPPORT
index 5e29563..0d6ceec 100644 (file)
@@ -248,12 +248,12 @@ void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir,
 
 static int
 ptlrpc_lprocfs_read_req_history_len(char *page, char **start, off_t off,
-                                    int count, int *eof, void *data)
+                                   int count, int *eof, void *data)
 {
-        struct ptlrpc_service *svc = data;
+       struct ptlrpc_service *svc = data;
 
-        *eof = 1;
-        return snprintf(page, count, "%d\n", svc->srv_n_history_rqbds);
+       *eof = 1;
+       return snprintf(page, count, "%d\n", svc->srv_part->scp_hist_nrqbds);
 }
 
 static int
@@ -288,11 +288,11 @@ ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer,
         if (val > cfs_num_physpages/(2 * bufpages))
                 return -ERANGE;
 
-        cfs_spin_lock(&svc->srv_lock);
-        svc->srv_max_history_rqbds = val;
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svc->srv_lock);
+       svc->srv_max_history_rqbds = val;
+       cfs_spin_unlock(&svc->srv_lock);
 
-        return count;
+       return count;
 }
 
 static int
@@ -318,23 +318,27 @@ ptlrpc_lprocfs_wr_threads_min(struct file *file, const char *buffer,
         if (val < 2)
                 return -ERANGE;
 
-        if (val > svc->srv_threads_max)
-                return -ERANGE;
+       cfs_spin_lock(&svc->srv_lock);
+       if (val > svc->srv_threads_max) {
+               cfs_spin_unlock(&svc->srv_lock);
+               return -ERANGE;
+       }
 
-        cfs_spin_lock(&svc->srv_lock);
-        svc->srv_threads_min = val;
-        cfs_spin_unlock(&svc->srv_lock);
+       svc->srv_threads_min = val;
+       cfs_spin_unlock(&svc->srv_lock);
 
-        return count;
+       return count;
 }
 
 static int
 ptlrpc_lprocfs_rd_threads_started(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data)
+                                 int count, int *eof, void *data)
 {
-        struct ptlrpc_service *svc = data;
+       struct ptlrpc_service *svc = data;
 
-        return snprintf(page, count, "%d\n", svc->srv_threads_running);
+       LASSERT(svc->srv_part != NULL);
+       return snprintf(page, count, "%d\n",
+                       svc->srv_part->scp_nthrs_running);
 }
 
 static int
@@ -360,14 +364,16 @@ ptlrpc_lprocfs_wr_threads_max(struct file *file, const char *buffer,
         if (val < 2)
                 return -ERANGE;
 
-        if (val < svc->srv_threads_min)
-                return -ERANGE;
+       cfs_spin_lock(&svc->srv_lock);
+       if (val < svc->srv_threads_min) {
+               cfs_spin_unlock(&svc->srv_lock);
+               return -ERANGE;
+       }
 
-        cfs_spin_lock(&svc->srv_lock);
-        svc->srv_threads_max = val;
-        cfs_spin_unlock(&svc->srv_lock);
+       svc->srv_threads_max = val;
+       cfs_spin_unlock(&svc->srv_lock);
 
-        return count;
+       return count;
 }
 
 struct ptlrpc_srh_iterator {
@@ -376,15 +382,15 @@ struct ptlrpc_srh_iterator {
 };
 
 int
-ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc,
-                                    struct ptlrpc_srh_iterator *srhi,
-                                    __u64 seq)
+ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt,
+                                   struct ptlrpc_srh_iterator *srhi,
+                                   __u64 seq)
 {
-        cfs_list_t            *e;
-        struct ptlrpc_request *req;
+       cfs_list_t              *e;
+       struct ptlrpc_request   *req;
 
-        if (srhi->srhi_req != NULL &&
-            srhi->srhi_seq > svc->srv_request_max_cull_seq &&
+       if (srhi->srhi_req != NULL &&
+           srhi->srhi_seq > svcpt->scp_hist_seq_culled &&
             srhi->srhi_seq <= seq) {
                 /* If srhi_req was set previously, hasn't been culled and
                  * we're searching for a seq on or after it (i.e. more
@@ -393,14 +399,14 @@ ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc,
                  * be near the head), we shouldn't have to do long
                  * re-scans */
                 LASSERT (srhi->srhi_seq == srhi->srhi_req->rq_history_seq);
-                LASSERT (!cfs_list_empty(&svc->srv_request_history));
-                e = &srhi->srhi_req->rq_history_list;
-        } else {
-                /* search from start */
-                e = svc->srv_request_history.next;
-        }
-
-        while (e != &svc->srv_request_history) {
+               LASSERT(!cfs_list_empty(&svcpt->scp_hist_reqs));
+               e = &srhi->srhi_req->rq_history_list;
+       } else {
+               /* search from start */
+               e = svcpt->scp_hist_reqs.next;
+       }
+
+       while (e != &svcpt->scp_hist_reqs) {
                 req = cfs_list_entry(e, struct ptlrpc_request, rq_history_list);
 
                 if (req->rq_history_seq >= seq) {
@@ -428,9 +434,9 @@ ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos)
         srhi->srhi_seq = 0;
         srhi->srhi_req = NULL;
 
-        cfs_spin_lock(&svc->srv_lock);
-        rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svc->srv_part->scp_lock);
+       rc = ptlrpc_lprocfs_svc_req_history_seek(svc->srv_part, srhi, *pos);
+       cfs_spin_unlock(&svc->srv_part->scp_lock);
 
         if (rc == 0) {
                 *pos = srhi->srhi_seq;
@@ -452,15 +458,16 @@ ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter)
 
 static void *
 ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
-                                    void *iter, loff_t *pos)
+                                   void *iter, loff_t *pos)
 {
-        struct ptlrpc_service       *svc = s->private;
-        struct ptlrpc_srh_iterator  *srhi = iter;
-        int                          rc;
+       struct ptlrpc_service           *svc = s->private;
+       struct ptlrpc_service_part      *svcpt = svc->srv_part;
+       struct ptlrpc_srh_iterator      *srhi = iter;
+       int                             rc;
 
-        cfs_spin_lock(&svc->srv_lock);
-        rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos + 1);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
+       rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, *pos + 1);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
         if (rc != 0) {
                 OBD_FREE(srhi, sizeof(*srhi));
@@ -503,14 +510,15 @@ EXPORT_SYMBOL(target_print_req);
 
 static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
 {
-        struct ptlrpc_service      *svc = s->private;
-        struct ptlrpc_srh_iterator *srhi = iter;
-        struct ptlrpc_request      *req;
-        int                         rc;
+       struct ptlrpc_service           *svc = s->private;
+       struct ptlrpc_service_part      *svcpt = svc->srv_part;
+       struct ptlrpc_srh_iterator      *srhi = iter;
+       struct ptlrpc_request           *req;
+       int                             rc;
 
-        cfs_spin_lock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
 
-        rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, srhi->srhi_seq);
+       rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq);
 
         if (rc == 0) {
                 req = srhi->srhi_req;
@@ -534,9 +542,8 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
                        svc->srv_ops.so_req_printer(s, srhi->srhi_req);
         }
 
-        cfs_spin_unlock(&svc->srv_lock);
-
-        return rc;
+       cfs_spin_unlock(&svcpt->scp_lock);
+       return rc;
 }
 
 static int
@@ -566,19 +573,24 @@ ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
 
 /* See also lprocfs_rd_timeouts */
 static int ptlrpc_lprocfs_rd_timeouts(char *page, char **start, off_t off,
-                                      int count, int *eof, void *data)
-{
-        struct ptlrpc_service *svc = data;
-        unsigned int cur, worst;
-        time_t worstt;
-        struct dhms ts;
-        int rc = 0;
-
-        *eof = 1;
-        cur = at_get(&svc->srv_at_estimate);
-        worst = svc->srv_at_estimate.at_worst_ever;
-        worstt = svc->srv_at_estimate.at_worst_time;
-        s2dhms(&ts, cfs_time_current_sec() - worstt);
+                                     int count, int *eof, void *data)
+{
+       struct ptlrpc_service           *svc = data;
+       struct ptlrpc_service_part      *svcpt;
+       struct dhms                     ts;
+       time_t                          worstt;
+       unsigned int                    cur;
+       unsigned int                    worst;
+       int                             rc = 0;
+
+       svcpt = svc->srv_part;
+       LASSERT(svcpt != NULL);
+
+       *eof = 1;
+       cur = at_get(&svcpt->scp_at_estimate);
+       worst = svcpt->scp_at_estimate.at_worst_ever;
+       worstt = svcpt->scp_at_estimate.at_worst_time;
+       s2dhms(&ts, cfs_time_current_sec() - worstt);
         if (AT_OFF)
                 rc += snprintf(page + rc, count - rc,
                               "adaptive timeouts off, using obd_timeout %u\n",
@@ -587,9 +599,8 @@ static int ptlrpc_lprocfs_rd_timeouts(char *page, char **start, off_t off,
                        "%10s : cur %3u  worst %3u (at %ld, "DHMS_FMT" ago) ",
                        "service", cur, worst, worstt,
                        DHMS_VARS(&ts));
-        rc = lprocfs_at_hist_helper(page, count, rc,
-                                    &svc->srv_at_estimate);
-        return rc;
+       rc = lprocfs_at_hist_helper(page, count, rc, &svcpt->scp_at_estimate);
+       return rc;
 }
 
 static int ptlrpc_lprocfs_rd_hp_ratio(char *page, char **start, off_t off,
@@ -612,10 +623,11 @@ static int ptlrpc_lprocfs_wr_hp_ratio(struct file *file, const char *buffer,
         if (val < 0)
                 return -ERANGE;
 
-        cfs_spin_lock(&svc->srv_lock);
-        svc->srv_hpreq_ratio = val;
-        cfs_spin_unlock(&svc->srv_lock);
-        return count;
+       cfs_spin_lock(&svc->srv_lock);
+       svc->srv_hpreq_ratio = val;
+       cfs_spin_unlock(&svc->srv_lock);
+
+       return count;
 }
 
 void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry,
index c65ae1c..69c616f 100644 (file)
@@ -395,7 +395,8 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 
 static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
 {
-        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
+       struct ptlrpc_service_part      *svcpt = req->rq_rqbd->rqbd_svcpt;
+       struct ptlrpc_service           *svc = svcpt->scp_service;
         int service_time = max_t(int, cfs_time_current_sec() -
                                  req->rq_arrival_time.tv_sec, 1);
 
@@ -407,12 +408,14 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                 /* early replies, errors and recovery requests don't count
                  * toward our service time estimate */
-                int oldse = at_measured(&svc->srv_at_estimate, service_time);
-                if (oldse != 0)
-                        DEBUG_REQ(D_ADAPTTO, req,
-                                  "svc %s changed estimate from %d to %d",
-                                  svc->srv_name, oldse,
-                                  at_get(&svc->srv_at_estimate));
+               int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
+
+               if (oldse != 0) {
+                       DEBUG_REQ(D_ADAPTTO, req,
+                                 "svc %s changed estimate from %d to %d",
+                                 svc->srv_name, oldse,
+                                 at_get(&svcpt->scp_at_estimate));
+               }
         }
         /* Report actual service time for client latency calc */
         lustre_msg_set_service_time(req->rq_repmsg, service_time);
@@ -424,7 +427,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                 lustre_msg_set_timeout(req->rq_repmsg, 0);
         else
                 lustre_msg_set_timeout(req->rq_repmsg,
-                                       at_get(&svc->srv_at_estimate));
+                                      at_get(&svcpt->scp_at_estimate));
 
         if (req->rq_reqmsg &&
             !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
@@ -444,7 +447,6 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
  */
 int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
 {
-        struct ptlrpc_service     *svc = req->rq_rqbd->rqbd_service;
         struct ptlrpc_reply_state *rs = req->rq_reply_state;
         struct ptlrpc_connection  *conn;
         int                        rc;
@@ -518,7 +520,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
         rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                            (rs->rs_difficult && !rs->rs_no_ack) ?
                            LNET_ACK_REQ : LNET_NOACK_REQ,
-                           &rs->rs_cb_id, conn, svc->srv_rep_portal,
+                          &rs->rs_cb_id, conn,
+                          ptlrpc_req2svc(req)->srv_rep_portal,
                            req->rq_xid, req->rq_reply_off);
 out:
         if (unlikely(rc != 0))
@@ -760,9 +763,9 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
  */
 int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
 {
-        struct ptlrpc_service   *service = rqbd->rqbd_service;
-        static lnet_process_id_t  match_id = {LNET_NID_ANY, LNET_PID_ANY};
-        int                      rc;
+       struct ptlrpc_service     *service = rqbd->rqbd_svcpt->scp_service;
+       static lnet_process_id_t  match_id = {LNET_NID_ANY, LNET_PID_ANY};
+       int                       rc;
         lnet_md_t                 md;
         lnet_handle_me_t          me_h;
 
index 545d767..edc835d 100644 (file)
@@ -275,49 +275,51 @@ do {                                            \
 # define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0)
 #endif
 
-struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc)
-{
-        struct ptlrpc_reply_state *rs = NULL;
-
-        cfs_spin_lock(&svc->srv_rs_lock);
-        /* See if we have anything in a pool, and wait if nothing */
-        while (cfs_list_empty(&svc->srv_free_rs_list)) {
-                struct l_wait_info lwi;
-                int rc;
-                cfs_spin_unlock(&svc->srv_rs_lock);
-                /* If we cannot get anything for some long time, we better
-                   bail out instead of waiting infinitely */
-                lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
-                rc = l_wait_event(svc->srv_free_rs_waitq,
-                                  !cfs_list_empty(&svc->srv_free_rs_list),
-                                  &lwi);
-                if (rc)
-                        goto out;
-                cfs_spin_lock(&svc->srv_rs_lock);
-        }
-
-        rs = cfs_list_entry(svc->srv_free_rs_list.next,
-                            struct ptlrpc_reply_state, rs_list);
-        cfs_list_del(&rs->rs_list);
-        cfs_spin_unlock(&svc->srv_rs_lock);
-        LASSERT(rs);
-        memset(rs, 0, svc->srv_max_reply_size);
-        rs->rs_service = svc;
-        rs->rs_prealloc = 1;
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt)
+{
+       struct ptlrpc_reply_state *rs = NULL;
+
+       cfs_spin_lock(&svcpt->scp_rep_lock);
+
+       /* See if we have anything in a pool, and wait if nothing */
+       while (cfs_list_empty(&svcpt->scp_rep_idle)) {
+               struct l_wait_info      lwi;
+               int                     rc;
+
+               cfs_spin_unlock(&svcpt->scp_rep_lock);
+               /* If we cannot get anything for a long time, we had better
+                * bail out instead of waiting indefinitely */
+               lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
+               rc = l_wait_event(svcpt->scp_rep_waitq,
+                                 !cfs_list_empty(&svcpt->scp_rep_idle), &lwi);
+               if (rc != 0)
+                       goto out;
+               cfs_spin_lock(&svcpt->scp_rep_lock);
+       }
+
+       rs = cfs_list_entry(svcpt->scp_rep_idle.next,
+                           struct ptlrpc_reply_state, rs_list);
+       cfs_list_del(&rs->rs_list);
+
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
+
+       LASSERT(rs != NULL);
+       memset(rs, 0, svcpt->scp_service->srv_max_reply_size);
+       rs->rs_svcpt = svcpt;
+       rs->rs_prealloc = 1;
 out:
-        return rs;
+       return rs;
 }
 
 void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
 {
-        struct ptlrpc_service *svc = rs->rs_service;
-
-        LASSERT(svc);
+       struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
 
-        cfs_spin_lock(&svc->srv_rs_lock);
-        cfs_list_add(&rs->rs_list, &svc->srv_free_rs_list);
-        cfs_spin_unlock(&svc->srv_rs_lock);
-        cfs_waitq_signal(&svc->srv_free_rs_waitq);
+       cfs_spin_lock(&svcpt->scp_rep_lock);
+       cfs_list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
+       cfs_waitq_signal(&svcpt->scp_rep_waitq);
 }
 
 int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
@@ -344,7 +346,7 @@ int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
         cfs_atomic_set(&rs->rs_refcount, 1);    /* 1 ref for rq_reply_state */
         rs->rs_cb_id.cbid_fn = reply_out_callback;
         rs->rs_cb_id.cbid_arg = rs;
-        rs->rs_service = req->rq_rqbd->rqbd_service;
+       rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt;
         CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
         CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
         CFS_INIT_LIST_HEAD(&rs->rs_list);
index 7ce2dcc..2282bf8 100644 (file)
@@ -47,6 +47,7 @@ struct ldlm_res_id;
 struct ptlrpc_request_set;
 extern int test_req_buffer_pressure;
 
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt);
 /* ptlrpcd.c */
 int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
 
@@ -93,7 +94,8 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
                           int pageoffset, int len);
 
 /* pack_generic.c */
-struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc);
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt);
 void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
 
 /* pinger.c */
index cd06da6..f9487b2 100644 (file)
@@ -231,7 +231,6 @@ EXPORT_SYMBOL(ptlrpc_commit_replies);
 EXPORT_SYMBOL(ptlrpc_register_service);
 EXPORT_SYMBOL(ptlrpc_stop_all_threads);
 EXPORT_SYMBOL(ptlrpc_start_threads);
-EXPORT_SYMBOL(ptlrpc_start_thread);
 EXPORT_SYMBOL(ptlrpc_unregister_service);
 EXPORT_SYMBOL(ptlrpc_service_health_check);
 EXPORT_SYMBOL(ptlrpc_hpreq_reorder);
index 66cf583..d876a22 100644 (file)
@@ -2108,7 +2108,7 @@ int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
         rc = policy->sp_sops->alloc_rs(req, msglen);
         if (unlikely(rc == -ENOMEM)) {
                 /* failed alloc, try emergency pool */
-                rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
+               rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_svcpt);
                 if (rs == NULL)
                         RETURN(-ENOMEM);
 
index d807e49..c0960e3 100644 (file)
@@ -63,22 +63,23 @@ CFS_MODULE_PARM(at_extra, "i", int, 0644,
 
 
 /* forward ref */
-static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc);
+static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
 static void ptlrpc_hpreq_fini(struct ptlrpc_request *req);
 
 static CFS_LIST_HEAD(ptlrpc_all_services);
 cfs_spinlock_t ptlrpc_all_services_lock;
 
 struct ptlrpc_request_buffer_desc *
-ptlrpc_alloc_rqbd (struct ptlrpc_service *svc)
+ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
 {
-        struct ptlrpc_request_buffer_desc *rqbd;
+       struct ptlrpc_service             *svc = svcpt->scp_service;
+       struct ptlrpc_request_buffer_desc *rqbd;
 
-        OBD_ALLOC_PTR(rqbd);
-        if (rqbd == NULL)
-                return (NULL);
+       OBD_ALLOC_PTR(rqbd);
+       if (rqbd == NULL)
+               return NULL;
 
-        rqbd->rqbd_service = svc;
+       rqbd->rqbd_svcpt = svcpt;
         rqbd->rqbd_refcount = 0;
         rqbd->rqbd_cbid.cbid_fn = request_in_callback;
         rqbd->rqbd_cbid.cbid_arg = rqbd;
@@ -90,34 +91,35 @@ ptlrpc_alloc_rqbd (struct ptlrpc_service *svc)
                 return (NULL);
         }
 
-        cfs_spin_lock(&svc->srv_lock);
-        cfs_list_add(&rqbd->rqbd_list, &svc->srv_idle_rqbds);
-        svc->srv_nbufs++;
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
+       cfs_list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+       svcpt->scp_nrqbds_total++;
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        return (rqbd);
+       return rqbd;
 }
 
 void
-ptlrpc_free_rqbd (struct ptlrpc_request_buffer_desc *rqbd)
+ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
 {
-        struct ptlrpc_service *svc = rqbd->rqbd_service;
+       struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
 
-        LASSERT (rqbd->rqbd_refcount == 0);
-        LASSERT (cfs_list_empty(&rqbd->rqbd_reqs));
+       LASSERT(rqbd->rqbd_refcount == 0);
+       LASSERT(cfs_list_empty(&rqbd->rqbd_reqs));
 
-        cfs_spin_lock(&svc->srv_lock);
-        cfs_list_del(&rqbd->rqbd_list);
-        svc->srv_nbufs--;
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
+       cfs_list_del(&rqbd->rqbd_list);
+       svcpt->scp_nrqbds_total--;
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        OBD_FREE_LARGE(rqbd->rqbd_buffer, svc->srv_buf_size);
-        OBD_FREE_PTR(rqbd);
+       OBD_FREE_LARGE(rqbd->rqbd_buffer, svcpt->scp_service->srv_buf_size);
+       OBD_FREE_PTR(rqbd);
 }
 
 int
-ptlrpc_grow_req_bufs(struct ptlrpc_service *svc)
+ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt)
 {
+       struct ptlrpc_service             *svc = svcpt->scp_service;
         struct ptlrpc_request_buffer_desc *rqbd;
         int                                rc = 0;
         int                                i;
@@ -125,10 +127,10 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service *svc)
         for (i = 0; i < svc->srv_nbuf_per_group; i++) {
                 /* NB: another thread might be doing this as well, we need to
                  * make sure that it wouldn't over-allocate, see LU-1212. */
-                if (svc->srv_nrqbd_receiving >= svc->srv_nbuf_per_group)
-                        break;
+               if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+                       break;
 
-                rqbd = ptlrpc_alloc_rqbd(svc);
+               rqbd = ptlrpc_alloc_rqbd(svcpt);
 
                 if (rqbd == NULL) {
                         CERROR("%s: Can't allocate request buffer\n",
@@ -137,18 +139,18 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service *svc)
                         break;
                 }
 
-                if (ptlrpc_server_post_idle_rqbds(svc) < 0) {
-                        rc = -EAGAIN;
-                        break;
-                }
-        }
+               if (ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
+                       rc = -EAGAIN;
+                       break;
+               }
+       }
 
-        CDEBUG(D_RPCTRACE,
-               "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
-               svc->srv_name, i, svc->srv_buf_size,
-               svc->srv_nrqbd_receiving, svc->srv_nbufs, rc);
+       CDEBUG(D_RPCTRACE,
+              "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
+              svc->srv_name, i, svc->srv_buf_size,
+              svcpt->scp_nrqbds_posted, svcpt->scp_nrqbds_total, rc);
 
-        return rc;
+       return rc;
 }
 
 /**
@@ -197,9 +199,9 @@ struct ptlrpc_hr_service {
 };
 
 struct rs_batch {
-        cfs_list_t              rsb_replies;
-        struct ptlrpc_service  *rsb_svc;
-        unsigned int            rsb_n_replies;
+       cfs_list_t                      rsb_replies;
+       unsigned int                    rsb_n_replies;
+       struct ptlrpc_service_part      *rsb_svcpt;
 };
 
 /**
@@ -271,15 +273,15 @@ static void rs_batch_dispatch(struct rs_batch *b)
  */
 static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
 {
-        struct ptlrpc_service *svc = rs->rs_service;
+       struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
 
-        if (svc != b->rsb_svc || b->rsb_n_replies >= MAX_SCHEDULED) {
-                if (b->rsb_svc != NULL) {
-                        rs_batch_dispatch(b);
-                        cfs_spin_unlock(&b->rsb_svc->srv_rs_lock);
-                }
-                cfs_spin_lock(&svc->srv_rs_lock);
-                b->rsb_svc = svc;
+       if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) {
+               if (b->rsb_svcpt != NULL) {
+                       rs_batch_dispatch(b);
+                       cfs_spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+               }
+               cfs_spin_lock(&svcpt->scp_rep_lock);
+               b->rsb_svcpt = svcpt;
         }
         cfs_spin_lock(&rs->rs_lock);
         rs->rs_scheduled_ever = 1;
@@ -301,10 +303,10 @@ static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
  */
 static void rs_batch_fini(struct rs_batch *b)
 {
-        if (b->rsb_svc != 0) {
-                rs_batch_dispatch(b);
-                cfs_spin_unlock(&b->rsb_svc->srv_rs_lock);
-        }
+       if (b->rsb_svcpt != NULL) {
+               rs_batch_dispatch(b);
+               cfs_spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+       }
 }
 
 #define DECLARE_RS_BATCH(b)     struct rs_batch b
@@ -338,16 +340,16 @@ void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
         cfs_waitq_signal(&hr->hr_threads[idx].hrt_wait);
         EXIT;
 #else
-        cfs_list_add_tail(&rs->rs_list, &rs->rs_service->srv_reply_queue);
+       cfs_list_add_tail(&rs->rs_list, &rs->rs_svcpt->scp_rep_queue);
 #endif
 }
 
 void
-ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs)
+ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
 {
-        ENTRY;
+       ENTRY;
 
-        LASSERT_SPIN_LOCKED(&rs->rs_service->srv_rs_lock);
+       LASSERT_SPIN_LOCKED(&rs->rs_svcpt->scp_rep_lock);
         LASSERT_SPIN_LOCKED(&rs->rs_lock);
         LASSERT (rs->rs_difficult);
         rs->rs_scheduled_ever = 1;  /* flag any notification attempt */
@@ -391,58 +393,61 @@ void ptlrpc_commit_replies(struct obd_export *exp)
 }
 
 static int
-ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc)
+ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
 {
-        struct ptlrpc_request_buffer_desc *rqbd;
-        int                                rc;
-        int                                posted = 0;
+       struct ptlrpc_request_buffer_desc *rqbd;
+       int                               rc;
+       int                               posted = 0;
 
-        for (;;) {
-                cfs_spin_lock(&svc->srv_lock);
+       for (;;) {
+               cfs_spin_lock(&svcpt->scp_lock);
 
-                if (cfs_list_empty (&svc->srv_idle_rqbds)) {
-                        cfs_spin_unlock(&svc->srv_lock);
-                        return (posted);
-                }
+               if (cfs_list_empty(&svcpt->scp_rqbd_idle)) {
+                       cfs_spin_unlock(&svcpt->scp_lock);
+                       return posted;
+               }
 
-                rqbd = cfs_list_entry(svc->srv_idle_rqbds.next,
-                                      struct ptlrpc_request_buffer_desc,
-                                      rqbd_list);
-                cfs_list_del (&rqbd->rqbd_list);
+               rqbd = cfs_list_entry(svcpt->scp_rqbd_idle.next,
+                                     struct ptlrpc_request_buffer_desc,
+                                     rqbd_list);
+               cfs_list_del(&rqbd->rqbd_list);
 
-                /* assume we will post successfully */
-                svc->srv_nrqbd_receiving++;
-                cfs_list_add (&rqbd->rqbd_list, &svc->srv_active_rqbds);
+               /* assume we will post successfully */
+               svcpt->scp_nrqbds_posted++;
+               cfs_list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
 
-                cfs_spin_unlock(&svc->srv_lock);
+               cfs_spin_unlock(&svcpt->scp_lock);
 
-                rc = ptlrpc_register_rqbd(rqbd);
-                if (rc != 0)
-                        break;
+               rc = ptlrpc_register_rqbd(rqbd);
+               if (rc != 0)
+                       break;
 
-                posted = 1;
-        }
+               posted = 1;
+       }
 
-        cfs_spin_lock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
 
-        svc->srv_nrqbd_receiving--;
-        cfs_list_del(&rqbd->rqbd_list);
-        cfs_list_add_tail(&rqbd->rqbd_list, &svc->srv_idle_rqbds);
+       svcpt->scp_nrqbds_posted--;
+       cfs_list_del(&rqbd->rqbd_list);
+       cfs_list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
 
-        /* Don't complain if no request buffers are posted right now; LNET
-         * won't drop requests because we set the portal lazy! */
+       /* Don't complain if no request buffers are posted right now; LNET
+        * won't drop requests because we set the portal lazy! */
 
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        return (-1);
+       return -1;
 }
 
 static void ptlrpc_at_timer(unsigned long castmeharder)
 {
-        struct ptlrpc_service *svc = (struct ptlrpc_service *)castmeharder;
-        svc->srv_at_check = 1;
-        svc->srv_at_checktime = cfs_time_current();
-        cfs_waitq_signal(&svc->srv_waitq);
+       struct ptlrpc_service_part *svcpt;
+
+       svcpt = (struct ptlrpc_service_part *)castmeharder;
+
+       svcpt->scp_at_check = 1;
+       svcpt->scp_at_checktime = cfs_time_current();
+       cfs_waitq_signal(&svcpt->scp_waitq);
 }
 
 static void
@@ -493,6 +498,97 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service_conf *conf,
 }
 
 /**
+ * Initialize per-partition (per-CPT) data for a service
+ */
+static int
+ptlrpc_service_part_init(struct ptlrpc_service *svc,
+                        struct ptlrpc_service_part *svcpt)
+{
+       struct ptlrpc_at_array  *array;
+       int                     size;
+       int                     index;
+       int                     rc;
+
+       CFS_INIT_LIST_HEAD(&svcpt->scp_threads);
+
+       /* rqbd and incoming request queue */
+       cfs_spin_lock_init(&svcpt->scp_lock);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_req_incoming);
+       cfs_waitq_init(&svcpt->scp_waitq);
+       /* history request & rqbd list */
+       CFS_INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
+
+       /* active requests and hp requests */
+       cfs_spin_lock_init(&svcpt->scp_req_lock);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_req_pending);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_hreq_pending);
+
+       /* reply states */
+       cfs_spin_lock_init(&svcpt->scp_rep_lock);
+       CFS_INIT_LIST_HEAD(&svcpt->scp_rep_active);
+#ifndef __KERNEL__
+       CFS_INIT_LIST_HEAD(&svcpt->scp_rep_queue);
+#endif
+       CFS_INIT_LIST_HEAD(&svcpt->scp_rep_idle);
+       cfs_waitq_init(&svcpt->scp_rep_waitq);
+       cfs_atomic_set(&svcpt->scp_nreps_difficult, 0);
+
+       /* adaptive timeout */
+       cfs_spin_lock_init(&svcpt->scp_at_lock);
+       array = &svcpt->scp_at_array;
+
+       size = at_est2timeout(at_max);
+       array->paa_size     = size;
+       array->paa_count    = 0;
+       array->paa_deadline = -1;
+
+       /* allocate memory for scp_at_array (ptlrpc_at_array) */
+       OBD_ALLOC(array->paa_reqs_array, sizeof(cfs_list_t) * size);
+       if (array->paa_reqs_array == NULL)
+               return -ENOMEM;
+
+       for (index = 0; index < size; index++)
+               CFS_INIT_LIST_HEAD(&array->paa_reqs_array[index]);
+
+       OBD_ALLOC(array->paa_reqs_count, sizeof(__u32) * size);
+       if (array->paa_reqs_count == NULL)
+               goto failed;
+
+       cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
+       /* At SOW, service time should be quick; 10s seems generous. If client
+        * timeout is less than this, we'll be sending an early reply. */
+       at_init(&svcpt->scp_at_estimate, 10, 0);
+
+       /* assign this before calling ptlrpc_grow_req_bufs */
+       svcpt->scp_service = svc;
+       /* Now allocate the request buffers, but don't post them now */
+       rc = ptlrpc_grow_req_bufs(svcpt);
+       /* We shouldn't be under memory pressure at startup, so
+        * fail if we can't allocate all our buffers at this time. */
+       if (rc != 0)
+               goto failed;
+
+       return 0;
+
+ failed:
+       if (array->paa_reqs_count != NULL) {
+               OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size);
+               array->paa_reqs_count = NULL;
+       }
+
+       if (array->paa_reqs_array != NULL) {
+               OBD_FREE(array->paa_reqs_array,
+                        sizeof(cfs_list_t) * array->paa_size);
+               array->paa_reqs_array = NULL;
+       }
+
+       return -ENOMEM;
+}
+
+/**
  * Initialize service on a given portal.
  * This includes starting serving threads , allocating and posting rqbds and
  * so on.
@@ -501,11 +597,8 @@ struct ptlrpc_service *
 ptlrpc_register_service(struct ptlrpc_service_conf *conf,
                        cfs_proc_dir_entry_t *proc_entry)
 {
-       struct ptlrpc_service   *service;
-       struct ptlrpc_at_array  *array;
-       unsigned int            index;
-       unsigned int            size;
-       int                     rc;
+       struct ptlrpc_service           *service;
+       int                             rc;
        ENTRY;
 
        LASSERT(conf->psc_buf.bc_nbufs > 0);
@@ -517,16 +610,13 @@ ptlrpc_register_service(struct ptlrpc_service_conf *conf,
        if (service == NULL)
                RETURN(ERR_PTR(-ENOMEM));
 
-        /* First initialise enough for early teardown */
-
-        cfs_spin_lock_init(&service->srv_lock);
-        cfs_spin_lock_init(&service->srv_rq_lock);
-        cfs_spin_lock_init(&service->srv_rs_lock);
-        CFS_INIT_LIST_HEAD(&service->srv_threads);
-        cfs_waitq_init(&service->srv_waitq);
-
+       /* public members */
+       cfs_spin_lock_init(&service->srv_lock);
        service->srv_name               = conf->psc_name;
        service->srv_watchdog_factor    = conf->psc_watchdog_factor;
+       CFS_INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */
+
+       /* buffer configuration */
        service->srv_nbuf_per_group     = test_req_buffer_pressure ?
                                          1 : conf->psc_buf.bc_nbufs;
        service->srv_max_req_size       = conf->psc_buf.bc_req_max_size +
@@ -534,8 +624,12 @@ ptlrpc_register_service(struct ptlrpc_service_conf *conf,
        service->srv_buf_size           = conf->psc_buf.bc_buf_size;
        service->srv_rep_portal         = conf->psc_buf.bc_rep_portal;
        service->srv_req_portal         = conf->psc_buf.bc_req_portal;
-       service->srv_request_seq        = 1; /* valid seq #s start at 1 */
-       service->srv_request_max_cull_seq = 0;
+
+       /* Increase max reply size to next power of two */
+       service->srv_max_reply_size = 1;
+       while (service->srv_max_reply_size <
+              conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
+               service->srv_max_reply_size <<= 1;
 
        ptlrpc_server_nthreads_check(conf, &service->srv_threads_min,
                                     &service->srv_threads_max);
@@ -544,71 +638,23 @@ ptlrpc_register_service(struct ptlrpc_service_conf *conf,
        service->srv_ctx_tags           = conf->psc_thr.tc_ctx_tags;
        service->srv_cpu_affinity       = !!conf->psc_thr.tc_cpu_affinity;
        service->srv_hpreq_ratio        = PTLRPC_SVC_HP_RATIO;
-       service->srv_hpreq_count        = 0;
-       service->srv_n_active_hpreq     = 0;
        service->srv_ops                = conf->psc_ops;
 
-        rc = LNetSetLazyPortal(service->srv_req_portal);
-        LASSERT (rc == 0);
-
-        CFS_INIT_LIST_HEAD(&service->srv_request_queue);
-        CFS_INIT_LIST_HEAD(&service->srv_request_hpq);
-        CFS_INIT_LIST_HEAD(&service->srv_idle_rqbds);
-        CFS_INIT_LIST_HEAD(&service->srv_active_rqbds);
-        CFS_INIT_LIST_HEAD(&service->srv_history_rqbds);
-        CFS_INIT_LIST_HEAD(&service->srv_request_history);
-        CFS_INIT_LIST_HEAD(&service->srv_active_replies);
-#ifndef __KERNEL__
-        CFS_INIT_LIST_HEAD(&service->srv_reply_queue);
-#endif
-        CFS_INIT_LIST_HEAD(&service->srv_free_rs_list);
-        cfs_waitq_init(&service->srv_free_rs_waitq);
-        cfs_atomic_set(&service->srv_n_difficult_replies, 0);
-
-        cfs_spin_lock_init(&service->srv_at_lock);
-        CFS_INIT_LIST_HEAD(&service->srv_req_in_queue);
-
-        array = &service->srv_at_array;
-        size = at_est2timeout(at_max);
-        array->paa_size = size;
-        array->paa_count = 0;
-        array->paa_deadline = -1;
-
-        /* allocate memory for srv_at_array (ptlrpc_at_array) */
-        OBD_ALLOC(array->paa_reqs_array, sizeof(cfs_list_t) * size);
-        if (array->paa_reqs_array == NULL)
+       OBD_ALLOC_PTR(service->srv_part);
+       if (service->srv_part == NULL)
                GOTO(failed, rc = -ENOMEM);
 
-       for (index = 0; index < size; index++)
-               CFS_INIT_LIST_HEAD(&array->paa_reqs_array[index]);
-
-       OBD_ALLOC(array->paa_reqs_count, sizeof(__u32) * size);
-       if (array->paa_reqs_count == NULL)
-               GOTO(failed, rc = -ENOMEM);
+       rc = ptlrpc_service_part_init(service, service->srv_part);
+       if (rc != 0)
+               GOTO(failed, rc);
 
-        cfs_timer_init(&service->srv_at_timer, ptlrpc_at_timer, service);
-        /* At SOW, service time should be quick; 10s seems generous. If client
-           timeout is less than this, we'll be sending an early reply. */
-        at_init(&service->srv_at_estimate, 10, 0);
+       rc = LNetSetLazyPortal(service->srv_req_portal);
+       LASSERT(rc == 0);
 
         cfs_spin_lock (&ptlrpc_all_services_lock);
         cfs_list_add (&service->srv_list, &ptlrpc_all_services);
         cfs_spin_unlock (&ptlrpc_all_services_lock);
 
-        /* Now allocate the request buffers */
-        rc = ptlrpc_grow_req_bufs(service);
-        /* We shouldn't be under memory pressure at startup, so
-         * fail if we can't post all our buffers at this time. */
-        if (rc != 0)
-               GOTO(failed, rc = -ENOMEM);
-
-        /* Now allocate pool of reply buffers */
-        /* Increase max reply size to next power of two */
-        service->srv_max_reply_size = 1;
-        while (service->srv_max_reply_size <
-              conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
-                service->srv_max_reply_size <<= 1;
-
         if (proc_entry != NULL)
                 ptlrpc_lprocfs_register_service(proc_entry, service);
 
@@ -659,8 +705,9 @@ static void ptlrpc_server_free_request(struct ptlrpc_request *req)
  */
 void ptlrpc_server_drop_request(struct ptlrpc_request *req)
 {
-        struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
-        struct ptlrpc_service             *svc = rqbd->rqbd_service;
+       struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
+       struct ptlrpc_service_part        *svcpt = rqbd->rqbd_svcpt;
+       struct ptlrpc_service             *svc = svcpt->scp_service;
         int                                refcount;
         cfs_list_t                        *tmp;
         cfs_list_t                        *nxt;
@@ -668,9 +715,9 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
         if (!cfs_atomic_dec_and_test(&req->rq_refcount))
                 return;
 
-        cfs_spin_lock(&svc->srv_at_lock);
-        if (req->rq_at_linked) {
-                struct ptlrpc_at_array *array = &svc->srv_at_array;
+       cfs_spin_lock(&svcpt->scp_at_lock);
+       if (req->rq_at_linked) {
+               struct ptlrpc_at_array *array = &svcpt->scp_at_array;
                 __u32 index = req->rq_at_index;
 
                 LASSERT(!cfs_list_empty(&req->rq_timed_list));
@@ -682,7 +729,8 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
                 array->paa_count--;
         } else
                 LASSERT(cfs_list_empty(&req->rq_timed_list));
-        cfs_spin_unlock(&svc->srv_at_lock);
+
+       cfs_spin_unlock(&svcpt->scp_at_lock);
 
         /* finalize request */
         if (req->rq_export) {
@@ -690,7 +738,7 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
                 req->rq_export = NULL;
         }
 
-        cfs_spin_lock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
 
         cfs_list_add(&req->rq_list, &rqbd->rqbd_reqs);
 
@@ -698,18 +746,19 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
         if (refcount == 0) {
                 /* request buffer is now idle: add to history */
                 cfs_list_del(&rqbd->rqbd_list);
-                cfs_list_add_tail(&rqbd->rqbd_list, &svc->srv_history_rqbds);
-                svc->srv_n_history_rqbds++;
 
-                /* cull some history?
-                 * I expect only about 1 or 2 rqbds need to be recycled here */
-                while (svc->srv_n_history_rqbds > svc->srv_max_history_rqbds) {
-                        rqbd = cfs_list_entry(svc->srv_history_rqbds.next,
-                                              struct ptlrpc_request_buffer_desc,
-                                              rqbd_list);
+               cfs_list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
+               svcpt->scp_hist_nrqbds++;
+
+               /* cull some history?
+                * I expect only about 1 or 2 rqbds need to be recycled here */
+               while (svcpt->scp_hist_nrqbds > svc->srv_max_history_rqbds) {
+                       rqbd = cfs_list_entry(svcpt->scp_hist_rqbds.next,
+                                             struct ptlrpc_request_buffer_desc,
+                                             rqbd_list);
 
-                        cfs_list_del(&rqbd->rqbd_list);
-                        svc->srv_n_history_rqbds--;
+                       cfs_list_del(&rqbd->rqbd_list);
+                       svcpt->scp_hist_nrqbds--;
 
                         /* remove rqbd's reqs from svc's req history while
                          * I've got the service lock */
@@ -717,14 +766,15 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
                                 req = cfs_list_entry(tmp, struct ptlrpc_request,
                                                      rq_list);
                                 /* Track the highest culled req seq */
-                                if (req->rq_history_seq >
-                                    svc->srv_request_max_cull_seq)
-                                        svc->srv_request_max_cull_seq =
-                                                req->rq_history_seq;
-                                cfs_list_del(&req->rq_history_list);
-                        }
+                               if (req->rq_history_seq >
+                                   svcpt->scp_hist_seq_culled) {
+                                       svcpt->scp_hist_seq_culled =
+                                               req->rq_history_seq;
+                               }
+                               cfs_list_del(&req->rq_history_list);
+                       }
 
-                        cfs_spin_unlock(&svc->srv_lock);
+                       cfs_spin_unlock(&svcpt->scp_lock);
 
                         cfs_list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) {
                                 req = cfs_list_entry(rqbd->rqbd_reqs.next,
@@ -734,46 +784,47 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req)
                                 ptlrpc_server_free_request(req);
                         }
 
-                        cfs_spin_lock(&svc->srv_lock);
-                        /*
-                         * now all reqs including the embedded req has been
-                         * disposed, schedule request buffer for re-use.
-                         */
-                        LASSERT(cfs_atomic_read(&rqbd->rqbd_req.rq_refcount) ==
-                                0);
-                        cfs_list_add_tail(&rqbd->rqbd_list,
-                                          &svc->srv_idle_rqbds);
-                }
-
-                cfs_spin_unlock(&svc->srv_lock);
-        } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
-                /* If we are low on memory, we are not interested in history */
-                cfs_list_del(&req->rq_list);
-                cfs_list_del_init(&req->rq_history_list);
-                cfs_spin_unlock(&svc->srv_lock);
-
-                ptlrpc_server_free_request(req);
-        } else {
-                cfs_spin_unlock(&svc->srv_lock);
-        }
+                       cfs_spin_lock(&svcpt->scp_lock);
+                       /*
+                        * now all reqs including the embedded req have been
+                        * disposed, schedule request buffer for re-use.
+                        */
+                       LASSERT(cfs_atomic_read(&rqbd->rqbd_req.rq_refcount) ==
+                               0);
+                       cfs_list_add_tail(&rqbd->rqbd_list,
+                                         &svcpt->scp_rqbd_idle);
+               }
+
+               cfs_spin_unlock(&svcpt->scp_lock);
+       } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
+               /* If we are low on memory, we are not interested in history */
+               cfs_list_del(&req->rq_list);
+               cfs_list_del_init(&req->rq_history_list);
+
+               cfs_spin_unlock(&svcpt->scp_lock);
+
+               ptlrpc_server_free_request(req);
+       } else {
+               cfs_spin_unlock(&svcpt->scp_lock);
+       }
 }
 
 /**
  * to finish a request: stop sending more early replies, and release
  * the request. should be called after we finished handling the request.
  */
-static void ptlrpc_server_finish_request(struct ptlrpc_service *svc,
-                                         struct ptlrpc_request *req)
+static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
+                                        struct ptlrpc_request *req)
 {
-        ptlrpc_hpreq_fini(req);
+       ptlrpc_hpreq_fini(req);
 
-        cfs_spin_lock(&svc->srv_rq_lock);
-        svc->srv_n_active_reqs--;
-        if (req->rq_hp)
-                svc->srv_n_active_hpreq--;
-        cfs_spin_unlock(&svc->srv_rq_lock);
+       cfs_spin_lock(&svcpt->scp_req_lock);
+       svcpt->scp_nreqs_active--;
+       if (req->rq_hp)
+               svcpt->scp_nhreqs_active--;
+       cfs_spin_unlock(&svcpt->scp_req_lock);
 
-        ptlrpc_server_drop_request(req);
+       ptlrpc_server_drop_request(req);
 }
 
 /**
@@ -906,35 +957,37 @@ static int ptlrpc_check_req(struct ptlrpc_request *req)
         return rc;
 }
 
-static void ptlrpc_at_set_timer(struct ptlrpc_service *svc)
+static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
 {
-        struct ptlrpc_at_array *array = &svc->srv_at_array;
-        __s32 next;
+       struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+       __s32 next;
 
-        cfs_spin_lock(&svc->srv_at_lock);
-        if (array->paa_count == 0) {
-                cfs_timer_disarm(&svc->srv_at_timer);
-                cfs_spin_unlock(&svc->srv_at_lock);
-                return;
-        }
+       cfs_spin_lock(&svcpt->scp_at_lock);
+       if (array->paa_count == 0) {
+               cfs_timer_disarm(&svcpt->scp_at_timer);
+               cfs_spin_unlock(&svcpt->scp_at_lock);
+               return;
+       }
 
-        /* Set timer for closest deadline */
-        next = (__s32)(array->paa_deadline - cfs_time_current_sec() -
-                       at_early_margin);
-        if (next <= 0)
-                ptlrpc_at_timer((unsigned long)svc);
-        else
-                cfs_timer_arm(&svc->srv_at_timer, cfs_time_shift(next));
-        cfs_spin_unlock(&svc->srv_at_lock);
-        CDEBUG(D_INFO, "armed %s at %+ds\n", svc->srv_name, next);
+       /* Set timer for closest deadline */
+       next = (__s32)(array->paa_deadline - cfs_time_current_sec() -
+                      at_early_margin);
+       if (next <= 0) {
+               ptlrpc_at_timer((unsigned long)svcpt);
+       } else {
+               cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
+               CDEBUG(D_INFO, "armed %s at %+ds\n",
+                      svcpt->scp_service->srv_name, next);
+       }
+       cfs_spin_unlock(&svcpt->scp_at_lock);
 }
 
 /* Add rpc to early reply check list */
 static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
 {
-        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
+       struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+       struct ptlrpc_at_array *array = &svcpt->scp_at_array;
         struct ptlrpc_request *rq = NULL;
-        struct ptlrpc_at_array *array = &svc->srv_at_array;
         __u32 index;
         int found = 0;
 
@@ -947,7 +1000,7 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
         if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
                 return(-ENOSYS);
 
-        cfs_spin_lock(&svc->srv_at_lock);
+       cfs_spin_lock(&svcpt->scp_at_lock);
         LASSERT(cfs_list_empty(&req->rq_timed_list));
 
         index = (unsigned long)req->rq_deadline % array->paa_size;
@@ -980,17 +1033,17 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
                 array->paa_deadline = req->rq_deadline;
                 found = 1;
         }
-        cfs_spin_unlock(&svc->srv_at_lock);
+       cfs_spin_unlock(&svcpt->scp_at_lock);
 
-        if (found)
-                ptlrpc_at_set_timer(svc);
+       if (found)
+               ptlrpc_at_set_timer(svcpt);
 
-        return 0;
+       return 0;
 }
 
 static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 {
-        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
+       struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
         struct ptlrpc_request *reqcopy;
         struct lustre_msg *reqmsg;
         cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
@@ -1003,8 +1056,8 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
         DEBUG_REQ(D_ADAPTTO, req,
                   "%ssending early reply (deadline %+lds, margin %+lds) for "
                   "%d+%d", AT_OFF ? "AT off - not " : "",
-                  olddl, olddl - at_get(&svc->srv_at_estimate),
-                  at_get(&svc->srv_at_estimate), at_extra);
+                 olddl, olddl - at_get(&svcpt->scp_at_estimate),
+                 at_get(&svcpt->scp_at_estimate), at_extra);
 
         if (AT_OFF)
                 RETURN(0);
@@ -1033,28 +1086,28 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                  * during the recovery period send at least 4 early replies,
                  * spacing them every at_extra if we can. at_estimate should
                  * always equal this fixed value during recovery. */
-                at_measured(&svc->srv_at_estimate, min(at_extra,
-                            req->rq_export->exp_obd->obd_recovery_timeout / 4));
-        } else {
-                /* Fake our processing time into the future to ask the clients
-                 * for some extra amount of time */
-                at_measured(&svc->srv_at_estimate, at_extra +
-                            cfs_time_current_sec() -
-                            req->rq_arrival_time.tv_sec);
-
-                /* Check to see if we've actually increased the deadline -
-                 * we may be past adaptive_max */
-                if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-                    at_get(&svc->srv_at_estimate)) {
-                        DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
-                                  "(%ld/%ld), not sending early reply\n",
-                                  olddl, req->rq_arrival_time.tv_sec +
-                                  at_get(&svc->srv_at_estimate) -
-                                  cfs_time_current_sec());
-                        RETURN(-ETIMEDOUT);
-                }
-        }
-        newdl = cfs_time_current_sec() + at_get(&svc->srv_at_estimate);
+               at_measured(&svcpt->scp_at_estimate, min(at_extra,
+                           req->rq_export->exp_obd->obd_recovery_timeout / 4));
+       } else {
+               /* Fake our processing time into the future to ask the clients
+                * for some extra amount of time */
+               at_measured(&svcpt->scp_at_estimate, at_extra +
+                           cfs_time_current_sec() -
+                           req->rq_arrival_time.tv_sec);
+
+               /* Check to see if we've actually increased the deadline -
+                * we may be past adaptive_max */
+               if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+                   at_get(&svcpt->scp_at_estimate)) {
+                       DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+                                 "(%ld/%ld), not sending early reply\n",
+                                 olddl, req->rq_arrival_time.tv_sec +
+                                 at_get(&svcpt->scp_at_estimate) -
+                                 cfs_time_current_sec());
+                       RETURN(-ETIMEDOUT);
+               }
+       }
+       newdl = cfs_time_current_sec() + at_get(&svcpt->scp_at_estimate);
 
         OBD_ALLOC(reqcopy, sizeof *reqcopy);
         if (reqcopy == NULL)
@@ -1126,11 +1179,11 @@ out:
 
 /* Send early replies to everybody expiring within at_early_margin
    asking for at_extra time */
-static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
+static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
 {
+       struct ptlrpc_at_array *array = &svcpt->scp_at_array;
         struct ptlrpc_request *rq, *n;
         cfs_list_t work_list;
-        struct ptlrpc_at_array *array = &svc->srv_at_array;
         __u32  index, count;
         time_t deadline;
         time_t now = cfs_time_current_sec();
@@ -1138,25 +1191,25 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
         int first, counter = 0;
         ENTRY;
 
-        cfs_spin_lock(&svc->srv_at_lock);
-        if (svc->srv_at_check == 0) {
-                cfs_spin_unlock(&svc->srv_at_lock);
-                RETURN(0);
-        }
-        delay = cfs_time_sub(cfs_time_current(), svc->srv_at_checktime);
-        svc->srv_at_check = 0;
+       cfs_spin_lock(&svcpt->scp_at_lock);
+       if (svcpt->scp_at_check == 0) {
+               cfs_spin_unlock(&svcpt->scp_at_lock);
+               RETURN(0);
+       }
+       delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime);
+       svcpt->scp_at_check = 0;
 
-        if (array->paa_count == 0) {
-                cfs_spin_unlock(&svc->srv_at_lock);
-                RETURN(0);
-        }
+       if (array->paa_count == 0) {
+               cfs_spin_unlock(&svcpt->scp_at_lock);
+               RETURN(0);
+       }
 
-        /* The timer went off, but maybe the nearest rpc already completed. */
-        first = array->paa_deadline - now;
-        if (first > at_early_margin) {
-                /* We've still got plenty of time.  Reset the timer. */
-                cfs_spin_unlock(&svc->srv_at_lock);
-                ptlrpc_at_set_timer(svc);
+       /* The timer went off, but maybe the nearest rpc already completed. */
+       first = array->paa_deadline - now;
+       if (first > at_early_margin) {
+               /* We've still got plenty of time.  Reset the timer. */
+               cfs_spin_unlock(&svcpt->scp_at_lock);
+               ptlrpc_at_set_timer(svcpt);
                 RETURN(0);
         }
 
@@ -1200,10 +1253,10 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
                         index = 0;
         }
         array->paa_deadline = deadline;
-        cfs_spin_unlock(&svc->srv_at_lock);
+       cfs_spin_unlock(&svcpt->scp_at_lock);
 
-        /* we have a new earliest deadline, restart the timer */
-        ptlrpc_at_set_timer(svc);
+       /* we have a new earliest deadline, restart the timer */
+       ptlrpc_at_set_timer(svcpt);
 
         CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early "
                "replies\n", first, at_extra, counter);
@@ -1211,11 +1264,13 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
                 /* We're already past request deadlines before we even get a
                    chance to send early replies */
                 LCONSOLE_WARN("%s: This server is not able to keep up with "
-                              "request traffic (cpu-bound).\n", svc->srv_name);
-                CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, "
-                      "delay="CFS_DURATION_T"(jiff)\n",
-                      counter, svc->srv_n_queued_reqs, svc->srv_n_active_reqs,
-                      at_get(&svc->srv_at_estimate), delay);
+                             "request traffic (cpu-bound).\n",
+                             svcpt->scp_service->srv_name);
+               CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, "
+                     "delay="CFS_DURATION_T"(jiff)\n",
+                     counter, svcpt->scp_nreqs_incoming,
+                     svcpt->scp_nreqs_active,
+                     at_get(&svcpt->scp_at_estimate), delay);
         }
 
         /* we took additional refcount so entries can't be deleted from list, no
@@ -1231,7 +1286,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
                 ptlrpc_server_drop_request(rq);
         }
 
-        RETURN(0);
+       RETURN(1); /* return "did_something" for liblustre */
 }
 
 /**
@@ -1286,23 +1341,23 @@ static void ptlrpc_hpreq_fini(struct ptlrpc_request *req)
  * Make the request a high priority one.
  *
  * All the high priority requests are queued in a separate FIFO
- * ptlrpc_service::srv_request_hpq list which is parallel to
- * ptlrpc_service::srv_request_queue list but has a higher priority
+ * ptlrpc_service_part::scp_hreq_pending list which is parallel to
+ * ptlrpc_service_part::scp_req_pending list but has a higher priority
  * for handling.
  *
  * \see ptlrpc_server_handle_request().
  */
-static void ptlrpc_hpreq_reorder_nolock(struct ptlrpc_service *svc,
+static void ptlrpc_hpreq_reorder_nolock(struct ptlrpc_service_part *svcpt,
                                         struct ptlrpc_request *req)
 {
         ENTRY;
-        LASSERT(svc != NULL);
+
         cfs_spin_lock(&req->rq_lock);
         if (req->rq_hp == 0) {
                 int opc = lustre_msg_get_opc(req->rq_reqmsg);
 
                 /* Add to the high priority queue. */
-                cfs_list_move_tail(&req->rq_list, &svc->srv_request_hpq);
+               cfs_list_move_tail(&req->rq_list, &svcpt->scp_hreq_pending);
                 req->rq_hp = 1;
                 if (opc != OBD_PING)
                         DEBUG_REQ(D_RPCTRACE, req, "high priority req");
@@ -1316,18 +1371,18 @@ static void ptlrpc_hpreq_reorder_nolock(struct ptlrpc_service *svc,
  */
 void ptlrpc_hpreq_reorder(struct ptlrpc_request *req)
 {
-        struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
-        ENTRY;
+       struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+       ENTRY;
 
-        cfs_spin_lock(&svc->srv_rq_lock);
-        /* It may happen that the request is already taken for the processing
-         * but still in the export list, or the request is not in the request
-         * queue but in the export list already, do not add it into the
-         * HP list. */
-        if (!cfs_list_empty(&req->rq_list))
-                ptlrpc_hpreq_reorder_nolock(svc, req);
-        cfs_spin_unlock(&svc->srv_rq_lock);
-        EXIT;
+       cfs_spin_lock(&svcpt->scp_req_lock);
+       /* It may happen that the request is already taken for the processing
+        * but still in the export list, or the request is not in the request
+        * queue but in the export list already, do not add it into the
+        * HP list. */
+       if (!cfs_list_empty(&req->rq_list))
+               ptlrpc_hpreq_reorder_nolock(svcpt, req);
+       cfs_spin_unlock(&svcpt->scp_req_lock);
+       EXIT;
 }
 
 /** Check if the request is a high priority one. */
@@ -1344,50 +1399,51 @@ static int ptlrpc_server_hpreq_check(struct ptlrpc_service *svc,
 }
 
 /** Check if a request is a high priority one. */
-static int ptlrpc_server_request_add(struct ptlrpc_service *svc,
-                                     struct ptlrpc_request *req)
+static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
+                                    struct ptlrpc_request *req)
 {
-        int rc;
-        ENTRY;
+       int     rc;
+       ENTRY;
 
-        rc = ptlrpc_server_hpreq_check(svc, req);
-        if (rc < 0)
-                RETURN(rc);
+       rc = ptlrpc_server_hpreq_check(svcpt->scp_service, req);
+       if (rc < 0)
+               RETURN(rc);
 
-        cfs_spin_lock(&svc->srv_rq_lock);
+       cfs_spin_lock(&svcpt->scp_req_lock);
 
-        if (rc)
-                ptlrpc_hpreq_reorder_nolock(svc, req);
-        else
-                cfs_list_add_tail(&req->rq_list,
-                                  &svc->srv_request_queue);
+       if (rc)
+               ptlrpc_hpreq_reorder_nolock(svcpt, req);
+       else
+               cfs_list_add_tail(&req->rq_list, &svcpt->scp_req_pending);
 
-        cfs_spin_unlock(&svc->srv_rq_lock);
+       cfs_spin_unlock(&svcpt->scp_req_lock);
 
-        RETURN(0);
+       RETURN(0);
 }
 
 /**
  * Allow to handle high priority request
- * User can call it w/o any lock but need to hold ptlrpc_service::srv_rq_lock
- * to get reliable result
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
  */
-static int ptlrpc_server_allow_high(struct ptlrpc_service *svc, int force)
+static int ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
+                                   int force)
 {
-        if (force)
-                return 1;
+       if (force)
+               return 1;
 
-        if (svc->srv_n_active_reqs >= svc->srv_threads_running - 1)
-                return 0;
+       if (svcpt->scp_nreqs_active >= svcpt->scp_nthrs_running - 1)
+               return 0;
 
-        return cfs_list_empty(&svc->srv_request_queue) ||
-               svc->srv_hpreq_count < svc->srv_hpreq_ratio;
+       return cfs_list_empty(&svcpt->scp_req_pending) ||
+              svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
 }
 
-static int ptlrpc_server_high_pending(struct ptlrpc_service *svc, int force)
+static int ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
+                                     int force)
 {
-        return ptlrpc_server_allow_high(svc, force) &&
-               !cfs_list_empty(&svc->srv_request_hpq);
+       return ptlrpc_server_allow_high(svcpt, force) &&
+              !cfs_list_empty(&svcpt->scp_hreq_pending);
 }
 
 /**
@@ -1396,45 +1452,47 @@ static int ptlrpc_server_high_pending(struct ptlrpc_service *svc, int force)
  * already being processed (i.e. those threads can service more high-priority
  * requests), or if there are enough idle threads that a later thread can do
  * a high priority request.
- * User can call it w/o any lock but need to hold ptlrpc_service::srv_rq_lock
- * to get reliable result
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
  */
-static int ptlrpc_server_allow_normal(struct ptlrpc_service *svc, int force)
+static int ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
+                                     int force)
 {
 #ifndef __KERNEL__
-        if (1) /* always allow to handle normal request for liblustre */
-                return 1;
+       if (1) /* always allow to handle normal request for liblustre */
+               return 1;
 #endif
-        if (force ||
-            svc->srv_n_active_reqs < svc->srv_threads_running - 2)
-                return 1;
+       if (force ||
+           svcpt->scp_nreqs_active < svcpt->scp_nthrs_running - 2)
+               return 1;
 
-        if (svc->srv_n_active_reqs >= svc->srv_threads_running - 1)
-                return 0;
+       if (svcpt->scp_nreqs_active >= svcpt->scp_nthrs_running - 1)
+               return 0;
 
-       return svc->srv_n_active_hpreq > 0 ||
-              svc->srv_ops.so_hpreq_handler == NULL;
+       return svcpt->scp_nhreqs_active > 0 ||
+              svcpt->scp_service->srv_ops.so_hpreq_handler == NULL;
 }
 
-static int ptlrpc_server_normal_pending(struct ptlrpc_service *svc, int force)
+static int ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
+                                       int force)
 {
-        return ptlrpc_server_allow_normal(svc, force) &&
-               !cfs_list_empty(&svc->srv_request_queue);
+       return ptlrpc_server_allow_normal(svcpt, force) &&
+              !cfs_list_empty(&svcpt->scp_req_pending);
 }
 
 /**
  * Returns true if there are requests available in incoming
  * request queue for processing and it is allowed to fetch them.
- * User can call it w/o any lock but need to hold ptlrpc_service::srv_rq_lock
+ * User can call it w/o any lock but need to hold ptlrpc_service_part::scp_req_lock
  * to get reliable result
  * \see ptlrpc_server_allow_normal
  * \see ptlrpc_server_allow high
  */
 static inline int
-ptlrpc_server_request_pending(struct ptlrpc_service *svc, int force)
+ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, int force)
 {
-        return ptlrpc_server_high_pending(svc, force) ||
-               ptlrpc_server_normal_pending(svc, force);
+       return ptlrpc_server_high_pending(svcpt, force) ||
+              ptlrpc_server_normal_pending(svcpt, force);
 }
 
 /**
@@ -1443,26 +1501,25 @@ ptlrpc_server_request_pending(struct ptlrpc_service *svc, int force)
  * Returns a pointer to fetched request.
  */
 static struct ptlrpc_request *
-ptlrpc_server_request_get(struct ptlrpc_service *svc, int force)
+ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, int force)
 {
-        struct ptlrpc_request *req;
-        ENTRY;
-
-        if (ptlrpc_server_high_pending(svc, force)) {
-                req = cfs_list_entry(svc->srv_request_hpq.next,
-                                     struct ptlrpc_request, rq_list);
-                svc->srv_hpreq_count++;
-                RETURN(req);
+       struct ptlrpc_request *req;
+       ENTRY;
 
-        }
+       if (ptlrpc_server_high_pending(svcpt, force)) {
+               req = cfs_list_entry(svcpt->scp_hreq_pending.next,
+                                    struct ptlrpc_request, rq_list);
+               svcpt->scp_hreq_count++;
+               RETURN(req);
+       }
 
-        if (ptlrpc_server_normal_pending(svc, force)) {
-                req = cfs_list_entry(svc->srv_request_queue.next,
-                                     struct ptlrpc_request, rq_list);
-                svc->srv_hpreq_count = 0;
-                RETURN(req);
-        }
-        RETURN(NULL);
+       if (ptlrpc_server_normal_pending(svcpt, force)) {
+               req = cfs_list_entry(svcpt->scp_req_pending.next,
+                                    struct ptlrpc_request, rq_list);
+               svcpt->scp_hreq_count = 0;
+               RETURN(req);
+       }
+       RETURN(NULL);
 }
 
 /**
@@ -1472,28 +1529,27 @@ ptlrpc_server_request_get(struct ptlrpc_service *svc, int force)
  * ptlrpc_server_handle_req later on.
  */
 static int
-ptlrpc_server_handle_req_in(struct ptlrpc_service *svc)
+ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt)
 {
-        struct ptlrpc_request *req;
-        __u32                  deadline;
-        int                    rc;
-        ENTRY;
-
-        LASSERT(svc);
+       struct ptlrpc_service   *svc = svcpt->scp_service;
+       struct ptlrpc_request   *req;
+       __u32                   deadline;
+       int                     rc;
+       ENTRY;
 
-        cfs_spin_lock(&svc->srv_lock);
-        if (cfs_list_empty(&svc->srv_req_in_queue)) {
-                cfs_spin_unlock(&svc->srv_lock);
-                RETURN(0);
-        }
+       cfs_spin_lock(&svcpt->scp_lock);
+       if (cfs_list_empty(&svcpt->scp_req_incoming)) {
+               cfs_spin_unlock(&svcpt->scp_lock);
+               RETURN(0);
+       }
 
-        req = cfs_list_entry(svc->srv_req_in_queue.next,
-                             struct ptlrpc_request, rq_list);
-        cfs_list_del_init (&req->rq_list);
-        svc->srv_n_queued_reqs--;
-        /* Consider this still a "queued" request as far as stats are
-           concerned */
-        cfs_spin_unlock(&svc->srv_lock);
+       req = cfs_list_entry(svcpt->scp_req_incoming.next,
+                            struct ptlrpc_request, rq_list);
+       cfs_list_del_init(&req->rq_list);
+       svcpt->scp_nreqs_incoming--;
+       /* Consider this still a "queued" request as far as stats are
+        * concerned */
+       cfs_spin_unlock(&svcpt->scp_lock);
 
         /* go through security check/transform */
         rc = sptlrpc_svc_unwrap_request(req);
@@ -1596,21 +1652,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service *svc)
         ptlrpc_at_add_timed(req);
 
         /* Move it over to the request processing queue */
-        rc = ptlrpc_server_request_add(svc, req);
-        if (rc) {
-                ptlrpc_hpreq_fini(req);
-                GOTO(err_req, rc);
-        }
-        cfs_waitq_signal(&svc->srv_waitq);
-        RETURN(1);
+       rc = ptlrpc_server_request_add(svcpt, req);
+       if (rc) {
+               ptlrpc_hpreq_fini(req);
+               GOTO(err_req, rc);
+       }
+       cfs_waitq_signal(&svcpt->scp_waitq);
+       RETURN(1);
 
 err_req:
-        cfs_spin_lock(&svc->srv_rq_lock);
-        svc->srv_n_active_reqs++;
-        cfs_spin_unlock(&svc->srv_rq_lock);
-        ptlrpc_server_finish_request(svc, req);
+       cfs_spin_lock(&svcpt->scp_req_lock);
+       svcpt->scp_nreqs_active++;
+       cfs_spin_unlock(&svcpt->scp_req_lock);
+       ptlrpc_server_finish_request(svcpt, req);
 
-        RETURN(1);
+       RETURN(1);
 }
 
 /**
@@ -1618,9 +1674,10 @@ err_req:
  * Calls handler function from service to do actual processing.
  */
 static int
-ptlrpc_server_handle_request(struct ptlrpc_service *svc,
-                             struct ptlrpc_thread *thread)
+ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
+                            struct ptlrpc_thread *thread)
 {
+       struct ptlrpc_service *svc = svcpt->scp_service;
         struct obd_export     *export = NULL;
         struct ptlrpc_request *request;
         struct timeval         work_start;
@@ -1630,19 +1687,17 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         int                    fail_opc = 0;
         ENTRY;
 
-        LASSERT(svc);
-
-        cfs_spin_lock(&svc->srv_rq_lock);
+       cfs_spin_lock(&svcpt->scp_req_lock);
 #ifndef __KERNEL__
-        /* !@%$# liblustre only has 1 thread */
-        if (cfs_atomic_read(&svc->srv_n_difficult_replies) != 0) {
-                cfs_spin_unlock(&svc->srv_rq_lock);
-                RETURN(0);
-        }
+       /* !@%$# liblustre only has 1 thread */
+       if (cfs_atomic_read(&svcpt->scp_nreps_difficult) != 0) {
+               cfs_spin_unlock(&svcpt->scp_req_lock);
+               RETURN(0);
+       }
 #endif
-        request = ptlrpc_server_request_get(svc, 0);
-        if  (request == NULL) {
-                cfs_spin_unlock(&svc->srv_rq_lock);
+       request = ptlrpc_server_request_get(svcpt, 0);
+       if  (request == NULL) {
+               cfs_spin_unlock(&svcpt->scp_req_lock);
                 RETURN(0);
         }
 
@@ -1653,23 +1708,25 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
 
         if (unlikely(fail_opc)) {
                 if (request->rq_export && request->rq_ops) {
-                        cfs_spin_unlock(&svc->srv_rq_lock);
-                        OBD_FAIL_TIMEOUT(fail_opc, 4);
-                        cfs_spin_lock(&svc->srv_rq_lock);
-                        request = ptlrpc_server_request_get(svc, 0);
-                        if  (request == NULL) {
-                                cfs_spin_unlock(&svc->srv_rq_lock);
-                                RETURN(0);
-                        }
-                }
-        }
+                       cfs_spin_unlock(&svcpt->scp_req_lock);
+
+                       OBD_FAIL_TIMEOUT(fail_opc, 4);
 
-        cfs_list_del_init(&request->rq_list);
-        svc->srv_n_active_reqs++;
-        if (request->rq_hp)
-                svc->srv_n_active_hpreq++;
+                       cfs_spin_lock(&svcpt->scp_req_lock);
+                       request = ptlrpc_server_request_get(svcpt, 0);
+                       if  (request == NULL) {
+                               cfs_spin_unlock(&svcpt->scp_req_lock);
+                               RETURN(0);
+                       }
+               }
+       }
+
+       cfs_list_del_init(&request->rq_list);
+       svcpt->scp_nreqs_active++;
+       if (request->rq_hp)
+               svcpt->scp_nhreqs_active++;
 
-        cfs_spin_unlock(&svc->srv_rq_lock);
+       cfs_spin_unlock(&svcpt->scp_req_lock);
 
         ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
 
@@ -1682,11 +1739,11 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
                 lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
                                     timediff);
                 lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
-                                    svc->srv_n_queued_reqs);
-                lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
-                                    svc->srv_n_active_reqs);
-                lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
-                                    at_get(&svc->srv_at_estimate));
+                                   svcpt->scp_nreqs_incoming);
+               lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
+                                   svcpt->scp_nreqs_active);
+               lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
+                                   at_get(&svcpt->scp_at_estimate));
         }
 
         rc = lu_context_init(&request->rq_session,
@@ -1801,18 +1858,19 @@ put_conn:
         }
 
 out_req:
-        ptlrpc_server_finish_request(svc, request);
+       ptlrpc_server_finish_request(svcpt, request);
 
-        RETURN(1);
+       RETURN(1);
 }
 
 /**
  * An internal function to process a single reply state object.
  */
 static int
-ptlrpc_handle_rs (struct ptlrpc_reply_state *rs)
+ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
 {
-        struct ptlrpc_service     *svc = rs->rs_service;
+       struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+       struct ptlrpc_service     *svc = svcpt->scp_service;
         struct obd_export         *exp;
         int                        nlocks;
         int                        been_handled;
@@ -1900,15 +1958,15 @@ ptlrpc_handle_rs (struct ptlrpc_reply_state *rs)
                 class_export_put (exp);
                 rs->rs_export = NULL;
                 ptlrpc_rs_decref (rs);
-                if (cfs_atomic_dec_and_test(&svc->srv_n_difficult_replies) &&
-                    svc->srv_is_stopping)
-                        cfs_waitq_broadcast(&svc->srv_waitq);
-                RETURN(1);
-        }
+               if (cfs_atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
+                   svc->srv_is_stopping)
+                       cfs_waitq_broadcast(&svcpt->scp_waitq);
+               RETURN(1);
+       }
 
-        /* still on the net; callback will schedule */
-        cfs_spin_unlock(&rs->rs_lock);
-        RETURN(1);
+       /* still on the net; callback will schedule */
+       cfs_spin_unlock(&rs->rs_lock);
+       RETURN(1);
 }
 
 #ifndef __KERNEL__
@@ -1922,22 +1980,22 @@ ptlrpc_handle_rs (struct ptlrpc_reply_state *rs)
  * \retval 1 one reply processed
  */
 static int
-ptlrpc_server_handle_reply(struct ptlrpc_service *svc)
+ptlrpc_server_handle_reply(struct ptlrpc_service_part *svcpt)
 {
-        struct ptlrpc_reply_state *rs = NULL;
-        ENTRY;
+       struct ptlrpc_reply_state *rs = NULL;
+       ENTRY;
 
-        cfs_spin_lock(&svc->srv_rs_lock);
-        if (!cfs_list_empty(&svc->srv_reply_queue)) {
-                rs = cfs_list_entry(svc->srv_reply_queue.prev,
-                                    struct ptlrpc_reply_state,
-                                    rs_list);
-                cfs_list_del_init(&rs->rs_list);
-        }
-        cfs_spin_unlock(&svc->srv_rs_lock);
-        if (rs != NULL)
-                ptlrpc_handle_rs(rs);
-        RETURN(rs != NULL);
+       cfs_spin_lock(&svcpt->scp_rep_lock);
+       if (!cfs_list_empty(&svcpt->scp_rep_queue)) {
+               rs = cfs_list_entry(svcpt->scp_rep_queue.prev,
+                                   struct ptlrpc_reply_state,
+                                   rs_list);
+               cfs_list_del_init(&rs->rs_list);
+       }
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
+       if (rs != NULL)
+               ptlrpc_handle_rs(rs);
+       RETURN(rs != NULL);
 }
 
 /* FIXME make use of timeout later */
@@ -1954,41 +2012,44 @@ liblustre_check_services (void *arg)
         cfs_list_for_each_safe (tmp, nxt, &ptlrpc_all_services) {
                 struct ptlrpc_service *svc =
                         cfs_list_entry (tmp, struct ptlrpc_service, srv_list);
+               struct ptlrpc_service_part *svcpt;
 
-                if (svc->srv_threads_running != 0)     /* I've recursed */
-                        continue;
+               svcpt = svc->srv_part;
 
-                /* service threads can block for bulk, so this limits us
-                 * (arbitrarily) to recursing 1 stack frame per service.
-                 * Note that the problem with recursion is that we have to
-                 * unwind completely before our caller can resume. */
+               if (svcpt->scp_nthrs_running != 0)     /* I've recursed */
+                       continue;
 
-                svc->srv_threads_running++;
+               /* service threads can block for bulk, so this limits us
+                * (arbitrarily) to recursing 1 stack frame per service.
+                * Note that the problem with recursion is that we have to
+                * unwind completely before our caller can resume. */
 
-                do {
-                        rc = ptlrpc_server_handle_req_in(svc);
-                        rc |= ptlrpc_server_handle_reply(svc);
-                        rc |= ptlrpc_at_check_timed(svc);
-                        rc |= ptlrpc_server_handle_request(svc, NULL);
-                        rc |= (ptlrpc_server_post_idle_rqbds(svc) > 0);
-                        did_something |= rc;
-                } while (rc);
+               svcpt->scp_nthrs_running++;
 
-                svc->srv_threads_running--;
-        }
+               do {
+                       rc = ptlrpc_server_handle_req_in(svcpt);
+                       rc |= ptlrpc_server_handle_reply(svcpt);
+                       rc |= ptlrpc_at_check_timed(svcpt);
+                       rc |= ptlrpc_server_handle_request(svcpt, NULL);
+                       rc |= (ptlrpc_server_post_idle_rqbds(svcpt) > 0);
+                       did_something |= rc;
+               } while (rc);
 
-        RETURN(did_something);
+               svcpt->scp_nthrs_running--;
+       }
+
+       RETURN(did_something);
 }
 #define ptlrpc_stop_all_threads(s) do {} while (0)
 
 #else /* __KERNEL__ */
 
 static void
-ptlrpc_check_rqbd_pool(struct ptlrpc_service *svc)
+ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
 {
-        int avail = svc->srv_nrqbd_receiving;
-        int low_water = test_req_buffer_pressure ? 0 :
-                        svc->srv_nbuf_per_group / 2;
+       int avail = svcpt->scp_nrqbds_posted;
+       int low_water = test_req_buffer_pressure ? 0 :
+                       svcpt->scp_service->srv_nbuf_per_group / 2;
 
         /* NB I'm not locking; just looking. */
 
@@ -1998,107 +2059,109 @@ ptlrpc_check_rqbd_pool(struct ptlrpc_service *svc)
          * space. */
 
         if (avail <= low_water)
-                ptlrpc_grow_req_bufs(svc);
+               ptlrpc_grow_req_bufs(svcpt);
 
-        if (svc->srv_stats)
-                lprocfs_counter_add(svc->srv_stats, PTLRPC_REQBUF_AVAIL_CNTR,
-                                    avail);
+       if (svcpt->scp_service->srv_stats) {
+               lprocfs_counter_add(svcpt->scp_service->srv_stats,
+                                   PTLRPC_REQBUF_AVAIL_CNTR, avail);
+       }
 }
 
 static int
 ptlrpc_retry_rqbds(void *arg)
 {
-        struct ptlrpc_service *svc = (struct ptlrpc_service *)arg;
+       struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg;
 
-        svc->srv_rqbd_timeout = 0;
-        return (-ETIMEDOUT);
+       svcpt->scp_rqbd_timeout = 0;
+       return -ETIMEDOUT;
 }
 
 static inline int
-ptlrpc_threads_enough(struct ptlrpc_service *svc)
+ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
 {
-       return svc->srv_n_active_reqs <
-              svc->srv_threads_running - 1 -
-              (svc->srv_ops.so_hpreq_handler != NULL);
+       return svcpt->scp_nreqs_active <
+              svcpt->scp_nthrs_running - 1 -
+              (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
 }
 
 /**
  * allowed to create more threads
- * user can call it w/o any lock but need to hold ptlrpc_service::srv_lock to
- * get reliable result
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
  */
 static inline int
-ptlrpc_threads_increasable(struct ptlrpc_service *svc)
+ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
 {
-        return svc->srv_threads_running +
-               svc->srv_threads_starting < svc->srv_threads_max;
+       return svcpt->scp_nthrs_running +
+              svcpt->scp_nthrs_starting < svcpt->scp_service->srv_threads_max;
 }
 
 /**
  * too many requests and allowed to create more threads
  */
 static inline int
-ptlrpc_threads_need_create(struct ptlrpc_service *svc)
+ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
 {
-        return !ptlrpc_threads_enough(svc) && ptlrpc_threads_increasable(svc);
+       return !ptlrpc_threads_enough(svcpt) &&
+               ptlrpc_threads_increasable(svcpt);
 }
 
 static inline int
 ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
 {
-        return thread_is_stopping(thread) ||
-               thread->t_svc->srv_is_stopping;
+       return thread_is_stopping(thread) ||
+              thread->t_svcpt->scp_service->srv_is_stopping;
 }
 
 static inline int
-ptlrpc_rqbd_pending(struct ptlrpc_service *svc)
+ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
 {
-        return !cfs_list_empty(&svc->srv_idle_rqbds) &&
-               svc->srv_rqbd_timeout == 0;
+       return !cfs_list_empty(&svcpt->scp_rqbd_idle) &&
+              svcpt->scp_rqbd_timeout == 0;
 }
 
 static inline int
-ptlrpc_at_check(struct ptlrpc_service *svc)
+ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
 {
-        return svc->srv_at_check;
+       return svcpt->scp_at_check;
 }
 
 /**
  * requests wait on preprocessing
- * user can call it w/o any lock but need to hold ptlrpc_service::srv_lock to
- * get reliable result
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
  */
 static inline int
-ptlrpc_server_request_waiting(struct ptlrpc_service *svc)
+ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
 {
-        return !cfs_list_empty(&svc->srv_req_in_queue);
+       return !cfs_list_empty(&svcpt->scp_req_incoming);
 }
 
 static __attribute__((__noinline__)) int
-ptlrpc_wait_event(struct ptlrpc_service *svc,
-                  struct ptlrpc_thread *thread)
+ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
+                 struct ptlrpc_thread *thread)
 {
-        /* Don't exit while there are replies to be handled */
-        struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout,
-                                             ptlrpc_retry_rqbds, svc);
+       /* Don't exit while there are replies to be handled */
+       struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
+                                            ptlrpc_retry_rqbds, svcpt);
 
-        lc_watchdog_disable(thread->t_watchdog);
+       lc_watchdog_disable(thread->t_watchdog);
 
-        cfs_cond_resched();
+       cfs_cond_resched();
 
-        l_wait_event_exclusive_head(svc->srv_waitq,
-                               ptlrpc_thread_stopping(thread) ||
-                               ptlrpc_server_request_waiting(svc) ||
-                               ptlrpc_server_request_pending(svc, 0) ||
-                               ptlrpc_rqbd_pending(svc) ||
-                               ptlrpc_at_check(svc), &lwi);
+       l_wait_event_exclusive_head(svcpt->scp_waitq,
+                               ptlrpc_thread_stopping(thread) ||
+                               ptlrpc_server_request_incoming(svcpt) ||
+                               ptlrpc_server_request_pending(svcpt, 0) ||
+                               ptlrpc_rqbd_pending(svcpt) ||
+                               ptlrpc_at_check(svcpt), &lwi);
 
-        if (ptlrpc_thread_stopping(thread))
-                return -EINTR;
+       if (ptlrpc_thread_stopping(thread))
+               return -EINTR;
 
-        lc_watchdog_touch(thread->t_watchdog, CFS_GET_TIMEOUT(svc));
-
-        return 0;
+       lc_watchdog_touch(thread->t_watchdog,
+                         ptlrpc_server_get_timeout(svcpt));
+       return 0;
 }
 
 /**
@@ -2109,10 +2172,11 @@ ptlrpc_wait_event(struct ptlrpc_service *svc,
  */
 static int ptlrpc_main(void *arg)
 {
-        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
-        struct ptlrpc_service  *svc = data->svc;
-        struct ptlrpc_thread   *thread = data->thread;
-        struct ptlrpc_reply_state *rs;
+       struct ptlrpc_svc_data          *data = (struct ptlrpc_svc_data *)arg;
+       struct ptlrpc_thread            *thread = data->thread;
+       struct ptlrpc_service_part      *svcpt = thread->t_svcpt;
+       struct ptlrpc_service           *svc = svcpt->scp_service;
+       struct ptlrpc_reply_state       *rs;
 #ifdef WITH_GROUP_INFO
         cfs_group_info_t *ginfo = NULL;
 #endif
@@ -2181,74 +2245,76 @@ static int ptlrpc_main(void *arg)
                 goto out_srv_fini;
         }
 
-        cfs_spin_lock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
 
-        LASSERT(thread_is_starting(thread));
-        thread_clear_flags(thread, SVC_STARTING);
-        svc->srv_threads_starting--;
+       LASSERT(thread_is_starting(thread));
+       thread_clear_flags(thread, SVC_STARTING);
 
-        /* SVC_STOPPING may already be set here if someone else is trying
-         * to stop the service while this new thread has been dynamically
-         * forked. We still set SVC_RUNNING to let our creator know that
-         * we are now running, however we will exit as soon as possible */
-        thread_add_flags(thread, SVC_RUNNING);
-        svc->srv_threads_running++;
-        cfs_spin_unlock(&svc->srv_lock);
+       svcpt->scp_nthrs_starting--;
 
-        /*
-         * wake up our creator. Note: @data is invalid after this point,
-         * because it's allocated on ptlrpc_start_thread() stack.
-         */
-        cfs_waitq_signal(&thread->t_ctl_waitq);
+       /* SVC_STOPPING may already be set here if someone else is trying
+        * to stop the service while this new thread has been dynamically
+        * forked. We still set SVC_RUNNING to let our creator know that
+        * we are now running, however we will exit as soon as possible */
+       thread_add_flags(thread, SVC_RUNNING);
+       svcpt->scp_nthrs_running++;
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        thread->t_watchdog = lc_watchdog_add(CFS_GET_TIMEOUT(svc), NULL, NULL);
+       /*
+        * wake up our creator. Note: @data is invalid after this point,
+        * because it's allocated on ptlrpc_start_thread() stack.
+        */
+       cfs_waitq_signal(&thread->t_ctl_waitq);
 
-        cfs_spin_lock(&svc->srv_rs_lock);
-        cfs_list_add(&rs->rs_list, &svc->srv_free_rs_list);
-        cfs_waitq_signal(&svc->srv_free_rs_waitq);
-        cfs_spin_unlock(&svc->srv_rs_lock);
+       thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
+                                            NULL, NULL);
 
-        CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
-               svc->srv_threads_running);
+       cfs_spin_lock(&svcpt->scp_rep_lock);
+       cfs_list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+       cfs_waitq_signal(&svcpt->scp_rep_waitq);
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
 
-        /* XXX maintain a list of all managed devices: insert here */
-        while (!ptlrpc_thread_stopping(thread)) {
-                if (ptlrpc_wait_event(svc, thread))
-                        break;
+       CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
+              svcpt->scp_nthrs_running);
 
-                ptlrpc_check_rqbd_pool(svc);
+       /* XXX maintain a list of all managed devices: insert here */
+       while (!ptlrpc_thread_stopping(thread)) {
+               if (ptlrpc_wait_event(svcpt, thread))
+                       break;
 
-                if (ptlrpc_threads_need_create(svc)) {
-                        /* Ignore return code - we tried... */
-                        ptlrpc_start_thread(svc);
-                }
+               ptlrpc_check_rqbd_pool(svcpt);
 
-                /* Process all incoming reqs before handling any */
-                if (ptlrpc_server_request_waiting(svc)) {
-                        ptlrpc_server_handle_req_in(svc);
-                        /* but limit ourselves in case of flood */
-                        if (counter++ < 100)
-                                continue;
-                        counter = 0;
+               if (ptlrpc_threads_need_create(svcpt)) {
+                       /* Ignore return code - we tried... */
+                       ptlrpc_start_thread(svcpt);
                 }
 
-                if (ptlrpc_at_check(svc))
-                        ptlrpc_at_check_timed(svc);
-
-                if (ptlrpc_server_request_pending(svc, 0)) {
-                        lu_context_enter(&env->le_ctx);
-                        ptlrpc_server_handle_request(svc, thread);
-                        lu_context_exit(&env->le_ctx);
+               /* Process all incoming reqs before handling any */
+               if (ptlrpc_server_request_incoming(svcpt)) {
+                       ptlrpc_server_handle_req_in(svcpt);
+                       /* but limit ourselves in case of flood */
+                       if (counter++ < 100)
+                               continue;
+                       counter = 0;
+               }
+
+               if (ptlrpc_at_check(svcpt))
+                       ptlrpc_at_check_timed(svcpt);
+
+               if (ptlrpc_server_request_pending(svcpt, 0)) {
+                       lu_context_enter(&env->le_ctx);
+                       ptlrpc_server_handle_request(svcpt, thread);
+                       lu_context_exit(&env->le_ctx);
                 }
 
-                if (ptlrpc_rqbd_pending(svc) &&
-                    ptlrpc_server_post_idle_rqbds(svc) < 0) {
-                        /* I just failed to repost request buffers.
-                         * Wait for a timeout (unless something else
-                         * happens) before I try again */
-                        svc->srv_rqbd_timeout = cfs_time_seconds(1)/10;
-                        CDEBUG(D_RPCTRACE,"Posted buffers: %d\n",
-                               svc->srv_nrqbd_receiving);
+               if (ptlrpc_rqbd_pending(svcpt) &&
+                   ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
+                       /* I just failed to repost request buffers.
+                        * Wait for a timeout (unless something else
+                        * happens) before I try again */
+                       svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
+                       CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
+                              svcpt->scp_nrqbds_posted);
                 }
         }
 
@@ -2270,21 +2336,22 @@ out:
         CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
                thread, thread->t_pid, thread->t_id, rc);
 
-        cfs_spin_lock(&svc->srv_lock);
-        if (thread_test_and_clear_flags(thread, SVC_STARTING))
-                svc->srv_threads_starting--;
+       cfs_spin_lock(&svcpt->scp_lock);
+       if (thread_test_and_clear_flags(thread, SVC_STARTING))
+               svcpt->scp_nthrs_starting--;
 
-        if (thread_test_and_clear_flags(thread, SVC_RUNNING))
-                /* must know immediately */
-                svc->srv_threads_running--;
+       if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
+               /* must know immediately */
+               svcpt->scp_nthrs_running--;
+       }
 
-        thread->t_id    = rc;
-        thread_add_flags(thread, SVC_STOPPED);
+       thread->t_id = rc;
+       thread_add_flags(thread, SVC_STOPPED);
 
-        cfs_waitq_signal(&thread->t_ctl_waitq);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_waitq_signal(&thread->t_ctl_waitq);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        return rc;
+       return rc;
 }
 
 struct ptlrpc_hr_args {
@@ -2428,30 +2495,30 @@ static int ptlrpc_start_hr_threads(struct ptlrpc_hr_service *hr)
         RETURN(0);
 }
 
-static void ptlrpc_stop_thread(struct ptlrpc_service *svc,
-                               struct ptlrpc_thread *thread)
+static void ptlrpc_stop_thread(struct ptlrpc_service_part *svcpt,
+                              struct ptlrpc_thread *thread)
 {
-        struct l_wait_info lwi = { 0 };
-        ENTRY;
+       struct l_wait_info lwi = { 0 };
+       ENTRY;
 
-        CDEBUG(D_RPCTRACE, "Stopping thread [ %p : %u ]\n",
-               thread, thread->t_pid);
+       CDEBUG(D_RPCTRACE, "Stopping thread [ %p : %u ]\n",
+              thread, thread->t_pid);
 
-        cfs_spin_lock(&svc->srv_lock);
-        /* let the thread know that we would like it to stop asap */
-        thread_add_flags(thread, SVC_STOPPING);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
+       /* let the thread know that we would like it to stop asap */
+       thread_add_flags(thread, SVC_STOPPING);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        cfs_waitq_broadcast(&svc->srv_waitq);
-        l_wait_event(thread->t_ctl_waitq,
-                     thread_is_stopped(thread), &lwi);
+       cfs_waitq_broadcast(&svcpt->scp_waitq);
+       l_wait_event(thread->t_ctl_waitq,
+                    thread_is_stopped(thread), &lwi);
 
-        cfs_spin_lock(&svc->srv_lock);
-        cfs_list_del(&thread->t_link);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_spin_lock(&svcpt->scp_lock);
+       cfs_list_del(&thread->t_link);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
-        OBD_FREE_PTR(thread);
-        EXIT;
+       OBD_FREE_PTR(thread);
+       EXIT;
 }
 
 /**
@@ -2459,21 +2526,24 @@ static void ptlrpc_stop_thread(struct ptlrpc_service *svc,
  */
 void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
 {
-        struct ptlrpc_thread *thread;
-        ENTRY;
+       struct ptlrpc_service_part      *svcpt = svc->srv_part;
+       struct ptlrpc_thread            *thread;
+       ENTRY;
 
-        cfs_spin_lock(&svc->srv_lock);
-        while (!cfs_list_empty(&svc->srv_threads)) {
-                thread = cfs_list_entry(svc->srv_threads.next,
-                                        struct ptlrpc_thread, t_link);
+       LASSERT(svcpt != NULL);
 
-                cfs_spin_unlock(&svc->srv_lock);
-                ptlrpc_stop_thread(svc, thread);
-                cfs_spin_lock(&svc->srv_lock);
-        }
+       cfs_spin_lock(&svcpt->scp_lock);
+       while (!cfs_list_empty(&svcpt->scp_threads)) {
+               thread = cfs_list_entry(svcpt->scp_threads.next,
+                                       struct ptlrpc_thread, t_link);
 
-        cfs_spin_unlock(&svc->srv_lock);
-        EXIT;
+               cfs_spin_unlock(&svcpt->scp_lock);
+               ptlrpc_stop_thread(svcpt, thread);
+               cfs_spin_lock(&svcpt->scp_lock);
+       }
+
+       cfs_spin_unlock(&svcpt->scp_lock);
+       EXIT;
 }
 
 int ptlrpc_start_threads(struct ptlrpc_service *svc)
@@ -2485,7 +2555,7 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc)
            ptlrpc_server_handle_request */
         LASSERT(svc->srv_threads_min >= 2);
         for (i = 0; i < svc->srv_threads_min; i++) {
-                rc = ptlrpc_start_thread(svc);
+               rc = ptlrpc_start_thread(svc->srv_part);
                 /* We have enough threads, don't start more.  b=15759 */
                 if (rc == -EMFILE) {
                         rc = 0;
@@ -2501,46 +2571,49 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc)
         RETURN(rc);
 }
 
-int ptlrpc_start_thread(struct ptlrpc_service *svc)
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt)
 {
-        struct l_wait_info lwi = { 0 };
-        struct ptlrpc_svc_data d;
-        struct ptlrpc_thread *thread;
-        char name[32];
-        int rc;
-        ENTRY;
+       struct l_wait_info      lwi = { 0 };
+       struct ptlrpc_svc_data  d;
+       struct ptlrpc_thread    *thread;
+       struct ptlrpc_service   *svc = svcpt->scp_service;
+       char                    name[32];
+       int                     rc;
+       ENTRY;
 
-        CDEBUG(D_RPCTRACE, "%s started %d min %d max %d running %d\n",
-               svc->srv_name, svc->srv_threads_running, svc->srv_threads_min,
-               svc->srv_threads_max, svc->srv_threads_running);
+       LASSERT(svcpt != NULL);
 
-        if (unlikely(svc->srv_is_stopping))
-                RETURN(-ESRCH);
+       CDEBUG(D_RPCTRACE, "%s started %d min %d max %d\n",
+              svc->srv_name, svcpt->scp_nthrs_running,
+              svc->srv_threads_min, svc->srv_threads_max);
 
-        if (!ptlrpc_threads_increasable(svc) ||
-            (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
-             svc->srv_threads_running == svc->srv_threads_min - 1))
-                RETURN(-EMFILE);
+       if (unlikely(svc->srv_is_stopping))
+               RETURN(-ESRCH);
 
-        OBD_ALLOC_PTR(thread);
-        if (thread == NULL)
-                RETURN(-ENOMEM);
-        cfs_waitq_init(&thread->t_ctl_waitq);
+       if (!ptlrpc_threads_increasable(svcpt) ||
+           (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
+            svcpt->scp_nthrs_running == svc->srv_threads_min - 1))
+               RETURN(-EMFILE);
 
-        cfs_spin_lock(&svc->srv_lock);
-        if (!ptlrpc_threads_increasable(svc)) {
-                cfs_spin_unlock(&svc->srv_lock);
-                OBD_FREE_PTR(thread);
-                RETURN(-EMFILE);
-        }
+       OBD_ALLOC_PTR(thread);
+       if (thread == NULL)
+               RETURN(-ENOMEM);
+       cfs_waitq_init(&thread->t_ctl_waitq);
+
+       cfs_spin_lock(&svcpt->scp_lock);
+       if (!ptlrpc_threads_increasable(svcpt)) {
+               cfs_spin_unlock(&svcpt->scp_lock);
+               OBD_FREE_PTR(thread);
+               RETURN(-EMFILE);
+       }
 
-        svc->srv_threads_starting++;
-        thread->t_id    = svc->srv_threads_next_id++;
-        thread_add_flags(thread, SVC_STARTING);
-        thread->t_svc   = svc;
+       svcpt->scp_nthrs_starting++;
+       thread->t_id = svcpt->scp_thr_nextid++;
+       thread_add_flags(thread, SVC_STARTING);
+       thread->t_svcpt = svcpt;
 
-        cfs_list_add(&thread->t_link, &svc->srv_threads);
-        cfs_spin_unlock(&svc->srv_lock);
+       cfs_list_add(&thread->t_link, &svcpt->scp_threads);
+       cfs_spin_unlock(&svcpt->scp_lock);
 
         sprintf(name, "%s_%02d", svc->srv_thread_name, thread->t_id);
         d.svc = svc;
@@ -2556,10 +2629,10 @@ int ptlrpc_start_thread(struct ptlrpc_service *svc)
         if (rc < 0) {
                 CERROR("cannot start thread '%s': rc %d\n", name, rc);
 
-                cfs_spin_lock(&svc->srv_lock);
-                cfs_list_del(&thread->t_link);
-                --svc->srv_threads_starting;
-                cfs_spin_unlock(&svc->srv_lock);
+               cfs_spin_lock(&svcpt->scp_lock);
+               cfs_list_del(&thread->t_link);
+               --svcpt->scp_nthrs_starting;
+               cfs_spin_unlock(&svcpt->scp_lock);
 
                 OBD_FREE(thread, sizeof(*thread));
                 RETURN(rc);
@@ -2572,7 +2645,6 @@ int ptlrpc_start_thread(struct ptlrpc_service *svc)
         RETURN(rc);
 }
 
-
 int ptlrpc_hr_init(void)
 {
         int i;
@@ -2622,35 +2694,43 @@ void ptlrpc_hr_fini(void)
 /**
  * Wait until all already scheduled replies are processed.
  */
-static void ptlrpc_wait_replies(struct ptlrpc_service *svc)
-{
-        while (1) {
-                int rc;
-                struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
-                                                     NULL, NULL);
-                rc = l_wait_event(svc->srv_waitq, cfs_atomic_read(&svc-> \
-                                  srv_n_difficult_replies) == 0,
-                                  &lwi);
-                if (rc == 0)
-                        break;
-                CWARN("Unexpectedly long timeout %p\n", svc);
-        }
+static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
+{
+       while (1) {
+               int rc;
+               struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
+                                                    NULL, NULL);
+
+               rc = l_wait_event(svcpt->scp_waitq,
+                    cfs_atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+               if (rc == 0)
+                       break;
+               CWARN("Unexpectedly long timeout %s %p\n",
+                     svcpt->scp_service->srv_name, svcpt->scp_service);
+       }
 }
 
 int ptlrpc_unregister_service(struct ptlrpc_service *service)
 {
-        int                   rc;
-        struct l_wait_info    lwi;
-        cfs_list_t           *tmp;
-        struct ptlrpc_reply_state *rs, *t;
-        struct ptlrpc_at_array *array = &service->srv_at_array;
-        ENTRY;
+       struct l_wait_info              lwi;
+       struct ptlrpc_service_part      *svcpt;
+       struct ptlrpc_reply_state       *rs;
+       struct ptlrpc_reply_state       *t;
+       struct ptlrpc_at_array          *array;
+       cfs_list_t                      *tmp;
+       int                             rc;
+       ENTRY;
+
+       service->srv_is_stopping = 1;
+       svcpt = service->srv_part;
+
+       if (svcpt == NULL || /* no instance of ptlrpc_service_part */
+           svcpt->scp_service == NULL) /* it's not fully initialized */
+               GOTO(out, rc = 0);
 
-        service->srv_is_stopping = 1;
-        cfs_timer_disarm(&service->srv_at_timer);
+       cfs_timer_disarm(&svcpt->scp_at_timer);
 
-        ptlrpc_stop_all_threads(service);
-        LASSERT(cfs_list_empty(&service->srv_threads));
+       ptlrpc_stop_all_threads(service);
 
         cfs_spin_lock (&ptlrpc_all_services_lock);
         cfs_list_del_init (&service->srv_list);
@@ -2667,9 +2747,9 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
         rc = LNetClearLazyPortal(service->srv_req_portal);
         LASSERT (rc == 0);
 
-        /* Unlink all the request buffers.  This forces a 'final' event with
-         * its 'unlink' flag set for each posted rqbd */
-        cfs_list_for_each(tmp, &service->srv_active_rqbds) {
+       /* Unlink all the request buffers.  This forces a 'final' event with
+        * its 'unlink' flag set for each posted rqbd */
+       cfs_list_for_each(tmp, &svcpt->scp_rqbd_posted) {
                 struct ptlrpc_request_buffer_desc *rqbd =
                         cfs_list_entry(tmp, struct ptlrpc_request_buffer_desc,
                                        rqbd_list);
@@ -2681,9 +2761,9 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
         /* Wait for the network to release any buffers it's currently
          * filling */
         for (;;) {
-                cfs_spin_lock(&service->srv_lock);
-                rc = service->srv_nrqbd_receiving;
-                cfs_spin_unlock(&service->srv_lock);
+               cfs_spin_lock(&svcpt->scp_lock);
+               rc = svcpt->scp_nrqbds_posted;
+               cfs_spin_unlock(&svcpt->scp_lock);
 
                 if (rc == 0)
                         break;
@@ -2692,75 +2772,74 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
                  * timeout lets us CWARN for visibility of sluggish NALs */
                 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                            cfs_time_seconds(1), NULL, NULL);
-                rc = l_wait_event(service->srv_waitq,
-                                  service->srv_nrqbd_receiving == 0,
-                                  &lwi);
-                if (rc == -ETIMEDOUT)
-                        CWARN("Service %s waiting for request buffers\n",
-                              service->srv_name);
-        }
-
-        /* schedule all outstanding replies to terminate them */
-        cfs_spin_lock(&service->srv_rs_lock);
-        while (!cfs_list_empty(&service->srv_active_replies)) {
-                struct ptlrpc_reply_state *rs =
-                        cfs_list_entry(service->srv_active_replies.next,
-                                       struct ptlrpc_reply_state, rs_list);
-                cfs_spin_lock(&rs->rs_lock);
-                ptlrpc_schedule_difficult_reply(rs);
-                cfs_spin_unlock(&rs->rs_lock);
-        }
-        cfs_spin_unlock(&service->srv_rs_lock);
-
-        /* purge the request queue.  NB No new replies (rqbds all unlinked)
-         * and no service threads, so I'm the only thread noodling the
-         * request queue now */
-        while (!cfs_list_empty(&service->srv_req_in_queue)) {
-                struct ptlrpc_request *req =
-                        cfs_list_entry(service->srv_req_in_queue.next,
-                                       struct ptlrpc_request,
-                                       rq_list);
-
-                cfs_list_del(&req->rq_list);
-                service->srv_n_queued_reqs--;
-                service->srv_n_active_reqs++;
-                ptlrpc_server_finish_request(service, req);
-        }
-        while (ptlrpc_server_request_pending(service, 1)) {
-                struct ptlrpc_request *req;
-
-                req = ptlrpc_server_request_get(service, 1);
-                cfs_list_del(&req->rq_list);
-                service->srv_n_active_reqs++;
-                ptlrpc_server_finish_request(service, req);
-        }
-        LASSERT(service->srv_n_queued_reqs == 0);
-        LASSERT(service->srv_n_active_reqs == 0);
-        LASSERT(service->srv_n_history_rqbds == 0);
-        LASSERT(cfs_list_empty(&service->srv_active_rqbds));
-
-        /* Now free all the request buffers since nothing references them
-         * any more... */
-        while (!cfs_list_empty(&service->srv_idle_rqbds)) {
-                struct ptlrpc_request_buffer_desc *rqbd =
-                        cfs_list_entry(service->srv_idle_rqbds.next,
-                                       struct ptlrpc_request_buffer_desc,
-                                       rqbd_list);
+               rc = l_wait_event(svcpt->scp_waitq,
+                                 svcpt->scp_nrqbds_posted == 0, &lwi);
+               if (rc == -ETIMEDOUT)
+                       CWARN("Service %s waiting for request buffers\n",
+                             service->srv_name);
+       }
 
-                ptlrpc_free_rqbd(rqbd);
-        }
+       /* schedule all outstanding replies to terminate them */
+       cfs_spin_lock(&svcpt->scp_rep_lock);
+       while (!cfs_list_empty(&svcpt->scp_rep_active)) {
+               struct ptlrpc_reply_state *rs =
+                       cfs_list_entry(svcpt->scp_rep_active.next,
+                                      struct ptlrpc_reply_state, rs_list);
+               cfs_spin_lock(&rs->rs_lock);
+               ptlrpc_schedule_difficult_reply(rs);
+               cfs_spin_unlock(&rs->rs_lock);
+       }
+       cfs_spin_unlock(&svcpt->scp_rep_lock);
+
+       /* purge the request queue.  NB No new replies (rqbds all unlinked)
+        * and no service threads, so I'm the only thread noodling the
+        * request queue now */
+       while (!cfs_list_empty(&svcpt->scp_req_incoming)) {
+               struct ptlrpc_request *req =
+                       cfs_list_entry(svcpt->scp_req_incoming.next,
+                                      struct ptlrpc_request,
+                                      rq_list);
+
+               cfs_list_del(&req->rq_list);
+               svcpt->scp_nreqs_incoming--;
+               svcpt->scp_nreqs_active++;
+               ptlrpc_server_finish_request(svcpt, req);
+       }
+       while (ptlrpc_server_request_pending(svcpt, 1)) {
+               struct ptlrpc_request *req;
 
-        ptlrpc_wait_replies(service);
+               req = ptlrpc_server_request_get(svcpt, 1);
+               cfs_list_del(&req->rq_list);
+               svcpt->scp_nreqs_active++;
+               ptlrpc_server_finish_request(svcpt, req);
+       }
+       LASSERT(svcpt->scp_nreqs_incoming == 0);
+       LASSERT(svcpt->scp_nreqs_active == 0);
+       LASSERT(svcpt->scp_hist_nrqbds == 0);
+       LASSERT(cfs_list_empty(&svcpt->scp_rqbd_posted));
+
+       /* Now free all the request buffers since nothing references them
+        * any more... */
+       while (!cfs_list_empty(&svcpt->scp_rqbd_idle)) {
+               struct ptlrpc_request_buffer_desc *rqbd =
+                       cfs_list_entry(svcpt->scp_rqbd_idle.next,
+                                      struct ptlrpc_request_buffer_desc,
+                                      rqbd_list);
+
+               ptlrpc_free_rqbd(rqbd);
+       }
 
-        cfs_list_for_each_entry_safe(rs, t, &service->srv_free_rs_list,
-                                     rs_list) {
-                cfs_list_del(&rs->rs_list);
-                OBD_FREE_LARGE(rs, service->srv_max_reply_size);
-        }
+       ptlrpc_wait_replies(svcpt);
+
+       cfs_list_for_each_entry_safe(rs, t, &svcpt->scp_rep_idle, rs_list) {
+               cfs_list_del(&rs->rs_list);
+               OBD_FREE_LARGE(rs, service->srv_max_reply_size);
+       }
 
-        /* In case somebody rearmed this in the meantime */
-        cfs_timer_disarm(&service->srv_at_timer);
+       /* In case somebody rearmed this in the meantime */
+       cfs_timer_disarm(&svcpt->scp_at_timer);
 
+       array = &svcpt->scp_at_array;
         if (array->paa_reqs_array != NULL) {
                 OBD_FREE(array->paa_reqs_array,
                          sizeof(cfs_list_t) * array->paa_size);
@@ -2773,8 +2852,10 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
                 array->paa_reqs_count= NULL;
         }
 
-        OBD_FREE_PTR(service);
-        RETURN(0);
+       OBD_FREE_PTR(svcpt);
+ out:
+       OBD_FREE_PTR(service);
+       RETURN(0);
 }
 
 /**
@@ -2785,37 +2866,41 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
  * to be shot, so it's intentionally non-aggressive. */
 int ptlrpc_service_health_check(struct ptlrpc_service *svc)
 {
-        struct ptlrpc_request *request;
-        struct timeval         right_now;
-        long                   timediff;
+       struct ptlrpc_service_part      *svcpt;
+       struct ptlrpc_request           *request;
+       struct timeval                  right_now;
+       long                            timediff;
 
-        if (svc == NULL)
-                return 0;
+       if (svc == NULL || svc->srv_part == NULL)
+               return 0;
 
-        cfs_gettimeofday(&right_now);
+       cfs_gettimeofday(&right_now);
 
-        cfs_spin_lock(&svc->srv_rq_lock);
-        if (!ptlrpc_server_request_pending(svc, 1)) {
-                cfs_spin_unlock(&svc->srv_rq_lock);
-                return 0;
-        }
+       svcpt = svc->srv_part;
+       cfs_spin_lock(&svcpt->scp_req_lock);
+       if (!ptlrpc_server_request_pending(svcpt, 1)) {
+               cfs_spin_unlock(&svcpt->scp_req_lock);
+               return 0;
+       }
 
-        /* How long has the next entry been waiting? */
-        if (cfs_list_empty(&svc->srv_request_queue))
-                request = cfs_list_entry(svc->srv_request_hpq.next,
-                                         struct ptlrpc_request, rq_list);
-        else
-                request = cfs_list_entry(svc->srv_request_queue.next,
-                                         struct ptlrpc_request, rq_list);
-        timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
-        cfs_spin_unlock(&svc->srv_rq_lock);
+       /* How long has the next entry been waiting? */
+       if (cfs_list_empty(&svcpt->scp_req_pending)) {
+               request = cfs_list_entry(svcpt->scp_hreq_pending.next,
+                                        struct ptlrpc_request, rq_list);
+       } else {
+               request = cfs_list_entry(svcpt->scp_req_pending.next,
+                                        struct ptlrpc_request, rq_list);
+       }
 
-        if ((timediff / ONE_MILLION) > (AT_OFF ? obd_timeout * 3/2 :
-                                        at_max)) {
-                CERROR("%s: unhealthy - request has been waiting %lds\n",
-                       svc->srv_name, timediff / ONE_MILLION);
-                return (-1);
-        }
+       timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
+       cfs_spin_unlock(&svcpt->scp_req_lock);
 
-        return 0;
+       if ((timediff / ONE_MILLION) >
+           (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+               CERROR("%s: unhealthy - request has been waiting %lds\n",
+                      svcpt->scp_service->srv_name, timediff / ONE_MILLION);
+               return -1;
+       }
+
+       return 0;
 }
index 7d92270..b6905f4 100644 (file)
@@ -1128,7 +1128,8 @@ schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
                         if (oti->oti_thread->t_watchdog)
                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
-                                      CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
+                                       ptlrpc_server_get_timeout(\
+                                               oti->oti_thread->t_svcpt));
                 } else {
                         cfs_spin_unlock(&qctxt->lqc_lock);
                 }
index 1e68ade..77b10ec 100644 (file)
@@ -452,23 +452,27 @@ static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
         while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
                                         inode, frags)) &
                QUOTA_RET_ACQUOTA) {
-
-                cfs_spin_lock(&qctxt->lqc_lock);
-                if (!qctxt->lqc_import && oti) {
-                        cfs_spin_unlock(&qctxt->lqc_lock);
-                        LASSERT(oti->oti_thread);
-                        /* The recovery thread doesn't have watchdog
-                         * attached. LU-369 */
-                        if (oti->oti_thread->t_watchdog)
-                                lc_watchdog_disable(oti->oti_thread->\
-                                                t_watchdog);
-                        CDEBUG(D_QUOTA, "sleep for quota master\n");
-                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
-                                     &lwi);
-                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
-                        if (oti->oti_thread->t_watchdog)
-                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
-                                       CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
+               struct ptlrpc_thread *thr = oti != NULL ?
+                                           oti->oti_thread : NULL;
+
+               cfs_spin_lock(&qctxt->lqc_lock);
+               if (!qctxt->lqc_import && oti != NULL) {
+                       cfs_spin_unlock(&qctxt->lqc_lock);
+
+                       LASSERT(thr != NULL);
+                       /* The recovery thread doesn't have watchdog
+                        * attached. LU-369 */
+                       if (thr->t_watchdog != NULL)
+                               lc_watchdog_disable(thr->t_watchdog);
+                       CDEBUG(D_QUOTA, "sleep for quota master\n");
+                       l_wait_event(qctxt->lqc_wait_for_qmaster,
+                                    check_qm(qctxt), &lwi);
+
+                       CDEBUG(D_QUOTA, "wake up when quota master is back\n");
+                       if (thr->t_watchdog != NULL) {
+                               lc_watchdog_touch(thr->t_watchdog,
+                                  ptlrpc_server_get_timeout(thr->t_svcpt));
+                       }
                 } else {
                         cfs_spin_unlock(&qctxt->lqc_lock);
                 }
@@ -510,9 +514,9 @@ static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
                         cfs_waitq_t        waitq;
                         struct l_wait_info lwi;
 
-                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
-                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
-                                       CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
+                       if (thr != NULL && thr->t_watchdog != NULL)
+                               lc_watchdog_touch(thr->t_watchdog,
+                                  ptlrpc_server_get_timeout(thr->t_svcpt));
                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
                                count_err++);