From 2ec741f46ac31cb566ea17d8ffda122398896102 Mon Sep 17 00:00:00 2001
From: Liang Zhen <liang.zhen@intel.com>
Date: Sun, 5 Jan 2014 23:00:26 +0800
Subject: [PATCH] LU-181 ptlrpc: reorganize ptlrpc_request

Some members of struct ptlrpc_request are used only on the client
side and others only on the server side. This patch moves those
members into two separate structures and places them in a union.

This decreases the size of ptlrpc_request by about 300 bytes. Besides
saving memory, it also reduces the memory footprint while requests
are being processed.

Another change in this patch is that osp no longer uses rq_exp_list,
because it is now a server-only member. Instead, osp uses
ptlrpc_req_async_args to store the commit_cb parameters.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: Id910ac225b8e9d33a0cae40b3124ce55f1a3fbc9
Reviewed-on: http://review.whamcloud.com/8806
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
---
 lustre/include/lustre_net.h     | 403 +++++++++++++++++++++++-----------------
 lustre/ldlm/ldlm_lib.c          |   6 +-
 lustre/osp/osp_sync.c           |  61 +++---
 lustre/ptlrpc/client.c          |  69 ++-----
 lustre/ptlrpc/events.c          |  17 +-
 lustre/ptlrpc/niobuf.c          |   2 +-
 lustre/ptlrpc/ptlrpc_internal.h |  34 ++++
 lustre/ptlrpc/ptlrpcd.c         |   7 +-
 lustre/ptlrpc/sec.c             |  15 +-
 9 files changed, 344 insertions(+), 270 deletions(-)

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index a61a02c..f39d8d7 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -499,6 +499,11 @@
 /* Macro to hide a typecast. */
 #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
 
+struct ptlrpc_replay_async_args {
+	int		praa_old_state;
+	int		praa_old_status;
+};
+
 /**
  * Structure to single define portal connection.
  */
@@ -1771,6 +1776,167 @@ struct ptlrpc_hpreq_ops {
         void (*hpreq_fini)(struct ptlrpc_request *);
 };
 
+struct ptlrpc_cli_req {
+	/** For bulk requests on client only: bulk descriptor */
+	struct ptlrpc_bulk_desc		*cr_bulk;
+	/** optional time limit for send attempts */
+	cfs_duration_t			 cr_delay_limit;
+	/** time request was first queued */
+	cfs_time_t			 cr_queued_time;
+	/** request sent timeval */
+	struct timeval			 cr_sent_tv;
+	/** time for request really sent out */
+	time_t				 cr_sent_out;
+	/** when req reply unlink must finish. */
+	time_t				 cr_reply_deadline;
+	/** when req bulk unlink must finish. */
+	time_t				 cr_bulk_deadline;
+	/** Portal to which this request would be sent */
+	short				 cr_req_ptl;
+	/** Portal where to wait for reply and where reply would be sent */
+	short				 cr_rep_ptl;
+	/** request resending number */
+	unsigned int			 cr_resend_nr;
+	/** What was import generation when this request was sent */
+	int				 cr_imp_gen;
+	enum lustre_imp_state		 cr_send_state;
+	/** Per-request waitq introduced by bug 21938 for recovery waiting */
+	wait_queue_head_t		 cr_set_waitq;
+	/** Link item for request set lists */
+	struct list_head		 cr_set_chain;
+	/** link to waited ctx */
+	struct list_head		 cr_ctx_chain;
+
+	/** client's half ctx */
+	struct ptlrpc_cli_ctx		*cr_cli_ctx;
+	/** Link back to the request set */
+	struct ptlrpc_request_set	*cr_set;
+	/** outgoing request MD handle */
+	lnet_handle_md_t		 cr_req_md_h;
+	/** request-out callback parameter */
+	struct ptlrpc_cb_id		 cr_req_cbid;
+	/** incoming reply MD handle */
+	lnet_handle_md_t		 cr_reply_md_h;
+	wait_queue_head_t		 cr_reply_waitq;
+	/** reply callback parameter */
+	struct ptlrpc_cb_id		 cr_reply_cbid;
+	/** Async completion handler, called when reply is received */
+	ptlrpc_interpterer_t		 cr_reply_interp;
+	/** Resend handler, called when request is resent to update RPC data */
+	ptlrpc_resend_cb_t		 cr_resend_cb;
+	/** Async completion context */
+	union ptlrpc_async_args		 cr_async_args;
+	/** Opaque data for replay and commit callbacks. */
+	void				*cr_cb_data;
+	/**
+	 * Commit callback, called when request is committed and about to be
+	 * freed.
+	 */
+	void (*cr_commit_cb)(struct ptlrpc_request *);
+	/** Replay callback, called after request is replayed at recovery */
+	void (*cr_replay_cb)(struct ptlrpc_request *);
+};
+
+/** client request member alias */
+/* NB: these aliases should NOT be used by any new code; instead they should
+ * be removed step by step to avoid potential abuse */
+#define rq_bulk			rq_cli.cr_bulk
+#define rq_delay_limit		rq_cli.cr_delay_limit
+#define rq_queued_time		rq_cli.cr_queued_time
+#define rq_sent_tv		rq_cli.cr_sent_tv
+#define rq_real_sent		rq_cli.cr_sent_out
+#define rq_reply_deadline	rq_cli.cr_reply_deadline
+#define rq_bulk_deadline	rq_cli.cr_bulk_deadline
+#define rq_nr_resend		rq_cli.cr_resend_nr
+#define rq_request_portal	rq_cli.cr_req_ptl
+#define rq_reply_portal		rq_cli.cr_rep_ptl
+#define rq_import_generation	rq_cli.cr_imp_gen
+#define rq_send_state		rq_cli.cr_send_state
+#define rq_set_chain		rq_cli.cr_set_chain
+#define rq_ctx_chain		rq_cli.cr_ctx_chain
+#define rq_set			rq_cli.cr_set
+#define rq_set_waitq		rq_cli.cr_set_waitq
+#define rq_cli_ctx		rq_cli.cr_cli_ctx
+#define rq_req_md_h		rq_cli.cr_req_md_h
+#define rq_req_cbid		rq_cli.cr_req_cbid
+#define rq_reply_md_h		rq_cli.cr_reply_md_h
+#define rq_reply_waitq		rq_cli.cr_reply_waitq
+#define rq_reply_cbid		rq_cli.cr_reply_cbid
+#define rq_interpret_reply	rq_cli.cr_reply_interp
+#define rq_resend_cb		rq_cli.cr_resend_cb
+#define rq_async_args		rq_cli.cr_async_args
+#define rq_cb_data		rq_cli.cr_cb_data
+#define rq_commit_cb		rq_cli.cr_commit_cb
+#define rq_replay_cb		rq_cli.cr_replay_cb
+
+struct ptlrpc_srv_req {
+	/** initial thread servicing this request */
+	struct ptlrpc_thread		*sr_svc_thread;
+	/**
+	 * Server side list of incoming unserved requests sorted by arrival
+	 * time.  Traversed from time to time to notice requests that are
+	 * about to expire and send "early replies" back to clients, letting
+	 * them know the server is alive and well, just too busy to service
+	 * their requests in time
+	 */
+	struct list_head		 sr_timed_list;
+	/** server-side per-export list */
+	struct list_head		 sr_exp_list;
+	/** server-side history, used for debugging purposes. */
+	struct list_head		 sr_hist_list;
+	/** history sequence # */
+	__u64				 sr_hist_seq;
+	/** the index of service's srv_at_array into which request is linked */
+	time_t				 sr_at_index;
+	/** authed uid */
+	uid_t				 sr_auth_uid;
+	/** authed uid mapped to */
+	uid_t				 sr_auth_mapped_uid;
+	/** RPC is generated from what part of Lustre */
+	enum lustre_sec_part		 sr_sp_from;
+	/** request session context */
+	struct lu_context		 sr_ses;
+	/** \addtogroup  nrs
+	 * @{
+	 */
+	/** stub for NRS request */
+	struct ptlrpc_nrs_request	 sr_nrq;
+	/** @} nrs */
+	/** request arrival time */
+	struct timeval			 sr_arrival_time;
+	/** server's half ctx */
+	struct ptlrpc_svc_ctx		*sr_svc_ctx;
+	/** (server side), pointed directly into req buffer */
+	struct ptlrpc_user_desc		*sr_user_desc;
+	/** separated reply state */
+	struct ptlrpc_reply_state	*sr_reply_state;
+	/** server-side hp handlers */
+	struct ptlrpc_hpreq_ops		*sr_ops;
+	/** incoming request buffer */
+	struct ptlrpc_request_buffer_desc *sr_rqbd;
+};
+
+/** server request member alias */
+/* NB: these aliases should NOT be used by any new code; instead they should
+ * be removed step by step to avoid potential abuse */
+#define rq_svc_thread		rq_srv.sr_svc_thread
+#define rq_timed_list		rq_srv.sr_timed_list
+#define rq_exp_list		rq_srv.sr_exp_list
+#define rq_history_list		rq_srv.sr_hist_list
+#define rq_history_seq		rq_srv.sr_hist_seq
+#define rq_at_index		rq_srv.sr_at_index
+#define rq_auth_uid		rq_srv.sr_auth_uid
+#define rq_auth_mapped_uid	rq_srv.sr_auth_mapped_uid
+#define rq_sp_from		rq_srv.sr_sp_from
+#define rq_session		rq_srv.sr_ses
+#define rq_nrq			rq_srv.sr_nrq
+#define rq_arrival_time		rq_srv.sr_arrival_time
+#define rq_reply_state		rq_srv.sr_reply_state
+#define rq_svc_ctx		rq_srv.sr_svc_ctx
+#define rq_user_desc		rq_srv.sr_user_desc
+#define rq_ops			rq_srv.sr_ops
+#define rq_rqbd			rq_srv.sr_rqbd
+
 /**
  * Represents remote procedure call.
  *
@@ -1779,46 +1945,18 @@ struct ptlrpc_hpreq_ops {
  */
 struct ptlrpc_request {
 	/* Request type: one of PTL_RPC_MSG_* */
-	int			rq_type;
+	int				 rq_type;
 	/** Result of request processing */
-	int			rq_status;
+	int				 rq_status;
 	/**
 	 * Linkage item through which this request is included into
 	 * sending/delayed lists on client and into rqbd list on server
 	 */
-	struct list_head	rq_list;
-	/**
-	 * Server side list of incoming unserved requests sorted by arrival
-	 * time.  Traversed from time to time to notice about to expire
-	 * requests and sent back "early replies" to clients to let them
-	 * know server is alive and well, just very busy to service their
-	 * requests in time
-	 */
-	struct list_head	rq_timed_list;
-	/** server-side history, used for debuging purposes. */
-	struct list_head	rq_history_list;
-	/** server-side per-export list */
-	struct list_head	rq_exp_list;
-	/** server-side hp handlers */
-	struct ptlrpc_hpreq_ops	*rq_ops;
-
-	/** initial thread servicing this request */
-	struct ptlrpc_thread	*rq_svc_thread;
-
-        /** history sequence # */
-	__u64			rq_history_seq;
-	/** \addtogroup  nrs
-	 * @{
+	struct list_head		 rq_list;
+	/** Lock to protect request flags and some other important bits, like
+	 * rq_list
 	 */
-	/** stub for NRS request */
-	struct ptlrpc_nrs_request rq_nrq;
-	/** @} nrs */
-        /** the index of service's srv_at_array into which request is linked */
-        time_t rq_at_index;
-        /** Lock to protect request flags and some other important bits, like
-         * rq_list
-         */
-	spinlock_t rq_lock;
+	spinlock_t			 rq_lock;
 	/** client-side flags are serialized by rq_lock */
 	unsigned int rq_intr:1, rq_replied:1, rq_err:1,
                 rq_timedout:1, rq_resend:1, rq_restart:1,
@@ -1854,18 +1992,15 @@ struct ptlrpc_request {
 		/* bulk request, sent to server, but uncommitted */
 		rq_unstable:1;
 
-	unsigned int rq_nr_resend;
-
-	enum rq_phase rq_phase; /* one of RQ_PHASE_* */
-	enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
-	atomic_t rq_refcount;/* client-side refcount for SENT race,
-				    server-side refcounf for multiple replies */
-
-	/** Portal to which this request would be sent */
-	short rq_request_portal;  /* XXX FIXME bug 249 */
-	/** Portal where to wait for reply and where reply would be sent */
-	short rq_reply_portal;    /* XXX FIXME bug 249 */
-
+	/** one of RQ_PHASE_* */
+	enum rq_phase			 rq_phase;
+	/** one of RQ_PHASE_* to be used next */
+	enum rq_phase			 rq_next_phase;
+	/**
+	 * client-side refcount for SENT race, server-side refcount
+	 * for multiple replies
+	 */
+	atomic_t			 rq_refcount;
         /**
          * client-side:
          * !rq_truncate : # reply bytes actually received,
@@ -1876,6 +2011,8 @@ struct ptlrpc_request {
         int rq_reqlen;
         /** Reply length */
         int rq_replen;
+	/** Pool if request is from preallocated list */
+	struct ptlrpc_request_pool	*rq_pool;
 	/** Request message - what client sent */
 	struct lustre_msg *rq_reqmsg;
         /** Reply message - server response */
@@ -1884,22 +2021,23 @@ struct ptlrpc_request {
         __u64 rq_transno;
         /** xid */
         __u64 rq_xid;
-        /**
-         * List item to for replay list. Not yet commited requests get linked
-         * there.
-         * Also see \a rq_replay comment above.
-         */
-	struct list_head	rq_replay_list;
-
+	/**
+	 * List item for replay list. Not yet committed requests get linked
+	 * there.
+	 * Also see \a rq_replay comment above.
+	 * It's also the link chain on obd_export::exp_req_replay_queue
+	 */
+	struct list_head		 rq_replay_list;
+	/** non-shared members for client & server request */
+	union {
+		struct ptlrpc_cli_req	 rq_cli;
+		struct ptlrpc_srv_req	 rq_srv;
+	};
 	/**
 	 * security and encryption data
 	 * @{ */
-	struct ptlrpc_cli_ctx   *rq_cli_ctx;	/**< client's half ctx */
-	struct ptlrpc_svc_ctx   *rq_svc_ctx;	/**< server's half ctx */
-	struct list_head	rq_ctx_chain;	/**< link to waited ctx */
-
-	struct sptlrpc_flavor	rq_flvr;	/**< for client & server */
-	enum lustre_sec_part	rq_sp_from;
+	/** description of flavors for client & server */
+	struct sptlrpc_flavor		 rq_flvr;
 
 	/* client/server security flags */
 	unsigned int
@@ -1918,19 +2056,16 @@ struct ptlrpc_request {
                                  rq_pack_bulk:1,
                                  /* doesn't expect reply FIXME */
                                  rq_no_reply:1,
-                                 rq_pill_init:1;     /* pill initialized */
-
-        uid_t                    rq_auth_uid;        /* authed uid */
-        uid_t                    rq_auth_mapped_uid; /* authed uid mapped to */
+				 rq_pill_init:1, /* pill initialized */
+				 rq_srv_req:1; /* server request */
 
-        /* (server side), pointed directly into req buffer */
-        struct ptlrpc_user_desc *rq_user_desc;
 
-        /* various buffer pointers */
-        struct lustre_msg       *rq_reqbuf;      /* req wrapper */
-	char                    *rq_repbuf;      /* rep buffer */
-	struct lustre_msg       *rq_repdata;     /* rep wrapper msg */
-	struct lustre_msg       *rq_clrbuf;      /* only in priv mode */
+	/** various buffer pointers */
+	struct lustre_msg		*rq_reqbuf;      /**< req wrapper */
+	char				*rq_repbuf;      /**< rep buffer */
+	struct lustre_msg		*rq_repdata;     /**< rep wrapper msg */
+	/** only in priv mode */
+	struct lustre_msg		*rq_clrbuf;
         int                      rq_reqbuf_len;  /* req wrapper buf len */
         int                      rq_reqdata_len; /* req wrapper msg len */
         int                      rq_repbuf_len;  /* rep buffer len */
@@ -1939,107 +2074,37 @@ struct ptlrpc_request {
         int                      rq_clrdata_len; /* only in priv mode */
 
 	/** early replies go to offset 0, regular replies go after that */
-	unsigned int             rq_reply_off;
-
-        /** @} */
-
-        /** Fields that help to see if request and reply were swabbed or not */
-        __u32 rq_req_swab_mask;
-        __u32 rq_rep_swab_mask;
-
-        /** What was import generation when this request was sent */
-        int rq_import_generation;
-        enum lustre_imp_state rq_send_state;
-
-        /** how many early replies (for stats) */
-        int rq_early_count;
-
-        /** client+server request */
-        lnet_handle_md_t     rq_req_md_h;
-        struct ptlrpc_cb_id  rq_req_cbid;
-        /** optional time limit for send attempts */
-        cfs_duration_t       rq_delay_limit;
-        /** time request was first queued */
-        cfs_time_t           rq_queued_time;
-
-        /* server-side... */
-        /** request arrival time */
-        struct timeval       rq_arrival_time;
-        /** separated reply state */
-        struct ptlrpc_reply_state *rq_reply_state;
-        /** incoming request buffer */
-        struct ptlrpc_request_buffer_desc *rq_rqbd;
-
-	/** client-only incoming reply */
-	lnet_handle_md_t     rq_reply_md_h;
-	wait_queue_head_t    rq_reply_waitq;
-	struct ptlrpc_cb_id  rq_reply_cbid;
-
-        /** our LNet NID */
-        lnet_nid_t           rq_self;
-        /** Peer description (the other side) */
-        lnet_process_id_t    rq_peer;
-        /** Server-side, export on which request was received */
-        struct obd_export   *rq_export;
-        /** Client side, import where request is being sent */
-        struct obd_import   *rq_import;
-
-        /** Replay callback, called after request is replayed at recovery */
-        void (*rq_replay_cb)(struct ptlrpc_request *);
-        /**
-         * Commit callback, called when request is committed and about to be
-         * freed.
-         */
-        void (*rq_commit_cb)(struct ptlrpc_request *);
-        /** Opaq data for replay and commit callbacks. */
-        void  *rq_cb_data;
-
-        /** For bulk requests on client only: bulk descriptor */
-        struct ptlrpc_bulk_desc *rq_bulk;
-
-        /** client outgoing req */
-        /**
-         * when request/reply sent (secs), or time when request should be sent
-         */
-        time_t rq_sent;
-        /** time for request really sent out */
-        time_t rq_real_sent;
-
-        /** when request must finish. volatile
-         * so that servers' early reply updates to the deadline aren't
-         * kept in per-cpu cache */
-        volatile time_t rq_deadline;
-        /** when req reply unlink must finish. */
-        time_t rq_reply_deadline;
-        /** when req bulk unlink must finish. */
-        time_t rq_bulk_deadline;
-        /**
-         * service time estimate (secs) 
-         * If the requestsis not served by this time, it is marked as timed out.
-         */
-        int    rq_timeout;
+	unsigned int			 rq_reply_off;
 
-        /** Multi-rpc bits */
-        /** Per-request waitq introduced by bug 21938 for recovery waiting */
-	wait_queue_head_t rq_set_waitq;
-	/** Link item for request set lists */
-	struct list_head  rq_set_chain;
-        /** Link back to the request set */
-        struct ptlrpc_request_set *rq_set;
-        /** Async completion handler, called when reply is received */
-        ptlrpc_interpterer_t rq_interpret_reply;
-	/** Resend handler, called when request is resend to update RPC data */
-	ptlrpc_resend_cb_t rq_resend_cb;
-        /** Async completion context */
-        union ptlrpc_async_args rq_async_args;
-
-        /** Pool if request is from preallocated list */
-        struct ptlrpc_request_pool *rq_pool;
-
-        struct lu_context           rq_session;
+	/** @} */
 
-        /** request format description */
-        struct req_capsule          rq_pill;
+	/** Fields that help to see if request and reply were swabbed or not */
+	__u32				 rq_req_swab_mask;
+	__u32				 rq_rep_swab_mask;
+
+	/** how many early replies (for stats) */
+	int				 rq_early_count;
+	/** Server-side, export on which request was received */
+	struct obd_export		*rq_export;
+	/** import where request is being sent */
+	struct obd_import		*rq_import;
+	/** our LNet NID */
+	lnet_nid_t			 rq_self;
+	/** Peer description (the other side) */
+	lnet_process_id_t		 rq_peer;
+	/**
+	 * service time estimate (secs)
+	 * If the request is not served by this time, it is marked as timed out.
+	 */
+	int				 rq_timeout;
+	/**
+	 * when request/reply sent (secs), or time when request should be sent
+	 */
+	time_t				 rq_sent;
+	/** when request must finish. */
+	time_t				 rq_deadline;
+	/** request format description */
+	struct req_capsule		 rq_pill;
 };
 
 /**
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 3db7adc..c3f6551 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -2652,6 +2652,7 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
 		long timeoutl = deadline - cfs_time_current_sec();
 		cfs_duration_t timeout = timeoutl <= 0 ?
 					 CFS_TICK : cfs_time_seconds(timeoutl);
+		time_t	rq_deadline;
 
 		*lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
 					    target_bulk_timeout, desc);
@@ -2663,9 +2664,10 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
 				  lwi);
 		LASSERT(rc == 0 || rc == -ETIMEDOUT);
 		/* Wait again if we changed rq_deadline. */
+		rq_deadline = ACCESS_ONCE(req->rq_deadline);
 		deadline = start + bulk_timeout;
-		if (deadline > req->rq_deadline)
-			deadline = req->rq_deadline;
+		if (deadline > rq_deadline)
+			deadline = rq_deadline;
 	} while ((rc == -ETIMEDOUT) &&
 		 (deadline > cfs_time_current_sec()));
 
diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c
index 083a751..6737bc4 100644
--- a/lustre/osp/osp_sync.c
+++ b/lustre/osp/osp_sync.c
@@ -94,14 +94,13 @@ static void osp_sync_remove_from_tracker(struct osp_device *d);
 
 #define OSP_JOB_MAGIC		0x26112005
 
-/**
- * Return status: whether OSP thread should keep running
- *
- * \param[in] d		OSP device
- *
- * \retval 1		should keep running
- * \retval 0		should stop
- */
+struct osp_job_req_args {
+	/** bytes reserved for ptlrpc_replay_req() */
+	struct ptlrpc_replay_async_args	jra_raa;
+	struct list_head		jra_link;
+	__u32				jra_magic;
+};
+
 static inline int osp_sync_running(struct osp_device *d)
 {
 	return !!(d->opd_syn_thread.t_flags & SVC_RUNNING);
@@ -410,6 +409,7 @@ int osp_sync_gap(const struct lu_env *env, struct osp_device *d,
 static void osp_sync_request_commit_cb(struct ptlrpc_request *req)
 {
 	struct osp_device *d = req->rq_cb_data;
+	struct osp_job_req_args *jra;
 
 	CDEBUG(D_HA, "commit req %p, transno "LPU64"\n", req, req->rq_transno);
 
@@ -418,15 +418,16 @@ static void osp_sync_request_commit_cb(struct ptlrpc_request *req)
 
 	/* do not do any opd_dyn_rpc_* accounting here
 	 * it's done in osp_sync_interpret sooner or later */
-
 	LASSERT(d);
-	LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC);
-	LASSERT(list_empty(&req->rq_exp_list));
+
+	jra = ptlrpc_req_async_args(req);
+	LASSERT(jra->jra_magic == OSP_JOB_MAGIC);
+	LASSERT(list_empty(&jra->jra_link));
 
 	ptlrpc_request_addref(req);
 
 	spin_lock(&d->opd_syn_lock);
-	list_add(&req->rq_exp_list, &d->opd_syn_committed_there);
+	list_add(&jra->jra_link, &d->opd_syn_committed_there);
 	spin_unlock(&d->opd_syn_lock);
 
 	/* XXX: some batching wouldn't hurt */
@@ -454,10 +455,12 @@ static int osp_sync_interpret(const struct lu_env *env,
 			      struct ptlrpc_request *req, void *aa, int rc)
 {
 	struct osp_device *d = req->rq_cb_data;
+	struct osp_job_req_args *jra = aa;
 
-	if (req->rq_svc_thread != (void *) OSP_JOB_MAGIC)
-		DEBUG_REQ(D_ERROR, req, "bad magic %p\n", req->rq_svc_thread);
-	LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC);
+	if (jra->jra_magic != OSP_JOB_MAGIC) {
+		DEBUG_REQ(D_ERROR, req, "bad magic %u\n", jra->jra_magic);
+		LBUG();
+	}
 	LASSERT(d);
 
 	CDEBUG(D_HA, "reply req %p/%d, rc %d, transno %u\n", req,
@@ -471,12 +474,12 @@ static int osp_sync_interpret(const struct lu_env *env,
 		 * but object doesn't exist anymore - cancell llog record
 		 */
 		LASSERT(req->rq_transno == 0);
-		LASSERT(list_empty(&req->rq_exp_list));
+		LASSERT(list_empty(&jra->jra_link));
 
 		ptlrpc_request_addref(req);
 
 		spin_lock(&d->opd_syn_lock);
-		list_add(&req->rq_exp_list, &d->opd_syn_committed_there);
+		list_add(&jra->jra_link, &d->opd_syn_committed_there);
 		spin_unlock(&d->opd_syn_lock);
 
 		wake_up(&d->opd_syn_waitq);
@@ -537,8 +540,13 @@ static int osp_sync_interpret(const struct lu_env *env,
 static void osp_sync_send_new_rpc(struct osp_device *d,
 				  struct ptlrpc_request *req)
 {
+	struct osp_job_req_args *jra;
+
 	LASSERT(d->opd_syn_rpc_in_flight <= d->opd_syn_max_rpc_in_flight);
-	LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC);
+
+	jra = ptlrpc_req_async_args(req);
+	jra->jra_magic = OSP_JOB_MAGIC;
+	INIT_LIST_HEAD(&jra->jra_link);
 
 	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
 }
@@ -596,8 +604,6 @@ static struct ptlrpc_request *osp_sync_new_job(struct osp_device *d,
 	body->oa.o_lcookie.lgc_lgl = llh->lgh_id;
 	body->oa.o_lcookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT;
 	body->oa.o_lcookie.lgc_index = h->lrh_index;
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	req->rq_svc_thread = (void *) OSP_JOB_MAGIC;
 
 	req->rq_interpret_reply = osp_sync_interpret;
 	req->rq_commit_cb = osp_sync_request_commit_cb;
@@ -769,9 +775,6 @@ static int osp_prep_unlink_update_req(const struct lu_env *env,
 	if (rc != 0)
 		GOTO(out, rc);
 
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	req->rq_svc_thread = (void *)OSP_JOB_MAGIC;
-
 	req->rq_interpret_reply = osp_sync_interpret;
 	req->rq_commit_cb = osp_sync_request_commit_cb;
 	req->rq_cb_data = osp;
@@ -969,7 +972,7 @@ static void osp_sync_process_committed(const struct lu_env *env,
 	struct obd_device	*obd = d->opd_obd;
 	struct obd_import	*imp = obd->u.cli.cl_import;
 	struct ost_body		*body;
-	struct ptlrpc_request	*req, *tmp;
+	struct ptlrpc_request	*req;
 	struct llog_ctxt	*ctxt;
 	struct llog_handle	*llh;
 	struct list_head	 list;
@@ -1008,12 +1011,16 @@ static void osp_sync_process_committed(const struct lu_env *env,
 	INIT_LIST_HEAD(&d->opd_syn_committed_there);
 	spin_unlock(&d->opd_syn_lock);
 
-	list_for_each_entry_safe(req, tmp, &list, rq_exp_list) {
+	while (!list_empty(&list)) {
 		struct llog_cookie *lcookie = NULL;
+		struct osp_job_req_args	*jra;
 
-		LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC);
-		list_del_init(&req->rq_exp_list);
+		jra = list_entry(list.next, struct osp_job_req_args, jra_link);
+		LASSERT(jra->jra_magic == OSP_JOB_MAGIC);
+		list_del_init(&jra->jra_link);
 
+		req = container_of((void *)jra, struct ptlrpc_request,
+				   rq_async_args);
 		if (d->opd_connect_mdt) {
 			struct object_update_request *ureq;
 			struct object_update *update;
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 422bdaf..b241515 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -614,7 +614,6 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
         lustre_msg_add_version(request->rq_reqmsg, version);
         request->rq_send_state = LUSTRE_IMP_FULL;
         request->rq_type = PTL_RPC_MSG_REQUEST;
-        request->rq_export = NULL;
 
         request->rq_req_cbid.cbid_fn  = request_out_callback;
         request->rq_req_cbid.cbid_arg = request;
@@ -631,19 +630,7 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 
         ptlrpc_at_set_req_timeout(request);
 
-	spin_lock_init(&request->rq_lock);
-	INIT_LIST_HEAD(&request->rq_list);
-	INIT_LIST_HEAD(&request->rq_timed_list);
-	INIT_LIST_HEAD(&request->rq_replay_list);
-	INIT_LIST_HEAD(&request->rq_ctx_chain);
-	INIT_LIST_HEAD(&request->rq_set_chain);
-	INIT_LIST_HEAD(&request->rq_history_list);
-	INIT_LIST_HEAD(&request->rq_exp_list);
-	init_waitqueue_head(&request->rq_reply_waitq);
-	init_waitqueue_head(&request->rq_set_waitq);
 	request->rq_xid = ptlrpc_next_xid();
-	atomic_set(&request->rq_refcount, 1);
-
 	lustre_msg_set_opc(request->rq_reqmsg, opcode);
 
 	RETURN(0);
@@ -720,7 +707,9 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
 		request = ptlrpc_request_cache_alloc(GFP_NOFS);
 
 	if (request) {
-		LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp);
+		ptlrpc_cli_req_init(request);
+
+		LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
 		LASSERT(imp != LP_POISON);
 		LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n",
 			imp->imp_client);
@@ -1310,7 +1299,7 @@ static int after_reply(struct ptlrpc_request *req)
 	}
 
 	do_gettimeofday(&work_start);
-	timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL);
+	timediff = cfs_timeval_sub(&work_start, &req->rq_sent_tv, NULL);
 	if (obd->obd_svc_stats != NULL) {
 		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
 				    timediff);
@@ -2266,24 +2255,23 @@ EXPORT_SYMBOL(ptlrpc_set_wait);
  */
 static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
 {
-        ENTRY;
-        if (request == NULL) {
-                EXIT;
-                return;
-        }
+	ENTRY;
+
+	if (request == NULL)
+		RETURN_EXIT;
 
-        LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
-        LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */
+	LASSERT(!request->rq_srv_req);
+	LASSERT(request->rq_export == NULL);
+	LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
 	LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
 	LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
-	LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
-        LASSERTF(!request->rq_replay, "req %p\n", request);
+	LASSERTF(!request->rq_replay, "req %p\n", request);
 
-        req_capsule_fini(&request->rq_pill);
+	req_capsule_fini(&request->rq_pill);
 
-        /* We must take it off the imp_replay_list first.  Otherwise, we'll set
-         * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
-        if (request->rq_import != NULL) {
+	/* We must take it off the imp_replay_list first.  Otherwise, we'll set
+	 * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
+	if (request->rq_import != NULL) {
 		if (!locked)
 			spin_lock(&request->rq_import->imp_lock);
 		list_del_init(&request->rq_replay_list);
@@ -2300,10 +2288,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
 
         if (request->rq_repbuf != NULL)
                 sptlrpc_cli_free_repbuf(request);
-        if (request->rq_export != NULL) {
-                class_export_put(request->rq_export);
-                request->rq_export = NULL;
-        }
+
         if (request->rq_import != NULL) {
                 class_import_put(request->rq_import);
                 request->rq_import = NULL;
@@ -2738,11 +2723,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 }
 EXPORT_SYMBOL(ptlrpc_queue_wait);
 
-struct ptlrpc_replay_async_args {
-        int praa_old_state;
-        int praa_old_status;
-};
-
 /**
  * Callback used for replayed requests reply processing.
  * In case of succesful reply calls registeresd request replay callback.
@@ -3122,7 +3102,7 @@ static int ptlrpcd_check_work(struct ptlrpc_request *req)
 void *ptlrpcd_alloc_work(struct obd_import *imp,
 			 int (*cb)(const struct lu_env *, void *), void *cbdata)
 {
-	struct ptlrpc_request         *req = NULL;
+	struct ptlrpc_request	      *req = NULL;
 	struct ptlrpc_work_async_args *args;
 	ENTRY;
 
@@ -3138,10 +3118,11 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
 		RETURN(ERR_PTR(-ENOMEM));
 	}
 
+	ptlrpc_cli_req_init(req);
+
 	req->rq_send_state = LUSTRE_IMP_FULL;
 	req->rq_type = PTL_RPC_MSG_REQUEST;
 	req->rq_import = class_import_get(imp);
-	req->rq_export = NULL;
 	req->rq_interpret_reply = work_interpreter;
 	/* don't want reply */
 	req->rq_receiving_reply = 0;
@@ -3149,16 +3130,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
 	req->rq_no_delay = req->rq_no_resend = 1;
 	req->rq_pill.rc_fmt = (void *)&worker_format;
 
-	spin_lock_init(&req->rq_lock);
-	INIT_LIST_HEAD(&req->rq_list);
-	INIT_LIST_HEAD(&req->rq_replay_list);
-	INIT_LIST_HEAD(&req->rq_set_chain);
-	INIT_LIST_HEAD(&req->rq_history_list);
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	init_waitqueue_head(&req->rq_reply_waitq);
-	init_waitqueue_head(&req->rq_set_waitq);
-	atomic_set(&req->rq_refcount, 1);
-
 	CLASSERT (sizeof(*args) <= sizeof(req->rq_async_args));
 	args = ptlrpc_req_async_args(req);
 	args->cb     = cb;
diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c
index 1147ff3..ca4b0e6 100644
--- a/lustre/ptlrpc/events.c
+++ b/lustre/ptlrpc/events.c
@@ -325,22 +325,19 @@ void request_in_callback(lnet_event_t *ev)
                 }
         }
 
-        /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
-         * flags are reset and scalars are zero.  We only set the message
-         * size to non-zero if this was a successful receive. */
-        req->rq_xid = ev->match_bits;
-        req->rq_reqbuf = ev->md.start + ev->offset;
+	ptlrpc_srv_req_init(req);
+	/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
+	 * flags are reset and scalars are zero.  We only set the message
+	 * size to non-zero if this was a successful receive. */
+	req->rq_xid = ev->match_bits;
+	req->rq_reqbuf = ev->md.start + ev->offset;
 	if (ev->type == LNET_EVENT_PUT && ev->status == 0)
 		req->rq_reqdata_len = ev->mlength;
 	do_gettimeofday(&req->rq_arrival_time);
 	req->rq_peer = ev->initiator;
 	req->rq_self = ev->target.nid;
 	req->rq_rqbd = rqbd;
-        req->rq_phase = RQ_PHASE_NEW;
-	spin_lock_init(&req->rq_lock);
-	INIT_LIST_HEAD(&req->rq_timed_list);
-	INIT_LIST_HEAD(&req->rq_exp_list);
-	atomic_set(&req->rq_refcount, 1);
+	req->rq_phase = RQ_PHASE_NEW;
 	if (ev->type == LNET_EVENT_PUT)
 		CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n",
 		       req, req->rq_xid, ev->mlength);
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index 8a76e58..d1ae6b8 100644
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -816,7 +816,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
-	do_gettimeofday(&request->rq_arrival_time);
+	do_gettimeofday(&request->rq_sent_tv);
 	request->rq_sent = cfs_time_current_sec();
 	/* We give the server rq_timeout secs to process the req, and
 	   add the network latency for our local timeout. */
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h
index 53f14dc..05de240 100644
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -327,4 +327,38 @@ static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
 	if (atomic_dec_and_test(&set->set_refcount))
 		OBD_FREE_PTR(set);
 }
+
+/** init fields common to client and server requests; refcount starts at 1 */
+static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req)
+{
+	spin_lock_init(&req->rq_lock);
+	atomic_set(&req->rq_refcount, 1);
+	INIT_LIST_HEAD(&req->rq_list);
+	INIT_LIST_HEAD(&req->rq_replay_list);
+}
+
+/** init client request: common fields plus rq_cli lists and waitqueues */
+static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_req *cr = &req->rq_cli;
+
+	ptlrpc_req_comm_init(req);
+	INIT_LIST_HEAD(&cr->cr_set_chain);
+	INIT_LIST_HEAD(&cr->cr_ctx_chain);
+	init_waitqueue_head(&cr->cr_reply_waitq);
+	init_waitqueue_head(&cr->cr_set_waitq);
+}
+
+/** init server request: common fields, rq_srv_req flag and rq_srv lists */
+static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req)
+{
+	struct ptlrpc_srv_req *sr = &req->rq_srv;
+
+	ptlrpc_req_comm_init(req);
+	req->rq_srv_req = 1;
+	INIT_LIST_HEAD(&sr->sr_exp_list);
+	INIT_LIST_HEAD(&sr->sr_timed_list);
+	INIT_LIST_HEAD(&sr->sr_hist_list);
+}
+
 #endif /* PTLRPC_INTERNAL_H */
diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c
index aa09f15..2373084 100644
--- a/lustre/ptlrpc/ptlrpcd.c
+++ b/lustre/ptlrpc/ptlrpcd.c
@@ -88,11 +88,10 @@ static int ptlrpcd_users = 0;
 
 void ptlrpcd_wake(struct ptlrpc_request *req)
 {
-        struct ptlrpc_request_set *rq_set = req->rq_set;
+	struct ptlrpc_request_set *set = req->rq_set;
 
-        LASSERT(rq_set != NULL);
-
-	wake_up(&rq_set->set_waitq);
+	LASSERT(set != NULL);
+	wake_up(&set->set_waitq);
 }
 EXPORT_SYMBOL(ptlrpcd_wake);
 
diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c
index 7748663..c8f7731 100644
--- a/lustre/ptlrpc/sec.c
+++ b/lustre/ptlrpc/sec.c
@@ -928,11 +928,9 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
 	if (!req)
 		RETURN(-ENOMEM);
 
-	spin_lock_init(&req->rq_lock);
+	ptlrpc_cli_req_init(req);
 	atomic_set(&req->rq_refcount, 10000);
-	INIT_LIST_HEAD(&req->rq_ctx_chain);
-	init_waitqueue_head(&req->rq_reply_waitq);
-	init_waitqueue_head(&req->rq_set_waitq);
+
 	req->rq_import = imp;
 	req->rq_flvr = sec->ps_flvr;
 	req->rq_cli_ctx = ctx;
@@ -1106,15 +1104,17 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
 				   struct ptlrpc_request **req_ret)
 {
 	struct ptlrpc_request  *early_req;
-	char                   *early_buf;
-	int                     early_bufsz, early_size;
-	int                     rc;
+	char		       *early_buf;
+	int			early_bufsz, early_size;
+	int			rc;
 	ENTRY;
 
 	early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
 	if (early_req == NULL)
 		RETURN(-ENOMEM);
 
+	ptlrpc_cli_req_init(early_req);
+
 	early_size = req->rq_nob_received;
 	early_bufsz = size_roundup_power2(early_size);
 	OBD_ALLOC_LARGE(early_buf, early_bufsz);
@@ -1157,7 +1157,6 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
 	memcpy(early_buf, req->rq_repbuf, early_size);
 	spin_unlock(&req->rq_lock);
 
-	spin_lock_init(&early_req->rq_lock);
         early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
         early_req->rq_flvr = req->rq_flvr;
         early_req->rq_repbuf = early_buf;
-- 
1.8.3.1