From 2ec741f46ac31cb566ea17d8ffda122398896102 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Sun, 5 Jan 2014 23:00:26 +0800 Subject: [PATCH] LU-181 ptlrpc: reorganize ptlrpc_request ptlrpc_request has some structure members that are only for the client side, and some others that are only for the server side; this patch moves these members into different structures and then puts them into a union. By doing this, the size of ptlrpc_request is decreased by about 300 bytes; besides saving memory, it can also reduce the memory footprint while processing. Another change in this patch is that osp will not use rq_exp_list anymore because it is a server-only member now. osp will use ptlrpc_req_async_args to store commit_cb parameters in this patch. Signed-off-by: Liang Zhen Change-Id: Id910ac225b8e9d33a0cae40b3124ce55f1a3fbc9 Reviewed-on: http://review.whamcloud.com/8806 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Bobi Jam --- lustre/include/lustre_net.h | 403 +++++++++++++++++++++++----------------- lustre/ldlm/ldlm_lib.c | 6 +- lustre/osp/osp_sync.c | 61 +++--- lustre/ptlrpc/client.c | 69 ++----- lustre/ptlrpc/events.c | 17 +- lustre/ptlrpc/niobuf.c | 2 +- lustre/ptlrpc/ptlrpc_internal.h | 34 ++++ lustre/ptlrpc/ptlrpcd.c | 7 +- lustre/ptlrpc/sec.c | 15 +- 9 files changed, 344 insertions(+), 270 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index a61a02c..f39d8d7 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -499,6 +499,11 @@ /* Macro to hide a typecast. */ #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) +struct ptlrpc_replay_async_args { + int praa_old_state; + int praa_old_status; +}; + /** * Structure to single define portal connection. 
*/ @@ -1771,6 +1776,167 @@ struct ptlrpc_hpreq_ops { void (*hpreq_fini)(struct ptlrpc_request *); }; +struct ptlrpc_cli_req { + /** For bulk requests on client only: bulk descriptor */ + struct ptlrpc_bulk_desc *cr_bulk; + /** optional time limit for send attempts */ + cfs_duration_t cr_delay_limit; + /** time request was first queued */ + cfs_time_t cr_queued_time; + /** request sent timeval */ + struct timeval cr_sent_tv; + /** time for request really sent out */ + time_t cr_sent_out; + /** when req reply unlink must finish. */ + time_t cr_reply_deadline; + /** when req bulk unlink must finish. */ + time_t cr_bulk_deadline; + /** Portal to which this request would be sent */ + short cr_req_ptl; + /** Portal where to wait for reply and where reply would be sent */ + short cr_rep_ptl; + /** request resending number */ + unsigned int cr_resend_nr; + /** What was import generation when this request was sent */ + int cr_imp_gen; + enum lustre_imp_state cr_send_state; + /** Per-request waitq introduced by bug 21938 for recovery waiting */ + wait_queue_head_t cr_set_waitq; + /** Link item for request set lists */ + struct list_head cr_set_chain; + /** link to waited ctx */ + struct list_head cr_ctx_chain; + + /** client's half ctx */ + struct ptlrpc_cli_ctx *cr_cli_ctx; + /** Link back to the request set */ + struct ptlrpc_request_set *cr_set; + /** outgoing request MD handle */ + lnet_handle_md_t cr_req_md_h; + /** request-out callback parameter */ + struct ptlrpc_cb_id cr_req_cbid; + /** incoming reply MD handle */ + lnet_handle_md_t cr_reply_md_h; + wait_queue_head_t cr_reply_waitq; + /** reply callback parameter */ + struct ptlrpc_cb_id cr_reply_cbid; + /** Async completion handler, called when reply is received */ + ptlrpc_interpterer_t cr_reply_interp; + /** Resend handler, called when request is resend to update RPC data */ + ptlrpc_resend_cb_t cr_resend_cb; + /** Async completion context */ + union ptlrpc_async_args cr_async_args; + /** Opaq data for replay and 
commit callbacks. */ + void *cr_cb_data; + /** + * Commit callback, called when request is committed and about to be + * freed. + */ + void (*cr_commit_cb)(struct ptlrpc_request *); + /** Replay callback, called after request is replayed at recovery */ + void (*cr_replay_cb)(struct ptlrpc_request *); +}; + +/** client request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse */ +#define rq_bulk rq_cli.cr_bulk +#define rq_delay_limit rq_cli.cr_delay_limit +#define rq_queued_time rq_cli.cr_queued_time +#define rq_sent_tv rq_cli.cr_sent_tv +#define rq_real_sent rq_cli.cr_sent_out +#define rq_reply_deadline rq_cli.cr_reply_deadline +#define rq_bulk_deadline rq_cli.cr_bulk_deadline +#define rq_nr_resend rq_cli.cr_resend_nr +#define rq_request_portal rq_cli.cr_req_ptl +#define rq_reply_portal rq_cli.cr_rep_ptl +#define rq_import_generation rq_cli.cr_imp_gen +#define rq_send_state rq_cli.cr_send_state +#define rq_set_chain rq_cli.cr_set_chain +#define rq_ctx_chain rq_cli.cr_ctx_chain +#define rq_set rq_cli.cr_set +#define rq_set_waitq rq_cli.cr_set_waitq +#define rq_cli_ctx rq_cli.cr_cli_ctx +#define rq_req_md_h rq_cli.cr_req_md_h +#define rq_req_cbid rq_cli.cr_req_cbid +#define rq_reply_md_h rq_cli.cr_reply_md_h +#define rq_reply_waitq rq_cli.cr_reply_waitq +#define rq_reply_cbid rq_cli.cr_reply_cbid +#define rq_interpret_reply rq_cli.cr_reply_interp +#define rq_resend_cb rq_cli.cr_resend_cb +#define rq_async_args rq_cli.cr_async_args +#define rq_cb_data rq_cli.cr_cb_data +#define rq_commit_cb rq_cli.cr_commit_cb +#define rq_replay_cb rq_cli.cr_replay_cb + +struct ptlrpc_srv_req { + /** initial thread servicing this request */ + struct ptlrpc_thread *sr_svc_thread; + /** + * Server side list of incoming unserved requests sorted by arrival + * time. 
Traversed from time to time to notice about to expire + * requests and sent back "early replies" to clients to let them + * know server is alive and well, just very busy to service their + * requests in time + */ + struct list_head sr_timed_list; + /** server-side per-export list */ + struct list_head sr_exp_list; + /** server-side history, used for debugging purposes. */ + struct list_head sr_hist_list; + /** history sequence # */ + __u64 sr_hist_seq; + /** the index of service's srv_at_array into which request is linked */ + time_t sr_at_index; + /** authed uid */ + uid_t sr_auth_uid; + /** authed uid mapped to */ + uid_t sr_auth_mapped_uid; + /** RPC is generated from what part of Lustre */ + enum lustre_sec_part sr_sp_from; + /** request session context */ + struct lu_context sr_ses; + /** \addtogroup nrs + * @{ + */ + /** stub for NRS request */ + struct ptlrpc_nrs_request sr_nrq; + /** @} nrs */ + /** request arrival time */ + struct timeval sr_arrival_time; + /** server's half ctx */ + struct ptlrpc_svc_ctx *sr_svc_ctx; + /** (server side), pointed directly into req buffer */ + struct ptlrpc_user_desc *sr_user_desc; + /** separated reply state */ + struct ptlrpc_reply_state *sr_reply_state; + /** server-side hp handlers */ + struct ptlrpc_hpreq_ops *sr_ops; + /** incoming request buffer */ + struct ptlrpc_request_buffer_desc *sr_rqbd; +}; + +/** server request member alias */ +/* NB: these alias should NOT be used by any new code, instead they should + * be removed step by step to avoid potential abuse */ +#define rq_svc_thread rq_srv.sr_svc_thread +#define rq_timed_list rq_srv.sr_timed_list +#define rq_exp_list rq_srv.sr_exp_list +#define rq_history_list rq_srv.sr_hist_list +#define rq_history_seq rq_srv.sr_hist_seq +#define rq_at_index rq_srv.sr_at_index +#define rq_auth_uid rq_srv.sr_auth_uid +#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid +#define rq_sp_from rq_srv.sr_sp_from +#define rq_session rq_srv.sr_ses +#define rq_nrq rq_srv.sr_nrq +#define
rq_arrival_time rq_srv.sr_arrival_time +#define rq_reply_state rq_srv.sr_reply_state +#define rq_svc_ctx rq_srv.sr_svc_ctx +#define rq_user_desc rq_srv.sr_user_desc +#define rq_ops rq_srv.sr_ops +#define rq_rqbd rq_srv.sr_rqbd + /** * Represents remote procedure call. * @@ -1779,46 +1945,18 @@ struct ptlrpc_hpreq_ops { */ struct ptlrpc_request { /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; + int rq_type; /** Result of request processing */ - int rq_status; + int rq_status; /** * Linkage item through which this request is included into * sending/delayed lists on client and into rqbd list on server */ - struct list_head rq_list; - /** - * Server side list of incoming unserved requests sorted by arrival - * time. Traversed from time to time to notice about to expire - * requests and sent back "early replies" to clients to let them - * know server is alive and well, just very busy to service their - * requests in time - */ - struct list_head rq_timed_list; - /** server-side history, used for debuging purposes. 
*/ - struct list_head rq_history_list; - /** server-side per-export list */ - struct list_head rq_exp_list; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *rq_ops; - - /** initial thread servicing this request */ - struct ptlrpc_thread *rq_svc_thread; - - /** history sequence # */ - __u64 rq_history_seq; - /** \addtogroup nrs - * @{ + struct list_head rq_list; + /** Lock to protect request flags and some other important bits, like + * rq_list */ - /** stub for NRS request */ - struct ptlrpc_nrs_request rq_nrq; - /** @} nrs */ - /** the index of service's srv_at_array into which request is linked */ - time_t rq_at_index; - /** Lock to protect request flags and some other important bits, like - * rq_list - */ - spinlock_t rq_lock; + spinlock_t rq_lock; /** client-side flags are serialized by rq_lock */ unsigned int rq_intr:1, rq_replied:1, rq_err:1, rq_timedout:1, rq_resend:1, rq_restart:1, @@ -1854,18 +1992,15 @@ struct ptlrpc_request { /* bulk request, sent to server, but uncommitted */ rq_unstable:1; - unsigned int rq_nr_resend; - - enum rq_phase rq_phase; /* one of RQ_PHASE_* */ - enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ - atomic_t rq_refcount;/* client-side refcount for SENT race, - server-side refcounf for multiple replies */ - - /** Portal to which this request would be sent */ - short rq_request_portal; /* XXX FIXME bug 249 */ - /** Portal where to wait for reply and where reply would be sent */ - short rq_reply_portal; /* XXX FIXME bug 249 */ - + /** one of RQ_PHASE_* */ + enum rq_phase rq_phase; + /** one of RQ_PHASE_* to be used next */ + enum rq_phase rq_next_phase; + /** + * client-side refcount for SENT race, server-side refcount + * for multiple replies + */ + atomic_t rq_refcount; /** * client-side: * !rq_truncate : # reply bytes actually received, @@ -1876,6 +2011,8 @@ struct ptlrpc_request { int rq_reqlen; /** Reply length */ int rq_replen; + /** Pool if request is from preallocated list */ + struct
ptlrpc_request_pool *rq_pool; /** Request message - what client sent */ struct lustre_msg *rq_reqmsg; /** Reply message - server response */ @@ -1884,22 +2021,23 @@ struct ptlrpc_request { __u64 rq_transno; /** xid */ __u64 rq_xid; - /** - * List item to for replay list. Not yet commited requests get linked - * there. - * Also see \a rq_replay comment above. - */ - struct list_head rq_replay_list; - + /** + * List item for replay list. Not yet committed requests get linked + * there. + * Also see \a rq_replay comment above. + * It is also the link chain on obd_export::exp_req_replay_queue + */ + struct list_head rq_replay_list; + /** non-shared members for client & server request */ + union { + struct ptlrpc_cli_req rq_cli; + struct ptlrpc_srv_req rq_srv; + }; /** * security and encryption data * @{ */ - struct ptlrpc_cli_ctx *rq_cli_ctx; /**< client's half ctx */ - struct ptlrpc_svc_ctx *rq_svc_ctx; /**< server's half ctx */ - struct list_head rq_ctx_chain; /**< link to waited ctx */ - - struct sptlrpc_flavor rq_flvr; /**< for client & server */ - enum lustre_sec_part rq_sp_from; + /** description of flavors for client & server */ + struct sptlrpc_flavor rq_flvr; /* client/server security flags */ unsigned int @@ -1918,19 +2056,16 @@ struct ptlrpc_request { rq_pack_bulk:1, /* doesn't expect reply FIXME */ rq_no_reply:1, - rq_pill_init:1; /* pill initialized */ - - uid_t rq_auth_uid; /* authed uid */ - uid_t rq_auth_mapped_uid; /* authed uid mapped to */ + rq_pill_init:1, /* pill initialized */ + rq_srv_req:1; /* server request */ - /* (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *rq_user_desc; - /* various buffer pointers */ - struct lustre_msg *rq_reqbuf; /* req wrapper */ - char *rq_repbuf; /* rep buffer */ - struct lustre_msg *rq_repdata; /* rep wrapper msg */ - struct lustre_msg *rq_clrbuf; /* only in priv mode */ + /** various buffer pointers */ + struct lustre_msg *rq_reqbuf; /**< req wrapper */ + char *rq_repbuf; /**< rep buffer 
*/ + struct lustre_msg *rq_repdata; /**< rep wrapper msg */ + /** only in priv mode */ + struct lustre_msg *rq_clrbuf; int rq_reqbuf_len; /* req wrapper buf len */ int rq_reqdata_len; /* req wrapper msg len */ int rq_repbuf_len; /* rep buffer len */ @@ -1939,107 +2074,37 @@ struct ptlrpc_request { int rq_clrdata_len; /* only in priv mode */ /** early replies go to offset 0, regular replies go after that */ - unsigned int rq_reply_off; - - /** @} */ - - /** Fields that help to see if request and reply were swabbed or not */ - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; - - /** What was import generation when this request was sent */ - int rq_import_generation; - enum lustre_imp_state rq_send_state; - - /** how many early replies (for stats) */ - int rq_early_count; - - /** client+server request */ - lnet_handle_md_t rq_req_md_h; - struct ptlrpc_cb_id rq_req_cbid; - /** optional time limit for send attempts */ - cfs_duration_t rq_delay_limit; - /** time request was first queued */ - cfs_time_t rq_queued_time; - - /* server-side... */ - /** request arrival time */ - struct timeval rq_arrival_time; - /** separated reply state */ - struct ptlrpc_reply_state *rq_reply_state; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *rq_rqbd; - - /** client-only incoming reply */ - lnet_handle_md_t rq_reply_md_h; - wait_queue_head_t rq_reply_waitq; - struct ptlrpc_cb_id rq_reply_cbid; - - /** our LNet NID */ - lnet_nid_t rq_self; - /** Peer description (the other side) */ - lnet_process_id_t rq_peer; - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** Client side, import where request is being sent */ - struct obd_import *rq_import; - - /** Replay callback, called after request is replayed at recovery */ - void (*rq_replay_cb)(struct ptlrpc_request *); - /** - * Commit callback, called when request is committed and about to be - * freed. 
- */ - void (*rq_commit_cb)(struct ptlrpc_request *); - /** Opaq data for replay and commit callbacks. */ - void *rq_cb_data; - - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *rq_bulk; - - /** client outgoing req */ - /** - * when request/reply sent (secs), or time when request should be sent - */ - time_t rq_sent; - /** time for request really sent out */ - time_t rq_real_sent; - - /** when request must finish. volatile - * so that servers' early reply updates to the deadline aren't - * kept in per-cpu cache */ - volatile time_t rq_deadline; - /** when req reply unlink must finish. */ - time_t rq_reply_deadline; - /** when req bulk unlink must finish. */ - time_t rq_bulk_deadline; - /** - * service time estimate (secs) - * If the requestsis not served by this time, it is marked as timed out. - */ - int rq_timeout; + unsigned int rq_reply_off; - /** Multi-rpc bits */ - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t rq_set_waitq; - /** Link item for request set lists */ - struct list_head rq_set_chain; - /** Link back to the request set */ - struct ptlrpc_request_set *rq_set; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t rq_interpret_reply; - /** Resend handler, called when request is resend to update RPC data */ - ptlrpc_resend_cb_t rq_resend_cb; - /** Async completion context */ - union ptlrpc_async_args rq_async_args; - - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - - struct lu_context rq_session; + /** @} */ - /** request format description */ - struct req_capsule rq_pill; + /** Fields that help to see if request and reply were swabbed or not */ + __u32 rq_req_swab_mask; + __u32 rq_rep_swab_mask; + + /** how many early replies (for stats) */ + int rq_early_count; + /** Server-side, export on which request was received */ + struct obd_export *rq_export; + /** import where request is being sent */ + 
struct obd_import *rq_import; + /** our LNet NID */ + lnet_nid_t rq_self; + /** Peer description (the other side) */ + lnet_process_id_t rq_peer; + /** + * service time estimate (secs) + * If the request is not served by this time, it is marked as timed out. + */ + int rq_timeout; + /** + * when request/reply sent (secs), or time when request should be sent + */ + time_t rq_sent; + /** when request must finish. */ + time_t rq_deadline; + /** request format description */ + struct req_capsule rq_pill; }; /** diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 3db7adc..c3f6551 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2652,6 +2652,7 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, long timeoutl = deadline - cfs_time_current_sec(); cfs_duration_t timeout = timeoutl <= 0 ? CFS_TICK : cfs_time_seconds(timeoutl); + time_t rq_deadline; *lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1), target_bulk_timeout, desc); @@ -2663,9 +2664,10 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); /* Wait again if we changed rq_deadline. 
*/ + rq_deadline = ACCESS_ONCE(req->rq_deadline); deadline = start + bulk_timeout; - if (deadline > req->rq_deadline) - deadline = req->rq_deadline; + if (deadline > rq_deadline) + deadline = rq_deadline; } while ((rc == -ETIMEDOUT) && (deadline > cfs_time_current_sec())); diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index 083a751..6737bc4 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -94,14 +94,13 @@ static void osp_sync_remove_from_tracker(struct osp_device *d); #define OSP_JOB_MAGIC 0x26112005 -/** - * Return status: whether OSP thread should keep running - * - * \param[in] d OSP device - * - * \retval 1 should keep running - * \retval 0 should stop - */ +struct osp_job_req_args { + /** bytes reserved for ptlrpc_replay_req() */ + struct ptlrpc_replay_async_args jra_raa; + struct list_head jra_link; + __u32 jra_magic; +}; + static inline int osp_sync_running(struct osp_device *d) { return !!(d->opd_syn_thread.t_flags & SVC_RUNNING); @@ -410,6 +409,7 @@ int osp_sync_gap(const struct lu_env *env, struct osp_device *d, static void osp_sync_request_commit_cb(struct ptlrpc_request *req) { struct osp_device *d = req->rq_cb_data; + struct osp_job_req_args *jra; CDEBUG(D_HA, "commit req %p, transno "LPU64"\n", req, req->rq_transno); @@ -418,15 +418,16 @@ static void osp_sync_request_commit_cb(struct ptlrpc_request *req) /* do not do any opd_dyn_rpc_* accounting here * it's done in osp_sync_interpret sooner or later */ - LASSERT(d); - LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC); - LASSERT(list_empty(&req->rq_exp_list)); + + jra = ptlrpc_req_async_args(req); + LASSERT(jra->jra_magic == OSP_JOB_MAGIC); + LASSERT(list_empty(&jra->jra_link)); ptlrpc_request_addref(req); spin_lock(&d->opd_syn_lock); - list_add(&req->rq_exp_list, &d->opd_syn_committed_there); + list_add(&jra->jra_link, &d->opd_syn_committed_there); spin_unlock(&d->opd_syn_lock); /* XXX: some batching wouldn't hurt */ @@ -454,10 +455,12 @@ static int 
osp_sync_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *aa, int rc) { struct osp_device *d = req->rq_cb_data; + struct osp_job_req_args *jra = aa; - if (req->rq_svc_thread != (void *) OSP_JOB_MAGIC) - DEBUG_REQ(D_ERROR, req, "bad magic %p\n", req->rq_svc_thread); - LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC); + if (jra->jra_magic != OSP_JOB_MAGIC) { + DEBUG_REQ(D_ERROR, req, "bad magic %u\n", jra->jra_magic); + LBUG(); + } LASSERT(d); CDEBUG(D_HA, "reply req %p/%d, rc %d, transno %u\n", req, @@ -471,12 +474,12 @@ static int osp_sync_interpret(const struct lu_env *env, * but object doesn't exist anymore - cancell llog record */ LASSERT(req->rq_transno == 0); - LASSERT(list_empty(&req->rq_exp_list)); + LASSERT(list_empty(&jra->jra_link)); ptlrpc_request_addref(req); spin_lock(&d->opd_syn_lock); - list_add(&req->rq_exp_list, &d->opd_syn_committed_there); + list_add(&jra->jra_link, &d->opd_syn_committed_there); spin_unlock(&d->opd_syn_lock); wake_up(&d->opd_syn_waitq); @@ -537,8 +540,13 @@ static int osp_sync_interpret(const struct lu_env *env, static void osp_sync_send_new_rpc(struct osp_device *d, struct ptlrpc_request *req) { + struct osp_job_req_args *jra; + LASSERT(d->opd_syn_rpc_in_flight <= d->opd_syn_max_rpc_in_flight); - LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC); + + jra = ptlrpc_req_async_args(req); + jra->jra_magic = OSP_JOB_MAGIC; + INIT_LIST_HEAD(&jra->jra_link); ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); } @@ -596,8 +604,6 @@ static struct ptlrpc_request *osp_sync_new_job(struct osp_device *d, body->oa.o_lcookie.lgc_lgl = llh->lgh_id; body->oa.o_lcookie.lgc_subsys = LLOG_MDS_OST_ORIG_CTXT; body->oa.o_lcookie.lgc_index = h->lrh_index; - INIT_LIST_HEAD(&req->rq_exp_list); - req->rq_svc_thread = (void *) OSP_JOB_MAGIC; req->rq_interpret_reply = osp_sync_interpret; req->rq_commit_cb = osp_sync_request_commit_cb; @@ -769,9 +775,6 @@ static int osp_prep_unlink_update_req(const struct lu_env *env, if (rc != 0) 
GOTO(out, rc); - INIT_LIST_HEAD(&req->rq_exp_list); - req->rq_svc_thread = (void *)OSP_JOB_MAGIC; - req->rq_interpret_reply = osp_sync_interpret; req->rq_commit_cb = osp_sync_request_commit_cb; req->rq_cb_data = osp; @@ -969,7 +972,7 @@ static void osp_sync_process_committed(const struct lu_env *env, struct obd_device *obd = d->opd_obd; struct obd_import *imp = obd->u.cli.cl_import; struct ost_body *body; - struct ptlrpc_request *req, *tmp; + struct ptlrpc_request *req; struct llog_ctxt *ctxt; struct llog_handle *llh; struct list_head list; @@ -1008,12 +1011,16 @@ static void osp_sync_process_committed(const struct lu_env *env, INIT_LIST_HEAD(&d->opd_syn_committed_there); spin_unlock(&d->opd_syn_lock); - list_for_each_entry_safe(req, tmp, &list, rq_exp_list) { + while (!list_empty(&list)) { struct llog_cookie *lcookie = NULL; + struct osp_job_req_args *jra; - LASSERT(req->rq_svc_thread == (void *) OSP_JOB_MAGIC); - list_del_init(&req->rq_exp_list); + jra = list_entry(list.next, struct osp_job_req_args, jra_link); + LASSERT(jra->jra_magic == OSP_JOB_MAGIC); + list_del_init(&jra->jra_link); + req = container_of((void *)jra, struct ptlrpc_request, + rq_async_args); if (d->opd_connect_mdt) { struct object_update_request *ureq; struct object_update *update; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 422bdaf..b241515 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -614,7 +614,6 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, lustre_msg_add_version(request->rq_reqmsg, version); request->rq_send_state = LUSTRE_IMP_FULL; request->rq_type = PTL_RPC_MSG_REQUEST; - request->rq_export = NULL; request->rq_req_cbid.cbid_fn = request_out_callback; request->rq_req_cbid.cbid_arg = request; @@ -631,19 +630,7 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, ptlrpc_at_set_req_timeout(request); - spin_lock_init(&request->rq_lock); - INIT_LIST_HEAD(&request->rq_list); - 
INIT_LIST_HEAD(&request->rq_timed_list); - INIT_LIST_HEAD(&request->rq_replay_list); - INIT_LIST_HEAD(&request->rq_ctx_chain); - INIT_LIST_HEAD(&request->rq_set_chain); - INIT_LIST_HEAD(&request->rq_history_list); - INIT_LIST_HEAD(&request->rq_exp_list); - init_waitqueue_head(&request->rq_reply_waitq); - init_waitqueue_head(&request->rq_set_waitq); request->rq_xid = ptlrpc_next_xid(); - atomic_set(&request->rq_refcount, 1); - lustre_msg_set_opc(request->rq_reqmsg, opcode); RETURN(0); @@ -720,7 +707,9 @@ struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp, request = ptlrpc_request_cache_alloc(GFP_NOFS); if (request) { - LASSERTF((unsigned long)imp > 0x1000, "%p\n", imp); + ptlrpc_cli_req_init(request); + + LASSERTF((unsigned long)imp > 0x1000, "%p", imp); LASSERT(imp != LP_POISON); LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n", imp->imp_client); @@ -1310,7 +1299,7 @@ static int after_reply(struct ptlrpc_request *req) } do_gettimeofday(&work_start); - timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL); + timediff = cfs_timeval_sub(&work_start, &req->rq_sent_tv, NULL); if (obd->obd_svc_stats != NULL) { lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, timediff); @@ -2266,24 +2255,23 @@ EXPORT_SYMBOL(ptlrpc_set_wait); */ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) { - ENTRY; - if (request == NULL) { - EXIT; - return; - } + ENTRY; + + if (request == NULL) + RETURN_EXIT; - LASSERTF(!request->rq_receiving_reply, "req %p\n", request); - LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */ + LASSERT(!request->rq_srv_req); + LASSERT(request->rq_export == NULL); + LASSERTF(!request->rq_receiving_reply, "req %p\n", request); LASSERTF(list_empty(&request->rq_list), "req %p\n", request); LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); - LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request); - LASSERTF(!request->rq_replay, "req %p\n", request); 
+ LASSERTF(!request->rq_replay, "req %p\n", request); - req_capsule_fini(&request->rq_pill); + req_capsule_fini(&request->rq_pill); - /* We must take it off the imp_replay_list first. Otherwise, we'll set - * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ - if (request->rq_import != NULL) { + /* We must take it off the imp_replay_list first. Otherwise, we'll set + * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ + if (request->rq_import != NULL) { if (!locked) spin_lock(&request->rq_import->imp_lock); list_del_init(&request->rq_replay_list); @@ -2300,10 +2288,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (request->rq_repbuf != NULL) sptlrpc_cli_free_repbuf(request); - if (request->rq_export != NULL) { - class_export_put(request->rq_export); - request->rq_export = NULL; - } + if (request->rq_import != NULL) { class_import_put(request->rq_import); request->rq_import = NULL; @@ -2738,11 +2723,6 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) } EXPORT_SYMBOL(ptlrpc_queue_wait); -struct ptlrpc_replay_async_args { - int praa_old_state; - int praa_old_status; -}; - /** * Callback used for replayed requests reply processing. * In case of succesful reply calls registeresd request replay callback. 
@@ -3122,7 +3102,7 @@ static int ptlrpcd_check_work(struct ptlrpc_request *req) void *ptlrpcd_alloc_work(struct obd_import *imp, int (*cb)(const struct lu_env *, void *), void *cbdata) { - struct ptlrpc_request *req = NULL; + struct ptlrpc_request *req = NULL; struct ptlrpc_work_async_args *args; ENTRY; @@ -3138,10 +3118,11 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, RETURN(ERR_PTR(-ENOMEM)); } + ptlrpc_cli_req_init(req); + req->rq_send_state = LUSTRE_IMP_FULL; req->rq_type = PTL_RPC_MSG_REQUEST; req->rq_import = class_import_get(imp); - req->rq_export = NULL; req->rq_interpret_reply = work_interpreter; /* don't want reply */ req->rq_receiving_reply = 0; @@ -3149,16 +3130,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, req->rq_no_delay = req->rq_no_resend = 1; req->rq_pill.rc_fmt = (void *)&worker_format; - spin_lock_init(&req->rq_lock); - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_replay_list); - INIT_LIST_HEAD(&req->rq_set_chain); - INIT_LIST_HEAD(&req->rq_history_list); - INIT_LIST_HEAD(&req->rq_exp_list); - init_waitqueue_head(&req->rq_reply_waitq); - init_waitqueue_head(&req->rq_set_waitq); - atomic_set(&req->rq_refcount, 1); - CLASSERT (sizeof(*args) <= sizeof(req->rq_async_args)); args = ptlrpc_req_async_args(req); args->cb = cb; diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 1147ff3..ca4b0e6 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -325,22 +325,19 @@ void request_in_callback(lnet_event_t *ev) } } - /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, - * flags are reset and scalars are zero. We only set the message - * size to non-zero if this was a successful receive. */ - req->rq_xid = ev->match_bits; - req->rq_reqbuf = ev->md.start + ev->offset; + ptlrpc_srv_req_init(req); + /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, + * flags are reset and scalars are zero. We only set the message + * size to non-zero if this was a successful receive. 
*/ + req->rq_xid = ev->match_bits; + req->rq_reqbuf = ev->md.start + ev->offset; if (ev->type == LNET_EVENT_PUT && ev->status == 0) req->rq_reqdata_len = ev->mlength; do_gettimeofday(&req->rq_arrival_time); req->rq_peer = ev->initiator; req->rq_self = ev->target.nid; req->rq_rqbd = rqbd; - req->rq_phase = RQ_PHASE_NEW; - spin_lock_init(&req->rq_lock); - INIT_LIST_HEAD(&req->rq_timed_list); - INIT_LIST_HEAD(&req->rq_exp_list); - atomic_set(&req->rq_refcount, 1); + req->rq_phase = RQ_PHASE_NEW; if (ev->type == LNET_EVENT_PUT) CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n", req, req->rq_xid, ev->mlength); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 8a76e58..d1ae6b8 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -816,7 +816,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); - do_gettimeofday(&request->rq_arrival_time); + do_gettimeofday(&request->rq_sent_tv); request->rq_sent = cfs_time_current_sec(); /* We give the server rq_timeout secs to process the req, and add the network latency for our local timeout. 
*/ diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 53f14dc..05de240 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -327,4 +327,38 @@ static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set) if (atomic_dec_and_test(&set->set_refcount)) OBD_FREE_PTR(set); } + +/** initialise ptlrpc common fields */ +static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req) +{ + spin_lock_init(&req->rq_lock); + atomic_set(&req->rq_refcount, 1); + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_replay_list); +} + +/** initialise client side ptlrpc request */ +static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_req *cr = &req->rq_cli; + + ptlrpc_req_comm_init(req); + INIT_LIST_HEAD(&cr->cr_set_chain); + INIT_LIST_HEAD(&cr->cr_ctx_chain); + init_waitqueue_head(&cr->cr_reply_waitq); + init_waitqueue_head(&cr->cr_set_waitq); +} + +/** initialise server side ptlrpc request */ +static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req) +{ + struct ptlrpc_srv_req *sr = &req->rq_srv; + + ptlrpc_req_comm_init(req); + req->rq_srv_req = 1; + INIT_LIST_HEAD(&sr->sr_exp_list); + INIT_LIST_HEAD(&sr->sr_timed_list); + INIT_LIST_HEAD(&sr->sr_hist_list); +} + #endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index aa09f15..2373084 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -88,11 +88,10 @@ static int ptlrpcd_users = 0; void ptlrpcd_wake(struct ptlrpc_request *req) { - struct ptlrpc_request_set *rq_set = req->rq_set; + struct ptlrpc_request_set *set = req->rq_set; - LASSERT(rq_set != NULL); - - wake_up(&rq_set->set_waitq); + LASSERT(set != NULL); + wake_up(&set->set_waitq); } EXPORT_SYMBOL(ptlrpcd_wake); diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index 7748663..c8f7731 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -928,11 +928,9 @@ int 
sptlrpc_import_check_ctx(struct obd_import *imp) if (!req) RETURN(-ENOMEM); - spin_lock_init(&req->rq_lock); + ptlrpc_cli_req_init(req); atomic_set(&req->rq_refcount, 10000); - INIT_LIST_HEAD(&req->rq_ctx_chain); - init_waitqueue_head(&req->rq_reply_waitq); - init_waitqueue_head(&req->rq_set_waitq); + req->rq_import = imp; req->rq_flvr = sec->ps_flvr; req->rq_cli_ctx = ctx; @@ -1106,15 +1104,17 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, struct ptlrpc_request **req_ret) { struct ptlrpc_request *early_req; - char *early_buf; - int early_bufsz, early_size; - int rc; + char *early_buf; + int early_bufsz, early_size; + int rc; ENTRY; early_req = ptlrpc_request_cache_alloc(GFP_NOFS); if (early_req == NULL) RETURN(-ENOMEM); + ptlrpc_cli_req_init(early_req); + early_size = req->rq_nob_received; early_bufsz = size_roundup_power2(early_size); OBD_ALLOC_LARGE(early_buf, early_bufsz); @@ -1157,7 +1157,6 @@ int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, memcpy(early_buf, req->rq_repbuf, early_size); spin_unlock(&req->rq_lock); - spin_lock_init(&early_req->rq_lock); early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx); early_req->rq_flvr = req->rq_flvr; early_req->rq_repbuf = early_buf; -- 1.8.3.1