X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Flustre_net.h;h=a1fd22e1ebed27ea1996fcb3f8a623b42c177776;hb=4463d3a15f9551ff6b1cafb9336dd5c929e1e2c2;hp=558930d0ece1c298481f5f39fe1679cb54f57d2b;hpb=fbf5870b9848929d352460f1f005b79c0b5ccc5a;p=fs%2Flustre-release.git

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 558930d..a1fd22e 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -104,11 +104,11 @@
  * considered full when less than ?_MAXREQSIZE is left in them.
  */
 
-#define LDLM_THREADS_AUTO_MIN \
-        min((int)(num_online_cpus() * num_online_cpus() * 2), 8)
-#define LDLM_THREADS_AUTO_MAX (LDLM_THREADS_AUTO_MIN * 16)
+#define LDLM_THREADS_AUTO_MIN (2)
+#define LDLM_THREADS_AUTO_MAX min_t(unsigned, cfs_num_online_cpus() * \
+                                    cfs_num_online_cpus() * 32, 128)
 #define LDLM_BL_THREADS  LDLM_THREADS_AUTO_MIN
-#define LDLM_NBUFS      (64 * num_online_cpus())
+#define LDLM_NBUFS      (64 * cfs_num_online_cpus())
 #define LDLM_BUFSIZE    (8 * 1024)
 #define LDLM_MAXREQSIZE (5 * 1024)
 #define LDLM_MAXREPSIZE (1024)
@@ -116,17 +116,20 @@
 #define MDT_MIN_THREADS 2UL
 #define MDT_MAX_THREADS 512UL
 #define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
-                                  num_physpages >> (25 - CFS_PAGE_SHIFT)), 2UL)
+                                  cfs_num_physpages >> (25 - CFS_PAGE_SHIFT)), \
+                                  2UL)
 #define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
-                                  num_physpages >> (25 - CFS_PAGE_SHIFT)), 2UL)
+                                  cfs_num_physpages >> (25 - CFS_PAGE_SHIFT)), \
+                                  2UL)
 #define SEQ_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
-                                  num_physpages >> (25 - CFS_PAGE_SHIFT)), 2UL)
+                                  cfs_num_physpages >> (25 - CFS_PAGE_SHIFT)), \
+                                  2UL)
 
 /* Absolute limits */
 #define MDS_THREADS_MIN 2
 #define MDS_THREADS_MAX 512
 #define MDS_THREADS_MIN_READPAGE 2
-#define MDS_NBUFS       (64 * num_online_cpus())
+#define MDS_NBUFS       (64 * cfs_num_online_cpus())
 #define MDS_BUFSIZE     (8 * 1024)
 /* Assume file name length = FNAME_MAX = 256 (true for ext3).
  *        path name length = PATH_MAX = 4096
@@ -146,7 +149,7 @@
  * except in the open case where there are a large number of OSTs in a LOV.
  */
 #define MDS_MAXREQSIZE  (5 * 1024)
-#define MDS_MAXREPSIZE  max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56)
+#define MDS_MAXREPSIZE  max(9 * 1024, 362 + LOV_MAX_STRIPE_COUNT * 56)
 
 /* FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + md_fld */
 #define FLD_MAXREQSIZE  (160)
@@ -163,15 +166,15 @@
 
 #define MGS_THREADS_AUTO_MIN 2
 #define MGS_THREADS_AUTO_MAX 32
-#define MGS_NBUFS       (64 * num_online_cpus())
+#define MGS_NBUFS       (64 * cfs_num_online_cpus())
 #define MGS_BUFSIZE     (8 * 1024)
 #define MGS_MAXREQSIZE  (7 * 1024)
 #define MGS_MAXREPSIZE  (9 * 1024)
 
 /* Absolute limits */
-#define OSS_THREADS_MIN 2
+#define OSS_THREADS_MIN 3       /* difficult replies, HPQ, others */
 #define OSS_THREADS_MAX 512
-#define OST_NBUFS       (64 * num_online_cpus())
+#define OST_NBUFS       (64 * cfs_num_online_cpus())
 #define OST_BUFSIZE     (8 * 1024)
 /* OST_MAXREQSIZE ~= 4768 bytes =
  * lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote
@@ -186,11 +189,11 @@
 #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
 
 struct ptlrpc_connection {
-        struct hlist_node       c_hash;
+        cfs_hlist_node_t        c_hash;
         lnet_nid_t              c_self;
         lnet_process_id_t       c_peer;
         struct obd_uuid         c_remote_uuid;
-        atomic_t                c_refcount;
+        cfs_atomic_t            c_refcount;
 };
 
 struct ptlrpc_client {
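A quick sanity check on the new LDLM thread limits above: LDLM_THREADS_AUTO_MAX now grows quadratically with CPU count but is capped at 128, while the minimum is a flat 2. A minimal user-space sketch of the arithmetic (min_t below is a local stand-in for the kernel macro, and the ncpus parameter replaces cfs_num_online_cpus(); neither is part of the patch):

#include <stdio.h>

#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

#define LDLM_THREADS_AUTO_MIN (2)
#define LDLM_THREADS_AUTO_MAX(ncpus) \
        min_t(unsigned, (ncpus) * (ncpus) * 32, 128)

int main(void)
{
        unsigned ncpus;

        /* 1 cpu -> 32, 2 cpus -> 128, 4 and 8 cpus -> capped at 128 */
        for (ncpus = 1; ncpus <= 8; ncpus *= 2)
                printf("%u cpus: min=%d max=%u\n", ncpus,
                       LDLM_THREADS_AUTO_MIN, LDLM_THREADS_AUTO_MAX(ncpus));
        return 0;
}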
@@ -212,29 +215,29 @@ union ptlrpc_async_args {
  * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
  * a pointer to it here.  The pointer_arg ensures this struct is at
  * least big enough for that. */
-        void      *pointer_arg[9];
-        __u64      space[5];
+        void      *pointer_arg[11];
+        __u64      space[6];
 };
 
 struct ptlrpc_request_set;
 typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
 
 struct ptlrpc_request_set {
-        int               set_remaining; /* # uncompleted requests */
-        cfs_waitq_t       set_waitq;
-        cfs_waitq_t      *set_wakeup_ptr;
-        struct list_head  set_requests;
-        struct list_head  set_cblist; /* list of completion callbacks */
-        set_interpreter_func    set_interpret; /* completion callback */
-        void              *set_arg; /* completion context */
+        int                   set_remaining; /* # uncompleted requests */
+        cfs_waitq_t           set_waitq;
+        cfs_waitq_t          *set_wakeup_ptr;
+        cfs_list_t            set_requests;
+        cfs_list_t            set_cblist; /* list of completion callbacks */
+        set_interpreter_func  set_interpret; /* completion callback */
+        void                 *set_arg; /* completion context */
         /* locked so that any old caller can communicate requests to
          * the set holder who can then fold them into the lock-free set */
-        spinlock_t        set_new_req_lock;
-        struct list_head  set_new_requests;
+        cfs_spinlock_t        set_new_req_lock;
+        cfs_list_t            set_new_requests;
 };
 
 struct ptlrpc_set_cbdata {
-        struct list_head        psc_item;
+        cfs_list_t              psc_item;
         set_interpreter_func    psc_interpret;
         void                   *psc_data;
 };
@@ -254,13 +257,15 @@ struct ptlrpc_cb_id {
 
 struct ptlrpc_reply_state {
         struct ptlrpc_cb_id    rs_cb_id;
-        struct list_head       rs_list;
-        struct list_head       rs_exp_list;
-        struct list_head       rs_obd_list;
+        cfs_list_t             rs_list;
+        cfs_list_t             rs_exp_list;
+        cfs_list_t             rs_obd_list;
 #if RS_DEBUG
-        struct list_head       rs_debug_list;
+        cfs_list_t             rs_debug_list;
 #endif
-        /* updates to following flag serialised by srv_request_lock */
+        /* A spinlock to protect the reply state flags */
+        cfs_spinlock_t         rs_lock;
+        /* Reply state flags */
         unsigned long          rs_difficult:1; /* ACK/commit stuff */
         unsigned long          rs_no_ack:1;    /* no ACK, even for
                                                   difficult requests */
@@ -269,14 +274,17 @@ struct ptlrpc_reply_state {
         unsigned long          rs_handled:1;   /* been handled yet? */
         unsigned long          rs_on_net:1;    /* reply_out_callback pending? */
         unsigned long          rs_prealloc:1;  /* rs from prealloc list */
-
+        unsigned long          rs_committed:1; /* the transaction was committed
+                                                  and the rs was dispatched
+                                                  by ptlrpc_commit_replies */
         int                    rs_size;
+        __u32                  rs_opc;
         __u64                  rs_transno;
         __u64                  rs_xid;
         struct obd_export     *rs_export;
         struct ptlrpc_service *rs_service;
         lnet_handle_md_t       rs_md_h;
-        atomic_t               rs_refcount;
+        cfs_atomic_t           rs_refcount;
 
         struct ptlrpc_svc_ctx *rs_svc_ctx;
         struct lustre_msg     *rs_repbuf;       /* wrapper */
@@ -308,8 +316,8 @@ typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
                                     void *arg, int rc);
 
 struct ptlrpc_request_pool {
-        spinlock_t prp_lock;
-        struct list_head prp_req_list;    /* list of ptlrpc_request structs */
+        cfs_spinlock_t prp_lock;
+        cfs_list_t prp_req_list;    /* list of ptlrpc_request structs */
         int prp_rq_size;
         void (*prp_populate)(struct ptlrpc_request_pool *, int);
 };
@@ -317,17 +325,35 @@ struct ptlrpc_request_pool {
 
 struct lu_context;
 struct lu_env;
 
+struct ldlm_lock;
+
+struct ptlrpc_hpreq_ops {
+        /**
+         * Check if the lock handle of the given lock is the same as
+         * taken from the request.
+         */
+        int  (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *);
+        /**
+         * Check if the request is a high priority one.
+         */
+        int  (*hpreq_check)(struct ptlrpc_request *);
+};
+
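For context on the new ptlrpc_hpreq_ops callbacks just added: a server-side service can attach an ops table to a request so the request can be classified as high priority. A minimal sketch, assuming hypothetical ost_* implementations (real callbacks inspect the request body and lock handles rather than returning constants):

static int ost_hpreq_lock_match(struct ptlrpc_request *req,
                                struct ldlm_lock *lock)
{
        /* return 1 if "lock" matches the lock handle carried in "req" */
        return 0;
}

static int ost_hpreq_check(struct ptlrpc_request *req)
{
        /* return 1 to move "req" onto the high priority queue */
        return 0;
}

static struct ptlrpc_hpreq_ops ost_hpreq_ops = {
        .hpreq_lock_match = ost_hpreq_lock_match,
        .hpreq_check      = ost_hpreq_check,
};

/* a service's svc_hpreq_handler_t would set req->rq_ops = &ost_hpreq_ops
 * before the request is queued */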
 /**
  * Represents remote procedure call.
  */
 struct ptlrpc_request {
         int rq_type; /* one of PTL_RPC_MSG_* */
-        struct list_head rq_list;
-        struct list_head rq_timed_list;    /* server-side early replies */
-        struct list_head rq_history_list;  /* server-side history */
-        __u64            rq_history_seq;   /* history sequence # */
+        cfs_list_t rq_list;
+        cfs_list_t rq_timed_list;          /* server-side early replies */
+        cfs_list_t rq_history_list;        /* server-side history */
+        cfs_list_t rq_exp_list;            /* server-side per-export list */
+        struct ptlrpc_hpreq_ops *rq_ops;   /* server-side hp handlers */
+        __u64      rq_history_seq;         /* history sequence # */
+        /* the index of service's srv_at_array into which request is linked */
+        time_t rq_at_index;
         int rq_status;
-        spinlock_t rq_lock;
+        cfs_spinlock_t rq_lock;
         /* client-side flags are serialized by rq_lock */
         unsigned long rq_intr:1, rq_replied:1, rq_err:1,
                 rq_timedout:1, rq_resend:1, rq_restart:1,
@@ -336,24 +362,26 @@ struct ptlrpc_request {
                  * after server commits corresponding transaction. This is
                  * used for operations that require sequence of multiple
                  * requests to be replayed. The only example currently is file
-                 * open/close/dw/setattr. When last request in such a sequence
-                 * is committed, ->rq_replay is cleared on all requests in the
+                 * open/close. When last request in such a sequence is
+                 * committed, ->rq_replay is cleared on all requests in the
                  * sequence.
                  */
                 rq_replay:1,
-                /* this is the last request in the sequence. */
-                rq_sequence:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
                 rq_early:1, rq_must_unlink:1,
+                rq_fake:1,          /* this is a fake request */
                 /* server-side flags */
                 rq_packed_final:1,  /* packed final reply */
-                rq_sent_final:1;    /* stop sending early replies */
+                rq_hp:1,            /* high priority RPC */
+                rq_at_linked:1,     /* link into service's srv_at_array */
+                rq_reply_truncate:1,
+                rq_committed:1;
 
         enum rq_phase rq_phase;      /* one of RQ_PHASE_* */
         enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */
-        atomic_t rq_refcount;   /* client-side refcount for SENT race,
-                                   server-side refcounf for multiple replies */
+        cfs_atomic_t rq_refcount; /* client-side refcount for SENT race,
+                                     server-side refcount for multiple replies */
 
         struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */
@@ -369,12 +397,11 @@ struct ptlrpc_request {
         struct lustre_msg *rq_repmsg;
         __u64 rq_transno;
         __u64 rq_xid;
-        struct list_head rq_replay_list;
-        struct list_head rq_mod_list;
+        cfs_list_t rq_replay_list;
 
         struct ptlrpc_cli_ctx   *rq_cli_ctx;     /* client's half ctx */
         struct ptlrpc_svc_ctx   *rq_svc_ctx;     /* server's half ctx */
-        struct list_head         rq_ctx_chain;   /* link to waited ctx */
+        cfs_list_t               rq_ctx_chain;   /* link to waited ctx */
 
         struct sptlrpc_flavor    rq_flvr;        /* client & server */
         enum lustre_sec_part     rq_sp_from;
@@ -393,7 +420,8 @@ struct ptlrpc_request {
                                  rq_pack_udesc:1,
                                  rq_pack_bulk:1,
                                  /* doesn't expect reply FIXME */
-                                 rq_no_reply:1;
+                                 rq_no_reply:1,
+                                 rq_pill_init:1; /* pill initialized */
 
         uid_t                    rq_auth_uid;        /* authed uid */
         uid_t                    rq_auth_mapped_uid; /* authed uid mapped to */
@@ -460,10 +488,11 @@ struct ptlrpc_request {
                                    so that servers' early reply updates to the
                                    deadline aren't kept in per-cpu cache */
         time_t rq_reply_deadline;  /* when req reply unlink must finish. */
+        time_t rq_bulk_deadline;   /* when req bulk unlink must finish. */
         int    rq_timeout;         /* service time estimate (secs) */
 
         /* Multi-rpc bits */
-        struct list_head rq_set_chain;
+        cfs_list_t rq_set_chain;
         struct ptlrpc_request_set *rq_set;
         /** Async completion handler */
         ptlrpc_interpterer_t rq_interpret_reply;
@@ -471,43 +500,58 @@ struct ptlrpc_request {
         struct ptlrpc_request_pool *rq_pool;    /* Pool if request from
                                                    preallocated list */
         struct lu_context           rq_session;
+        struct lu_context           rq_recov_session;
 
         /* request format */
         struct req_capsule          rq_pill;
 };
 
-static inline void ptlrpc_close_replay_seq(struct ptlrpc_request *req)
+static inline int ptlrpc_req_interpret(const struct lu_env *env,
+                                       struct ptlrpc_request *req, int rc)
 {
-        spin_lock(&req->rq_lock);
-        req->rq_replay = 0;
-        req->rq_sequence = 1;
-        spin_unlock(&req->rq_lock);
+        if (req->rq_interpret_reply != NULL) {
+                req->rq_status = req->rq_interpret_reply(env, req,
+                                                         &req->rq_async_args,
+                                                         rc);
+                return req->rq_status;
+        }
+        return rc;
 }
 
-static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
 {
         LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
-        LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
-        req->rq_req_swab_mask |= 1 << index;
+        return req->rq_req_swab_mask & (1 << index);
 }
 
-static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
 {
         LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
-        LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
-        req->rq_rep_swab_mask |= 1 << index;
+        return req->rq_rep_swab_mask & (1 << index);
 }
 
-static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
+static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req)
+{
+        return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
+{
+        return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+}
+
+static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
 {
         LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
-        return req->rq_req_swab_mask & (1 << index);
+        LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
+        req->rq_req_swab_mask |= 1 << index;
 }
 
-static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
 {
         LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
-        return req->rq_rep_swab_mask & (1 << index);
+        LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
+        req->rq_rep_swab_mask |= 1 << index;
 }
 
 static inline const char *
@@ -548,9 +592,9 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
         FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),          \
         FLAG(req->rq_no_resend, "N"),                                   \
         FLAG(req->rq_waiting, "W"),                                     \
-        FLAG(req->rq_wait_ctx, "C")
+        FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H")
 
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s"
 
 void _debug_req(struct ptlrpc_request *req, __u32 mask,
                 struct libcfs_debug_msg_data *data, const char *fmt, ...)
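The swab helpers above keep one bit per message buffer so each buffer is byte-swapped at most once and can be tested before unpacking; this hunk only reorders the set/test pairs and adds the MSG_PTLRPC_HEADER_OFF convenience wrappers. A standalone user-space sketch of that bitmask bookkeeping (names here are illustrative, not from the header):

#include <assert.h>

static unsigned int req_swab_mask;

static void set_swabbed(int index)
{
        assert((req_swab_mask & (1U << index)) == 0); /* swab only once */
        req_swab_mask |= 1U << index;
}

static int swabbed(int index)
{
        return req_swab_mask & (1U << index);
}

int main(void)
{
        set_swabbed(0);         /* e.g. the MSG_PTLRPC_HEADER_OFF slot */
        assert(swabbed(0) && !swabbed(1));
        return 0;
}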
@@ -558,7 +602,7 @@ void _debug_req(struct ptlrpc_request *req, __u32 mask,
 #define debug_req(cdls, level, req, file, func, line, fmt, a...)             \
 do {                                                                          \
-        CHECK_STACK();                                                        \
+        CFS_CHECK_STACK();                                                    \
                                                                               \
         if (((level) & D_CANTMASK) != 0 ||                                    \
             ((libcfs_debug & (level)) != 0 &&                                 \
@@ -582,7 +626,7 @@ do {                                                      \
 } while (0)
 
 struct ptlrpc_bulk_page {
-        struct list_head bp_link;
+        cfs_list_t       bp_link;
         int              bp_buflen;
         int              bp_pageoffset; /* offset within a page */
         struct page     *bp_page;
@@ -598,7 +642,7 @@ struct ptlrpc_bulk_desc {
         unsigned long bd_network_rw:1;  /* accessible to the network */
         unsigned long bd_type:2;        /* {put,get}{source,sink} */
         unsigned long bd_registered:1;  /* client side */
-        spinlock_t   bd_lock;           /* serialise with callback */
+        cfs_spinlock_t bd_lock;         /* serialise with callback */
         int bd_import_generation;
         struct obd_export *bd_export;
         struct obd_import *bd_import;
@@ -616,8 +660,12 @@ struct ptlrpc_bulk_desc {
         lnet_handle_md_t      bd_md_h;         /* associated MD */
         lnet_nid_t            bd_sender;       /* stash event::sender */
 
-        cfs_page_t          **bd_enc_pages;
 #if defined(__KERNEL__)
+        /*
+         * encrypt iov, size is either 0 or bd_iov_count.
+         */
+        lnet_kiov_t          *bd_enc_iov;
+
         lnet_kiov_t           bd_iov[0];
 #else
         lnet_md_iovec_t       bd_iov[0];
@@ -625,20 +673,38 @@
 };
 
 struct ptlrpc_thread {
-
-        struct list_head t_link; /* active threads in svc->srv_threads */
-
-        void *t_data;            /* thread-private data (preallocated memory) */
+        /**
+         * active threads in svc->srv_threads
+         */
+        cfs_list_t t_link;
+        /**
+         * thread-private data (preallocated memory)
+         */
+        void *t_data;
         __u32 t_flags;
-
-        unsigned int t_id; /* service thread index, from ptlrpc_start_threads */
+        /**
+         * service thread index, from ptlrpc_start_threads
+         */
+        unsigned int t_id;
+        /**
+         * service thread pid
+         */
+        pid_t t_pid;
+        /**
+         * put watchdog in the structure per thread b=14840
+         */
+        struct lc_watchdog *t_watchdog;
+        /**
+         * the svc this thread belonged to b=18582
+         */
+        struct ptlrpc_service *t_svc;
         cfs_waitq_t t_ctl_waitq;
         struct lu_env *t_env;
 };
 
 struct ptlrpc_request_buffer_desc {
-        struct list_head       rqbd_list;
-        struct list_head       rqbd_reqs;
+        cfs_list_t             rqbd_list;
+        cfs_list_t             rqbd_reqs;
         struct ptlrpc_service *rqbd_service;
         lnet_handle_md_t       rqbd_md_h;
         int                    rqbd_refcount;
@@ -649,9 +715,12 @@ struct ptlrpc_request_buffer_desc {
 
 typedef int (*svc_handler_t)(struct ptlrpc_request *req);
 typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *);
+typedef int (*svc_hpreq_handler_t)(struct ptlrpc_request *);
+
+#define PTLRPC_SVC_HP_RATIO 10
 
 struct ptlrpc_service {
-        struct list_head srv_list;              /* chain thru all services */
+        cfs_list_t       srv_list;              /* chain thru all services */
         int              srv_max_req_size;      /* biggest request to receive */
         int              srv_max_reply_size;    /* biggest reply to send */
         int              srv_buf_size;          /* size of individual buffers */
@@ -661,10 +730,11 @@ struct ptlrpc_service {
         int              srv_threads_max;       /* thread upper limit */
         int              srv_threads_started;   /* index of last started thread */
         int              srv_threads_running;   /* # running threads */
-        int              srv_n_difficult_replies; /* # 'difficult' replies */
+        cfs_atomic_t     srv_n_difficult_replies; /* # 'difficult' replies */
         int              srv_n_active_reqs;     /* # reqs being served */
+        int              srv_n_hpreq;           /* # HPreqs being served */
         cfs_duration_t   srv_rqbd_timeout;      /* timeout before re-posting reqs, in tick */
-        int              srv_watchdog_factor;   /* soft watchdog timeout mutiplier */
+        int              srv_watchdog_factor;   /* soft watchdog timeout multiplier */
         unsigned         srv_cpu_affinity:1;    /* bind threads to CPUs */
         unsigned         srv_at_check:1;        /* check early replies */
         unsigned         srv_is_stopping:1;     /* under unregister_service */
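On PTLRPC_SVC_HP_RATIO above and the srv_hpreq_count/srv_hpreq_ratio fields in the ptlrpc_service hunk that follows: the intent is that high priority requests are preferred but cannot starve the normal queue. A hedged sketch of that selection idea (the real scheduler lives in ptlrpc/service.c and differs in detail):

/* returns 1 to dequeue from srv_request_hpq, 0 for srv_request_queue */
static int pick_hp_request(int *hpreq_count, int hpreq_ratio,
                           int hp_pending, int normal_pending)
{
        if (hp_pending && (*hpreq_count < hpreq_ratio || !normal_pending)) {
                (*hpreq_count)++;       /* one more HP request in a row */
                return 1;
        }
        *hpreq_count = 0;               /* reset after a normal request */
        return 0;
}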
@@ -675,56 +745,61 @@ struct ptlrpc_service {
         /* AT stuff */
         struct adaptive_timeout srv_at_estimate;/* estimated rpc service time */
-        spinlock_t        srv_at_lock;
-        struct list_head  srv_at_list;          /* reqs waiting for replies */
-        cfs_timer_t       srv_at_timer;         /* early reply timer */
-
-        int               srv_n_queued_reqs;    /* # reqs in either of the queues below */
-        struct list_head  srv_req_in_queue;     /* incoming reqs */
-        struct list_head  srv_request_queue;    /* reqs waiting for service */
-
-        struct list_head  srv_request_history;  /* request history */
-        __u64             srv_request_seq;      /* next request sequence # */
-        __u64             srv_request_max_cull_seq; /* highest seq culled from history */
-        svcreq_printfn_t  srv_request_history_print_fn; /* service-specific print fn */
-
-        struct list_head  srv_idle_rqbds;       /* request buffers to be reposted */
-        struct list_head  srv_active_rqbds;     /* req buffers receiving */
-        struct list_head  srv_history_rqbds;    /* request buffer history */
-        int               srv_nrqbd_receiving;  /* # posted request buffers */
-        int               srv_n_history_rqbds;  /* # request buffers in history */
-        int               srv_max_history_rqbds;/* max # request buffers in history */
-
-        atomic_t          srv_outstanding_replies;
-        struct list_head  srv_active_replies;   /* all the active replies */
-        struct list_head  srv_reply_queue;      /* replies waiting for service */
-
-        cfs_waitq_t       srv_waitq; /* all threads sleep on this. This
-                                      * wait-queue is signalled when new
-                                      * incoming request arrives and when
-                                      * difficult reply has to be handled. */
-
-        struct list_head  srv_threads;          /* service thread list */
-        svc_handler_t     srv_handler;
+        cfs_spinlock_t    srv_at_lock;
+        struct ptlrpc_at_array srv_at_array;    /* reqs waiting for replies */
+        cfs_timer_t       srv_at_timer;         /* early reply timer */
+
+        int               srv_n_queued_reqs;    /* # reqs in either of the queues below */
+        int               srv_hpreq_count;      /* # hp requests handled */
+        int               srv_hpreq_ratio;      /* # hp per lp reqs to handle */
+        cfs_list_t        srv_req_in_queue;     /* incoming reqs */
+        cfs_list_t        srv_request_queue;    /* reqs waiting for service */
+        cfs_list_t        srv_request_hpq;      /* high priority queue */
+
+        cfs_list_t        srv_request_history;  /* request history */
+        __u64             srv_request_seq;      /* next request sequence # */
+        __u64             srv_request_max_cull_seq; /* highest seq culled from history */
+        svcreq_printfn_t  srv_request_history_print_fn; /* service-specific print fn */
+
+        cfs_list_t        srv_idle_rqbds;       /* request buffers to be reposted */
+        cfs_list_t        srv_active_rqbds;     /* req buffers receiving */
+        cfs_list_t        srv_history_rqbds;    /* request buffer history */
+        int               srv_nrqbd_receiving;  /* # posted request buffers */
+        int               srv_n_history_rqbds;  /* # request buffers in history */
+        int               srv_max_history_rqbds;/* max # request buffers in history */
+
+        cfs_atomic_t      srv_outstanding_replies;
+        cfs_list_t        srv_active_replies;   /* all the active replies */
+#ifndef __KERNEL__
+        cfs_list_t        srv_reply_queue;      /* replies waiting for service */
+#endif
+
+        cfs_waitq_t       srv_waitq; /* all threads sleep on this. This
+                                      * wait-queue is signalled when new
+                                      * incoming request arrives and when
+                                      * difficult reply has to be handled. */
+
+        cfs_list_t          srv_threads;        /* service thread list */
+        svc_handler_t       srv_handler;
+        svc_hpreq_handler_t srv_hpreq_handler;  /* hp request handler */
 
         char *srv_name; /* only statically allocated strings here; we don't clean them */
         char *srv_thread_name; /* only statically allocated strings here; we don't clean them */
 
-        spinlock_t               srv_lock;
+        cfs_spinlock_t           srv_lock;
 
-        cfs_proc_dir_entry_t    *srv_procroot;
-        struct lprocfs_stats    *srv_stats;
+        cfs_proc_dir_entry_t    *srv_procroot;
+        struct lprocfs_stats    *srv_stats;
 
         /* List of free reply_states */
-        struct list_head         srv_free_rs_list;
+        cfs_list_t               srv_free_rs_list;
         /* waitq to run, when adding stuff to srv_free_rs_list */
-        cfs_waitq_t              srv_free_rs_waitq;
+        cfs_waitq_t              srv_free_rs_waitq;
 
         /*
          * Tags for lu_context associated with this thread, see struct
          * lu_context.
          */
-        __u32                    srv_ctx_tags;
+        __u32                    srv_ctx_tags;
         /*
          * if non-NULL called during thread creation (ptlrpc_start_thread())
          * to initialize service specific per-thread state.
@@ -741,21 +816,21 @@ struct ptlrpc_service {
 
 struct ptlrpcd_ctl {
         /**
-         * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_STOP_FORCE)
+         * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
         */
         unsigned long               pc_flags;
         /**
         * Thread lock protecting structure fields.
         */
-        spinlock_t                  pc_lock;
+        cfs_spinlock_t              pc_lock;
        /**
         * Start completion.
        */
-        struct completion           pc_starting;
+        cfs_completion_t            pc_starting;
        /**
         * Stop completion.
        */
-        struct completion           pc_finishing;
+        cfs_completion_t            pc_finishing;
        /**
        * Thread requests set.
        */
@@ -800,10 +875,11 @@ enum ptlrpcd_ctl_flags {
          */
         LIOD_STOP        = 1 << 1,
         /**
-         * Ptlrpc thread stop force flag. This will cause also
-         * aborting any inflight rpcs handled by thread.
+         * Ptlrpc thread force flag (only stop force so far).
+         * This will cause aborting any inflight rpcs handled
+         * by thread if LIOD_STOP is specified.
          */
-        LIOD_STOP_FORCE  = 1 << 2,
+        LIOD_FORCE       = 1 << 2,
         /**
          * This is a recovery ptlrpc thread.
          */
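The LIOD_STOP_FORCE -> LIOD_FORCE rename above turns the force bit into a modifier of LIOD_STOP rather than a standalone stop flag. A minimal sketch of the intended test, assuming the enum values above (the helper name is hypothetical):

static int should_abort_inflight(unsigned long pc_flags)
{
        /* abort in-flight RPCs only when a stop was forced */
        return (pc_flags & LIOD_STOP) && (pc_flags & LIOD_FORCE);
}

/* should_abort_inflight(LIOD_STOP | LIOD_FORCE) -> 1
 * should_abort_inflight(LIOD_STOP)              -> 0 */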
@@ -835,16 +911,38 @@ extern lnet_pid_t ptl_get_pid(void);
 int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc);
 void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc);
 int ptlrpc_register_bulk(struct ptlrpc_request *req);
-void ptlrpc_unregister_bulk (struct ptlrpc_request *req);
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async);
 
-static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc)
+static inline int ptlrpc_server_bulk_active(struct ptlrpc_bulk_desc *desc)
 {
-        int           rc;
+        int rc;
+
+        LASSERT(desc != NULL);
 
-        spin_lock(&desc->bd_lock);
+        cfs_spin_lock(&desc->bd_lock);
         rc = desc->bd_network_rw;
-        spin_unlock(&desc->bd_lock);
-        return (rc);
+        cfs_spin_unlock(&desc->bd_lock);
+        return rc;
+}
+
+static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
+{
+        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+        int rc;
+
+        LASSERT(req != NULL);
+
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+            req->rq_bulk_deadline > cfs_time_current_sec())
+                return 1;
+
+        if (!desc)
+                return 0;
+
+        cfs_spin_lock(&desc->bd_lock);
+        rc = desc->bd_network_rw;
+        cfs_spin_unlock(&desc->bd_lock);
+        return rc;
 }
 
 #define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01
@@ -869,6 +967,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req);
 int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
 void ptlrpc_restart_req(struct ptlrpc_request *req);
 void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_cleanup_imp(struct obd_import *imp);
 void ptlrpc_abort_set(struct ptlrpc_request_set *set);
 
 struct ptlrpc_request_set *ptlrpc_prep_set(void);
@@ -907,6 +1006,11 @@ struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
 int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
                              __u32 version, int opcode, char **bufs,
                              struct ptlrpc_cli_ctx *ctx);
+struct ptlrpc_request *ptlrpc_prep_fakereq(struct obd_import *imp,
+                                           unsigned int timeout,
+                                           ptlrpc_interpterer_t interpreter);
+void ptlrpc_fakereq_finished(struct ptlrpc_request *req);
+
 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
                                        int opcode, int count, __u32 *lengths,
                                        char **bufs);
@@ -946,7 +1050,8 @@ struct ptlrpc_service_conf {
 /* ptlrpc/service.c */
 void ptlrpc_save_lock (struct ptlrpc_request *req,
                        struct lustre_handle *lock, int mode, int no_ack);
-void ptlrpc_commit_replies (struct obd_device *obd);
+void ptlrpc_commit_replies(struct obd_export *exp);
+void ptlrpc_dispatch_difficult_reply (struct ptlrpc_reply_state *rs);
 void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
 struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
                                             svc_handler_t h, char *name,
@@ -962,7 +1067,8 @@ struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
                                        cfs_proc_dir_entry_t *proc_entry,
                                        svcreq_printfn_t, int min_threads,
                                        int max_threads,
-                                       char *threadname, __u32 ctx_tags);
+                                       char *threadname, __u32 ctx_tags,
+                                       svc_hpreq_handler_t);
 void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
 
 int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc);
@@ -971,7 +1077,18 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service);
 int liblustre_check_services (void *arg);
 void ptlrpc_daemonize(char *name);
 int ptlrpc_service_health_check(struct ptlrpc_service *);
+void ptlrpc_hpreq_reorder(struct ptlrpc_request *req);
+void ptlrpc_server_active_request_inc(struct ptlrpc_request *req);
+void ptlrpc_server_active_request_dec(struct ptlrpc_request *req);
+void ptlrpc_server_drop_request(struct ptlrpc_request *req);
+#ifdef __KERNEL__
+int ptlrpc_hr_init(void);
+void ptlrpc_hr_fini(void);
+#else
+# define ptlrpc_hr_init() (0)
+# define ptlrpc_hr_fini() do {} while(0)
+#endif
 
 struct ptlrpc_svc_data {
         char *name;
@@ -988,7 +1105,15 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
 
 /* ptlrpc/pack_generic.c */
 int ptlrpc_reconnect_import(struct obd_import *imp);
-int lustre_msg_swabbed(struct lustre_msg *msg);
+
+/** ptlrpc mgs buffer swab interface */
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+                         int index);
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+                            int index);
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
+
 int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
 void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count,
                         __u32 *lens, char **bufs);
@@ -1004,23 +1129,18 @@ int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens,
 int lustre_shrink_msg(struct lustre_msg *msg, int segment,
                       unsigned int newlen, int move_data);
 void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
+int __lustre_unpack_msg(struct lustre_msg *m, int len);
 int lustre_msg_hdr_size(__u32 magic, int count);
 int lustre_msg_size(__u32 magic, int count, __u32 *lengths);
 int lustre_msg_size_v2(int count, __u32 *lengths);
 int lustre_packed_msg_size(struct lustre_msg *msg);
 int lustre_msg_early_size(void);
-int lustre_unpack_msg(struct lustre_msg *m, int len);
 void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
 void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
 int lustre_msg_buflen(struct lustre_msg *m, int n);
 void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len);
 int lustre_msg_bufcount(struct lustre_msg *m);
 char *lustre_msg_string (struct lustre_msg *m, int n, int max_len);
-void *lustre_swab_buf(struct lustre_msg *, int n, int minlen, void *swabber);
-void *lustre_swab_reqbuf(struct ptlrpc_request *req, int n, int minlen,
-                         void *swabber);
-void *lustre_swab_repbuf(struct ptlrpc_request *req, int n, int minlen,
-                         void *swabber);
 __u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
 void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
 __u32 lustre_msg_get_flags(struct lustre_msg *msg);
@@ -1037,6 +1157,7 @@ void lustre_msg_add_version(struct lustre_msg *msg, int version);
 __u32 lustre_msg_get_opc(struct lustre_msg *msg);
 __u64 lustre_msg_get_last_xid(struct lustre_msg *msg);
 __u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg);
 __u64 lustre_msg_get_transno(struct lustre_msg *msg);
 __u64 lustre_msg_get_slv(struct lustre_msg *msg);
 __u32 lustre_msg_get_limit(struct lustre_msg *msg);
@@ -1055,6 +1176,7 @@ void lustre_msg_set_type(struct lustre_msg *msg, __u32 type);
 void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc);
 void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid);
 void lustre_msg_set_last_committed(struct lustre_msg *msg,__u64 last_committed);
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions);
 void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno);
 void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
 void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
@@ -1079,19 +1201,19 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 {
         if (req->rq_phase == new_phase)
                 return;
-        
+
         if (new_phase == RQ_PHASE_UNREGISTERING) {
                 req->rq_next_phase = req->rq_phase;
                 if (req->rq_import)
-                        atomic_inc(&req->rq_import->imp_unregistering);
+                        cfs_atomic_inc(&req->rq_import->imp_unregistering);
         }
-        
+
         if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
                 if (req->rq_import)
-                        atomic_dec(&req->rq_import->imp_unregistering);
+                        cfs_atomic_dec(&req->rq_import->imp_unregistering);
         }
 
-        DEBUG_REQ(D_RPCTRACE, req, "move req \"%s\" -> \"%s\"",
+        DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"",
                   ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase));
 
         req->rq_phase = new_phase;
@@ -1100,7 +1222,7 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 static inline int
 ptlrpc_client_early(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK) &&
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
             req->rq_reply_deadline > cfs_time_current_sec())
                 return 0;
         return req->rq_early;
@@ -1109,7 +1231,7 @@ ptlrpc_client_early(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_replied(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK) &&
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
             req->rq_reply_deadline > cfs_time_current_sec())
                 return 0;
         return req->rq_replied;
@@ -1118,7 +1240,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_recv(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK) &&
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
             req->rq_reply_deadline > cfs_time_current_sec())
                 return 1;
         return req->rq_receiving_reply;
@@ -1129,14 +1251,14 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
 {
         int rc;
 
-        spin_lock(&req->rq_lock);
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_UNLINK) &&
+        cfs_spin_lock(&req->rq_lock);
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
             req->rq_reply_deadline > cfs_time_current_sec()) {
-                spin_unlock(&req->rq_lock);
+                cfs_spin_unlock(&req->rq_lock);
                 return 1;
         }
         rc = req->rq_receiving_reply || req->rq_must_unlink;
-        spin_unlock(&req->rq_lock);
+        cfs_spin_unlock(&req->rq_lock);
         return rc;
 }
 
@@ -1152,15 +1274,15 @@ ptlrpc_client_wake_req(struct ptlrpc_request *req)
 static inline void
 ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
 {
-        LASSERT(atomic_read(&rs->rs_refcount) > 0);
-        atomic_inc(&rs->rs_refcount);
+        LASSERT(cfs_atomic_read(&rs->rs_refcount) > 0);
+        cfs_atomic_inc(&rs->rs_refcount);
 }
 
 static inline void
 ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
 {
-        LASSERT(atomic_read(&rs->rs_refcount) > 0);
-        if (atomic_dec_and_test(&rs->rs_refcount))
+        LASSERT(cfs_atomic_read(&rs->rs_refcount) > 0);
+        if (cfs_atomic_dec_and_test(&rs->rs_refcount))
                 lustre_free_reply_state(rs);
 }
 
@@ -1195,7 +1317,7 @@ static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req)
 int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
 int client_obd_cleanup(struct obd_device *obddev);
 int client_connect_import(const struct lu_env *env,
-                          struct lustre_handle *conn, struct obd_device *obd,
+                          struct obd_export **exp, struct obd_device *obd,
                           struct obd_uuid *cluuid, struct obd_connect_data *,
                           void *localdata);
 int client_disconnect_export(struct obd_export *exp);
@@ -1203,10 +1325,25 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
                            int priority);
 int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
 int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
+void client_destroy_import(struct obd_import *imp);
+
+int server_disconnect_export(struct obd_export *exp);
 
 /* ptlrpc/pinger.c */
+enum timeout_event {
+        TIMEOUT_GRANT = 1
+};
+struct timeout_item;
+typedef int (*timeout_cb_t)(struct timeout_item *, void *);
 int ptlrpc_pinger_add_import(struct obd_import *imp);
 int ptlrpc_pinger_del_import(struct obd_import *imp);
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+                              timeout_cb_t cb, void *data,
+                              cfs_list_t *obd_list);
+int ptlrpc_del_timeout_client(cfs_list_t *obd_list,
+                              enum timeout_event event);
+struct ptlrpc_request * ptlrpc_prep_ping(struct obd_import *imp);
+int ptlrpc_obd_ping(struct obd_device *obd);
 cfs_time_t ptlrpc_suspend_wakeup_time(void);
 #ifdef __KERNEL__
 void ping_evictor_start(void);
@@ -1236,7 +1373,8 @@ enum ptlrpcd_scope {
 int ptlrpcd_start(const char *name, struct ptlrpcd_ctl *pc);
 void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
 void ptlrpcd_wake(struct ptlrpc_request *req);
-void ptlrpcd_add_req(struct ptlrpc_request *req, enum ptlrpcd_scope scope);
+int ptlrpcd_add_req(struct ptlrpc_request *req, enum ptlrpcd_scope scope);
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set);
 int ptlrpcd_addref(void);
 void ptlrpcd_decref(void);
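The new pinger timeout-client interface declared above lets a client hang periodic callbacks off the pinger instead of running its own timer. A hedged sketch of registration and teardown (grant_timeout_cb and my_obd_timeout_list are hypothetical; only the declarations shown in this patch are assumed):

static int grant_timeout_cb(struct timeout_item *item, void *data)
{
        /* invoked by the pinger when the TIMEOUT_GRANT event fires */
        return 0;
}

static cfs_list_t my_obd_timeout_list;  /* per-obd timeout item list */

static void register_grant_timeout(void *obd_data)
{
        /* run grant_timeout_cb roughly every 30 seconds */
        ptlrpc_add_timeout_client(30, TIMEOUT_GRANT, grant_timeout_cb,
                                  obd_data, &my_obd_timeout_list);
}

static void unregister_grant_timeout(void)
{
        ptlrpc_del_timeout_client(&my_obd_timeout_list, TIMEOUT_GRANT);
}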