1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #include <linux/tqueue.h>
27 #include <linux/kp30.h>
28 // #include <linux/obd.h>
29 #include <portals/p30.h>
30 #include <linux/lustre_idl.h>
31 #include <linux/lustre_ha.h>
32 #include <linux/lustre_import.h>
34 /* The following constants determine how much memory is devoted to
35 * buffering in the lustre services.
37 * ?_NEVENTS # event queue entries
39 * ?_NBUFS # request buffers
40 * ?_BUFSIZE # bytes in a single request buffer
41 * total memory = ?_NBUFS * ?_BUFSIZE
43 * ?_MAXREQSIZE # maximum request service will receive
44 * larger messages will get dropped.
45 * request buffers are auto-unlinked when less than ?_MAXREQSIZE is left in them. */
/*
 * Per-service buffering parameters for the LDLM, MDS and OST services:
 *   ?_NEVENTS     number of event queue entries
 *   ?_BUFSIZE     bytes in a single request buffer
 *   ?_MAXREQSIZE  largest request the service will receive
 */
49 #define LDLM_NEVENTS 1024
51 #define LDLM_BUFSIZE (64 * 1024)
52 #define LDLM_MAXREQSIZE 1024
54 #define MDS_NEVENTS 1024
56 #define MDS_BUFSIZE (64 * 1024)
57 #define MDS_MAXREQSIZE 1024
/*
 * NOTE(review): OST_NEVENTS and OST_BUFSIZE are defined twice below with
 * different values (1024 vs 32768 events, 64K vs 128K buffers). Presumably
 * the two sets belong to alternative preprocessor branches whose directives
 * are not visible in this copy -- confirm before relying on either set.
 */
60 #define OST_NEVENTS 1024
62 #define OST_BUFSIZE (64 * 1024)
63 #define OST_MAXREQSIZE (8 * 1024)
65 #define OST_NEVENTS 32768
67 #define OST_BUFSIZE (128 * 1024)
68 #define OST_MAXREQSIZE (8 * 1024)
/* Presumably a bit value for ptlrpc_connection.c_flags -- confirm against users. */
71 #define CONN_INVALID 1
/*
 * State kept for one connection to a peer: the peer itself, the local and
 * remote UUIDs, generation/epoch/boot counters, and the XID, commit and
 * request-list bookkeeping that c_lock protects.
 */
73 struct ptlrpc_connection {
/* list linkage for this connection; the list it lives on is not visible here */
74 struct list_head c_link;
75 struct lustre_peer c_peer;
/* 37-byte UUID buffers; presumably a 36-character UUID string plus NUL -- confirm */
76 __u8 c_local_uuid[37]; /* XXX do we need this? */
77 __u8 c_remote_uuid[37];
80 __u32 c_generation; /* changes upon new connection */
81 __u32 c_epoch; /* changes when peer changes */
82 __u32 c_bootcount; /* peer's boot count */
84 spinlock_t c_lock; /* also protects req->rq_list */
93 __u64 c_last_xid; /* protected by c_lock */
94 __u64 c_last_committed;/* protected by c_lock */
95 struct list_head c_delayed_head;/* delayed until post-recovery */
96 struct list_head c_sending_head;/* protected by c_lock */
97 struct list_head c_dying_head; /* protected by c_lock */
/* struct recovd_data is declared elsewhere; presumably recovery bookkeeping -- confirm */
98 struct recovd_data c_recovd_data;
/* list heads; presumably the imports, exports and superblocks using this connection -- confirm */
100 struct list_head c_imports;
101 struct list_head c_exports;
102 struct list_head c_sb_chain;
103 __u32 c_flags; /* can we indicate INVALID elsewhere? */
/*
 * Client-side RPC parameters: a request portal, a reply portal and a target
 * device number. ptlrpc_init_client() takes a request and a reply portal
 * together with a pointer to one of these.
 */
106 struct ptlrpc_client {
107 __u32 cli_request_portal;
108 __u32 cli_reply_portal;
110 __u32 cli_target_devno;
/* disabled member, kept only as a comment */
113 // struct semaphore cli_rpc_sem; /* limits outstanding requests */
/*
 * Request state bits. Every flag occupies its own bit (0 through 12); the two
 * PTL_BULK_FL_* values share the same bit space as the PTL_RPC_FL_* values
 * (bits 3 and 4), so all of them can be combined in one flags word.
 * NOTE(review): the field that holds these bits is not visible in this copy.
 */
118 /* state flags of requests */
119 #define PTL_RPC_FL_INTR (1 << 0)
120 #define PTL_RPC_FL_REPLIED (1 << 1) /* reply was received */
121 #define PTL_RPC_FL_SENT (1 << 2)
122 #define PTL_BULK_FL_SENT (1 << 3)
123 #define PTL_BULK_FL_RCVD (1 << 4)
124 #define PTL_RPC_FL_ERR (1 << 5)
125 #define PTL_RPC_FL_TIMEOUT (1 << 6)
126 #define PTL_RPC_FL_RESEND (1 << 7)
127 #define PTL_RPC_FL_RECOVERY (1 << 8) /* retransmission for recovery */
128 #define PTL_RPC_FL_FINISHED (1 << 9)
129 #define PTL_RPC_FL_RETAIN (1 << 10) /* retain for replay after reply */
130 #define PTL_RPC_FL_REPLAY (1 << 11) /* replay upon recovery */
131 #define PTL_RPC_FL_ALLOCREP (1 << 12) /* reply buffer allocated */
/*
 * One RPC request: its type, reference count, request and reply message
 * pointers, the Portals descriptor and handles used for the reply, and
 * pointers to the export, connection, import and service it is tied to.
 */
133 struct ptlrpc_request {
134 int rq_type; /* one of PTL_RPC_MSG_* */
/* list linkage; per the comment on ptlrpc_connection.c_lock, that lock protects it */
135 struct list_head rq_list;
136 struct obd_device *rq_obd;
139 atomic_t rq_refcount;
/* request message; ptlrpc_hdl2req() writes its addr and cookie fields */
142 struct lustre_msg *rq_reqmsg;
/* reply message */
145 struct lustre_msg *rq_repmsg;
151 // void * rq_reply_handle;
/* wait queue; presumably slept on until the reply arrives -- confirm */
152 wait_queue_head_t rq_wait_for_rep;
/* Portals memory descriptor plus MD and match-entry handles for the reply */
155 ptl_md_t rq_reply_md;
156 ptl_handle_md_t rq_reply_md_h; /* we can lose this: set, never read */
157 ptl_handle_me_t rq_reply_me_h;
159 /* outgoing req/rep */
162 struct lustre_peer rq_peer; /* XXX see service.c can this be factored away? */
163 struct obd_export *rq_export;
164 struct ptlrpc_connection *rq_connection;
165 struct obd_import *rq_import;
166 struct ptlrpc_service *rq_svc;
/* replay callback and the handle stored alongside it; presumably passed as its second argument -- confirm */
168 void (*rq_replay_cb)(struct ptlrpc_request *, struct lustre_handle *);
169 struct lustre_handle rq_replay_cb_handle;
/*
 * One page belonging to a bulk descriptor: a back pointer to the descriptor,
 * list linkage, the page itself, a dentry and a per-page callback.
 */
172 struct ptlrpc_bulk_page {
/* owning descriptor */
173 struct ptlrpc_bulk_desc *bp_desc;
/* list linkage; presumably chained on the descriptor's bd_page_list -- confirm */
174 struct list_head bp_link;
177 struct page *bp_page;
180 struct dentry *bp_dentry;
/* per-page callback taking this structure; when it is invoked is not visible here */
181 int (*bp_cb)(struct ptlrpc_bulk_page *);
/*
 * Descriptor for a bulk transfer: the connection and client it belongs to, a
 * completion callback, a wait queue, a list of pages, a reference count
 * (see ptlrpc_bulk_addref() / ptlrpc_bulk_decref()) and Portals handles.
 */
184 struct ptlrpc_bulk_desc {
186 struct ptlrpc_connection *bd_connection;
187 struct ptlrpc_client *bd_client;
189 struct lustre_handle bd_conn;
/* callback with the same signature as bulk_callback_t */
190 void (*bd_cb)(struct ptlrpc_bulk_desc *, void *);
193 wait_queue_head_t bd_waitq;
/* list head; presumably of ptlrpc_bulk_page.bp_link entries -- confirm */
194 struct list_head bd_page_list;
/* dropping the last reference frees the descriptor via ptlrpc_free_bulk() */
196 atomic_t bd_refcount;
/* opaque pointer; presumably the second argument handed to bd_cb -- confirm */
197 void *bd_desc_private;
/* task-queue entry (linux/tqueue.h); presumably used to defer work -- confirm */
198 struct tq_struct bd_queue;
/* Portals memory-descriptor and match-entry handles */
201 ptl_handle_md_t bd_md_h;
202 ptl_handle_me_t bd_me_h;
204 atomic_t bd_source_callback_count;
206 struct iovec bd_iov[16]; /* self-sized pre-allocated iov */
/*
 * Per-thread record for a service thread: list linkage and a control wait
 * queue. NOTE(review): other members may exist that this copy does not show.
 */
209 struct ptlrpc_thread {
/* list linkage; presumably chained on ptlrpc_service.srv_threads -- confirm */
210 struct list_head t_link;
213 wait_queue_head_t t_ctl_waitq;
/*
 * Descriptor for one incoming-request buffer owned by a service: list
 * linkage, the owning service, a Portals match-entry handle and a reference
 * count. ptlrpc_link_svc_me() takes a pointer to one of these.
 */
216 struct ptlrpc_request_buffer_desc {
/* list linkage; presumably chained on ptlrpc_service.srv_rqbds -- confirm */
217 struct list_head rqbd_list;
218 struct ptlrpc_service *rqbd_service;
219 ptl_handle_me_t rqbd_me_h;
220 atomic_t rqbd_refcount;
/*
 * One RPC service: request-buffer sizing and accounting, request and reply
 * portals, the Portals event queue handle, the wait queue its threads sleep
 * on, the thread list, the request handler and a name.
 */
224 struct ptlrpc_service {
228 /* incoming request buffers */
229 /* FIXME: perhaps a list of EQs, if multiple NIs are used? */
231 __u32 srv_max_req_size; /* biggest request to receive */
232 __u32 srv_buf_size; /* # bytes in a request buffer */
233 struct list_head srv_rqbds; /* all the request buffer descriptors */
234 __u32 srv_nrqbds; /* # request buffers */
235 atomic_t srv_nrqbds_receiving; /* # request buffers posted for input */
/* portal numbers for requests and replies */
237 __u32 srv_req_portal;
238 __u32 srv_rep_portal;
/* Portals event queue handle */
243 ptl_handle_eq_t srv_eq_h;
245 struct lustre_peer srv_self;
247 wait_queue_head_t srv_waitq; /* all threads sleep on this */
/* list head; presumably of ptlrpc_thread.t_link entries -- confirm */
250 struct list_head srv_threads;
/* request handler with the same signature as svc_handler_t */
251 int (*srv_handler)(struct ptlrpc_request *req);
252 char *srv_name; /* only statically allocated strings here; we don't clean them */
255 static inline void ptlrpc_hdl2req(struct ptlrpc_request *req, struct lustre_handle *h)
257 req->rq_reqmsg->addr = h->addr;
258 req->rq_reqmsg->cookie = h->cookie;
/* Bulk callback type; same signature as ptlrpc_bulk_desc.bd_cb. */
261 typedef void (*bulk_callback_t)(struct ptlrpc_bulk_desc *, void *);
/* Service request handler type; ptlrpc_init_svc() takes one, and ptlrpc_service.srv_handler has the same signature. */
263 typedef int (*svc_handler_t)(struct ptlrpc_request *req);
/* Connection management entry points (declarations only; bodies live elsewhere). */
265 /* rpc/connection.c */
266 void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, obd_uuid_t uuid);
/*
 * NOTE(review): the parameter list of ptlrpc_get_connection() is cut off in
 * this copy -- the line ends on a comma and its continuation is missing.
 */
267 struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer,
269 int ptlrpc_put_connection(struct ptlrpc_connection *c);
270 struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
271 void ptlrpc_init_connection(void);
272 void ptlrpc_cleanup_connection(void);
/*
 * Declarations for bulk-descriptor operations (check sent/received, send,
 * register, abort), for sending replies and errors from a service, and for
 * sending or resending a request. Bodies live elsewhere.
 */
275 int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *bulk);
276 int ptlrpc_check_bulk_received(struct ptlrpc_bulk_desc *bulk);
277 int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *);
278 int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *);
279 int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk);
280 int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req);
281 int ptlrpc_error(struct ptlrpc_service *svc, struct ptlrpc_request *req);
282 void ptlrpc_resend_req(struct ptlrpc_request *request);
283 int ptl_send_rpc(struct ptlrpc_request *request);
284 void ptlrpc_link_svc_me(struct ptlrpc_request_buffer_desc *rqbd);
/*
 * Client-side declarations: client setup and cleanup, UUID/connection
 * lookup, request queueing and replay, and allocation/release of requests,
 * bulk descriptors and bulk pages. Bodies live elsewhere.
 */
287 void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
288 struct ptlrpc_client *);
289 void ptlrpc_cleanup_client(struct obd_import *imp);
290 __u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req);
291 struct ptlrpc_connection *ptlrpc_uuid_to_connection(obd_uuid_t uuid);
293 int ptlrpc_queue_wait(struct ptlrpc_request *req);
294 void ptlrpc_continue_req(struct ptlrpc_request *req);
295 int ptlrpc_replay_req(struct ptlrpc_request *req);
296 void ptlrpc_restart_req(struct ptlrpc_request *req);
/* count/lengths/bufs presumably describe the message buffers, as in lustre_pack_msg() -- confirm */
298 struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
299 int count, int *lengths, char **bufs);
300 void ptlrpc_free_req(struct ptlrpc_request *request);
301 void ptlrpc_req_finished(struct ptlrpc_request *request);
302 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *);
/* called by ptlrpc_bulk_decref() when the last reference is dropped */
303 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
304 struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc);
305 void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *page);
306 int ptlrpc_check_status(struct ptlrpc_request *req, int err);
/*
 * Service-side declarations: create a service from its buffering parameters,
 * portals, UUID, handler and name; start and stop its threads; unregister it.
 * Bodies live elsewhere.
 */
309 struct ptlrpc_service *
310 ptlrpc_init_svc(__u32 nevents, __u32 nbufs, __u32 bufsize, __u32 max_req_size,
311 int req_portal, int rep_portal,
312 obd_uuid_t uuid, svc_handler_t, char *name);
313 void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
/*
 * NOTE(review): the parameter list of ptlrpc_start_thread() is cut off in
 * this copy -- the line ends on a comma and its continuation is missing.
 */
314 int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
316 int ptlrpc_unregister_service(struct ptlrpc_service *service);
/*
 * Bundle of a service, a thread record and an OBD device. Presumably the
 * argument block handed to a newly started service thread -- confirm.
 */
318 struct ptlrpc_svc_data {
320 struct ptlrpc_service *svc;
321 struct ptlrpc_thread *thread;
322 struct obd_device *dev;
/* Message packing helpers operating on struct lustre_msg (declarations only). */
325 /* rpc/pack_generic.c */
/* count/lens/bufs describe the buffers; len and msg are out-parameters (pointer types) */
326 int lustre_pack_msg(int count, int *lens, char **bufs, int *len,
327 struct lustre_msg **msg);
328 int lustre_msg_size(int count, int *lengths);
329 int lustre_unpack_msg(struct lustre_msg *m, int len);
/* returns a pointer associated with index n of message m; presumably its n-th buffer -- confirm */
330 void *lustre_msg_buf(struct lustre_msg *m, int n);
332 static inline void ptlrpc_bulk_decref(struct ptlrpc_bulk_desc *desc)
334 if (atomic_dec_and_test(&desc->bd_refcount)) {
335 CDEBUG(D_PAGE, "Released last ref on %p, freeing\n", desc);
336 ptlrpc_free_bulk(desc);
338 CDEBUG(D_PAGE, "%p -> %d\n", desc,
339 atomic_read(&desc->bd_refcount));
343 static inline void ptlrpc_bulk_addref(struct ptlrpc_bulk_desc *desc)
345 atomic_inc(&desc->bd_refcount);
346 CDEBUG(D_PAGE, "Set refcount of %p to %d\n", desc,
347 atomic_read(&desc->bd_refcount));