1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5 * Author: Isaac Huang <isaac@clusterfs.com>
8 #ifndef __SELFTEST_SELFTEST_H__
9 #define __SELFTEST_SELFTEST_H__
14 /* XXX workaround XXX */
15 #ifdef HAVE_SYS_TYPES_H
16 #include <sys/types.h>
18 #include <liblustre.h> /* userland spinlock_t and atomic_t */
20 #include <libcfs/kp30.h>
21 #include <libcfs/libcfs.h>
22 #include <lnet/lnet.h>
23 #include <lnet/lib-lnet.h>
24 #include <lnet/lib-types.h>
25 #include <lnet/lnetst.h>
30 #ifndef MADE_WITHOUT_COMPROMISE
31 #define MADE_WITHOUT_COMPROMISE
/*
 * Workitem state codes. swi_init_workitem() stores SWI_STATE_NEWBORN in
 * wi_state, and swi_state2str() turns each of these values into its name.
 * The values are not contiguous: 7 through 9 are unassigned here.
 */
35 #define SWI_STATE_NEWBORN 0
36 #define SWI_STATE_REPLY_SUBMITTED 1
37 #define SWI_STATE_REPLY_SENT 2
38 #define SWI_STATE_REQUEST_SUBMITTED 3
39 #define SWI_STATE_REQUEST_SENT 4
40 #define SWI_STATE_REPLY_RECEIVED 5
41 #define SWI_STATE_BULK_STARTED 6
42 #define SWI_STATE_DONE 10
/* Forward declaration; the struct itself is defined further down in this header. */
48 struct sfw_test_instance;
51 * A workitem is deferred work with these semantics:
52 * - a workitem always runs in thread context.
53 * - a workitem can be concurrent with other workitems but is strictly
54 * serialized with respect to itself.
55 * - no CPU affinity, a workitem does not necessarily run on the same CPU
56 * that schedules it. However, this might change in the future.
57 * - if a workitem is scheduled again before it has a chance to run, it
59 * - if a workitem is scheduled while it runs, it runs again after it
60 * completes; this ensures that events occurring while other events are
61 * being processed receive due attention. This behavior also allows a
62 * workitem to reschedule itself.
65 * - a workitem can sleep but it should be aware of how that sleep might
67 * - a workitem runs inside a kernel thread so there's no user space to access.
68 * - do not use a workitem if the scheduling latency can't be tolerated.
70 * When wi_action returns non-zero, it means the workitem has either been
71 * freed or reused and workitem scheduler won't touch it any more.
73 typedef int (*swi_action_t) (struct swi_workitem *);
/*
 * swi_action_t is the callback type stored in wi_action: it receives the
 * workitem and returns an int.
 *
 * struct swi_workitem is embedded in larger objects declared in this
 * header (srpc_server_rpc, srpc_client_rpc and sfw_test_unit).
 * NOTE(review): wi_state is assigned by swi_init_workitem() but its
 * declaration is not visible in this excerpt - confirm the member list
 * against the complete header.
 */
74 typedef struct swi_workitem {
75 struct list_head wi_list; /* chain on runq */
77 swi_action_t wi_action; /* callback; assigned by swi_init_workitem() */
79 unsigned int wi_running:1; /* presumably set while wi_action executes - TODO confirm */
80 unsigned int wi_scheduled:1; /* presumably set while queued to run - TODO confirm */
/*
 * Prepare a workitem for first use: wi_list is initialised with
 * CFS_INIT_LIST_HEAD, wi_action is set to the action argument and
 * wi_state to SWI_STATE_NEWBORN.
 * NOTE(review): the statement that consumes the data argument is not
 * visible in this excerpt - presumably it is saved for the action
 * callback; confirm.
 */
84 swi_init_workitem (swi_workitem_t *wi, void *data, swi_action_t action)
86 CFS_INIT_LIST_HEAD(&wi->wi_list);
91 wi->wi_action = action;
92 wi->wi_state = SWI_STATE_NEWBORN;
95 #define SWI_RESCHED 128 /* # workitem scheduler loops before reschedule */
/*
 * Service identifiers and portal numbers used by the selftest RPC layer.
 * srpc_service2request() maps every SRPC_SERVICE_* id listed here to a
 * request message type.
 */
97 /* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework
98 * services, e.g. create/modify session.
100 #define SRPC_SERVICE_DEBUG 0
101 #define SRPC_SERVICE_MAKE_SESSION 1
102 #define SRPC_SERVICE_REMOVE_SESSION 2
103 #define SRPC_SERVICE_BATCH 3
104 #define SRPC_SERVICE_TEST 4
105 #define SRPC_SERVICE_QUERY_STAT 5
106 #define SRPC_SERVICE_JOIN 6
107 #define SRPC_FRAMEWORK_SERVICE_MAX_ID 10
108 /* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */
109 #define SRPC_SERVICE_BRW 11
110 #define SRPC_SERVICE_PING 12
111 #define SRPC_SERVICE_MAX_ID 12 /* same value as the highest id above (SRPC_SERVICE_PING) */
/* NOTE(review): presumably the portal for non-framework (test) requests - confirm */
113 #define SRPC_REQUEST_PORTAL 50
114 /* a lazy portal for framework RPC requests */
115 #define SRPC_FRAMEWORK_REQUEST_PORTAL 51
116 /* all reply/bulk RDMAs go to this portal */
117 #define SRPC_RDMA_PORTAL 52
/*
 * Translate a service id (SRPC_SERVICE_*) into the message type of the
 * request sent to that service. Every service id defined in this header
 * has a case here.
 * NOTE(review): the handling of an unknown id is not visible in this
 * excerpt - confirm before relying on it.
 */
119 static inline srpc_msg_type_t
120 srpc_service2request (int service)
125 case SRPC_SERVICE_DEBUG:
126 return SRPC_MSG_DEBUG_REQST;
128 case SRPC_SERVICE_MAKE_SESSION:
129 return SRPC_MSG_MKSN_REQST;
131 case SRPC_SERVICE_REMOVE_SESSION:
132 return SRPC_MSG_RMSN_REQST;
134 case SRPC_SERVICE_BATCH:
135 return SRPC_MSG_BATCH_REQST;
137 case SRPC_SERVICE_TEST:
138 return SRPC_MSG_TEST_REQST;
140 case SRPC_SERVICE_QUERY_STAT:
141 return SRPC_MSG_STAT_REQST;
143 case SRPC_SERVICE_BRW:
144 return SRPC_MSG_BRW_REQST;
146 case SRPC_SERVICE_PING:
147 return SRPC_MSG_PING_REQST;
149 case SRPC_SERVICE_JOIN:
150 return SRPC_MSG_JOIN_REQST;
/*
 * Message type of the reply for a service: the request type plus one.
 * This relies on each reply enumerator directly following its request
 * enumerator in srpc_msg_type_t, which is not declared in the visible
 * part of this header - the two must be kept in sync.
 */
154 static inline srpc_msg_type_t
155 srpc_service2reply (int service)
157 return srpc_service2request(service) + 1;
/*
 * Kinds of RPC-level events. The ev_type member of the event descriptor
 * in this header has type srpc_event_type_t, presumably this enumeration
 * (its closing typedef name is not visible in this excerpt).
 */
161 SRPC_BULK_REQ_RCVD = 0, /* passive bulk request(PUT sink/GET source) received */
162 SRPC_BULK_PUT_SENT = 1, /* active bulk PUT sent (source) */
163 SRPC_BULK_GET_RPLD = 2, /* active bulk GET replied (sink) */
164 SRPC_REPLY_RCVD = 3, /* incoming reply received */
165 SRPC_REPLY_SENT = 4, /* outgoing reply sent */
166 SRPC_REQUEST_RCVD = 5, /* incoming request received */
167 SRPC_REQUEST_SENT = 6, /* outgoing request sent */
/*
 * RPC event descriptor members. ev_fired is the flag tested by
 * srpc_event_pending(): zero means the event has not fired yet.
 * srpc_init_client_rpc() presets it to 1 because no event is expected
 * at that point.
 * NOTE(review): presumably this is srpc_event_t (the typedef name is
 * not visible in this excerpt) - confirm.
 */
172 srpc_event_type_t ev_type; /* what's up */
173 lnet_event_kind_t ev_lnet; /* LNet event type */
174 int ev_fired; /* LNet event fired? */
175 int ev_status; /* LNet event status */
176 void *ev_data; /* owning server/client RPC */
/*
 * Bulk transfer descriptor. bk_iovs is a trailing zero-length array with
 * bk_niov entries; srpc_client_rpc_size() accounts for those entries
 * when sizing a client RPC.
 * NOTE(review): two bk_iovs declarations appear here (lnet_kiov_t and
 * lnet_md_iovec_t, the latter together with bk_pages). They are
 * presumably alternatives selected by a preprocessor conditional that
 * is not visible in this excerpt - confirm.
 */
180 int bk_len; /* len of bulk data */
181 lnet_handle_md_t bk_mdh;
182 int bk_sink; /* sink/source */
183 int bk_niov; /* # iov in bk_iovs */
185 lnet_kiov_t bk_iovs[0];
187 cfs_page_t **bk_pages;
188 lnet_md_iovec_t bk_iovs[0];
190 } srpc_bulk_t; /* bulk descriptor */
/*
 * Per-peer state: two RPC queues (control and non-control), a lock and
 * the number of available credits. Other declarations in this header
 * refer to the type as srpc_peer_t; the closing typedef name is not
 * visible in this excerpt.
 */
192 typedef struct srpc_peer {
193 struct list_head stp_list; /* chain on peer hash */
194 struct list_head stp_rpcq; /* q of non-control RPCs */
195 struct list_head stp_ctl_rpcq; /* q of control RPCs */
196 spinlock_t stp_lock; /* serialize */
198 int stp_credits; /* available credits */
201 /* message buffer descriptor */
/*
 * NOTE(review): presumably this is the srpc_buffer_t that
 * srpc_server_rpc points to through srpc_reqstbuf; the struct opener
 * and typedef name are not visible in this excerpt - confirm.
 */
203 struct list_head buf_list; /* chain on srpc_service::*_msgq */
205 lnet_handle_md_t buf_mdh;
207 lnet_process_id_t buf_peer;
210 /* server-side state of a RPC */
211 typedef struct srpc_server_rpc {
212 struct list_head srpc_list; /* chain on srpc_service::*_rpcq */
213 struct srpc_service *srpc_service; /* presumably the service handling this RPC - TODO confirm */
214 swi_workitem_t srpc_wi; /* embedded workitem for this RPC */
215 srpc_event_t srpc_ev; /* bulk/reply event */
216 lnet_nid_t srpc_self;
217 lnet_process_id_t srpc_peer;
218 srpc_msg_t srpc_replymsg;
219 lnet_handle_md_t srpc_replymdh;
220 srpc_buffer_t *srpc_reqstbuf; /* presumably the buffer holding the request - TODO confirm */
221 srpc_bulk_t *srpc_bulk; /* bulk descriptor; a pointer here, unlike the embedded crpc_bulk of a client RPC */
/* callback taking this RPC; presumably run on completion - TODO confirm */
224 void (*srpc_done)(struct srpc_server_rpc *);
227 /* client-side state of a RPC */
/*
 * Set up by srpc_init_client_rpc() and released through
 * srpc_client_rpc_decref() / srpc_destroy_client_rpc().
 * crpc_bulk ends in the zero-length bk_iovs array, and
 * srpc_client_rpc_size() measures up to an element of that array.
 * NOTE(review): crpc_service is assigned in srpc_init_client_rpc() but
 * its declaration is not visible in this excerpt.
 */
228 typedef struct srpc_client_rpc {
229 struct list_head crpc_list; /* chain on user's lists */
230 struct list_head crpc_privl; /* chain on srpc_peer_t::*rpcq */
231 spinlock_t crpc_lock; /* serialize */
233 atomic_t crpc_refcount; /* starts at 1 (caller reference); see addref/decref macros */
234 int crpc_timeout; /* # seconds to wait for reply */
235 stt_timer_t crpc_timer;
236 swi_workitem_t crpc_wi; /* workitem; its action is srpc_send_rpc */
237 lnet_process_id_t crpc_dest;
238 srpc_peer_t *crpc_peer;
/* crpc_done: caller callback, stored by srpc_init_client_rpc() */
240 void (*crpc_done)(struct srpc_client_rpc *);
/* crpc_fini: if NULL, srpc_destroy_client_rpc() frees the RPC itself */
241 void (*crpc_fini)(struct srpc_client_rpc *);
242 int crpc_status; /* completion status */
243 void *crpc_priv; /* caller data */
246 unsigned int crpc_aborted:1; /* being given up */
247 unsigned int crpc_closed:1; /* completed */
250 srpc_event_t crpc_bulkev; /* bulk event */
251 srpc_event_t crpc_reqstev; /* request event */
252 srpc_event_t crpc_replyev; /* reply event */
254 /* bulk, request(reqst), and reply exchanged on wire */
255 srpc_msg_t crpc_reqstmsg;
256 srpc_msg_t crpc_replymsg;
257 lnet_handle_md_t crpc_reqstmdh;
258 lnet_handle_md_t crpc_replymdh;
259 srpc_bulk_t crpc_bulk;
/*
 * Size in bytes of a client RPC descriptor including the bk_niov
 * trailing bk_iovs entries of its embedded bulk descriptor. It is the
 * length passed to LIBCFS_FREE in srpc_destroy_client_rpc().
 * NOTE(review): offsetof with a run-time array index is not a constant
 * expression in ISO C; this relies on compiler support - confirm for
 * the toolchains in use.
 */
262 #define srpc_client_rpc_size(rpc) \
263 offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
/*
 * Take a reference on a client RPC: log the current count at D_NET,
 * assert that the count is still positive, then increment it
 * atomically. The argument is expanded several times, so pass an
 * expression without side effects.
 */
265 #define srpc_client_rpc_addref(rpc) \
267 CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \
268 (rpc), libcfs_id2str((rpc)->crpc_dest), \
269 atomic_read(&(rpc)->crpc_refcount)); \
270 LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
271 atomic_inc(&(rpc)->crpc_refcount); \
/*
 * Drop a reference on a client RPC: log the current count at D_NET,
 * assert that it is positive, then decrement atomically. When the count
 * reaches zero the RPC is handed to srpc_destroy_client_rpc(), so the
 * caller must not touch it afterwards. The argument is expanded several
 * times, so pass an expression without side effects.
 */
274 #define srpc_client_rpc_decref(rpc) \
276 CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \
277 (rpc), libcfs_id2str((rpc)->crpc_dest), \
278 atomic_read(&(rpc)->crpc_refcount)); \
279 LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \
280 if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \
281 srpc_destroy_client_rpc(rpc); \
/*
 * Non-zero when at least one of the three client RPC events (bulk,
 * request, reply) still has ev_fired == 0. The argument is expanded
 * three times.
 */
284 #define srpc_event_pending(rpc) ((rpc)->crpc_bulkev.ev_fired == 0 || \
285 (rpc)->crpc_reqstev.ev_fired == 0 || \
286 (rpc)->crpc_replyev.ev_fired == 0)
/*
 * Service descriptor: identity, concurrency limit, the RPC and message
 * buffer queues, and the callbacks that process requests.
 * NOTE(review): sv_lock and sv_shuttingdown are used by
 * srpc_wait_service_shutdown() but their declarations are not visible
 * in this excerpt.
 */
288 typedef struct srpc_service {
289 int sv_id; /* service id */
290 const char *sv_name; /* human readable name */
291 int sv_nprune; /* # posted RPC to be pruned */
292 int sv_concur; /* max # concurrent RPCs */
296 srpc_event_t sv_ev; /* LNet event */
297 int sv_nposted_msg; /* # posted message buffers */
298 struct list_head sv_free_rpcq; /* free RPC descriptors */
299 struct list_head sv_active_rpcq; /* in-flight RPCs */
300 struct list_head sv_posted_msgq; /* posted message buffers */
301 struct list_head sv_blocked_msgq; /* blocked for RPC descriptor */
303 /* Service callbacks:
304 * - sv_handler: process incoming RPC request
305 * - sv_bulk_ready: notify bulk data
307 int (*sv_handler) (srpc_server_rpc_t *);
308 int (*sv_bulk_ready) (srpc_server_rpc_t *, int);
/* SFW_SERVICE_CONCURRENCY is half of SFW_POST_BUFFERS (8 / 2 = 4). */
311 #define SFW_POST_BUFFERS 8
312 #define SFW_SERVICE_CONCURRENCY (SFW_POST_BUFFERS/2)
/*
 * Session descriptor members: identity, inactivity timeout and timer,
 * the list of batches, a name and per-test error counters.
 * NOTE(review): presumably this is sfw_session_t, the type of
 * bat_session in the batch descriptor; the typedef name is not visible
 * in this excerpt - confirm.
 */
315 struct list_head sn_list; /* chain on fw_zombie_sessions */
316 lst_sid_t sn_id; /* unique identifier */
317 unsigned int sn_timeout; /* # seconds' inactivity to expire */
319 stt_timer_t sn_timer;
320 struct list_head sn_batches; /* list of batches */
321 char sn_name[LST_NAME_SIZE];
322 atomic_t sn_brw_errors;
323 atomic_t sn_ping_errors;
/*
 * Two session ids are equal when both ses_nid and ses_stamp match.
 * The arguments are accessed with the dot operator, so pass the id
 * objects themselves rather than pointers. Each argument is expanded
 * twice.
 */
326 #define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \
327 (sid0).ses_stamp == (sid1).ses_stamp)
/*
 * Batch descriptor members: a batch is chained on the sn_batches list of
 * a session and owns a list of test instances.
 * NOTE(review): presumably this is sfw_batch_t, the type of tsi_batch in
 * sfw_test_instance; the typedef name is not visible in this excerpt -
 * confirm.
 */
330 struct list_head bat_list; /* chain on sn_batches */
331 lst_bid_t bat_id; /* batch id */
332 int bat_error; /* error code of batch */
333 sfw_session_t *bat_session; /* batch's session */
334 atomic_t bat_nactive; /* # of active tests */
335 struct list_head bat_tests; /* test instances */
/*
 * Client-side operations of a test: tso_init and tso_fini act on a whole
 * test instance, tso_prep_rpc and tso_done_rpc act on one test unit.
 * tso_prep_rpc hands the RPC back through its rpc output pointer.
 */
339 int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test client */
340 void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
341 int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
342 lnet_process_id_t dest,
343 srpc_client_rpc_t **rpc); /* prep a test rpc */
344 void (*tso_done_rpc)(struct sfw_test_unit *tsu,
345 srpc_client_rpc_t *rpc); /* done a test rpc */
346 } sfw_test_client_ops_t;
/*
 * One test inside a batch: its service type, client operations, shared
 * parameters, run-time status and the lists of test units and RPCs.
 * NOTE(review): tsi_is_client, tsi_stoptsu_onerr and tsi_stopping are
 * 1-bit bit-fields declared with plain int. Whether such a field is
 * signed is implementation-defined; if signed it holds 0 or -1, so
 * comparisons against 1 would fail. Test these for zero/non-zero only,
 * or consider unsigned int.
 * NOTE(review): the bulk and ping members are presumably alternatives
 * inside a union whose opening and closing lines are not visible in
 * this excerpt - confirm.
 */
348 typedef struct sfw_test_instance {
349 struct list_head tsi_list; /* chain on batch */
350 int tsi_service; /* test type */
351 sfw_batch_t *tsi_batch; /* batch */
352 sfw_test_client_ops_t *tsi_ops; /* test client operations */
354 /* public parameter for all test units */
355 int tsi_is_client:1; /* is test client */
356 int tsi_stoptsu_onerr:1; /* stop tsu on error */
357 int tsi_concur; /* concurrency */
358 int tsi_loop; /* loop count */
360 /* status of test instance */
361 spinlock_t tsi_lock; /* serialize */
362 int tsi_stopping:1; /* test is stopping */
363 atomic_t tsi_nactive; /* # of active test unit */
364 struct list_head tsi_units; /* test units */
365 struct list_head tsi_free_rpcs; /* free rpcs */
366 struct list_head tsi_active_rpcs; /* active rpcs */
369 test_bulk_req_t bulk; /* bulk parameter */
370 test_ping_req_t ping; /* ping parameter */
372 } sfw_test_instance_t;
374 /* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at
375 * the end of pages are not used */
376 #define SFW_MAX_CONCUR LST_MAX_CONCUR
/* number of whole lnet_process_id_t entries that fit in one page */
377 #define SFW_ID_PER_PAGE (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))
/* ids that fit in LNET_MAX_IOV pages */
378 #define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE)
/* pages needed to hold n ids (division rounded up) */
379 #define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
/*
 * One unit of a test instance: a destination node, a loop counter, a
 * back pointer to the owning instance and its own workitem.
 * NOTE(review): the tsu_list comment names lst_test_instance, a type
 * not declared in this header; presumably the tsi_units list of
 * sfw_test_instance_t is meant - confirm.
 */
381 typedef struct sfw_test_unit {
382 struct list_head tsu_list; /* chain on lst_test_instance */
383 lnet_process_id_t tsu_dest; /* id of dest node */
384 int tsu_loop; /* loop count of the test */
385 sfw_test_instance_t *tsu_instance; /* pointer to test instance */
386 void *tsu_private; /* private data */
387 swi_workitem_t tsu_worker; /* workitem of the test unit */
/*
 * Test case registration entry: pairs a server-side service with the
 * client-side operations of the same test.
 * NOTE(review): the struct opener and typedef name are not visible in
 * this excerpt.
 */
391 struct list_head tsc_list; /* chain on fw_tests */
392 srpc_service_t *tsc_srv_service; /* test service */
393 sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */
/*
 * sfw_* entry points (client RPC helpers, message unpacking and bulk
 * page management). The definitions are not in this header.
 * NOTE(review): the return type line of sfw_create_rpc is not visible
 * in this excerpt.
 */
398 sfw_create_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen,
399 void (*done) (srpc_client_rpc_t *), void *priv);
/* returns an int; the RPC is handed back through the rpc output pointer */
400 int sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
401 int nblk, int blklen, srpc_client_rpc_t **rpc);
402 void sfw_abort_rpc(srpc_client_rpc_t *rpc);
403 void sfw_post_rpc(srpc_client_rpc_t *rpc);
404 void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
405 void sfw_unpack_message(srpc_msg_t *msg);
406 void sfw_free_pages(srpc_server_rpc_t *rpc);
407 void sfw_add_bulk_page(srpc_bulk_t *bk, cfs_page_t *pg, int i);
408 int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink);
/*
 * srpc_* entry points (client RPCs, bulk descriptors, service
 * registration, buffers and counters). The definitions are not in this
 * header.
 * NOTE(review): the return type line of srpc_create_client_rpc is not
 * visible in this excerpt. Its parameters match those of
 * srpc_init_client_rpc() apart from the leading rpc pointer.
 */
411 srpc_create_client_rpc(lnet_process_id_t peer, int service,
412 int nbulkiov, int bulklen,
413 void (*rpc_done)(srpc_client_rpc_t *),
414 void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
415 void srpc_post_rpc(srpc_client_rpc_t *rpc);
416 void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
417 void srpc_free_bulk(srpc_bulk_t *bk);
418 srpc_bulk_t *srpc_alloc_bulk(int npages, int sink);
/* has the swi_action_t signature; installed as the crpc_wi action by srpc_init_client_rpc() */
419 int srpc_send_rpc(swi_workitem_t *wi);
420 int srpc_send_reply(srpc_server_rpc_t *rpc);
421 int srpc_add_service(srpc_service_t *sv);
422 int srpc_remove_service(srpc_service_t *sv);
423 void srpc_shutdown_service(srpc_service_t *sv);
/* srpc_wait_service_shutdown() keeps calling this while it returns 0 */
424 int srpc_finish_service(srpc_service_t *sv);
425 int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
426 void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
427 void srpc_get_counters(srpc_counters_t *cnt);
428 void srpc_set_counters(const srpc_counters_t *cnt);
/*
 * Workitem scheduling entry points, followed by the startup/shutdown
 * pairs of the three layers named by their prefixes (swi, sfw, srpc).
 * The startup functions return an int; the definitions are not in this
 * header.
 */
430 void swi_kill_workitem(swi_workitem_t *wi);
431 void swi_schedule_workitem(swi_workitem_t *wi);
432 void swi_schedule_serial_workitem(swi_workitem_t *wi);
433 int swi_startup(void);
434 int sfw_startup(void);
435 int srpc_startup(void);
436 void swi_shutdown(void);
437 void sfw_shutdown(void);
438 void srpc_shutdown(void);
/*
 * Final teardown of a client RPC; invoked by srpc_client_rpc_decref()
 * when the last reference is dropped.
 * Asserts that rpc is non-NULL, that no event is pending, that the RPC
 * is off its crpc_privl list, that the refcount is zero and that
 * bk_pages is NULL. With no crpc_fini callback the descriptor is freed
 * with LIBCFS_FREE using srpc_client_rpc_size(); the crpc_fini call is
 * presumably the alternative branch (the else line is not visible in
 * this excerpt - confirm).
 * NOTE(review): bk_pages exists in only one of the two srpc_bulk_t
 * layouts, so that assertion is presumably inside a preprocessor
 * conditional that is not visible here.
 */
441 srpc_destroy_client_rpc (srpc_client_rpc_t *rpc)
443 LASSERT (rpc != NULL);
444 LASSERT (!srpc_event_pending(rpc));
445 LASSERT (list_empty(&rpc->crpc_privl));
446 LASSERT (atomic_read(&rpc->crpc_refcount) == 0);
448 LASSERT (rpc->crpc_bulk.bk_pages == NULL);
451 if (rpc->crpc_fini == NULL) {
452 LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
454 (*rpc->crpc_fini) (rpc);
/*
 * Initialise a caller-provided client RPC descriptor:
 * - asserts nbulkiov <= LNET_MAX_IOV;
 * - zeroes the descriptor, covering nbulkiov trailing bk_iovs entries,
 *   so the memory at rpc must be at least that large;
 * - initialises both list heads, the workitem (action srpc_send_rpc,
 *   data rpc), the lock, and sets the refcount to 1 for the caller;
 * - records peer, priv, service, bulk length and iov count, and the
 *   rpc_done / rpc_fini callbacks;
 * - marks the bulk, request and reply events as fired, since none is
 *   expected yet;
 * - fills in the magic, version and type of the request message.
 */
461 srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer,
462 int service, int nbulkiov, int bulklen,
463 void (*rpc_done)(srpc_client_rpc_t *),
464 void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
466 LASSERT (nbulkiov <= LNET_MAX_IOV);
468 memset(rpc, 0, offsetof(srpc_client_rpc_t,
469 crpc_bulk.bk_iovs[nbulkiov]));
471 CFS_INIT_LIST_HEAD(&rpc->crpc_list);
472 CFS_INIT_LIST_HEAD(&rpc->crpc_privl);
473 swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc);
474 spin_lock_init(&rpc->crpc_lock);
475 atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
477 rpc->crpc_dest = peer;
478 rpc->crpc_priv = priv;
479 rpc->crpc_service = service;
480 rpc->crpc_bulk.bk_len = bulklen;
481 rpc->crpc_bulk.bk_niov = nbulkiov;
482 rpc->crpc_done = rpc_done;
483 rpc->crpc_fini = rpc_fini;
486 rpc->crpc_bulk.bk_mdh = LNET_INVALID_HANDLE;
488 /* no event is expected at this point */
489 rpc->crpc_bulkev.ev_fired =
490 rpc->crpc_reqstev.ev_fired =
491 rpc->crpc_replyev.ev_fired = 1;
493 rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC;
494 rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION;
495 rpc->crpc_reqstmsg.msg_type = srpc_service2request(service);
/*
 * Return the name of a workitem state as a string literal.
 * STATE2STR(x) expands to a case label for x that returns the
 * stringified macro name, so every SWI_STATE_* value defined in this
 * header maps to its own identifier.
 * NOTE(review): the handling of a value outside that set is not visible
 * in this excerpt.
 */
499 static inline const char *
500 swi_state2str (int state)
502 #define STATE2STR(x) case x: return #x
506 STATE2STR(SWI_STATE_NEWBORN);
507 STATE2STR(SWI_STATE_REPLY_SUBMITTED);
508 STATE2STR(SWI_STATE_REPLY_SENT);
509 STATE2STR(SWI_STATE_REQUEST_SUBMITTED);
510 STATE2STR(SWI_STATE_REQUEST_SENT);
511 STATE2STR(SWI_STATE_REPLY_RECEIVED);
512 STATE2STR(SWI_STATE_BULK_STARTED);
513 STATE2STR(SWI_STATE_DONE);
/* Evaluate x and discard the result by casting it to void. */
518 #define UNUSED(x) ( (void)(x) )
/*
 * Event polling helpers and module init/fini entry points.
 * NOTE(review): selftest_wait_events is declared here as a function and
 * also defined as a macro that calls cfs_pause(cfs_time_seconds(1)).
 * The two are presumably in different preprocessor branches whose
 * directives are not visible in this excerpt - confirm.
 */
522 int stt_poll_interval(void);
523 int sfw_session_removed(void);
525 int stt_check_events(void);
526 int swi_check_events(void);
527 int srpc_check_event(int timeout);
529 int lnet_selftest_init(void);
530 void lnet_selftest_fini(void);
531 int selftest_wait_events(void);
535 #define selftest_wait_events() cfs_pause(cfs_time_seconds(1))
/*
 * Wait helper taking a condition, a spinlock and a printf-style message.
 * The visible steps log the message, release the lock, call
 * selftest_wait_events() and take the lock again. The log level is
 * D_WARNING when the counter __I is zero or a power of two (the test
 * (x & -x) == x).
 * NOTE(review): the loop construct, the use of cond and the update of
 * __I are not visible in this excerpt - confirm against the complete
 * macro.
 */
539 #define lst_wait_until(cond, lock, fmt, a...) \
544 CDEBUG(((__I & (-__I)) == __I) ? D_WARNING : \
547 spin_unlock(&(lock)); \
549 selftest_wait_events(); \
551 spin_lock(&(lock)); \
/*
 * Block until a service has finished shutting down.
 * Asserts under sv_lock that sv_shuttingdown is set, then keeps calling
 * srpc_finish_service() while it returns 0. Each pass logs a message
 * (D_WARNING when i is zero or a power of two, D_NET otherwise) and
 * calls selftest_wait_events().
 * NOTE(review): the declaration and update of i are not visible in this
 * excerpt.
 */
556 srpc_wait_service_shutdown (srpc_service_t *sv)
560 spin_lock(&sv->sv_lock);
561 LASSERT (sv->sv_shuttingdown);
562 spin_unlock(&sv->sv_lock);
564 while (srpc_finish_service(sv) == 0) {
566 CDEBUG (((i & -i) == i) ? D_WARNING : D_NET,
567 "Waiting for %s service to shutdown...\n",
569 selftest_wait_events();
573 #endif /* __SELFTEST_SELFTEST_H__ */