1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/selftest/framework.c
38 * Author: Isaac Huang <isaac@clusterfs.com>
39 * Author: Liang Zhen <liangzhen@clusterfs.com>
/* Debug subsystem tag for this file (presumably consumed by the CDEBUG/CERROR macros - confirm). */
42 #define DEBUG_SUBSYSTEM S_LNET
/* Session id placed in replies when there is no current session (see the "sn == NULL" cases below). */
46 lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
/* Tunable: number of data errors to inject; registered with mode 0644. */
48 int brw_inject_errors = 0;
49 CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
50 "# data errors to inject randomly, zero by default");
/* Tunable: copied into sn_timeout of every session set up by sfw_init_session(); registered with mode 0444. */
52 static int session_timeout = 100;
53 CFS_MODULE_PARM(session_timeout, "i", int, 0444,
54 "test session timeout in seconds (100 by default, 0 == never)");
/* Tunable: stored into crpc_timeout of each test RPC by sfw_run_test(); registered with mode 0644. */
56 static int rpc_timeout = 64;
57 CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
58 "rpc timeout in seconds (64 by default, 0 == never)");
/* NOTE(review): no use of SFW_TEST_CONCURRENCY is visible in this part of the file. */
60 #define SFW_TEST_CONCURRENCY 1792
61 #define SFW_EXTRA_TEST_BUFFERS 8 /* tolerate buggy peers with extra buffers */
/* Number of service buffers reserved for a test instance: its loop count plus the extras above. */
63 #define sfw_test_buffers(tsi) ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS)
/*
 * Byte-swapping helpers: each macro swabs, in place, the listed 32-bit and
 * 64-bit fields of one structure (process id, session id, framework
 * counters, RPC counters, LNet counters). They are used when a message
 * arrives with a byte-swapped magic (see sfw_unpack_message() and
 * sfw_add_test_instance()).
 */
65 #define sfw_unpack_id(id) \
67 __swab64s(&(id).nid); \
68 __swab32s(&(id).pid); \
71 #define sfw_unpack_sid(sid) \
73 __swab64s(&(sid).ses_nid); \
74 __swab64s(&(sid).ses_stamp); \
77 #define sfw_unpack_fw_counters(fc) \
79 __swab32s(&(fc).running_ms); \
80 __swab32s(&(fc).active_batches); \
81 __swab32s(&(fc).zombie_sessions); \
82 __swab32s(&(fc).brw_errors); \
83 __swab32s(&(fc).ping_errors); \
86 #define sfw_unpack_rpc_counters(rc) \
88 __swab32s(&(rc).errors); \
89 __swab32s(&(rc).rpcs_sent); \
90 __swab32s(&(rc).rpcs_rcvd); \
91 __swab32s(&(rc).rpcs_dropped); \
92 __swab32s(&(rc).rpcs_expired); \
93 __swab64s(&(rc).bulk_get); \
94 __swab64s(&(rc).bulk_put); \
97 #define sfw_unpack_lnet_counters(lc) \
99 __swab32s(&(lc).errors); \
100 __swab32s(&(lc).msgs_max); \
101 __swab32s(&(lc).msgs_alloc); \
102 __swab32s(&(lc).send_count); \
103 __swab32s(&(lc).recv_count); \
104 __swab32s(&(lc).drop_count); \
105 __swab32s(&(lc).route_count); \
106 __swab64s(&(lc).send_length); \
107 __swab64s(&(lc).recv_length); \
108 __swab64s(&(lc).drop_length); \
109 __swab64s(&(lc).route_length); \
/* A test instance / batch counts as active while its nactive counter is non-zero. */
112 #define sfw_test_active(t) (cfs_atomic_read(&(t)->tsi_nactive) != 0)
113 #define sfw_batch_active(b) (cfs_atomic_read(&(b)->bat_nactive) != 0)
/*
 * Framework-wide state. The functions below reach these fields through
 * sfw_data (presumably the single instance of this struct - confirm).
 * fw_lock is taken around updates of fw_session, the zombie lists and
 * fw_active_srpc in the code below.
 */
115 struct smoketest_framework {
116 cfs_list_t fw_zombie_rpcs; /* RPCs to be recycled */
117 cfs_list_t fw_zombie_sessions; /* stopping sessions */
118 cfs_list_t fw_tests; /* registered test cases */
119 cfs_atomic_t fw_nzombies; /* # zombie sessions */
120 cfs_spinlock_t fw_lock; /* serialise */
121 sfw_session_t *fw_session; /* _the_ session */
122 int fw_shuttingdown; /* shutdown in progress */
123 srpc_server_rpc_t *fw_active_srpc; /* running RPC */
/* Forward declarations: both are called by sfw_deactivate_session() before their definitions. */
127 int sfw_stop_batch (sfw_batch_t *tsb, int force);
128 void sfw_destroy_session (sfw_session_t *sn);
/*
 * Look up a registered test case by service id.
 *
 * Asserts that id lies in (SRPC_FRAMEWORK_SERVICE_MAX_ID, SRPC_SERVICE_MAX_ID],
 * then walks sfw_data.fw_tests comparing each entry's server service id
 * against id. Callers in this file treat a NULL result as "not registered".
 */
130 static inline sfw_test_case_t *
131 sfw_find_test_case(int id)
133 sfw_test_case_t *tsc;
135 LASSERT (id <= SRPC_SERVICE_MAX_ID);
136 LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
138 cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
139 sfw_test_case_t, tsc_list) {
140 if (tsc->tsc_srv_service->sv_id == id)
/*
 * Register a test: pair a server-side service with its client-side ops.
 *
 * A service id that is already registered is reported with CERROR.
 * Otherwise a sfw_test_case_t is allocated, zeroed, filled with cliops and
 * service, and appended to sfw_data.fw_tests.
 * NOTE(review): the list is modified without taking fw_lock here; presumably
 * registration only happens during single-threaded startup - confirm.
 */
148 sfw_register_test (srpc_service_t *service, sfw_test_client_ops_t *cliops)
150 sfw_test_case_t *tsc;
152 if (sfw_find_test_case(service->sv_id) != NULL) {
153 CERROR ("Failed to register test %s (%d)\n",
154 service->sv_name, service->sv_id);
158 LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t));
162 memset(tsc, 0, sizeof(sfw_test_case_t));
163 tsc->tsc_cli_ops = cliops;
164 tsc->tsc_srv_service = service;
166 cfs_list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests);
171 sfw_add_session_timer (void)
173 sfw_session_t *sn = sfw_data.fw_session;
174 stt_timer_t *timer = &sn->sn_timer;
176 LASSERT (!sfw_data.fw_shuttingdown);
178 if (sn == NULL || sn->sn_timeout == 0)
181 LASSERT (!sn->sn_timer_active);
183 sn->sn_timer_active = 1;
184 timer->stt_expires = cfs_time_add(sn->sn_timeout,
185 cfs_time_current_sec());
186 stt_add_timer(timer);
/*
 * Disarm the expiry timer of the current session.
 *
 * Nothing needs doing when there is no session or its timer is not active.
 * If stt_del_timer() succeeds the timer is marked inactive. EBUSY reports
 * that the timer could not be removed because sfw_session_expired() is
 * racing with the caller; callers in this file drop the RPC on a non-zero
 * result.
 */
191 sfw_del_session_timer (void)
193 sfw_session_t *sn = sfw_data.fw_session;
195 if (sn == NULL || !sn->sn_timer_active)
198 LASSERT (sn->sn_timeout != 0);
200 if (stt_del_timer(&sn->sn_timer)) { /* timer defused */
201 sn->sn_timer_active = 0;
206 /* Racing is impossible in single-threaded userland selftest */
209 return EBUSY; /* racing with sfw_session_expired() */
/*
 * Retire the current session, if there is one.
 *
 * The session is detached from sfw_data.fw_session, counted in fw_nzombies
 * and parked on fw_zombie_sessions. The lock is then dropped while the
 * service of every registered test is aborted, and re-taken to force-stop
 * batches that are still active. While batches remain active the session
 * stays on the zombie list; otherwise it is unlinked and destroyed with the
 * lock released, and the lock is re-acquired before returning.
 */
212 /* called with sfw_data.fw_lock held */
214 sfw_deactivate_session (void)
216 sfw_session_t *sn = sfw_data.fw_session;
219 sfw_test_case_t *tsc;
221 if (sn == NULL) return;
223 LASSERT (!sn->sn_timer_active);
225 sfw_data.fw_session = NULL;
226 cfs_atomic_inc(&sfw_data.fw_nzombies);
227 cfs_list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
/* fw_lock is released around the service aborts below */
229 cfs_spin_unlock(&sfw_data.fw_lock);
231 cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
232 sfw_test_case_t, tsc_list) {
233 srpc_abort_service(tsc->tsc_srv_service);
236 cfs_spin_lock(&sfw_data.fw_lock);
238 cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
239 sfw_batch_t, bat_list) {
240 if (sfw_batch_active(tsb)) {
242 sfw_stop_batch(tsb, 1);
247 return; /* wait for active batches to stop */
249 cfs_list_del_init(&sn->sn_list);
250 cfs_spin_unlock(&sfw_data.fw_lock);
252 sfw_destroy_session(sn);
254 cfs_spin_lock(&sfw_data.fw_lock);
/* Return 1 when there is no current session, 0 otherwise. */
261 sfw_session_removed (void)
263 return (sfw_data.fw_session == NULL) ? 1 : 0;
/*
 * Session timer callback (installed as stt_func by sfw_init_session());
 * data is the session whose timer fired.
 *
 * Under fw_lock: asserts that the timer is marked active and that the
 * session is still the current one, logs a warning, clears sn_timer_active
 * and deactivates the session.
 */
269 sfw_session_expired (void *data)
271 sfw_session_t *sn = data;
273 cfs_spin_lock(&sfw_data.fw_lock);
275 LASSERT (sn->sn_timer_active);
276 LASSERT (sn == sfw_data.fw_session);
278 CWARN ("Session expired! sid: %s-"LPU64", name: %s\n",
279 libcfs_nid2str(sn->sn_id.ses_nid),
280 sn->sn_id.ses_stamp, &sn->sn_name[0]);
282 sn->sn_timer_active = 0;
283 sfw_deactivate_session();
285 cfs_spin_unlock(&sfw_data.fw_lock);
290 sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
292 stt_timer_t *timer = &sn->sn_timer;
294 memset(sn, 0, sizeof(sfw_session_t));
295 CFS_INIT_LIST_HEAD(&sn->sn_list);
296 CFS_INIT_LIST_HEAD(&sn->sn_batches);
297 cfs_atomic_set(&sn->sn_refcount, 1); /* +1 for caller */
298 cfs_atomic_set(&sn->sn_brw_errors, 0);
299 cfs_atomic_set(&sn->sn_ping_errors, 0);
300 strncpy(&sn->sn_name[0], name, LST_NAME_SIZE);
302 sn->sn_timer_active = 0;
304 sn->sn_timeout = session_timeout;
305 sn->sn_started = cfs_time_current();
307 timer->stt_data = sn;
308 timer->stt_func = sfw_session_expired;
309 CFS_INIT_LIST_HEAD(&timer->stt_list);
/*
 * Logs the service name, peer, work-item state and status of the finished
 * RPC, then checks whether the RPC still carries a bulk descriptor.
 * NOTE(review): presumably the bulk pages are released when srpc_bulk is
 * set - confirm.
 */
312 /* completion handler for incoming framework RPCs */
314 sfw_server_rpc_done (srpc_server_rpc_t *rpc)
316 srpc_service_t *sv = rpc->srpc_service;
317 int status = rpc->srpc_status;
320 "Incoming framework RPC done: "
321 "service %s, peer %s, status %s:%d\n",
322 sv->sv_name, libcfs_id2str(rpc->srpc_peer),
323 swi_state2str(rpc->srpc_wi.swi_state),
326 if (rpc->srpc_bulk != NULL)
/*
 * Fini hook for outgoing framework RPCs created by sfw_create_rpc().
 *
 * Asserts that the RPC carries no bulk, is not linked on any list and has
 * no references left, logs its final state, and then parks it on
 * sfw_data.fw_zombie_rpcs (under fw_lock), from where sfw_create_rpc()
 * reuses it. Must not run once shutdown has started (asserted).
 */
332 sfw_client_rpc_fini (srpc_client_rpc_t *rpc)
334 LASSERT (rpc->crpc_bulk.bk_niov == 0);
335 LASSERT (cfs_list_empty(&rpc->crpc_list));
336 LASSERT (cfs_atomic_read(&rpc->crpc_refcount) == 0);
338 LASSERT (rpc->crpc_bulk.bk_pages == NULL);
342 "Outgoing framework RPC done: "
343 "service %d, peer %s, status %s:%d:%d\n",
344 rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
345 swi_state2str(rpc->crpc_wi.swi_state),
346 rpc->crpc_aborted, rpc->crpc_status);
348 cfs_spin_lock(&sfw_data.fw_lock);
350 /* my callers must finish all RPCs before shutting me down */
351 LASSERT (!sfw_data.fw_shuttingdown);
352 cfs_list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs);
354 cfs_spin_unlock(&sfw_data.fw_lock);
/*
 * Search the current session's batch list for the batch whose id equals
 * bid.bat_id. A current session must exist (asserted). sfw_control_batch()
 * answers ENOENT when no batch is found.
 */
359 sfw_find_batch (lst_bid_t bid)
361 sfw_session_t *sn = sfw_data.fw_session;
364 LASSERT (sn != NULL);
366 cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
367 sfw_batch_t, bat_list) {
368 if (bat->bat_id.bat_id == bid.bat_id)
/*
 * Map a batch id to a batch of the current session, creating the batch if
 * necessary.
 *
 * The batch is first looked up with sfw_find_batch() (presumably returned
 * directly when found - confirm). A new batch is allocated, bound to the
 * session, given a zero active count and an empty test list, and appended
 * to the session's batch list. sfw_add_test() treats a failure here as
 * memory pressure.
 */
376 sfw_bid2batch (lst_bid_t bid)
378 sfw_session_t *sn = sfw_data.fw_session;
381 LASSERT (sn != NULL);
383 bat = sfw_find_batch(bid);
387 LIBCFS_ALLOC(bat, sizeof(sfw_batch_t));
392 bat->bat_session = sn;
394 cfs_atomic_set(&bat->bat_nactive, 0);
395 CFS_INIT_LIST_HEAD(&bat->bat_tests);
397 cfs_list_add_tail(&bat->bat_list, &sn->sn_batches);
/*
 * Fill a statistics reply for the current session.
 *
 * reply->str_sid always carries the current session id (or LST_INVALID_SID).
 * str_status is EINVAL when the request names LNET_NID_ANY as session NID,
 * ESRCH when there is no session or the ids differ, and 0 on success.
 * On success the reply holds a copy of the LNet counters, the RPC counters,
 * and the framework counters: running time in ms, BRW/ping error counts,
 * number of zombie sessions and number of batches with active tests.
 */
402 sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
404 sfw_session_t *sn = sfw_data.fw_session;
405 sfw_counters_t *cnt = &reply->str_fw;
409 reply->str_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
411 if (request->str_sid.ses_nid == LNET_NID_ANY) {
412 reply->str_status = EINVAL;
416 if (sn == NULL || !sfw_sid_equal(request->str_sid, sn->sn_id)) {
417 reply->str_status = ESRCH;
422 reply->str_lnet = the_lnet.ln_counters;
425 srpc_get_counters(&reply->str_rpc);
427 /* send over the msecs since the session was started
428 - with 32 bits to send, this is ~49 days */
429 cfs_duration_usec(cfs_time_sub(cfs_time_current(),
430 sn->sn_started), &tv);
/* the millisecond value is truncated to 32 bits by the cast below */
432 cnt->running_ms = (__u32)(tv.tv_sec * 1000 + tv.tv_usec / 1000);
433 cnt->brw_errors = cfs_atomic_read(&sn->sn_brw_errors);
434 cnt->ping_errors = cfs_atomic_read(&sn->sn_ping_errors);
435 cnt->zombie_sessions = cfs_atomic_read(&sfw_data.fw_nzombies);
437 cnt->active_batches = 0;
438 cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
439 sfw_batch_t, bat_list) {
440 if (cfs_atomic_read(&bat->bat_nactive) > 0)
441 cnt->active_batches++;
444 reply->str_status = 0;
/*
 * Handle a "make session" request.
 *
 * A request naming LNET_NID_ANY is answered with EINVAL. When the request
 * matches the existing session, a reference is added to it. When another
 * session exists and mksn_force is not set, the reply is EBUSY and carries
 * the existing session's name. Otherwise a new session is allocated and
 * initialised, the old one (if any) is deactivated under fw_lock, and the
 * new session becomes current; the reply then reports status 0 together
 * with the new session id and timeout.
 *
 * NOTE(review): strncpy() below does not guarantee that mksn_name is
 * NUL-terminated when the session name fills LST_NAME_SIZE bytes.
 */
449 sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
451 sfw_session_t *sn = sfw_data.fw_session;
453 if (request->mksn_sid.ses_nid == LNET_NID_ANY) {
454 reply->mksn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
455 reply->mksn_status = EINVAL;
460 reply->mksn_status = 0;
461 reply->mksn_sid = sn->sn_id;
462 reply->mksn_timeout = sn->sn_timeout;
464 if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
465 cfs_atomic_inc(&sn->sn_refcount);
469 if (!request->mksn_force) {
470 reply->mksn_status = EBUSY;
471 strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
476 /* brand new or create by force */
477 LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
479 CERROR ("Dropping RPC (mksn) under memory pressure.\n");
483 sfw_init_session(sn, request->mksn_sid, &request->mksn_name[0]);
485 cfs_spin_lock(&sfw_data.fw_lock);
487 sfw_deactivate_session();
488 LASSERT (sfw_data.fw_session == NULL);
489 sfw_data.fw_session = sn;
491 cfs_spin_unlock(&sfw_data.fw_lock);
493 reply->mksn_status = 0;
494 reply->mksn_sid = sn->sn_id;
495 reply->mksn_timeout = sn->sn_timeout;
/*
 * Handle a "remove session" request.
 *
 * rmsn_status is EINVAL for a request naming LNET_NID_ANY, ESRCH when there
 * is no session, EBUSY when the request names a different session, and 0
 * otherwise. One reference is dropped; only when it was the last one is the
 * session deactivated (under fw_lock), in which case the reply carries
 * LST_INVALID_SID as session id.
 */
500 sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
502 sfw_session_t *sn = sfw_data.fw_session;
504 reply->rmsn_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
506 if (request->rmsn_sid.ses_nid == LNET_NID_ANY) {
507 reply->rmsn_status = EINVAL;
511 if (sn == NULL || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) {
512 reply->rmsn_status = (sn == NULL) ? ESRCH : EBUSY;
516 if (!cfs_atomic_dec_and_test(&sn->sn_refcount)) {
517 reply->rmsn_status = 0;
521 cfs_spin_lock(&sfw_data.fw_lock);
522 sfw_deactivate_session();
523 cfs_spin_unlock(&sfw_data.fw_lock);
525 reply->rmsn_status = 0;
526 reply->rmsn_sid = LST_INVALID_SID;
527 LASSERT (sfw_data.fw_session == NULL);
532 sfw_debug_session (srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
534 sfw_session_t *sn = sfw_data.fw_session;
537 reply->dbg_status = ESRCH;
538 reply->dbg_sid = LST_INVALID_SID;
542 reply->dbg_status = 0;
543 reply->dbg_sid = sn->sn_id;
544 reply->dbg_timeout = sn->sn_timeout;
545 strncpy(reply->dbg_name, &sn->sn_name[0], LST_NAME_SIZE);
/*
 * Fini hook for test RPCs: put the finished RPC back on the free list of
 * its test instance, where sfw_create_test_rpc() picks it up again.
 * The RPC must not be linked on any list when this is called (asserted).
 */
551 sfw_test_rpc_fini (srpc_client_rpc_t *rpc)
553 sfw_test_unit_t *tsu = rpc->crpc_priv;
554 sfw_test_instance_t *tsi = tsu->tsu_instance;
556 /* Called with hold of tsi->tsi_lock */
557 LASSERT (cfs_list_empty(&rpc->crpc_list));
558 cfs_list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
/*
 * Prepare the test case backing a new test instance.
 *
 * The test case for tsi->tsi_service must be registered (asserted). A
 * client instance gets the test case's client ops. The service is asked
 * for sfw_test_buffers(tsi) extra buffers (presumably for server instances
 * only, mirroring sfw_unload_test() - confirm); if fewer were posted, a
 * warning is logged and the posted ones are removed again.
 */
562 sfw_load_test (sfw_test_instance_t *tsi)
564 sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
565 int nrequired = sfw_test_buffers(tsi);
568 LASSERT (tsc != NULL);
570 if (tsi->tsi_is_client) {
571 tsi->tsi_ops = tsc->tsc_cli_ops;
575 nposted = srpc_service_add_buffers(tsc->tsc_srv_service, nrequired);
576 if (nposted != nrequired) {
577 CWARN ("Failed to reserve enough buffers: "
578 "service %s, %d needed, %d reserved\n",
579 tsc->tsc_srv_service->sv_name, nrequired, nposted);
580 srpc_service_remove_buffers(tsc->tsc_srv_service, nposted);
584 CDEBUG (D_NET, "Reserved %d buffers for test %s\n",
585 nposted, tsc->tsc_srv_service->sv_name);
/*
 * Undo sfw_load_test(): for a server instance, give back the
 * sfw_test_buffers(tsi) buffers reserved on the test's service.
 * The test case must still be registered (asserted).
 */
590 sfw_unload_test (sfw_test_instance_t *tsi)
592 sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
594 LASSERT (tsc != NULL);
596 if (!tsi->tsi_is_client)
597 srpc_service_remove_buffers(tsc->tsc_srv_service,
598 sfw_test_buffers(tsi));
/*
 * Free a test instance and everything it owns.
 *
 * Server instances skip straight to the common cleanup. For a client
 * instance the test's tso_fini hook runs first; the instance must be idle
 * (not stopping, no active RPCs, no active units - asserted). All test
 * units and all cached free RPCs are then released. Finally the test is
 * unloaded and the instance itself is freed.
 */
603 sfw_destroy_test_instance (sfw_test_instance_t *tsi)
605 srpc_client_rpc_t *rpc;
606 sfw_test_unit_t *tsu;
608 if (!tsi->tsi_is_client) goto clean;
610 tsi->tsi_ops->tso_fini(tsi);
612 LASSERT (!tsi->tsi_stopping);
613 LASSERT (cfs_list_empty(&tsi->tsi_active_rpcs));
614 LASSERT (!sfw_test_active(tsi));
616 while (!cfs_list_empty(&tsi->tsi_units)) {
617 tsu = cfs_list_entry(tsi->tsi_units.next,
618 sfw_test_unit_t, tsu_list);
619 cfs_list_del(&tsu->tsu_list);
620 LIBCFS_FREE(tsu, sizeof(*tsu));
623 while (!cfs_list_empty(&tsi->tsi_free_rpcs)) {
624 rpc = cfs_list_entry(tsi->tsi_free_rpcs.next,
625 srpc_client_rpc_t, crpc_list);
626 cfs_list_del(&rpc->crpc_list);
627 LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
631 sfw_unload_test(tsi);
632 LIBCFS_FREE(tsi, sizeof(*tsi));
/*
 * Free a batch together with all of its test instances.
 * The batch must be inactive and already unlinked from its session's
 * batch list (both asserted).
 */
637 sfw_destroy_batch (sfw_batch_t *tsb)
639 sfw_test_instance_t *tsi;
641 LASSERT (!sfw_batch_active(tsb));
642 LASSERT (cfs_list_empty(&tsb->bat_list));
644 while (!cfs_list_empty(&tsb->bat_tests)) {
645 tsi = cfs_list_entry(tsb->bat_tests.next,
646 sfw_test_instance_t, tsi_list);
647 cfs_list_del_init(&tsi->tsi_list);
648 sfw_destroy_test_instance(tsi);
651 LIBCFS_FREE(tsb, sizeof(sfw_batch_t));
/*
 * Free a retired session together with all of its batches, then decrement
 * the zombie-session count. The session must already be off the zombie
 * list and must not be the current session (both asserted).
 */
656 sfw_destroy_session (sfw_session_t *sn)
660 LASSERT (cfs_list_empty(&sn->sn_list));
661 LASSERT (sn != sfw_data.fw_session);
663 while (!cfs_list_empty(&sn->sn_batches)) {
664 batch = cfs_list_entry(sn->sn_batches.next,
665 sfw_batch_t, bat_list);
666 cfs_list_del_init(&batch->bat_list);
667 sfw_destroy_batch(batch);
670 LIBCFS_FREE(sn, sizeof(*sn));
671 cfs_atomic_dec(&sfw_data.fw_nzombies);
/*
 * Byte-swap the test-specific part of a client test request.
 *
 * msg must be a SRPC_MSG_TEST_REQST for a client instance (asserted).
 * Nothing is done when the magic is in host order; otherwise the magic must
 * be the swabbed SRPC_MSG_MAGIC (asserted) and the bulk (BRW) or ping
 * parameters are swabbed in place according to tsr_service.
 */
676 sfw_unpack_test_req (srpc_msg_t *msg)
678 srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
680 LASSERT (msg->msg_type == SRPC_MSG_TEST_REQST);
681 LASSERT (req->tsr_is_client);
683 if (msg->msg_magic == SRPC_MSG_MAGIC)
684 return; /* no flipping needed */
686 LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
688 if (req->tsr_service == SRPC_SERVICE_BRW) {
689 test_bulk_req_t *bulk = &req->tsr_u.bulk;
691 __swab32s(&bulk->blk_opc);
692 __swab32s(&bulk->blk_npg);
693 __swab32s(&bulk->blk_flags);
697 if (req->tsr_service == SRPC_SERVICE_PING) {
698 test_ping_req_t *ping = &req->tsr_u.ping;
700 __swab32s(&ping->png_size);
701 __swab32s(&ping->png_flags);
/*
 * Build a test instance from a test request and add it to batch tsb.
 *
 * The instance is allocated, zeroed and filled from the request (loop
 * count, concurrency, service, client/server role, stop-on-error flag),
 * then loaded with sfw_load_test(); it is freed again if loading fails.
 * The batch must be inactive (asserted).
 *
 * A server instance is simply appended to the batch. For a client instance
 * the bulk buffer attached to the RPC holds the list of destination ids:
 * the request is byte-swapped if needed, the test parameters are copied,
 * and for each of the ndest destinations tsi_concur test units are created.
 * The test's tso_init hook then runs; on success the instance joins the
 * batch, otherwise it is destroyed.
 */
710 sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
712 srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg;
713 srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
714 srpc_bulk_t *bk = rpc->srpc_bulk;
715 int ndest = req->tsr_ndest;
716 sfw_test_unit_t *tsu;
717 sfw_test_instance_t *tsi;
721 LIBCFS_ALLOC(tsi, sizeof(*tsi));
723 CERROR ("Can't allocate test instance for batch: "LPU64"\n",
728 memset(tsi, 0, sizeof(*tsi));
729 cfs_spin_lock_init(&tsi->tsi_lock);
730 cfs_atomic_set(&tsi->tsi_nactive, 0);
731 CFS_INIT_LIST_HEAD(&tsi->tsi_units);
732 CFS_INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
733 CFS_INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
735 tsi->tsi_stopping = 0;
736 tsi->tsi_batch = tsb;
737 tsi->tsi_loop = req->tsr_loop;
738 tsi->tsi_concur = req->tsr_concur;
739 tsi->tsi_service = req->tsr_service;
740 tsi->tsi_is_client = !!(req->tsr_is_client);
741 tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
743 rc = sfw_load_test(tsi);
745 LIBCFS_FREE(tsi, sizeof(*tsi));
749 LASSERT (!sfw_batch_active(tsb));
751 if (!tsi->tsi_is_client) {
752 /* it's test server, just add it to tsb */
753 cfs_list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
757 LASSERT (bk != NULL);
759 LASSERT (bk->bk_pages != NULL);
/* the bulk buffer must be large enough to hold ndest destination ids */
761 LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
762 LASSERT ((unsigned int)bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
764 sfw_unpack_test_req(msg);
765 memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
767 for (i = 0; i < ndest; i++) {
768 lnet_process_id_packed_t *dests;
769 lnet_process_id_packed_t id;
/* NOTE(review): the two ways of locating the page below look like alternative build variants - confirm */
773 dests = cfs_page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
774 LASSERT (dests != NULL); /* my pages are within KVM always */
776 dests = cfs_page_address(bk->bk_pages[i / SFW_ID_PER_PAGE]);
778 id = dests[i % SFW_ID_PER_PAGE];
779 if (msg->msg_magic != SRPC_MSG_MAGIC)
782 for (j = 0; j < tsi->tsi_concur; j++) {
783 LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t));
786 CERROR ("Can't allocate tsu for %d\n",
791 tsu->tsu_dest.nid = id.nid;
792 tsu->tsu_dest.pid = id.pid;
793 tsu->tsu_instance = tsi;
794 tsu->tsu_private = NULL;
795 cfs_list_add_tail(&tsu->tsu_list, &tsi->tsi_units);
799 rc = tsi->tsi_ops->tso_init(tsi);
801 cfs_list_add_tail(&tsi->tsi_list, &tsb->bat_tests);
807 sfw_destroy_test_instance(tsi);
/*
 * Account for a test unit that has finished.
 *
 * Drops the instance's active count; only the last unit proceeds. It then
 * clears tsi_stopping under tsi_lock and drops the batch's active count
 * under fw_lock. Nothing more happens if the batch still has active tests
 * or the session is still the current one. Otherwise the session is a
 * zombie: once none of its batches is active any more, it is removed from
 * the zombie list and destroyed.
 */
812 sfw_test_unit_done (sfw_test_unit_t *tsu)
814 sfw_test_instance_t *tsi = tsu->tsu_instance;
815 sfw_batch_t *tsb = tsi->tsi_batch;
816 sfw_session_t *sn = tsb->bat_session;
818 LASSERT (sfw_test_active(tsi));
820 if (!cfs_atomic_dec_and_test(&tsi->tsi_nactive))
823 /* the test instance is done */
824 cfs_spin_lock(&tsi->tsi_lock);
826 tsi->tsi_stopping = 0;
828 cfs_spin_unlock(&tsi->tsi_lock);
830 cfs_spin_lock(&sfw_data.fw_lock);
832 if (!cfs_atomic_dec_and_test(&tsb->bat_nactive) ||/* tsb still active */
833 sn == sfw_data.fw_session) { /* sn also active */
834 cfs_spin_unlock(&sfw_data.fw_lock);
838 LASSERT (!cfs_list_empty(&sn->sn_list)); /* I'm a zombie! */
840 cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
841 sfw_batch_t, bat_list) {
842 if (sfw_batch_active(tsb)) {
843 cfs_spin_unlock(&sfw_data.fw_lock);
848 cfs_list_del_init(&sn->sn_list);
849 cfs_spin_unlock(&sfw_data.fw_lock);
851 sfw_destroy_session(sn);
/*
 * Completion callback for test RPCs.
 *
 * Runs the test's tso_done_rpc hook, then, under tsi_lock, unlinks the RPC
 * from the list it is on and decides whether the unit is finished: the
 * instance is stopping, the unit has no loops left, or the RPC failed and
 * stop-on-error is set. The poster's reference on the RPC is dropped. The
 * unit's work item is rescheduled, or sfw_test_unit_done() is called
 * (presumably depending on that decision - confirm).
 */
856 sfw_test_rpc_done (srpc_client_rpc_t *rpc)
858 sfw_test_unit_t *tsu = rpc->crpc_priv;
859 sfw_test_instance_t *tsi = tsu->tsu_instance;
862 tsi->tsi_ops->tso_done_rpc(tsu, rpc);
864 cfs_spin_lock(&tsi->tsi_lock);
866 LASSERT (sfw_test_active(tsi));
867 LASSERT (!cfs_list_empty(&rpc->crpc_list));
869 cfs_list_del_init(&rpc->crpc_list);
871 /* batch is stopping or loop is done or get error */
872 if (tsi->tsi_stopping ||
873 tsu->tsu_loop == 0 ||
874 (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr))
877 /* dec ref for poster */
878 srpc_client_rpc_decref(rpc);
880 cfs_spin_unlock(&tsi->tsi_lock);
883 swi_schedule_workitem(&tsu->tsu_worker);
887 sfw_test_unit_done(tsu);
/*
 * Obtain a client RPC for a test unit.
 *
 * tsu     test unit that will own the RPC (stored as its private data)
 * peer    destination of the RPC
 * nblk    number of bulk fragments
 * blklen  bulk length
 * rpcpp   receives the RPC (presumably on success only - confirm)
 *
 * A cached RPC from the instance's free list is reused and re-initialised
 * when one is available; its bulk fragment count must equal nblk
 * (asserted). Otherwise a new RPC is created. Either way the RPC completes
 * through sfw_test_rpc_done() and is finalised by sfw_test_rpc_fini().
 */
892 sfw_create_test_rpc (sfw_test_unit_t *tsu, lnet_process_id_t peer,
893 int nblk, int blklen, srpc_client_rpc_t **rpcpp)
895 srpc_client_rpc_t *rpc = NULL;
896 sfw_test_instance_t *tsi = tsu->tsu_instance;
898 cfs_spin_lock(&tsi->tsi_lock);
900 LASSERT (sfw_test_active(tsi));
902 if (!cfs_list_empty(&tsi->tsi_free_rpcs)) {
903 /* pick request from buffer */
904 rpc = cfs_list_entry(tsi->tsi_free_rpcs.next,
905 srpc_client_rpc_t, crpc_list);
906 LASSERT (nblk == rpc->crpc_bulk.bk_niov);
907 cfs_list_del_init(&rpc->crpc_list);
909 srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
910 blklen, sfw_test_rpc_done,
911 sfw_test_rpc_fini, tsu);
914 cfs_spin_unlock(&tsi->tsi_lock);
917 rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
918 blklen, sfw_test_rpc_done,
919 sfw_test_rpc_fini, tsu);
921 CERROR ("Can't create rpc for test %d\n", tsi->tsi_service);
/*
 * Work-item handler of a test unit: prepare and launch one test RPC.
 *
 * The test's tso_prep_rpc hook builds the RPC; if it fails no RPC may have
 * been returned (asserted). If the instance is stopping, the prepared RPC
 * goes back to the free list. Otherwise the RPC is put on the instance's
 * active list, given the rpc_timeout module parameter as timeout, and
 * handled under its own crpc_lock (presumably posted there - confirm).
 * On the paths that end the unit, the work item is killed and
 * sfw_test_unit_done() is called.
 */
930 sfw_run_test (swi_workitem_t *wi)
932 sfw_test_unit_t *tsu = wi->swi_workitem.wi_data;
933 sfw_test_instance_t *tsi = tsu->tsu_instance;
934 srpc_client_rpc_t *rpc = NULL;
936 LASSERT (wi == &tsu->tsu_worker);
938 if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) {
939 LASSERT (rpc == NULL);
943 LASSERT (rpc != NULL);
945 cfs_spin_lock(&tsi->tsi_lock);
947 if (tsi->tsi_stopping) {
948 cfs_list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
949 cfs_spin_unlock(&tsi->tsi_lock);
953 if (tsu->tsu_loop > 0)
956 cfs_list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
957 cfs_spin_unlock(&tsi->tsi_lock);
959 rpc->crpc_timeout = rpc_timeout;
961 cfs_spin_lock(&rpc->crpc_lock);
963 cfs_spin_unlock(&rpc->crpc_lock);
968 * No one can schedule me now since:
969 * - previous RPC, if any, has done and
970 * - no new RPC is initiated.
971 * - my batch is still active; no one can run it again now.
972 * Cancel pending schedules and prevent future schedule attempts:
974 swi_kill_workitem(wi);
975 sfw_test_unit_done(tsu);
/*
 * Start every client test instance of a batch.
 *
 * A batch that is already active is only reported at D_NET level. Server
 * instances are skipped. Each client instance must be idle (asserted) and
 * adds one to the batch's active count; each of its test units adds one to
 * the instance's active count, gets a full loop budget, and has its work
 * item initialised with sfw_run_test() and scheduled.
 */
980 sfw_run_batch (sfw_batch_t *tsb)
983 sfw_test_unit_t *tsu;
984 sfw_test_instance_t *tsi;
986 if (sfw_batch_active(tsb)) {
987 CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n",
988 tsb->bat_id.bat_id, cfs_atomic_read(&tsb->bat_nactive));
992 cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
993 sfw_test_instance_t, tsi_list) {
994 if (!tsi->tsi_is_client) /* skip server instances */
997 LASSERT (!tsi->tsi_stopping);
998 LASSERT (!sfw_test_active(tsi));
1000 cfs_atomic_inc(&tsb->bat_nactive);
1002 cfs_list_for_each_entry_typed (tsu, &tsi->tsi_units,
1003 sfw_test_unit_t, tsu_list) {
1004 cfs_atomic_inc(&tsi->tsi_nactive);
1005 tsu->tsu_loop = tsi->tsi_loop;
1006 wi = &tsu->tsu_worker;
1007 swi_init_workitem(wi, tsu, sfw_run_test,
1009 swi_schedule_workitem(wi);
/*
 * Ask every active client test instance of a batch to stop.
 *
 * An inactive batch is only reported at D_NET level. Instances that are
 * server-side, idle or already stopping are skipped. The others are marked
 * stopping under tsi_lock, and their launched RPCs are aborted with -EINTR,
 * each under its own crpc_lock (presumably only when force is set -
 * confirm).
 */
1017 sfw_stop_batch (sfw_batch_t *tsb, int force)
1019 sfw_test_instance_t *tsi;
1020 srpc_client_rpc_t *rpc;
1022 if (!sfw_batch_active(tsb)) {
1023 CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id);
1027 cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
1028 sfw_test_instance_t, tsi_list) {
1029 cfs_spin_lock(&tsi->tsi_lock);
1031 if (!tsi->tsi_is_client ||
1032 !sfw_test_active(tsi) || tsi->tsi_stopping) {
1033 cfs_spin_unlock(&tsi->tsi_lock);
1037 tsi->tsi_stopping = 1;
1040 cfs_spin_unlock(&tsi->tsi_lock);
1044 /* abort launched rpcs in the test */
1045 cfs_list_for_each_entry_typed (rpc, &tsi->tsi_active_rpcs,
1046 srpc_client_rpc_t, crpc_list) {
1047 cfs_spin_lock(&rpc->crpc_lock);
1049 srpc_abort_rpc(rpc, -EINTR);
1051 cfs_spin_unlock(&rpc->crpc_lock);
1054 cfs_spin_unlock(&tsi->tsi_lock);
/*
 * Report activity of a batch or of one of its tests in reply->bar_active:
 * either the batch's active count or the active count of a single test
 * instance (presumably the one selected by testidx - confirm).
 */
1061 sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
1063 sfw_test_instance_t *tsi;
1069 reply->bar_active = cfs_atomic_read(&tsb->bat_nactive);
1073 cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
1074 sfw_test_instance_t, tsi_list) {
1078 reply->bar_active = cfs_atomic_read(&tsi->tsi_nactive);
/* Release the bulk descriptor attached to a server RPC and clear the pointer. */
1086 sfw_free_pages (srpc_server_rpc_t *rpc)
1088 srpc_free_bulk(rpc->srpc_bulk);
1089 rpc->srpc_bulk = NULL;
/*
 * Attach a bulk descriptor of npages pages to a server RPC.
 * The RPC must not have one yet and npages must be in 1..LNET_MAX_IOV
 * (both asserted). sink is passed through to srpc_alloc_bulk().
 * Returns -ENOMEM when the allocation fails.
 */
1093 sfw_alloc_pages (srpc_server_rpc_t *rpc, int npages, int sink)
1095 LASSERT (rpc->srpc_bulk == NULL);
1096 LASSERT (npages > 0 && npages <= LNET_MAX_IOV);
1098 rpc->srpc_bulk = srpc_alloc_bulk(npages, sink);
1099 if (rpc->srpc_bulk == NULL) return -ENOMEM;
/*
 * Handle an "add test" request.
 *
 * The reply always carries the current session id (or LST_INVALID_SID).
 * tsr_status is EINVAL for malformed requests (zero loop or concurrency,
 * LNET_NID_ANY as session NID, too many destinations, a client test with no
 * destination, too much concurrency, or a service id outside the test
 * range), ENOENT when the session does not match or the test is not
 * registered, and EBUSY when the target batch is active.
 *
 * A client test arrives in two steps: on the first call no bulk buffer is
 * attached yet, so pages for the destination list are allocated and the
 * call returns; the RPC is resumed in sfw_bulk_ready(). With the buffer in
 * place (or for a server test) the instance is added to the batch and the
 * reply status is the absolute value of the result.
 */
1105 sfw_add_test (srpc_server_rpc_t *rpc)
1107 sfw_session_t *sn = sfw_data.fw_session;
1108 srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
1109 srpc_test_reqst_t *request;
1113 request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
1114 reply->tsr_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
1116 if (request->tsr_loop == 0 ||
1117 request->tsr_concur == 0 ||
1118 request->tsr_sid.ses_nid == LNET_NID_ANY ||
1119 request->tsr_ndest > SFW_MAX_NDESTS ||
1120 (request->tsr_is_client && request->tsr_ndest == 0) ||
1121 request->tsr_concur > SFW_MAX_CONCUR ||
1122 request->tsr_service > SRPC_SERVICE_MAX_ID ||
1123 request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) {
1124 reply->tsr_status = EINVAL;
1128 if (sn == NULL || !sfw_sid_equal(request->tsr_sid, sn->sn_id) ||
1129 sfw_find_test_case(request->tsr_service) == NULL) {
1130 reply->tsr_status = ENOENT;
1134 bat = sfw_bid2batch(request->tsr_bid);
1136 CERROR ("Dropping RPC (%s) from %s under memory pressure.\n",
1137 rpc->srpc_service->sv_name,
1138 libcfs_id2str(rpc->srpc_peer));
1142 if (sfw_batch_active(bat)) {
1143 reply->tsr_status = EBUSY;
1147 if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
1148 /* rpc will be resumed later in sfw_bulk_ready */
1149 return sfw_alloc_pages(rpc,
1150 sfw_id_pages(request->tsr_ndest), 1);
1153 rc = sfw_add_test_instance(bat, rpc);
1154 CDEBUG (rc == 0 ? D_NET : D_WARNING,
1155 "%s test: sv %d %s, loop %d, concur %d, ndest %d\n",
1156 rc == 0 ? "Added" : "Failed to add", request->tsr_service,
1157 request->tsr_is_client ? "client" : "server",
1158 request->tsr_loop, request->tsr_concur, request->tsr_ndest);
/* the reply carries a non-negative status, whatever the sign of rc */
1160 reply->tsr_status = (rc < 0) ? -rc : rc;
/*
 * Handle a batch control request (run, stop or query).
 *
 * The reply always carries the current session id (or LST_INVALID_SID).
 * bar_status is ESRCH when the session does not match and ENOENT when the
 * batch does not exist; otherwise it is the absolute value of the result of
 * the selected operation. An unknown opcode makes the function return
 * -EINVAL so that the RPC is dropped.
 */
1165 sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
1167 sfw_session_t *sn = sfw_data.fw_session;
1171 reply->bar_sid = (sn == NULL) ? LST_INVALID_SID : sn->sn_id;
1173 if (sn == NULL || !sfw_sid_equal(request->bar_sid, sn->sn_id)) {
1174 reply->bar_status = ESRCH;
1178 bat = sfw_find_batch(request->bar_bid);
1180 reply->bar_status = ENOENT;
1184 switch (request->bar_opc) {
1185 case SRPC_BATCH_OPC_RUN:
1186 rc = sfw_run_batch(bat);
1189 case SRPC_BATCH_OPC_STOP:
1190 rc = sfw_stop_batch(bat, request->bar_arg);
1193 case SRPC_BATCH_OPC_QUERY:
1194 rc = sfw_query_batch(bat, request->bar_testidx, reply);
1198 return -EINVAL; /* drop it */
1201 reply->bar_status = (rc < 0) ? -rc : rc;
/*
 * Entry point for incoming framework RPCs.
 *
 * Only one framework RPC is processed at a time (fw_active_srpc must be
 * NULL, asserted) and the service must be a framework service. Under
 * fw_lock the function checks for shutdown, disarms the session timer
 * (dropping the RPC if that races with expiry) and records the RPC as
 * active. The request is then byte-swapped if needed and dispatched by
 * service id to the matching handler. Afterwards sfw_server_rpc_done() is
 * installed as completion handler, the session timer is re-armed and
 * fw_active_srpc is cleared, again under fw_lock.
 *
 * NOTE(review): the "racing with expiry timer" CERROR format string has no
 * trailing newline, unlike the other messages in this file.
 * NOTE(review): the two consecutive timer re-arm sequences near the end
 * look like alternative build variants - confirm.
 */
1206 sfw_handle_server_rpc (srpc_server_rpc_t *rpc)
1208 srpc_service_t *sv = rpc->srpc_service;
1209 srpc_msg_t *reply = &rpc->srpc_replymsg;
1210 srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg;
1213 LASSERT (sfw_data.fw_active_srpc == NULL);
1214 LASSERT (sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
1216 cfs_spin_lock(&sfw_data.fw_lock);
1218 if (sfw_data.fw_shuttingdown) {
1219 cfs_spin_unlock(&sfw_data.fw_lock);
1223 /* Remove timer to avoid racing with it or expiring active session */
1224 if (sfw_del_session_timer() != 0) {
1225 CERROR ("Dropping RPC (%s) from %s: racing with expiry timer.",
1226 sv->sv_name, libcfs_id2str(rpc->srpc_peer));
1227 cfs_spin_unlock(&sfw_data.fw_lock);
1231 sfw_data.fw_active_srpc = rpc;
1232 cfs_spin_unlock(&sfw_data.fw_lock);
1234 sfw_unpack_message(request);
1235 LASSERT (request->msg_type == srpc_service2request(sv->sv_id));
1240 case SRPC_SERVICE_TEST:
1241 rc = sfw_add_test(rpc);
1244 case SRPC_SERVICE_BATCH:
1245 rc = sfw_control_batch(&request->msg_body.bat_reqst,
1246 &reply->msg_body.bat_reply);
1249 case SRPC_SERVICE_QUERY_STAT:
1250 rc = sfw_get_stats(&request->msg_body.stat_reqst,
1251 &reply->msg_body.stat_reply);
1254 case SRPC_SERVICE_DEBUG:
1255 rc = sfw_debug_session(&request->msg_body.dbg_reqst,
1256 &reply->msg_body.dbg_reply);
1259 case SRPC_SERVICE_MAKE_SESSION:
1260 rc = sfw_make_session(&request->msg_body.mksn_reqst,
1261 &reply->msg_body.mksn_reply);
1264 case SRPC_SERVICE_REMOVE_SESSION:
1265 rc = sfw_remove_session(&request->msg_body.rmsn_reqst,
1266 &reply->msg_body.rmsn_reply);
1270 rpc->srpc_done = sfw_server_rpc_done;
1271 cfs_spin_lock(&sfw_data.fw_lock);
1274 if (!sfw_data.fw_shuttingdown)
1275 sfw_add_session_timer();
1277 LASSERT (!sfw_data.fw_shuttingdown);
1278 sfw_add_session_timer();
1281 sfw_data.fw_active_srpc = NULL;
1282 cfs_spin_unlock(&sfw_data.fw_lock);
/*
 * Resume a client "add test" RPC after its bulk transfer has finished.
 *
 * rpc     the RPC deferred by sfw_add_test(); it must carry a bulk buffer,
 *         belong to the TEST service and describe a client test (asserted)
 * status  result of the bulk transfer; a failure is logged
 *
 * Like sfw_handle_server_rpc(), the function checks for shutdown, disarms
 * the session timer (dropping the RPC if that races with expiry) and marks
 * the RPC active under fw_lock. It then re-runs sfw_add_test(), re-arms the
 * timer and clears fw_active_srpc.
 *
 * NOTE(review): the "racing with expiry timer" CERROR format string has no
 * trailing newline, unlike the other messages in this file.
 * NOTE(review): the two consecutive timer re-arm sequences near the end
 * look like alternative build variants - confirm.
 */
1287 sfw_bulk_ready (srpc_server_rpc_t *rpc, int status)
1289 srpc_service_t *sv = rpc->srpc_service;
1292 LASSERT (rpc->srpc_bulk != NULL);
1293 LASSERT (sv->sv_id == SRPC_SERVICE_TEST);
1294 LASSERT (sfw_data.fw_active_srpc == NULL);
1295 LASSERT (rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client);
1297 cfs_spin_lock(&sfw_data.fw_lock);
1300 CERROR ("Bulk transfer failed for RPC: "
1301 "service %s, peer %s, status %d\n",
1302 sv->sv_name, libcfs_id2str(rpc->srpc_peer), status);
1303 cfs_spin_unlock(&sfw_data.fw_lock);
1307 if (sfw_data.fw_shuttingdown) {
1308 cfs_spin_unlock(&sfw_data.fw_lock);
1312 if (sfw_del_session_timer() != 0) {
1313 CERROR ("Dropping RPC (%s) from %s: racing with expiry timer",
1314 sv->sv_name, libcfs_id2str(rpc->srpc_peer));
1315 cfs_spin_unlock(&sfw_data.fw_lock);
1319 sfw_data.fw_active_srpc = rpc;
1320 cfs_spin_unlock(&sfw_data.fw_lock);
1322 rc = sfw_add_test(rpc);
1324 cfs_spin_lock(&sfw_data.fw_lock);
1327 if (!sfw_data.fw_shuttingdown)
1328 sfw_add_session_timer();
1330 LASSERT (!sfw_data.fw_shuttingdown);
1331 sfw_add_session_timer();
1334 sfw_data.fw_active_srpc = NULL;
1335 cfs_spin_unlock(&sfw_data.fw_lock);
/*
 * Obtain a client RPC for a framework service.
 *
 * peer      destination
 * service   framework service id (asserted to be in the framework range)
 * nbulkiov  number of bulk fragments; 0 for an RPC without bulk
 * bulklen   bulk length
 * done      completion callback
 * priv      private data handed to the RPC
 *
 * An RPC without bulk is taken from sfw_data.fw_zombie_rpcs when one is
 * available and re-initialised with sfw_client_rpc_fini() as fini hook.
 * Otherwise a new RPC is created; it gets sfw_client_rpc_fini() as fini
 * hook only when it carries no bulk. Must not be called during shutdown
 * (asserted).
 */
1340 sfw_create_rpc (lnet_process_id_t peer, int service,
1341 int nbulkiov, int bulklen,
1342 void (*done) (srpc_client_rpc_t *), void *priv)
1344 srpc_client_rpc_t *rpc;
1346 cfs_spin_lock(&sfw_data.fw_lock);
1348 LASSERT (!sfw_data.fw_shuttingdown);
1349 LASSERT (service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
1351 if (nbulkiov == 0 && !cfs_list_empty(&sfw_data.fw_zombie_rpcs)) {
1352 rpc = cfs_list_entry(sfw_data.fw_zombie_rpcs.next,
1353 srpc_client_rpc_t, crpc_list);
1354 cfs_list_del(&rpc->crpc_list);
1355 cfs_spin_unlock(&sfw_data.fw_lock);
1357 srpc_init_client_rpc(rpc, peer, service, 0, 0,
1358 done, sfw_client_rpc_fini, priv);
1362 cfs_spin_unlock(&sfw_data.fw_lock);
1364 rpc = srpc_create_client_rpc(peer, service, nbulkiov, bulklen, done,
1365 nbulkiov != 0 ? NULL : sfw_client_rpc_fini,
/*
 * Convert a framework message to host byte order, in place.
 *
 * Nothing is done when msg_magic is already SRPC_MSG_MAGIC. Otherwise the
 * magic must be the byte-swapped SRPC_MSG_MAGIC (asserted); msg_type is
 * swabbed first and then selects which request/reply body has its integer
 * fields and session id swabbed. The test-specific union of a test request
 * is not touched here; sfw_unpack_test_req() handles it.
 */
1371 sfw_unpack_message (srpc_msg_t *msg)
1373 if (msg->msg_magic == SRPC_MSG_MAGIC)
1374 return; /* no flipping needed */
1376 LASSERT (msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
1378 __swab32s(&msg->msg_type);
1380 if (msg->msg_type == SRPC_MSG_STAT_REQST) {
1381 srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst;
1383 __swab32s(&req->str_type);
1384 __swab64s(&req->str_rpyid);
1385 sfw_unpack_sid(req->str_sid);
1389 if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
1390 srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
1392 __swab32s(&rep->str_status);
1393 sfw_unpack_sid(rep->str_sid);
1394 sfw_unpack_fw_counters(rep->str_fw);
1395 sfw_unpack_rpc_counters(rep->str_rpc);
1396 sfw_unpack_lnet_counters(rep->str_lnet);
1400 if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
1401 srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst;
1403 __swab64s(&req->mksn_rpyid);
1404 __swab32s(&req->mksn_force);
1405 sfw_unpack_sid(req->mksn_sid);
1409 if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
1410 srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply;
1412 __swab32s(&rep->mksn_status);
1413 __swab32s(&rep->mksn_timeout);
1414 sfw_unpack_sid(rep->mksn_sid);
1418 if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
1419 srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst;
1421 __swab64s(&req->rmsn_rpyid);
1422 sfw_unpack_sid(req->rmsn_sid);
1426 if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
1427 srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply;
1429 __swab32s(&rep->rmsn_status);
1430 sfw_unpack_sid(rep->rmsn_sid);
1434 if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
1435 srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst;
1437 __swab64s(&req->dbg_rpyid);
1438 __swab32s(&req->dbg_flags);
1439 sfw_unpack_sid(req->dbg_sid);
1443 if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
1444 srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply;
1446 __swab32s(&rep->dbg_nbatch);
1447 __swab32s(&rep->dbg_timeout);
1448 sfw_unpack_sid(rep->dbg_sid);
1452 if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
1453 srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst;
1455 __swab32s(&req->bar_opc);
1456 __swab64s(&req->bar_rpyid);
1457 __swab32s(&req->bar_testidx);
1458 __swab32s(&req->bar_arg);
1459 sfw_unpack_sid(req->bar_sid);
1460 __swab64s(&req->bar_bid.bat_id);
1464 if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
1465 srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
1467 __swab32s(&rep->bar_status);
1468 sfw_unpack_sid(rep->bar_sid);
1472 if (msg->msg_type == SRPC_MSG_TEST_REQST) {
1473 srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
1475 __swab64s(&req->tsr_rpyid);
1476 __swab64s(&req->tsr_bulkid);
1477 __swab32s(&req->tsr_loop);
1478 __swab32s(&req->tsr_ndest);
1479 __swab32s(&req->tsr_concur);
1480 __swab32s(&req->tsr_service);
1481 sfw_unpack_sid(req->tsr_sid);
1482 __swab64s(&req->tsr_bid.bat_id);
1486 if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
1487 srpc_test_reply_t *rep = &msg->msg_body.tes_reply;
1489 __swab32s(&rep->tsr_status);
1490 sfw_unpack_sid(rep->tsr_sid);
1494 if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
1495 srpc_join_reqst_t *req = &msg->msg_body.join_reqst;
1497 __swab64s(&req->join_rpyid);
1498 sfw_unpack_sid(req->join_sid);
1502 if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
1503 srpc_join_reply_t *rep = &msg->msg_body.join_reply;
1505 __swab32s(&rep->join_status);
1506 __swab32s(&rep->join_timeout);
1507 sfw_unpack_sid(rep->join_sid);
/*
 * Abort a client RPC with error -EINTR.
 *
 * Asserts that the RPC's reference count is positive and that its
 * service id does not exceed SRPC_FRAMEWORK_SERVICE_MAX_ID, then calls
 * srpc_abort_rpc() while holding rpc->crpc_lock.
 */
1516 sfw_abort_rpc (srpc_client_rpc_t *rpc)
1518 LASSERT (cfs_atomic_read(&rpc->crpc_refcount) > 0);
1519 LASSERT (rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
/* srpc_abort_rpc() is invoked with the per-RPC lock held. */
1521 cfs_spin_lock(&rpc->crpc_lock);
1522 srpc_abort_rpc(rpc, -EINTR);
1523 cfs_spin_unlock(&rpc->crpc_lock);
/*
 * Prepare a client RPC for posting, under rpc->crpc_lock.
 *
 * Asserts that the RPC is neither closed nor aborted, that it is not
 * linked on any list through crpc_list, and that the framework is not
 * shutting down; then stamps it with the rpc_timeout module parameter.
 *
 * NOTE(review): the embedded numbering skips 1538-1539 between the
 * timeout assignment and the unlock; presumably the call that actually
 * posts the RPC sits there - confirm against the complete source.
 */
1528 sfw_post_rpc (srpc_client_rpc_t *rpc)
1530 cfs_spin_lock(&rpc->crpc_lock);
/* Sanity checks on the RPC and framework state before it is used. */
1532 LASSERT (!rpc->crpc_closed);
1533 LASSERT (!rpc->crpc_aborted);
1534 LASSERT (cfs_list_empty(&rpc->crpc_list));
1535 LASSERT (!sfw_data.fw_shuttingdown);
/* rpc_timeout is in seconds; 0 means never (see its module parameter). */
1537 rpc->crpc_timeout = rpc_timeout;
1540 cfs_spin_unlock(&rpc->crpc_lock);
/*
 * Table of the framework's own services: debug, query stats, make
 * session, remove session, batch and test.  Only sv_id and sv_name are
 * visible in the initializers shown here; the loop that registers these
 * services fills in sv_handler, sv_concur and sv_bulk_ready at run time.
 *
 * The loops that walk this table stop at the first entry whose sv_name
 * is NULL, so a terminating entry is expected.
 * NOTE(review): that terminator, and the braces around each entry, are
 * not visible in this listing - confirm against the complete source.
 */
1544 static srpc_service_t sfw_services[] =
1547 /* sv_id */ SRPC_SERVICE_DEBUG,
1548 /* sv_name */ "debug",
1552 /* sv_id */ SRPC_SERVICE_QUERY_STAT,
1553 /* sv_name */ "query stats",
1557 /* sv_id */ SRPC_SERVICE_MAKE_SESSION,
1558 /* sv_name */ "make session",
1562 /* sv_id */ SRPC_SERVICE_REMOVE_SESSION,
1563 /* sv_name */ "remove session",
1567 /* sv_id */ SRPC_SERVICE_BATCH,
1568 /* sv_name */ "batch service",
1572 /* sv_id */ SRPC_SERVICE_TEST,
1573 /* sv_name */ "test service",
/*
 * External declarations for the two tests registered during framework
 * start-up: for each of "ping" and "brw", the client operations table,
 * the server-side service, and the two initialisation hooks that are
 * called before sfw_register_test().
 */
/* ping test */
1583 extern sfw_test_client_ops_t ping_test_client;
1584 extern srpc_service_t ping_test_service;
1585 extern void ping_init_test_client(void);
1586 extern void ping_init_test_service(void);
/* brw test */
1588 extern sfw_test_client_ops_t brw_test_client;
1589 extern srpc_service_t brw_test_service;
1590 extern void brw_init_test_client(void);
1591 extern void brw_init_test_service(void);
/*
 * Body of the framework start-up routine.
 *
 * Steps visible here: optionally override the tunables from the
 * environment, validate session_timeout and rpc_timeout, reset sfw_data,
 * register the brw and ping tests, add every registered test service,
 * then add every framework service and reserve request buffers for it.
 *
 * NOTE(review): the signature line, several local declarations and the
 * error-handling statements are absent from this listing (the embedded
 * numbering skips); presumably this is sfw_startup() - confirm against
 * the complete source.
 */
1601 sfw_test_case_t *tsc;
/*
 * Environment overrides: a variable that is set replaces the current
 * value, an unset one leaves it unchanged.
 * NOTE(review): getenv() implies a user-space build, presumably guarded
 * by a conditional not visible here.  atoi() cannot report malformed
 * input; strtol() with end-pointer checking would allow validation.
 */
1606 s = getenv("SESSION_TIMEOUT");
1607 session_timeout = s != NULL ? atoi(s) : session_timeout;
1609 s = getenv("BRW_INJECT_ERRORS");
1610 brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
1612 s = getenv("RPC_TIMEOUT");
1613 rpc_timeout = s != NULL ? atoi(s) : rpc_timeout;
/* Negative timeouts are reported as errors. */
1616 if (session_timeout < 0) {
1617 CERROR ("Session timeout must be non-negative: %d\n",
1622 if (rpc_timeout < 0) {
1623 CERROR ("RPC timeout must be non-negative: %d\n",
/* Zero is accepted and means "never expire"; only a warning is logged. */
1628 if (session_timeout == 0)
1629 CWARN ("Zero session_timeout specified "
1630 "- test sessions never expire.\n");
1632 if (rpc_timeout == 0)
1633 CWARN ("Zero rpc_timeout specified "
1634 "- test RPC never expire.\n");
/*
 * Reset the global framework state.  The memset() zeroes the whole
 * structure; the fields below are then initialised explicitly.
 */
1636 memset(&sfw_data, 0, sizeof(struct smoketest_framework));
1638 sfw_data.fw_session = NULL;
1639 sfw_data.fw_active_srpc = NULL;
1640 cfs_spin_lock_init(&sfw_data.fw_lock);
1641 cfs_atomic_set(&sfw_data.fw_nzombies, 0);
1642 CFS_INIT_LIST_HEAD(&sfw_data.fw_tests);
1643 CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
1644 CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
/* Register the brw test: init hooks first, then sfw_register_test(). */
1646 brw_init_test_client();
1647 brw_init_test_service();
1648 rc = sfw_register_test(&brw_test_service, &brw_test_client);
/* Register the ping test the same way. */
1651 ping_init_test_client();
1652 ping_init_test_service();
1653 rc = sfw_register_test(&ping_test_service, &ping_test_client);
/*
 * Add the server-side service of every test on sfw_data.fw_tests, with
 * concurrency SFW_TEST_CONCURRENCY.  -EBUSY from srpc_add_service() is
 * asserted never to happen; a failure is logged with CWARN.
 */
1657 cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
1658 sfw_test_case_t, tsc_list) {
1659 sv = tsc->tsc_srv_service;
1660 sv->sv_concur = SFW_TEST_CONCURRENCY;
1662 rc = srpc_add_service(sv);
1663 LASSERT (rc != -EBUSY);
1665 CWARN ("Failed to add %s service: %d\n",
/*
 * Add the framework services from sfw_services[]; the walk ends at the
 * first entry whose sv_name is NULL.  All of them use
 * sfw_handle_server_rpc as handler; only SRPC_SERVICE_TEST gets a
 * bulk-ready callback.
 */
1671 for (i = 0; ; i++) {
1672 sv = &sfw_services[i];
1673 if (sv->sv_name == NULL) break;
1675 sv->sv_bulk_ready = NULL;
1676 sv->sv_handler = sfw_handle_server_rpc;
1677 sv->sv_concur = SFW_SERVICE_CONCURRENCY;
1678 if (sv->sv_id == SRPC_SERVICE_TEST)
1679 sv->sv_bulk_ready = sfw_bulk_ready;
1681 rc = srpc_add_service(sv);
1682 LASSERT (rc != -EBUSY);
1684 CWARN ("Failed to add %s service: %d\n",
1689 /* about to sfw_shutdown, no need to add buffer */
1690 if (error) continue;
/*
 * srpc_service_add_buffers() result is compared with the number
 * requested; any other value is treated as a shortfall and warned about.
 */
1692 rc = srpc_service_add_buffers(sv, SFW_POST_BUFFERS);
1693 if (rc != SFW_POST_BUFFERS) {
1694 CWARN ("Failed to reserve enough buffers: "
1695 "service %s, %d needed, %d reserved\n",
1696 sv->sv_name, SFW_POST_BUFFERS, rc);
/*
 * Body of the framework shutdown routine.
 *
 * Order of operations visible here: mark the framework as shutting down
 * and drain activity under fw_lock; start shutdown of every framework
 * and test service; free the RPCs on fw_zombie_rpcs; finally wait for
 * each service to finish shutting down and free the test case
 * descriptors.
 *
 * NOTE(review): the signature line, some local declarations and the
 * closing lines are absent from this listing (the embedded numbering
 * skips); presumably this is sfw_shutdown() - confirm against the
 * complete source.
 */
1710 sfw_test_case_t *tsc;
1713 cfs_spin_lock(&sfw_data.fw_lock);
/* Set the shutdown flag before waiting for activity to drain. */
1715 sfw_data.fw_shuttingdown = 1;
1717 lst_wait_until(sfw_data.fw_active_srpc == NULL, sfw_data.fw_lock,
1718 "waiting for active RPC to finish.\n");
1720 LASSERT (sfw_data.fw_active_srpc == NULL);
/*
 * When sfw_del_session_timer() returns non-zero, wait until fw_session
 * has become NULL.
 */
1723 if (sfw_del_session_timer() != 0)
1724 lst_wait_until(sfw_data.fw_session == NULL, sfw_data.fw_lock,
1725 "waiting for session timer to explode.\n");
/* Deactivate the session, then wait for the zombie count to reach zero. */
1727 sfw_deactivate_session();
1728 lst_wait_until(cfs_atomic_read(&sfw_data.fw_nzombies) == 0,
1730 "waiting for %d zombie sessions to die.\n",
1731 cfs_atomic_read(&sfw_data.fw_nzombies));
1733 cfs_spin_unlock(&sfw_data.fw_lock);
/* Shut down and remove every framework service (NULL sv_name ends the table). */
1735 for (i = 0; ; i++) {
1736 sv = &sfw_services[i];
1737 if (sv->sv_name == NULL)
1740 srpc_shutdown_service(sv);
1741 srpc_remove_service(sv);
/* Same for the server-side service of every registered test. */
1744 cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
1745 sfw_test_case_t, tsc_list) {
1746 sv = tsc->tsc_srv_service;
1747 srpc_shutdown_service(sv);
1748 srpc_remove_service(sv);
/* Unlink and free every client RPC still on fw_zombie_rpcs. */
1751 while (!cfs_list_empty(&sfw_data.fw_zombie_rpcs)) {
1752 srpc_client_rpc_t *rpc;
1754 rpc = cfs_list_entry(sfw_data.fw_zombie_rpcs.next,
1755 srpc_client_rpc_t, crpc_list);
1756 cfs_list_del(&rpc->crpc_list);
1758 LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
/* Second pass: wait for each framework service to finish shutting down. */
1761 for (i = 0; ; i++) {
1762 sv = &sfw_services[i];
1763 if (sv->sv_name == NULL)
1766 srpc_wait_service_shutdown(sv);
/*
 * Drain fw_tests: wait for the test's service to finish shutting down
 * before unlinking and freeing its descriptor.
 */
1769 while (!cfs_list_empty(&sfw_data.fw_tests)) {
1770 tsc = cfs_list_entry(sfw_data.fw_tests.next,
1771 sfw_test_case_t, tsc_list);
1773 srpc_wait_service_shutdown(tsc->tsc_srv_service);
1775 cfs_list_del(&tsc->tsc_list);
1776 LIBCFS_FREE(tsc, sizeof(*tsc));