X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fselftest%2Fframework.c;h=8758c37070d67477e302e527bb31c6e116e3b1bb;hb=b4efa1b2cbfd45f85439e1bb0a4c4eb719540dcd;hp=3fde9713cfcff1cb6b1a5931318c631e8a93f951;hpb=319437814658e6a9cd75dc549c73159b20e7825c;p=fs%2Flustre-release.git diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index 3fde971..8758c37 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -1,15 +1,50 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Authors: Isaac Huang - * Liang Zhen + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/selftest/framework.c + * + * Author: Isaac Huang + * Author: Liang Zhen */ #define DEBUG_SUBSYSTEM S_LNET #include "selftest.h" +lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1}; + int brw_inject_errors = 0; CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644, "# data errors to inject randomly, zero by default"); @@ -18,9 +53,11 @@ static int session_timeout = 100; CFS_MODULE_PARM(session_timeout, "i", int, 0444, "test session timeout in seconds (100 by default, 0 == never)"); -#define SFW_TEST_CONCURRENCY 128 -#define SFW_TEST_RPC_TIMEOUT 64 -#define SFW_CLIENT_RPC_TIMEOUT 64 /* in seconds */ +static int rpc_timeout = 64; +CFS_MODULE_PARM(rpc_timeout, "i", int, 0644, + "rpc timeout in seconds (64 by default, 0 == never)"); + +#define SFW_TEST_CONCURRENCY 1792 #define SFW_EXTRA_TEST_BUFFERS 8 /* tolerate buggy peers with extra buffers */ #define sfw_test_buffers(tsi) ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS) @@ -98,7 +135,8 @@ sfw_find_test_case(int id) LASSERT (id <= SRPC_SERVICE_MAX_ID); LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID); - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { + cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests, + sfw_test_case_t, tsc_list) { if (tsc->tsc_srv_service->sv_id == id) return tsc; } @@ -178,6 +216,7 @@ sfw_deactivate_session (void) sfw_session_t *sn = sfw_data.fw_session; int nactive = 0; sfw_batch_t *tsb; + sfw_test_case_t *tsc; if (sn == NULL) return; @@ -187,7 +226,17 @@ sfw_deactivate_session (void) atomic_inc(&sfw_data.fw_nzombies); list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions); - list_for_each_entry (tsb, &sn->sn_batches, bat_list) { + spin_unlock(&sfw_data.fw_lock); + + cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests, + sfw_test_case_t, tsc_list) { + srpc_abort_service(tsc->tsc_srv_service); + } + + spin_lock(&sfw_data.fw_lock); + + cfs_list_for_each_entry_typed (tsb, &sn->sn_batches, + sfw_batch_t, bat_list) { if (sfw_batch_active(tsb)) { nactive++; sfw_stop_batch(tsb, 1); @@ -245,6 +294,7 @@ sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name) memset(sn, 0, sizeof(sfw_session_t)); CFS_INIT_LIST_HEAD(&sn->sn_list); CFS_INIT_LIST_HEAD(&sn->sn_batches); + atomic_set(&sn->sn_refcount, 1); /* +1 for caller */ atomic_set(&sn->sn_brw_errors, 0); atomic_set(&sn->sn_ping_errors, 0); strncpy(&sn->sn_name[0], name, LST_NAME_SIZE); @@ -312,7 +362,8 @@ sfw_find_batch (lst_bid_t bid) LASSERT (sn != NULL); - list_for_each_entry (bat, &sn->sn_batches, bat_list) { + cfs_list_for_each_entry_typed (bat, &sn->sn_batches, + sfw_batch_t, bat_list) { if (bat->bat_id.bat_id == bid.bat_id) return bat; } @@ -376,7 +427,8 @@ sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies); cnt->active_tests = cnt->active_batches = 0; - list_for_each_entry (bat, &sn->sn_batches, bat_list) { + cfs_list_for_each_entry_typed (bat, &sn->sn_batches, + sfw_batch_t, bat_list) { int n = atomic_read(&bat->bat_nactive); if (n > 0) { @@ -400,13 +452,24 @@ sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) return 0; } - if (sn != NULL && !request->mksn_force) { - reply->mksn_sid = sn->sn_id; - reply->mksn_status = EBUSY; - strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE); - return 0; + if (sn != NULL) { + reply->mksn_status = 0; + reply->mksn_sid = sn->sn_id; + reply->mksn_timeout = sn->sn_timeout; + + if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) { + atomic_inc(&sn->sn_refcount); + return 0; + } + + if (!request->mksn_force) { + reply->mksn_status = EBUSY; + strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE); + return 0; + } } - + + /* brand new or create by force */ LIBCFS_ALLOC(sn, sizeof(sfw_session_t)); if (sn == NULL) { CERROR ("Dropping RPC (mksn) under memory pressure.\n"); @@ -446,6 +509,11 @@ sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) return 0; } + if (!atomic_dec_and_test(&sn->sn_refcount)) { + reply->rmsn_status = 0; + return 0; + } + spin_lock(&sfw_data.fw_lock); sfw_deactivate_session(); spin_unlock(&sfw_data.fw_lock); @@ -686,8 +754,8 @@ sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc) #ifndef __KERNEL__ LASSERT (bk->bk_pages != NULL); #endif - LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= ndest); - LASSERT (bk->bk_len >= sizeof(lnet_process_id_t) * ndest); + LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest); + LASSERT ((unsigned int)bk->bk_len >= sizeof(lnet_process_id_t) * ndest); sfw_unpack_test_req(msg); memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u)); @@ -764,7 +832,8 @@ sfw_test_unit_done (sfw_test_unit_t *tsu) LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */ - list_for_each_entry (tsb, &sn->sn_batches, bat_list) { + cfs_list_for_each_entry_typed (tsb, &sn->sn_batches, + sfw_batch_t, bat_list) { if (sfw_batch_active(tsb)) { spin_unlock(&sfw_data.fw_lock); return; @@ -882,7 +951,7 @@ sfw_run_test (swi_workitem_t *wi) list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs); spin_unlock(&tsi->tsi_lock); - rpc->crpc_timeout = SFW_TEST_RPC_TIMEOUT; + rpc->crpc_timeout = rpc_timeout; spin_lock(&rpc->crpc_lock); srpc_post_rpc(rpc); @@ -910,12 +979,13 @@ sfw_run_batch (sfw_batch_t *tsb) sfw_test_instance_t *tsi; if (sfw_batch_active(tsb)) { - CDEBUG (D_NET, "Can't start active batch: "LPU64" (%d)\n", - tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive)); - return -EPERM; + CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n", + tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive)); + return 0; } - list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) { + cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests, + sfw_test_instance_t, tsi_list) { if (!tsi->tsi_is_client) /* skip server instances */ continue; @@ -924,7 +994,8 @@ sfw_run_batch (sfw_batch_t *tsb) atomic_inc(&tsb->bat_nactive); - list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) { + cfs_list_for_each_entry_typed (tsu, &tsi->tsi_units, + sfw_test_unit_t, tsu_list) { atomic_inc(&tsi->tsi_nactive); tsu->tsu_loop = tsi->tsi_loop; wi = &tsu->tsu_worker; @@ -942,10 +1013,13 @@ sfw_stop_batch (sfw_batch_t *tsb, int force) sfw_test_instance_t *tsi; srpc_client_rpc_t *rpc; - if (!sfw_batch_active(tsb)) - return -EPERM; + if (!sfw_batch_active(tsb)) { + CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id); + return 0; + } - list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) { + cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests, + sfw_test_instance_t, tsi_list) { spin_lock(&tsi->tsi_lock); if (!tsi->tsi_is_client || @@ -962,7 +1036,8 @@ sfw_stop_batch (sfw_batch_t *tsb, int force) } /* abort launched rpcs in the test */ - list_for_each_entry (rpc, &tsi->tsi_active_rpcs, crpc_list) { + cfs_list_for_each_entry_typed (rpc, &tsi->tsi_active_rpcs, + srpc_client_rpc_t, crpc_list) { spin_lock(&rpc->crpc_lock); srpc_abort_rpc(rpc, -EINTR); @@ -989,7 +1064,8 @@ sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply) return 0; } - list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) { + cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests, + sfw_test_instance_t, tsi_list) { if (testidx-- > 1) continue; @@ -1452,7 +1528,7 @@ sfw_post_rpc (srpc_client_rpc_t *rpc) LASSERT (list_empty(&rpc->crpc_list)); LASSERT (!sfw_data.fw_shuttingdown); - rpc->crpc_timeout = SFW_CLIENT_RPC_TIMEOUT; + rpc->crpc_timeout = rpc_timeout; srpc_post_rpc(rpc); spin_unlock(&rpc->crpc_lock); @@ -1462,37 +1538,52 @@ sfw_post_rpc (srpc_client_rpc_t *rpc) static srpc_service_t sfw_services[] = { { - .sv_name = "debug", - .sv_id = SRPC_SERVICE_DEBUG, + /* sv_id */ SRPC_SERVICE_DEBUG, + /* sv_name */ "debug", + 0 }, { - .sv_name = "query stats", - .sv_id = SRPC_SERVICE_QUERY_STAT, + /* sv_id */ SRPC_SERVICE_QUERY_STAT, + /* sv_name */ "query stats", + 0 }, { - .sv_name = "make sessin", - .sv_id = SRPC_SERVICE_MAKE_SESSION, + /* sv_id */ SRPC_SERVICE_MAKE_SESSION, + /* sv_name */ "make session", + 0 }, { - .sv_name = "remove session", - .sv_id = SRPC_SERVICE_REMOVE_SESSION, + /* sv_id */ SRPC_SERVICE_REMOVE_SESSION, + /* sv_name */ "remove session", + 0 }, { - .sv_name = "batch service", - .sv_id = SRPC_SERVICE_BATCH, + /* sv_id */ SRPC_SERVICE_BATCH, + /* sv_name */ "batch service", + 0 }, { - .sv_name = "test service", - .sv_id = SRPC_SERVICE_TEST, + /* sv_id */ SRPC_SERVICE_TEST, + /* sv_name */ "test service", + 0 }, - { .sv_name = NULL, } + { + /* sv_id */ 0, + /* sv_name */ NULL, + 0 + } }; extern sfw_test_client_ops_t ping_test_client; extern srpc_service_t ping_test_service; +extern void ping_init_test_client(void); +extern void ping_init_test_service(void); extern sfw_test_client_ops_t brw_test_client; extern srpc_service_t brw_test_service; +extern void brw_init_test_client(void); +extern void brw_init_test_service(void); + int sfw_startup (void) @@ -1511,6 +1602,9 @@ sfw_startup (void) s = getenv("BRW_INJECT_ERRORS"); brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors; + + s = getenv("RPC_TIMEOUT"); + rpc_timeout = s != NULL ? atoi(s) : rpc_timeout; #endif if (session_timeout < 0) { @@ -1519,10 +1613,20 @@ sfw_startup (void) return -EINVAL; } + if (rpc_timeout < 0) { + CERROR ("RPC timeout must be non-negative: %d\n", + rpc_timeout); + return -EINVAL; + } + if (session_timeout == 0) CWARN ("Zero session_timeout specified " "- test sessions never expire.\n"); + if (rpc_timeout == 0) + CWARN ("Zero rpc_timeout specified " + "- test RPC never expire.\n"); + memset(&sfw_data, 0, sizeof(struct smoketest_framework)); sfw_data.fw_session = NULL; @@ -1533,13 +1637,19 @@ sfw_startup (void) CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); + brw_init_test_client(); + brw_init_test_service(); rc = sfw_register_test(&brw_test_service, &brw_test_client); LASSERT (rc == 0); + + ping_init_test_client(); + ping_init_test_service(); rc = sfw_register_test(&ping_test_service, &ping_test_client); LASSERT (rc == 0); error = 0; - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { + cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests, + sfw_test_case_t, tsc_list) { sv = tsc->tsc_srv_service; sv->sv_concur = SFW_TEST_CONCURRENCY; @@ -1625,7 +1735,8 @@ sfw_shutdown (void) srpc_remove_service(sv); } - list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) { + cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests, + sfw_test_case_t, tsc_list) { sv = tsc->tsc_srv_service; srpc_shutdown_service(sv); srpc_remove_service(sv);