Whamcloud - gitweb
b=19720
[fs/lustre-release.git] / lnet / selftest / framework.c
index 3fde971..8758c37 100644 (file)
@@ -1,15 +1,50 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Authors: Isaac Huang <isaac@clusterfs.com>
- *            Liang Zhen  <liangzhen@clusterfs.com>
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/framework.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ * Author: Liang Zhen  <liangzhen@clusterfs.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LNET
 
 #include "selftest.h"
 
+lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
+
 int brw_inject_errors = 0;
 CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
                 "# data errors to inject randomly, zero by default");
@@ -18,9 +53,11 @@ static int session_timeout = 100;
 CFS_MODULE_PARM(session_timeout, "i", int, 0444,
                 "test session timeout in seconds (100 by default, 0 == never)");
 
-#define SFW_TEST_CONCURRENCY     128
-#define SFW_TEST_RPC_TIMEOUT     64
-#define SFW_CLIENT_RPC_TIMEOUT   64  /* in seconds */
+static int rpc_timeout = 64;
+CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
+                "rpc timeout in seconds (64 by default, 0 == never)");
+
+#define SFW_TEST_CONCURRENCY     1792
 #define SFW_EXTRA_TEST_BUFFERS   8 /* tolerate buggy peers with extra buffers */
 
 #define sfw_test_buffers(tsi)    ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS)
@@ -98,7 +135,8 @@ sfw_find_test_case(int id)
         LASSERT (id <= SRPC_SERVICE_MAX_ID);
         LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
 
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 if (tsc->tsc_srv_service->sv_id == id)
                         return tsc;
         }
@@ -178,6 +216,7 @@ sfw_deactivate_session (void)
         sfw_session_t *sn = sfw_data.fw_session;
         int            nactive = 0;
         sfw_batch_t   *tsb;
+        sfw_test_case_t *tsc;
 
         if (sn == NULL) return;
 
@@ -187,7 +226,17 @@ sfw_deactivate_session (void)
         atomic_inc(&sfw_data.fw_nzombies);
         list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
 
-        list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+        spin_unlock(&sfw_data.fw_lock);
+
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
+                srpc_abort_service(tsc->tsc_srv_service);
+        }
+
+        spin_lock(&sfw_data.fw_lock);
+
+        cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (sfw_batch_active(tsb)) {
                         nactive++;
                         sfw_stop_batch(tsb, 1);
@@ -245,6 +294,7 @@ sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
         memset(sn, 0, sizeof(sfw_session_t));
         CFS_INIT_LIST_HEAD(&sn->sn_list);
         CFS_INIT_LIST_HEAD(&sn->sn_batches);
+        atomic_set(&sn->sn_refcount, 1);        /* +1 for caller */
         atomic_set(&sn->sn_brw_errors, 0);
         atomic_set(&sn->sn_ping_errors, 0);
         strncpy(&sn->sn_name[0], name, LST_NAME_SIZE);
@@ -312,7 +362,8 @@ sfw_find_batch (lst_bid_t bid)
 
         LASSERT (sn != NULL);
 
-        list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (bat->bat_id.bat_id == bid.bat_id)
                         return bat;
         }
@@ -376,7 +427,8 @@ sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
         cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
 
         cnt->active_tests = cnt->active_batches = 0;
-        list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 int n = atomic_read(&bat->bat_nactive);
 
                 if (n > 0) {
@@ -400,13 +452,24 @@ sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
                 return 0;
         }
 
-        if (sn != NULL && !request->mksn_force) {
-                reply->mksn_sid    = sn->sn_id;
-                reply->mksn_status = EBUSY;
-                strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
-                return 0;
+        if (sn != NULL) {
+                reply->mksn_status  = 0;
+                reply->mksn_sid     = sn->sn_id;
+                reply->mksn_timeout = sn->sn_timeout;
+
+                if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
+                        atomic_inc(&sn->sn_refcount);
+                        return 0;
+                }
+
+                if (!request->mksn_force) {
+                        reply->mksn_status = EBUSY;
+                        strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
+                        return 0;
+                }
         }
-        
+
+        /* brand new or create by force */
         LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
         if (sn == NULL) {
                 CERROR ("Dropping RPC (mksn) under memory pressure.\n");
@@ -446,6 +509,11 @@ sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
                 return 0;
         }
 
+        if (!atomic_dec_and_test(&sn->sn_refcount)) {
+                reply->rmsn_status = 0;
+                return 0;
+        }
+
         spin_lock(&sfw_data.fw_lock);
         sfw_deactivate_session();
         spin_unlock(&sfw_data.fw_lock);
@@ -686,8 +754,8 @@ sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
 #ifndef __KERNEL__
         LASSERT (bk->bk_pages != NULL);
 #endif
-        LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= ndest);
-        LASSERT (bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
+        LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
+        LASSERT ((unsigned int)bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
 
         sfw_unpack_test_req(msg);
         memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
@@ -764,7 +832,8 @@ sfw_test_unit_done (sfw_test_unit_t *tsu)
         
         LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */
 
-        list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (sfw_batch_active(tsb)) {
                         spin_unlock(&sfw_data.fw_lock);
                         return;
@@ -882,7 +951,7 @@ sfw_run_test (swi_workitem_t *wi)
         list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
         spin_unlock(&tsi->tsi_lock);
 
-        rpc->crpc_timeout = SFW_TEST_RPC_TIMEOUT;
+        rpc->crpc_timeout = rpc_timeout;
 
         spin_lock(&rpc->crpc_lock);
         srpc_post_rpc(rpc);
@@ -910,12 +979,13 @@ sfw_run_batch (sfw_batch_t *tsb)
         sfw_test_instance_t *tsi;
 
         if (sfw_batch_active(tsb)) {
-                CDEBUG (D_NET, "Can't start active batch: "LPU64" (%d)\n",
-                        tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
-                return -EPERM;
+                CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n",
+                       tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
+                return 0;
         }
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 if (!tsi->tsi_is_client) /* skip server instances */
                         continue;
 
@@ -924,7 +994,8 @@ sfw_run_batch (sfw_batch_t *tsb)
 
                 atomic_inc(&tsb->bat_nactive);
 
-                list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
+                cfs_list_for_each_entry_typed (tsu, &tsi->tsi_units,
+                                               sfw_test_unit_t, tsu_list) {
                         atomic_inc(&tsi->tsi_nactive);
                         tsu->tsu_loop = tsi->tsi_loop;
                         wi = &tsu->tsu_worker;
@@ -942,10 +1013,13 @@ sfw_stop_batch (sfw_batch_t *tsb, int force)
         sfw_test_instance_t *tsi;
         srpc_client_rpc_t   *rpc;
 
-        if (!sfw_batch_active(tsb))
-                return -EPERM;
+        if (!sfw_batch_active(tsb)) {
+                CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id);
+                return 0;
+        }
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 spin_lock(&tsi->tsi_lock);
 
                 if (!tsi->tsi_is_client ||
@@ -962,7 +1036,8 @@ sfw_stop_batch (sfw_batch_t *tsb, int force)
                 }
 
                 /* abort launched rpcs in the test */
-                list_for_each_entry (rpc, &tsi->tsi_active_rpcs, crpc_list) {
+                cfs_list_for_each_entry_typed (rpc, &tsi->tsi_active_rpcs,
+                                               srpc_client_rpc_t, crpc_list) {
                         spin_lock(&rpc->crpc_lock);
 
                         srpc_abort_rpc(rpc, -EINTR);
@@ -989,7 +1064,8 @@ sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
                 return 0;
         }
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 if (testidx-- > 1)
                         continue;
 
@@ -1452,7 +1528,7 @@ sfw_post_rpc (srpc_client_rpc_t *rpc)
         LASSERT (list_empty(&rpc->crpc_list));
         LASSERT (!sfw_data.fw_shuttingdown);
 
-        rpc->crpc_timeout = SFW_CLIENT_RPC_TIMEOUT;
+        rpc->crpc_timeout = rpc_timeout;
         srpc_post_rpc(rpc);
 
         spin_unlock(&rpc->crpc_lock);
@@ -1462,37 +1538,52 @@ sfw_post_rpc (srpc_client_rpc_t *rpc)
 static srpc_service_t sfw_services[] = 
 {
         {
-                .sv_name = "debug",
-                .sv_id   = SRPC_SERVICE_DEBUG,
+                /* sv_id */    SRPC_SERVICE_DEBUG,
+                /* sv_name */  "debug",
+                0
         },
         {
-                .sv_name = "query stats",
-                .sv_id   = SRPC_SERVICE_QUERY_STAT,
+                /* sv_id */    SRPC_SERVICE_QUERY_STAT,
+                /* sv_name */  "query stats",
+                0
         },
         {
-                .sv_name = "make sessin",
-                .sv_id   = SRPC_SERVICE_MAKE_SESSION,
+                /* sv_id */    SRPC_SERVICE_MAKE_SESSION,
+                /* sv_name */  "make session",
+                0
         },
         {
-                .sv_name = "remove session",
-                .sv_id   = SRPC_SERVICE_REMOVE_SESSION,
+                /* sv_id */    SRPC_SERVICE_REMOVE_SESSION,
+                /* sv_name */  "remove session",
+                0
         },
         {
-                .sv_name = "batch service",
-                .sv_id   = SRPC_SERVICE_BATCH,
+                /* sv_id */    SRPC_SERVICE_BATCH,
+                /* sv_name */  "batch service",
+                0
         },
         {
-                .sv_name = "test service",
-                .sv_id   = SRPC_SERVICE_TEST,
+                /* sv_id */    SRPC_SERVICE_TEST,
+                /* sv_name */  "test service",
+                0
         },
-        {       .sv_name = NULL, }
+        {
+                /* sv_id */    0,
+                /* sv_name */  NULL,
+                0
+        }
 };
 
 extern sfw_test_client_ops_t ping_test_client;
 extern srpc_service_t        ping_test_service;
+extern void ping_init_test_client(void);
+extern void ping_init_test_service(void);
 
 extern sfw_test_client_ops_t brw_test_client;
 extern srpc_service_t        brw_test_service;
+extern void brw_init_test_client(void);
+extern void brw_init_test_service(void);
+
 
 int
 sfw_startup (void)
@@ -1511,6 +1602,9 @@ sfw_startup (void)
 
         s = getenv("BRW_INJECT_ERRORS");
         brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
+
+        s = getenv("RPC_TIMEOUT");
+        rpc_timeout = s != NULL ? atoi(s) : rpc_timeout;
 #endif
 
         if (session_timeout < 0) {
@@ -1519,10 +1613,20 @@ sfw_startup (void)
                 return -EINVAL;
         }
 
+        if (rpc_timeout < 0) {
+                CERROR ("RPC timeout must be non-negative: %d\n",
+                        rpc_timeout);
+                return -EINVAL;
+        }
+
         if (session_timeout == 0)
                 CWARN ("Zero session_timeout specified "
                        "- test sessions never expire.\n");
 
+        if (rpc_timeout == 0)
+                CWARN ("Zero rpc_timeout specified "
+                       "- test RPC never expire.\n");
+
         memset(&sfw_data, 0, sizeof(struct smoketest_framework));
 
         sfw_data.fw_session     = NULL;
@@ -1533,13 +1637,19 @@ sfw_startup (void)
         CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
         CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
 
+        brw_init_test_client();
+        brw_init_test_service();
         rc = sfw_register_test(&brw_test_service, &brw_test_client);
         LASSERT (rc == 0);
+
+        ping_init_test_client();
+        ping_init_test_service();
         rc = sfw_register_test(&ping_test_service, &ping_test_client);
         LASSERT (rc == 0);
 
         error = 0;
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 sv = tsc->tsc_srv_service;
                 sv->sv_concur = SFW_TEST_CONCURRENCY;
 
@@ -1625,7 +1735,8 @@ sfw_shutdown (void)
                 srpc_remove_service(sv);
         }
 
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 sv = tsc->tsc_srv_service;
                 srpc_shutdown_service(sv);
                 srpc_remove_service(sv);