Whamcloud - gitweb
b=19104
[fs/lustre-release.git] / lnet / selftest / framework.c
index 7e79455..283f875 100644 (file)
@@ -1,19 +1,53 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Authors: Isaac Huang <isaac@clusterfs.com>
- *            Liang Zhen  <liangzhen@clusterfs.com>
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (C) 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/selftest/framework.c
+ *
+ * Author: Isaac Huang <isaac@clusterfs.com>
+ * Author: Liang Zhen  <liangzhen@clusterfs.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LNET
 
-#include <libcfs/kp30.h>
-#include <libcfs/libcfs.h>
-#include <lnet/lib-lnet.h>
-
 #include "selftest.h"
 
+lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
+
+int brw_inject_errors = 0;
+CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
+                "# data errors to inject randomly, zero by default");
 
 static int session_timeout = 100;
 CFS_MODULE_PARM(session_timeout, "i", int, 0444,
@@ -41,6 +75,7 @@ do {                                    \
 #define sfw_unpack_fw_counters(fc)        \
 do {                                      \
         __swab32s(&(fc).brw_errors);      \
+        __swab32s(&(fc).ping_errors);     \
         __swab32s(&(fc).active_tests);    \
         __swab32s(&(fc).active_batches);  \
         __swab32s(&(fc).zombie_sessions); \
@@ -98,7 +133,8 @@ sfw_find_test_case(int id)
         LASSERT (id <= SRPC_SERVICE_MAX_ID);
         LASSERT (id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
 
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 if (tsc->tsc_srv_service->sv_id == id)
                         return tsc;
         }
@@ -179,8 +215,7 @@ sfw_deactivate_session (void)
         int            nactive = 0;
         sfw_batch_t   *tsb;
 
-        if (sn == NULL)
-                return;
+        if (sn == NULL) return;
 
         LASSERT (!sn->sn_timer_active);
 
@@ -188,7 +223,8 @@ sfw_deactivate_session (void)
         atomic_inc(&sfw_data.fw_nzombies);
         list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
 
-        list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (sfw_batch_active(tsb)) {
                         nactive++;
                         sfw_stop_batch(tsb, 1);
@@ -215,12 +251,6 @@ sfw_session_removed (void)
         return (sfw_data.fw_session == NULL) ? 1 : 0;
 }
 
-void
-sfw_set_session_timeout (int timeout)
-{
-        session_timeout = timeout;
-}
-
 #endif
 
 void
@@ -253,6 +283,7 @@ sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
         CFS_INIT_LIST_HEAD(&sn->sn_list);
         CFS_INIT_LIST_HEAD(&sn->sn_batches);
         atomic_set(&sn->sn_brw_errors, 0);
+        atomic_set(&sn->sn_ping_errors, 0);
         strncpy(&sn->sn_name[0], name, LST_NAME_SIZE);
 
         sn->sn_timer_active = 0;
@@ -318,7 +349,8 @@ sfw_find_batch (lst_bid_t bid)
 
         LASSERT (sn != NULL);
 
-        list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (bat->bat_id.bat_id == bid.bat_id)
                         return bat;
         }
@@ -378,10 +410,12 @@ sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
         srpc_get_counters(&reply->str_rpc);
 
         cnt->brw_errors      = atomic_read(&sn->sn_brw_errors);
+        cnt->ping_errors     = atomic_read(&sn->sn_ping_errors);
         cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies);
 
         cnt->active_tests = cnt->active_batches = 0;
-        list_for_each_entry (bat, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (bat, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 int n = atomic_read(&bat->bat_nactive);
 
                 if (n > 0) {
@@ -538,8 +572,7 @@ sfw_destroy_test_instance (sfw_test_instance_t *tsi)
         srpc_client_rpc_t *rpc;
         sfw_test_unit_t   *tsu;
 
-        if (!tsi->tsi_is_client)
-                goto clean;
+        if (!tsi->tsi_is_client) goto clean;
 
         tsi->tsi_ops->tso_fini(tsi);
 
@@ -666,13 +699,13 @@ sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
         CFS_INIT_LIST_HEAD(&tsi->tsi_free_rpcs);
         CFS_INIT_LIST_HEAD(&tsi->tsi_active_rpcs);
 
-        tsi->tsi_stopping   = 0;
-        tsi->tsi_batch      = tsb;
-        tsi->tsi_loop       = req->tsr_loop;
-        tsi->tsi_concur     = req->tsr_concur;
-        tsi->tsi_service    = req->tsr_service;
-        tsi->tsi_is_client  = !!(req->tsr_is_client);
-        tsi->tsi_stop_onerr = !!(req->tsr_stop_onerr);
+        tsi->tsi_stopping      = 0;
+        tsi->tsi_batch         = tsb;
+        tsi->tsi_loop          = req->tsr_loop;
+        tsi->tsi_concur        = req->tsr_concur;
+        tsi->tsi_service       = req->tsr_service;
+        tsi->tsi_is_client     = !!(req->tsr_is_client);
+        tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr);
 
         rc = sfw_load_test(tsi);
         if (rc != 0) {
@@ -692,8 +725,8 @@ sfw_add_test_instance (sfw_batch_t *tsb, srpc_server_rpc_t *rpc)
 #ifndef __KERNEL__
         LASSERT (bk->bk_pages != NULL);
 #endif
-        LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= ndest);
-        LASSERT (bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
+        LASSERT (bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest);
+        LASSERT ((unsigned int)bk->bk_len >= sizeof(lnet_process_id_t) * ndest);
 
         sfw_unpack_test_req(msg);
         memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u));
@@ -770,7 +803,8 @@ sfw_test_unit_done (sfw_test_unit_t *tsu)
         
         LASSERT (!list_empty(&sn->sn_list)); /* I'm a zombie! */
 
-        list_for_each_entry (tsb, &sn->sn_batches, bat_list) {
+        cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
+                                       sfw_batch_t, bat_list) {
                 if (sfw_batch_active(tsb)) {
                         spin_unlock(&sfw_data.fw_lock);
                         return;
@@ -791,10 +825,6 @@ sfw_test_rpc_done (srpc_client_rpc_t *rpc)
         sfw_test_instance_t *tsi = tsu->tsu_instance;
         int                  done = 0;
 
-        if (rpc->crpc_status != 0 && tsu->tsu_error == 0 &&
-            (rpc->crpc_status != -EINTR || !tsi->tsi_stopping))
-                tsu->tsu_error = rpc->crpc_status;
-
         tsi->tsi_ops->tso_done_rpc(tsu, rpc);
                       
         spin_lock(&tsi->tsi_lock);
@@ -807,7 +837,7 @@ sfw_test_rpc_done (srpc_client_rpc_t *rpc)
         /* batch is stopping or loop is done or get error */
         if (tsi->tsi_stopping ||
             tsu->tsu_loop == 0 ||
-            (tsu->tsu_error != 0 && tsi->tsi_stop_onerr))
+            (rpc->crpc_status != 0 && tsi->tsi_stoptsu_onerr))
                 done = 1;
 
         /* dec ref for poster */
@@ -871,8 +901,7 @@ sfw_run_test (swi_workitem_t *wi)
 
         LASSERT (wi == &tsu->tsu_worker);
 
-        tsu->tsu_error = tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc);
-        if (tsu->tsu_error != 0) {
+        if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc) != 0) {
                 LASSERT (rpc == NULL);
                 goto test_done;
         }
@@ -926,7 +955,8 @@ sfw_run_batch (sfw_batch_t *tsb)
                 return -EPERM;
         }
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 if (!tsi->tsi_is_client) /* skip server instances */
                         continue;
 
@@ -935,12 +965,10 @@ sfw_run_batch (sfw_batch_t *tsb)
 
                 atomic_inc(&tsb->bat_nactive);
 
-                list_for_each_entry (tsu, &tsi->tsi_units, tsu_list) {
+                cfs_list_for_each_entry_typed (tsu, &tsi->tsi_units,
+                                               sfw_test_unit_t, tsu_list) {
                         atomic_inc(&tsi->tsi_nactive);
-
-                        tsu->tsu_error = 0;
-                        tsu->tsu_loop  = tsi->tsi_loop;
-
+                        tsu->tsu_loop = tsi->tsi_loop;
                         wi = &tsu->tsu_worker;
                         swi_init_workitem(wi, tsu, sfw_run_test);
                         swi_schedule_workitem(wi);
@@ -959,7 +987,8 @@ sfw_stop_batch (sfw_batch_t *tsb, int force)
         if (!sfw_batch_active(tsb))
                 return -EPERM;
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 spin_lock(&tsi->tsi_lock);
 
                 if (!tsi->tsi_is_client ||
@@ -976,7 +1005,8 @@ sfw_stop_batch (sfw_batch_t *tsb, int force)
                 }
 
                 /* abort launched rpcs in the test */
-                list_for_each_entry (rpc, &tsi->tsi_active_rpcs, crpc_list) {
+                cfs_list_for_each_entry_typed (rpc, &tsi->tsi_active_rpcs,
+                                               srpc_client_rpc_t, crpc_list) {
                         spin_lock(&rpc->crpc_lock);
 
                         srpc_abort_rpc(rpc, -EINTR);
@@ -1003,7 +1033,8 @@ sfw_query_batch (sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
                 return 0;
         }
 
-        list_for_each_entry (tsi, &tsb->bat_tests, tsi_list) {
+        cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
+                                       sfw_test_instance_t, tsi_list) {
                 if (testidx-- > 1)
                         continue;
 
@@ -1476,37 +1507,52 @@ sfw_post_rpc (srpc_client_rpc_t *rpc)
 static srpc_service_t sfw_services[] = 
 {
         {
-                .sv_name = "debug",
-                .sv_id   = SRPC_SERVICE_DEBUG,
+                /* sv_id */    SRPC_SERVICE_DEBUG,
+                /* sv_name */  "debug",
+                0
         },
         {
-                .sv_name = "query stats",
-                .sv_id   = SRPC_SERVICE_QUERY_STAT,
+                /* sv_id */    SRPC_SERVICE_QUERY_STAT,
+                /* sv_name */  "query stats",
+                0
         },
         {
-                .sv_name = "make sessin",
-                .sv_id   = SRPC_SERVICE_MAKE_SESSION,
+                /* sv_id */    SRPC_SERVICE_MAKE_SESSION,
+                /* sv_name */  "make session",
+                0
         },
         {
-                .sv_name = "remove session",
-                .sv_id   = SRPC_SERVICE_REMOVE_SESSION,
+                /* sv_id */    SRPC_SERVICE_REMOVE_SESSION,
+                /* sv_name */  "remove session",
+                0
         },
         {
-                .sv_name = "batch service",
-                .sv_id   = SRPC_SERVICE_BATCH,
+                /* sv_id */    SRPC_SERVICE_BATCH,
+                /* sv_name */  "batch service",
+                0
         },
         {
-                .sv_name = "test service",
-                .sv_id   = SRPC_SERVICE_TEST,
+                /* sv_id */    SRPC_SERVICE_TEST,
+                /* sv_name */  "test service",
+                0
         },
-        {       .sv_name = NULL, }
+        {
+                /* sv_id */    0,
+                /* sv_name */  NULL,
+                0
+        }
 };
 
 extern sfw_test_client_ops_t ping_test_client;
 extern srpc_service_t        ping_test_service;
+extern void ping_init_test_client(void);
+extern void ping_init_test_service(void);
 
 extern sfw_test_client_ops_t brw_test_client;
 extern srpc_service_t        brw_test_service;
+extern void brw_init_test_client(void);
+extern void brw_init_test_service(void);
+
 
 int
 sfw_startup (void)
@@ -1517,6 +1563,16 @@ sfw_startup (void)
         srpc_service_t  *sv;
         sfw_test_case_t *tsc;
 
+#ifndef __KERNEL__
+        char *s;
+
+        s = getenv("SESSION_TIMEOUT");
+        session_timeout = s != NULL ? atoi(s) : session_timeout;
+
+        s = getenv("BRW_INJECT_ERRORS");
+        brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
+#endif
+
         if (session_timeout < 0) {
                 CERROR ("Session timeout must be non-negative: %d\n",
                         session_timeout);
@@ -1525,7 +1581,7 @@ sfw_startup (void)
 
         if (session_timeout == 0)
                 CWARN ("Zero session_timeout specified "
-                       "- test sessions never timeout.\n");
+                       "- test sessions never expire.\n");
 
         memset(&sfw_data, 0, sizeof(struct smoketest_framework));
 
@@ -1537,13 +1593,19 @@ sfw_startup (void)
         CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
         CFS_INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
 
+        brw_init_test_client();
+        brw_init_test_service();
         rc = sfw_register_test(&brw_test_service, &brw_test_client);
         LASSERT (rc == 0);
+
+        ping_init_test_client();
+        ping_init_test_service();
         rc = sfw_register_test(&ping_test_service, &ping_test_client);
         LASSERT (rc == 0);
 
         error = 0;
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 sv = tsc->tsc_srv_service;
                 sv->sv_concur = SFW_TEST_CONCURRENCY;
 
@@ -1629,7 +1691,8 @@ sfw_shutdown (void)
                 srpc_remove_service(sv);
         }
 
-        list_for_each_entry (tsc, &sfw_data.fw_tests, tsc_list) {
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
                 sv = tsc->tsc_srv_service;
                 srpc_shutdown_service(sv);
                 srpc_remove_service(sv);