Whamcloud - gitweb
b=19720
[fs/lustre-release.git] / lnet / selftest / framework.c
index 283f875..8758c37 100644 (file)
@@ -53,9 +53,11 @@ static int session_timeout = 100;
 CFS_MODULE_PARM(session_timeout, "i", int, 0444,
                 "test session timeout in seconds (100 by default, 0 == never)");
 
-#define SFW_TEST_CONCURRENCY     128
-#define SFW_TEST_RPC_TIMEOUT     64
-#define SFW_CLIENT_RPC_TIMEOUT   64  /* in seconds */
+static int rpc_timeout = 64;
+CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
+                "rpc timeout in seconds (64 by default, 0 == never)");
+
+#define SFW_TEST_CONCURRENCY     1792
 #define SFW_EXTRA_TEST_BUFFERS   8 /* tolerate buggy peers with extra buffers */
 
 #define sfw_test_buffers(tsi)    ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS)
@@ -214,6 +216,7 @@ sfw_deactivate_session (void)
         sfw_session_t *sn = sfw_data.fw_session;
         int            nactive = 0;
         sfw_batch_t   *tsb;
+        sfw_test_case_t *tsc;
 
         if (sn == NULL) return;
 
@@ -223,6 +226,15 @@ sfw_deactivate_session (void)
         atomic_inc(&sfw_data.fw_nzombies);
         list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions);
 
+        spin_unlock(&sfw_data.fw_lock);
+
+        cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
+                                       sfw_test_case_t, tsc_list) {
+                srpc_abort_service(tsc->tsc_srv_service);
+        }
+
+        spin_lock(&sfw_data.fw_lock);
+
         cfs_list_for_each_entry_typed (tsb, &sn->sn_batches,
                                        sfw_batch_t, bat_list) {
                 if (sfw_batch_active(tsb)) {
@@ -282,6 +294,7 @@ sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
         memset(sn, 0, sizeof(sfw_session_t));
         CFS_INIT_LIST_HEAD(&sn->sn_list);
         CFS_INIT_LIST_HEAD(&sn->sn_batches);
+        atomic_set(&sn->sn_refcount, 1);        /* +1 for caller */
         atomic_set(&sn->sn_brw_errors, 0);
         atomic_set(&sn->sn_ping_errors, 0);
         strncpy(&sn->sn_name[0], name, LST_NAME_SIZE);
@@ -439,13 +452,24 @@ sfw_make_session (srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
                 return 0;
         }
 
-        if (sn != NULL && !request->mksn_force) {
-                reply->mksn_sid    = sn->sn_id;
-                reply->mksn_status = EBUSY;
-                strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
-                return 0;
+        if (sn != NULL) {
+                reply->mksn_status  = 0;
+                reply->mksn_sid     = sn->sn_id;
+                reply->mksn_timeout = sn->sn_timeout;
+
+                if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) {
+                        atomic_inc(&sn->sn_refcount);
+                        return 0;
+                }
+
+                if (!request->mksn_force) {
+                        reply->mksn_status = EBUSY;
+                        strncpy(&reply->mksn_name[0], &sn->sn_name[0], LST_NAME_SIZE);
+                        return 0;
+                }
         }
-        
+
+        /* no existing session, or replacing it by force: create a new one */
         LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
         if (sn == NULL) {
                 CERROR ("Dropping RPC (mksn) under memory pressure.\n");
@@ -485,6 +509,11 @@ sfw_remove_session (srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
                 return 0;
         }
 
+        if (!atomic_dec_and_test(&sn->sn_refcount)) {
+                reply->rmsn_status = 0;
+                return 0;
+        }
+
         spin_lock(&sfw_data.fw_lock);
         sfw_deactivate_session();
         spin_unlock(&sfw_data.fw_lock);
@@ -922,7 +951,7 @@ sfw_run_test (swi_workitem_t *wi)
         list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs);
         spin_unlock(&tsi->tsi_lock);
 
-        rpc->crpc_timeout = SFW_TEST_RPC_TIMEOUT;
+        rpc->crpc_timeout = rpc_timeout;
 
         spin_lock(&rpc->crpc_lock);
         srpc_post_rpc(rpc);
@@ -950,9 +979,9 @@ sfw_run_batch (sfw_batch_t *tsb)
         sfw_test_instance_t *tsi;
 
         if (sfw_batch_active(tsb)) {
-                CDEBUG (D_NET, "Can't start active batch: "LPU64" (%d)\n",
-                        tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
-                return -EPERM;
+                CDEBUG(D_NET, "Batch already active: "LPU64" (%d)\n",
+                       tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive));
+                return 0;
         }
 
         cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
@@ -984,8 +1013,10 @@ sfw_stop_batch (sfw_batch_t *tsb, int force)
         sfw_test_instance_t *tsi;
         srpc_client_rpc_t   *rpc;
 
-        if (!sfw_batch_active(tsb))
-                return -EPERM;
+        if (!sfw_batch_active(tsb)) {
+                CDEBUG(D_NET, "Batch "LPU64" inactive\n", tsb->bat_id.bat_id);
+                return 0;
+        }
 
         cfs_list_for_each_entry_typed (tsi, &tsb->bat_tests,
                                        sfw_test_instance_t, tsi_list) {
@@ -1497,7 +1528,7 @@ sfw_post_rpc (srpc_client_rpc_t *rpc)
         LASSERT (list_empty(&rpc->crpc_list));
         LASSERT (!sfw_data.fw_shuttingdown);
 
-        rpc->crpc_timeout = SFW_CLIENT_RPC_TIMEOUT;
+        rpc->crpc_timeout = rpc_timeout;
         srpc_post_rpc(rpc);
 
         spin_unlock(&rpc->crpc_lock);
@@ -1571,6 +1602,9 @@ sfw_startup (void)
 
         s = getenv("BRW_INJECT_ERRORS");
         brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
+
+        s = getenv("RPC_TIMEOUT");
+        rpc_timeout = s != NULL ? atoi(s) : rpc_timeout;
 #endif
 
         if (session_timeout < 0) {
@@ -1579,10 +1613,20 @@ sfw_startup (void)
                 return -EINVAL;
         }
 
+        if (rpc_timeout < 0) {
+                CERROR ("RPC timeout must be non-negative: %d\n",
+                        rpc_timeout);
+                return -EINVAL;
+        }
+
         if (session_timeout == 0)
                 CWARN ("Zero session_timeout specified "
                        "- test sessions never expire.\n");
 
+        if (rpc_timeout == 0)
+                CWARN ("Zero rpc_timeout specified "
+                       "- test RPC never expire.\n");
+
         memset(&sfw_data, 0, sizeof(struct smoketest_framework));
 
         sfw_data.fw_session     = NULL;