Whamcloud - gitweb
LU-56 lnet: SMP improvements for LNet selftest
[fs/lustre-release.git] / lnet / selftest / framework.c
index c034665..23bae6e 100644 (file)
 
 lst_sid_t LST_INVALID_SID = {LNET_NID_ANY, -1};
 
-int brw_inject_errors = 0;
-CFS_MODULE_PARM(brw_inject_errors, "i", int, 0644,
-                "# data errors to inject randomly, zero by default");
-
 static int session_timeout = 100;
 CFS_MODULE_PARM(session_timeout, "i", int, 0444,
                 "test session timeout in seconds (100 by default, 0 == never)");
@@ -55,11 +51,6 @@ static int rpc_timeout = 64;
 CFS_MODULE_PARM(rpc_timeout, "i", int, 0644,
                 "rpc timeout in seconds (64 by default, 0 == never)");
 
-#define SFW_TEST_CONCURRENCY     1792
-#define SFW_EXTRA_TEST_BUFFERS   8 /* tolerate buggy peers with extra buffers */
-
-#define sfw_test_buffers(tsi)    ((tsi)->tsi_loop + SFW_EXTRA_TEST_BUFFERS)
-
 #define sfw_unpack_id(id)               \
 do {                                    \
         __swab64s(&(id).nid);           \
@@ -309,10 +300,10 @@ sfw_init_session (sfw_session_t *sn, lst_sid_t sid, const char *name)
 
 /* completion handler for incoming framework RPCs */
 void
-sfw_server_rpc_done (srpc_server_rpc_t *rpc)
+sfw_server_rpc_done(struct srpc_server_rpc *rpc)
 {
-        srpc_service_t *sv = rpc->srpc_service;
-        int             status = rpc->srpc_status;
+       struct srpc_service     *sv     = rpc->srpc_scd->scd_svc;
+       int                     status  = rpc->srpc_status;
 
         CDEBUG (D_NET,
                 "Incoming framework RPC done: "
@@ -553,45 +544,66 @@ sfw_test_rpc_fini (srpc_client_rpc_t *rpc)
         cfs_list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs);
 }
 
-int
-sfw_load_test (sfw_test_instance_t *tsi)
+static inline int
+sfw_test_buffers(sfw_test_instance_t *tsi)
 {
-        sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
-        int              nrequired = sfw_test_buffers(tsi);
-        int              nposted;
-
-        LASSERT (tsc != NULL);
-
-        if (tsi->tsi_is_client) {
-                tsi->tsi_ops = tsc->tsc_cli_ops;
-                return 0;
-        }
+       struct sfw_test_case    *tsc = sfw_find_test_case(tsi->tsi_service);
+       struct srpc_service     *svc = tsc->tsc_srv_service;
+       int                     nbuf;
 
-        nposted = srpc_service_add_buffers(tsc->tsc_srv_service, nrequired);
-        if (nposted != nrequired) {
-                CWARN ("Failed to reserve enough buffers: "
-                       "service %s, %d needed, %d reserved\n",
-                       tsc->tsc_srv_service->sv_name, nrequired, nposted);
-                srpc_service_remove_buffers(tsc->tsc_srv_service, nposted);
-                return -ENOMEM;
-        }
+       nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts;
+       return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA);
+}
 
-        CDEBUG (D_NET, "Reserved %d buffers for test %s\n",
-                nposted, tsc->tsc_srv_service->sv_name);
-        return 0;
+int
+sfw_load_test(struct sfw_test_instance *tsi)
+{
+       struct sfw_test_case    *tsc = sfw_find_test_case(tsi->tsi_service);
+       struct srpc_service     *svc = tsc->tsc_srv_service;
+       int                     nbuf = sfw_test_buffers(tsi);
+       int                     rc;
+
+       LASSERT(tsc != NULL);
+
+       if (tsi->tsi_is_client) {
+               tsi->tsi_ops = tsc->tsc_cli_ops;
+               return 0;
+       }
+
+       rc = srpc_service_add_buffers(svc, nbuf);
+       if (rc != 0) {
+               CWARN("Failed to reserve enough buffers: "
+                     "service %s, %d needed: %d\n", svc->sv_name, nbuf, rc);
+               /* NB: this error handler is not strictly correct, because
+                * it may release more buffers than already allocated,
+                * but it doesn't matter because request portal should
+                * be lazy portal and will grow buffers if necessary. */
+               srpc_service_remove_buffers(svc, nbuf);
+               return -ENOMEM;
+       }
+
+       CDEBUG(D_NET, "Reserved %d buffers for test %s\n",
+              nbuf * (srpc_serv_is_framework(svc) ?
+                      1 : cfs_cpt_number(cfs_cpt_table)), svc->sv_name);
+       return 0;
 }
 
 void
-sfw_unload_test (sfw_test_instance_t *tsi)
+sfw_unload_test(struct sfw_test_instance *tsi)
 {
-        sfw_test_case_t *tsc = sfw_find_test_case(tsi->tsi_service);
+       struct sfw_test_case *tsc = sfw_find_test_case(tsi->tsi_service);
 
-        LASSERT (tsc != NULL);
+       LASSERT(tsc != NULL);
 
-        if (!tsi->tsi_is_client)
-                srpc_service_remove_buffers(tsc->tsc_srv_service,
-                                            sfw_test_buffers(tsi));
-        return;
+       if (tsi->tsi_is_client)
+               return;
+
+       /* shrink buffers, because request portal is lazy portal
+        * which can grow buffers at runtime so we may leave
+        * some buffers behind, but never mind... */
+       srpc_service_remove_buffers(tsc->tsc_srv_service,
+                                   sfw_test_buffers(tsi));
+       return;
 }
 
 void
@@ -900,18 +912,20 @@ sfw_create_test_rpc (sfw_test_unit_t *tsu, lnet_process_id_t peer,
                                      srpc_client_rpc_t, crpc_list);
                 LASSERT (nblk == rpc->crpc_bulk.bk_niov);
                 cfs_list_del_init(&rpc->crpc_list);
-
-                srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
-                                     blklen, sfw_test_rpc_done,
-                                     sfw_test_rpc_fini, tsu);
         }
 
         cfs_spin_unlock(&tsi->tsi_lock);
-        
-        if (rpc == NULL)
-                rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
-                                             blklen, sfw_test_rpc_done, 
-                                             sfw_test_rpc_fini, tsu);
+
+       if (rpc == NULL) {
+               rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk,
+                                            blklen, sfw_test_rpc_done,
+                                            sfw_test_rpc_fini, tsu);
+       } else {
+               srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk,
+                                    blklen, sfw_test_rpc_done,
+                                    sfw_test_rpc_fini, tsu);
+       }
+
         if (rpc == NULL) {
                 CERROR ("Can't create rpc for test %d\n", tsi->tsi_service);
                 return -ENOMEM;
@@ -966,9 +980,9 @@ test_done:
          * - my batch is still active; no one can run it again now.
          * Cancel pending schedules and prevent future schedule attempts:
          */
-        swi_kill_workitem(wi);
-        sfw_test_unit_done(tsu);
-        return 1;
+       swi_exit_workitem(wi);
+       sfw_test_unit_done(tsu);
+       return 1;
 }
 
 int
@@ -1000,7 +1014,8 @@ sfw_run_batch (sfw_batch_t *tsb)
                         tsu->tsu_loop = tsi->tsi_loop;
                         wi = &tsu->tsu_worker;
                        swi_init_workitem(wi, tsu, sfw_run_test,
-                                         lst_sched_test);
+                                         lst_sched_test[\
+                                         lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
                         swi_schedule_workitem(wi);
                 }
         }
@@ -1085,15 +1100,16 @@ sfw_free_pages (srpc_server_rpc_t *rpc)
 }
 
 int
-sfw_alloc_pages (srpc_server_rpc_t *rpc, int npages, int sink)
+sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int sink)
 {
-        LASSERT (rpc->srpc_bulk == NULL);
-        LASSERT (npages > 0 && npages <= LNET_MAX_IOV);
+       LASSERT(rpc->srpc_bulk == NULL);
+       LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
 
-        rpc->srpc_bulk = srpc_alloc_bulk(npages, sink);
-        if (rpc->srpc_bulk == NULL) return -ENOMEM;
+       rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages, sink);
+       if (rpc->srpc_bulk == NULL)
+               return -ENOMEM;
 
-        return 0;
+       return 0;
 }
 
 int
@@ -1129,7 +1145,7 @@ sfw_add_test (srpc_server_rpc_t *rpc)
         bat = sfw_bid2batch(request->tsr_bid);
         if (bat == NULL) {
                 CERROR ("Dropping RPC (%s) from %s under memory pressure.\n",
-                        rpc->srpc_service->sv_name,
+                       rpc->srpc_scd->scd_svc->sv_name,
                         libcfs_id2str(rpc->srpc_peer));
                 return -ENOMEM;
         }
@@ -1140,9 +1156,9 @@ sfw_add_test (srpc_server_rpc_t *rpc)
         }
 
         if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
-                /* rpc will be resumed later in sfw_bulk_ready */
-                return sfw_alloc_pages(rpc,
-                                       sfw_id_pages(request->tsr_ndest), 1);
+               /* rpc will be resumed later in sfw_bulk_ready */
+               return sfw_alloc_pages(rpc, CFS_CPT_ANY,
+                                      sfw_id_pages(request->tsr_ndest), 1);
         }
 
         rc = sfw_add_test_instance(bat, rpc);
@@ -1198,9 +1214,9 @@ sfw_control_batch (srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
 }
 
 int
-sfw_handle_server_rpc (srpc_server_rpc_t *rpc)
+sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
 {
-        srpc_service_t *sv = rpc->srpc_service;
+       struct srpc_service     *sv = rpc->srpc_scd->scd_svc;
         srpc_msg_t     *reply = &rpc->srpc_replymsg;
         srpc_msg_t     *request = &rpc->srpc_reqstbuf->buf_msg;
         int             rc = 0;
@@ -1279,10 +1295,10 @@ sfw_handle_server_rpc (srpc_server_rpc_t *rpc)
 }
 
 int
-sfw_bulk_ready (srpc_server_rpc_t *rpc, int status)
+sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
 {
-        srpc_service_t *sv = rpc->srpc_service;
-        int             rc;
+       struct srpc_service     *sv = rpc->srpc_scd->scd_svc;
+       int                     rc;
 
         LASSERT (rpc->srpc_bulk != NULL);
         LASSERT (sv->sv_id == SRPC_SERVICE_TEST);
@@ -1601,9 +1617,6 @@ sfw_startup (void)
         s = getenv("SESSION_TIMEOUT");
         session_timeout = s != NULL ? atoi(s) : session_timeout;
 
-        s = getenv("BRW_INJECT_ERRORS");
-        brw_inject_errors = s != NULL ? atoi(s) : brw_inject_errors;
-
         s = getenv("RPC_TIMEOUT");
         rpc_timeout = s != NULL ? atoi(s) : rpc_timeout;
 #endif
@@ -1652,7 +1665,6 @@ sfw_startup (void)
         cfs_list_for_each_entry_typed (tsc, &sfw_data.fw_tests,
                                        sfw_test_case_t, tsc_list) {
                 sv = tsc->tsc_srv_service;
-                sv->sv_concur = SFW_TEST_CONCURRENCY;
 
                 rc = srpc_add_service(sv);
                 LASSERT (rc != -EBUSY);
@@ -1669,7 +1681,7 @@ sfw_startup (void)
 
                 sv->sv_bulk_ready = NULL;
                 sv->sv_handler    = sfw_handle_server_rpc;
-                sv->sv_concur     = SFW_SERVICE_CONCURRENCY;
+               sv->sv_wi_total   = SFW_FRWK_WI_MAX;
                 if (sv->sv_id == SRPC_SERVICE_TEST)
                         sv->sv_bulk_ready = sfw_bulk_ready;
 
@@ -1684,13 +1696,13 @@ sfw_startup (void)
                 /* about to sfw_shutdown, no need to add buffer */
                 if (error) continue;
 
-                rc = srpc_service_add_buffers(sv, SFW_POST_BUFFERS);
-                if (rc != SFW_POST_BUFFERS) {
-                        CWARN ("Failed to reserve enough buffers: "
-                               "service %s, %d needed, %d reserved\n",
-                               sv->sv_name, SFW_POST_BUFFERS, rc);
-                        error = -ENOMEM;
-                }
+               rc = srpc_service_add_buffers(sv, sv->sv_wi_total);
+               if (rc != 0) {
+                       CWARN("Failed to reserve enough buffers: "
+                             "service %s, %d needed: %d\n",
+                             sv->sv_name, sv->sv_wi_total, rc);
+                       error = -ENOMEM;
+               }
         }
 
         if (error != 0)