Whamcloud - gitweb
LU-9859 libcfs: always range-check libcfs_debug_mb setting.
[fs/lustre-release.git] / lnet / selftest / rpc.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lnet/selftest/rpc.c
33  *
34  * Author: Isaac Huang <isaac@clusterfs.com>
35  *
36  * 2012-05-13: Liang Zhen <liang@whamcloud.com>
37  * - percpt data for service to improve smp performance
38  * - code cleanup
39  */
40
41 #define DEBUG_SUBSYSTEM S_LNET
42
43 #include "selftest.h"
44
/*
 * States recorded in srpc_data.rpc_state.
 * NOTE(review): the names suggest a bring-up/tear-down sequence
 * (NI init -> EQ init -> running -> stopping); the transitions themselves
 * are not visible in this part of the file - confirm against the
 * startup/shutdown code.
 */
enum srpc_state {
        SRPC_STATE_NONE,
        SRPC_STATE_NI_INIT,
        SRPC_STATE_EQ_INIT,
        /* srpc_add_service() asserts that the module is in this state */
        SRPC_STATE_RUNNING,
        SRPC_STATE_STOPPING,
};
52
/*
 * Module-wide state of the selftest RPC layer; there is exactly one
 * instance, srpc_data.
 */
static struct smoketest_rpc {
        /* global lock: taken around accesses to rpc_services[],
         * rpc_counters and rpc_matchbits in this file */
        spinlock_t       rpc_glock;     /* global lock */
        /* registered services, indexed by service id (sv_id) */
        struct srpc_service     *rpc_services[SRPC_SERVICE_MAX_ID + 1];
        /* event queue attached to every MD posted from this file */
        struct lnet_eq          *rpc_lnet_eq;   /* _the_ LNet event queue */
        /* current module state, see enum srpc_state */
        enum srpc_state          rpc_state;
        /* statistics exported via srpc_get_counters()/srpc_set_counters() */
        struct srpc_counters     rpc_counters;
        /* source of unique matchbits, post-incremented by srpc_next_id() */
        __u64                    rpc_matchbits; /* matchbits counter */
} srpc_data;
61
62 static inline int
63 srpc_serv_portal(int svc_id)
64 {
65         return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ?
66                SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL;
67 }
68
/* forward declarations */
70 static int srpc_handle_rpc(struct swi_workitem *wi);
71
72 void srpc_get_counters(struct srpc_counters *cnt)
73 {
74         spin_lock(&srpc_data.rpc_glock);
75         *cnt = srpc_data.rpc_counters;
76         spin_unlock(&srpc_data.rpc_glock);
77 }
78
79 void srpc_set_counters(const struct srpc_counters *cnt)
80 {
81         spin_lock(&srpc_data.rpc_glock);
82         srpc_data.rpc_counters = *cnt;
83         spin_unlock(&srpc_data.rpc_glock);
84 }
85
86 static int
87 srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
88                    int nob)
89 {
90         LASSERT(off < PAGE_SIZE);
91         LASSERT(nob > 0 && nob <= PAGE_SIZE);
92
93         bk->bk_iovs[i].bv_offset = off;
94         bk->bk_iovs[i].bv_page   = pg;
95         bk->bk_iovs[i].bv_len    = nob;
96         return nob;
97 }
98
99 void
100 srpc_free_bulk(struct srpc_bulk *bk)
101 {
102         int i;
103         struct page *pg;
104
105         LASSERT(bk != NULL);
106
107         for (i = 0; i < bk->bk_niov; i++) {
108                 pg = bk->bk_iovs[i].bv_page;
109                 if (pg == NULL)
110                         break;
111
112                 __free_page(pg);
113         }
114
115         LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_niov]));
116 }
117
118 struct srpc_bulk *
119 srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
120                 unsigned bulk_len, int sink)
121 {
122         struct srpc_bulk *bk;
123         int i;
124
125         LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
126
127         LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt,
128                          offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
129         if (bk == NULL) {
130                 CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
131                 return NULL;
132         }
133
134         memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
135         bk->bk_sink   = sink;
136         bk->bk_len    = bulk_len;
137         bk->bk_niov   = bulk_npg;
138
139         for (i = 0; i < bulk_npg; i++) {
140                 struct page *pg;
141                 int nob;
142
143                 pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL);
144                 if (pg == NULL) {
145                         CERROR("Can't allocate page %d of %d\n", i, bulk_npg);
146                         srpc_free_bulk(bk);
147                         return NULL;
148                 }
149
150                 nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) -
151                       bulk_off;
152
153                 srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
154                 bulk_len -= nob;
155                 bulk_off = 0;
156         }
157
158         return bk;
159 }
160
161 static inline __u64
162 srpc_next_id (void)
163 {
164         __u64 id;
165
166         spin_lock(&srpc_data.rpc_glock);
167         id = srpc_data.rpc_matchbits++;
168         spin_unlock(&srpc_data.rpc_glock);
169         return id;
170 }
171
172 static void
173 srpc_init_server_rpc(struct srpc_server_rpc *rpc,
174                      struct srpc_service_cd *scd,
175                      struct srpc_buffer *buffer)
176 {
177         memset(rpc, 0, sizeof(*rpc));
178         swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
179                           srpc_serv_is_framework(scd->scd_svc) ?
180                           lst_sched_serial : lst_sched_test[scd->scd_cpt]);
181
182         rpc->srpc_ev.ev_fired = 1; /* no event expected now */
183
184         rpc->srpc_scd      = scd;
185         rpc->srpc_reqstbuf = buffer;
186         rpc->srpc_peer     = buffer->buf_peer;
187         rpc->srpc_self     = buffer->buf_self;
188         LNetInvalidateMDHandle(&rpc->srpc_replymdh);
189 }
190
191 static void
192 srpc_service_fini(struct srpc_service *svc)
193 {
194         struct srpc_service_cd *scd;
195         struct srpc_server_rpc *rpc;
196         struct srpc_buffer *buf;
197         struct list_head *q;
198         int i;
199
200         if (svc->sv_cpt_data == NULL)
201                 return;
202
203         cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
204                 while (1) {
205                         if (!list_empty(&scd->scd_buf_posted))
206                                 q = &scd->scd_buf_posted;
207                         else if (!list_empty(&scd->scd_buf_blocked))
208                                 q = &scd->scd_buf_blocked;
209                         else
210                                 break;
211
212                         while (!list_empty(q)) {
213                                 buf = list_entry(q->next,
214                                                  struct srpc_buffer,
215                                                  buf_list);
216                                 list_del(&buf->buf_list);
217                                 LIBCFS_FREE(buf, sizeof(*buf));
218                         }
219                 }
220
221                 LASSERT(list_empty(&scd->scd_rpc_active));
222
223                 while (!list_empty(&scd->scd_rpc_free)) {
224                         rpc = list_entry(scd->scd_rpc_free.next,
225                                          struct srpc_server_rpc,
226                                          srpc_list);
227                         list_del(&rpc->srpc_list);
228                         LIBCFS_FREE(rpc, sizeof(*rpc));
229                 }
230         }
231
232         cfs_percpt_free(svc->sv_cpt_data);
233         svc->sv_cpt_data = NULL;
234 }
235
236 static int
237 srpc_service_nrpcs(struct srpc_service *svc)
238 {
239         int nrpcs = svc->sv_wi_total / svc->sv_ncpts;
240
241         return srpc_serv_is_framework(svc) ?
242                max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN);
243 }
244
245 int srpc_add_buffer(struct swi_workitem *wi);
246
247 static int
248 srpc_service_init(struct srpc_service *svc)
249 {
250         struct srpc_service_cd *scd;
251         struct srpc_server_rpc *rpc;
252         int nrpcs;
253         int i;
254         int j;
255
256         svc->sv_shuttingdown = 0;
257
258         svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
259                                             sizeof(struct srpc_service_cd));
260         if (svc->sv_cpt_data == NULL)
261                 return -ENOMEM;
262
263         svc->sv_ncpts = srpc_serv_is_framework(svc) ?
264                         1 : cfs_cpt_number(lnet_cpt_table());
265         nrpcs = srpc_service_nrpcs(svc);
266
267         cfs_percpt_for_each(scd, i, svc->sv_cpt_data) {
268                 scd->scd_cpt = i;
269                 scd->scd_svc = svc;
270                 spin_lock_init(&scd->scd_lock);
271                 INIT_LIST_HEAD(&scd->scd_rpc_free);
272                 INIT_LIST_HEAD(&scd->scd_rpc_active);
273                 INIT_LIST_HEAD(&scd->scd_buf_posted);
274                 INIT_LIST_HEAD(&scd->scd_buf_blocked);
275
276                 scd->scd_ev.ev_data = scd;
277                 scd->scd_ev.ev_type = SRPC_REQUEST_RCVD;
278
279                 /* NB: don't use lst_sched_serial for adding buffer,
280                  * see details in srpc_service_add_buffers() */
281                 swi_init_workitem(&scd->scd_buf_wi,
282                                   srpc_add_buffer, lst_sched_test[i]);
283
284                 if (i != 0 && srpc_serv_is_framework(svc)) {
285                         /* NB: framework service only needs srpc_service_cd for
286                          * one partition, but we allocate for all to make
287                          * it easier to implement, it will waste a little
288                          * memory but nobody should care about this */
289                         continue;
290                 }
291
292                 for (j = 0; j < nrpcs; j++) {
293                         LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
294                                          i, sizeof(*rpc));
295                         if (rpc == NULL) {
296                                 srpc_service_fini(svc);
297                                 return -ENOMEM;
298                         }
299                         list_add(&rpc->srpc_list, &scd->scd_rpc_free);
300                 }
301         }
302
303         return 0;
304 }
305
306 int
307 srpc_add_service(struct srpc_service *sv)
308 {
309         int id = sv->sv_id;
310
311         LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID);
312
313         if (srpc_service_init(sv) != 0)
314                 return -ENOMEM;
315
316         spin_lock(&srpc_data.rpc_glock);
317
318         LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
319
320         if (srpc_data.rpc_services[id] != NULL) {
321                 spin_unlock(&srpc_data.rpc_glock);
322                 goto failed;
323         }
324
325         srpc_data.rpc_services[id] = sv;
326         spin_unlock(&srpc_data.rpc_glock);
327
328         CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name);
329         return 0;
330
331 failed:
332         srpc_service_fini(sv);
333         return -EBUSY;
334 }
335
336 int
337 srpc_remove_service(struct srpc_service *sv)
338 {
339         int id = sv->sv_id;
340
341         spin_lock(&srpc_data.rpc_glock);
342
343         if (srpc_data.rpc_services[id] != sv) {
344                 spin_unlock(&srpc_data.rpc_glock);
345                 return -ENOENT;
346         }
347
348         srpc_data.rpc_services[id] = NULL;
349         spin_unlock(&srpc_data.rpc_glock);
350         return 0;
351 }
352
353 static int
354 srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
355                        int len, int options, struct lnet_process_id peer,
356                        struct lnet_handle_md *mdh, struct srpc_event *ev)
357 {
358         int rc;
359         struct lnet_md md;
360         struct lnet_me *me;
361
362         me = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK,
363                           local ? LNET_INS_LOCAL : LNET_INS_AFTER);
364         if (IS_ERR(me)) {
365                 rc = PTR_ERR(me);
366                 CERROR("LNetMEAttach failed: %d\n", rc);
367                 LASSERT(rc == -ENOMEM);
368                 return -ENOMEM;
369         }
370
371         md.threshold = 1;
372         md.user_ptr  = ev;
373         md.start     = buf;
374         md.length    = len;
375         md.options   = options;
376         md.eq_handle = srpc_data.rpc_lnet_eq;
377
378         rc = LNetMDAttach(me, md, LNET_UNLINK, mdh);
379         if (rc != 0) {
380                 CERROR("LNetMDAttach failed: %d\n", rc);
381                 LASSERT(rc == -ENOMEM);
382
383                 LNetMEUnlink(me);
384                 return -ENOMEM;
385         }
386
387         CDEBUG(D_NET,
388                "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n",
389                libcfs_id2str(peer), portal, matchbits);
390         return 0;
391 }
392
393 static int
394 srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
395                       int options, struct lnet_process_id peer,
396                       lnet_nid_t self, struct lnet_handle_md *mdh,
397                       struct srpc_event *ev)
398 {
399         int rc;
400         struct lnet_md md;
401
402         md.user_ptr  = ev;
403         md.start     = buf;
404         md.length    = len;
405         md.eq_handle = srpc_data.rpc_lnet_eq;
406         md.threshold = ((options & LNET_MD_OP_GET) != 0) ? 2 : 1;
407         md.options   = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET);
408
409         rc = LNetMDBind(md, LNET_UNLINK, mdh);
410         if (rc != 0) {
411                 CERROR("LNetMDBind failed: %d\n", rc);
412                 LASSERT(rc == -ENOMEM);
413                 return -ENOMEM;
414         }
415
416         /* this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options.
417          * they're only meaningful for MDs attached to an ME (i.e. passive
418          * buffers...
419          */
420         if ((options & LNET_MD_OP_PUT) != 0) {
421                 rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer,
422                              portal, matchbits, 0, 0);
423         } else {
424                 LASSERT((options & LNET_MD_OP_GET) != 0);
425
426                 rc = LNetGet(self, *mdh, peer, portal, matchbits, 0, false);
427         }
428
429         if (rc != 0) {
430                 CERROR("LNet%s(%s, %d, %lld) failed: %d\n",
431                        ((options & LNET_MD_OP_PUT) != 0) ? "Put" : "Get",
432                        libcfs_id2str(peer), portal, matchbits, rc);
433
434                 /* The forthcoming unlink event will complete this operation
435                  * with failure, so fall through and return success here.
436                  */
437                 rc = LNetMDUnlink(*mdh);
438                 LASSERT(rc == 0);
439         } else {
440                 CDEBUG(D_NET,
441                        "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n",
442                        libcfs_id2str(peer), portal, matchbits);
443         }
444         return 0;
445 }
446
447 static int
448 srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
449                          struct lnet_handle_md *mdh, struct srpc_event *ev)
450 {
451         struct lnet_process_id any = {0};
452
453         any.nid = LNET_NID_ANY;
454         any.pid = LNET_PID_ANY;
455
456         return srpc_post_passive_rdma(srpc_serv_portal(service),
457                                       local, service, buf, len,
458                                       LNET_MD_OP_PUT, any, mdh, ev);
459 }
460
/*
 * Post request buffer @buf on service partition @scd.
 *
 * Called with scd->scd_lock held and returns with it held, but the lock
 * is dropped around the LNet calls and around freeing @buf.
 *
 * Returns 0 if the buffer was posted, otherwise the error from
 * srpc_post_passive_rqtbuf().  On error @buf is unlinked from
 * scd_buf_posted and freed, unless the service is shutting down, in which
 * case it is left on scd_buf_posted (srpc_service_fini() frees whatever
 * remains on that list).
 */
static int
srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
__must_hold(&scd->scd_lock)
{
        struct srpc_service *sv = scd->scd_svc;
        struct srpc_msg *msg = &buf->buf_msg;
        int rc;

        /* Queue and account for buf while the lock is still held; see the
         * comment after the post below for why this must come first. */
        LNetInvalidateMDHandle(&buf->buf_mdh);
        list_add(&buf->buf_list, &scd->scd_buf_posted);
        scd->scd_buf_nposted++;
        spin_unlock(&scd->scd_lock);

        /* framework services post with local == 0, test services with 1 */
        rc = srpc_post_passive_rqtbuf(sv->sv_id,
                                      !srpc_serv_is_framework(sv),
                                      msg, sizeof(*msg), &buf->buf_mdh,
                                      &scd->scd_ev);

        /* At this point, a RPC (new or delayed) may have arrived in
         * msg and its event handler has been called. That is why buf
         * had to be added to scd_buf_posted _before_ scd_lock was
         * dropped above. */

        spin_lock(&scd->scd_lock);

        if (rc == 0) {
                if (!sv->sv_shuttingdown)
                        return 0;

                spin_unlock(&scd->scd_lock);
                /* srpc_shutdown_service might have tried to unlink me
                 * when my buf_mdh was still invalid */
                LNetMDUnlink(buf->buf_mdh);
                spin_lock(&scd->scd_lock);
                return 0;
        }

        /* posting failed: undo the accounting done above */
        scd->scd_buf_nposted--;
        if (sv->sv_shuttingdown)
                return rc; /* don't allow to change scd_buf_posted:
                            * srpc_shutdown_service() walks it unlocked */

        list_del(&buf->buf_list);
        spin_unlock(&scd->scd_lock);

        LIBCFS_FREE(buf, sizeof(*buf));

        spin_lock(&scd->scd_lock);
        return rc;
}
509
/*
 * Workitem handler that posts new request buffers for one service
 * partition.  It allocates and posts one buffer for each unit of
 * scd_buf_adjust until the count reaches zero, the service starts
 * shutting down, or an error occurs.
 *
 * Always returns 0.  A failure is reported through scd_buf_err and
 * scd_buf_err_stamp instead (-ENOMEM, -ESHUTDOWN, or the error from
 * srpc_service_post_buffer()).
 */
int
srpc_add_buffer(struct swi_workitem *wi)
{
        struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd,
                                                   scd_buf_wi);
        struct srpc_buffer *buf;
        int rc = 0;

        /* it's called by workitem scheduler threads, these threads
         * should have been set CPT affinity, so buffers will be posted
         * on CPT local list of Portal */
        spin_lock(&scd->scd_lock);

        while (scd->scd_buf_adjust > 0 &&
               !scd->scd_svc->sv_shuttingdown) {
                scd->scd_buf_adjust--; /* consume it */
                scd->scd_buf_posting++; /* one buffer now in progress */

                /* allocate without holding the spinlock */
                spin_unlock(&scd->scd_lock);

                LIBCFS_ALLOC(buf, sizeof(*buf));
                if (buf == NULL) {
                        CERROR("Failed to add new buf to service: %s\n",
                               scd->scd_svc->sv_name);
                        spin_lock(&scd->scd_lock);
                        rc = -ENOMEM;
                        break;
                }

                spin_lock(&scd->scd_lock);
                /* shutdown may have started while the lock was dropped */
                if (scd->scd_svc->sv_shuttingdown) {
                        spin_unlock(&scd->scd_lock);
                        LIBCFS_FREE(buf, sizeof(*buf));

                        spin_lock(&scd->scd_lock);
                        rc = -ESHUTDOWN;
                        break;
                }

                rc = srpc_service_post_buffer(scd, buf);
                if (rc != 0)
                        break; /* buf has been freed inside, or left on
                                * scd_buf_posted if shutdown has begun */

                LASSERT(scd->scd_buf_posting > 0);
                scd->scd_buf_posting--;
                scd->scd_buf_total++;
                scd->scd_buf_low = max(2, scd->scd_buf_total / 4);
        }

        /* every break above leaves the loop with scd_lock held and one
         * in-progress buffer still counted in scd_buf_posting */
        if (rc != 0) {
                scd->scd_buf_err_stamp = ktime_get_real_seconds();
                scd->scd_buf_err = rc;

                LASSERT(scd->scd_buf_posting > 0);
                scd->scd_buf_posting--;
        }

        spin_unlock(&scd->scd_lock);
        return 0;
}
570
571 int
572 srpc_service_add_buffers(struct srpc_service *sv, int nbuffer)
573 {
574         struct srpc_service_cd *scd;
575         int rc = 0;
576         int i;
577
578         LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer);
579
580         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
581                 spin_lock(&scd->scd_lock);
582
583                 scd->scd_buf_err = 0;
584                 scd->scd_buf_err_stamp = 0;
585                 scd->scd_buf_posting = 0;
586                 scd->scd_buf_adjust = nbuffer;
587                 /* start to post buffers */
588                 swi_schedule_workitem(&scd->scd_buf_wi);
589                 spin_unlock(&scd->scd_lock);
590
591                 /* framework service only post buffer for one partition  */
592                 if (srpc_serv_is_framework(sv))
593                         break;
594         }
595
596         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
597                 spin_lock(&scd->scd_lock);
598                 /*
599                  * NB: srpc_service_add_buffers() can be called inside
600                  * thread context of lst_sched_serial, and we don't normally
601                  * allow to sleep inside thread context of WI scheduler
602                  * because it will block current scheduler thread from doing
603                  * anything else, even worse, it could deadlock if it's
604                  * waiting on result from another WI of the same scheduler.
605                  * However, it's safe at here because scd_buf_wi is scheduled
606                  * by thread in a different WI scheduler (lst_sched_test),
607                  * so we don't have any risk of deadlock, though this could
608                  * block all WIs pending on lst_sched_serial for a moment
609                  * which is not good but not fatal.
610                  */
611                 lst_wait_until(scd->scd_buf_err != 0 ||
612                                (scd->scd_buf_adjust == 0 &&
613                                 scd->scd_buf_posting == 0),
614                                scd->scd_lock, "waiting for adding buffer\n");
615
616                 if (scd->scd_buf_err != 0 && rc == 0)
617                         rc = scd->scd_buf_err;
618
619                 spin_unlock(&scd->scd_lock);
620         }
621
622         return rc;
623 }
624
625 void
626 srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer)
627 {
628         struct srpc_service_cd *scd;
629         int num;
630         int i;
631
632         LASSERT(!sv->sv_shuttingdown);
633
634         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
635                 spin_lock(&scd->scd_lock);
636
637                 num = scd->scd_buf_total + scd->scd_buf_posting;
638                 scd->scd_buf_adjust -= min(nbuffer, num);
639
640                 spin_unlock(&scd->scd_lock);
641         }
642 }
643
644 /* returns 1 if sv has finished, otherwise 0 */
645 int
646 srpc_finish_service(struct srpc_service *sv)
647 {
648         struct srpc_service_cd *scd;
649         struct srpc_server_rpc *rpc;
650         int i;
651
652         LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */
653
654         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
655                 spin_lock(&scd->scd_lock);
656                 if (!swi_deschedule_workitem(&scd->scd_buf_wi)) {
657                         spin_unlock(&scd->scd_lock);
658                         return 0;
659                 }
660
661                 if (scd->scd_buf_nposted > 0) {
662                         CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n",
663                                scd->scd_buf_nposted);
664                         spin_unlock(&scd->scd_lock);
665                         return 0;
666                 }
667
668                 if (list_empty(&scd->scd_rpc_active)) {
669                         spin_unlock(&scd->scd_lock);
670                         continue;
671                 }
672
673                 rpc = list_entry(scd->scd_rpc_active.next,
674                                  struct srpc_server_rpc, srpc_list);
675                 CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s scheduled %d running %d, ev fired %d type %d status %d lnet %d\n",
676                         rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
677                         swi_state2str(rpc->srpc_wi.swi_state),
678                         rpc->srpc_wi.swi_workitem.wi_scheduled,
679                         rpc->srpc_wi.swi_workitem.wi_running,
680                         rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type,
681                         rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet);
682                 spin_unlock(&scd->scd_lock);
683                 return 0;
684         }
685
686         /* no lock needed from now on */
687         srpc_service_fini(sv);
688         return 1;
689 }
690
691 /* called with sv->sv_lock held */
692 static void
693 srpc_service_recycle_buffer(struct srpc_service_cd *scd,
694                             struct srpc_buffer *buf)
695 __must_hold(&scd->scd_lock)
696 {
697         if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
698                 if (srpc_service_post_buffer(scd, buf) != 0) {
699                         CWARN("Failed to post %s buffer\n",
700                               scd->scd_svc->sv_name);
701                 }
702                 return;
703         }
704
705         /* service is shutting down, or we want to recycle some buffers */
706         scd->scd_buf_total--;
707
708         if (scd->scd_buf_adjust < 0) {
709                 scd->scd_buf_adjust++;
710                 if (scd->scd_buf_adjust < 0 &&
711                     scd->scd_buf_total == 0 && scd->scd_buf_posting == 0) {
712                         CDEBUG(D_INFO,
713                                "Try to recyle %d buffers but nothing left\n",
714                                scd->scd_buf_adjust);
715                         scd->scd_buf_adjust = 0;
716                 }
717         }
718
719         spin_unlock(&scd->scd_lock);
720         LIBCFS_FREE(buf, sizeof(*buf));
721         spin_lock(&scd->scd_lock);
722 }
723
724 void
725 srpc_abort_service(struct srpc_service *sv)
726 {
727         struct srpc_service_cd *scd;
728         struct srpc_server_rpc *rpc;
729         int i;
730
731         CDEBUG(D_NET, "Aborting service: id %d, name %s\n",
732                sv->sv_id, sv->sv_name);
733
734         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
735                 spin_lock(&scd->scd_lock);
736
737                 /* schedule in-flight RPCs to notice the abort, NB:
738                  * racing with incoming RPCs; complete fix should make test
739                  * RPCs carry session ID in its headers
740                  */
741                 list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) {
742                         rpc->srpc_aborted = 1;
743                         swi_schedule_workitem(&rpc->srpc_wi);
744                 }
745
746                 spin_unlock(&scd->scd_lock);
747         }
748 }
749
750 void
751 srpc_shutdown_service(struct srpc_service *sv)
752 {
753         struct srpc_service_cd *scd;
754         struct srpc_server_rpc *rpc;
755         struct srpc_buffer *buf;
756         int i;
757
758         CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
759                sv->sv_id, sv->sv_name);
760
761         cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
762                 spin_lock(&scd->scd_lock);
763
764         sv->sv_shuttingdown = 1; /* i.e. no new active RPC */
765
766         cfs_percpt_for_each(scd, i, sv->sv_cpt_data)
767                 spin_unlock(&scd->scd_lock);
768
769         cfs_percpt_for_each(scd, i, sv->sv_cpt_data) {
770                 spin_lock(&scd->scd_lock);
771
772                 /* schedule in-flight RPCs to notice the shutdown */
773                 list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list)
774                         swi_schedule_workitem(&rpc->srpc_wi);
775
776                 spin_unlock(&scd->scd_lock);
777
778                 /* OK to traverse scd_buf_posted without lock, since no one
779                  * touches scd_buf_posted now
780                  */
781                 list_for_each_entry(buf, &scd->scd_buf_posted, buf_list)
782                         LNetMDUnlink(buf->buf_mdh);
783         }
784 }
785
786 static int
787 srpc_send_request(struct srpc_client_rpc *rpc)
788 {
789         struct srpc_event *ev = &rpc->crpc_reqstev;
790         int rc;
791
792         ev->ev_fired = 0;
793         ev->ev_data  = rpc;
794         ev->ev_type  = SRPC_REQUEST_SENT;
795
796         rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
797                                    rpc->crpc_service, &rpc->crpc_reqstmsg,
798                                    sizeof(struct srpc_msg), LNET_MD_OP_PUT,
799                                    rpc->crpc_dest, LNET_NID_ANY,
800                                    &rpc->crpc_reqstmdh, ev);
801         if (rc != 0) {
802                 LASSERT(rc == -ENOMEM);
803                 ev->ev_fired = 1;  /* no more event expected */
804         }
805         return rc;
806 }
807
808 static int
809 srpc_prepare_reply(struct srpc_client_rpc *rpc)
810 {
811         struct srpc_event *ev = &rpc->crpc_replyev;
812         u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
813         int rc;
814
815         ev->ev_fired = 0;
816         ev->ev_data  = rpc;
817         ev->ev_type  = SRPC_REPLY_RCVD;
818
819         *id = srpc_next_id();
820
821         rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
822                                     &rpc->crpc_replymsg,
823                                     sizeof(struct srpc_msg),
824                                     LNET_MD_OP_PUT, rpc->crpc_dest,
825                                     &rpc->crpc_replymdh, ev);
826         if (rc != 0) {
827                 LASSERT(rc == -ENOMEM);
828                 ev->ev_fired = 1;  /* no more event expected */
829         }
830         return rc;
831 }
832
833 static int
834 srpc_prepare_bulk(struct srpc_client_rpc *rpc)
835 {
836         struct srpc_bulk *bk = &rpc->crpc_bulk;
837         struct srpc_event *ev = &rpc->crpc_bulkev;
838         __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
839         int rc;
840         int opt;
841
842         LASSERT(bk->bk_niov <= LNET_MAX_IOV);
843
844         /* nothing to do */
845         if (bk->bk_niov == 0)
846                 return 0;
847
848         opt = bk->bk_sink ? LNET_MD_OP_PUT : LNET_MD_OP_GET;
849         opt |= LNET_MD_KIOV;
850
851         ev->ev_fired = 0;
852         ev->ev_data  = rpc;
853         ev->ev_type  = SRPC_BULK_REQ_RCVD;
854
855         *id = srpc_next_id();
856
857         rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
858                                     &bk->bk_iovs[0], bk->bk_niov, opt,
859                                     rpc->crpc_dest, &bk->bk_mdh, ev);
860         if (rc != 0) {
861                 LASSERT(rc == -ENOMEM);
862                 ev->ev_fired = 1;  /* no more event expected */
863         }
864         return rc;
865 }
866
867 static int
868 srpc_do_bulk(struct srpc_server_rpc *rpc)
869 {
870         struct srpc_event *ev = &rpc->srpc_ev;
871         struct srpc_bulk *bk = rpc->srpc_bulk;
872         __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
873         int rc;
874         int opt;
875
876         LASSERT(bk != NULL);
877
878         opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT;
879         opt |= LNET_MD_KIOV;
880
881         ev->ev_fired = 0;
882         ev->ev_data  = rpc;
883         ev->ev_type  = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT;
884
885         rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id,
886                                    &bk->bk_iovs[0], bk->bk_niov, opt,
887                                    rpc->srpc_peer, rpc->srpc_self,
888                                    &bk->bk_mdh, ev);
889         if (rc != 0)
890                 ev->ev_fired = 1;  /* no more event expected */
891         return rc;
892 }
893
894 /* only called from srpc_handle_rpc */
895 static void
896 srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
897 {
898         struct srpc_service_cd  *scd = rpc->srpc_scd;
899         struct srpc_service     *sv  = scd->scd_svc;
900         struct srpc_buffer *buffer;
901
902         LASSERT(status != 0 || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
903
904         rpc->srpc_status = status;
905
906         CDEBUG_LIMIT(status == 0 ? D_NET : D_NETERROR,
907                      "Server RPC %p done: service %s, peer %s, status %s:%d\n",
908                      rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer),
909                      swi_state2str(rpc->srpc_wi.swi_state), status);
910
911         if (status != 0) {
912                 spin_lock(&srpc_data.rpc_glock);
913                 srpc_data.rpc_counters.rpcs_dropped++;
914                 spin_unlock(&srpc_data.rpc_glock);
915         }
916
917         if (rpc->srpc_done != NULL)
918                 (*rpc->srpc_done) (rpc);
919         LASSERT(rpc->srpc_bulk == NULL);
920
921         spin_lock(&scd->scd_lock);
922
923         if (rpc->srpc_reqstbuf != NULL) {
924                 /* NB might drop sv_lock in srpc_service_recycle_buffer, but
925                  * sv won't go away for scd_rpc_active must not be empty
926                  */
927                 srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf);
928                 rpc->srpc_reqstbuf = NULL;
929         }
930
931         list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */
932
933         /*
934          * No one can schedule me now since:
935          * - I'm not on scd_rpc_active.
936          * - all LNet events have been fired.
937          * Cancel pending schedules and prevent future schedule attempts:
938          */
939         LASSERT(rpc->srpc_ev.ev_fired);
940         swi_exit_workitem(&rpc->srpc_wi);
941
942         if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
943                 buffer = list_entry(scd->scd_buf_blocked.next,
944                                     struct srpc_buffer, buf_list);
945                 list_del(&buffer->buf_list);
946
947                 srpc_init_server_rpc(rpc, scd, buffer);
948                 list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active);
949                 swi_schedule_workitem(&rpc->srpc_wi);
950         } else {
951                 list_add(&rpc->srpc_list, &scd->scd_rpc_free);
952         }
953
954         spin_unlock(&scd->scd_lock);
955 }
956
957 /* handles an incoming RPC */
958 static int srpc_handle_rpc(struct swi_workitem *wi)
959 {
960         struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc,
961                                                    srpc_wi);
962         struct srpc_service_cd *scd = rpc->srpc_scd;
963         struct srpc_service *sv = scd->scd_svc;
964         struct srpc_event *ev = &rpc->srpc_ev;
965         int rc = 0;
966
967         LASSERT(wi == &rpc->srpc_wi);
968
969         spin_lock(&scd->scd_lock);
970
971         if (sv->sv_shuttingdown || rpc->srpc_aborted) {
972                 spin_unlock(&scd->scd_lock);
973
974                 if (rpc->srpc_bulk != NULL)
975                         LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
976                 LNetMDUnlink(rpc->srpc_replymdh);
977
978                 if (ev->ev_fired) { /* no more event, OK to finish */
979                         srpc_server_rpc_done(rpc, -ESHUTDOWN);
980                         return 1;
981                 }
982                 return 0;
983         }
984
985         spin_unlock(&scd->scd_lock);
986
987         switch (wi->swi_state) {
988         default:
989                 LBUG();
990                 /* fallthrough */
991         case SWI_STATE_NEWBORN: {
992                 struct srpc_msg *msg;
993                 struct srpc_generic_reply *reply;
994
995                 msg = &rpc->srpc_reqstbuf->buf_msg;
996                 reply = &rpc->srpc_replymsg.msg_body.reply;
997
998                 if (msg->msg_magic == 0) {
999                         /* moaned already in srpc_lnet_ev_handler */
1000                         srpc_server_rpc_done(rpc, EBADMSG);
1001                         return 1;
1002                 }
1003
1004                 srpc_unpack_msg_hdr(msg);
1005                 if (msg->msg_version != SRPC_MSG_VERSION) {
1006                         CWARN("Version mismatch: %u, %u expected, from %s\n",
1007                               msg->msg_version, SRPC_MSG_VERSION,
1008                               libcfs_id2str(rpc->srpc_peer));
1009                         reply->status = EPROTO;
1010                         /* drop through and send reply */
1011                 } else {
1012                         reply->status = 0;
1013                         rc = (*sv->sv_handler)(rpc);
1014                         LASSERT(reply->status == 0 || !rpc->srpc_bulk);
1015                         if (rc != 0) {
1016                                 srpc_server_rpc_done(rpc, rc);
1017                                 return 1;
1018                         }
1019                 }
1020
1021                 wi->swi_state = SWI_STATE_BULK_STARTED;
1022
1023                 if (rpc->srpc_bulk != NULL) {
1024                         rc = srpc_do_bulk(rpc);
1025                         if (rc == 0)
1026                                 return 0; /* wait for bulk */
1027
1028                         LASSERT(ev->ev_fired);
1029                         ev->ev_status = rc;
1030                 }
1031         }
1032         /* fallthrough */
1033         case SWI_STATE_BULK_STARTED:
1034                 LASSERT(rpc->srpc_bulk == NULL || ev->ev_fired);
1035
1036                 if (rpc->srpc_bulk != NULL) {
1037                         rc = ev->ev_status;
1038
1039                         if (sv->sv_bulk_ready != NULL)
1040                                 rc = (*sv->sv_bulk_ready) (rpc, rc);
1041
1042                         if (rc != 0) {
1043                                 srpc_server_rpc_done(rpc, rc);
1044                                 return 1;
1045                         }
1046                 }
1047
1048                 wi->swi_state = SWI_STATE_REPLY_SUBMITTED;
1049                 rc = srpc_send_reply(rpc);
1050                 if (rc == 0)
1051                         return 0; /* wait for reply */
1052                 srpc_server_rpc_done(rpc, rc);
1053                 return 1;
1054
1055         case SWI_STATE_REPLY_SUBMITTED:
1056                 if (!ev->ev_fired) {
1057                         CERROR("RPC %p: bulk %p, service %d\n",
1058                                rpc, rpc->srpc_bulk, sv->sv_id);
1059                         CERROR("Event: status %d, type %d, lnet %d\n",
1060                                ev->ev_status, ev->ev_type, ev->ev_lnet);
1061                         LASSERT(ev->ev_fired);
1062                 }
1063
1064                 wi->swi_state = SWI_STATE_DONE;
1065                 srpc_server_rpc_done(rpc, ev->ev_status);
1066                 return 1;
1067         }
1068
1069         return 0;
1070 }
1071
1072 static void
1073 srpc_client_rpc_expired (void *data)
1074 {
1075         struct srpc_client_rpc *rpc = data;
1076
1077         CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
1078               rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
1079               rpc->crpc_timeout);
1080
1081         spin_lock(&rpc->crpc_lock);
1082
1083         rpc->crpc_timeout = 0;
1084         srpc_abort_rpc(rpc, -ETIMEDOUT);
1085
1086         spin_unlock(&rpc->crpc_lock);
1087
1088         spin_lock(&srpc_data.rpc_glock);
1089         srpc_data.rpc_counters.rpcs_expired++;
1090         spin_unlock(&srpc_data.rpc_glock);
1091 }
1092
1093 static void
1094 srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
1095 {
1096         struct stt_timer *timer = &rpc->crpc_timer;
1097
1098         if (rpc->crpc_timeout == 0)
1099                 return;
1100
1101         INIT_LIST_HEAD(&timer->stt_list);
1102         timer->stt_data    = rpc;
1103         timer->stt_func    = srpc_client_rpc_expired;
1104         timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout;
1105         stt_add_timer(timer);
1106 }
1107
1108 /*
1109  * Called with rpc->crpc_lock held.
1110  *
1111  * Upon exit the RPC expiry timer is not queued and the handler is not
1112  * running on any CPU.
1113  */
1114 static void
1115 srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
1116 {
1117         /* timer not planted or already exploded */
1118         if (rpc->crpc_timeout == 0)
1119                 return;
1120
1121         /* timer successfully defused */
1122         if (stt_del_timer(&rpc->crpc_timer))
1123                 return;
1124
1125         /* timer detonated, wait for it to explode */
1126         while (rpc->crpc_timeout != 0) {
1127                 spin_unlock(&rpc->crpc_lock);
1128
1129                 schedule();
1130
1131                 spin_lock(&rpc->crpc_lock);
1132         }
1133 }
1134
1135 static void
1136 srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
1137 {
1138         struct swi_workitem *wi = &rpc->crpc_wi;
1139
1140         LASSERT(status != 0 || wi->swi_state == SWI_STATE_DONE);
1141
1142         spin_lock(&rpc->crpc_lock);
1143
1144         rpc->crpc_closed = 1;
1145         if (rpc->crpc_status == 0)
1146                 rpc->crpc_status = status;
1147
1148         srpc_del_client_rpc_timer(rpc);
1149
1150         CDEBUG_LIMIT((status == 0) ? D_NET : D_NETERROR,
1151                      "Client RPC done: service %d, peer %s, status %s:%d:%d\n",
1152                      rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
1153                      swi_state2str(wi->swi_state), rpc->crpc_aborted, status);
1154
1155         /*
1156          * No one can schedule me now since:
1157          * - RPC timer has been defused.
1158          * - all LNet events have been fired.
1159          * - crpc_closed has been set, preventing srpc_abort_rpc from
1160          *   scheduling me.
1161          * Cancel pending schedules and prevent future schedule attempts:
1162          */
1163         LASSERT(!srpc_event_pending(rpc));
1164         swi_exit_workitem(wi);
1165
1166         spin_unlock(&rpc->crpc_lock);
1167
1168         (*rpc->crpc_done)(rpc);
1169 }
1170
1171 /* sends an outgoing RPC */
1172 int
1173 srpc_send_rpc(struct swi_workitem *wi)
1174 {
1175         int rc = 0;
1176         struct srpc_client_rpc *rpc;
1177         struct srpc_msg *reply;
1178         int do_bulk;
1179
1180         LASSERT(wi != NULL);
1181
1182         rpc = container_of(wi, struct srpc_client_rpc, crpc_wi);
1183
1184         LASSERT(rpc != NULL);
1185         LASSERT(wi == &rpc->crpc_wi);
1186
1187         reply = &rpc->crpc_replymsg;
1188         do_bulk = rpc->crpc_bulk.bk_niov > 0;
1189
1190         spin_lock(&rpc->crpc_lock);
1191
1192         if (rpc->crpc_aborted) {
1193                 spin_unlock(&rpc->crpc_lock);
1194                 goto abort;
1195         }
1196
1197         spin_unlock(&rpc->crpc_lock);
1198
1199         switch (wi->swi_state) {
1200         default:
1201                 LBUG();
1202         case SWI_STATE_NEWBORN:
1203                 LASSERT(!srpc_event_pending(rpc));
1204
1205                 rc = srpc_prepare_reply(rpc);
1206                 if (rc != 0) {
1207                         srpc_client_rpc_done(rpc, rc);
1208                         return 1;
1209                 }
1210
1211                 rc = srpc_prepare_bulk(rpc);
1212                 if (rc != 0)
1213                         break;
1214
1215                 wi->swi_state = SWI_STATE_REQUEST_SUBMITTED;
1216                 rc = srpc_send_request(rpc);
1217                 break;
1218
1219         case SWI_STATE_REQUEST_SUBMITTED:
1220                 /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
1221                  * order; however, they're processed in a strict order:
1222                  * rqt, rpy, and bulk.
1223                  */
1224                 if (!rpc->crpc_reqstev.ev_fired)
1225                         break;
1226
1227                 rc = rpc->crpc_reqstev.ev_status;
1228                 if (rc != 0)
1229                         break;
1230
1231                 wi->swi_state = SWI_STATE_REQUEST_SENT;
1232                 /* fallthrough */
1233         case SWI_STATE_REQUEST_SENT: {
1234                 enum srpc_msg_type type;
1235
1236                 type = srpc_service2reply(rpc->crpc_service);
1237
1238                 if (!rpc->crpc_replyev.ev_fired)
1239                         break;
1240
1241                 rc = rpc->crpc_replyev.ev_status;
1242                 if (rc != 0)
1243                         break;
1244
1245                 srpc_unpack_msg_hdr(reply);
1246                 if (reply->msg_type != type ||
1247                     (reply->msg_magic != SRPC_MSG_MAGIC &&
1248                      reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
1249                         CWARN("Bad message from %s: type %u (%d expected), magic %u (%d expected).\n",
1250                               libcfs_id2str(rpc->crpc_dest),
1251                               reply->msg_type, type,
1252                               reply->msg_magic, SRPC_MSG_MAGIC);
1253                         rc = -EBADMSG;
1254                         break;
1255                 }
1256
1257                 if (do_bulk && reply->msg_body.reply.status != 0) {
1258                         CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n",
1259                               reply->msg_body.reply.status,
1260                               libcfs_id2str(rpc->crpc_dest));
1261                         LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
1262                 }
1263
1264                 wi->swi_state = SWI_STATE_REPLY_RECEIVED;
1265         }
1266         /* fallthrough */
1267         case SWI_STATE_REPLY_RECEIVED:
1268                 if (do_bulk && !rpc->crpc_bulkev.ev_fired)
1269                         break;
1270
1271                 rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0;
1272
1273                 /* Bulk buffer was unlinked due to remote error. Clear error
1274                  * since reply buffer still contains valid data.
1275                  * NB rpc->crpc_done shouldn't look into bulk data in case of
1276                  * remote error.
1277                  */
1278                 if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK &&
1279                     rpc->crpc_status == 0 && reply->msg_body.reply.status != 0)
1280                         rc = 0;
1281
1282                 wi->swi_state = SWI_STATE_DONE;
1283                 srpc_client_rpc_done(rpc, rc);
1284                 return 1;
1285         }
1286
1287         if (rc != 0) {
1288                 spin_lock(&rpc->crpc_lock);
1289                 srpc_abort_rpc(rpc, rc);
1290                 spin_unlock(&rpc->crpc_lock);
1291         }
1292
1293 abort:
1294         if (rpc->crpc_aborted) {
1295                 LNetMDUnlink(rpc->crpc_reqstmdh);
1296                 LNetMDUnlink(rpc->crpc_replymdh);
1297                 LNetMDUnlink(rpc->crpc_bulk.bk_mdh);
1298
1299                 if (!srpc_event_pending(rpc)) {
1300                         srpc_client_rpc_done(rpc, -EINTR);
1301                         return 1;
1302                 }
1303         }
1304         return 0;
1305 }
1306
1307 struct srpc_client_rpc *
1308 srpc_create_client_rpc(struct lnet_process_id peer, int service,
1309                        int nbulkiov, int bulklen,
1310                        void (*rpc_done)(struct srpc_client_rpc *),
1311                        void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
1312 {
1313         struct srpc_client_rpc *rpc;
1314
1315         LIBCFS_ALLOC(rpc, offsetof(struct srpc_client_rpc,
1316                                    crpc_bulk.bk_iovs[nbulkiov]));
1317         if (rpc == NULL)
1318                 return NULL;
1319
1320         srpc_init_client_rpc(rpc, peer, service, nbulkiov,
1321                              bulklen, rpc_done, rpc_fini, priv);
1322         return rpc;
1323 }
1324
1325 /* called with rpc->crpc_lock held */
1326 void
1327 srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
1328 {
1329         LASSERT(why != 0);
1330
1331         if (rpc->crpc_aborted || /* already aborted */
1332             rpc->crpc_closed)    /* callback imminent */
1333                 return;
1334
1335         CDEBUG(D_NET,
1336                "Aborting RPC: service %d, peer %s, state %s, why %d\n",
1337                rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
1338                swi_state2str(rpc->crpc_wi.swi_state), why);
1339
1340         rpc->crpc_aborted = 1;
1341         rpc->crpc_status  = why;
1342         swi_schedule_workitem(&rpc->crpc_wi);
1343 }
1344
1345 /* called with rpc->crpc_lock held */
1346 void
1347 srpc_post_rpc(struct srpc_client_rpc *rpc)
1348 {
1349         LASSERT(!rpc->crpc_aborted);
1350         LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
1351
1352         CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n",
1353                libcfs_id2str(rpc->crpc_dest), rpc->crpc_service,
1354                rpc->crpc_timeout);
1355
1356         srpc_add_client_rpc_timer(rpc);
1357         swi_schedule_workitem(&rpc->crpc_wi);
1358 }
1359
1360
1361 int
1362 srpc_send_reply(struct srpc_server_rpc *rpc)
1363 {
1364         struct srpc_event *ev = &rpc->srpc_ev;
1365         struct srpc_msg *msg = &rpc->srpc_replymsg;
1366         struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
1367         struct srpc_service_cd *scd = rpc->srpc_scd;
1368         struct srpc_service *sv = scd->scd_svc;
1369         __u64 rpyid;
1370         int rc;
1371
1372         LASSERT(buffer != NULL);
1373         rpyid = buffer->buf_msg.msg_body.reqst.rpyid;
1374
1375         spin_lock(&scd->scd_lock);
1376
1377         if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) {
1378                 /* Repost buffer before replying since test client
1379                  * might send me another RPC once it gets the reply
1380                  */
1381                 if (srpc_service_post_buffer(scd, buffer) != 0)
1382                         CWARN("Failed to repost %s buffer\n", sv->sv_name);
1383                 rpc->srpc_reqstbuf = NULL;
1384         }
1385
1386         spin_unlock(&scd->scd_lock);
1387
1388         ev->ev_fired = 0;
1389         ev->ev_data  = rpc;
1390         ev->ev_type  = SRPC_REPLY_SENT;
1391
1392         msg->msg_magic   = SRPC_MSG_MAGIC;
1393         msg->msg_version = SRPC_MSG_VERSION;
1394         msg->msg_type    = srpc_service2reply(sv->sv_id);
1395
1396         rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg,
1397                                    sizeof(*msg), LNET_MD_OP_PUT,
1398                                    rpc->srpc_peer, rpc->srpc_self,
1399                                    &rpc->srpc_replymdh, ev);
1400         if (rc != 0)
1401                 ev->ev_fired = 1;  /* no more event expected */
1402         return rc;
1403 }
1404
1405 /* when in kernel always called with LNET_LOCK() held, and in thread context */
1406 static void
1407 srpc_lnet_ev_handler(struct lnet_event *ev)
1408 {
1409         struct srpc_service_cd *scd;
1410         struct srpc_event *rpcev = ev->md.user_ptr;
1411         struct srpc_client_rpc *crpc;
1412         struct srpc_server_rpc *srpc;
1413         struct srpc_buffer *buffer;
1414         struct srpc_service *sv;
1415         struct srpc_msg *msg;
1416         enum srpc_msg_type type;
1417
1418         LASSERT(!in_interrupt());
1419
1420         if (ev->status != 0) {
1421                 __u32 errors;
1422
1423                 spin_lock(&srpc_data.rpc_glock);
1424                 if (ev->status != -ECANCELED) /* cancellation is not error */
1425                         srpc_data.rpc_counters.errors++;
1426                 errors = srpc_data.rpc_counters.errors;
1427                 spin_unlock(&srpc_data.rpc_glock);
1428
1429                 CNETERR("LNet event status %d type %d, RPC errors %u\n",
1430                         ev->status, ev->type, errors);
1431         }
1432
1433         rpcev->ev_lnet = ev->type;
1434
1435         switch (rpcev->ev_type) {
1436         default:
1437                 CERROR("Unknown event: status %d, type %d, lnet %d\n",
1438                        rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet);
1439                 LBUG();
1440                 /* fallthrough */
1441         case SRPC_REQUEST_SENT:
1442                 if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
1443                         spin_lock(&srpc_data.rpc_glock);
1444                         srpc_data.rpc_counters.rpcs_sent++;
1445                         spin_unlock(&srpc_data.rpc_glock);
1446                 }
1447                 /* fallthrough */
1448         case SRPC_REPLY_RCVD:
1449         case SRPC_BULK_REQ_RCVD:
1450                 crpc = rpcev->ev_data;
1451
1452                 if (rpcev != &crpc->crpc_reqstev &&
1453                     rpcev != &crpc->crpc_replyev &&
1454                     rpcev != &crpc->crpc_bulkev) {
1455                         CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n",
1456                                rpcev, crpc, &crpc->crpc_reqstev,
1457                                &crpc->crpc_replyev, &crpc->crpc_bulkev);
1458                         CERROR("Bad event: status %d, type %d, lnet %d\n",
1459                                rpcev->ev_status, rpcev->ev_type,
1460                                rpcev->ev_lnet);
1461                         LBUG();
1462                 }
1463
1464                 spin_lock(&crpc->crpc_lock);
1465
1466                 LASSERT(rpcev->ev_fired == 0);
1467                 rpcev->ev_fired  = 1;
1468                 rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
1469                                    -EINTR : ev->status;
1470                 swi_schedule_workitem(&crpc->crpc_wi);
1471
1472                 spin_unlock(&crpc->crpc_lock);
1473                 break;
1474
1475         case SRPC_REQUEST_RCVD:
1476                 scd = rpcev->ev_data;
1477                 sv = scd->scd_svc;
1478
1479                 LASSERT(rpcev == &scd->scd_ev);
1480
1481                 spin_lock(&scd->scd_lock);
1482
1483                 LASSERT(ev->unlinked);
1484                 LASSERT(ev->type == LNET_EVENT_PUT ||
1485                         ev->type == LNET_EVENT_UNLINK);
1486                 LASSERT(ev->type != LNET_EVENT_UNLINK ||
1487                         sv->sv_shuttingdown);
1488
1489                 buffer = container_of(ev->md.start, struct srpc_buffer,
1490                                       buf_msg);
1491                 buffer->buf_peer = ev->source;
1492                 buffer->buf_self = ev->target.nid;
1493
1494                 LASSERT(scd->scd_buf_nposted > 0);
1495                 scd->scd_buf_nposted--;
1496
1497                 if (sv->sv_shuttingdown) {
1498                         /* Leave buffer on scd->scd_buf_nposted since
1499                          * srpc_finish_service needs to traverse it.
1500                          */
1501                         spin_unlock(&scd->scd_lock);
1502                         break;
1503                 }
1504
1505                 if (scd->scd_buf_err_stamp != 0 &&
1506                     scd->scd_buf_err_stamp < ktime_get_real_seconds()) {
1507                         /* re-enable adding buffer */
1508                         scd->scd_buf_err_stamp = 0;
1509                         scd->scd_buf_err = 0;
1510                 }
1511
1512                 if (scd->scd_buf_err == 0 && /* adding buffer is enabled */
1513                     scd->scd_buf_adjust == 0 &&
1514                     scd->scd_buf_nposted < scd->scd_buf_low) {
1515                         scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
1516                                                   SFW_TEST_WI_MIN);
1517                         swi_schedule_workitem(&scd->scd_buf_wi);
1518                 }
1519
1520                 list_del(&buffer->buf_list); /* from scd->scd_buf_posted */
1521                 msg = &buffer->buf_msg;
1522                 type = srpc_service2request(sv->sv_id);
1523
1524                 if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
1525                     (msg->msg_type != type &&
1526                      msg->msg_type != __swab32(type)) ||
1527                     (msg->msg_magic != SRPC_MSG_MAGIC &&
1528                      msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
1529                         CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n",
1530                                sv->sv_name, libcfs_id2str(ev->initiator),
1531                                ev->status, ev->mlength,
1532                                msg->msg_type, msg->msg_magic);
1533
1534                         /* NB can't call srpc_service_recycle_buffer here since
1535                          * it may call LNetM[DE]Attach. The invalid magic tells
1536                          * srpc_handle_rpc to drop this RPC
1537                          */
1538                         msg->msg_magic = 0;
1539                 }
1540
1541                 if (!list_empty(&scd->scd_rpc_free)) {
1542                         srpc = list_entry(scd->scd_rpc_free.next,
1543                                           struct srpc_server_rpc,
1544                                           srpc_list);
1545                         list_del(&srpc->srpc_list);
1546
1547                         srpc_init_server_rpc(srpc, scd, buffer);
1548                         list_add_tail(&srpc->srpc_list,
1549                                       &scd->scd_rpc_active);
1550                         swi_schedule_workitem(&srpc->srpc_wi);
1551                 } else {
1552                         list_add_tail(&buffer->buf_list,
1553                                       &scd->scd_buf_blocked);
1554                 }
1555
1556                 spin_unlock(&scd->scd_lock);
1557
1558                 spin_lock(&srpc_data.rpc_glock);
1559                 srpc_data.rpc_counters.rpcs_rcvd++;
1560                 spin_unlock(&srpc_data.rpc_glock);
1561                 break;
1562
1563         case SRPC_BULK_GET_RPLD:
1564                 LASSERT(ev->type == LNET_EVENT_SEND ||
1565                         ev->type == LNET_EVENT_REPLY ||
1566                         ev->type == LNET_EVENT_UNLINK);
1567
1568                 if (!ev->unlinked)
1569                         break; /* wait for final event */
1570                 /* fallthrough */
1571         case SRPC_BULK_PUT_SENT:
1572                 if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
1573                         spin_lock(&srpc_data.rpc_glock);
1574
1575                         if (rpcev->ev_type == SRPC_BULK_GET_RPLD)
1576                                 srpc_data.rpc_counters.bulk_get += ev->mlength;
1577                         else
1578                                 srpc_data.rpc_counters.bulk_put += ev->mlength;
1579
1580                         spin_unlock(&srpc_data.rpc_glock);
1581                 }
1582                 /* fallthrough */
1583         case SRPC_REPLY_SENT:
1584                 srpc = rpcev->ev_data;
1585                 scd  = srpc->srpc_scd;
1586
1587                 LASSERT(rpcev == &srpc->srpc_ev);
1588
1589                 spin_lock(&scd->scd_lock);
1590
1591                 rpcev->ev_fired  = 1;
1592                 rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
1593                                    -EINTR : ev->status;
1594                 swi_schedule_workitem(&srpc->srpc_wi);
1595
1596                 spin_unlock(&scd->scd_lock);
1597                 break;
1598         }
1599 }
1600
1601
1602 int
1603 srpc_startup (void)
1604 {
1605         int rc;
1606
1607         memset(&srpc_data, 0, sizeof(struct smoketest_rpc));
1608         spin_lock_init(&srpc_data.rpc_glock);
1609
1610         /* 1 second pause to avoid timestamp reuse */
1611         schedule_timeout_uninterruptible(cfs_time_seconds(1));
1612         srpc_data.rpc_matchbits = ((__u64) ktime_get_real_seconds()) << 48;
1613
1614         srpc_data.rpc_state = SRPC_STATE_NONE;
1615
1616         rc = LNetNIInit(LNET_PID_LUSTRE);
1617         if (rc < 0) {
1618                 CERROR("LNetNIInit() has failed: %d\n", rc);
1619                 return rc;
1620         }
1621
1622         srpc_data.rpc_state = SRPC_STATE_NI_INIT;
1623
1624         srpc_data.rpc_lnet_eq = LNetEQAlloc(srpc_lnet_ev_handler);
1625         if (IS_ERR(srpc_data.rpc_lnet_eq)) {
1626                 rc = PTR_ERR(srpc_data.rpc_lnet_eq);
1627                 CERROR("LNetEQAlloc() has failed: %d\n", rc);
1628                 goto bail;
1629         }
1630
1631         rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
1632         LASSERT(rc == 0);
1633         rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL);
1634         LASSERT(rc == 0);
1635
1636         srpc_data.rpc_state = SRPC_STATE_EQ_INIT;
1637
1638         rc = stt_startup();
1639
1640 bail:
1641         if (rc != 0)
1642                 srpc_shutdown();
1643         else
1644                 srpc_data.rpc_state = SRPC_STATE_RUNNING;
1645
1646         return rc;
1647 }
1648
1649 void
1650 srpc_shutdown (void)
1651 {
1652         int i;
1653         int rc;
1654         int state;
1655
1656         state = srpc_data.rpc_state;
1657         srpc_data.rpc_state = SRPC_STATE_STOPPING;
1658
1659         switch (state) {
1660         default:
1661                 LBUG();
1662                 /* fallthrough */
1663         case SRPC_STATE_RUNNING:
1664                 spin_lock(&srpc_data.rpc_glock);
1665
1666                 for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
1667                         struct srpc_service *sv = srpc_data.rpc_services[i];
1668
1669                         LASSERTF(sv == NULL,
1670                                  "service not empty: id %d, name %s\n",
1671                                  i, sv->sv_name);
1672                 }
1673
1674                 spin_unlock(&srpc_data.rpc_glock);
1675
1676                 stt_shutdown();
1677                 /* fallthrough */
1678
1679         case SRPC_STATE_EQ_INIT:
1680                 rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL);
1681                 rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL);
1682                 LASSERT(rc == 0);
1683                 rc = LNetEQFree(srpc_data.rpc_lnet_eq);
1684                 LASSERT(rc == 0); /* the EQ should have no user by now */
1685                 /* fallthrough */
1686
1687         case SRPC_STATE_NI_INIT:
1688                 LNetNIFini();
1689         }
1690 }