Whamcloud - gitweb
LU-2675 build: assume __linux__ and __KERNEL__
[fs/lustre-release.git] / lustre / ptlrpc / sec_bulk.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/sec_bulk.c
37  *
38  * Author: Eric Mei <ericm@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_SEC
42
43 #include <libcfs/libcfs.h>
44
45 #include <obd.h>
46 #include <obd_cksum.h>
47 #include <obd_class.h>
48 #include <obd_support.h>
49 #include <lustre_net.h>
50 #include <lustre_import.h>
51 #include <lustre_dlm.h>
52 #include <lustre_sec.h>
53
54 #include "ptlrpc_internal.h"
55
56 /****************************************
57  * bulk encryption page pools           *
58  ****************************************/
59
60
/* number of pointers that fit into one page */
#define PTRS_PER_PAGE   (PAGE_CACHE_SIZE / sizeof(void *))
/* every pool is a page-sized array of page pointers, hence this many pages */
#define PAGES_PER_POOL  (PTRS_PER_PAGE)

/* upper bound of epp_idle_idx */
#define IDLE_IDX_MAX            (100)
/* weight given to the previous idle index when a new sample is averaged in */
#define IDLE_IDX_WEIGHT         (3)

/* seconds without pool access after which the pools count as fully idle */
#define CACHE_QUIESCENT_PERIOD  (20)
68
/*
 * State of the bulk encryption page pools. There is exactly one instance,
 * page_pools, shared by every function in this file.
 */
static struct ptlrpc_enc_page_pool {
        /*
         * constants
         */
        unsigned long    epp_max_pages;   /* maximum pages can hold, const */
        unsigned int     epp_max_pools;   /* number of pools, const */

        /*
         * wait queue in case of not enough free pages.
         */
        wait_queue_head_t    epp_waitq;       /* waiting threads */
        unsigned int     epp_waitqlen;    /* wait queue length */
        unsigned long    epp_pages_short; /* # of pages wanted of in-q users */
        unsigned int     epp_growing:1;   /* during adding pages */

        /*
         * indicating how idle the pools are, from 0 to IDLE_IDX_MAX
         * this is counted based on each time when getting pages from
         * the pools, not based on time. which means in case that system
         * is idled for a while but the idle_idx might still be low if no
         * activities happened in the pools.
         */
        unsigned long    epp_idle_idx;

        /* last shrink time due to mem tight */
        long             epp_last_shrink;
        /* last time pages were taken from the pools */
        long             epp_last_access;

        /*
         * in-pool pages bookkeeping
         */
        spinlock_t       epp_lock;         /* protect following fields */
        unsigned long    epp_total_pages; /* total pages in pools */
        unsigned long    epp_free_pages;  /* current pages available */

        /*
         * statistics
         */
        unsigned long    epp_st_max_pages;      /* # of pages ever reached */
        unsigned int     epp_st_grows;          /* # of grows */
        unsigned int     epp_st_grow_fails;     /* # of add pages failures */
        unsigned int     epp_st_shrinks;        /* # of shrinks */
        unsigned long    epp_st_access;         /* # of access */
        unsigned long    epp_st_missings;       /* # of cache missing */
        unsigned long    epp_st_lowfree;        /* lowest free pages reached */
        unsigned int     epp_st_max_wqlen;      /* highest waitqueue length */
        cfs_time_t       epp_st_max_wait;       /* in jiffies */
        /*
         * pointers to pools: an array of epp_max_pools entries, each either
         * NULL or a page-sized array of PAGES_PER_POOL page pointers
         */
        struct page    ***epp_pools;
} page_pools;
121
/*
 * memory shrinker
 */
/* value handed to set_shrinker() when the shrinker is registered */
const int pools_shrinker_seeks = DEFAULT_SEEKS;
/* handle returned by set_shrinker(), released with remove_shrinker() */
static struct shrinker *pools_shrinker;
127
128
129 /*
130  * /proc/fs/lustre/sptlrpc/encrypt_page_pools
131  */
132 int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
133 {
134         int     rc;
135
136         spin_lock(&page_pools.epp_lock);
137
138         rc = seq_printf(m,
139                       "physical pages:          %lu\n"
140                       "pages per pool:          %lu\n"
141                       "max pages:               %lu\n"
142                       "max pools:               %u\n"
143                       "total pages:             %lu\n"
144                       "total free:              %lu\n"
145                       "idle index:              %lu/100\n"
146                       "last shrink:             %lds\n"
147                       "last access:             %lds\n"
148                       "max pages reached:       %lu\n"
149                       "grows:                   %u\n"
150                       "grows failure:           %u\n"
151                       "shrinks:                 %u\n"
152                       "cache access:            %lu\n"
153                       "cache missing:           %lu\n"
154                       "low free mark:           %lu\n"
155                       "max waitqueue depth:     %u\n"
156                       "max wait time:           "CFS_TIME_T"/%u\n"
157                       ,
158                       totalram_pages,
159                       PAGES_PER_POOL,
160                       page_pools.epp_max_pages,
161                       page_pools.epp_max_pools,
162                       page_pools.epp_total_pages,
163                       page_pools.epp_free_pages,
164                       page_pools.epp_idle_idx,
165                       cfs_time_current_sec() - page_pools.epp_last_shrink,
166                       cfs_time_current_sec() - page_pools.epp_last_access,
167                       page_pools.epp_st_max_pages,
168                       page_pools.epp_st_grows,
169                       page_pools.epp_st_grow_fails,
170                       page_pools.epp_st_shrinks,
171                       page_pools.epp_st_access,
172                       page_pools.epp_st_missings,
173                       page_pools.epp_st_lowfree,
174                       page_pools.epp_st_max_wqlen,
175                       page_pools.epp_st_max_wait, HZ
176                      );
177
178         spin_unlock(&page_pools.epp_lock);
179         return rc;
180 }
181
182 static void enc_pools_release_free_pages(long npages)
183 {
184         int     p_idx, g_idx;
185         int     p_idx_max1, p_idx_max2;
186
187         LASSERT(npages > 0);
188         LASSERT(npages <= page_pools.epp_free_pages);
189         LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
190
191         /* max pool index before the release */
192         p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
193
194         page_pools.epp_free_pages -= npages;
195         page_pools.epp_total_pages -= npages;
196
197         /* max pool index after the release */
198         p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
199                      ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);
200
201         p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
202         g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
203         LASSERT(page_pools.epp_pools[p_idx]);
204
205         while (npages--) {
206                 LASSERT(page_pools.epp_pools[p_idx]);
207                 LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
208
209                 __free_page(page_pools.epp_pools[p_idx][g_idx]);
210                 page_pools.epp_pools[p_idx][g_idx] = NULL;
211
212                 if (++g_idx == PAGES_PER_POOL) {
213                         p_idx++;
214                         g_idx = 0;
215                 }
216         }
217
218         /* free unused pools */
219         while (p_idx_max1 < p_idx_max2) {
220                 LASSERT(page_pools.epp_pools[p_idx_max2]);
221                 OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
222                 page_pools.epp_pools[p_idx_max2] = NULL;
223                 p_idx_max2--;
224         }
225 }
226
227 /*
228  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
229  */
230 static unsigned long enc_pools_shrink_count(struct shrinker *s,
231                                             struct shrink_control *sc)
232 {
233         /*
234          * if no pool access for a long time, we consider it's fully idle.
235          * a little race here is fine.
236          */
237         if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
238                      CACHE_QUIESCENT_PERIOD)) {
239                 spin_lock(&page_pools.epp_lock);
240                 page_pools.epp_idle_idx = IDLE_IDX_MAX;
241                 spin_unlock(&page_pools.epp_lock);
242         }
243
244         LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
245         return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
246                 (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
247 }
248
249 /*
250  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
251  */
252 static unsigned long enc_pools_shrink_scan(struct shrinker *s,
253                                            struct shrink_control *sc)
254 {
255         spin_lock(&page_pools.epp_lock);
256         sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
257                               page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
258         if (sc->nr_to_scan > 0) {
259                 enc_pools_release_free_pages(sc->nr_to_scan);
260                 CDEBUG(D_SEC, "released %ld pages, %ld left\n",
261                        (long)sc->nr_to_scan, page_pools.epp_free_pages);
262
263                 page_pools.epp_st_shrinks++;
264                 page_pools.epp_last_shrink = cfs_time_current_sec();
265         }
266         spin_unlock(&page_pools.epp_lock);
267
268         /*
269          * if no pool access for a long time, we consider it's fully idle.
270          * a little race here is fine.
271          */
272         if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
273                      CACHE_QUIESCENT_PERIOD)) {
274                 spin_lock(&page_pools.epp_lock);
275                 page_pools.epp_idle_idx = IDLE_IDX_MAX;
276                 spin_unlock(&page_pools.epp_lock);
277         }
278
279         LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
280         return sc->nr_to_scan;
281 }
282
#ifndef HAVE_SHRINKER_COUNT
/*
 * Single-callback shrinker, built only when HAVE_SHRINKER_COUNT is not
 * defined. It copies its arguments into a local shrink_control, runs the
 * scan step on it and then returns the result of the count step.
 *
 * could be called frequently for query (@nr_to_scan == 0).
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
        struct shrink_control scv = {
                .nr_to_scan = shrink_param(sc, nr_to_scan),
                .gfp_mask   = shrink_param(sc, gfp_mask)
        };
#if !defined(HAVE_SHRINKER_WANT_SHRINK_PTR) && !defined(HAVE_SHRINK_CONTROL)
        /* presumably SHRINKER_ARGS() provides no shrinker pointer in this
         * configuration, so a placeholder is declared - TODO confirm */
        struct shrinker* shrinker = NULL;
#endif

        /* release pages first ... */
        enc_pools_shrink_scan(shrinker, &scv);

        /* ... then report how many could still be released */
        return enc_pools_shrink_count(shrinker, &scv);
}

#endif /* HAVE_SHRINKER_COUNT */
304
305 static inline
306 int npages_to_npools(unsigned long npages)
307 {
308         return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
309 }
310
311 /*
312  * return how many pages cleaned up.
313  */
314 static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
315 {
316         unsigned long cleaned = 0;
317         int           i, j;
318
319         for (i = 0; i < npools; i++) {
320                 if (pools[i]) {
321                         for (j = 0; j < PAGES_PER_POOL; j++) {
322                                 if (pools[i][j]) {
323                                         __free_page(pools[i][j]);
324                                         cleaned++;
325                                 }
326                         }
327                         OBD_FREE(pools[i], PAGE_CACHE_SIZE);
328                         pools[i] = NULL;
329                 }
330         }
331
332         return cleaned;
333 }
334
/*
 * merge @npools pointed by @pools which contains @npages new pages
 * into current pools.
 *
 * Every page pointer and pool array that gets merged is set to NULL in
 * @pools, so that the caller's later enc_pools_cleanup() on @pools frees
 * only what was not taken over. epp_growing must be set by the caller;
 * epp_lock is taken and released in here.
 *
 * we have options to avoid most memory copy with some tricks. but we choose
 * the simplest way to avoid complexity. It's not frequently called.
 */
static void enc_pools_insert(struct page ***pools, int npools, int npages)
{
        int     freeslot;
        int     op_idx, np_idx, og_idx, ng_idx;
        int     cur_npools, end_npools;

        LASSERT(npages > 0);
        LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
        LASSERT(npages_to_npools(npages) == npools);
        LASSERT(page_pools.epp_growing);

        spin_lock(&page_pools.epp_lock);

        /*
         * (1) fill all the free slots of current pools.
         */
        /* free slots are those left by rented pages, and the extra ones with
         * index >= total_pages, located at the tail of the last pool. */
        freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
        if (freeslot != 0)
                freeslot = PAGES_PER_POOL - freeslot;
        freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;

        /* (op_idx, og_idx): first empty slot of the current pools;
         * (np_idx, ng_idx): last page of the new pools, walked backwards */
        op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
        np_idx = npools - 1;
        ng_idx = (npages - 1) % PAGES_PER_POOL;

        while (freeslot) {
                LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
                LASSERT(pools[np_idx][ng_idx] != NULL);

                page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
                pools[np_idx][ng_idx] = NULL;

                freeslot--;

                if (++og_idx == PAGES_PER_POOL) {
                        op_idx++;
                        og_idx = 0;
                }
                if (--ng_idx < 0) {
                        /* all new pages have been consumed */
                        if (np_idx == 0)
                                break;
                        np_idx--;
                        ng_idx = PAGES_PER_POOL - 1;
                }
        }

        /*
         * (2) add pools if needed.
         */
        cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
                     PAGES_PER_POOL;
        end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) /
                     PAGES_PER_POOL;
        LASSERT(end_npools <= page_pools.epp_max_pools);

        /* step (1) took pages from the tail of @pools, so the pool arrays
         * are handed over starting from the front */
        np_idx = 0;
        while (cur_npools < end_npools) {
                LASSERT(page_pools.epp_pools[cur_npools] == NULL);
                LASSERT(np_idx < npools);
                LASSERT(pools[np_idx] != NULL);

                page_pools.epp_pools[cur_npools++] = pools[np_idx];
                pools[np_idx++] = NULL;
        }

        page_pools.epp_total_pages += npages;
        page_pools.epp_free_pages += npages;
        /* the low-free statistic restarts from the new free count */
        page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
                page_pools.epp_st_max_pages = page_pools.epp_total_pages;

        CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
               page_pools.epp_total_pages);

        spin_unlock(&page_pools.epp_lock);
}
422
423 static int enc_pools_add_pages(int npages)
424 {
425         static DEFINE_MUTEX(add_pages_mutex);
426         struct page   ***pools;
427         int             npools, alloced = 0;
428         int             i, j, rc = -ENOMEM;
429
430         if (npages < PTLRPC_MAX_BRW_PAGES)
431                 npages = PTLRPC_MAX_BRW_PAGES;
432
433         mutex_lock(&add_pages_mutex);
434
435         if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
436                 npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
437         LASSERT(npages > 0);
438
439         page_pools.epp_st_grows++;
440
441         npools = npages_to_npools(npages);
442         OBD_ALLOC(pools, npools * sizeof(*pools));
443         if (pools == NULL)
444                 goto out;
445
446         for (i = 0; i < npools; i++) {
447                 OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
448                 if (pools[i] == NULL)
449                         goto out_pools;
450
451                 for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
452                         pools[i][j] = alloc_page(GFP_NOFS |
453                                                  __GFP_HIGHMEM);
454                         if (pools[i][j] == NULL)
455                                 goto out_pools;
456
457                         alloced++;
458                 }
459         }
460         LASSERT(alloced == npages);
461
462         enc_pools_insert(pools, npools, npages);
463         CDEBUG(D_SEC, "added %d pages into pools\n", npages);
464         rc = 0;
465
466 out_pools:
467         enc_pools_cleanup(pools, npools);
468         OBD_FREE(pools, npools * sizeof(*pools));
469 out:
470         if (rc) {
471                 page_pools.epp_st_grow_fails++;
472                 CERROR("Failed to allocate %d enc pages\n", npages);
473         }
474
475         mutex_unlock(&add_pages_mutex);
476         return rc;
477 }
478
479 static inline void enc_pools_wakeup(void)
480 {
481         assert_spin_locked(&page_pools.epp_lock);
482
483         if (unlikely(page_pools.epp_waitqlen)) {
484                 LASSERT(waitqueue_active(&page_pools.epp_waitq));
485                 wake_up_all(&page_pools.epp_waitq);
486         }
487 }
488
489 static int enc_pools_should_grow(int page_needed, long now)
490 {
491         /* don't grow if someone else is growing the pools right now,
492          * or the pools has reached its full capacity
493          */
494         if (page_pools.epp_growing ||
495             page_pools.epp_total_pages == page_pools.epp_max_pages)
496                 return 0;
497
498         /* if total pages is not enough, we need to grow */
499         if (page_pools.epp_total_pages < page_needed)
500                 return 1;
501
502         /*
503          * we wanted to return 0 here if there was a shrink just happened
504          * moment ago, but this may cause deadlock if both client and ost
505          * live on single node.
506          */
507 #if 0
508         if (now - page_pools.epp_last_shrink < 2)
509                 return 0;
510 #endif
511
512         /*
513          * here we perhaps need consider other factors like wait queue
514          * length, idle index, etc. ?
515          */
516
517         /* grow the pools in any other cases */
518         return 1;
519 }
520
521 /*
522  * we allocate the requested pages atomically.
523  */
524 int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
525 {
526         wait_queue_t  waitlink;
527         unsigned long   this_idle = -1;
528         cfs_time_t      tick = 0;
529         long            now;
530         int             p_idx, g_idx;
531         int             i;
532
533         LASSERT(desc->bd_iov_count > 0);
534         LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
535
536         /* resent bulk, enc iov might have been allocated previously */
537         if (desc->bd_enc_iov != NULL)
538                 return 0;
539
540         OBD_ALLOC(desc->bd_enc_iov,
541                   desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
542         if (desc->bd_enc_iov == NULL)
543                 return -ENOMEM;
544
545         spin_lock(&page_pools.epp_lock);
546
547         page_pools.epp_st_access++;
548 again:
549         if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
550                 if (tick == 0)
551                         tick = cfs_time_current();
552
553                 now = cfs_time_current_sec();
554
555                 page_pools.epp_st_missings++;
556                 page_pools.epp_pages_short += desc->bd_iov_count;
557
558                 if (enc_pools_should_grow(desc->bd_iov_count, now)) {
559                         page_pools.epp_growing = 1;
560
561                         spin_unlock(&page_pools.epp_lock);
562                         enc_pools_add_pages(page_pools.epp_pages_short / 2);
563                         spin_lock(&page_pools.epp_lock);
564
565                         page_pools.epp_growing = 0;
566
567                         enc_pools_wakeup();
568                 } else {
569                         if (++page_pools.epp_waitqlen >
570                             page_pools.epp_st_max_wqlen)
571                                 page_pools.epp_st_max_wqlen =
572                                                 page_pools.epp_waitqlen;
573
574                         set_current_state(TASK_UNINTERRUPTIBLE);
575                         init_waitqueue_entry_current(&waitlink);
576                         add_wait_queue(&page_pools.epp_waitq, &waitlink);
577
578                         spin_unlock(&page_pools.epp_lock);
579                         waitq_wait(&waitlink, TASK_UNINTERRUPTIBLE);
580                         remove_wait_queue(&page_pools.epp_waitq, &waitlink);
581                         LASSERT(page_pools.epp_waitqlen > 0);
582                         spin_lock(&page_pools.epp_lock);
583                         page_pools.epp_waitqlen--;
584                 }
585
586                 LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
587                 page_pools.epp_pages_short -= desc->bd_iov_count;
588
589                 this_idle = 0;
590                 goto again;
591         }
592
593         /* record max wait time */
594         if (unlikely(tick != 0)) {
595                 tick = cfs_time_current() - tick;
596                 if (tick > page_pools.epp_st_max_wait)
597                         page_pools.epp_st_max_wait = tick;
598         }
599
600         /* proceed with rest of allocation */
601         page_pools.epp_free_pages -= desc->bd_iov_count;
602
603         p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
604         g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
605
606         for (i = 0; i < desc->bd_iov_count; i++) {
607                 LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
608                 desc->bd_enc_iov[i].kiov_page =
609                                         page_pools.epp_pools[p_idx][g_idx];
610                 page_pools.epp_pools[p_idx][g_idx] = NULL;
611
612                 if (++g_idx == PAGES_PER_POOL) {
613                         p_idx++;
614                         g_idx = 0;
615                 }
616         }
617
618         if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
619                 page_pools.epp_st_lowfree = page_pools.epp_free_pages;
620
621         /*
622          * new idle index = (old * weight + new) / (weight + 1)
623          */
624         if (this_idle == -1) {
625                 this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
626                             page_pools.epp_total_pages;
627         }
628         page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
629                                    this_idle) /
630                                   (IDLE_IDX_WEIGHT + 1);
631
632         page_pools.epp_last_access = cfs_time_current_sec();
633
634         spin_unlock(&page_pools.epp_lock);
635         return 0;
636 }
637 EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
638
639 void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
640 {
641         int     p_idx, g_idx;
642         int     i;
643
644         if (desc->bd_enc_iov == NULL)
645                 return;
646
647         LASSERT(desc->bd_iov_count > 0);
648
649         spin_lock(&page_pools.epp_lock);
650
651         p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
652         g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
653
654         LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
655                 page_pools.epp_total_pages);
656         LASSERT(page_pools.epp_pools[p_idx]);
657
658         for (i = 0; i < desc->bd_iov_count; i++) {
659                 LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
660                 LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
661                 LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
662
663                 page_pools.epp_pools[p_idx][g_idx] =
664                                         desc->bd_enc_iov[i].kiov_page;
665
666                 if (++g_idx == PAGES_PER_POOL) {
667                         p_idx++;
668                         g_idx = 0;
669                 }
670         }
671
672         page_pools.epp_free_pages += desc->bd_iov_count;
673
674         enc_pools_wakeup();
675
676         spin_unlock(&page_pools.epp_lock);
677
678         OBD_FREE(desc->bd_enc_iov,
679                  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
680         desc->bd_enc_iov = NULL;
681 }
682 EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
683
684 /*
685  * we don't do much stuff for add_user/del_user anymore, except adding some
686  * initial pages in add_user() if current pools are empty, rest would be
687  * handled by the pools's self-adaption.
688  */
689 int sptlrpc_enc_pool_add_user(void)
690 {
691         int     need_grow = 0;
692
693         spin_lock(&page_pools.epp_lock);
694         if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) {
695                 page_pools.epp_growing = 1;
696                 need_grow = 1;
697         }
698         spin_unlock(&page_pools.epp_lock);
699
700         if (need_grow) {
701                 enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
702                                     PTLRPC_MAX_BRW_PAGES);
703
704                 spin_lock(&page_pools.epp_lock);
705                 page_pools.epp_growing = 0;
706                 enc_pools_wakeup();
707                 spin_unlock(&page_pools.epp_lock);
708         }
709         return 0;
710 }
711 EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
712
/*
 * Counterpart of sptlrpc_enc_pool_add_user(). Nothing needs to be undone,
 * so this only returns 0.
 */
int sptlrpc_enc_pool_del_user(void)
{
        return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
718
/*
 * Allocate the epp_pools array with one entry per possible pool.
 * epp_max_pools must have been set. The caller checks epp_pools for NULL
 * to detect failure.
 */
static inline void enc_pools_alloc(void)
{
        LASSERT(page_pools.epp_max_pools);
        OBD_ALLOC_LARGE(page_pools.epp_pools,
                        page_pools.epp_max_pools *
                        sizeof(*page_pools.epp_pools));
}
726
/*
 * Free the epp_pools array allocated by enc_pools_alloc(). Only the array
 * itself is freed here, not the pools or pages it may still point to.
 */
static inline void enc_pools_free(void)
{
        LASSERT(page_pools.epp_max_pools);
        LASSERT(page_pools.epp_pools);

        OBD_FREE_LARGE(page_pools.epp_pools,
                       page_pools.epp_max_pools *
                       sizeof(*page_pools.epp_pools));
}
736
737 int sptlrpc_enc_pool_init(void)
738 {
739         DEF_SHRINKER_VAR(shvar, enc_pools_shrink,
740                          enc_pools_shrink_count, enc_pools_shrink_scan);
741         /*
742          * maximum capacity is 1/8 of total physical memory.
743          * is the 1/8 a good number?
744          */
745         page_pools.epp_max_pages = totalram_pages / 8;
746         page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
747
748         init_waitqueue_head(&page_pools.epp_waitq);
749         page_pools.epp_waitqlen = 0;
750         page_pools.epp_pages_short = 0;
751
752         page_pools.epp_growing = 0;
753
754         page_pools.epp_idle_idx = 0;
755         page_pools.epp_last_shrink = cfs_time_current_sec();
756         page_pools.epp_last_access = cfs_time_current_sec();
757
758         spin_lock_init(&page_pools.epp_lock);
759         page_pools.epp_total_pages = 0;
760         page_pools.epp_free_pages = 0;
761
762         page_pools.epp_st_max_pages = 0;
763         page_pools.epp_st_grows = 0;
764         page_pools.epp_st_grow_fails = 0;
765         page_pools.epp_st_shrinks = 0;
766         page_pools.epp_st_access = 0;
767         page_pools.epp_st_missings = 0;
768         page_pools.epp_st_lowfree = 0;
769         page_pools.epp_st_max_wqlen = 0;
770         page_pools.epp_st_max_wait = 0;
771
772         enc_pools_alloc();
773         if (page_pools.epp_pools == NULL)
774                 return -ENOMEM;
775
776         pools_shrinker = set_shrinker(pools_shrinker_seeks, &shvar);
777         if (pools_shrinker == NULL) {
778                 enc_pools_free();
779                 return -ENOMEM;
780         }
781
782         return 0;
783 }
784
785 void sptlrpc_enc_pool_fini(void)
786 {
787         unsigned long cleaned, npools;
788
789         LASSERT(pools_shrinker);
790         LASSERT(page_pools.epp_pools);
791         LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
792
793         remove_shrinker(pools_shrinker);
794
795         npools = npages_to_npools(page_pools.epp_total_pages);
796         cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
797         LASSERT(cleaned == page_pools.epp_total_pages);
798
799         enc_pools_free();
800
801         if (page_pools.epp_st_access > 0) {
802                 CDEBUG(D_SEC,
803                        "max pages %lu, grows %u, grow fails %u, shrinks %u, "
804                        "access %lu, missing %lu, max qlen %u, max wait "
805                        CFS_TIME_T"/%d\n",
806                        page_pools.epp_st_max_pages, page_pools.epp_st_grows,
807                        page_pools.epp_st_grow_fails,
808                        page_pools.epp_st_shrinks, page_pools.epp_st_access,
809                        page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
810                        page_pools.epp_st_max_wait, HZ);
811         }
812 }
813
814
/*
 * Translation table, indexed by BULK_HASH_ALG_* value, giving the
 * CFS_HASH_ALG_* id that is passed to the cfs_crypto_hash_*() functions.
 */
static int cfs_hash_alg_id[] = {
        [BULK_HASH_ALG_NULL]    = CFS_HASH_ALG_NULL,
        [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
        [BULK_HASH_ALG_CRC32]   = CFS_HASH_ALG_CRC32,
        [BULK_HASH_ALG_MD5]     = CFS_HASH_ALG_MD5,
        [BULK_HASH_ALG_SHA1]    = CFS_HASH_ALG_SHA1,
        [BULK_HASH_ALG_SHA256]  = CFS_HASH_ALG_SHA256,
        [BULK_HASH_ALG_SHA384]  = CFS_HASH_ALG_SHA384,
        [BULK_HASH_ALG_SHA512]  = CFS_HASH_ALG_SHA512,
};
825 const char * sptlrpc_get_hash_name(__u8 hash_alg)
826 {
827         return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
828 }
829 EXPORT_SYMBOL(sptlrpc_get_hash_name);
830
/*
 * Look up a hash algorithm by name: returns the result of
 * cfs_crypto_hash_alg(@algname), converted to __u8.
 *
 * NOTE(review): the value is not translated through cfs_hash_alg_id[];
 * presumably the BULK_HASH_ALG_* and CFS_HASH_ALG_* numbering agree, and
 * the conversion to __u8 keeps a "not found" result recognizable - confirm.
 */
__u8 sptlrpc_get_hash_alg(const char *algname)
{
        return cfs_crypto_hash_alg(algname);
}
EXPORT_SYMBOL(sptlrpc_get_hash_alg);
836
837 int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
838 {
839         struct ptlrpc_bulk_sec_desc *bsd;
840         int                          size = msg->lm_buflens[offset];
841
842         bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
843         if (bsd == NULL) {
844                 CERROR("Invalid bulk sec desc: size %d\n", size);
845                 return -EINVAL;
846         }
847
848         if (swabbed) {
849                 __swab32s(&bsd->bsd_nob);
850         }
851
852         if (unlikely(bsd->bsd_version != 0)) {
853                 CERROR("Unexpected version %u\n", bsd->bsd_version);
854                 return -EPROTO;
855         }
856
857         if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
858                 CERROR("Invalid type %u\n", bsd->bsd_type);
859                 return -EPROTO;
860         }
861
862         /* FIXME more sanity check here */
863
864         if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
865                      bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
866                      bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
867                 CERROR("Invalid svc %u\n", bsd->bsd_svc);
868                 return -EPROTO;
869         }
870
871         return 0;
872 }
873 EXPORT_SYMBOL(bulk_sec_desc_unpack);
874
875 /*
876  * Compute the checksum of an RPC buffer payload.  If the return \a buflen
877  * is not large enough, truncate the result to fit so that it is possible
878  * to use a hash function with a large hash space, but only use a part of
879  * the resulting hash.
880  */
881 int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
882                               void *buf, int buflen)
883 {
884         struct cfs_crypto_hash_desc     *hdesc;
885         int                             hashsize;
886         unsigned int                    bufsize;
887         int                             i, err;
888
889         LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
890         LASSERT(buflen >= 4);
891
892         hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
893         if (IS_ERR(hdesc)) {
894                 CERROR("Unable to initialize checksum hash %s\n",
895                        cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
896                 return PTR_ERR(hdesc);
897         }
898
899         hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
900
901         for (i = 0; i < desc->bd_iov_count; i++) {
902                 cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
903                                   desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
904                                   desc->bd_iov[i].kiov_len);
905         }
906
907         if (hashsize > buflen) {
908                 unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
909
910                 bufsize = sizeof(hashbuf);
911                 LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
912                          bufsize, hashsize);
913                 err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
914                 memcpy(buf, hashbuf, buflen);
915         } else {
916                 bufsize = buflen;
917                 err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
918         }
919
920         return err;
921 }
922 EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
923
924