-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Copyright (C) 2006 Cluster File Systems, Inc.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * This file is part of Lustre, http://www.lustre.org.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Whamcloud, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * lustre/ptlrpc/sec_bulk.c
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Author: Eric Mei <ericm@clusterfs.com>
*/
-#ifndef EXPORT_SYMTAB
-#define EXPORT_SYMTAB
-#endif
#define DEBUG_SUBSYSTEM S_SEC
#include <libcfs/libcfs.h>
#endif
#include <obd.h>
+#include <obd_cksum.h>
#include <obd_class.h>
#include <obd_support.h>
#include <lustre_net.h>
#define PTRS_PER_PAGE (CFS_PAGE_SIZE / sizeof(void *))
#define PAGES_PER_POOL (PTRS_PER_PAGE)
+#define IDLE_IDX_MAX (100)
+#define IDLE_IDX_WEIGHT (3)
+
+#define CACHE_QUIESCENT_PERIOD (20)
+
static struct ptlrpc_enc_page_pool {
/*
* constants
*/
unsigned long epp_max_pages; /* maximum pages can hold, const */
unsigned int epp_max_pools; /* number of pools, const */
- /*
- * users of the pools. the capacity grow as more user added,
- * but doesn't shrink when users gone -- just current policy.
- * during failover there might be user add/remove activities.
- */
- atomic_t epp_users; /* shared by how many users (osc) */
- atomic_t epp_users_gone; /* users removed */
+
/*
* wait queue in case of not enough free pages.
*/
cfs_waitq_t epp_waitq; /* waiting threads */
unsigned int epp_waitqlen; /* wait queue length */
unsigned long epp_pages_short; /* # of pages wanted of in-q users */
- unsigned long epp_adding:1, /* during adding pages */
- epp_full:1; /* pools are all full */
+ unsigned int epp_growing:1; /* set while someone is adding pages */
+
+ /*
+ * how idle the pools are, from 0 to IDLE_IDX_MAX.
+ * it is recomputed each time pages are taken from the pools, not
+ * on a timer, so after the system has been idle for a while the
+ * index may still be low simply because nothing touched the pools.
+ */
+ unsigned long epp_idle_idx;
+
+ /* time of the last shrink by the memory shrinker, in seconds */
+ long epp_last_shrink;
+ long epp_last_access; /* time of the last get_pages call, in seconds */
+
/*
* in-pool pages bookkeeping
*/
- spinlock_t epp_lock; /* protect following fields */
+ cfs_spinlock_t epp_lock; /* protect following fields */
unsigned long epp_total_pages; /* total pages in pools */
unsigned long epp_free_pages; /* current pages available */
+
/*
* statistics
*/
- unsigned int epp_st_adds;
- unsigned int epp_st_failadds; /* # of add pages failures */
- unsigned long epp_st_reqs; /* # of get_pages requests */
- unsigned long epp_st_missings; /* # of cache missing */
- unsigned long epp_st_lowfree; /* lowest free pages ever reached */
- unsigned long epp_st_max_wqlen;/* highest waitqueue length ever */
- cfs_time_t epp_st_max_wait; /* in jeffies */
+ unsigned long epp_st_max_pages; /* highest # of total pages ever reached */
+ unsigned int epp_st_grows; /* # of grows */
+ unsigned int epp_st_grow_fails; /* # of add pages failures */
+ unsigned int epp_st_shrinks; /* # of shrinks */
+ unsigned long epp_st_access; /* # of get_pages calls */
+ unsigned long epp_st_missings; /* # of cache misses */
+ unsigned long epp_st_lowfree; /* lowest free pages reached */
+ unsigned int epp_st_max_wqlen; /* highest waitqueue length */
+ cfs_time_t epp_st_max_wait; /* in jiffies */
/*
* pointers to pools
*/
cfs_page_t ***epp_pools;
} page_pools;
+/*
+ * memory shrinker
+ */
+const int pools_shrinker_seeks = CFS_DEFAULT_SEEKS;
+static struct cfs_shrinker *pools_shrinker = NULL;
+
+
+/*
+ * /proc/fs/lustre/sptlrpc/encrypt_page_pools
+ *
+ * format the pool limits, current usage and statistics into @page, writing
+ * at most @count bytes, while holding epp_lock.
+ * "last shrink" and "last access" are printed as seconds elapsed since the
+ * event; "max wait time" is printed as a tick count followed by CFS_HZ.
+ * returns the value of snprintf(). @start, @off, @eof and @data are not
+ * used by this function.
+ */
int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
int rc;
- spin_lock(&page_pools.epp_lock);
+ cfs_spin_lock(&page_pools.epp_lock);
rc = snprintf(page, count,
"physical pages: %lu\n"
"pages per pool: %lu\n"
"max pages: %lu\n"
"max pools: %u\n"
- "users: %d - %d\n"
- "current waitqueue len: %u\n"
- "current pages in short: %lu\n"
"total pages: %lu\n"
"total free: %lu\n"
- "add page times: %u\n"
- "add page failed times: %u\n"
- "total requests: %lu\n"
+ "idle index: %lu/100\n"
+ "last shrink: %lds\n"
+ "last access: %lds\n"
+ "max pages reached: %lu\n"
+ "grows: %u\n"
+ "grows failure: %u\n"
+ "shrinks: %u\n"
+ "cache access: %lu\n"
"cache missing: %lu\n"
- "lowest free pages: %lu\n"
- "max waitqueue depth: %lu\n"
- "max wait time: "CFS_TIME_T"\n"
+ "low free mark: %lu\n"
+ "max waitqueue depth: %u\n"
+ "max wait time: "CFS_TIME_T"/%u\n"
,
- num_physpages,
+ cfs_num_physpages,
PAGES_PER_POOL,
page_pools.epp_max_pages,
page_pools.epp_max_pools,
- atomic_read(&page_pools.epp_users),
- atomic_read(&page_pools.epp_users_gone),
- page_pools.epp_waitqlen,
- page_pools.epp_pages_short,
page_pools.epp_total_pages,
page_pools.epp_free_pages,
- page_pools.epp_st_adds,
- page_pools.epp_st_failadds,
- page_pools.epp_st_reqs,
+ page_pools.epp_idle_idx,
+ cfs_time_current_sec() - page_pools.epp_last_shrink,
+ cfs_time_current_sec() - page_pools.epp_last_access,
+ page_pools.epp_st_max_pages,
+ page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks,
+ page_pools.epp_st_access,
page_pools.epp_st_missings,
page_pools.epp_st_lowfree,
page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait
+ page_pools.epp_st_max_wait, CFS_HZ
);
- spin_unlock(&page_pools.epp_lock);
+ cfs_spin_unlock(&page_pools.epp_lock);
return rc;
}
+/*
+ * give @npages free pages back to the system and drop the pool pointer
+ * arrays that are left without any page. the pages released are the
+ * topmost @npages slots of the free area. enc_pools_shrink() calls this
+ * with epp_lock held.
+ */
+static void enc_pools_release_free_pages(long npages)
+{
+        int pool, slot;
+        int last_pool_before, last_pool_after;
+
+        LASSERT(npages > 0);
+        LASSERT(npages <= page_pools.epp_free_pages);
+        LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
+
+        /* index of the last pool in use before anything is released */
+        last_pool_before = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
+
+        page_pools.epp_free_pages -= npages;
+        page_pools.epp_total_pages -= npages;
+
+        /* index of the last pool still in use afterwards, -1 if none */
+        if (page_pools.epp_total_pages == 0)
+                last_pool_after = -1;
+        else
+                last_pool_after = (page_pools.epp_total_pages - 1) /
+                                  PAGES_PER_POOL;
+
+        /* the first slot to release sits right above the remaining free pages */
+        pool = page_pools.epp_free_pages / PAGES_PER_POOL;
+        slot = page_pools.epp_free_pages % PAGES_PER_POOL;
+        LASSERT(page_pools.epp_pools[pool]);
+
+        for (; npages > 0; npages--) {
+                LASSERT(page_pools.epp_pools[pool]);
+                LASSERT(page_pools.epp_pools[pool][slot] != NULL);
+
+                cfs_free_page(page_pools.epp_pools[pool][slot]);
+                page_pools.epp_pools[pool][slot] = NULL;
+
+                slot++;
+                if (slot == PAGES_PER_POOL) {
+                        pool++;
+                        slot = 0;
+                }
+        }
+
+        /* free unused pools, walking down from the old last pool */
+        for (; last_pool_before > last_pool_after; last_pool_before--) {
+                LASSERT(page_pools.epp_pools[last_pool_before]);
+                OBD_FREE(page_pools.epp_pools[last_pool_before], CFS_PAGE_SIZE);
+                page_pools.epp_pools[last_pool_before] = NULL;
+        }
+}
+
+/*
+ * memory shrinker callback for the encryption page pools.
+ *
+ * could be called frequently for query (@nr_to_scan == 0); in that case
+ * nothing is released and only the estimate below is returned.
+ * otherwise release up to @nr_to_scan free pages, while trying to keep at
+ * least PTLRPC_MAX_BRW_PAGES free pages in the pool.
+ *
+ * returns the number of free pages above PTLRPC_MAX_BRW_PAGES (never
+ * negative), scaled by (IDLE_IDX_MAX - epp_idle_idx) / IDLE_IDX_MAX.
+ * NOTE(review): with this scaling a pool whose idle index has reached
+ * IDLE_IDX_MAX reports 0 -- confirm that this is the intended direction.
+ */
+static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+        if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
+                cfs_spin_lock(&page_pools.epp_lock);
+                /*
+                 * epp_free_pages is unsigned, so subtracting the reserve
+                 * from a smaller value would wrap to a huge number and
+                 * min_t() would then let the whole request through, asking
+                 * for more pages than are free. release nothing when the
+                 * pool is already at or below the reserve.
+                 */
+                if (page_pools.epp_free_pages <= PTLRPC_MAX_BRW_PAGES)
+                        shrink_param(sc, nr_to_scan) = 0;
+                else
+                        shrink_param(sc, nr_to_scan) = min_t(unsigned long,
+                                           shrink_param(sc, nr_to_scan),
+                                           page_pools.epp_free_pages -
+                                           PTLRPC_MAX_BRW_PAGES);
+                if (shrink_param(sc, nr_to_scan) > 0) {
+                        enc_pools_release_free_pages(shrink_param(sc,
+                                                                  nr_to_scan));
+                        CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+                               (long)shrink_param(sc, nr_to_scan),
+                               page_pools.epp_free_pages);
+
+                        page_pools.epp_st_shrinks++;
+                        page_pools.epp_last_shrink = cfs_time_current_sec();
+                }
+                cfs_spin_unlock(&page_pools.epp_lock);
+        }
+
+        /*
+         * if no pool access for a long time, we consider it's fully idle.
+         * a little race here is fine.
+         */
+        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
+                     CACHE_QUIESCENT_PERIOD)) {
+                cfs_spin_lock(&page_pools.epp_lock);
+                page_pools.epp_idle_idx = IDLE_IDX_MAX;
+                cfs_spin_unlock(&page_pools.epp_lock);
+        }
+
+        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+        return max((int) page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
static inline
int npages_to_npools(unsigned long npages)
{
/*
* return how many pages cleaned up.
*/
-static unsigned long enc_cleanup_pools(cfs_page_t ***pools, int npools)
+static unsigned long enc_pools_cleanup(cfs_page_t ***pools, int npools)
{
unsigned long cleaned = 0;
int i, j;
* we have options to avoid most memory copy with some tricks. but we choose
* the simplest way to avoid complexity. It's not frequently called.
*/
-static void enc_insert_pool(cfs_page_t ***pools, int npools, int npages)
+static void enc_pools_insert(cfs_page_t ***pools, int npools, int npages)
{
int freeslot;
int op_idx, np_idx, og_idx, ng_idx;
LASSERT(npages > 0);
LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
LASSERT(npages_to_npools(npages) == npools);
+ LASSERT(page_pools.epp_growing);
- spin_lock(&page_pools.epp_lock);
+ cfs_spin_lock(&page_pools.epp_lock);
/*
* (1) fill all the free slots of current pools.
*/
- /*
- * free slots are those left by rent pages, and the extra ones with
- * index >= eep_total_pages, locate at the tail of last pool.
- */
+ /* free slots are those left by rent pages, and the extra ones with
+ * index >= total_pages, locate at the tail of last pool. */
freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
if (freeslot != 0)
freeslot = PAGES_PER_POOL - freeslot;
page_pools.epp_free_pages += npages;
page_pools.epp_st_lowfree = page_pools.epp_free_pages;
- if (page_pools.epp_total_pages == page_pools.epp_max_pages)
- page_pools.epp_full = 1;
+ if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
+ page_pools.epp_st_max_pages = page_pools.epp_total_pages;
CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
page_pools.epp_total_pages);
- spin_unlock(&page_pools.epp_lock);
+ cfs_spin_unlock(&page_pools.epp_lock);
}
static int enc_pools_add_pages(int npages)
{
- static DECLARE_MUTEX(sem_add_pages);
+ static CFS_DEFINE_MUTEX(add_pages_mutex);
cfs_page_t ***pools;
int npools, alloced = 0;
int i, j, rc = -ENOMEM;
- down(&sem_add_pages);
+ if (npages < PTLRPC_MAX_BRW_PAGES)
+ npages = PTLRPC_MAX_BRW_PAGES;
+
+ cfs_mutex_lock(&add_pages_mutex);
- if (npages > page_pools.epp_max_pages - page_pools.epp_total_pages)
+ if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
- if (npages == 0) {
- rc = 0;
- goto out;
- }
+ LASSERT(npages > 0);
- page_pools.epp_st_adds++;
+ page_pools.epp_st_grows++;
npools = npages_to_npools(npages);
OBD_ALLOC(pools, npools * sizeof(*pools));
goto out_pools;
for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
- pools[i][j] = cfs_alloc_page(CFS_ALLOC_IO |
- CFS_ALLOC_HIGH);
+ pools[i][j] = cfs_alloc_page(CFS_ALLOC_IO |
+ CFS_ALLOC_HIGHMEM);
if (pools[i][j] == NULL)
goto out_pools;
alloced++;
}
}
+ LASSERT(alloced == npages);
- enc_insert_pool(pools, npools, npages);
- CDEBUG(D_SEC, "add %d pages into enc page pools\n", npages);
+ enc_pools_insert(pools, npools, npages);
+ CDEBUG(D_SEC, "added %d pages into pools\n", npages);
rc = 0;
out_pools:
- enc_cleanup_pools(pools, npools);
+ enc_pools_cleanup(pools, npools);
OBD_FREE(pools, npools * sizeof(*pools));
out:
if (rc) {
- page_pools.epp_st_failadds++;
- CERROR("Failed to pre-allocate %d enc pages\n", npages);
+ page_pools.epp_st_grow_fails++;
+ CERROR("Failed to allocate %d enc pages\n", npages);
}
- up(&sem_add_pages);
+ cfs_mutex_unlock(&add_pages_mutex);
return rc;
}
-/*
- * both "max bulk rpcs inflight" and "lnet MTU" are tunable, we use the
- * default fixed value initially.
- */
-int sptlrpc_enc_pool_add_user(void)
+static inline void enc_pools_wakeup(void) /* wake all threads waiting for pool pages */
{
- int page_plus = PTLRPC_MAX_BRW_PAGES * OSC_MAX_RIF_DEFAULT;
- int users, users_gone, shift, rc;
+ LASSERT_SPIN_LOCKED(&page_pools.epp_lock); /* caller must hold epp_lock */
+ LASSERT(page_pools.epp_waitqlen >= 0); /* NOTE(review): epp_waitqlen is unsigned int, so this always holds */
- LASSERT(!in_interrupt());
- LASSERT(atomic_read(&page_pools.epp_users) >= 0);
+ if (unlikely(page_pools.epp_waitqlen)) { /* only broadcast when somebody is queued */
+ LASSERT(cfs_waitq_active(&page_pools.epp_waitq));
+ cfs_waitq_broadcast(&page_pools.epp_waitq);
+ }
+}
- users_gone = atomic_dec_return(&page_pools.epp_users_gone);
- if (users_gone >= 0) {
- CWARN("%d users gone, skip\n", users_gone + 1);
+static int enc_pools_should_grow(int page_needed, long now)
+{
+ /* decide whether the caller should grow the pools to satisfy a request
+ * for @page_needed pages: returns 1 to grow, 0 to wait instead.
+ * @now is only referenced by the disabled (#if 0) check below.
+ *
+ * don't grow if someone else is growing the pools right now,
+ * or the pools have reached their full capacity
+ */
+ if (page_pools.epp_growing ||
+ page_pools.epp_total_pages == page_pools.epp_max_pages)
return 0;
- }
- atomic_inc(&page_pools.epp_users_gone);
+
+ /* if the total number of pages is not enough, we need to grow */
+ if (page_pools.epp_total_pages < page_needed)
+ return 1;
/*
- * prepare full pages for first 2 users; 1/2 for next 2 users;
- * 1/4 for next 4 users; 1/8 for next 8 users; 1/16 for next 16 users;
- * ...
+ * we wanted to return 0 here if a shrink had happened just a
+ * moment ago, but this may cause deadlock if both client and ost
+ * live on a single node.
*/
- users = atomic_add_return(1, &page_pools.epp_users);
- shift = fls(users - 1);
- shift = shift > 1 ? shift - 1 : 0;
- page_plus = page_plus >> shift;
- page_plus = page_plus > 2 ? page_plus : 2;
+#if 0
+ if (now - page_pools.epp_last_shrink < 2)
+ return 0;
+#endif
- rc = enc_pools_add_pages(page_plus);
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+ /*
+ * here we perhaps need to consider other factors like wait queue
+ * length, idle index, etc. ?
+ */
-int sptlrpc_enc_pool_del_user(void)
-{
- atomic_inc(&page_pools.epp_users_gone);
- return 0;
+ /* grow the pools in all other cases */
+ return 1;
}
-EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
/*
* we allocate the requested pages atomically.
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
cfs_waitlink_t waitlink;
- cfs_time_t tick1 = 0, tick2;
+ unsigned long this_idle = -1;
+ cfs_time_t tick = 0;
+ long now;
int p_idx, g_idx;
int i;
- LASSERT(desc->bd_max_iov > 0);
- LASSERT(desc->bd_max_iov <= page_pools.epp_total_pages);
+ LASSERT(desc->bd_iov_count > 0);
+ LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
- /* resent bulk, enc pages might have been allocated previously */
- if (desc->bd_enc_pages != NULL)
+ /* resent bulk, enc iov might have been allocated previously */
+ if (desc->bd_enc_iov != NULL)
return 0;
- OBD_ALLOC(desc->bd_enc_pages,
- desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
- if (desc->bd_enc_pages == NULL)
+ OBD_ALLOC(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ if (desc->bd_enc_iov == NULL)
return -ENOMEM;
- spin_lock(&page_pools.epp_lock);
+ cfs_spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_st_access++;
again:
- page_pools.epp_st_reqs++;
+ if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
+ if (tick == 0)
+ tick = cfs_time_current();
- if (unlikely(page_pools.epp_free_pages < desc->bd_max_iov)) {
- if (tick1 == 0)
- tick1 = cfs_time_current();
+ now = cfs_time_current_sec();
page_pools.epp_st_missings++;
- page_pools.epp_pages_short += desc->bd_max_iov;
-
- if (++page_pools.epp_waitqlen > page_pools.epp_st_max_wqlen)
- page_pools.epp_st_max_wqlen = page_pools.epp_waitqlen;
- /*
- * we just wait if someone else is adding more pages, or
- * wait queue length is not deep enough. otherwise try to
- * add more pages in the pools.
- *
- * FIXME the policy of detecting resource tight & growing pool
- * need to be reconsidered.
- */
- if (page_pools.epp_adding || page_pools.epp_waitqlen < 2 ||
- page_pools.epp_full) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- cfs_waitlink_init(&waitlink);
- cfs_waitq_add(&page_pools.epp_waitq, &waitlink);
+ page_pools.epp_pages_short += desc->bd_iov_count;
- spin_unlock(&page_pools.epp_lock);
- cfs_schedule();
- spin_lock(&page_pools.epp_lock);
- } else {
- page_pools.epp_adding = 1;
+ if (enc_pools_should_grow(desc->bd_iov_count, now)) {
+ page_pools.epp_growing = 1;
- spin_unlock(&page_pools.epp_lock);
+ cfs_spin_unlock(&page_pools.epp_lock);
enc_pools_add_pages(page_pools.epp_pages_short / 2);
- spin_lock(&page_pools.epp_lock);
+ cfs_spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_growing = 0;
+
+ enc_pools_wakeup();
+ } else {
+ if (++page_pools.epp_waitqlen >
+ page_pools.epp_st_max_wqlen)
+ page_pools.epp_st_max_wqlen =
+ page_pools.epp_waitqlen;
- page_pools.epp_adding = 0;
+ cfs_set_current_state(CFS_TASK_UNINT);
+ cfs_waitlink_init(&waitlink);
+ cfs_waitq_add(&page_pools.epp_waitq, &waitlink);
+
+ cfs_spin_unlock(&page_pools.epp_lock);
+ cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
+ cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
+ LASSERT(page_pools.epp_waitqlen > 0);
+ cfs_spin_lock(&page_pools.epp_lock);
+ page_pools.epp_waitqlen--;
}
- LASSERT(page_pools.epp_pages_short >= desc->bd_max_iov);
- LASSERT(page_pools.epp_waitqlen > 0);
- page_pools.epp_pages_short -= desc->bd_max_iov;
- page_pools.epp_waitqlen--;
+ LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
+ page_pools.epp_pages_short -= desc->bd_iov_count;
+ this_idle = 0;
goto again;
}
- /*
- * record max wait time
- */
- if (unlikely(tick1 != 0)) {
- tick2 = cfs_time_current();
- if (tick2 - tick1 > page_pools.epp_st_max_wait)
- page_pools.epp_st_max_wait = tick2 - tick1;
+
+ /* record max wait time */
+ if (unlikely(tick != 0)) {
+ tick = cfs_time_current() - tick;
+ if (tick > page_pools.epp_st_max_wait)
+ page_pools.epp_st_max_wait = tick;
}
- /*
- * proceed with rest of allocation
- */
- page_pools.epp_free_pages -= desc->bd_max_iov;
+
+ /* proceed with rest of allocation */
+ page_pools.epp_free_pages -= desc->bd_iov_count;
p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
- for (i = 0; i < desc->bd_max_iov; i++) {
+ for (i = 0; i < desc->bd_iov_count; i++) {
LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
- desc->bd_enc_pages[i] = page_pools.epp_pools[p_idx][g_idx];
+ desc->bd_enc_iov[i].kiov_page =
+ page_pools.epp_pools[p_idx][g_idx];
page_pools.epp_pools[p_idx][g_idx] = NULL;
if (++g_idx == PAGES_PER_POOL) {
if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
page_pools.epp_st_lowfree = page_pools.epp_free_pages;
- spin_unlock(&page_pools.epp_lock);
+ /*
+ * new idle index = (old * weight + new) / (weight + 1)
+ */
+ if (this_idle == -1) {
+ this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
+ page_pools.epp_total_pages;
+ }
+ page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
+ this_idle) /
+ (IDLE_IDX_WEIGHT + 1);
+
+ page_pools.epp_last_access = cfs_time_current_sec();
+
+ cfs_spin_unlock(&page_pools.epp_lock);
return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
int p_idx, g_idx;
int i;
- if (desc->bd_enc_pages == NULL)
- return;
- if (desc->bd_max_iov == 0)
+ if (desc->bd_enc_iov == NULL)
return;
- spin_lock(&page_pools.epp_lock);
+ LASSERT(desc->bd_iov_count > 0);
+
+ cfs_spin_lock(&page_pools.epp_lock);
p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
- LASSERT(page_pools.epp_free_pages + desc->bd_max_iov <=
+ LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
page_pools.epp_total_pages);
LASSERT(page_pools.epp_pools[p_idx]);
- for (i = 0; i < desc->bd_max_iov; i++) {
- LASSERT(desc->bd_enc_pages[i] != NULL);
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
- page_pools.epp_pools[p_idx][g_idx] = desc->bd_enc_pages[i];
+ page_pools.epp_pools[p_idx][g_idx] =
+ desc->bd_enc_iov[i].kiov_page;
if (++g_idx == PAGES_PER_POOL) {
p_idx++;
}
}
- page_pools.epp_free_pages += desc->bd_max_iov;
+ page_pools.epp_free_pages += desc->bd_iov_count;
- if (unlikely(page_pools.epp_waitqlen)) {
- LASSERT(page_pools.epp_waitqlen > 0);
- LASSERT(cfs_waitq_active(&page_pools.epp_waitq));
- cfs_waitq_broadcast(&page_pools.epp_waitq);
- }
+ enc_pools_wakeup();
- spin_unlock(&page_pools.epp_lock);
+ cfs_spin_unlock(&page_pools.epp_lock);
- OBD_FREE(desc->bd_enc_pages,
- desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
- desc->bd_enc_pages = NULL;
+ OBD_FREE(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ desc->bd_enc_iov = NULL;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
+/*
+ * add_user/del_user no longer track users. the only work left here is to
+ * seed the pools with some initial pages when they are still empty and
+ * nobody else is growing them; everything after that is handled by the
+ * pools' self-adaption. always returns 0, the result of the initial
+ * allocation is not reported to the caller.
+ */
+int sptlrpc_enc_pool_add_user(void)
+{
+        int seed_pools;
+
+        /* claim the "growing" flag under the lock so only one caller seeds */
+        cfs_spin_lock(&page_pools.epp_lock);
+        seed_pools = (page_pools.epp_growing == 0 &&
+                      page_pools.epp_total_pages == 0);
+        if (seed_pools)
+                page_pools.epp_growing = 1;
+        cfs_spin_unlock(&page_pools.epp_lock);
+
+        if (!seed_pools)
+                return 0;
+
+        /* allocate without the spinlock held */
+        enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
+                            PTLRPC_MAX_BRW_PAGES);
+
+        /* release the flag and let any waiters retry */
+        cfs_spin_lock(&page_pools.epp_lock);
+        page_pools.epp_growing = 0;
+        enc_pools_wakeup();
+        cfs_spin_unlock(&page_pools.epp_lock);
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+
+int sptlrpc_enc_pool_del_user(void)
+{
+ return 0; /* nothing to undo: add_user() keeps no per-user state */
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
+/*
+ * allocate the array of page_pools.epp_max_pools pool pointers into
+ * page_pools.epp_pools. no status is returned; sptlrpc_enc_pool_init()
+ * checks epp_pools against NULL after calling this.
+ */
+static inline void enc_pools_alloc(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ OBD_ALLOC_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+/*
+ * free the array of pool pointers allocated by enc_pools_alloc(), using
+ * the same size expression. only the top-level array is freed here, not
+ * the pools it points to.
+ */
+static inline void enc_pools_free(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ LASSERT(page_pools.epp_pools);
+
+ OBD_FREE_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+
int sptlrpc_enc_pool_init(void)
{
- /* constants */
- page_pools.epp_max_pages = num_physpages / 4;
+ /*
+ * set up the pool limits, wait queue, lock and counters, allocate
+ * the array of pool pointers and register the memory shrinker.
+ * no pages are added to the pools here.
+ * returns 0 on success, or -ENOMEM if the array allocation or the
+ * shrinker registration fails.
+ *
+ * maximum capacity is 1/8 of total physical memory.
+ * is the 1/8 a good number?
+ */
+ page_pools.epp_max_pages = cfs_num_physpages / 8;
page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
- atomic_set(&page_pools.epp_users, 0);
- atomic_set(&page_pools.epp_users_gone, 0);
-
cfs_waitq_init(&page_pools.epp_waitq);
page_pools.epp_waitqlen = 0;
page_pools.epp_pages_short = 0;
- page_pools.epp_adding = 0;
- page_pools.epp_full = 0;
+ page_pools.epp_growing = 0;
- spin_lock_init(&page_pools.epp_lock);
+ page_pools.epp_idle_idx = 0;
+ page_pools.epp_last_shrink = cfs_time_current_sec();
+ page_pools.epp_last_access = cfs_time_current_sec();
+
+ cfs_spin_lock_init(&page_pools.epp_lock);
page_pools.epp_total_pages = 0;
page_pools.epp_free_pages = 0;
- page_pools.epp_st_adds = 0;
- page_pools.epp_st_failadds = 0;
- page_pools.epp_st_reqs = 0;
+ page_pools.epp_st_max_pages = 0;
+ page_pools.epp_st_grows = 0;
+ page_pools.epp_st_grow_fails = 0;
+ page_pools.epp_st_shrinks = 0;
+ page_pools.epp_st_access = 0;
page_pools.epp_st_missings = 0;
page_pools.epp_st_lowfree = 0;
page_pools.epp_st_max_wqlen = 0;
page_pools.epp_st_max_wait = 0;
- OBD_ALLOC(page_pools.epp_pools,
- page_pools.epp_max_pools * sizeof(*page_pools.epp_pools));
+ enc_pools_alloc();
if (page_pools.epp_pools == NULL)
return -ENOMEM;
+ pools_shrinker = cfs_set_shrinker(pools_shrinker_seeks,
+ enc_pools_shrink);
+ if (pools_shrinker == NULL) {
+ enc_pools_free();
+ return -ENOMEM;
+ }
+
return 0;
}
{
unsigned long cleaned, npools;
+ LASSERT(pools_shrinker);
LASSERT(page_pools.epp_pools);
LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
+ cfs_remove_shrinker(pools_shrinker);
+
npools = npages_to_npools(page_pools.epp_total_pages);
- cleaned = enc_cleanup_pools(page_pools.epp_pools, npools);
+ cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
LASSERT(cleaned == page_pools.epp_total_pages);
- OBD_FREE(page_pools.epp_pools,
- page_pools.epp_max_pools * sizeof(*page_pools.epp_pools));
+ enc_pools_free();
+
+ if (page_pools.epp_st_access > 0) {
+ CDEBUG(D_SEC,
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, "
+ "access %lu, missing %lu, max qlen %u, max wait "
+ CFS_TIME_T"/%d\n",
+ page_pools.epp_st_max_pages, page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks, page_pools.epp_st_access,
+ page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait, CFS_HZ);
+ }
}
#else /* !__KERNEL__ */
}
#endif
-/****************************************
- * Helpers to assist policy modules to *
- * implement checksum funcationality *
- ****************************************/
-
-static struct {
- char *name;
- int size;
-} csum_types[] = {
- [BULK_CSUM_ALG_NULL] = { "null", 0 },
- [BULK_CSUM_ALG_CRC32] = { "crc32", 4 },
- [BULK_CSUM_ALG_MD5] = { "md5", 16 },
- [BULK_CSUM_ALG_SHA1] = { "sha1", 20 },
- [BULK_CSUM_ALG_SHA256] = { "sha256", 32 },
- [BULK_CSUM_ALG_SHA384] = { "sha384", 48 },
- [BULK_CSUM_ALG_SHA512] = { "sha512", 64 },
+static int cfs_hash_alg_id[] = { /* maps BULK_HASH_ALG_* indexes to CFS_HASH_ALG_* ids */
+ [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL,
+ [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
+ [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32,
+ [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5,
+ [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1,
+ [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256,
+ [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384,
+ [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512,
};
-
-const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg)
+const char * sptlrpc_get_hash_name(__u8 hash_alg) /* name of bulk hash algorithm @hash_alg */
{
- if (csum_alg < BULK_CSUM_ALG_MAX)
- return csum_types[csum_alg].name;
- return "unknown_cksum";
+ return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]); /* NOTE(review): @hash_alg indexes the table with no range check -- confirm callers validate it */
}
-EXPORT_SYMBOL(sptlrpc_bulk_csum_alg2name);
+EXPORT_SYMBOL(sptlrpc_get_hash_name);
-int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
+__u8 sptlrpc_get_hash_alg(const char *algname) /* look up a hash algorithm id by its name */
{
- int size = sizeof(struct ptlrpc_bulk_sec_desc);
-
- LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
-
- /* read request don't need extra data */
- if (!(read && request))
- size += csum_types[csum_alg].size;
-
- return size;
+ return cfs_crypto_hash_alg(algname); /* NOTE(review): result is returned as-is, not mapped back through cfs_hash_alg_id[] -- confirm both id spaces agree */
}
-EXPORT_SYMBOL(bulk_sec_desc_size);
+EXPORT_SYMBOL(sptlrpc_get_hash_alg);
-int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
{
struct ptlrpc_bulk_sec_desc *bsd;
- int size = msg->lm_buflens[offset];
+ int size = msg->lm_buflens[offset];
bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
if (bsd == NULL) {
return -EINVAL;
}
- if (lustre_msg_swabbed(msg)) {
- __swab32s(&bsd->bsd_version);
- __swab32s(&bsd->bsd_pad);
- __swab32s(&bsd->bsd_csum_alg);
- __swab32s(&bsd->bsd_priv_alg);
+ if (swabbed) {
+ __swab32s(&bsd->bsd_nob);
}
- if (bsd->bsd_version != 0) {
+ if (unlikely(bsd->bsd_version != 0)) {
CERROR("Unexpected version %u\n", bsd->bsd_version);
return -EPROTO;
}
- if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
- CERROR("Unsupported checksum algorithm %u\n",
- bsd->bsd_csum_alg);
- return -EINVAL;
- }
- if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
- CERROR("Unsupported cipher algorithm %u\n",
- bsd->bsd_priv_alg);
- return -EINVAL;
- }
-
- if (size > sizeof(*bsd) &&
- size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) {
- CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
- bsd->bsd_csum_alg, size);
- return -EINVAL;
+ if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
+ CERROR("Invalid type %u\n", bsd->bsd_type);
+ return -EPROTO;
}
- return 0;
-}
-EXPORT_SYMBOL(bulk_sec_desc_unpack);
-
-#ifdef __KERNEL__
-static
-int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
-{
- struct page *page;
- int off;
- char *ptr;
- __u32 crc32 = ~0;
- int len, i;
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- page = desc->bd_iov[i].kiov_page;
- off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- ptr = cfs_kmap(page) + off;
- len = desc->bd_iov[i].kiov_len;
-
- crc32 = crc32_le(crc32, ptr, len);
+ /* FIXME more sanity check here */
- cfs_kunmap(page);
+ if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
+ CERROR("Invalid svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
}
- *((__u32 *) buf) = crc32;
return 0;
}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
-static
-int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
-{
- struct crypto_tfm *tfm;
- struct scatterlist *sl;
- int i, rc = 0;
-
- LASSERT(alg > BULK_CSUM_ALG_NULL &&
- alg < BULK_CSUM_ALG_MAX);
-
- if (alg == BULK_CSUM_ALG_CRC32)
- return do_bulk_checksum_crc32(desc, buf);
-
- tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
- if (tfm == NULL) {
- CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
- return -ENOMEM;
- }
-
- OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
- if (sl == NULL) {
- rc = -ENOMEM;
- goto out_tfm;
- }
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- sl[i].page = desc->bd_iov[i].kiov_page;
- sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- sl[i].length = desc->bd_iov[i].kiov_len;
- }
-
- crypto_digest_init(tfm);
- crypto_digest_update(tfm, sl, desc->bd_iov_count);
- crypto_digest_final(tfm, buf);
-
- OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
-
-out_tfm:
- crypto_free_tfm(tfm);
- return rc;
-}
-
-#else /* !__KERNEL__ */
-static
-int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+/*
+ * Compute the checksum of the bulk data described by @desc using hash
+ * algorithm @alg and store it in @buf.
+ *
+ * Every entry of desc->bd_iov (bd_iov_count of them) is fed to the hash in
+ * array order: page by page in the kernel build, as flat buffers otherwise.
+ *
+ * @alg is asserted to lie strictly between BULK_HASH_ALG_NULL and
+ * BULK_HASH_ALG_MAX, and @buflen is asserted to be at least 4.
+ * When the digest is longer than @buflen only its first @buflen bytes are
+ * stored in @buf.
+ *
+ * Returns 0 on success, PTR_ERR() of the descriptor if the hash cannot be
+ * initialised, or the error reported by cfs_crypto_hash_final(). On a
+ * failed truncated final @buf is left untouched.
+ */
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen)
{
- __u32 crc32 = ~0;
- int i;
+ struct cfs_crypto_hash_desc *hdesc;
+ int hashsize;
+ char hashbuf[64];
+ unsigned int bufsize;
+ int i, err;
- LASSERT(alg == BULK_CSUM_ALG_CRC32);
+ LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
+ LASSERT(buflen >= 4);
- for (i = 0; i < desc->bd_iov_count; i++) {
- char *ptr = desc->bd_iov[i].iov_base;
- int len = desc->bd_iov[i].iov_len;
+ hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
+ if (IS_ERR(hdesc)) {
+ CERROR("Unable to initialize checksum hash %s\n",
+ cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
+ return PTR_ERR(hdesc);
+ }
- crc32 = crc32_le(crc32, ptr, len);
- }
+ hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
- *((__u32 *) buf) = crc32;
- return 0;
-}
+ /* feed every bulk fragment to the hash, in iov order */
+ for (i = 0; i < desc->bd_iov_count; i++) {
+#ifdef __KERNEL__
+ cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
+ desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+ desc->bd_iov[i].kiov_len);
+#else
+ cfs_crypto_hash_update(hdesc, desc->bd_iov[i].iov_base,
+ desc->bd_iov[i].iov_len);
#endif
-
-/*
- * perform algorithm @alg checksum on @desc, store result in @buf.
- * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL.
- */
-static
-int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
- struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
-{
- int rc;
-
- LASSERT(bsd);
- LASSERT(alg < BULK_CSUM_ALG_MAX);
-
- bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
-
- if (alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
-
- rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
- if (rc == 0)
- bsd->bsd_csum_alg = alg;
-
- return rc;
-}
-
-static
-int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
- struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
- struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
-{
- char *csum_p;
- char *buf = NULL;
- int csum_size, rc = 0;
-
- LASSERT(bsdv);
- LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
-
- if (bsdr)
- bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
-
- if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- /* for all supported algorithms */
- csum_size = csum_types[bsdv->bsd_csum_alg].size;
-
- if (bsdvsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- bsdvsize, (int) sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- if (bsdr) {
- LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
- csum_p = (char *) bsdr->bsd_csum;
- } else {
- OBD_ALLOC(buf, csum_size);
- if (buf == NULL)
- return -EINVAL;
- csum_p = buf;
- }
-
- rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
-
- if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
- CERROR("BAD %s CHECKSUM (%s), data mutated during "
- "transfer!\n", read ? "READ" : "WRITE",
- csum_types[bsdv->bsd_csum_alg].name);
- rc = -EINVAL;
- } else {
- CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
- read ? "read" : "write",
- csum_types[bsdv->bsd_csum_alg].name);
- }
-
- if (bsdr) {
- bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
- memcpy(bsdr->bsd_csum, csum_p, csum_size);
- } else {
- LASSERT(buf);
- OBD_FREE(buf, csum_size);
- }
-
- return rc;
-}
-
-int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
- __u32 alg, struct lustre_msg *rmsg, int roff)
-{
- struct ptlrpc_bulk_sec_desc *bsdr;
- int rsize, rc = 0;
-
- rsize = rmsg->lm_buflens[roff];
- bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(alg < BULK_CSUM_ALG_MAX);
-
- if (read)
- bsdr->bsd_csum_alg = alg;
- else {
- rc = generate_bulk_csum(desc, alg, bsdr, rsize);
- if (rc) {
- CERROR("client bulk write: failed to perform "
- "checksum: %d\n", rc);
- }
- }
-
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_cli_request);
-
-int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *rmsg, int roff,
- struct lustre_msg *vmsg, int voff)
-{
- struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
- int rsize, vsize;
-
- rsize = rmsg->lm_buflens[roff];
- vsize = vmsg->lm_buflens[voff];
- bsdr = lustre_msg_buf(rmsg, roff, 0);
- bsdv = lustre_msg_buf(vmsg, voff, 0);
-
- if (bsdv == NULL || vsize < sizeof(*bsdv)) {
- CERROR("Invalid checksum verifier from server: size %d\n",
- vsize);
- return -EINVAL;
- }
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(vsize >= sizeof(*bsdv));
-
- if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
- CERROR("bulk %s: checksum algorithm mismatch: client request "
- "%s but server reply with %s. try to use the new one "
- "for checksum verification\n",
- read ? "read" : "write",
- csum_types[bsdr->bsd_csum_alg].name,
- csum_types[bsdv->bsd_csum_alg].name);
- }
-
- if (read)
- return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
- else {
- char *cli, *srv, *new = NULL;
- int csum_size = csum_types[bsdr->bsd_csum_alg].size;
-
- LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
- if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- if (vsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- vsize, (int) sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- cli = (char *) (bsdr + 1);
- srv = (char *) (bsdv + 1);
-
- if (!memcmp(cli, srv, csum_size)) {
- /* checksum confirmed */
- CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
- csum_types[bsdr->bsd_csum_alg].name);
- return 0;
- }
-
- /* checksum mismatch, re-compute a new one and compare with
- * others, give out proper warnings.
- */
- OBD_ALLOC(new, csum_size);
- if (new == NULL)
- return -ENOMEM;
-
- do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
-
- if (!memcmp(new, srv, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "on the client after we checksummed them\n",
- csum_types[bsdr->bsd_csum_alg].name);
- } else if (!memcmp(new, cli, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit\n",
- csum_types[bsdr->bsd_csum_alg].name);
- } else {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit, and the current page contents "
- "don't match the originals and what the server "
- "received\n",
- csum_types[bsdr->bsd_csum_alg].name);
- }
- OBD_FREE(new, csum_size);
-
- return -EINVAL;
- }
+ }
+ if (hashsize > buflen) {
+ /* digest does not fit in the caller's buffer: finalise into the
+ * local hashbuf and hand back only its leading buflen bytes.
+ * NOTE(review): assumes no supported digest exceeds
+ * sizeof(hashbuf) -- confirm against the algorithm table. */
+ bufsize = sizeof(hashbuf);
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
+ &bufsize);
+ /* hashbuf is only meaningful after a successful final; never
+ * copy uninitialised stack bytes into the caller's buffer */
+ if (err == 0)
+ memcpy(buf, hashbuf, buflen);
+ } else {
+ bufsize = buflen;
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf,
+ &bufsize);
+ }
+
+ /* NOTE(review): presumably the NULL/NULL call only releases the
+ * descriptor left behind by a failed final -- confirm in libcfs */
+ if (err)
+ cfs_crypto_hash_final(hdesc, NULL, NULL);
+ return err;
}
-EXPORT_SYMBOL(bulk_csum_cli_reply);
+EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
-int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *vmsg, int voff,
- struct lustre_msg *rmsg, int roff)
-{
- struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
- int vsize, rsize, rc;
-
- vsize = vmsg->lm_buflens[voff];
- rsize = rmsg->lm_buflens[roff];
- bsdv = lustre_msg_buf(vmsg, voff, 0);
- bsdr = lustre_msg_buf(rmsg, roff, 0);
-
- LASSERT(vsize >= sizeof(*bsdv));
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(bsdv && bsdr);
-
- if (read) {
- rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
- if (rc)
- CERROR("bulk read: server failed to generate %s "
- "checksum: %d\n",
- csum_types[bsdv->bsd_csum_alg].name, rc);
- } else
- rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_svc);
-
-/****************************************
- * Helpers to assist policy modules to *
- * implement encryption funcationality *
- ****************************************/
-
-/*
- * NOTE: These algorithms must be stream cipher!
- */
-static struct {
- char *name;
- __u32 flags;
-} priv_types[] = {
- [BULK_PRIV_ALG_NULL] = { "null", 0 },
- [BULK_PRIV_ALG_ARC4] = { "arc4", 0 },
-};
-
-const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg)
-{
- if (priv_alg < BULK_PRIV_ALG_MAX)
- return priv_types[priv_alg].name;
- return "unknown_priv";
-}
-EXPORT_SYMBOL(sptlrpc_bulk_priv_alg2name);
-
-__u32 sptlrpc_bulk_priv_alg2flags(__u32 priv_alg)
-{
- if (priv_alg < BULK_PRIV_ALG_MAX)
- return priv_types[priv_alg].flags;
- return 0;
-}
-EXPORT_SYMBOL(sptlrpc_bulk_priv_alg2flags);