*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include "ptlrpc_internal.h"
+static int mult = 20 - PAGE_SHIFT; /* shift between megabytes and pages:
+ * pages = MB << mult and MB = pages >> mult, as
+ * used in sptlrpc_enc_pool_init() */
+static int enc_pool_max_memory_mb; /* upper limit for the pools, in MB;
+ * values <= 0 or larger than total RAM are ignored
+ * by sptlrpc_enc_pool_init(), which then keeps
+ * totalram_pages / 8.
+ * NOTE(review): the parameter is 0644, but in the
+ * code visible here it is only consulted in
+ * sptlrpc_enc_pool_init() - confirm that later
+ * writes are meant to have no effect. */
+module_param(enc_pool_max_memory_mb, int, 0644);
+MODULE_PARM_DESC(enc_pool_max_memory_mb,
+ "Encoding pool max memory (MB), 1/8 of total physical memory by default");
+
/****************************************
* bulk encryption page pools *
****************************************/
-#define PTRS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *))
+#define PTRS_PER_PAGE (PAGE_SIZE / sizeof(void *))
#define PAGES_PER_POOL (PTRS_PER_PAGE)
#define IDLE_IDX_MAX (100)
unsigned long epp_st_lowfree; /* lowest free pages reached */
unsigned int epp_st_max_wqlen; /* highest waitqueue length */
cfs_time_t epp_st_max_wait; /* in jeffies */
+ unsigned long epp_st_outofmem; /* # of out of mem requests */
/*
- * pointers to pools
+ * pointers to pools, may be vmalloc'd
*/
struct page ***epp_pools;
} page_pools;
*/
int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
{
- int rc;
-
spin_lock(&page_pools.epp_lock);
- rc = seq_printf(m,
- "physical pages: %lu\n"
- "pages per pool: %lu\n"
- "max pages: %lu\n"
- "max pools: %u\n"
- "total pages: %lu\n"
- "total free: %lu\n"
- "idle index: %lu/100\n"
- "last shrink: %lds\n"
- "last access: %lds\n"
- "max pages reached: %lu\n"
- "grows: %u\n"
- "grows failure: %u\n"
- "shrinks: %u\n"
- "cache access: %lu\n"
- "cache missing: %lu\n"
- "low free mark: %lu\n"
- "max waitqueue depth: %u\n"
- "max wait time: "CFS_TIME_T"/%lu\n"
- ,
- totalram_pages,
- PAGES_PER_POOL,
- page_pools.epp_max_pages,
- page_pools.epp_max_pools,
- page_pools.epp_total_pages,
- page_pools.epp_free_pages,
- page_pools.epp_idle_idx,
- cfs_time_current_sec() - page_pools.epp_last_shrink,
- cfs_time_current_sec() - page_pools.epp_last_access,
- page_pools.epp_st_max_pages,
- page_pools.epp_st_grows,
- page_pools.epp_st_grow_fails,
- page_pools.epp_st_shrinks,
- page_pools.epp_st_access,
- page_pools.epp_st_missings,
- page_pools.epp_st_lowfree,
- page_pools.epp_st_max_wqlen,
- page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC)
- );
+ seq_printf(m, "physical pages: %lu\n"
+ "pages per pool: %lu\n"
+ "max pages: %lu\n"
+ "max pools: %u\n"
+ "total pages: %lu\n"
+ "total free: %lu\n"
+ "idle index: %lu/100\n"
+ "last shrink: %lds\n"
+ "last access: %lds\n"
+ "max pages reached: %lu\n"
+ "grows: %u\n"
+ "grows failure: %u\n"
+ "shrinks: %u\n"
+ "cache access: %lu\n"
+ "cache missing: %lu\n"
+ "low free mark: %lu\n"
+ "max waitqueue depth: %u\n"
+ "max wait time: "CFS_TIME_T"/%lu\n"
+ "out of mem: %lu\n",
+ totalram_pages, PAGES_PER_POOL,
+ page_pools.epp_max_pages,
+ page_pools.epp_max_pools,
+ page_pools.epp_total_pages,
+ page_pools.epp_free_pages,
+ page_pools.epp_idle_idx,
+ cfs_time_current_sec() - page_pools.epp_last_shrink,
+ cfs_time_current_sec() - page_pools.epp_last_access,
+ page_pools.epp_st_max_pages,
+ page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks,
+ page_pools.epp_st_access,
+ page_pools.epp_st_missings,
+ page_pools.epp_st_lowfree,
+ page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait,
+ msecs_to_jiffies(MSEC_PER_SEC),
+ page_pools.epp_st_outofmem); /* every page_pools field above is read
+ * between spin_lock() and spin_unlock()
+ * on epp_lock */
spin_unlock(&page_pools.epp_lock);
- return rc;
+ return 0; /* the seq_printf() result is not propagated; always 0 */
}
static void enc_pools_release_free_pages(long npages)
/* free unused pools */
while (p_idx_max1 < p_idx_max2) {
LASSERT(page_pools.epp_pools[p_idx_max2]);
- OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
+ OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_SIZE);
page_pools.epp_pools[p_idx_max2] = NULL;
p_idx_max2--;
}
cleaned++;
}
}
- OBD_FREE(pools[i], PAGE_CACHE_SIZE);
+ OBD_FREE(pools[i], PAGE_SIZE);
pools[i] = NULL;
}
}
goto out;
for (i = 0; i < npools; i++) {
- OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
+ OBD_ALLOC(pools[i], PAGE_SIZE);
if (pools[i] == NULL)
goto out_pools;
static int enc_pools_should_grow(int page_needed, long now)
{
- /* don't grow if someone else is growing the pools right now,
- * or the pools has reached its full capacity
- */
- if (page_pools.epp_growing ||
- page_pools.epp_total_pages == page_pools.epp_max_pages)
- return 0;
+ /* Decide whether the pools should grow: returns 1 to grow, 0 not to.
+ * Don't grow if someone else is growing the pools right now,
+ * or the pools have reached their full capacity.
+ */
+ if (page_pools.epp_growing ||
+ page_pools.epp_total_pages == page_pools.epp_max_pages)
+ return 0;
- /* if total pages is not enough, we need to grow */
- if (page_pools.epp_total_pages < page_needed)
- return 1;
+ /* if the total pages are fewer than page_needed, we need to grow */
+ if (page_pools.epp_total_pages < page_needed)
+ return 1;
- /*
- * we wanted to return 0 here if there was a shrink just happened
- * moment ago, but this may cause deadlock if both client and ost
- * live on single node.
- */
-#if 0
- if (now - page_pools.epp_last_shrink < 2)
- return 0;
-#endif
+ /*
+ * we wanted to return 0 here if a shrink happened just a moment
+ * ago, but this may cause deadlock if both client and ost live
+ * on a single node. No such check is performed, so the now
+ * argument is unused in this function.
+ */
- /*
- * here we perhaps need consider other factors like wait queue
- * length, idle index, etc. ?
- */
+ /*
+ * perhaps we need to consider other factors here, like wait queue
+ * length, idle index, etc. ?
+ */
+
+ /* grow the pools in all other cases */
+ return 1;
+}
- /* grow the pools in any other cases */
- return 1;
+/*
+ * Export the number of free pages in the pool.
+ *
+ * Returns page_pools.epp_free_pages converted to int. The counter is read
+ * without taking page_pools.epp_lock.
+ * NOTE(review): the same counter is printed with %lu in
+ * sptlrpc_proc_enc_pool_seq_show(), so the int return value may truncate
+ * very large counts - confirm the field type.
+ */
+int get_free_pages_in_pool(void)
+{
+ return page_pools.epp_free_pages;
}
+EXPORT_SYMBOL(get_free_pages_in_pool);
+
+/*
+ * Let the outside world know whether the enc_pool has reached full capacity.
+ *
+ * Returns non-zero when page_pools.epp_total_pages equals
+ * page_pools.epp_max_pages, which is the same comparison
+ * enc_pools_should_grow() uses to refuse growing, and 0 otherwise.
+ * Both counters are read without taking page_pools.epp_lock.
+ */
+int pool_is_at_full_capacity(void)
+{
+ return (page_pools.epp_total_pages == page_pools.epp_max_pages);
+}
+EXPORT_SYMBOL(pool_is_at_full_capacity);
/*
* we allocate the requested pages atomically.
if (GET_ENC_KIOV(desc) != NULL)
return 0;
- OBD_ALLOC(GET_ENC_KIOV(desc),
+ OBD_ALLOC_LARGE(GET_ENC_KIOV(desc),
desc->bd_iov_count * sizeof(*GET_ENC_KIOV(desc)));
if (GET_ENC_KIOV(desc) == NULL)
return -ENOMEM;
enc_pools_wakeup();
} else {
- if (++page_pools.epp_waitqlen >
- page_pools.epp_st_max_wqlen)
- page_pools.epp_st_max_wqlen =
- page_pools.epp_waitqlen;
-
- set_current_state(TASK_UNINTERRUPTIBLE);
- init_waitqueue_entry(&waitlink, current);
- add_wait_queue(&page_pools.epp_waitq, &waitlink);
-
- spin_unlock(&page_pools.epp_lock);
- schedule();
- remove_wait_queue(&page_pools.epp_waitq, &waitlink);
- LASSERT(page_pools.epp_waitqlen > 0);
- spin_lock(&page_pools.epp_lock);
- page_pools.epp_waitqlen--;
+ if (page_pools.epp_growing) {
+ if (++page_pools.epp_waitqlen >
+ page_pools.epp_st_max_wqlen)
+ page_pools.epp_st_max_wqlen =
+ page_pools.epp_waitqlen;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ init_waitqueue_entry(&waitlink, current);
+ add_wait_queue(&page_pools.epp_waitq,
+ &waitlink);
+
+ spin_unlock(&page_pools.epp_lock);
+ schedule();
+ remove_wait_queue(&page_pools.epp_waitq,
+ &waitlink);
+ LASSERT(page_pools.epp_waitqlen > 0);
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_waitqlen--;
+ } else {
+ /* ptlrpcd thread should not sleep in that case,
+ * or deadlock may occur!
+ * Instead, return -ENOMEM so that upper layers
+ * will put request back in queue. */
+ page_pools.epp_st_outofmem++;
+ spin_unlock(&page_pools.epp_lock);
+ OBD_FREE_LARGE(GET_ENC_KIOV(desc),
+ desc->bd_iov_count *
+ sizeof(*GET_ENC_KIOV(desc)));
+ GET_ENC_KIOV(desc) = NULL;
+ return -ENOMEM;
+ }
}
LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
spin_unlock(&page_pools.epp_lock);
- OBD_FREE(GET_ENC_KIOV(desc),
+ OBD_FREE_LARGE(GET_ENC_KIOV(desc),
desc->bd_iov_count * sizeof(*GET_ENC_KIOV(desc)));
GET_ENC_KIOV(desc) = NULL;
}
static inline void enc_pools_alloc(void)
{
- LASSERT(page_pools.epp_max_pools);
- OBD_ALLOC_LARGE(page_pools.epp_pools,
- page_pools.epp_max_pools *
- sizeof(*page_pools.epp_pools));
+ LASSERT(page_pools.epp_max_pools); /* epp_max_pools must be set first */
+ OBD_ALLOC_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools)); /* table of epp_max_pools pool
+ * pointers; the result is not
+ * checked here, the caller
+ * tests epp_pools for NULL */
}
static inline void enc_pools_free(void)
{
- LASSERT(page_pools.epp_max_pools);
- LASSERT(page_pools.epp_pools);
+ LASSERT(page_pools.epp_max_pools);
+ LASSERT(page_pools.epp_pools); /* the pointer table must be allocated */
- OBD_FREE_LARGE(page_pools.epp_pools,
- page_pools.epp_max_pools *
- sizeof(*page_pools.epp_pools));
+ OBD_FREE_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools)); /* same size expression as in
+ * enc_pools_alloc() */
}
int sptlrpc_enc_pool_init(void)
{
DEF_SHRINKER_VAR(shvar, enc_pools_shrink,
enc_pools_shrink_count, enc_pools_shrink_scan);
- /*
- * maximum capacity is 1/8 of total physical memory.
- * is the 1/8 a good number?
- */
+
page_pools.epp_max_pages = totalram_pages / 8;
+ if (enc_pool_max_memory_mb > 0 &&
+ enc_pool_max_memory_mb <= (totalram_pages >> mult))
+ page_pools.epp_max_pages = enc_pool_max_memory_mb << mult;
+
page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
init_waitqueue_head(&page_pools.epp_waitq);
page_pools.epp_st_lowfree = 0;
page_pools.epp_st_max_wqlen = 0;
page_pools.epp_st_max_wait = 0;
+ page_pools.epp_st_outofmem = 0;
enc_pools_alloc();
if (page_pools.epp_pools == NULL)
CDEBUG(D_SEC,
"max pages %lu, grows %u, grow fails %u, shrinks %u, "
"access %lu, missing %lu, max qlen %u, max wait "
- CFS_TIME_T"/%lu\n",
+ CFS_TIME_T"/%lu, out of mem %lu\n",
page_pools.epp_st_max_pages, page_pools.epp_st_grows,
page_pools.epp_st_grow_fails,
page_pools.epp_st_shrinks, page_pools.epp_st_access,
page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
page_pools.epp_st_max_wait,
- msecs_to_jiffies(MSEC_PER_SEC));
+ msecs_to_jiffies(MSEC_PER_SEC),
+ page_pools.epp_st_outofmem);
}
}