From: Sergey Cheremencev Date: Tue, 9 Jul 2024 02:43:29 +0000 (+0300) Subject: LU-18086 obdclass: page_pools shrinker fix X-Git-Tag: 2.15.91~57 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=14fc034e983e36f09d1308262c0d53872d5cfba0;p=fs%2Flustre-release.git LU-18086 obdclass: page_pools shrinker fix page_pool shrinker should consider opp_free_pages as a number of memory regions with the size of 2^pool_order pages. Otherwise it starts shrinking only when becomes too large, for example 2GB instead of 64MB. It often might be the reason of OOM especially when compressed files with different chunk-sizes have been written. In a such case several pools created for different chunk sizes might hold gigabytes of memory without any chance to release it. Fix element_size to return correct element size instead of number of pages. Because of that elements in page_pools had below sizes: order 0 = 4096 order 1 = 2 order 2 = 4 .... order 17 = 131072 This made possible to have elements with sizes 2,4,8,... bytes that have been allocated with OBD_VMALLOC. Now all sizes are miltiple of PAGE_SIZE: order 0 = 4096 order 1 = 8192 ... order 5 = 131072 Change the logic around opp_idle_idx. 1. Recalculate opp_idle_idx in __sptlrpc_pool_put_pages the same way as it done in __sptlrpc_pool_get_pages. It is possible that opp_idle_idx might become 0 or very small (less than 10). If there is no new allocations it would be impossible to free anything from the pool during 40 seconds(CACHE_QUIESCENT_PERIOD) despite the large amount of free elements ready to shrinking. 2. In pool_shrink_count when opp_idle_idx == IDLE_IDX_MAX, it means no one accessed pool for CACHE_QUIESCENT_PERIOD (40 seconds), i.e. we may release as much as possible pages. Earlier in a such case pool_shrink_count always returned 0. Another words it was impossible to shrink anything from the pool if there was no allocations for 40s. Signed-off-by: Sergey Cheremencev Signed-off-by: Artem Blagodarenko Change-Id: I6b55ba67b0d21cdffdb57034e8e66063745f796e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55914 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/obdclass/page_pools.c b/lustre/obdclass/page_pools.c index ecb23cc..7b56427 100644 --- a/lustre/obdclass/page_pools.c +++ b/lustre/obdclass/page_pools.c @@ -147,7 +147,7 @@ static inline int get_pool_index(struct shrinker *shrinker) static int element_size(struct obd_page_pool *pool) { - return 1 << pool->opp_order; + return PAGE_SIZE << pool->opp_order; } /* @@ -323,26 +323,28 @@ static unsigned long pool_shrink_count(struct shrinker *s, { int pool_order; struct obd_page_pool *pool; + unsigned long max_objects; pool_order = get_pool_index(s); pool = page_pools[pool_order]; + max_objects = PTLRPC_MAX_BRW_PAGES >> pool_order; /* * if no pool access for a long time, we consider it's fully * idle. A little race here is fine. */ - if (unlikely(ktime_get_seconds() - pool->opp_last_access > + if (pool->opp_idle_idx != IDLE_IDX_MAX && + unlikely(ktime_get_seconds() - pool->opp_last_access > CACHE_QUIESCENT_PERIOD)) { spin_lock(&pool->opp_lock); pool->opp_idle_idx = IDLE_IDX_MAX; spin_unlock(&pool->opp_lock); } - LASSERT(pool->opp_idle_idx <= IDLE_IDX_MAX); - return (pool->opp_free_pages <= PTLRPC_MAX_BRW_PAGES) ? 0 : - (pool->opp_free_pages - PTLRPC_MAX_BRW_PAGES) * - (IDLE_IDX_MAX - pool->opp_idle_idx) / IDLE_IDX_MAX; + return (pool->opp_free_pages <= max_objects) ? 0 : + (pool->opp_free_pages - max_objects) * + pool->opp_idle_idx / IDLE_IDX_MAX; } /* @@ -353,38 +355,27 @@ static unsigned long pool_shrink_scan(struct shrinker *s, { int pool_order; struct obd_page_pool *pool; + unsigned long max_objects; pool_order = get_pool_index(s); pool = page_pools[pool_order]; + max_objects = PTLRPC_MAX_BRW_PAGES >> pool_order; spin_lock(&pool->opp_lock); - if (pool->opp_free_pages <= PTLRPC_MAX_BRW_PAGES) + if (pool->opp_free_pages <= max_objects) sc->nr_to_scan = 0; else sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, - pool->opp_free_pages - PTLRPC_MAX_BRW_PAGES); + pool->opp_free_pages - max_objects); if (sc->nr_to_scan > 0) { pool_release_free_pages(sc->nr_to_scan, pool); - CDEBUG(D_SEC, "released %ld pages, %ld left\n", - (long)sc->nr_to_scan, pool->opp_free_pages); - pool->opp_st_shrinks++; pool->opp_last_shrink = ktime_get_seconds(); } spin_unlock(&pool->opp_lock); - - /* - * if no pool access for a long time, we consider it's fully idle. - * a little race here is fine. - */ - if (unlikely(ktime_get_seconds() - pool->opp_last_access > - CACHE_QUIESCENT_PERIOD)) { - spin_lock(&pool->opp_lock); - pool->opp_idle_idx = IDLE_IDX_MAX; - spin_unlock(&pool->opp_lock); - } - - LASSERT(pool->opp_idle_idx <= IDLE_IDX_MAX); + if (sc->nr_to_scan > 0) + CDEBUG(D_SEC, "released %lu objects, %ld left, order:%u\n", + sc->nr_to_scan, pool->opp_free_pages, pool->opp_order); return sc->nr_to_scan; } @@ -587,7 +578,7 @@ static int pool_add_pages(int npages, struct obd_page_pool *page_pool) LASSERT(alloced == npages); pool_insert_ptrs(ptr_pages, nptr_pages, npages, page_pool); - CDEBUG(D_SEC, "added %d pages into pool\n", npages); + CDEBUG(D_SEC, "added %d elements into pool:%d\n", npages, pool_order); OBD_FREE_PTR_ARRAY(ptr_pages, nptr_pages); rc = 0; @@ -842,6 +833,7 @@ int obd_pool_get_pages_array(struct page **pa, unsigned int count) } EXPORT_SYMBOL(obd_pool_get_pages_array); +/* get 2^order pages region */ int obd_pool_get_pages(void **pages, unsigned int order) { return __obd_pool_get_pages((void *)pages, 1, order, @@ -854,6 +846,7 @@ static int __obd_pool_put_pages(void *array, unsigned int count, void **(*page_from)(void *, int)) { struct obd_page_pool *page_pool; + unsigned long this_idle; int p_idx, g_idx; int i, rc = 0; @@ -895,6 +888,21 @@ static int __obd_pool_put_pages(void *array, unsigned int count, page_pool->opp_free_pages += count; pool_wakeup(page_pool); + /* + * Recalculate opp_idle_idx in __sptlrpc_pool_put_pages the + * same way as it done in __sptlrpc_pool_get_pages. It is + * possible that opp_idle_idx might become 0 or very small + * (less than 10). If there is no new allocations it would + * be impossible to free anything from the pool during 40 + * seconds(CACHE_QUIESCENT_PERIOD) despite the large amount + * of free elements ready to shrinking. + */ + this_idle = page_pool->opp_free_pages * IDLE_IDX_MAX / + page_pool->opp_total_pages; + page_pool->opp_idle_idx = (page_pool->opp_idle_idx * + IDLE_IDX_WEIGHT + this_idle) / + (IDLE_IDX_WEIGHT + 1); + out_unlock: spin_unlock(&page_pool->opp_lock); return rc; @@ -929,6 +937,7 @@ void obd_pool_put_pages_array(struct page **pa, unsigned int count) } EXPORT_SYMBOL(obd_pool_put_pages_array); +/* put 2^order pages region */ void obd_pool_put_pages(void *buf, unsigned int order) { int rc;