From 94b04d47d7b71dc84868dda6db7cd97fc27b1ed0 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 27 Mar 2024 17:24:18 -0400 Subject: [PATCH 01/16] LU-16724 ptlrpc: refactor page pools patch 2 This is a combined series that refactors the page pools code to make it more readable. It used to be many separate patches but has been combined into just three, and this is the second. LU-16724 ptlrpc: stop passing around pool_index We pass pool_index around from function to function over and over, but it's easier to just pass the pool around. This does require the pool to know its own index, but that seems better anyway. LU-16724 ptlrpc: convert to void Convert functions without meaningful return to void. Change-Id-Was: I81f0baefd5b77b60ba699fa8749eaa83acadd8dd LU-16724 ptlrpc: refactor pool growing code This refactors the pool growing code, combining two separate instances of it into a single function. Change-Id-Was: I175abc7e61d55563e989f87207a8c59da852f5f9 LU-16724 ptlrpc: replace ELEMENT_SIZE The ELEMENT_SIZE macro is fine, but it takes a pool index and doesn't handle the pool of order 0. Change it to a function. (This is marginally less efficient in one spot, since it replaces a shift with a divide, but it should be just fine.) Change-Id-Was: I322037e50bbdb8e0274b37f82618b6907b6d2906 LU-16724 ptlrpc: simplify pool arrays Currently, we do a fancy trick where we have a pool of order 0, then subsequent pools start at PPOOL_MIN_CHUNK_BITS (which is actually the minimum compression size). So pool index 1 isn't a pool of order 1 (2 pages), it's a pool of order PPOOL_MIN_CHUNK_BITS. All this saves us is the cost of the empty pools below PPOOL_MIN_CHUNK_BITS, but it makes the code notably harder to read. With this change, the order of the pool and the pool index are the same. This simplification will be embraced more in subsequent patches. 
Change-Id-Was: I650e05d25727f10b0ca2d556cba17e9c4fccc309 LU-16724 ptlrpc: begin renaming pool_index to order Replace local variables for pool_index with pool_order. Other renames will be in a subsequent patch, to keep these as simple as possible. Change-Id-Was: If347ff39776f9a75c0f7d9d9981d01e19bc2cbc9 LU-16724 ptlrpc: rename ppp_index to ppp_order Rename ppp_index to ppp_order. Other renames will be in a subsequent patch, to keep these as simple as possible. Change-Id-Was: I96559e27a67b7cc4e56e06378e5686370438850c LU-16724 ptlrpc: rename INDEX macros Rename INDEX macros to ORDER. Change-Id-Was: Ic1123d25bc855dc7671c9cb587a0d6680662b729 LU-16724 ptlrpc: remove PAGES_POOL macro PAGES_POOL is just the order 0 pool now, so remove the special naming, and adjust a few associated functions. Change-Id-Was: I09e1debeadecbce33c7be43a8859815084623358 Signed-off-by: Patrick Farrell Change-Id: I42dc8b8094212c69b7a29cc3766bd0a10860f7af Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52645 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- lustre/include/lustre_sec.h | 7 +- lustre/ptlrpc/client.c | 6 +- lustre/ptlrpc/sec_bulk.c | 300 ++++++++++++++++++++++++-------------------- 3 files changed, 168 insertions(+), 145 deletions(-) diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h index bad288e..94ceabf 100644 --- a/lustre/include/lustre_sec.h +++ b/lustre/include/lustre_sec.h @@ -1191,16 +1191,15 @@ int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, struct ptlrpc_cli_ctx *ctx); /* bulk security api */ -#define PAGES_POOL 0 -int sptlrpc_pool_add_user(void); +void sptlrpc_pool_add_user(void); int sptlrpc_pool_get_desc_pages(struct ptlrpc_bulk_desc *desc); int sptlrpc_pool_get_pages_array(struct page **pa, unsigned int count); int sptlrpc_pool_get_pages(void **buf, unsigned int order); void sptlrpc_pool_put_desc_pages(struct ptlrpc_bulk_desc *desc); void 
sptlrpc_pool_put_pages_array(struct page **pa, unsigned int count); void sptlrpc_pool_put_pages(void *buf, unsigned int order); -int sptlrpc_pool_get_free_pages(unsigned int pool); -int pool_is_at_full_capacity(void); +int sptlrpc_pool_get_free_pages(unsigned int order); +int pool_is_at_full_capacity(int order); int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 4efeb20..fcfd417 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1670,11 +1670,11 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) ENTRY; LASSERT(req->rq_phase == RQ_PHASE_NEW); - /* do not try to go further if there is not enough memory in enc_pool */ + /* do not try to go further if there is not enough memory in pool */ if (req->rq_sent && req->rq_bulk) if (req->rq_bulk->bd_iov_count > - sptlrpc_pool_get_free_pages(PAGES_POOL) && - pool_is_at_full_capacity()) + sptlrpc_pool_get_free_pages(0) && + pool_is_at_full_capacity(0)) RETURN(-ENOMEM); if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) && diff --git a/lustre/ptlrpc/sec_bulk.c b/lustre/ptlrpc/sec_bulk.c index d8d8080..96590ca 100644 --- a/lustre/ptlrpc/sec_bulk.c +++ b/lustre/ptlrpc/sec_bulk.c @@ -48,12 +48,10 @@ #include "ptlrpc_internal.h" -#define PPOOL_MIN_CHUNK_BITS 16 /* 2^16 bytes = 64KiB */ -#define PPOOL_MAX_CHUNK_BITS PTLRPC_MAX_BRW_BITS -#define POOLS_COUNT (PPOOL_MAX_CHUNK_BITS - PPOOL_MIN_CHUNK_BITS + 1) -#define PPOOL_ORDER_TO_INDEX(bits) ((bits) - PPOOL_MIN_CHUNK_BITS + 1) -#define POOL_BITS(pool) ((pool) + PPOOL_MIN_CHUNK_BITS - 1) -#define ELEMENT_SIZE(pool) (1 << (PPOOL_MIN_CHUNK_BITS + (pool) - 1)) +/* we have a pool for every power of 2 number of pages <= MAX_BRW_BITS. 
+ * most pools will be unused, but that's OK - unused pools are very cheap + */ +#define POOLS_COUNT (PTLRPC_MAX_BRW_BITS + 1) #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) #define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* deprecated - see pool_max_memory_mb below */ @@ -89,6 +87,9 @@ static struct ptlrpc_page_pool { unsigned int ppp_waitqlen; /* wait queue length */ unsigned long ppp_pages_short; /* # of pages wanted of in-q users */ unsigned int ppp_growing:1; /* during adding pages */ + unsigned int ppp_order; /* page pool order and index in pools + * array (element size is 2^order pages), + */ /* * indicating how idle the pools are, from 0 to MAX_IDLE_IDX @@ -131,6 +132,11 @@ static struct ptlrpc_page_pool { struct mutex add_pages_mutex; } **page_pools; +static int element_size(struct ptlrpc_page_pool *pool) +{ + return 1 << pool->ppp_order; +} + /* * Keep old name (encrypt_page_pool vs page_pool) for compatibility with user * tools pulling stats @@ -139,7 +145,7 @@ static struct ptlrpc_page_pool { */ int encrypt_page_pools_seq_show(struct seq_file *m, void *v) { - struct ptlrpc_page_pool *pool = page_pools[PAGES_POOL]; + struct ptlrpc_page_pool *pool = page_pools[0]; spin_lock(&pool->ppp_lock); seq_printf(m, "physical pages: %lu\n" @@ -189,7 +195,7 @@ int encrypt_page_pools_seq_show(struct seq_file *m, void *v) */ int page_pools_seq_show(struct seq_file *m, void *v) { - int pool_index; + int pool_order; struct ptlrpc_page_pool *pool; seq_printf(m, "physical_pages: %lu\n" @@ -197,12 +203,12 @@ int page_pools_seq_show(struct seq_file *m, void *v) "pools:\n", cfs_totalram_pages(), PAGES_PER_POOL); - for (pool_index = 0; pool_index < POOLS_COUNT; pool_index++) { - pool = page_pools[pool_index]; + for (pool_order = 0; pool_order < POOLS_COUNT; pool_order++) { + pool = page_pools[pool_order]; if (!pool->ppp_st_access) continue; spin_lock(&pool->ppp_lock); - seq_printf(m, " pool_%luk:\n" + seq_printf(m, " pool_%dk:\n" " max_pages: %lu\n" " 
max_pools: %u\n" " total_pages: %lu\n" @@ -220,8 +226,8 @@ int page_pools_seq_show(struct seq_file *m, void *v) " max_waitqueue_depth: %u\n" " max_wait_time_ms: %lld\n" " out_of_mem: %lu\n", - (pool_index ? ELEMENT_SIZE(pool_index - 10) : - PAGE_SIZE >> 10), + /* convert from bytes to KiB */ + element_size(pool) >> 10, pool->ppp_max_pages, pool->ppp_max_pools, pool->ppp_total_pages, @@ -245,11 +251,10 @@ int page_pools_seq_show(struct seq_file *m, void *v) return 0; } -static void pool_release_free_pages(long npages, unsigned int pool_idx) +static void pool_release_free_pages(long npages, struct ptlrpc_page_pool *pool) { int p_idx, g_idx; int p_idx_max1, p_idx_max2; - struct ptlrpc_page_pool *pool = page_pools[pool_idx]; LASSERT(npages > 0); LASSERT(npages <= pool->ppp_free_pages); @@ -273,11 +278,11 @@ static void pool_release_free_pages(long npages, unsigned int pool_idx) LASSERT(pool->ppp_pools[p_idx]); LASSERT(pool->ppp_pools[p_idx][g_idx] != NULL); - if (pool_idx == PAGES_POOL) + if (pool->ppp_order == 0) __free_page(pool->ppp_pools[p_idx][g_idx]); else OBD_FREE_LARGE(pool->ppp_pools[p_idx][g_idx], - ELEMENT_SIZE(pool_idx)); + element_size(pool)); pool->ppp_pools[p_idx][g_idx] = NULL; if (++g_idx == PAGES_PER_POOL) { @@ -295,16 +300,16 @@ static void pool_release_free_pages(long npages, unsigned int pool_idx) } } -#define SEEKS_TO_INDEX(s) (((s)->seeks >> 8) & 0xff) -#define INDEX_TO_SEEKS(i) (DEFAULT_SEEKS | (i << 8)) +#define SEEKS_TO_ORDER(s) (((s)->seeks >> 8) & 0xff) +#define ORDER_TO_SEEKS(i) (DEFAULT_SEEKS | (i << 8)) /* * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. */ static unsigned long pool_shrink_count(struct shrinker *s, struct shrink_control *sc) { - unsigned int pool_index = SEEKS_TO_INDEX(s); - struct ptlrpc_page_pool *pool = page_pools[pool_index]; + unsigned int pool_order = SEEKS_TO_ORDER(s); + struct ptlrpc_page_pool *pool = page_pools[pool_order]; /* * if no pool access for a long time, we consider it's fully * idle. 
A little race here is fine. @@ -330,8 +335,8 @@ static unsigned long pool_shrink_scan(struct shrinker *s, struct shrink_control *sc) { /* Get pool number passed as part of pool_shrinker_seeks value */ - unsigned int pool_index = SEEKS_TO_INDEX(s); - struct ptlrpc_page_pool *pool = page_pools[pool_index]; + unsigned int pool_order = SEEKS_TO_ORDER(s); + struct ptlrpc_page_pool *pool = page_pools[pool_order]; spin_lock(&pool->ppp_lock); if (pool->ppp_free_pages <= PTLRPC_MAX_BRW_PAGES) @@ -340,7 +345,7 @@ static unsigned long pool_shrink_scan(struct shrinker *s, sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, pool->ppp_free_pages - PTLRPC_MAX_BRW_PAGES); if (sc->nr_to_scan > 0) { - pool_release_free_pages(sc->nr_to_scan, pool_index); + pool_release_free_pages(sc->nr_to_scan, pool); CDEBUG(D_SEC, "released %ld pages, %ld left\n", (long)sc->nr_to_scan, pool->ppp_free_pages); @@ -387,7 +392,8 @@ int npages_to_npools(unsigned long npages) /* * return how many pages cleaned up. */ -static unsigned long pool_cleanup(void ***pools, int npools, int pool_idx) +static unsigned long pool_cleanup(void ***pools, int npools, + struct ptlrpc_page_pool *pool) { unsigned long cleaned = 0; int i, j; @@ -396,11 +402,11 @@ static unsigned long pool_cleanup(void ***pools, int npools, int pool_idx) if (pools[i]) { for (j = 0; j < PAGES_PER_POOL; j++) { if (pools[i][j]) { - if (pool_idx == PAGES_POOL) { + if (pool->ppp_order == 0) { __free_page(pools[i][j]); } else { OBD_FREE_LARGE(pools[i][j], - ELEMENT_SIZE(pool_idx)); + element_size(pool)); } cleaned++; } @@ -421,12 +427,11 @@ static unsigned long pool_cleanup(void ***pools, int npools, int pool_idx) * the simplest way to avoid complexity. It's not frequently called. 
*/ static void pool_insert(void ***pools, int npools, int npages, - unsigned int pool_idx) + struct ptlrpc_page_pool *page_pool) { int freeslot; int op_idx, np_idx, og_idx, ng_idx; int cur_npools, end_npools; - struct ptlrpc_page_pool *page_pool = page_pools[pool_idx]; LASSERT(npages > 0); LASSERT(page_pool->ppp_total_pages+npages <= page_pool->ppp_max_pages); @@ -517,20 +522,15 @@ static void pool_insert(void ***pools, int npools, int npages, } #define POOL_INIT_SIZE (PTLRPC_MAX_BRW_SIZE / 4) -static int pool_add_pages(int npages, int pool_index) +static int pool_add_pages(int npages, struct ptlrpc_page_pool *page_pool) { void ***pools; int npools, alloced = 0; int i, j, rc = -ENOMEM; - struct ptlrpc_page_pool *page_pool = page_pools[pool_index]; + unsigned int pool_order = page_pool->ppp_order; - if (pool_index == PAGES_POOL) { - if (npages < POOL_INIT_SIZE >> PAGE_SHIFT) - npages = POOL_INIT_SIZE >> PAGE_SHIFT; - } else { - if (npages < POOL_INIT_SIZE / ELEMENT_SIZE(pool_index)) - npages = POOL_INIT_SIZE / ELEMENT_SIZE(pool_index); - } + if (npages < POOL_INIT_SIZE / element_size(page_pool)) + npages = POOL_INIT_SIZE / element_size(page_pool); mutex_lock(&page_pool->add_pages_mutex); @@ -551,12 +551,12 @@ static int pool_add_pages(int npages, int pool_index) goto out_pools; for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { - if (pool_index == PAGES_POOL) + if (pool_order == 0) pools[i][j] = alloc_page(GFP_NOFS | __GFP_HIGHMEM); else { OBD_ALLOC_LARGE(pools[i][j], - ELEMENT_SIZE(pool_index)); + element_size(page_pool)); } if (pools[i][j] == NULL) goto out_pools; @@ -566,14 +566,14 @@ static int pool_add_pages(int npages, int pool_index) } LASSERT(alloced == npages); - pool_insert(pools, npools, npages, pool_index); + pool_insert(pools, npools, npages, page_pool); CDEBUG(D_SEC, "added %d pages into pools\n", npages); OBD_FREE_PTR_ARRAY(pools, npools); rc = 0; out_pools: if (rc) { - pool_cleanup(pools, npools, pool_index); + pool_cleanup(pools, npools, 
page_pool); } out: if (rc) { @@ -585,19 +585,17 @@ out: return rc; } -static inline void pool_wakeup(unsigned int pool) +static inline void pool_wakeup(struct ptlrpc_page_pool *pool) { - assert_spin_locked(&page_pools[pool]->ppp_lock); + assert_spin_locked(&pool->ppp_lock); /* waitqueue_active */ - if (unlikely(waitqueue_active(&page_pools[pool]->ppp_waitq))) - wake_up_all(&page_pools[pool]->ppp_waitq); + if (unlikely(waitqueue_active(&pool->ppp_waitq))) + wake_up_all(&pool->ppp_waitq); } -static int pool_should_grow(int needed, unsigned int pool_index) +static int pool_should_grow(int needed, struct ptlrpc_page_pool *pool) { - struct ptlrpc_page_pool *pool = page_pools[pool_index]; - /* * don't grow if someone else is growing the pools right now, * or the pools has reached its full capacity @@ -624,29 +622,21 @@ static int pool_should_grow(int needed, unsigned int pool_index) } /* - * Export the number of free pages in the pool + * Export the number of free pages in the pool of 'order' */ -int sptlrpc_pool_get_free_pages(unsigned int pool) +int sptlrpc_pool_get_free_pages(unsigned int order) { - return page_pools[pool]->ppp_free_pages; + return page_pools[order]->ppp_free_pages; } EXPORT_SYMBOL(sptlrpc_pool_get_free_pages); /* * Let outside world know if pool full capacity is reached */ -int __pool_is_at_full_capacity(unsigned int pool) -{ - return (page_pools[pool]->ppp_total_pages == - page_pools[pool]->ppp_max_pages); -} - -/* - * Let outside world know if pool full capacity is reached - */ -int pool_is_at_full_capacity(void) +int pool_is_at_full_capacity(int order) { - return __pool_is_at_full_capacity(PAGES_POOL); + return (page_pools[order]->ppp_total_pages == + page_pools[order]->ppp_max_pages); } EXPORT_SYMBOL(pool_is_at_full_capacity); @@ -669,14 +659,16 @@ static inline void **page_from_bufarray(void *array, int index) return (void **)array; } +static bool __grow_pool_try(int needed, struct ptlrpc_page_pool *pool); + /* * we allocate the requested pages 
atomically. */ static inline int __sptlrpc_pool_get_pages(void *array, unsigned int count, - unsigned int pool_idx, + unsigned int order, void **(*page_from)(void *, int)) { - struct ptlrpc_page_pool *page_pool = page_pools[pool_idx]; + struct ptlrpc_page_pool *page_pool = page_pools[order]; wait_queue_entry_t waitlink; unsigned long this_idle = -1; u64 tick_ns = 0; @@ -697,19 +689,17 @@ again: page_pool->ppp_st_missings++; page_pool->ppp_pages_short += count; - if (pool_should_grow(count, pool_idx)) { - page_pool->ppp_growing = 1; - - spin_unlock(&page_pool->ppp_lock); - CDEBUG(D_SEC, "ppp_pages_short: %lu\n", - page_pool->ppp_pages_short); - pool_add_pages(8, pool_idx); - spin_lock(&page_pool->ppp_lock); - - page_pool->ppp_growing = 0; - - pool_wakeup(pool_idx); - } else { + /* if we aren't able to add pages, check if someone else is + * growing the pool and sleep if so, otherwise we return + * ENOMEM because we can't sleep here waiting for other ops to + * complete (main user is ptlrpcd, which must not sleep waiting + * for other ops... 
technically sleeping for pool growth is + * also questionable but it's very unlikely in practice to get + * stuck from this) + * + * if ENOMEM is returned here, the RPC will go back in the queue + */ + if (!__grow_pool_try(count, page_pool)) { if (page_pool->ppp_growing) { if (++page_pool->ppp_waitqlen > page_pool->ppp_st_max_wqlen) @@ -802,7 +792,7 @@ int sptlrpc_pool_get_desc_pages(struct ptlrpc_bulk_desc *desc) int rc; LASSERT(desc->bd_iov_count > 0); - LASSERT(desc->bd_iov_count <= page_pools[PAGES_POOL]->ppp_max_pages); + LASSERT(desc->bd_iov_count <= page_pools[0]->ppp_max_pages); /* resent bulk, enc iov might have been allocated previously */ if (desc->bd_enc_vec != NULL) @@ -813,8 +803,8 @@ int sptlrpc_pool_get_desc_pages(struct ptlrpc_bulk_desc *desc) if (desc->bd_enc_vec == NULL) return -ENOMEM; - rc = __sptlrpc_pool_get_pages((void *)desc, desc->bd_iov_count, - PAGES_POOL, page_from_bulkdesc); + rc = __sptlrpc_pool_get_pages((void *)desc, desc->bd_iov_count, 0, + page_from_bulkdesc); if (rc) { OBD_FREE_LARGE(desc->bd_enc_vec, desc->bd_iov_count * @@ -827,37 +817,36 @@ EXPORT_SYMBOL(sptlrpc_pool_get_desc_pages); int sptlrpc_pool_get_pages_array(struct page **pa, unsigned int count) { - return __sptlrpc_pool_get_pages((void *)pa, count, PAGES_POOL, - page_from_pagearray); + return __sptlrpc_pool_get_pages((void *)pa, count, 0, + page_from_pagearray); } EXPORT_SYMBOL(sptlrpc_pool_get_pages_array); int sptlrpc_pool_get_pages(void **pages, unsigned int order) { - return __sptlrpc_pool_get_pages((void *)pages, 1, - PPOOL_ORDER_TO_INDEX(order), - page_from_bufarray); + return __sptlrpc_pool_get_pages((void *)pages, 1, order, + page_from_bufarray); } EXPORT_SYMBOL(sptlrpc_pool_get_pages); static int __sptlrpc_pool_put_pages(void *array, unsigned int count, - unsigned int pool_idx, - void **(*page_from)(void *, int)) + unsigned int order, + void **(*page_from)(void *, int)) { + struct ptlrpc_page_pool *page_pool; int p_idx, g_idx; int i, rc = 0; - struct 
ptlrpc_page_pool *page_pool; - LASSERTF(pool_idx < POOLS_COUNT, "count %u, pool %u\n", - count, pool_idx); + LASSERTF(order < POOLS_COUNT, "count %u, pool %u\n", + count, order); if (!array) { CERROR("Faled to put %u pages, from pool %u\n", - count, pool_idx); + count, order); return -EINVAL; } - page_pool = page_pools[pool_idx]; - LASSERTF(page_pool != NULL, "count %u, pool %u\n", count, pool_idx); + page_pool = page_pools[order]; + LASSERTF(page_pool != NULL, "count %u, pool %u\n", count, order); spin_lock(&page_pool->ppp_lock); @@ -884,7 +873,7 @@ static int __sptlrpc_pool_put_pages(void *array, unsigned int count, } page_pool->ppp_free_pages += count; - pool_wakeup(pool_idx); + pool_wakeup(page_pool); out_unlock: spin_unlock(&page_pool->ppp_lock); @@ -898,8 +887,8 @@ void sptlrpc_pool_put_desc_pages(struct ptlrpc_bulk_desc *desc) if (desc->bd_enc_vec == NULL) return; - rc = __sptlrpc_pool_put_pages((void *)desc, desc->bd_iov_count, - PAGES_POOL, page_from_bulkdesc); + rc = __sptlrpc_pool_put_pages((void *)desc, desc->bd_iov_count, 0, + page_from_bulkdesc); if (rc) CDEBUG(D_SEC, "error putting pages in pool: %d\n", rc); @@ -912,8 +901,8 @@ void sptlrpc_pool_put_pages_array(struct page **pa, unsigned int count) { int rc; - rc = __sptlrpc_pool_put_pages((void *)pa, count, PAGES_POOL, - page_from_pagearray); + rc = __sptlrpc_pool_put_pages((void *)pa, count, 0, + page_from_pagearray); if (rc) CDEBUG(D_SEC, "error putting pages in pool: %d\n", rc); @@ -924,40 +913,75 @@ void sptlrpc_pool_put_pages(void *buf, unsigned int order) { int rc; - rc = __sptlrpc_pool_put_pages(buf, 1, - PPOOL_ORDER_TO_INDEX(order), - page_from_bufarray); + rc = __sptlrpc_pool_put_pages(buf, 1, order, page_from_bufarray); if (rc) CDEBUG(D_SEC, "error putting pages in pool: %d\n", rc); } EXPORT_SYMBOL(sptlrpc_pool_put_pages); - -/* - * we don't do much stuff for add_user/del_user anymore, except adding some - * initial pages in add_user() if current pools are empty, rest would be - * handled 
by the pools's self-adaption. - */ -int sptlrpc_pool_add_user(void) +/* called with pool->ppp_lock held */ +static bool __grow_pool_try(int needed, struct ptlrpc_page_pool *pool) { - struct ptlrpc_page_pool *pool = page_pools[PAGES_POOL]; + bool pool_grown = false; + + assert_spin_locked(&pool->ppp_lock); + + if (pool_should_grow(needed, pool)) { + unsigned int to_add; + int rc; - spin_lock(&pool->ppp_lock); - /* ask for 1 page - so if the pool is empty, it will grow - * (this might also grow an in-use pool if it's full, which is fine) - */ - if (pool_should_grow(1, PAGES_POOL)) { pool->ppp_growing = 1; + /* the pool of single pages is grown a large amount on + * first use + */ + if (pool->ppp_order == 0 && + pool->ppp_total_pages == 0) + to_add = PTLRPC_MAX_BRW_PAGES * 2; + else /* otherwise, we add requested or at least 8 items */ + to_add = max(needed, 8); spin_unlock(&pool->ppp_lock); - pool_add_pages(PTLRPC_MAX_BRW_PAGES * 2, PAGES_POOL); + CDEBUG(D_SEC, + "pool %d is %lu elements (size %d bytes), growing by %d items\n", + pool->ppp_order, pool->ppp_pages_short, + element_size(pool), to_add); + /* we can't hold a spinlock over page allocation */ + rc = pool_add_pages(to_add, pool); + if (rc == 0) + pool_grown = true; spin_lock(&pool->ppp_lock); pool->ppp_growing = 0; - pool_wakeup(PAGES_POOL); + pool_wakeup(pool); } + + return pool_grown; +} + +static bool grow_pool_try(int needed, struct ptlrpc_page_pool *pool) +{ + bool rc; + + spin_lock(&pool->ppp_lock); + rc = __grow_pool_try(needed, pool); spin_unlock(&pool->ppp_lock); - return 0; + + return rc; +} + +/* + * we don't do much stuff for add_user/del_user anymore, except adding some + * initial pages in add_user() if current pools are empty, rest would be + * handled by the pools's self-adaption. 
+ */ +void sptlrpc_pool_add_user(void) +{ + struct ptlrpc_page_pool *pool = page_pools[0]; + + /* since this is startup, no one is waiting for these pages, so we + * don't worry about sucess or failure here + */ + grow_pool_try(1, pool); } EXPORT_SYMBOL(sptlrpc_pool_add_user); @@ -969,10 +993,8 @@ static inline void pool_alloc(struct ptlrpc_page_pool *pool) sizeof(*pool->ppp_pools)); } -static inline void pool_free(unsigned int pool_index) +static inline void pool_free(struct ptlrpc_page_pool *pool) { - struct ptlrpc_page_pool *pool = page_pools[pool_index]; - LASSERT(pool->ppp_max_pools); LASSERT(pool->ppp_pools); @@ -982,10 +1004,11 @@ static inline void pool_free(unsigned int pool_index) int sptlrpc_pool_init(void) { - int pool_index = 0, to_revert; - int rc = 0; struct ptlrpc_page_pool *pool; int pool_max_pages = cfs_totalram_pages() / POOLS_COUNT; + int pool_order = 0; + int to_revert; + int rc = 0; ENTRY; @@ -998,12 +1021,12 @@ int sptlrpc_pool_init(void) OBD_ALLOC(page_pools, POOLS_COUNT * sizeof(*page_pools)); if (page_pools == NULL) RETURN(-ENOMEM); - for (pool_index = 0; pool_index < POOLS_COUNT; pool_index++) { - OBD_ALLOC(page_pools[pool_index], sizeof(**page_pools)); - if (page_pools[pool_index] == NULL) + for (pool_order = 0; pool_order < POOLS_COUNT; pool_order++) { + OBD_ALLOC(page_pools[pool_order], sizeof(**page_pools)); + if (page_pools[pool_order] == NULL) GOTO(fail, rc = -ENOMEM); - pool = page_pools[pool_index]; + pool = page_pools[pool_order]; pool->ppp_max_pages = pool_max_pages; pool->ppp_max_pools = @@ -1017,7 +1040,8 @@ int sptlrpc_pool_init(void) pool->ppp_st_max_wait = ktime_set(0, 0); pool_alloc(pool); - CDEBUG(D_SEC, "Allocated pool %i\n", pool_index); + pool->ppp_order = pool_order; + CDEBUG(D_SEC, "Allocated pool %i\n", pool_order); if (pool->ppp_pools == NULL) GOTO(fail, rc = -ENOMEM); /* Pass pool number as part of pool_shrinker_seeks value */ @@ -1027,7 +1051,7 @@ int sptlrpc_pool_init(void) #else pool->ppp_shops.shrink = 
pool_shrink; #endif - pool->ppp_shops.seeks = INDEX_TO_SEEKS(pool_index); + pool->ppp_shops.seeks = ORDER_TO_SEEKS(pool_order); pool->pool_shrinker = ll_shrinker_create(&pool->ppp_shops, 0, "sptlrpc_pool"); @@ -1039,12 +1063,12 @@ int sptlrpc_pool_init(void) RETURN(0); fail: - to_revert = pool_index; - for (pool_index = 0; pool_index <= to_revert; pool_index++) { - pool = page_pools[pool_index]; + to_revert = pool_order; + for (pool_order = 0; pool_order <= to_revert; pool_order++) { + pool = page_pools[pool_order]; if (pool) { if (pool->ppp_pools) - pool_free(pool_index); + pool_free(pool); OBD_FREE(pool, sizeof(**page_pools)); } } @@ -1056,20 +1080,20 @@ fail: void sptlrpc_pool_fini(void) { unsigned long cleaned, npools; - int pool_index; + int pool_order; struct ptlrpc_page_pool *pool; - for (pool_index = 0; pool_index < POOLS_COUNT; pool_index++) { - pool = page_pools[pool_index]; + for (pool_order = 0; pool_order < POOLS_COUNT; pool_order++) { + pool = page_pools[pool_order]; shrinker_free(pool->pool_shrinker); LASSERT(pool->ppp_pools); LASSERT(pool->ppp_total_pages == pool->ppp_free_pages); npools = npages_to_npools(pool->ppp_total_pages); - cleaned = pool_cleanup(pool->ppp_pools, npools, pool_index); + cleaned = pool_cleanup(pool->ppp_pools, npools, pool); LASSERT(cleaned == pool->ppp_total_pages); - pool_free(pool_index); + pool_free(pool); if (pool->ppp_st_access > 0) { CDEBUG(D_SEC, -- 1.8.3.1 From 10a04e323b4b6cd92d0e72f9fe1327f7fb3052d4 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 27 Mar 2024 17:28:11 -0400 Subject: [PATCH 02/16] LU-16724 ptlrpc: refactor page pools patch 3 This is a combined series that refactors the page pools code to make it more readable. It used to be many separate patches but has been combined in to just three, and this is the third of three. 
LU-16724 ptlrpc: remove PAGES_PER_POOL macro The page pool code *also* likes to refer to each page of pointers it uses to track items in it as a "POOL", which is incredibly confusing. Start unwinding this by removing the PAGES_PER_POOL macro. LU-16724 ptlrpc: change "pool" to "page_ptrs" The page pool code *also* likes to refer to each page of pointers it uses to track items in it as a "POOL", which is incredibly confusing. This patch works on renaming that to page_ptrs, but leaves some steps for a future patch. Change-Id-Was: I56ee54c7f39b52d7cceffec9e3decf71bd313ddc LU-16724 ptlrpc: rename max_pools to max_ptr_pages Continue removal of referring to page pointers as pools with another rename. Change-Id-Was: I14796f670a7f06fbec3b40ec23b4dd2e50f22d46 LU-16724 ptlrpc: rename npools to nptr_pages Continue removal of 'pool' as a name for a page of pointers to items in a pool. Change-Id-Was: I97b320027a0a6b5870d246e1527fa3fbe15fccb5 LU-16724 ptlrpc: rename 'pools' to 'ptr_pages' This finalizes the removal of the overloading of 'pools' to also mean pages of pointers to items in each page pool. 
Change-Id-Was: I0f4aba95f573f4afdc6f5d92f22fd67391fa6dab Signed-off-by: Patrick Farrell Change-Id: Ie29434f53eeb945b8d35df7c1212ae3f51a2aafa Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52663 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- lustre/ptlrpc/sec_bulk.c | 272 +++++++++++++++++++++++------------------------ 1 file changed, 136 insertions(+), 136 deletions(-) diff --git a/lustre/ptlrpc/sec_bulk.c b/lustre/ptlrpc/sec_bulk.c index 96590ca..a005c2b 100644 --- a/lustre/ptlrpc/sec_bulk.c +++ b/lustre/ptlrpc/sec_bulk.c @@ -69,7 +69,6 @@ MODULE_PARM_DESC(pool_max_memory_mb, */ #define PTRS_PER_PAGE (PAGE_SIZE / sizeof(void *)) -#define PAGES_PER_POOL (PTRS_PER_PAGE) #define IDLE_IDX_MAX (100) #define IDLE_IDX_WEIGHT (3) @@ -78,7 +77,7 @@ MODULE_PARM_DESC(pool_max_memory_mb, static struct ptlrpc_page_pool { unsigned long ppp_max_pages; /* maximum pages can hold, const */ - unsigned int ppp_max_pools; /* number of pools, const */ + unsigned int ppp_max_ptr_pages; /* number of ptr_pages, const */ /* * wait queue in case of not enough free pages. @@ -92,11 +91,11 @@ static struct ptlrpc_page_pool { */ /* - * indicating how idle the pools are, from 0 to MAX_IDLE_IDX + * indicating how idle the pool is, from 0 to MAX_IDLE_IDX * this is counted based on each time when getting pages from - * the pools, not based on time. which means in case that system + * the pool, not based on time. which means in case that system * is idled for a while but the idle_idx might still be low if no - * activities happened in the pools. + * activities happened in the pool. 
*/ unsigned long ppp_idle_idx; @@ -106,7 +105,7 @@ static struct ptlrpc_page_pool { /* in-pool pages bookkeeping */ spinlock_t ppp_lock; /* protect following fields */ - unsigned long ppp_total_pages; /* total pages in pools */ + unsigned long ppp_total_pages; /* total pages in pool */ unsigned long ppp_free_pages; /* current pages available */ /* statistics */ @@ -121,9 +120,9 @@ static struct ptlrpc_page_pool { ktime_t ppp_st_max_wait; /* in nanoseconds */ unsigned long ppp_st_outofmem; /* # of out of mem requests */ /* - * pointers to pools, may be vmalloc'd + * pointers to ptr_pages, may be vmalloc'd */ - void ***ppp_pools; + void ***ppp_ptr_pages; /* * memory shrinker */ @@ -148,7 +147,8 @@ int encrypt_page_pools_seq_show(struct seq_file *m, void *v) struct ptlrpc_page_pool *pool = page_pools[0]; spin_lock(&pool->ppp_lock); - seq_printf(m, "physical pages: %lu\n" + seq_printf(m, + "physical pages: %lu\n" "pages per pool: %lu\n" "max pages: %lu\n" "max pools: %u\n" @@ -167,9 +167,9 @@ int encrypt_page_pools_seq_show(struct seq_file *m, void *v) "max waitqueue depth: %u\n" "max wait time ms: %lld\n" "out of mem: %lu\n", - cfs_totalram_pages(), PAGES_PER_POOL, + cfs_totalram_pages(), PTRS_PER_PAGE, pool->ppp_max_pages, - pool->ppp_max_pools, + pool->ppp_max_ptr_pages, pool->ppp_total_pages, pool->ppp_free_pages, pool->ppp_idle_idx, @@ -199,9 +199,8 @@ int page_pools_seq_show(struct seq_file *m, void *v) struct ptlrpc_page_pool *pool; seq_printf(m, "physical_pages: %lu\n" - "pages per pool: %lu\n\n" "pools:\n", - cfs_totalram_pages(), PAGES_PER_POOL); + cfs_totalram_pages()); for (pool_order = 0; pool_order < POOLS_COUNT; pool_order++) { pool = page_pools[pool_order]; @@ -210,7 +209,7 @@ int page_pools_seq_show(struct seq_file *m, void *v) spin_lock(&pool->ppp_lock); seq_printf(m, " pool_%dk:\n" " max_pages: %lu\n" - " max_pools: %u\n" + " max_items: %lu\n" " total_pages: %lu\n" " total_free: %lu\n" " idle_index: %lu/100\n" @@ -229,7 +228,7 @@ int 
page_pools_seq_show(struct seq_file *m, void *v) /* convert from bytes to KiB */ element_size(pool) >> 10, pool->ppp_max_pages, - pool->ppp_max_pools, + pool->ppp_max_ptr_pages * PTRS_PER_PAGE, pool->ppp_total_pages, pool->ppp_free_pages, pool->ppp_idle_idx, @@ -261,41 +260,41 @@ static void pool_release_free_pages(long npages, struct ptlrpc_page_pool *pool) LASSERT(pool->ppp_free_pages <= pool->ppp_total_pages); /* max pool index before the release */ - p_idx_max2 = (pool->ppp_total_pages - 1) / PAGES_PER_POOL; + p_idx_max2 = (pool->ppp_total_pages - 1) / PTRS_PER_PAGE; pool->ppp_free_pages -= npages; pool->ppp_total_pages -= npages; /* max pool index after the release */ p_idx_max1 = pool->ppp_total_pages == 0 ? -1 : - ((pool->ppp_total_pages - 1) / PAGES_PER_POOL); + ((pool->ppp_total_pages - 1) / PTRS_PER_PAGE); - p_idx = pool->ppp_free_pages / PAGES_PER_POOL; - g_idx = pool->ppp_free_pages % PAGES_PER_POOL; - LASSERT(pool->ppp_pools[p_idx]); + p_idx = pool->ppp_free_pages / PTRS_PER_PAGE; + g_idx = pool->ppp_free_pages % PTRS_PER_PAGE; + LASSERT(pool->ppp_ptr_pages[p_idx]); while (npages--) { - LASSERT(pool->ppp_pools[p_idx]); - LASSERT(pool->ppp_pools[p_idx][g_idx] != NULL); + LASSERT(pool->ppp_ptr_pages[p_idx]); + LASSERT(pool->ppp_ptr_pages[p_idx][g_idx] != NULL); if (pool->ppp_order == 0) - __free_page(pool->ppp_pools[p_idx][g_idx]); + __free_page(pool->ppp_ptr_pages[p_idx][g_idx]); else - OBD_FREE_LARGE(pool->ppp_pools[p_idx][g_idx], + OBD_FREE_LARGE(pool->ppp_ptr_pages[p_idx][g_idx], element_size(pool)); - pool->ppp_pools[p_idx][g_idx] = NULL; + pool->ppp_ptr_pages[p_idx][g_idx] = NULL; - if (++g_idx == PAGES_PER_POOL) { + if (++g_idx == PTRS_PER_PAGE) { p_idx++; g_idx = 0; } } - /* free unused pools */ + /* free unused ptr_pages */ while (p_idx_max1 < p_idx_max2) { - LASSERT(pool->ppp_pools[p_idx_max2]); - OBD_FREE(pool->ppp_pools[p_idx_max2], PAGE_SIZE); - pool->ppp_pools[p_idx_max2] = NULL; + LASSERT(pool->ppp_ptr_pages[p_idx_max2]); + 
OBD_FREE(pool->ppp_ptr_pages[p_idx_max2], PAGE_SIZE); + pool->ppp_ptr_pages[p_idx_max2] = NULL; p_idx_max2--; } } @@ -384,35 +383,35 @@ static int pool_shrink(struct shrinker *shrinker, struct shrink_control *sc) #endif /* HAVE_SHRINKER_COUNT */ static inline -int npages_to_npools(unsigned long npages) +int npages_to_nptr_pages(unsigned long npages) { - return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); + return (int) ((npages + PTRS_PER_PAGE - 1) / PTRS_PER_PAGE); } /* * return how many pages cleaned up. */ -static unsigned long pool_cleanup(void ***pools, int npools, +static unsigned long pool_cleanup(void ***ptr_pages, int nptr_pages, struct ptlrpc_page_pool *pool) { unsigned long cleaned = 0; int i, j; - for (i = 0; i < npools; i++) { - if (pools[i]) { - for (j = 0; j < PAGES_PER_POOL; j++) { - if (pools[i][j]) { + for (i = 0; i < nptr_pages; i++) { + if (ptr_pages[i]) { + for (j = 0; j < PTRS_PER_PAGE; j++) { + if (ptr_pages[i][j]) { if (pool->ppp_order == 0) { - __free_page(pools[i][j]); + __free_page(ptr_pages[i][j]); } else { - OBD_FREE_LARGE(pools[i][j], + OBD_FREE_LARGE(ptr_pages[i][j], element_size(pool)); } cleaned++; } } - OBD_FREE(pools[i], PAGE_SIZE); - pools[i] = NULL; + OBD_FREE(ptr_pages[i], PAGE_SIZE); + ptr_pages[i] = NULL; } } @@ -420,53 +419,54 @@ static unsigned long pool_cleanup(void ***pools, int npools, } /* - * merge @npools pointed by @pools which contains @npages new pages - * into current pools. + * merge @nptr_pages pointed by @ptr_pages which contains @npages new pages + * into current pool. * * we have options to avoid most memory copy with some tricks. but we choose * the simplest way to avoid complexity. It's not frequently called. 
*/ -static void pool_insert(void ***pools, int npools, int npages, - struct ptlrpc_page_pool *page_pool) +static void pool_insert_ptrs(void ***ptr_pages, int nptr_pages, int npages, + struct ptlrpc_page_pool *page_pool) { int freeslot; int op_idx, np_idx, og_idx, ng_idx; - int cur_npools, end_npools; + int cur_nptr_page, end_nptr_page; LASSERT(npages > 0); LASSERT(page_pool->ppp_total_pages+npages <= page_pool->ppp_max_pages); - LASSERT(npages_to_npools(npages) == npools); + LASSERT(npages_to_nptr_pages(npages) == nptr_pages); LASSERT(page_pool->ppp_growing); spin_lock(&page_pool->ppp_lock); /* - * (1) fill all the free slots of current pools. + * (1) fill all the free slots in current pool ptr_pages */ /* * free slots are those left by rent pages, and the extra ones with * index >= total_pages, locate at the tail of last pool. */ - freeslot = page_pool->ppp_total_pages % PAGES_PER_POOL; + freeslot = page_pool->ppp_total_pages % PTRS_PER_PAGE; if (freeslot != 0) - freeslot = PAGES_PER_POOL - freeslot; + freeslot = PTRS_PER_PAGE - freeslot; freeslot += page_pool->ppp_total_pages - page_pool->ppp_free_pages; - op_idx = page_pool->ppp_free_pages / PAGES_PER_POOL; - og_idx = page_pool->ppp_free_pages % PAGES_PER_POOL; - np_idx = npools - 1; - ng_idx = (npages - 1) % PAGES_PER_POOL; + op_idx = page_pool->ppp_free_pages / PTRS_PER_PAGE; + og_idx = page_pool->ppp_free_pages % PTRS_PER_PAGE; + np_idx = nptr_pages - 1; + ng_idx = (npages - 1) % PTRS_PER_PAGE; while (freeslot) { - LASSERT(page_pool->ppp_pools[op_idx][og_idx] == NULL); - LASSERT(pools[np_idx][ng_idx] != NULL); + LASSERT(page_pool->ppp_ptr_pages[op_idx][og_idx] == NULL); + LASSERT(ptr_pages[np_idx][ng_idx] != NULL); - page_pool->ppp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; - pools[np_idx][ng_idx] = NULL; + page_pool->ppp_ptr_pages[op_idx][og_idx] = + ptr_pages[np_idx][ng_idx]; + ptr_pages[np_idx][ng_idx] = NULL; freeslot--; - if (++og_idx == PAGES_PER_POOL) { + if (++og_idx == PTRS_PER_PAGE) { op_idx++; 
og_idx = 0; } @@ -474,38 +474,38 @@ static void pool_insert(void ***pools, int npools, int npages, if (np_idx == 0) break; np_idx--; - ng_idx = PAGES_PER_POOL - 1; + ng_idx = PTRS_PER_PAGE - 1; } } /* - * (2) add pools if needed. + * (2) add ptr pages if needed. */ - cur_npools = (page_pool->ppp_total_pages + PAGES_PER_POOL - 1) / - PAGES_PER_POOL; - end_npools = (page_pool->ppp_total_pages + npages + - PAGES_PER_POOL - 1) / PAGES_PER_POOL; - LASSERT(end_npools <= page_pool->ppp_max_pools); + cur_nptr_page = (page_pool->ppp_total_pages + PTRS_PER_PAGE - 1) / + PTRS_PER_PAGE; + end_nptr_page = (page_pool->ppp_total_pages + npages + + PTRS_PER_PAGE - 1) / PTRS_PER_PAGE; + LASSERT(end_nptr_page <= page_pool->ppp_max_ptr_pages); np_idx = 0; - while (cur_npools < end_npools) { - LASSERT(page_pool->ppp_pools[cur_npools] == NULL); - LASSERT(np_idx < npools); - LASSERT(pools[np_idx] != NULL); + while (cur_nptr_page < end_nptr_page) { + LASSERT(page_pool->ppp_ptr_pages[cur_nptr_page] == NULL); + LASSERT(np_idx < nptr_pages); + LASSERT(ptr_pages[np_idx] != NULL); - page_pool->ppp_pools[cur_npools++] = pools[np_idx]; - pools[np_idx++] = NULL; + page_pool->ppp_ptr_pages[cur_nptr_page++] = ptr_pages[np_idx]; + ptr_pages[np_idx++] = NULL; } /* - * (3) free useless source pools + * (3) free useless source ptr pages */ - while (np_idx < npools) { - LASSERT(pools[np_idx] != NULL); - CDEBUG(D_SEC, "Free useless pool buffer: %i, %p\n", np_idx, - pools[np_idx]); - OBD_FREE(pools[np_idx], PAGE_SIZE); - pools[np_idx++] = NULL; + while (np_idx < nptr_pages) { + LASSERT(ptr_pages[np_idx] != NULL); + CDEBUG(D_SEC, "Free useless ptr pages: %i, %p\n", np_idx, + ptr_pages[np_idx]); + OBD_FREE(ptr_pages[np_idx], PAGE_SIZE); + ptr_pages[np_idx++] = NULL; } page_pool->ppp_total_pages += npages; @@ -524,8 +524,8 @@ static void pool_insert(void ***pools, int npools, int npages, #define POOL_INIT_SIZE (PTLRPC_MAX_BRW_SIZE / 4) static int pool_add_pages(int npages, struct ptlrpc_page_pool 
*page_pool) { - void ***pools; - int npools, alloced = 0; + void ***ptr_pages; + int nptr_pages, alloced = 0; int i, j, rc = -ENOMEM; unsigned int pool_order = page_pool->ppp_order; @@ -540,40 +540,40 @@ static int pool_add_pages(int npages, struct ptlrpc_page_pool *page_pool) page_pool->ppp_st_grows++; - npools = npages_to_npools(npages); - OBD_ALLOC_PTR_ARRAY(pools, npools); - if (pools == NULL) + nptr_pages = npages_to_nptr_pages(npages); + OBD_ALLOC_PTR_ARRAY(ptr_pages, nptr_pages); + if (ptr_pages == NULL) goto out; - for (i = 0; i < npools; i++) { - OBD_ALLOC(pools[i], PAGE_SIZE); - if (pools[i] == NULL) - goto out_pools; + for (i = 0; i < nptr_pages; i++) { + OBD_ALLOC(ptr_pages[i], PAGE_SIZE); + if (ptr_pages[i] == NULL) + goto out_ptr_pages; - for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { + for (j = 0; j < PTRS_PER_PAGE && alloced < npages; j++) { if (pool_order == 0) - pools[i][j] = alloc_page(GFP_NOFS | + ptr_pages[i][j] = alloc_page(GFP_NOFS | __GFP_HIGHMEM); else { - OBD_ALLOC_LARGE(pools[i][j], + OBD_ALLOC_LARGE(ptr_pages[i][j], element_size(page_pool)); } - if (pools[i][j] == NULL) - goto out_pools; + if (ptr_pages[i][j] == NULL) + goto out_ptr_pages; alloced++; } } LASSERT(alloced == npages); - pool_insert(pools, npools, npages, page_pool); - CDEBUG(D_SEC, "added %d pages into pools\n", npages); - OBD_FREE_PTR_ARRAY(pools, npools); + pool_insert_ptrs(ptr_pages, nptr_pages, npages, page_pool); + CDEBUG(D_SEC, "added %d pages into pool\n", npages); + OBD_FREE_PTR_ARRAY(ptr_pages, nptr_pages); rc = 0; -out_pools: +out_ptr_pages: if (rc) { - pool_cleanup(pools, npools, page_pool); + pool_cleanup(ptr_pages, nptr_pages, page_pool); } out: if (rc) { @@ -597,8 +597,8 @@ static inline void pool_wakeup(struct ptlrpc_page_pool *pool) static int pool_should_grow(int needed, struct ptlrpc_page_pool *pool) { /* - * don't grow if someone else is growing the pools right now, - * or the pools has reached its full capacity + * don't grow if someone else 
is growing the pool right now, + * or the pool has reached its full capacity */ if (pool->ppp_growing || pool->ppp_total_pages == pool->ppp_max_pages) return 0; @@ -617,7 +617,7 @@ static int pool_should_grow(int needed, struct ptlrpc_page_pool *pool) * length, idle index, etc. ? */ - /* grow the pools in any other cases */ + /* grow the pool in any other cases */ return 1; } @@ -748,18 +748,18 @@ again: /* proceed with rest of allocation */ page_pool->ppp_free_pages -= count; - p_idx = page_pool->ppp_free_pages / PAGES_PER_POOL; - g_idx = page_pool->ppp_free_pages % PAGES_PER_POOL; + p_idx = page_pool->ppp_free_pages / PTRS_PER_PAGE; + g_idx = page_pool->ppp_free_pages % PTRS_PER_PAGE; for (i = 0; i < count; i++) { void **pagep = page_from(array, i); - if (page_pool->ppp_pools[p_idx][g_idx] == NULL) + if (page_pool->ppp_ptr_pages[p_idx][g_idx] == NULL) GOTO(out_unlock, rc = -EPROTO); - *pagep = page_pool->ppp_pools[p_idx][g_idx]; - page_pool->ppp_pools[p_idx][g_idx] = NULL; + *pagep = page_pool->ppp_ptr_pages[p_idx][g_idx]; + page_pool->ppp_ptr_pages[p_idx][g_idx] = NULL; - if (++g_idx == PAGES_PER_POOL) { + if (++g_idx == PTRS_PER_PAGE) { p_idx++; g_idx = 0; } @@ -850,23 +850,23 @@ static int __sptlrpc_pool_put_pages(void *array, unsigned int count, spin_lock(&page_pool->ppp_lock); - p_idx = page_pool->ppp_free_pages / PAGES_PER_POOL; - g_idx = page_pool->ppp_free_pages % PAGES_PER_POOL; + p_idx = page_pool->ppp_free_pages / PTRS_PER_PAGE; + g_idx = page_pool->ppp_free_pages % PTRS_PER_PAGE; if (page_pool->ppp_free_pages + count > page_pool->ppp_total_pages) GOTO(out_unlock, rc = -EPROTO); - if (!page_pool->ppp_pools[p_idx]) + if (!page_pool->ppp_ptr_pages[p_idx]) GOTO(out_unlock, rc = -EPROTO); for (i = 0; i < count; i++) { void **pagep = page_from(array, i); if (!*pagep || - page_pool->ppp_pools[p_idx][g_idx] != NULL) + page_pool->ppp_ptr_pages[p_idx][g_idx] != NULL) GOTO(out_unlock, rc = -EPROTO); - page_pool->ppp_pools[p_idx][g_idx] = *pagep; - if (++g_idx == 
PAGES_PER_POOL) { + page_pool->ppp_ptr_pages[p_idx][g_idx] = *pagep; + if (++g_idx == PTRS_PER_PAGE) { p_idx++; g_idx = 0; } @@ -971,8 +971,8 @@ static bool grow_pool_try(int needed, struct ptlrpc_page_pool *pool) /* * we don't do much stuff for add_user/del_user anymore, except adding some - * initial pages in add_user() if current pools are empty, rest would be - * handled by the pools's self-adaption. + * initial pages in add_user() if current pool is empty, rest would be + * handled by the pool self-adaption. */ void sptlrpc_pool_add_user(void) { @@ -985,21 +985,21 @@ void sptlrpc_pool_add_user(void) } EXPORT_SYMBOL(sptlrpc_pool_add_user); -static inline void pool_alloc(struct ptlrpc_page_pool *pool) +static inline void pool_ptrs_alloc(struct ptlrpc_page_pool *pool) { - LASSERT(pool->ppp_max_pools); - OBD_ALLOC_LARGE(pool->ppp_pools, - pool->ppp_max_pools * - sizeof(*pool->ppp_pools)); + LASSERT(pool->ppp_max_ptr_pages); + OBD_ALLOC_LARGE(pool->ppp_ptr_pages, + pool->ppp_max_ptr_pages * + sizeof(*pool->ppp_ptr_pages)); } -static inline void pool_free(struct ptlrpc_page_pool *pool) +static inline void pool_ptrs_free(struct ptlrpc_page_pool *pool) { - LASSERT(pool->ppp_max_pools); - LASSERT(pool->ppp_pools); + LASSERT(pool->ppp_max_ptr_pages); + LASSERT(pool->ppp_ptr_pages); - OBD_FREE_LARGE(pool->ppp_pools, - pool->ppp_max_pools * sizeof(*pool->ppp_pools)); + OBD_FREE_LARGE(pool->ppp_ptr_pages, + pool->ppp_max_ptr_pages * sizeof(*pool->ppp_ptr_pages)); } int sptlrpc_pool_init(void) @@ -1029,8 +1029,8 @@ int sptlrpc_pool_init(void) pool = page_pools[pool_order]; pool->ppp_max_pages = pool_max_pages; - pool->ppp_max_pools = - npages_to_npools(pool->ppp_max_pages); + pool->ppp_max_ptr_pages = + npages_to_nptr_pages(pool->ppp_max_pages); init_waitqueue_head(&pool->ppp_waitq); pool->ppp_last_shrink = ktime_get_seconds(); @@ -1039,10 +1039,10 @@ int sptlrpc_pool_init(void) spin_lock_init(&pool->ppp_lock); pool->ppp_st_max_wait = ktime_set(0, 0); - pool_alloc(pool); + 
pool_ptrs_alloc(pool); pool->ppp_order = pool_order; CDEBUG(D_SEC, "Allocated pool %i\n", pool_order); - if (pool->ppp_pools == NULL) + if (pool->ppp_ptr_pages == NULL) GOTO(fail, rc = -ENOMEM); /* Pass pool number as part of pool_shrinker_seeks value */ #ifdef HAVE_SHRINKER_COUNT @@ -1067,8 +1067,8 @@ fail: for (pool_order = 0; pool_order <= to_revert; pool_order++) { pool = page_pools[pool_order]; if (pool) { - if (pool->ppp_pools) - pool_free(pool); + if (pool->ppp_ptr_pages) + pool_ptrs_free(pool); OBD_FREE(pool, sizeof(**page_pools)); } } @@ -1079,21 +1079,21 @@ fail: void sptlrpc_pool_fini(void) { - unsigned long cleaned, npools; + unsigned long cleaned, nptr_pages; int pool_order; struct ptlrpc_page_pool *pool; for (pool_order = 0; pool_order < POOLS_COUNT; pool_order++) { pool = page_pools[pool_order]; shrinker_free(pool->pool_shrinker); - LASSERT(pool->ppp_pools); + LASSERT(pool->ppp_ptr_pages); LASSERT(pool->ppp_total_pages == pool->ppp_free_pages); - npools = npages_to_npools(pool->ppp_total_pages); - cleaned = pool_cleanup(pool->ppp_pools, npools, pool); + nptr_pages = npages_to_nptr_pages(pool->ppp_total_pages); + cleaned = pool_cleanup(pool->ppp_ptr_pages, nptr_pages, pool); LASSERT(cleaned == pool->ppp_total_pages); - pool_free(pool); + pool_ptrs_free(pool); if (pool->ppp_st_access > 0) { CDEBUG(D_SEC, -- 1.8.3.1 From b09f56c208c6c34375d098f66075688f329b7c76 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Fri, 9 Feb 2024 16:42:40 +0100 Subject: [PATCH 03/16] LU-17518 gss: do not trust supp groups from client with krb Thanks to Kerberos, Lustre does not have to trust clients anymore, but relies on keytabs and tickets, cryptographically validated, to recognize clients and users. RPC provided supplementary groups should not be trusted, but checked thanks to identity upcall and the trusted UID from the ticket. Add sanity-krb5 test_9 to exercise this. 
Test-Parameters: kerberos=true testlist=sanity-krb5 Signed-off-by: Sebastien Buisson Change-Id: I4113ef654492e76fcd377b2c0cc74e484b27850b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53987 Reviewed-by: Andreas Dilger Reviewed-by: Aurelien Degremont Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/include/lustre_idmap.h | 2 ++ lustre/mdt/mdt_lib.c | 57 +++++++++++++++++++++++++++++++++++++------ lustre/obdclass/idmap.c | 30 +++++++++++++++++++++-- lustre/tests/sanity-krb5.sh | 51 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 9 deletions(-) diff --git a/lustre/include/lustre_idmap.h b/lustre/include/lustre_idmap.h index 5ba270f..1f986ed 100644 --- a/lustre/include/lustre_idmap.h +++ b/lustre/include/lustre_idmap.h @@ -64,7 +64,9 @@ struct lu_ucred; extern void lustre_groups_from_list(struct group_info *ginfo, gid_t *glist); extern void lustre_groups_sort(struct group_info *group_info); +extern int lustre_groups_search(struct group_info *group_info, gid_t grp); extern int lustre_in_group_p(struct lu_ucred *mu, gid_t grp); +extern int has_proper_groups(struct lu_ucred *ucred); /** @} idmap */ diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index ffb950e..f69d19f 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -296,10 +296,12 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, /* check permission of setgid */ if (setgid && !(perm & CFS_SETGID_PERM)) { - CDEBUG(D_SEC, "mdt blocked setgid attempt (%u:%u/%u:%u -> %u) " - "from %s\n", pud->pud_uid, pud->pud_gid, + CDEBUG(D_SEC, + "mdt blocked setgid attempt (%u:%u/%u:%u -> %d) from %s\n", + pud->pud_uid, pud->pud_gid, pud->pud_fsuid, pud->pud_fsgid, - ucred->uc_identity->mi_gid, libcfs_nidstr(&peernid)); + ucred->uc_identity ? 
ucred->uc_identity->mi_gid : -1, + libcfs_nidstr(&peernid)); GOTO(out, rc = -EACCES); } @@ -359,6 +361,26 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, mdt->mdt_enable_cap_mask); } + /* Thanks to Kerberos, Lustre does not have to trust clients anymore, + * but relies on keytabs and tickets, cryptographically validated, to + * recognize clients and users. + * RPC provided primary group should match the one got from the + * identity upcall. + * And RPC provided supplementary groups should not be trusted, + * but checked thanks to identity upcall and the trusted UID + * from the ticket. + */ + if (SPTLRPC_FLVR_MECH(req->rq_flvr.sf_rpc) == SPTLRPC_MECH_GSS_KRB5) { + if (!has_proper_groups(ucred)) + GOTO(out, rc = -EACCES); + ucred->uc_suppgids[0] = -1; + ucred->uc_suppgids[1] = -1; + if (ucred->uc_ginfo) { + put_group_info(ucred->uc_ginfo); + ucred->uc_ginfo = NULL; + } + } + ucred->uc_valid = UCRED_NEW; ucred_set_jobid(info, ucred); ucred_set_nid(info, ucred); @@ -499,9 +521,10 @@ out: static int old_init_ucred_common(struct mdt_thread_info *info, struct lu_nodemap *nodemap) { - struct lu_ucred *uc = mdt_ucred(info); - struct mdt_device *mdt = info->mti_mdt; - struct md_identity *identity = NULL; + struct ptlrpc_request *req = mdt_info_req(info); + struct lu_ucred *uc = mdt_ucred(info); + struct mdt_device *mdt = info->mti_mdt; + struct md_identity *identity = NULL; if (nodemap && uc->uc_o_uid == nodemap->nm_squash_uid && nodemap->nmf_deny_unknown) @@ -550,6 +573,27 @@ static int old_init_ucred_common(struct mdt_thread_info *info, libcfs_nidstr(&mdt_info_req(info)->rq_peer.nid)); uc->uc_cap = cap_intersect(uc->uc_cap,mdt->mdt_enable_cap_mask); } + + /* Thanks to Kerberos, Lustre does not have to trust clients anymore, + * but relies on keytabs and tickets, cryptographically validated, to + * recognize clients and users. + * RPC provided primary group should match the one got from the + * identity upcall. 
+ * And RPC provided supplementary groups should not be trusted, + * but checked thanks to identity upcall and the trusted UID + * from the ticket. + */ + if (SPTLRPC_FLVR_MECH(req->rq_flvr.sf_rpc) == SPTLRPC_MECH_GSS_KRB5) { + if (!has_proper_groups(uc)) { + mdt_identity_put(mdt->mdt_identity_cache, + uc->uc_identity); + uc->uc_identity = NULL; + RETURN(-EACCES); + } + uc->uc_suppgids[0] = -1; + uc->uc_suppgids[1] = -1; + } + uc->uc_valid = UCRED_OLD; ucred_set_jobid(info, uc); ucred_set_nid(info, uc); @@ -557,7 +601,6 @@ static int old_init_ucred_common(struct mdt_thread_info *info, ucred_set_rbac_roles(info, uc); EXIT; - return 0; } diff --git a/lustre/obdclass/idmap.c b/lustre/obdclass/idmap.c index 0a5b593..dab099b 100644 --- a/lustre/obdclass/idmap.c +++ b/lustre/obdclass/idmap.c @@ -49,8 +49,7 @@ * groups_search() is copied from linux kernel! * A simple bsearch. */ -static int lustre_groups_search(struct group_info *group_info, - gid_t grp) +int lustre_groups_search(struct group_info *group_info, gid_t grp) { int left, right; @@ -73,6 +72,7 @@ static int lustre_groups_search(struct group_info *group_info, } return 0; } +EXPORT_SYMBOL(lustre_groups_search); void lustre_groups_from_list(struct group_info *ginfo, gid_t *glist) { @@ -158,3 +158,29 @@ int lustre_in_group_p(struct lu_ucred *mu, gid_t grp) return rc; } EXPORT_SYMBOL(lustre_in_group_p); + +/* make sure fsgid is one of primary or supplementary groups + * fetched from identity upcall + */ +int has_proper_groups(struct lu_ucred *ucred) +{ + struct group_info *group_info = NULL; + int rc; + + if (!ucred->uc_identity) + return 1; + + if (ucred->uc_fsgid == ucred->uc_identity->mi_gid) + return 1; + + group_info = ucred->uc_identity->mi_ginfo; + if (!group_info) + return 0; + + get_group_info(group_info); + rc = lustre_groups_search(group_info, ucred->uc_fsgid); + put_group_info(group_info); + + return rc; +} +EXPORT_SYMBOL(has_proper_groups); diff --git a/lustre/tests/sanity-krb5.sh 
b/lustre/tests/sanity-krb5.sh index 6ce9c6b..fea6e38 100755 --- a/lustre/tests/sanity-krb5.sh +++ b/lustre/tests/sanity-krb5.sh @@ -459,6 +459,57 @@ test_8() } run_test 8 "Early reply sent for slow gss context negotiation" +test_9() { + local test9user=$(getent passwd $RUNAS_ID | cut -d: -f1) + + $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed" + chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed" + $RUNAS ls -ld $DIR/$tdir + + # Add group, and client to new group, on client only. + # Server is not aware. + groupadd -g 5000 grptest9 + stack_trap "groupdel grptest9" EXIT + + usermod -g grptest9 $test9user + stack_trap "usermod -g $test9user $test9user" EXIT + id $RUNAS_ID + # Thanks to Kerberos, client should not be able to create file + # with primary group not known on server side + $RUNAS touch $DIR/$tdir/fileA && + error "server should not trust client's primary gid" + do_facet mds1 "lctl set_param mdt.*.identity_flush=-1" + + do_facet mds1 groupadd -g 5000 grptest9 + stack_trap "do_facet mds1 groupdel grptest9 || true" EXIT + do_facet mds1 usermod -a -G grptest9 $test9user + stack_trap "do_facet mds1 gpasswd -d $test9user grptest9 || true" EXIT + id $RUNAS_ID + do_facet mds1 "id $RUNAS_ID" + # Thanks to Kerberos, client should be able to create file + # with primary group taken as one of supp groups, as long as + # server side knows the supp groups. 
+ $RUNAS touch $DIR/$tdir/fileA || + error "server should know client's supp gid" + ls -l $DIR/$tdir + do_facet mds1 "lctl set_param mdt.*.identity_flush=-1" + do_facet mds1 gpasswd -d $test9user grptest9 + do_facet mds1 groupdel grptest9 + usermod -g $test9user $test9user + + usermod -a -G grptest9 $test9user + stack_trap "gpasswd -d $test9user grptest9" EXIT + id $RUNAS_ID + $RUNAS touch $DIR/$tdir/fileB + ls -l $DIR/$tdir + # Thanks to Kerberos, client should not be able to chgrp + $RUNAS chgrp grptest9 $DIR/$tdir/fileB && + error "server should not trust client's supp gid" + ls -l $DIR/$tdir + do_facet mds1 "lctl set_param mdt.*.identity_flush=-1" +} +run_test 9 "Do not trust primary and supp gids from client" + # # following tests will manipulate flavors and may end with any flavor set, # so each test should not assume any start flavor. -- 1.8.3.1 From 7a0517fa25162a6af2e10827851d7d8c7a4ed1ac Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 24 Mar 2024 13:55:08 +0700 Subject: [PATCH 04/16] LU-17592 build: kernel 6.8 removed strlcpy() Linux commit v6.7-11707-gd26270061ae6 string: Remove strlcpy() strlcpy() is removed, use strscpy() and provide a strscpy() for kernels that do not have one. 
HPE-bug-id: LUS-12181 Signed-off-by: Shaun Tancheff Change-Id: Ieab872f20e08d17a4842bc944fa38f9867de81f9 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54227 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- contrib/scripts/spelling.txt | 3 +- libcfs/autoconf/lustre-libcfs.m4 | 4 +- libcfs/include/libcfs/linux/linux-misc.h | 7 ++- libcfs/libcfs/hash.c | 2 +- libcfs/libcfs/tracefile.c | 4 +- lnet/klnds/o2iblnd/o2iblnd.c | 20 +++---- lnet/klnds/socklnd/socklnd.c | 2 +- lnet/lnet/config.c | 4 +- lnet/selftest/conrpc.c | 28 ++++----- lnet/selftest/console.c | 98 ++++++++++++++++---------------- lnet/selftest/framework.c | 16 +++--- lustre/fid/fid_store.c | 2 +- lustre/fld/fld_cache.c | 2 +- lustre/llite/pcc.c | 2 +- lustre/lmv/lmv_obd.c | 6 +- lustre/lod/lod_internal.h | 4 +- lustre/lod/lod_lov.c | 6 +- lustre/lod/lod_object.c | 4 +- lustre/lod/lod_pool.c | 2 +- lustre/lov/lov_ea.c | 6 +- lustre/lov/lov_pack.c | 4 +- lustre/lov/lov_pool.c | 2 +- lustre/mdc/mdc_lib.c | 4 +- lustre/mdd/mdd_device.c | 2 +- lustre/mdd/mdd_dir.c | 6 +- lustre/mdd/mdd_lproc.c | 2 +- lustre/mdt/mdt_coordinator.c | 2 +- lustre/mdt/mdt_lib.c | 2 +- lustre/mgc/mgc_request.c | 6 +- lustre/mgc/mgc_request_server.c | 6 +- lustre/mgs/mgs_handler.c | 2 +- lustre/mgs/mgs_llog.c | 69 +++++++++++----------- lustre/obdclass/dt_object.c | 2 +- lustre/obdclass/jobid.c | 8 +-- lustre/obdclass/obd_config.c | 2 +- lustre/obdclass/obd_mount.c | 5 +- lustre/obdclass/upcall_cache.c | 4 +- lustre/ofd/ofd_access_log.c | 2 +- lustre/ofd/ofd_io.c | 2 +- lustre/ofd/ofd_objects.c | 2 +- lustre/osd-ldiskfs/osd_handler.c | 14 ++--- lustre/osd-ldiskfs/osd_lproc.c | 2 +- lustre/osd-zfs/osd_handler.c | 12 ++-- lustre/osd-zfs/osd_lproc.c | 2 +- lustre/osd-zfs/osd_oi.c | 6 +- lustre/osd-zfs/osd_quota.c | 2 +- lustre/ptlrpc/nodemap_handler.c | 7 +-- lustre/ptlrpc/nodemap_storage.c | 2 +- lustre/ptlrpc/nrs.c | 4 +- 
lustre/ptlrpc/nrs_tbf.c | 8 +-- lustre/ptlrpc/sec_config.c | 10 ++-- lustre/ptlrpc/sec_lproc.c | 2 +- lustre/quota/qmt_dev.c | 6 +- lustre/quota/qsd_lib.c | 6 +- lustre/target/tgt_grant.c | 2 +- lustre/target/tgt_lastrcvd.c | 4 +- lustre/target/tgt_mount.c | 14 ++--- 57 files changed, 231 insertions(+), 228 deletions(-) diff --git a/contrib/scripts/spelling.txt b/contrib/scripts/spelling.txt index a2e5ddb..ed1b29e 100644 --- a/contrib/scripts/spelling.txt +++ b/contrib/scripts/spelling.txt @@ -104,7 +104,7 @@ l_wait_condition||wait_event_idle \bMIN\(||min_t mdo2fid||mdd_object_fid mktemp||mkstemp -nla_strlcpy|nla_strscpy +nla_strlcpy||nla_strscpy OBD_FAILED||CFS_FAILED OBD_FAIL_CHECK||CFS_FAIL_CHECK OBD_FAIL_CHECK_ORSET||CFS_FAIL_CHECK_ORSET @@ -139,6 +139,7 @@ setup_timer||cfs_timer_setup sprintf||snprintf strcat||strncat strcpy||strncpy +strlcpy||strscpy struct timeval||struct timespec64 tempnam||mkstemp time_t||timeout_t diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index 3048b6f..ba0c8d1 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -625,7 +625,9 @@ AC_DEFUN([LIBCFS_SRC_STRSCPY_EXISTS], [ LB2_LINUX_TEST_SRC([strscpy_exists], [ #include ],[ - strscpy((char *)NULL, (const char *)NULL, 0); + char buf[129]; + + strscpy(buf, "something", sizeof(buf)); ],[-Werror]) ]) AC_DEFUN([LIBCFS_STRSCPY_EXISTS], [ diff --git a/libcfs/include/libcfs/linux/linux-misc.h b/libcfs/include/libcfs/linux/linux-misc.h index 5936c78..f267ecb 100644 --- a/libcfs/include/libcfs/linux/linux-misc.h +++ b/libcfs/include/libcfs/linux/linux-misc.h @@ -204,7 +204,12 @@ static inline void *cfs_kallsyms_lookup_name(const char *name) #endif #ifndef HAVE_STRSCPY -#define strscpy(s1, s2, sz) strlcpy((s1), (s2), (sz)) +static inline ssize_t strscpy(char *s1, const char *s2, size_t sz) +{ + ssize_t len = strlcpy(s1, s2, sz); + + return (len >= sz) ? 
-E2BIG : len; +} #endif #ifndef HAVE_BITMAP_TO_ARR32 diff --git a/libcfs/libcfs/hash.c b/libcfs/libcfs/hash.c index 86f321b..34da19d 100644 --- a/libcfs/libcfs/hash.c +++ b/libcfs/libcfs/hash.c @@ -1068,7 +1068,7 @@ cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, if (hs == NULL) RETURN(NULL); - strlcpy(hs->hs_name, name, len); + strscpy(hs->hs_name, name, len); hs->hs_flags = flags; kref_init(&hs->hs_refcount); diff --git a/libcfs/libcfs/tracefile.c b/libcfs/libcfs/tracefile.c index 9c638a6b0..1a0ce6f 100644 --- a/libcfs/libcfs/tracefile.c +++ b/libcfs/libcfs/tracefile.c @@ -660,12 +660,12 @@ void libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, debug_buf += sizeof(header); } - strlcpy(debug_buf, file, PAGE_SIZE - tage->used); + snprintf(debug_buf, PAGE_SIZE - tage->used, "%s", file); tage->used += strlen(file) + 1; debug_buf += strlen(file) + 1; if (fn) { - strlcpy(debug_buf, fn, PAGE_SIZE - tage->used); + snprintf(debug_buf, PAGE_SIZE - tage->used, "%s", fn); tage->used += strlen(fn) + 1; debug_buf += strlen(fn) + 1; } diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 993525d..7a7ca38 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2276,15 +2276,15 @@ kiblnd_init_poolset(struct kib_poolset *ps, int cpt, memset(ps, 0, sizeof(struct kib_poolset)); ps->ps_cpt = cpt; - ps->ps_net = net; - ps->ps_pool_create = po_create; - ps->ps_pool_destroy = po_destroy; - ps->ps_node_init = nd_init; - ps->ps_node_fini = nd_fini; - ps->ps_pool_size = size; - if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name)) - >= sizeof(ps->ps_name)) - return -E2BIG; + ps->ps_net = net; + ps->ps_pool_create = po_create; + ps->ps_pool_destroy = po_destroy; + ps->ps_node_init = nd_init; + ps->ps_node_fini = nd_fini; + ps->ps_pool_size = size; + rc = strscpy(ps->ps_name, name, sizeof(ps->ps_name)); + if (rc < 0) + return rc; spin_lock_init(&ps->ps_lock); INIT_LIST_HEAD(&ps->ps_pool_list); 
INIT_LIST_HEAD(&ps->ps_failed_pool_list); @@ -3736,7 +3736,7 @@ kiblnd_startup(struct lnet_ni *ni) } ibdev->ibd_ifip = ntohl(ifaces[i].li_ipaddr); - strlcpy(ibdev->ibd_ifname, ifaces[i].li_name, + strscpy(ibdev->ibd_ifname, ifaces[i].li_name, sizeof(ibdev->ibd_ifname)); ibdev->ibd_can_failover = ifaces[i].li_iff_master; diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index cbcfeb7..d187969 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -2633,7 +2633,7 @@ ksocknal_startup(struct lnet_ni *ni) ni->ni_nid.nid_size = 0; ni->ni_nid.nid_addr[0] = sa->sin_addr.s_addr; } - strlcpy(ksi->ksni_name, ifaces[if_idx].li_name, sizeof(ksi->ksni_name)); + strscpy(ksi->ksni_name, ifaces[if_idx].li_name, sizeof(ksi->ksni_name)); /* call it before add it to ksocknal_data.ksnd_nets */ rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 9fb59e8..85ba572 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -1573,7 +1573,7 @@ int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns, bool v6) ifaces[nip].li_index = dev->ifindex; ifaces[nip].li_ipaddr = ifa->ifa_local; ifaces[nip].li_netmask = ntohl(ifa->ifa_mask); - strlcpy(ifaces[nip].li_name, ifa->ifa_label, + strscpy(ifaces[nip].li_name, ifa->ifa_label, sizeof(ifaces[nip].li_name)); nip++; } @@ -1615,7 +1615,7 @@ int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns, bool v6) ifaces[nip].li_index = dev->ifindex; memcpy(ifaces[nip].li_ipv6addr, &ifa6->addr, sizeof(struct in6_addr)); - strlcpy(ifaces[nip].li_name, dev->name, + strscpy(ifaces[nip].li_name, dev->name, sizeof(ifaces[nip].li_name)); nip++; /* As different IPv6 addresses don't have unique diff --git a/lnet/selftest/conrpc.c b/lnet/selftest/conrpc.c index df5e09a..ba0d820 100644 --- a/lnet/selftest/conrpc.c +++ b/lnet/selftest/conrpc.c @@ -571,27 +571,27 @@ lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, 
struct srpc_rmsn_reqst *rsrq; int rc; - switch (transop) { - case LST_TRANS_SESNEW: + switch (transop) { + case LST_TRANS_SESNEW: rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, feats, 0, 0, crpc); - if (rc != 0) - return rc; + if (rc != 0) + return rc; - msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; + msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; msrq->mksn_sid.ses_stamp = console_session.ses_id.ses_stamp; msrq->mksn_sid.ses_nid = lnet_nid_to_nid4(&console_session.ses_id.ses_nid); msrq->mksn_force = console_session.ses_force; - strlcpy(msrq->mksn_name, console_session.ses_name, + strscpy(msrq->mksn_name, console_session.ses_name, sizeof(msrq->mksn_name)); - break; + break; - case LST_TRANS_SESEND: + case LST_TRANS_SESEND: rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, feats, 0, 0, crpc); - if (rc != 0) - return rc; + if (rc != 0) + return rc; rsrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst; rsrq->rmsn_sid.ses_stamp = console_session.ses_id.ses_stamp; @@ -599,11 +599,11 @@ lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, lnet_nid_to_nid4(&console_session.ses_id.ses_nid); break; - default: - LBUG(); - } + default: + LBUG(); + } - return 0; + return 0; } int diff --git a/lnet/selftest/console.c b/lnet/selftest/console.c index e46624e..cfba9fc 100644 --- a/lnet/selftest/console.c +++ b/lnet/selftest/console.c @@ -1708,22 +1708,22 @@ lstcon_session_new(char *name, int key, unsigned feats, if (strlen(name) > sizeof(console_session.ses_name)-1) return -E2BIG; - strlcpy(console_session.ses_name, name, + strscpy(console_session.ses_name, name, sizeof(console_session.ses_name)); - rc = lstcon_batch_add(LST_DEFAULT_BATCH); - if (rc != 0) - return rc; + rc = lstcon_batch_add(LST_DEFAULT_BATCH); + if (rc != 0) + return rc; - rc = lstcon_rpc_pinger_start(); - if (rc != 0) { + rc = lstcon_rpc_pinger_start(); + if (rc != 0) { struct lstcon_batch *bat = NULL; - lstcon_batch_find(LST_DEFAULT_BATCH, &bat); - 
lstcon_batch_destroy(bat); + lstcon_batch_find(LST_DEFAULT_BATCH, &bat); + lstcon_batch_destroy(bat); - return rc; - } + return rc; + } console_session.ses_state = LST_SESSION_ACTIVE; @@ -1834,7 +1834,7 @@ lstcon_acceptor_handle(struct srpc_server_rpc *rpc) struct lstcon_ndlink *ndl; int rc = 0; - sfw_unpack_message(req); + sfw_unpack_message(req); mutex_lock(&console_session.ses_mutex); @@ -1842,70 +1842,70 @@ lstcon_acceptor_handle(struct srpc_server_rpc *rpc) jrep->join_sid.ses_nid = lnet_nid_to_nid4(&console_session.ses_id.ses_nid); if (LNET_NID_IS_ANY(&console_session.ses_id.ses_nid)) { - jrep->join_status = ESRCH; - goto out; - } + jrep->join_status = ESRCH; + goto out; + } if (lstcon_session_feats_check(req->msg_ses_feats) != 0) { jrep->join_status = EPROTO; goto out; } - if (jreq->join_sid.ses_nid != LNET_NID_ANY && - !lstcon_session_match(jreq->join_sid)) { - jrep->join_status = EBUSY; - goto out; - } + if (jreq->join_sid.ses_nid != LNET_NID_ANY && + !lstcon_session_match(jreq->join_sid)) { + jrep->join_status = EBUSY; + goto out; + } - if (lstcon_group_find(jreq->join_group, &grp) != 0) { - rc = lstcon_group_alloc(jreq->join_group, &grp); - if (rc != 0) { - CERROR("Out of memory\n"); - goto out; - } + if (lstcon_group_find(jreq->join_group, &grp) != 0) { + rc = lstcon_group_alloc(jreq->join_group, &grp); + if (rc != 0) { + CERROR("Out of memory\n"); + goto out; + } list_add_tail(&grp->grp_link, &console_session.ses_grp_list); lstcon_group_addref(grp); } - if (grp->grp_ref > 2) { - /* Group in using */ - jrep->join_status = EBUSY; - goto out; - } + if (grp->grp_ref > 2) { + /* Group in using */ + jrep->join_status = EBUSY; + goto out; + } - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); - if (rc == 0) { - jrep->join_status = EEXIST; - goto out; - } + rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); + if (rc == 0) { + jrep->join_status = EEXIST; + goto out; + } - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); - if (rc 
!= 0) { - CERROR("Out of memory\n"); - goto out; - } + rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); + if (rc != 0) { + CERROR("Out of memory\n"); + goto out; + } - ndl->ndl_node->nd_state = LST_NODE_ACTIVE; - ndl->ndl_node->nd_timeout = console_session.ses_timeout; + ndl->ndl_node->nd_state = LST_NODE_ACTIVE; + ndl->ndl_node->nd_timeout = console_session.ses_timeout; - if (grp->grp_userland == 0) - grp->grp_userland = 1; + if (grp->grp_userland == 0) + grp->grp_userland = 1; - strlcpy(jrep->join_session, console_session.ses_name, + strscpy(jrep->join_session, console_session.ses_name, sizeof(jrep->join_session)); - jrep->join_timeout = console_session.ses_timeout; - jrep->join_status = 0; + jrep->join_timeout = console_session.ses_timeout; + jrep->join_status = 0; out: rep->msg_ses_feats = console_session.ses_features; - if (grp != NULL) + if (grp != NULL) lstcon_group_decref(grp); mutex_unlock(&console_session.ses_mutex); - return rc; + return rc; } static struct srpc_service lstcon_acceptor_service; diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index 6afad0d..aa45e47 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -265,7 +265,7 @@ sfw_init_session(struct sfw_session *sn, struct lst_sid sid, refcount_set(&sn->sn_refcount, 1); /* +1 for caller */ atomic_set(&sn->sn_brw_errors, 0); atomic_set(&sn->sn_ping_errors, 0); - strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name)); + strscpy(&sn->sn_name[0], name, sizeof(sn->sn_name)); sn->sn_timer_active = 0; sn->sn_id.ses_stamp = sid.ses_stamp; @@ -438,10 +438,10 @@ sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply) if (!request->mksn_force) { reply->mksn_status = EBUSY; - cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0], + cplen = strscpy(&reply->mksn_name[0], &sn->sn_name[0], sizeof(reply->mksn_name)); - if (cplen >= sizeof(reply->mksn_name)) - return -E2BIG; + if (cplen < 0) + return cplen; return 0; } } @@ -520,6 +520,7 @@ 
sfw_debug_session(struct srpc_debug_reqst *request, struct srpc_debug_reply *reply) { struct sfw_session *sn = sfw_data.fw_session; + int cplen; if (sn == NULL) { reply->dbg_status = ESRCH; @@ -530,9 +531,10 @@ sfw_debug_session(struct srpc_debug_reqst *request, reply->dbg_status = 0; reply->dbg_sid = get_old_sid(sn); reply->dbg_timeout = sn->sn_timeout; - if (strlcpy(reply->dbg_name, &sn->sn_name[0], - sizeof(reply->dbg_name)) >= sizeof(reply->dbg_name)) - return -E2BIG; + cplen = strscpy(reply->dbg_name, &sn->sn_name[0], + sizeof(reply->dbg_name)); + if (cplen < 0) + return cplen; return 0; } diff --git a/lustre/fid/fid_store.c b/lustre/fid/fid_store.c index 547f92c..43b17db 100644 --- a/lustre/fid/fid_store.c +++ b/lustre/fid/fid_store.c @@ -88,7 +88,7 @@ static int seq_update_cb_add(struct thandle *th, struct lu_server_seq *seq) dcb = &ccb->suc_cb; dcb->dcb_func = seq_update_cb; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "seq_update_cb", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "seq_update_cb", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) diff --git a/lustre/fld/fld_cache.c b/lustre/fld/fld_cache.c index e77df96..02fbe1f 100644 --- a/lustre/fld/fld_cache.c +++ b/lustre/fld/fld_cache.c @@ -68,7 +68,7 @@ struct fld_cache *fld_cache_init(const char *name, int cache_size, cache->fci_cache_count = 0; rwlock_init(&cache->fci_lock); - strlcpy(cache->fci_name, name, sizeof(cache->fci_name)); + strscpy(cache->fci_name, name, sizeof(cache->fci_name)); cache->fci_cache_size = cache_size; cache->fci_threshold = cache_threshold; diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index d9ed477..472325d 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -3020,7 +3020,7 @@ int pcc_ioctl_state(struct file *file, struct inode *inode, if (IS_ERR(path)) GOTO(out_unlock, rc = PTR_ERR(path)); - if (strlcpy(state->pccs_path, path, buf_len) >= buf_len) + if (strscpy(state->pccs_path, path, buf_len) < 0) GOTO(out_unlock, rc = 
-ENAMETOOLONG); out_unlock: diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index ab1d9e3..a8aa5ca 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -3616,11 +3616,11 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version); lsm->lsm_md_migrate_offset = le32_to_cpu(lmm1->lmv_migrate_offset); lsm->lsm_md_migrate_hash = le32_to_cpu(lmm1->lmv_migrate_hash); - cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, + cplen = strscpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, sizeof(lsm->lsm_md_pool_name)); - if (cplen >= sizeof(lsm->lsm_md_pool_name)) - RETURN(-E2BIG); + if (cplen < 0) + RETURN(cplen); CDEBUG(D_INFO, "unpack lsm count %d/%d, master %d hash_type %#x/%#x layout_version %d\n", lsm->lsm_md_stripe_count, diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 1bb2afd9..412aef1 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -365,7 +365,7 @@ static inline int lod_set_pool(char **pool, const char *new_pool) OBD_ALLOC(*pool, len); if (*pool == NULL) return -ENOMEM; - strlcpy(*pool, new_pool, len); + strscpy(*pool, new_pool, len); } return 0; } @@ -639,7 +639,7 @@ static inline void lod_layout_get_pool(struct lod_layout_component *entries, if (entries[i].llc_magic == LOV_MAGIC_FOREIGN) continue; if (entries[i].llc_pool != NULL) { - strlcpy(pool, entries[i].llc_pool, len); + strscpy(pool, entries[i].llc_pool, len); break; } } diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index de080bf..626938e 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -778,11 +778,11 @@ static int lod_gen_component_ea(const struct lu_env *env, objs = &lmm->lmm_objects[0]; } else { struct lov_mds_md_v3 *v3 = (struct lov_mds_md_v3 *)lmm; - size_t cplen = strlcpy(v3->lmm_pool_name, + ssize_t cplen = strscpy(v3->lmm_pool_name, lod_comp->llc_pool, sizeof(v3->lmm_pool_name)); - if (cplen >=
sizeof(v3->lmm_pool_name)) - RETURN(-E2BIG); + if (cplen < 0) + RETURN(cplen); objs = &v3->lmm_objects[0]; } lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 07f6bfc..6bccd97 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -4106,7 +4106,7 @@ static void embed_pool_to_comp_v1(const struct lov_comp_md_v1 *src, } else { lum3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3); entry->lcme_size = cpu_to_le32(sizeof(*lum3)); - strlcpy(lum3->lmm_pool_name, pool, + strscpy(lum3->lmm_pool_name, pool, sizeof(lum3->lmm_pool_name)); shift += sizeof(*lum3) - sizeof(*lum); } @@ -4176,7 +4176,7 @@ static int lod_xattr_set_default_lov_on_dir(const struct lu_env *env, v3->lmm_stripe_offset = cpu_to_le32(v1->lmm_stripe_offset); v3->lmm_stripe_size = cpu_to_le32(v1->lmm_stripe_size); - strlcpy(v3->lmm_pool_name, pool, sizeof(v3->lmm_pool_name)); + strscpy(v3->lmm_pool_name, pool, sizeof(v3->lmm_pool_name)); info->lti_buf.lb_buf = v3; info->lti_buf.lb_len = sizeof(*v3); diff --git a/lustre/lod/lod_pool.c b/lustre/lod/lod_pool.c index 2c85740..96b1bcf 100644 --- a/lustre/lod/lod_pool.c +++ b/lustre/lod/lod_pool.c @@ -466,7 +466,7 @@ int lod_pool_new(struct obd_device *obd, char *poolname) if (new_pool == NULL) RETURN(-ENOMEM); - strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); + strscpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); new_pool->pool_spill_target[0] = '\0'; atomic_set(&new_pool->pool_spill_hit, 0); new_pool->pool_lobd = obd; diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index a1ccd9d..45a1a65 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -241,10 +241,10 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, if (pool_name) { - size_t pool_name_len; + ssize_t pool_name_len; - pool_name_len = strlcpy(lsme->lsme_pool_name, pool_name, + pool_name_len = strscpy(lsme->lsme_pool_name, pool_name, sizeof(lsme->lsme_pool_name)); - if (pool_name_len >=
sizeof(lsme->lsme_pool_name)) - GOTO(out_lsme, rc = -E2BIG); + if (pool_name_len < 0) + GOTO(out_lsme, rc = pool_name_len); } /* with Data-on-MDT set maxbytes to stripe size */ diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 8399f3e..8f0a061 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -132,7 +132,7 @@ static ssize_t lov_lsm_pack_v1v3(const struct lov_stripe_md *lsm, void *buf, if (lsm->lsm_magic == LOV_MAGIC_V3) { BUILD_BUG_ON(sizeof(lsm->lsm_entries[0]->lsme_pool_name) != sizeof(lmmv3->lmm_pool_name)); - strlcpy(lmmv3->lmm_pool_name, + strscpy(lmmv3->lmm_pool_name, lsm->lsm_entries[0]->lsme_pool_name, sizeof(lmmv3->lmm_pool_name)); lmm_objects = lmmv3->lmm_objects; @@ -215,7 +215,7 @@ unsigned int lov_lsme_pack_v1v3(struct lov_stripe_md_entry *lsme, if (lsme->lsme_magic == LOV_MAGIC_V3) { struct lov_mds_md_v3 *lmmv3 = (struct lov_mds_md_v3 *)lmm; - strlcpy(lmmv3->lmm_pool_name, lsme->lsme_pool_name, + strscpy(lmmv3->lmm_pool_name, lsme->lsme_pool_name, sizeof(lmmv3->lmm_pool_name)); lmm_objects = lmmv3->lmm_objects; } else { diff --git a/lustre/lov/lov_pool.c b/lustre/lov/lov_pool.c index 31a4287..c31d8d5 100644 --- a/lustre/lov/lov_pool.c +++ b/lustre/lov/lov_pool.c @@ -390,7 +390,7 @@ int lov_pool_new(struct obd_device *obd, char *poolname) if (new_pool == NULL) RETURN(-ENOMEM); - strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); + strscpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); new_pool->pool_lobd = obd; /* ref count init to 1 because when created a pool is always used * up to deletion diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index a60396c..0a8d2b4 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -113,7 +113,7 @@ static void mdc_pack_name(struct req_capsule *pill, buf[name_len] = '\0'; return; } - cpy_len = strlcpy(buf, name, buf_size); + cpy_len = strscpy(buf, name, buf_size); LASSERT(lu_name_is_valid_2(buf, cpy_len)); if (cpy_len != name_len) @@ 
-175,7 +175,7 @@ void mdc_file_sepol_pack(struct req_capsule *pill, struct sptlrpc_sepol *p) RCL_CLIENT); LASSERT(buf_size == p->ssp_sepol_size); - strlcpy(buf, p->ssp_sepol, p->ssp_sepol_size); + strscpy(buf, p->ssp_sepol, p->ssp_sepol_size); } void mdc_readdir_pack(struct req_capsule *pill, __u64 pgoff, size_t size, diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 74327f5..d39fbef 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -1816,7 +1816,7 @@ static int mdd_changelog_user_register(const struct lu_env *env, rc = mdd_changelog_name_check(env, ctxt, mdd, name); if (rc) GOTO(out_users, rc); - strlcpy(rec->cur_name, name, sizeof(rec->cur_name)); + strscpy(rec->cur_name, name, sizeof(rec->cur_name)); } mdd_chlg_username(rec, user_name, sizeof(user_name)); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 685f0c3..45d6aa0 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -1045,7 +1045,7 @@ static void mdd_changelog_rec_ext_rename(struct changelog_rec *rec, rnm->cr_spfid = *spfid; changelog_rec_name(rec)[rec->cr_namelen] = '\0'; - strlcpy(changelog_rec_sname(rec), sname->ln_name, extsize); + strscpy(changelog_rec_sname(rec), sname->ln_name, extsize); rec->cr_namelen += extsize; } @@ -1056,7 +1056,7 @@ void mdd_changelog_rec_ext_jobid(struct changelog_rec *rec, const char *jobid) if (jobid == NULL || jobid[0] == '\0') return; - strlcpy(jid->cr_jobid, jobid, sizeof(jid->cr_jobid)); + strscpy(jid->cr_jobid, jobid, sizeof(jid->cr_jobid)); } void mdd_changelog_rec_ext_extra_flags(struct changelog_rec *rec, __u64 eflags) @@ -1095,7 +1095,7 @@ void mdd_changelog_rec_extra_xattr(struct changelog_rec *rec, { struct changelog_ext_xattr *xattr = changelog_rec_xattr(rec); - strlcpy(xattr->cr_xattr, xattr_name, sizeof(xattr->cr_xattr)); + strscpy(xattr->cr_xattr, xattr_name, sizeof(xattr->cr_xattr)); } /** diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index 86e6532..aec27cb 100644 --- 
a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -754,7 +754,7 @@ static ssize_t append_pool_store(struct kobject *kobj, struct attribute *attr, if (!count || count > LOV_MAXPOOLNAME + 1 || buffer[0] == '\n') return -EINVAL; - strlcpy(mdd->mdd_append_pool, buffer, LOV_MAXPOOLNAME + 1); + strscpy(mdd->mdd_append_pool, buffer, LOV_MAXPOOLNAME + 1); if (mdd->mdd_append_pool[count - 1] == '\n') mdd->mdd_append_pool[count - 1] = '\0'; diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index ee4dd71..b7bf445 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -256,7 +256,7 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env, RETURN(-ENOMEM); hal->hal_version = HAL_VERSION; - strlcpy(hal->hal_fsname, hsd->hsd_fsname, MTI_NAME_MAXLEN + 1); + strscpy(hal->hal_fsname, hsd->hsd_fsname, MTI_NAME_MAXLEN + 1); hal->hal_archive_id = larr->arr_archive_id; hal->hal_flags = larr->arr_flags; hal->hal_count = 0; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index f69d19f..9925214 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -139,7 +139,7 @@ static void ucred_set_jobid(struct mdt_thread_info *info, struct lu_ucred *uc) /* set jobid if specified. 
*/ if (jobid) - strlcpy(uc->uc_jobid, jobid, sizeof(uc->uc_jobid)); + strscpy(uc->uc_jobid, jobid, sizeof(uc->uc_jobid)); else uc->uc_jobid[0] = '\0'; } diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 5a53bfb..100ce53 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1524,9 +1524,9 @@ again: body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY); LASSERT(body != NULL); LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname)); - if (strlcpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name)) - >= sizeof(body->mcb_name)) - GOTO(out, rc = -E2BIG); + rc = strscpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name)); + if (rc < 0) + GOTO(out, rc); body->mcb_offset = cfg->cfg_last_idx + 1; body->mcb_type = cld->cld_type; body->mcb_bits = PAGE_SHIFT; diff --git a/lustre/mgc/mgc_request_server.c b/lustre/mgc/mgc_request_server.c index 6eadd9d..15e2bf5 100644 --- a/lustre/mgc/mgc_request_server.c +++ b/lustre/mgc/mgc_request_server.c @@ -361,9 +361,9 @@ again: body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY); LASSERT(body); LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname)); - if (strlcpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name)) - >= sizeof(body->mcb_name)) - GOTO(out, rc = -E2BIG); + rc = strscpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name)); + if (rc < 0) + GOTO(out, rc); body->mcb_offset = config_read_offset; body->mcb_type = cld->cld_type; body->mcb_bits = PAGE_SHIFT; diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 1cda408..0e15af1 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -743,7 +743,7 @@ static int mgs_extract_fs_pool(char *arg, char *fsname, char *poolname) /* Also make sure poolname is not to long. 
*/ if (strlen(ptr) > LOV_MAXPOOLNAME) return -ENAMETOOLONG; - strlcpy(poolname, ptr, LOV_MAXPOOLNAME + 1); + strscpy(poolname, ptr, LOV_MAXPOOLNAME + 1); /* Test if fsname is empty */ len = strlen(arg) - strlen(ptr) - 1; diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 3fca59e..20887d4 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -995,14 +995,14 @@ static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs, OBD_ALLOC_PTR(mml); if (!mml) GOTO(out_close, rc = -ENOMEM); - if (strlcpy(mml->mml_marker.cm_comment, comment, - sizeof(mml->mml_marker.cm_comment)) >= - sizeof(mml->mml_marker.cm_comment)) - GOTO(out_free, rc = -E2BIG); - if (strlcpy(mml->mml_marker.cm_tgtname, devname, - sizeof(mml->mml_marker.cm_tgtname)) >= - sizeof(mml->mml_marker.cm_tgtname)) - GOTO(out_free, rc = -E2BIG); + rc = strscpy(mml->mml_marker.cm_comment, comment, + sizeof(mml->mml_marker.cm_comment)); + if (rc < 0) + GOTO(out_free, rc); + rc = strscpy(mml->mml_marker.cm_tgtname, devname, + sizeof(mml->mml_marker.cm_tgtname)); + if (rc < 0) + GOTO(out_free, rc); /* Modify mostly means cancel */ mml->mml_marker.cm_flags = flags; mml->mml_marker.cm_canceltime = flags ? 
ktime_get_real_seconds() : 0; @@ -1479,11 +1479,11 @@ static int mgs_replace_log(const struct lu_env *env, GOTO(out_close, rc = -ENOMEM); /* devname is only needed information to replace UUID records */ if (devname) - strlcpy(mrd->target.mti_svname, devname, + strscpy(mrd->target.mti_svname, devname, sizeof(mrd->target.mti_svname)); /* data is parsed in llog callback */ if (data) - strlcpy(mrd->target.mti_params, data, + strscpy(mrd->target.mti_params, data, sizeof(mrd->target.mti_params)); /* Copy records to this temporary llog */ mrd->temp_llh = orig_llh; @@ -1971,14 +1971,14 @@ static int record_marker(const struct lu_env *env, mgi->mgi_marker.cm_step = fsdb->fsdb_gen; mgi->mgi_marker.cm_flags = flags; mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE; - cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname, + cplen = strscpy(mgi->mgi_marker.cm_tgtname, tgtname, sizeof(mgi->mgi_marker.cm_tgtname)); - if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname)) - return -E2BIG; - cplen = strlcpy(mgi->mgi_marker.cm_comment, comment, + if (cplen < 0) + return cplen; + cplen = strscpy(mgi->mgi_marker.cm_comment, comment, sizeof(mgi->mgi_marker.cm_comment)); - if (cplen >= sizeof(mgi->mgi_marker.cm_comment)) - return -E2BIG; + if (cplen < 0) + return cplen; mgi->mgi_marker.cm_createtime = ktime_get_real_seconds(); mgi->mgi_marker.cm_canceltime = 0; lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); @@ -2211,10 +2211,10 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env, (marker->cm_flags & CM_START) && !(marker->cm_flags & CM_SKIP)) { got_an_osc_or_mdc = 1; - cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname, + cplen = strscpy(tmti->mti_svname, marker->cm_tgtname, sizeof(tmti->mti_svname)); - if (cplen >= sizeof(tmti->mti_svname)) - RETURN(-E2BIG); + if (cplen < 0) + RETURN(cplen); rc = record_start_log(env, mgs, &mdt_llh, mti->mti_svname); if (rc) @@ -3499,7 +3499,7 @@ static int mgs_wlp_lcfg(const struct lu_env *env, int rc, del; /* Erase any old settings of this same 
parameter */ - strlcpy(comment, ptr, sizeof(comment)); + strscpy(comment, ptr, sizeof(comment)); /* But don't try to match the value. */ tmp = strchr(comment, '='); if (tmp != NULL) @@ -4080,11 +4080,9 @@ static int mgs_write_log_param2(const struct lu_env *env, } param = strstr(ptr, PARAM_FAILNODE); - if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= - sizeof(mti->mti_params)) { - rc = -E2BIG; + rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params)); + if (rc < 0) goto end; - } CDEBUG(D_MGS, "Adding failnode with param %s\n", mti->mti_params); @@ -5403,9 +5401,9 @@ static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs, "%.*s", (int)len, param); param += len + 1; } else { - if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname)) >= - sizeof(mti->mti_svname)) - RETURN(-E2BIG); + rc = strscpy(mti->mti_svname, devname, sizeof(mti->mti_svname)); + if (rc < 0) + RETURN(rc); } if (!strlen(mti->mti_svname)) { @@ -5419,7 +5417,7 @@ static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs, /* For this case we have an invalid obd device name */ case -ENXIO: CDEBUG(D_MGS, "%s don't contain an index\n", mti->mti_svname); - strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); + strscpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); dev_type = 0; break; /* Not an obd device, assume devname is the fsname. 
@@ -5427,7 +5425,7 @@ static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs, */ case -EINVAL: CDEBUG(D_MGS, "%s is seen as a file system name\n", mti->mti_svname); - strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); + strscpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); dev_type = 0; break; default: @@ -5452,10 +5450,9 @@ static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs, } break; } - - if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= - sizeof(mti->mti_params)) - GOTO(out, rc = -E2BIG); + rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params)); + if (rc < 0) + GOTO(out, rc); CDEBUG(D_MGS, "set_conf_param fs='%s' device='%s' param='%s'\n", mti->mti_fsname, mti->mti_svname, mti->mti_params); @@ -5499,9 +5496,9 @@ static int mgs_set_param2(const struct lu_env *env, struct mgs_device *mgs, size_t len; int rc; - if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= - sizeof(mti->mti_params)) - GOTO(out, rc = -E2BIG); + rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params)); + if (rc < 0) + GOTO(out, rc); len = strcspn(param, ".="); if (len && param[len] != '=') { diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index 8a732b3..f14f9fb 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -342,7 +342,7 @@ dt_store_resolve(const struct lu_env *env, struct dt_device *dt, dfh->dfh_dt = dt; dfh->dfh_fid = fid; - strlcpy(info->dti_buf, path, sizeof(info->dti_buf)); + strscpy(info->dti_buf, path, sizeof(info->dti_buf)); result = dt->dd_ops->dt_root_get(env, dt, fid); if (result == 0) { diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index 5efc951..1767c17 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -523,7 +523,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) rcu_read_lock(); jid = jobid_current(); if (jid) { - strlcpy(jobid, jid, joblen); + strscpy(jobid, jid, joblen); 
joblen = strlen(jobid); } else { rc = -ENOENT; @@ -602,7 +602,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) spin_lock(&pidmap->jp_lock); if (!rc) { pidmap->jp_joblen = env_len; - strlcpy(pidmap->jp_jobid, env_jobid, + strscpy(pidmap->jp_jobid, env_jobid, sizeof(pidmap->jp_jobid)); rc = 0; } else if (rc == -ENOENT) { @@ -619,7 +619,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) * If a cached missing entry was found, return -ENOENT. */ if (pidmap->jp_joblen) { - strlcpy(jobid, pidmap->jp_jobid, joblen); + strscpy(jobid, pidmap->jp_jobid, joblen); joblen = pidmap->jp_joblen; rc = 0; } else if (!rc) { @@ -959,7 +959,7 @@ void lustre_jobid_clear(const char *find_jobid) if (jobid_hash == NULL) return; - strlcpy(jobid, find_jobid, sizeof(jobid)); + strscpy(jobid, find_jobid, sizeof(jobid)); /* trim \n off the end of the incoming jobid */ end = strchr(jobid, '\n'); if (end && *end == '\n') diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 37d3563..ed6fa85 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -1257,7 +1257,7 @@ struct lustre_cfg *lustre_cfg_rename(struct lustre_cfg *cfg, if (!new_param) GOTO(out_nocfg, new_cfg = ERR_PTR(-ENOMEM)); - strlcpy(new_param, new_name, new_len); + strscpy(new_param, new_name, new_len); if (value) strcat(new_param, value); diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index a841692..c921a43 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -848,8 +848,9 @@ int server_name2svname(const char *label, char *svname, const char **endptr, if (endptr != NULL) *endptr = dash; - if (strlcpy(svname, dash + 1, svsize) >= svsize) - return -E2BIG; + rc = strscpy(svname, dash + 1, svsize); + if (rc < 0) + return rc; return 0; } diff --git a/lustre/obdclass/upcall_cache.c b/lustre/obdclass/upcall_cache.c index 4bc61b7..f71901b 100644 --- a/lustre/obdclass/upcall_cache.c +++ b/lustre/obdclass/upcall_cache.c 
@@ -562,9 +562,9 @@ struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, RETURN(ERR_PTR(-ENOMEM)); for (i = 0; i < cache->uc_hashsize; i++) INIT_LIST_HEAD(&cache->uc_hashtable[i]); - strlcpy(cache->uc_name, name, sizeof(cache->uc_name)); + strscpy(cache->uc_name, name, sizeof(cache->uc_name)); /* upcall pathname proc tunable */ - strlcpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall)); + strscpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall)); cache->uc_entry_expire = entry_expire; cache->uc_acquire_expire = acquire_expire; cache->uc_acquire_replay = replayable; diff --git a/lustre/ofd/ofd_access_log.c b/lustre/ofd/ofd_access_log.c index 1024c08..1015203 100644 --- a/lustre/ofd/ofd_access_log.c +++ b/lustre/ofd/ofd_access_log.c @@ -468,7 +468,7 @@ struct ofd_access_log *ofd_access_log_create(const char *ofd_name, size_t size) if (!oal) return ERR_PTR(-ENOMEM); - strlcpy(oal->oal_name, ofd_name, sizeof(oal->oal_name)); + strscpy(oal->oal_name, ofd_name, sizeof(oal->oal_name)); oal->oal_log_size = size; oal->oal_entry_size = entry_size; INIT_LIST_HEAD(&oal->oal_circ_buf_list); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index a7583cc..54cfe02 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -1187,7 +1187,7 @@ static int ofd_soft_sync_cb_add(struct thandle *th, struct obd_export *exp) dcb = &ossc->ossc_cb; dcb->dcb_func = ofd_cb_soft_sync; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "ofd_cb_soft_sync", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "ofd_cb_soft_sync", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 02b4248..fe74fa1 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -206,7 +206,7 @@ static int ofd_precreate_cb_add(const struct lu_env *env, struct thandle *th, dcb = &opc->opc_cb; dcb->dcb_func = ofd_cb_precreate; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, 
"ofd_cb_precreate", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "ofd_cb_precreate", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 7ef62b0..314aa79 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -8209,10 +8209,10 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d, const char *name, struct lu_device *next) { struct osd_device *osd = osd_dev(d); + int cplen = strscpy(osd->od_svname, name, sizeof(osd->od_svname)); - if (strlcpy(osd->od_svname, name, sizeof(osd->od_svname)) >= - sizeof(osd->od_svname)) - return -E2BIG; + if (cplen < 0) + return cplen; return osd_procfs_init(osd, name); } @@ -8591,12 +8591,10 @@ static int osd_device_init0(const struct lu_env *env, /* default fallocate to unwritten extents: LU-14326/LU-14333 */ o->od_fallocate_zero_blocks = 0; - cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), + cplen = strscpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname)); - if (cplen >= sizeof(o->od_svname)) { - rc = -E2BIG; - GOTO(out, rc); - } + if (cplen < 0) + GOTO(out, rc = cplen); o->od_index_backup_stop = 0; o->od_index = -1; /* -1 means index is invalid */ diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 9e6cbf9..e6b71fb 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -80,7 +80,7 @@ static void osd_symlink_brw_stats(struct osd_device *osd) if (len_root > (p - path) || len_root + len_path + 16 > PATH_MAX) goto out; - strlcpy(path, root, len_root); + strscpy(path, root, len_root); if (p > path + len_root) { s = path + len_root; while ((*s++ = *p++) != '\0'); diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 5d265d5..531b89d 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -1106,13 +1106,13 @@ static int osd_mount(const struct lu_env 
*env, if (mntdev == NULL || svname == NULL) RETURN(-EINVAL); - rc = strlcpy(o->od_mntdev, mntdev, sizeof(o->od_mntdev)); - if (rc >= sizeof(o->od_mntdev)) - RETURN(-E2BIG); + rc = strscpy(o->od_mntdev, mntdev, sizeof(o->od_mntdev)); + if (rc < 0) + RETURN(rc); - rc = strlcpy(o->od_svname, svname, sizeof(o->od_svname)); - if (rc >= sizeof(o->od_svname)) - RETURN(-E2BIG); + rc = strscpy(o->od_svname, svname, sizeof(o->od_svname)); + if (rc < 0) + RETURN(rc); opts = lustre_cfg_string(cfg, 3); diff --git a/lustre/osd-zfs/osd_lproc.c b/lustre/osd-zfs/osd_lproc.c index 05c3a27..a81c6d4 100644 --- a/lustre/osd-zfs/osd_lproc.c +++ b/lustre/osd-zfs/osd_lproc.c @@ -67,7 +67,7 @@ static void osd_symlink_brw_stats(struct osd_device *osd) if (len_root > (p - path) || len_root + len_path + 16 > PATH_MAX) goto out; - strlcpy(path, root, len_root); + strscpy(path, root, len_root); if (p > path + len_root) { s = path + len_root; while ((*s++ = *p++) != '\0'); diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index b51ae1e..9389c2f 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -130,9 +130,9 @@ osd_oi_lookup(const struct lu_env *env, struct osd_device *o, if (rc) return rc; - rc = strlcpy(oi->oi_name, name, sizeof(oi->oi_name)); - if (rc >= sizeof(oi->oi_name)) - return -E2BIG; + rc = strscpy(oi->oi_name, name, sizeof(oi->oi_name)); + if (rc < 0) + return rc; oi->oi_zapid = zde->zde_dnode; diff --git a/lustre/osd-zfs/osd_quota.c b/lustre/osd-zfs/osd_quota.c index 7079aeb..33b2d41 100644 --- a/lustre/osd-zfs/osd_quota.c +++ b/lustre/osd-zfs/osd_quota.c @@ -377,7 +377,7 @@ static int osd_it_acct_rec(const struct lu_env *env, /* inode accounting is maintained by DMU since 0.7.0 */ strncpy(info->oti_buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN); - strlcpy(info->oti_buf + DMU_OBJACCT_PREFIX_LEN, za->za_name, + strscpy(info->oti_buf + DMU_OBJACCT_PREFIX_LEN, za->za_name, sizeof(info->oti_buf) - DMU_OBJACCT_PREFIX_LEN); rc = osd_zap_lookup(osd, 
it->oiq_obj->oo_dn->dn_object, it->oiq_obj->oo_dn, info->oti_buf, sizeof(uint64_t), diff --git a/lustre/ptlrpc/nodemap_handler.c b/lustre/ptlrpc/nodemap_handler.c index 18a77b9..a6da068 100644 --- a/lustre/ptlrpc/nodemap_handler.c +++ b/lustre/ptlrpc/nodemap_handler.c @@ -972,9 +972,8 @@ static int nodemap_set_fileset_helper(struct nodemap_config *config, nodemap->nm_fileset[0] = '\0'; else if (fileset[0] != '/') rc = -EINVAL; - else if (strlcpy(nodemap->nm_fileset, fileset, - sizeof(nodemap->nm_fileset)) >= - sizeof(nodemap->nm_fileset)) + else if (strscpy(nodemap->nm_fileset, fileset, + sizeof(nodemap->nm_fileset)) < 0) rc = -ENAMETOOLONG; return rc; @@ -1096,7 +1095,7 @@ int nodemap_set_sepol(const char *name, const char *sepol) /* truncation cannot happen, as string length was checked in * nodemap_validate_sepol() */ - strlcpy(nodemap->nm_sepol, sepol, sizeof(nodemap->nm_sepol)); + strscpy(nodemap->nm_sepol, sepol, sizeof(nodemap->nm_sepol)); out_putref: mutex_unlock(&active_config_lock); diff --git a/lustre/ptlrpc/nodemap_storage.c b/lustre/ptlrpc/nodemap_storage.c index 18ec01f..e40f145b 100644 --- a/lustre/ptlrpc/nodemap_storage.c +++ b/lustre/ptlrpc/nodemap_storage.c @@ -805,7 +805,7 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config, mutex_lock(&active_config_lock); old_nm = nodemap_lookup(rec->ncr.ncr_name); if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0') - strlcpy(nodemap->nm_fileset, old_nm->nm_fileset, + strscpy(nodemap->nm_fileset, old_nm->nm_fileset, sizeof(nodemap->nm_fileset)); mutex_unlock(&active_config_lock); if (!IS_ERR(old_nm)) diff --git a/lustre/ptlrpc/nrs.c b/lustre/ptlrpc/nrs.c index 3bf42ab..3f5b59d 100644 --- a/lustre/ptlrpc/nrs.c +++ b/lustre/ptlrpc/nrs.c @@ -328,7 +328,7 @@ static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy, char *arg) } if (arg) - strlcpy(policy->pol_arg, arg, sizeof(policy->pol_arg)); + strscpy(policy->pol_arg, arg, sizeof(policy->pol_arg)); /* take the started reference */ 
refcount_set(&policy->pol_start_ref, 1); @@ -1253,7 +1253,7 @@ static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf) if (desc == NULL) GOTO(fail, rc = -ENOMEM); - if (strlcpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name)) >= + if (strscpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name)) >= sizeof(desc->pd_name)) { OBD_FREE_PTR(desc); GOTO(fail, rc = -E2BIG); diff --git a/lustre/ptlrpc/nrs_tbf.c b/lustre/ptlrpc/nrs_tbf.c index 7623174..116bcc16 100644 --- a/lustre/ptlrpc/nrs_tbf.c +++ b/lustre/ptlrpc/nrs_tbf.c @@ -297,7 +297,7 @@ nrs_tbf_rule_start(struct ptlrpc_nrs_policy *policy, if (rule == NULL) return -ENOMEM; - strlcpy(rule->tr_name, start->tc_name, sizeof(rule->tr_name)); + strscpy(rule->tr_name, start->tc_name, sizeof(rule->tr_name)); rule->tr_rpc_rate = start->u.tc_start.ts_rpc_rate; rule->tr_flags = start->u.tc_start.ts_rule_flags; rule->tr_nsecs_per_rpc = NSEC_PER_SEC / rule->tr_rpc_rate; @@ -1647,8 +1647,8 @@ static inline void nrs_tbf_cli_gen_key(struct nrs_tbf_client *cli, if (cli) { INIT_LIST_HEAD(&cli->tc_lru); - strlcpy(cli->tc_key, keystr, sizeof(cli->tc_key)); - strlcpy(cli->tc_jobid, jobid, sizeof(cli->tc_jobid)); + strscpy(cli->tc_key, keystr, sizeof(cli->tc_key)); + strscpy(cli->tc_jobid, jobid, sizeof(cli->tc_jobid)); cli->tc_nid = req->rq_peer.nid; cli->tc_opcode = opc; cli->tc_id = id; @@ -2621,7 +2621,7 @@ nrs_tbf_id_rule_init(struct ptlrpc_nrs_policy *policy, if (rule->tr_ids_str == NULL) return -ENOMEM; - strlcpy(rule->tr_ids_str, start->u.tc_start.ts_ids_str, + strscpy(rule->tr_ids_str, start->u.tc_start.ts_ids_str, ids_len); if (!list_empty(&start->u.tc_start.ts_ids)) { diff --git a/lustre/ptlrpc/sec_config.c b/lustre/ptlrpc/sec_config.c index be7965b..a818717 100644 --- a/lustre/ptlrpc/sec_config.c +++ b/lustre/ptlrpc/sec_config.c @@ -99,7 +99,7 @@ int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr) return 0; } - strlcpy(buf, str, sizeof(buf)); + strscpy(buf, str, sizeof(buf)); bulk = 
strchr(buf, '-'); if (bulk) @@ -533,7 +533,7 @@ struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf, OBD_ALLOC_PTR(conf_tgt); if (conf_tgt) { - strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name)); + strscpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name)); sptlrpc_rule_set_init(&conf_tgt->sct_rset); list_add(&conf_tgt->sct_list, &conf->sc_tgts); } @@ -558,9 +558,7 @@ struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname, OBD_ALLOC_PTR(conf); if (conf == NULL) return NULL; - - if (strlcpy(conf->sc_fsname, fsname, sizeof(conf->sc_fsname)) >= - sizeof(conf->sc_fsname)) { + if (strscpy(conf->sc_fsname, fsname, sizeof(conf->sc_fsname)) < 0) { OBD_FREE_PTR(conf); return NULL; } @@ -674,7 +672,7 @@ int sptlrpc_process_config(struct lustre_cfg *lcfg) * is a actual filesystem. */ if (server_name2fsname(target, fsname, NULL)) - strlcpy(fsname, target, sizeof(fsname)); + strscpy(fsname, target, sizeof(fsname)); rc = sptlrpc_parse_rule(param, &rule); if (rc) diff --git a/lustre/ptlrpc/sec_lproc.c b/lustre/ptlrpc/sec_lproc.c index 8e8e875..8531103 100644 --- a/lustre/ptlrpc/sec_lproc.c +++ b/lustre/ptlrpc/sec_lproc.c @@ -178,7 +178,7 @@ static int sptlrpc_sepol_update(struct obd_import *imp, kref_init(&new->ssp_ref); new->ssp_sepol_size = pol_len + 1; new->ssp_mtime = mtime; - strlcpy(new->ssp_sepol, pol, new->ssp_sepol_size); + strscpy(new->ssp_sepol, pol, new->ssp_sepol_size); spin_lock(&imp_sec->ps_lock); old = rcu_dereference_protected(imp_sec->ps_sepol, 1); diff --git a/lustre/quota/qmt_dev.c b/lustre/quota/qmt_dev.c index ae93750..3126355 100644 --- a/lustre/quota/qmt_dev.c +++ b/lustre/quota/qmt_dev.c @@ -221,9 +221,9 @@ static int qmt_device_init0(const struct lu_env *env, struct qmt_device *qmt, RETURN(-EINVAL); /* record who i am, it might be useful ... 
*/ - rc = strlcpy(qmt->qmt_svname, svname, sizeof(qmt->qmt_svname)); - if (rc >= sizeof(qmt->qmt_svname)) - RETURN(-E2BIG); + rc = strscpy(qmt->qmt_svname, svname, sizeof(qmt->qmt_svname)); + if (rc < 0) + RETURN(rc); /* look-up the obd_device associated with the qmt */ obd = class_name2obd(qmt->qmt_svname); diff --git a/lustre/quota/qsd_lib.c b/lustre/quota/qsd_lib.c index 2d27fab..130a311 100644 --- a/lustre/quota/qsd_lib.c +++ b/lustre/quota/qsd_lib.c @@ -713,9 +713,9 @@ struct qsd_instance *qsd_init(const struct lu_env *env, char *svname, qsd->qsd_exclusive = excl; /* copy service name */ - if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname)) - >= sizeof(qsd->qsd_svname)) - GOTO(out, rc = -E2BIG); + rc = strscpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname)); + if (rc < 0) + GOTO(out, rc); /* grab reference on osd device */ lu_device_get(&dev->dd_lu_dev); diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 1f21085..a835071 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -1563,7 +1563,7 @@ int tgt_grant_commit_cb_add(struct thandle *th, struct obd_export *exp, dcb = &tgc->tgc_cb; dcb->dcb_func = tgt_grant_commit_cb; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "tgt_grant_commit_cb", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "tgt_grant_commit_cb", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 3a748bc..8801607 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -575,7 +575,7 @@ static int tgt_new_client_cb_add(struct thandle *th, struct obd_export *exp) dcb = &ccb->lncc_cb; dcb->dcb_func = tgt_cb_new_client; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "tgt_cb_new_client", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "tgt_cb_new_client", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { @@ -995,7 +995,7 @@ static int 
tgt_last_commit_cb_add(struct thandle *th, struct lu_target *tgt, dcb = &ccb->llcc_cb; dcb->dcb_func = tgt_cb_last_committed; INIT_LIST_HEAD(&dcb->dcb_linkage); - strlcpy(dcb->dcb_name, "tgt_cb_last_committed", sizeof(dcb->dcb_name)); + strscpy(dcb->dcb_name, "tgt_cb_last_committed", sizeof(dcb->dcb_name)); rc = dt_trans_cb_add(th, dcb); if (rc) { diff --git a/lustre/target/tgt_mount.c b/lustre/target/tgt_mount.c index 99bb489..6ca4016 100644 --- a/lustre/target/tgt_mount.c +++ b/lustre/target/tgt_mount.c @@ -1280,9 +1280,9 @@ static struct mgs_target_info *server_lsi2mti(struct lustre_sb_info *lsi) if (!mti) GOTO(free_list, mti = ERR_PTR(-ENOMEM)); - if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname)) - >= sizeof(mti->mti_svname)) - GOTO(free_mti, rc = -E2BIG); + rc = strscpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname)); + if (rc < 0) + GOTO(free_mti, rc); mti->mti_nid_count = nid_count; for (i = 0; i < mti->mti_nid_count; i++) { @@ -1314,7 +1314,7 @@ static struct mgs_target_info *server_lsi2mti(struct lustre_sb_info *lsi) /* use NID strings instead */ if (large_nid) mti->mti_flags |= LDD_F_LARGE_NID; - cplen = strlcpy(mti->mti_params, lsi->lsi_lmd->lmd_params, + cplen = strscpy(mti->mti_params, lsi->lsi_lmd->lmd_params, sizeof(mti->mti_params)); if (cplen >= sizeof(mti->mti_params)) rc = -E2BIG; @@ -1635,13 +1635,13 @@ static int lsi_prepare(struct lustre_sb_info *lsi) strlen(fstype) >= sizeof(lsi->lsi_fstype)) RETURN(-ENAMETOOLONG); - strlcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile, + strscpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile, sizeof(lsi->lsi_svname)); - strlcpy(lsi->lsi_osd_type, osd_type, sizeof(lsi->lsi_osd_type)); + strscpy(lsi->lsi_osd_type, osd_type, sizeof(lsi->lsi_osd_type)); /* XXX: a temp. 
solution for components using ldiskfs * to be removed in one of the subsequent patches */ - strlcpy(lsi->lsi_fstype, fstype, sizeof(lsi->lsi_fstype)); + strscpy(lsi->lsi_fstype, fstype, sizeof(lsi->lsi_fstype)); /* Determine server type */ rc = server_name2index(lsi->lsi_svname, &index, NULL); -- 1.8.3.1 From 6d27c2c8c72e853a238fd3fc7f42d658188ca02f Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 24 Mar 2024 15:33:15 +0700 Subject: [PATCH 05/16] LU-17592 build: compatibility updates for kernel 6.8 Linux commit v4.9-12227-g7b737965b331 introduced staging/lustre/libcfs: Convert to hotplug state machine Linux commit v4.10-rc1-5-g4205e4786d0b cpu/hotplug: Provide dynamic range for prepare stage Linux commit v6.7-rc2-1-g15bece7bec0d cpu/hotplug: Remove unused CPU hotplug states CPUHP_LUSTRE_CFS_DEAD was introduced in 4.9 and removed in 6.8 CPUHP_BP_PREPARE_DYN was introduced in 4.10 With no distro kernels between 4.10 and 4.11 switch to CPUHP_BP_PREPARE_DYN Linux commit v6.7-rc1-3-gda549bdd15c2 dentry: switch the lists of children to hlist Provide trivial wrappers to abstract the changed members Linux commit v6.7-rc4-79-gaf7628d6ec19 fs: convert error_remove_page to error_remove_folio Provide a generic_error_remove_folio() for older kernels. 
HPE-bug-id: LUS-12181 Fixes: ce98bfe5f72 ("LU-10499 pcc: add readonly mode for PCC") Signed-off-by: Shaun Tancheff Change-Id: Ib2e85c2acd3d0934e1c4712dad53b80f0ddb1b08 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54229 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- libcfs/autoconf/lustre-libcfs.m4 | 19 +++++++++++--- lnet/lnet/lib-cpt.c | 6 ++--- lustre/autoconf/lustre-core.m4 | 56 ++++++++++++++++++++++++++++++++++++++++ lustre/include/lustre_compat.h | 36 +++++++++++++++++++++++++- lustre/llite/dcache.c | 2 +- lustre/llite/namei.c | 2 +- lustre/llite/vvp_io.c | 2 +- lustre/llite/vvp_page.c | 3 ++- lustre/obdclass/cl_page.c | 2 +- lustre/osd-ldiskfs/osd_io.c | 8 +++--- 10 files changed, 120 insertions(+), 16 deletions(-) diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index ba0c8d1..b8c47cd 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -1009,14 +1009,27 @@ AC_DEFUN([LIBCFS_GET_USER_PAGES_GUP_FLAGS], [ ]) # LIBCFS_GET_USER_PAGES_GUP_FLAGS # -# Kernel version 4.10 commit 7b737965b33188bd3dbb44e938535c4006d97fbb -# libcfs: Convert to hotplug state machine +# LIBCFS_HOTPLUG_STATE_MACHINE +# +# Linux commit v4.9-12227-g7b737965b331 introduced +# staging/lustre/libcfs: Convert to hotplug state machine +# Which introduced: CPUHP_LUSTRE_CFS_DEAD +# +# Linux commit v4.10-rc1-5-g4205e4786d0b +# cpu/hotplug: Provide dynamic range for prepare stage +# Which introduced: CPUHP_BP_PREPARE_DYN +# +# Linux commit v6.7-rc2-1-g15bece7bec0d +# cpu/hotplug: Remove unused CPU hotplug states +# Which removed: CPUHP_LUSTRE_CFS_DEAD +# +# With no distro kernels between 4.10 and 4.11 switch to CPUHP_BP_PREPARE_DYN # AC_DEFUN([LIBCFS_SRC_HOTPLUG_STATE_MACHINE], [ LB2_LINUX_TEST_SRC([cpu_hotplug_state_machine], [ #include ],[ - cpuhp_remove_state(CPUHP_LUSTRE_CFS_DEAD); + 
cpuhp_remove_state(CPUHP_BP_PREPARE_DYN); ]) ]) AC_DEFUN([LIBCFS_HOTPLUG_STATE_MACHINE], [ diff --git a/lnet/lnet/lib-cpt.c b/lnet/lnet/lib-cpt.c index 5e4d2ab..c86f566 100644 --- a/lnet/lnet/lib-cpt.c +++ b/lnet/lnet/lib-cpt.c @@ -1253,7 +1253,7 @@ void cfs_cpu_fini(void) #ifdef HAVE_HOTPLUG_STATE_MACHINE if (lustre_cpu_online > 0) cpuhp_remove_state_nocalls(lustre_cpu_online); - cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); + cpuhp_remove_state_nocalls(CPUHP_BP_PREPARE_DYN); #else unregister_hotcpu_notifier(&cfs_cpu_notifier); #endif /* !HAVE_HOTPLUG_STATE_MACHINE */ @@ -1268,7 +1268,7 @@ int cfs_cpu_init(void) #ifdef CONFIG_HOTPLUG_CPU #ifdef HAVE_HOTPLUG_STATE_MACHINE - ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD, + ret = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, "fs/lustre/cfe:dead", NULL, cfs_cpu_dead); if (ret < 0) @@ -1323,7 +1323,7 @@ failed_alloc_table: if (lustre_cpu_online > 0) cpuhp_remove_state_nocalls(lustre_cpu_online); failed_cpu_online: - cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); + cpuhp_remove_state_nocalls(CPUHP_BP_PREPARE_DYN); failed_cpu_dead: #else unregister_hotcpu_notifier(&cfs_cpu_notifier); diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 5afeb19..f139924 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -4386,6 +4386,54 @@ AC_DEFUN([LC_HAVE_SHRINKER_ALLOC], [ ]) # LC_HAVE_SHRINKER_ALLOC # +# LC_HAVE_DENTRY_D_CHILDREN +# +# Linux commit v6.7-rc1-3-gda549bdd15c2 +# dentry: switch the lists of children to hlist +# +AC_DEFUN([LC_SRC_HAVE_DENTRY_D_CHILDREN], [ + LB2_LINUX_TEST_SRC([dentry_d_children], [ + #include + ],[ + struct dentry *dentry = NULL; + + return hlist_empty(&dentry->d_children); + ],[-Werror]) +]) +AC_DEFUN([LC_HAVE_DENTRY_D_CHILDREN], [ + LB2_MSG_LINUX_TEST_RESULT([if sruct dentry has d_children member], + [dentry_d_children], [ + AC_DEFINE(HAVE_DENTRY_D_CHILDREN, 1, + [sruct dentry has d_children member]) + ]) +]) # 
LC_HAVE_DENTRY_D_CHILDREN + +# +# LC_HAVE_GENERIC_ERROR_REMOVE_FOLIO +# +# Linux commit v6.7-rc4-79-gaf7628d6ec19 +# fs: convert error_remove_page to error_remove_folio +# +AC_DEFUN([LC_SRC_HAVE_GENERIC_ERROR_REMOVE_FOLIO], [ + LB2_LINUX_TEST_SRC([generic_error_remove_folio], [ + #include + ],[ + struct address_space *mapping = NULL; + struct folio *folio = NULL; + int err = generic_error_remove_folio(mapping, folio); + + (void) err; + ],[-Werror]) +]) +AC_DEFUN([LC_HAVE_GENERIC_ERROR_REMOVE_FOLIO], [ + LB2_MSG_LINUX_TEST_RESULT([if generic_error_remove_folio() exists], + [generic_error_remove_folio], [ + AC_DEFINE(HAVE_GENERIC_ERROR_REMOVE_FOLIO, 1, + [generic_error_remove_folio() exists]) + ]) +]) # LC_HAVE_GENERIC_ERROR_REMOVE_FOLIO + +# # LC_PROG_LINUX # # Lustre linux kernel checks @@ -4669,6 +4717,10 @@ AC_DEFUN([LC_PROG_LINUX_SRC], [ LC_SRC_HAVE_INODE_GET_MTIME_SEC LC_SRC_HAVE_SHRINKER_ALLOC + # 6.8 + LC_SRC_HAVE_DENTRY_D_CHILDREN + LC_SRC_HAVE_GENERIC_ERROR_REMOVE_FOLIO + # kernel patch to extend integrity interface LC_SRC_BIO_INTEGRITY_PREP_FN ]) @@ -4973,6 +5025,10 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [ LC_HAVE_INODE_GET_MTIME_SEC LC_HAVE_SHRINKER_ALLOC + # 6.8 + LC_HAVE_DENTRY_D_CHILDREN + LC_HAVE_GENERIC_ERROR_REMOVE_FOLIO + # kernel patch to extend integrity interface LC_BIO_INTEGRITY_PREP_FN ]) diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index 8cf7e5b..4969b39 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -124,6 +124,16 @@ static inline int d_in_lookup(struct dentry *dentry) } #endif +#ifdef HAVE_DENTRY_D_CHILDREN +#define d_no_children(dentry) (hlist_empty(&(dentry)->d_children)) +#define d_for_each_child(child, dentry) \ + hlist_for_each_entry((child), &(dentry)->d_children, d_sib) +#else +#define d_no_children(dentry) (list_empty(&(dentry)->d_subdirs)) +#define d_for_each_child(child, dentry) \ + list_for_each_entry((child), &(dentry)->d_subdirs, d_child) +#endif + #ifndef 
HAVE_VM_FAULT_T #define vm_fault_t int #endif @@ -777,6 +787,30 @@ static inline void ll_security_release_secctx(char *secdata, u32 seclen, #define ll_set_acl(ns, inode, acl, type) ll_set_acl(inode, acl, type) #endif +#ifndef HAVE_GENERIC_ERROR_REMOVE_FOLIO +#ifdef HAVE_FOLIO_BATCH +#define generic_folio folio +#else +#define generic_folio page +#define folio_page(page, n) (page) +#define folio_nr_pages(page) (1) +#define page_folio(page) (page) +#endif +static inline int generic_error_remove_folio(struct address_space *mapping, + struct generic_folio *folio) +{ + int pg, npgs = folio_nr_pages(folio); + int err = 0; + + for (pg = 0; pg < npgs; pg++) { + err = generic_error_remove_page(mapping, folio_page(folio, pg)); + if (err) + break; + } + return err; +} +#endif + /** * delete_from_page_cache is not exported anymore */ @@ -792,7 +826,7 @@ static inline void cfs_delete_from_page_cache(struct page *page) unlock_page(page); /* on entry page is locked */ if (S_ISREG(page->mapping->host->i_mode)) { - generic_error_remove_page(page->mapping, page); + generic_error_remove_folio(page->mapping, page_folio(page)); } else { loff_t lstart = page->index << PAGE_SHIFT; loff_t lend = lstart + PAGE_SIZE - 1; diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 60299c9..3d285f7 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -131,7 +131,7 @@ static int ll_ddelete(const struct dentry *de) d_lustre_invalid(de) ? "deleting" : "keeping", de, de, de->d_parent, de->d_inode, d_unhashed((struct dentry *)de) ? "" : "hashed,", - list_empty(&de->d_subdirs) ? "" : "subdirs"); + d_no_children(de) ? 
"" : "subdirs"); if (d_lustre_invalid(de)) RETURN(1); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 79e9e29..f5c9177 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -178,7 +178,7 @@ restart: spin_lock(&dir->i_lock); hlist_for_each_entry(dentry, &dir->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) { + d_for_each_child(child, dentry) { if (child->d_inode) continue; diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index a04ad8c..393167a 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1565,7 +1565,7 @@ static int vvp_io_fault_start(const struct lu_env *env, LASSERT(PageLocked(vmpage)); if (CFS_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) - generic_error_remove_page(vmpage->mapping, vmpage); + generic_error_remove_folio(vmpage->mapping, page_folio(vmpage)); size = i_size_read(inode); /* Though we have already held a cl_lock upon this page, but diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index a91a176..f92c9de 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -159,7 +159,8 @@ static void vvp_page_completion_read(const struct lu_env *env, * because subpage would be from wrong osc when trying * to read from a new mirror */ - generic_error_remove_page(vmpage->mapping, vmpage); + generic_error_remove_folio(vmpage->mapping, + page_folio(vmpage)); } } diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 6a05aad..7a501f5 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -784,7 +784,7 @@ void cl_page_discard(const struct lu_env *env, vmpage = cp->cp_vmpage; LASSERT(vmpage != NULL); LASSERT(PageLocked(vmpage)); - generic_error_remove_page(vmpage->mapping, vmpage); + generic_error_remove_folio(vmpage->mapping, page_folio(vmpage)); } else { cl_page_delete(env, cp); } diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index b3375b0..7d7a609 100644 --- 
a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1476,8 +1476,8 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, CDEBUG(D_INODE, "Skipping [%d] == %d\n", i, lnb[i].lnb_rc); LASSERT(lnb[i].lnb_page); - generic_error_remove_page(inode->i_mapping, - lnb[i].lnb_page); + generic_error_remove_folio(inode->i_mapping, + page_folio(lnb[i].lnb_page)); continue; } @@ -1528,8 +1528,8 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, continue; if (!PagePrivate2(lnb[i].lnb_page)) { LASSERT(PageLocked(lnb[i].lnb_page)); - generic_error_remove_page(inode->i_mapping, - lnb[i].lnb_page); + generic_error_remove_folio(inode->i_mapping, + page_folio(lnb[i].lnb_page)); } } } -- 1.8.3.1 From 5dc91df283fb5a7030b384f224085d73268dcca5 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Wed, 6 Mar 2024 15:33:25 +0000 Subject: [PATCH 06/16] LU-17624 ssk: support FIPS mode on client In FIPS mode, only certain crypto methods are allowed. This has an impact on the DHKE mechanism implemented for SSK, as this relies on a prime number generated for the client key. More specifically, FIPS mode imposes that only certain safe, well-known primes be used. OpenSSL prior to v1.1 just imposes a requirement on the prime length. OpenSSL v1.1 requires the use of a specific primitive when FIPS mode is on, to fetch a well-known prime based on a prime NID. OpenSSL v3 is capable of detecting FIPS mode is enforced, and picks up a well-known prime instead of generating one. Because of this, primes used for the DHKE are identical on all clients in FIPS mode. So urge admins to use a short expiration time on SSK keys, one day instead of one week, so that security contexts are re-negotiated more often. 
The NIST recommended primes are listed in Table 26 in Appendix D of: https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-56Ar3.pdf Test-Parameters: trivial Test-Parameters: testgroup=review-dne-selinux-ssk-part-1 Test-Parameters: testgroup=review-dne-selinux-ssk-part-2 Test-Parameters: testgroup=review-dne-selinux-ssk-part-1 clientdistro=el9.2 Test-Parameters: testgroup=review-dne-selinux-ssk-part-2 clientdistro=el9.2 Signed-off-by: Sebastien Buisson Change-Id: I52b1926393e51fba6a9e92a837f86a38516ef6ad Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54314 Reviewed-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo --- lustre/autoconf/lustre-core.m4 | 31 ++++++++++ lustre/include/uapi/linux/lustre/lustre_user.h | 6 ++ lustre/utils/gss/lgss_sk.c | 80 +++++++++++++++++++++++++- lustre/utils/gss/sk_utils.h | 73 +++++++++++++++++++++++ 4 files changed, 189 insertions(+), 1 deletion(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index f139924..6b6c0fb 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -405,6 +405,36 @@ AC_MSG_RESULT([$has_hmac_functions]) CFLAGS="$saved_flags" ]) # LC_OPENSSL_HMAC +# LC_OPENSSL_FIPS +# +# OpenSSL 1.0+ can be built with or without FIPS support +AC_DEFUN([LC_OPENSSL_FIPS], [ +has_fips_support="no" +saved_flags="$CFLAGS" +CFLAGS="-Werror" +AC_MSG_CHECKING([whether OpenSSL has FIPS_mode]) +AS_IF([test "x$enable_ssk" != xno], [ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + #include + #include + #include + #include + #include + + int main(void) { + int rc; + rc = FIPS_mode(); + return rc; + } +])],[ + AC_DEFINE(HAVE_OPENSSL_FIPS, 1, [OpenSSL FIPS_mode]) + has_fips_support="yes" +]) +]) +AC_MSG_RESULT([$has_fips_support]) +CFLAGS="$saved_flags" +]) # LC_OPENSSL_FIPS + # LC_OPENSSL_EVP_PKEY # # OpenSSL 3.0 introduces EVP_PKEY_get_params @@ -440,6 +470,7 @@ AC_MSG_RESULT([$has_evp_pkey]) 
AC_DEFUN([LC_OPENSSL_SSK], [ AS_IF([test "x$enable_ssk" != xno], [ LC_OPENSSL_HMAC + LC_OPENSSL_FIPS LC_OPENSSL_EVP_PKEY ]) AS_IF([test "x$has_hmac_functions" = xyes -o "x$has_evp_pkey" = xyes], [ diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 376c622..682a5bf 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -2877,6 +2877,12 @@ struct sk_hmac_type { int sht_type; }; +struct sk_prime_type { + const char *spt_name; + int spt_type; + int spt_primebits; +}; + enum lock_mode_user { MODE_READ_USER = 1, MODE_WRITE_USER, diff --git a/lustre/utils/gss/lgss_sk.c b/lustre/utils/gss/lgss_sk.c index afc1181..c43722c 100644 --- a/lustre/utils/gss/lgss_sk.c +++ b/lustre/utils/gss/lgss_sk.c @@ -51,10 +51,14 @@ /* One week default expiration */ #define SK_DEFAULT_EXPIRE 604800 +/* But only one day in FIPS mode */ +#define SK_DEFAULT_EXPIRE_FIPS 86400 #define SK_DEFAULT_SK_KEYLEN 256 #define SK_DEFAULT_PRIME_BITS 2048 #define SK_DEFAULT_NODEMAP "default" +static int fips_mode; + static void usage(FILE *fp, char *program) { int i; @@ -275,13 +279,76 @@ static int parse_mgsnids(char *mgsnids, struct sk_keyfile_config *config) return rc; } +#if !defined(HAVE_OPENSSL_EVP_PKEY) && OPENSSL_VERSION_NUMBER >= 0x10100000L +static inline int __fetch_ssk_prime(struct sk_keyfile_config *config) +{ + const BIGNUM *p; + DH *dh = NULL; + int primenid; + int rc = -1; + + primenid = sk_primebits2primenid(config->skc_prime_bits); + dh = DH_new_by_nid(primenid); + if (!dh) { + fprintf(stderr, "error: dh cannot be init\n"); + goto prime_end; + } + + p = DH_get0_p(dh); + if (!p) { + fprintf(stderr, "error: cannot get p from dh\n"); + goto prime_end; + } + + if (BN_num_bytes(p) > SK_MAX_P_BYTES) { + fprintf(stderr, + "error: requested length %d exceeds maximum %d\n", + BN_num_bytes(p) * 8, SK_MAX_P_BYTES * 8); + goto prime_end; + } + + if (BN_bn2bin(p, config->skc_p) != 
BN_num_bytes(p)) { + fprintf(stderr, "error: convert BIGNUM p to binary failed\n"); + goto prime_end; + } + + rc = 0; + +prime_end: + if (rc) + fprintf(stderr, + "error: fetching SSK prime failed: %s\n", + ERR_error_string(ERR_get_error(), NULL)); + DH_free(dh); + return rc; +} +#endif + static inline int __gen_ssk_prime(struct sk_keyfile_config *config) { int rc = -1; + const char *primename; EVP_PKEY_CTX *ctx = NULL; EVP_PKEY *dh = NULL; BIGNUM *p; + if (fips_mode) { + primename = sk_primebits2name(config->skc_prime_bits); + if (!primename) { + fprintf(stderr, + "error: prime len %d not supported in FIPS mode\n", + config->skc_prime_bits); + return rc; + } +#if OPENSSL_VERSION_NUMBER >= 0x10100000L + fprintf(stdout, + "FIPS mode, using well-known prime %s\n", primename); +#ifndef HAVE_OPENSSL_EVP_PKEY + return __fetch_ssk_prime(config); +#endif +#endif /* OPENSSL_VERSION_NUMBER >= 0x10100000L */ + } + ctx = EVP_PKEY_CTX_new_from_name(NULL, "DH", NULL); if (!ctx || EVP_PKEY_paramgen_init(ctx) != 1) { fprintf(stderr, "error: ctx cannot be init\n"); @@ -320,6 +387,10 @@ static inline int __gen_ssk_prime(struct sk_keyfile_config *config) rc = 0; prime_end: + if (rc) + fprintf(stderr, + "error: generating SSK prime failed: %s\n", + ERR_error_string(ERR_get_error(), NULL)); EVP_PKEY_free(dh); EVP_PKEY_CTX_free(ctx); return rc; @@ -489,6 +560,8 @@ int main(int argc, char **argv) /* init gss logger for foreground (no syslog) which prints to stderr */ initerr(NULL, verbose, 1); + fips_mode = FIPS_mode(); + if (input) return print_config(input); @@ -548,7 +621,8 @@ int main(int argc, char **argv) /* Set the defaults for new key */ config->skc_version = SK_CONF_VERSION; - config->skc_expire = SK_DEFAULT_EXPIRE; + config->skc_expire = fips_mode ? 
+ SK_DEFAULT_EXPIRE_FIPS : SK_DEFAULT_EXPIRE; config->skc_shared_keylen = SK_DEFAULT_SK_KEYLEN; config->skc_prime_bits = SK_DEFAULT_PRIME_BITS; config->skc_crypt_alg = SK_CRYPT_AES256_CTR; @@ -576,6 +650,10 @@ int main(int argc, char **argv) config->skc_hmac_alg = hmac; if (expire != -1) config->skc_expire = expire; + if (fips_mode && config->skc_expire > SK_DEFAULT_EXPIRE_FIPS) + fprintf(stderr, + "warning: using a %us key expiration greater than %us is not recommended in FIPS mode\n", + config->skc_expire, SK_DEFAULT_EXPIRE_FIPS); if (shared_keylen != -1) config->skc_shared_keylen = shared_keylen; if (prime_bits != -1) { diff --git a/lustre/utils/gss/sk_utils.h b/lustre/utils/gss/sk_utils.h index 4de88e9..e54d6c4 100644 --- a/lustre/utils/gss/sk_utils.h +++ b/lustre/utils/gss/sk_utils.h @@ -36,8 +36,12 @@ #include #include #include +#include #include #include +#ifdef HAVE_OPENSSL_FIPS +#include +#endif #ifdef HAVE_OPENSSL_EVP_PKEY #include #endif @@ -133,6 +137,10 @@ static inline const BIGNUM *DH_get0_p(const DH *dh) } #endif +#ifndef HAVE_OPENSSL_FIPS +#define FIPS_mode() 0 +#endif + /* Some limits and defaults */ #define SK_CONF_VERSION 1 #define SK_MSG_VERSION 1 @@ -369,6 +377,71 @@ static inline const char *sk_hmac2name(enum sk_hmac_alg type) return NULL; } +#ifndef NID_ffdhe2048 +#define NID_ffdhe2048 1126 +#define NID_ffdhe3072 1127 +#define NID_ffdhe4096 1128 +#define NID_ffdhe6144 1129 +#define NID_ffdhe8192 1130 +#endif + +static const struct sk_prime_type sk_prime_nids[] = { + { + .spt_name = "null", + .spt_type = 0, + .spt_primebits = 0 + }, + { + .spt_name = "ffdhe2048", + .spt_type = NID_ffdhe2048, + .spt_primebits = 2048 + }, + { + .spt_name = "ffdhe3072", + .spt_type = NID_ffdhe3072, + .spt_primebits = 3072 + }, + { + .spt_name = "ffdhe4096", + .spt_type = NID_ffdhe4096, + .spt_primebits = 4096 + }, + { + .spt_name = "ffdhe6144", + .spt_type = NID_ffdhe6144, + .spt_primebits = 6144 + }, + { + .spt_name = "ffdhe8192", + .spt_type = NID_ffdhe8192, + 
.spt_primebits = 8192 + }, +}; + +static inline int sk_primebits2primenid(int primebits) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sk_prime_nids); i++) { + if (primebits == sk_prime_nids[i].spt_primebits) + return sk_prime_nids[i].spt_type; + } + + return -1; +} + +static inline const char *sk_primebits2name(int primebits) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sk_prime_nids); i++) { + if (primebits == sk_prime_nids[i].spt_primebits) + return sk_prime_nids[i].spt_name; + } + + return NULL; +} + void sk_init_logging(char *program, int verbose, int fg); struct sk_keyfile_config *sk_read_file(char *filename); int sk_load_keyfile(char *path); -- 1.8.3.1 From b698abd415bc4a810f307611fe984e50e007581e Mon Sep 17 00:00:00 2001 From: Jian Yu Date: Wed, 3 Apr 2024 00:38:47 -0700 Subject: [PATCH 07/16] LU-17504 build: fix array-index-out-of-bounds warning On Linux kernel 6.5, due to commit 2d47c6956ab3 ("ubsan: Tighten UBSAN_BOUNDS on GCC"), flexible trailing arrays declared like 'lc_array_sum[1];' will generate warnings when CONFIG_UBSAN & co. is enabled: UBSAN: array-index-out-of-bounds in lprocfs_status.c:1609:17 index 1 is out of range for type '__s64 [1]' Since LPROCFS_STATS_FLAG_IRQ_SAFE flag is only used in one place - obd_memory() counter, we can just remove it and change obd_memory over to a regular percpu_counter. This would both simplify the lprocfs_counter() code, move over to using more kernel functionality instead of libcfs, as well as reduce overhead slightly for the memory accounting code. 
Change-Id: Ic461c4b30317bfd2b1e9f5b6be84c4a7fb4e3eb9 Signed-off-by: Jian Yu Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54365 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/autoconf/lustre-core.m4 | 23 ++++++++++++++++ lustre/include/lprocfs_status.h | 18 +------------ lustre/include/obd_support.h | 54 ++++++++++++++++---------------------- lustre/obdclass/class_obd.c | 41 +++++++---------------------- lustre/obdclass/lprocfs_counters.c | 20 ++------------ lustre/obdclass/lprocfs_status.c | 38 ++++----------------------- 6 files changed, 64 insertions(+), 130 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 6b6c0fb..dbaeea3 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -2360,6 +2360,27 @@ AC_DEFUN([LC_HAVE_FSMAP_HEADER], [ ]) # LC_HAVE_FSMAP_HEADER # +# LC_HAVE_PERCPU_COUNTER_ADD_BATCH +# +# Linux commit v4.11-12447-g104b4e5139fe +# percpu_counter: Rename __percpu_counter_add to percpu_counter_add_batch +# +AC_DEFUN([LC_SRC_HAVE_PERCPU_COUNTER_ADD_BATCH], [ + LB2_LINUX_TEST_SRC([percpu_counter_add_batch_exists], [ + #include + ],[ + (void)percpu_counter_add_batch(NULL, 0, 0); + ],[-Werror]) +]) +AC_DEFUN([LC_HAVE_PERCPU_COUNTER_ADD_BATCH], [ + LB2_MSG_LINUX_TEST_RESULT([if 'percpu_counter_add_batch()' exists], + [percpu_counter_add_batch_exists], [ + AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1, + ['percpu_counter_add_batch()' exists]) + ]) +]) # LC_HAVE_PERCPU_COUNTER_ADD_BATCH + +# # Kernel version 4.12 commit 47f38c539e9a42344ff5a664942075bd4df93876 # CURRENT_TIME is not 64 bit time safe so it was replaced with # current_time() @@ -4606,6 +4627,7 @@ AC_DEFUN([LC_PROG_LINUX_SRC], [ LC_SRC_HAVE_KEY_USAGE_REFCOUNT LC_SRC_HAVE_CRYPTO_MAX_ALG_NAME_128 LC_SRC_HAVE_FSMAP_HEADER + LC_SRC_HAVE_PERCPU_COUNTER_ADD_BATCH # 4.12 LC_SRC_CURRENT_TIME @@ -4903,6 +4925,7 @@ 
AC_DEFUN([LC_PROG_LINUX_RESULTS], [ LC_HAVE_KEY_USAGE_REFCOUNT LC_HAVE_CRYPTO_MAX_ALG_NAME_128 LC_HAVE_FSMAP_HEADER + LC_HAVE_PERCPU_COUNTER_ADD_BATCH # 4.12 LC_CURRENT_TIME diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 95a5492..572564ab 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -169,17 +169,9 @@ struct lprocfs_counter { __s64 lc_count; __s64 lc_min; __s64 lc_max; + __s64 lc_sum; __s64 lc_sumsquare; - /* - * Every counter has lc_array_sum[0], while lc_array_sum[1] is only - * for irq context counter, i.e. stats with - * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need - * lc_array_sum[1] - */ - __s64 lc_array_sum[1]; }; -#define lc_sum lc_array_sum[0] -#define lc_sum_irq lc_array_sum[1] struct lprocfs_percpu { struct lprocfs_counter lp_cntr[0]; @@ -193,7 +185,6 @@ enum lprocfs_stats_lock_ops { enum lprocfs_stats_flags { LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* need locking(no percpu area) */ - LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */ }; enum lprocfs_fields_flags { @@ -477,10 +468,6 @@ lprocfs_stats_counter_size(struct lprocfs_stats *stats) percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]); - /* irq safe stats need lc_array_sum[1] */ - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpusize += stats->ls_num * sizeof(__s64); - if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0) percpusize = L1_CACHE_ALIGN(percpusize); @@ -495,9 +482,6 @@ lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid, cntr = &stats->ls_percpu[cpuid]->lp_cntr[index]; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - cntr = (void *)cntr + index * sizeof(__s64); - return cntr; } diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4d88f21..e94e8b3 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -37,6 +37,7 
@@ #include #include #include +#include #include #include @@ -44,11 +45,7 @@ #include /* global variables */ -extern struct lprocfs_stats *obd_memory; -enum { - OBD_MEMORY_STAT = 0, - OBD_STATS_NUM, -}; +extern struct percpu_counter obd_memory; extern unsigned int obd_debug_peer_on_timeout; extern unsigned int obd_dump_on_timeout; @@ -784,41 +781,36 @@ extern bool obd_enable_health_write; extern atomic64_t libcfs_kmem; -#ifdef CONFIG_PROC_FS -#define obd_memory_add(size) \ - lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size)) -#define obd_memory_sub(size) \ - lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size)) -#define obd_memory_sum() \ - lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \ - LPROCFS_FIELDS_FLAGS_SUM) - -extern void obd_update_maxusage(void); -extern __u64 obd_memory_max(void); - -#else /* CONFIG_PROC_FS */ - -extern __u64 obd_alloc; +/* OBD_MEMORY_BATCH is the maximum error allowed per CPU core. Since + * obd_memory_sum() is calling percpu_counter_sum_positive(), it adds + * up the per-core local delta anyway, so the per-core batch size + * can be large. This could be percpu_counter_add_local(), but that + * only exists in kernel 6.0 and later, and just uses a larger batch.
+ */ +#define OBD_MEMORY_BATCH (16 * 1024 * 1024) -extern __u64 obd_max_alloc; +#ifndef HAVE_PERCPU_COUNTER_ADD_BATCH +#define percpu_counter_add_batch(fbc, amount, batch) \ + __percpu_counter_add(fbc, amount, batch) +#endif -static inline void obd_memory_add(long size) +static inline void obd_memory_add(size_t size) { - obd_alloc += size; - if (obd_alloc > obd_max_alloc) - obd_max_alloc = obd_alloc; + percpu_counter_add_batch(&obd_memory, size, OBD_MEMORY_BATCH); } -static inline void obd_memory_sub(long size) +static inline void obd_memory_sub(size_t size) { - obd_alloc -= size; + percpu_counter_add_batch(&obd_memory, -size, OBD_MEMORY_BATCH); } -#define obd_memory_sum() (obd_alloc) - -#define obd_memory_max() (obd_max_alloc) +static inline s64 obd_memory_sum(void) +{ + return percpu_counter_sum_positive(&obd_memory); +} -#endif /* !CONFIG_PROC_FS */ +extern void obd_update_maxusage(void); +extern __u64 obd_memory_max(void); #define OBD_DEBUG_MEMUSAGE (1) diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 43deebd..5d95590 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -52,11 +52,7 @@ #include "llog_internal.h" #include -#ifdef CONFIG_PROC_FS static __u64 obd_max_alloc; -#else -__u64 obd_max_alloc; -#endif static DEFINE_SPINLOCK(obd_updatemax_lock); @@ -102,19 +98,15 @@ EXPORT_SYMBOL(at_early_margin); int at_extra = 30; EXPORT_SYMBOL(at_extra); -#ifdef CONFIG_PROC_FS -struct lprocfs_stats *obd_memory = NULL; +struct percpu_counter obd_memory; EXPORT_SYMBOL(obd_memory); -#endif static int obdclass_oom_handler(struct notifier_block *self, unsigned long notused, void *nfreed) { -#ifdef CONFIG_PROC_FS /* in bytes */ pr_info("obd_memory max: %llu, obd_memory current: %llu\n", obd_memory_max(), obd_memory_sum()); -#endif /* CONFIG_PROC_FS */ return NOTIFY_OK; } @@ -715,19 +707,13 @@ static int __init obdclass_init(void) if (err) return err; -#ifdef CONFIG_PROC_FS - obd_memory = lprocfs_stats_alloc(OBD_STATS_NUM, - 
LPROCFS_STATS_FLAG_NONE | - LPROCFS_STATS_FLAG_IRQ_SAFE); - if (obd_memory == NULL) { - CERROR("kmalloc of 'obd_memory' failed\n"); - return -ENOMEM; + err = percpu_counter_init(&obd_memory, 0, GFP_KERNEL); + if (err < 0) { + CERROR("obdclass: initializing 'obd_memory' failed: rc = %d\n", + err); + return err; } - lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "memused"); -#endif err = libcfs_kkuc_init(); if (err) goto cleanup_obd_memory; @@ -742,7 +728,7 @@ static int __init obdclass_init(void) err = misc_register(&obd_psdev); if (err) { - CERROR("cannot register OBD miscdevice: err = %d\n", err); + CERROR("cannot register OBD miscdevice: rc = %d\n", err); goto cleanup_class_handle; } @@ -830,9 +816,7 @@ cleanup_kkuc: libcfs_kkuc_fini(); cleanup_obd_memory: -#ifdef CONFIG_PROC_FS - lprocfs_stats_free(&obd_memory); -#endif + percpu_counter_destroy(&obd_memory); unregister_oom_notifier(&obdclass_oom); return err; @@ -851,7 +835,6 @@ void obd_update_maxusage(void) } EXPORT_SYMBOL(obd_update_maxusage); -#ifdef CONFIG_PROC_FS __u64 obd_memory_max(void) { __u64 ret; @@ -863,14 +846,12 @@ __u64 obd_memory_max(void) return ret; } -#endif /* CONFIG_PROC_FS */ +EXPORT_SYMBOL(obd_memory_max); static void __exit obdclass_exit(void) { -#ifdef CONFIG_PROC_FS __u64 memory_leaked; __u64 memory_max; -#endif /* CONFIG_PROC_FS */ ENTRY; misc_deregister(&obd_psdev); @@ -891,16 +872,14 @@ static void __exit obdclass_exit(void) obd_zombie_impexp_stop(); libcfs_kkuc_fini(); -#ifdef CONFIG_PROC_FS memory_leaked = obd_memory_sum(); memory_max = obd_memory_max(); - lprocfs_stats_free(&obd_memory); + percpu_counter_destroy(&obd_memory); /* the below message is checked in test-framework.sh check_mem_leak() */ CDEBUG((memory_leaked) ? 
D_ERROR : D_INFO, "obd_memory max: %llu, leaked: %llu\n", memory_max, memory_leaked); -#endif /* CONFIG_PROC_FS */ unregister_oom_notifier(&obdclass_oom); diff --git a/lustre/obdclass/lprocfs_counters.c b/lustre/obdclass/lprocfs_counters.c index 22e6bf3..9b773e1 100644 --- a/lustre/obdclass/lprocfs_counters.c +++ b/lustre/obdclass/lprocfs_counters.c @@ -70,16 +70,8 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount) * as memory allocation could trigger memory shrinker call * ldlm_pool_shrink(), which calls lprocfs_counter_add(). * LU-1727. - * - * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE - * flag, because it needs accurate counting lest memory leak - * check reports error. */ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq += amount; - else - percpu_cntr->lc_sum += amount; + percpu_cntr->lc_sum += amount; if (header->lc_config & LPROCFS_CNTR_STDDEV) percpu_cntr->lc_sumsquare += (__s64)amount * amount; @@ -132,16 +124,8 @@ void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount) * softirq context - right now that's the only case we're in * softirq context here, use separate counter for that. * bz20650. - * - * Only obd_memory uses LPROCFS_STATS_FLAG_IRQ_SAFE - * flag, because it needs accurate counting lest memory leak - * check reports error. 
*/ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq -= amount; - else - percpu_cntr->lc_sum -= amount; + percpu_cntr->lc_sum -= amount; } lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags); } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index e9cf370..5437c9c 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -444,10 +444,7 @@ int lprocfs_stats_lock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, *flags); - else - spin_lock(&stats->ls_lock); + spin_lock(&stats->ls_lock); return opc == LPROCFS_GET_NUM_CPU ? 1 : 0; } @@ -490,10 +487,7 @@ void lprocfs_stats_unlock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_unlock_irqrestore(&stats->ls_lock, *flags); - else - spin_unlock(&stats->ls_lock); + spin_unlock(&stats->ls_lock); } else if (opc == LPROCFS_GET_SMP_ID) { put_cpu(); } @@ -1200,7 +1194,6 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) struct lprocfs_counter *cntr; unsigned int percpusize; int rc = -ENOMEM; - unsigned long flags = 0; int i; LASSERT(stats->ls_percpu[cpuid] == NULL); @@ -1211,17 +1204,10 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) if (stats->ls_percpu[cpuid]) { rc = 0; if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, flags); - else - spin_lock(&stats->ls_lock); + spin_lock(&stats->ls_lock); if (stats->ls_biggest_alloc_num <= cpuid) stats->ls_biggest_alloc_num = cpuid + 1; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) { - spin_unlock_irqrestore(&stats->ls_lock, flags); - } else { - 
spin_unlock(&stats->ls_lock); - } + spin_unlock(&stats->ls_lock); } /* initialize the ls_percpu[cpuid] non-zero counter */ for (i = 0; i < stats->ls_num; ++i) { @@ -1238,7 +1224,6 @@ struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num, struct lprocfs_stats *stats; unsigned int num_entry; unsigned int percpusize = 0; - int i; if (num == 0) return NULL; @@ -1273,11 +1258,6 @@ struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num, if (!stats->ls_percpu[0]) goto fail; stats->ls_biggest_alloc_num = 1; - } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) { - /* alloc all percpu data, currently only obd_memory use this */ - for (i = 0; i < num_entry; ++i) - if (lprocfs_stats_alloc_one(stats, i) < 0) - goto fail; } return stats; @@ -1373,8 +1353,6 @@ void lprocfs_stats_clear(struct lprocfs_stats *stats) percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - percpu_cntr->lc_sum_irq = 0; } } stats->ls_init = ktime_get_real(); @@ -1601,8 +1579,6 @@ void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index, percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq = 0; } lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); } @@ -1724,8 +1700,6 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, break; case LPROCFS_FIELDS_FLAGS_SUM: ret = lc->lc_sum; - if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - ret += lc->lc_sum_irq; break; case LPROCFS_FIELDS_FLAGS_MIN: ret = lc->lc_min; @@ -1734,9 +1708,7 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, ret = lc->lc_max; break; case LPROCFS_FIELDS_FLAGS_AVG: - ret = div64_u64((flags & LPROCFS_STATS_FLAG_IRQ_SAFE ? 
- lc->lc_sum_irq : 0) + lc->lc_sum, - lc->lc_count); + ret = div64_u64(lc->lc_sum, lc->lc_count); break; case LPROCFS_FIELDS_FLAGS_SUMSQUARE: ret = lc->lc_sumsquare; -- 1.8.3.1 From 5921cb2a5b8b7e1301b2c1502be6f8006ab4082a Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Thu, 14 Mar 2024 18:15:29 +0100 Subject: [PATCH 08/16] LU-17643 gss: make a local copy of the sptlrpc llog Make a local copy on server side of the sptlrpc llog, so that the targets that do not manage to connect to the MGS know at least which security flavor to accept from clients. This needs to pass the super_block to config_log_find_or_add(). Add sanity-sec test_70 to check that sptlrpc llog on MDS and OSS side is equivalent to the one from the MGS. Signed-off-by: Sebastien Buisson Change-Id: I81f0136746e2df7cca1b34c4a17e4b7135a43c29 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54394 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Aurelien Degremont Reviewed-by: Mikhail Pershin Reviewed-by: Oleg Drokin --- lustre/mgc/mgc_request.c | 16 ++++++++++-- lustre/tests/sanity-sec.sh | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 100ce53..7c2c27d 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -297,7 +297,13 @@ config_log_find_or_add(struct obd_device *obd, char *logname, struct config_llog_data *cld; /* Note class_config_llog_handler() depends on getting "obd" back */ - lcfg.cfg_instance = sb ? 
ll_get_cfg_instance(sb) : (unsigned long)obd; + /* for sptlrpc, sb is only provided to be able to make a local copy, + * not for the instance + */ + if (sb && type != MGS_CFG_T_SPTLRPC) + lcfg.cfg_instance = ll_get_cfg_instance(sb); + else + lcfg.cfg_instance = (unsigned long)obd; cld = config_log_find(logname, &lcfg); if (unlikely(cld != NULL)) @@ -347,7 +353,7 @@ config_log_add(struct obd_device *obd, char *logname, strcpy(seclogname + (ptr - logname), "-sptlrpc"); if (cfg->cfg_sub_clds & CONFIG_SUB_SPTLRPC) { - sptlrpc_cld = config_log_find_or_add(obd, seclogname, NULL, + sptlrpc_cld = config_log_find_or_add(obd, seclogname, sb, MGS_CFG_T_SPTLRPC, cfg); if (IS_ERR(sptlrpc_cld)) { rc = PTR_ERR(sptlrpc_cld); @@ -1637,6 +1643,12 @@ static int mgc_process_cfg_log(struct obd_device *mgc, if (cld->cld_cfg.cfg_sb) lsi = s2lsi(cld->cld_cfg.cfg_sb); + /* sptlrpc llog must not keep ref to sb, + * it was just needed to get lsi + */ + if (cld_is_sptlrpc(cld)) + cld->cld_cfg.cfg_sb = NULL; + OBD_ALLOC_PTR(env); if (!env) RETURN(-ENOMEM); diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh index 51ca209..da0f525 100755 --- a/lustre/tests/sanity-sec.sh +++ b/lustre/tests/sanity-sec.sh @@ -6293,6 +6293,67 @@ test_69() { } run_test 69 "check upcall incorrect values" +test_70() { + local param_mgs=$(mktemp $TMP/$tfile-mgs.XXXXXX) + local param_copy=$(mktemp $TMP/$tfile-copy.XXXXXX) + + stack_trap "rm -f $param_mgs $param_copy" EXIT + + (( $MDS1_VERSION > $(version_code 2.15.61) )) || + skip "Need MDS version at least 2.15.61" + + if ! 
$SHARED_KEY; then + skip "need shared key feature for this test" + fi + + [[ "$ost1_FSTYPE" == ldiskfs ]] || + skip "ldiskfs only test (using debugfs)" + + # unmount then remount the Lustre filesystem, to make sure llogs + # are copied locally + export SK_NO_KEY=false + stopall || error "stopall failed" + init_gss + mountmgs || error "mountmgs failed" + mountmds || error "mountmds failed" + mountoss || error "mountoss failed" + mountcli || error "mountcli failed" + lfs df -h + unset SK_NO_KEY + + do_facet mgs "sync ; sync" + do_facet mgs "$DEBUGFS -c -R 'ls CONFIGS/' $(mgsdevname)" + do_facet mgs "$DEBUGFS -c -R 'dump CONFIGS/$FSNAME-sptlrpc $param_mgs' \ + $(mgsdevname)" + do_facet mgs "llog_reader $param_mgs" | grep -vE "SKIP|marker" | + grep "^#" > $param_mgs + cat $param_mgs + + if ! combined_mgs_mds; then + do_facet mds1 "sync ; sync" + do_facet mds1 "$DEBUGFS -c -R 'ls CONFIGS/' $(mdsdevname 1)" + do_facet mds1 "$DEBUGFS -c -R 'dump CONFIGS/$FSNAME-sptlrpc \ + $param_copy' $(mdsdevname 1)" + do_facet mds1 "llog_reader $param_copy" | + grep -vE "SKIP|marker" | grep "^#" > $param_copy + cat $param_copy + cmp -bl $param_mgs $param_copy || + error "sptlrpc llog differ in mds" + rm -f $param_copy + fi + + do_facet ost1 "sync ; sync" + do_facet ost1 "$DEBUGFS -c -R 'ls CONFIGS/' $(ostdevname 1)" + do_facet ost1 "$DEBUGFS -c -R 'dump CONFIGS/$FSNAME-sptlrpc \ + $param_copy' $(ostdevname 1)" + do_facet ost1 "llog_reader $param_copy" | grep -vE "SKIP|marker" | + grep "^#" > $param_copy + cat $param_copy + cmp -bl $param_mgs $param_copy || + error "sptlrpc llog differ in oss" +} +run_test 70 "targets have local copy of sptlrpc llog" + log "cleanup: ======================================================" sec_unsetup() { -- 1.8.3.1 From 72734cf178f1c4cd9dfe16a8f5800a0f1ab14e52 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Tue, 19 Mar 2024 17:04:20 +0100 Subject: [PATCH 09/16] LU-17431 ptlrpc: move nodemap related ioctls to ptlrpc Move to ptlrpc the functions 
designed to handle nodemap specific ioctls, as they should not be accessible to MGS only. Test-Parameters: trivial Signed-off-by: Sebastien Buisson Change-Id: I7a9651ea8484c540d18d6813ab96dc95a0871245 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54502 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre_nodemap.h | 5 + lustre/mgs/mgs_handler.c | 146 +--------------- lustre/mgs/mgs_internal.h | 3 - lustre/mgs/mgs_llog.c | 213 ----------------------- lustre/ptlrpc/nodemap_handler.c | 363 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 373 insertions(+), 357 deletions(-) diff --git a/lustre/include/lustre_nodemap.h b/lustre/include/lustre_nodemap.h index ccdc8ab..e421c3b 100644 --- a/lustre/include/lustre_nodemap.h +++ b/lustre/include/lustre_nodemap.h @@ -31,6 +31,7 @@ #define _LUSTRE_NODEMAP_H #include +#include #define LUSTRE_NODEMAP_NAME "nodemap" @@ -170,6 +171,10 @@ void nodemap_test_nid(struct lnet_nid *nid, char *name_buf, size_t name_len); int nodemap_test_id(struct lnet_nid *nid, enum nodemap_id_type idtype, u32 client_id, u32 *fs_id); +int server_iocontrol_nodemap(struct obd_device *obd, + struct obd_ioctl_data *data, bool dynamic); + + struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env, struct dt_object *obj, struct local_oid_storage *l); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 0e15af1..3d2d4cb 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -763,21 +763,8 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, struct mgs_device *mgs, struct obd_ioctl_data *data) { - struct lustre_cfg *lcfg = NULL; - struct fs_db *fsdb; - struct lnet_nid nid; - const char *nodemap_name = NULL; - const char *nidstr = NULL; - const char *client_idstr = NULL; - const char *idtype_str = NULL; - char *param = NULL; - char fs_idstr[16]; - char name_buf[LUSTRE_NODEMAP_NAME_LENGTH 
+ 1]; - int rc = 0; - unsigned long client_id; - __u32 fs_id; - __u32 cmd; - int idtype; + struct fs_db *fsdb; + int rc; ENTRY; @@ -787,130 +774,9 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out, rc = -EINVAL); } - if (data->ioc_plen1 > PAGE_SIZE) - GOTO(out, rc = -E2BIG); - - OBD_ALLOC(lcfg, data->ioc_plen1); - if (lcfg == NULL) - GOTO(out, rc = -ENOMEM); - - if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) - GOTO(out_lcfg, rc = -EFAULT); - - cmd = lcfg->lcfg_command; - - switch (cmd) { - case LCFG_NODEMAP_ACTIVATE: - if (lcfg->lcfg_bufcount != 2) - GOTO(out_lcfg, rc = -EINVAL); - param = lustre_cfg_string(lcfg, 1); - if (strcmp(param, "1") == 0) - nodemap_activate(1); - else - nodemap_activate(0); - break; - case LCFG_NODEMAP_ADD: - case LCFG_NODEMAP_DEL: - if (lcfg->lcfg_bufcount != 2) - GOTO(out_lcfg, rc = -EINVAL); - nodemap_name = lustre_cfg_string(lcfg, 1); - rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); - break; - case LCFG_NODEMAP_TEST_NID: - if (lcfg->lcfg_bufcount != 2) - GOTO(out_lcfg, rc = -EINVAL); - nidstr = lustre_cfg_string(lcfg, 1); - rc = libcfs_strnid(&nid, nidstr); - if (rc < 0) - GOTO(out_lcfg, rc); - - nodemap_test_nid(&nid, name_buf, sizeof(name_buf)); - rc = copy_to_user(data->ioc_pbuf1, name_buf, - min_t(size_t, data->ioc_plen1, - sizeof(name_buf))); - if (rc != 0) - GOTO(out_lcfg, rc = -EFAULT); - break; - case LCFG_NODEMAP_TEST_ID: - if (lcfg->lcfg_bufcount != 4) - GOTO(out_lcfg, rc = -EINVAL); - nidstr = lustre_cfg_string(lcfg, 1); - idtype_str = lustre_cfg_string(lcfg, 2); - client_idstr = lustre_cfg_string(lcfg, 3); - - rc = libcfs_strnid(&nid, nidstr); - if (rc < 0) - GOTO(out_lcfg, rc); - - if (strcmp(idtype_str, "uid") == 0) - idtype = NODEMAP_UID; - else if (strcmp(idtype_str, "gid") == 0) - idtype = NODEMAP_GID; - else if (strcmp(idtype_str, "projid") == 0) - idtype = NODEMAP_PROJID; - else - GOTO(out_lcfg, rc = -EINVAL); - - rc = kstrtoul(client_idstr, 10, &client_id); - if (rc != 0) 
- GOTO(out_lcfg, rc = -EINVAL); - - rc = nodemap_test_id(&nid, idtype, client_id, &fs_id); - if (rc < 0) - GOTO(out_lcfg, rc = -EINVAL); - - if (data->ioc_plen1 < sizeof(fs_idstr)) - GOTO(out_lcfg, rc = -EINVAL); - - snprintf(fs_idstr, sizeof(fs_idstr), "%u", fs_id); - if (copy_to_user(data->ioc_pbuf1, fs_idstr, - sizeof(fs_idstr)) != 0) - GOTO(out_lcfg, rc = -EINVAL); - break; - case LCFG_NODEMAP_ADD_RANGE: - case LCFG_NODEMAP_DEL_RANGE: - case LCFG_NODEMAP_ADD_UIDMAP: - case LCFG_NODEMAP_DEL_UIDMAP: - case LCFG_NODEMAP_ADD_GIDMAP: - case LCFG_NODEMAP_DEL_GIDMAP: - case LCFG_NODEMAP_ADD_PROJIDMAP: - case LCFG_NODEMAP_DEL_PROJIDMAP: - case LCFG_NODEMAP_SET_FILESET: - case LCFG_NODEMAP_SET_SEPOL: - if (lcfg->lcfg_bufcount != 3) - GOTO(out_lcfg, rc = -EINVAL); - nodemap_name = lustre_cfg_string(lcfg, 1); - param = lustre_cfg_string(lcfg, 2); - rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); - break; - case LCFG_NODEMAP_ADMIN: - case LCFG_NODEMAP_TRUSTED: - case LCFG_NODEMAP_DENY_UNKNOWN: - case LCFG_NODEMAP_SQUASH_UID: - case LCFG_NODEMAP_SQUASH_GID: - case LCFG_NODEMAP_SQUASH_PROJID: - case LCFG_NODEMAP_MAP_MODE: - case LCFG_NODEMAP_AUDIT_MODE: - case LCFG_NODEMAP_FORBID_ENCRYPT: - case LCFG_NODEMAP_READONLY_MOUNT: - case LCFG_NODEMAP_RBAC: - if (lcfg->lcfg_bufcount != 4) - GOTO(out_lcfg, rc = -EINVAL); - nodemap_name = lustre_cfg_string(lcfg, 1); - param = lustre_cfg_string(lcfg, 3); - rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); - break; - default: - rc = -ENOTTY; - } - - if (rc) { - CDEBUG_LIMIT(rc == -EEXIST ? 
D_INFO : D_ERROR, - "%s: OBD_IOC_NODEMAP command %X for %s: rc = %d\n", - mgs->mgs_obd->obd_name, lcfg->lcfg_command, - nodemap_name, rc); - GOTO(out_lcfg, rc); - } + rc = server_iocontrol_nodemap(mgs->mgs_obd, data, false); + if (rc) + GOTO(out, rc); /* revoke nodemap lock */ rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb); @@ -922,8 +788,6 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, mgs_put_fsdb(mgs, fsdb); } -out_lcfg: - OBD_FREE(lcfg, data->ioc_plen1); out: RETURN(rc); } diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 4b36cac..2d109c5 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -227,9 +227,6 @@ int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs, int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs, enum lcfg_command_type cmd, char *poolname, char *fsname, char *ostname); -int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs, - enum lcfg_command_type cmd, const char *nodemap_name, - char *param); /* mgs_handler.c */ int mgs_get_lock(struct obd_device *obd, struct ldlm_res_id *res, diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 20887d4..ea349b4 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -5648,219 +5648,6 @@ out: return rc; } -int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs, - enum lcfg_command_type cmd, const char *nodemap_name, - char *param) -{ - struct lnet_nid nid[2]; - u32 idmap[2]; - bool bool_switch; - u8 netmask = 0; - u32 int_id; - int rc = 0; - - ENTRY; - switch (cmd) { - case LCFG_NODEMAP_ADD: - rc = nodemap_add(nodemap_name); - break; - case LCFG_NODEMAP_DEL: - rc = nodemap_del(nodemap_name); - break; - case LCFG_NODEMAP_ADD_RANGE: - rc = nodemap_parse_range(param, nid, &netmask); - if (rc != 0) - break; - rc = nodemap_add_range(nodemap_name, nid, netmask); - break; - case LCFG_NODEMAP_DEL_RANGE: - rc = nodemap_parse_range(param, nid, &netmask); - if (rc != 
0) - break; - rc = nodemap_del_range(nodemap_name, nid, netmask); - break; - case LCFG_NODEMAP_ADMIN: - rc = kstrtobool(param, &bool_switch); - if (rc) - break; - rc = nodemap_set_allow_root(nodemap_name, bool_switch); - break; - case LCFG_NODEMAP_DENY_UNKNOWN: - rc = kstrtobool(param, &bool_switch); - if (rc) - break; - rc = nodemap_set_deny_unknown(nodemap_name, bool_switch); - break; - case LCFG_NODEMAP_AUDIT_MODE: - rc = kstrtobool(param, &bool_switch); - if (rc == 0) - rc = nodemap_set_audit_mode(nodemap_name, bool_switch); - break; - case LCFG_NODEMAP_FORBID_ENCRYPT: - rc = kstrtobool(param, &bool_switch); - if (rc == 0) - rc = nodemap_set_forbid_encryption(nodemap_name, - bool_switch); - break; - case LCFG_NODEMAP_READONLY_MOUNT: - rc = kstrtobool(param, &bool_switch); - if (rc == 0) - rc = nodemap_set_readonly_mount(nodemap_name, - bool_switch); - break; - case LCFG_NODEMAP_MAP_MODE: - { - char *p; - __u8 map_mode = 0; - - if ((p = strstr(param, "all")) != NULL) { - if ((p == param || *(p-1) == ',') && - (*(p+3) == '\0' || *(p+3) == ',')) { - map_mode = NODEMAP_MAP_ALL; - } else { - rc = -EINVAL; - break; - } - } else { - while ((p = strsep(¶m, ",")) != NULL) { - if (!*p) - break; - - if (strcmp("both", p) == 0) - map_mode |= NODEMAP_MAP_BOTH; - else if (strcmp("uid_only", p) == 0 || - strcmp("uid", p) == 0) - map_mode |= NODEMAP_MAP_UID; - else if (strcmp("gid_only", p) == 0 || - strcmp("gid", p) == 0) - map_mode |= NODEMAP_MAP_GID; - else if (strcmp("projid_only", p) == 0 || - strcmp("projid", p) == 0) - map_mode |= NODEMAP_MAP_PROJID; - else - break; - } - if (p) { - rc = -EINVAL; - break; - } - } - - rc = nodemap_set_mapping_mode(nodemap_name, map_mode); - break; - } - case LCFG_NODEMAP_RBAC: - { - enum nodemap_rbac_roles rbac; - char *p; - - if (strcmp(param, "all") == 0) { - rbac = NODEMAP_RBAC_ALL; - } else if (strcmp(param, "none") == 0) { - rbac = NODEMAP_RBAC_NONE; - } else { - rbac = NODEMAP_RBAC_NONE; - while ((p = strsep(¶m, ",")) != NULL) { - 
int i; - - if (!*p) - break; - - for (i = 0; i < ARRAY_SIZE(nodemap_rbac_names); - i++) { - if (strcmp(p, - nodemap_rbac_names[i].nrn_name) - == 0) { - rbac |= - nodemap_rbac_names[i].nrn_mode; - break; - } - } - if (i == ARRAY_SIZE(nodemap_rbac_names)) - break; - } - if (p) { - rc = -EINVAL; - break; - } - } - - rc = nodemap_set_rbac(nodemap_name, rbac); - break; - } - case LCFG_NODEMAP_TRUSTED: - rc = kstrtobool(param, &bool_switch); - if (rc) - break; - rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch); - break; - case LCFG_NODEMAP_SQUASH_UID: - rc = kstrtouint(param, 10, &int_id); - if (rc) - break; - rc = nodemap_set_squash_uid(nodemap_name, int_id); - break; - case LCFG_NODEMAP_SQUASH_GID: - rc = kstrtouint(param, 10, &int_id); - if (rc) - break; - rc = nodemap_set_squash_gid(nodemap_name, int_id); - break; - case LCFG_NODEMAP_SQUASH_PROJID: - rc = kstrtouint(param, 10, &int_id); - if (rc) - break; - rc = nodemap_set_squash_projid(nodemap_name, int_id); - break; - case LCFG_NODEMAP_ADD_UIDMAP: - case LCFG_NODEMAP_ADD_GIDMAP: - case LCFG_NODEMAP_ADD_PROJIDMAP: - rc = nodemap_parse_idmap(param, idmap); - if (rc != 0) - break; - if (cmd == LCFG_NODEMAP_ADD_UIDMAP) - rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID, - idmap); - else if (cmd == LCFG_NODEMAP_ADD_GIDMAP) - rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID, - idmap); - else if (cmd == LCFG_NODEMAP_ADD_PROJIDMAP) - rc = nodemap_add_idmap(nodemap_name, NODEMAP_PROJID, - idmap); - else - rc = -EINVAL; - break; - case LCFG_NODEMAP_DEL_UIDMAP: - case LCFG_NODEMAP_DEL_GIDMAP: - case LCFG_NODEMAP_DEL_PROJIDMAP: - rc = nodemap_parse_idmap(param, idmap); - if (rc != 0) - break; - if (cmd == LCFG_NODEMAP_DEL_UIDMAP) - rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID, - idmap); - else if (cmd == LCFG_NODEMAP_DEL_GIDMAP) - rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID, - idmap); - else if (cmd == LCFG_NODEMAP_DEL_PROJIDMAP) - rc = nodemap_del_idmap(nodemap_name, NODEMAP_PROJID, - idmap); - else - rc 
= -EINVAL; - break; - case LCFG_NODEMAP_SET_FILESET: - rc = nodemap_set_fileset(nodemap_name, param); - break; - case LCFG_NODEMAP_SET_SEPOL: - rc = nodemap_set_sepol(nodemap_name, param); - break; - default: - rc = -EINVAL; - } - - RETURN(rc); -} - static inline int mgs_pool_check_ostname(struct fs_db *fsdb, char *fsname, char *ostname) { diff --git a/lustre/ptlrpc/nodemap_handler.c b/lustre/ptlrpc/nodemap_handler.c index a6da068..f2cf2de 100644 --- a/lustre/ptlrpc/nodemap_handler.c +++ b/lustre/ptlrpc/nodemap_handler.c @@ -2015,3 +2015,366 @@ int nodemap_test_id(struct lnet_nid *nid, enum nodemap_id_type idtype, return 0; } EXPORT_SYMBOL(nodemap_test_id); + +static int cfg_nodemap_cmd(enum lcfg_command_type cmd, const char *nodemap_name, + char *param, bool dynamic) +{ + struct lnet_nid nid[2]; + bool bool_switch; + u8 netmask = 0; + u32 idmap[2]; + u32 int_id; + int rc = 0; + + ENTRY; + switch (cmd) { + case LCFG_NODEMAP_ADD: + rc = nodemap_add(nodemap_name); + break; + case LCFG_NODEMAP_DEL: + rc = nodemap_del(nodemap_name); + break; + case LCFG_NODEMAP_ADD_RANGE: + rc = nodemap_parse_range(param, nid, &netmask); + if (rc != 0) + break; + rc = nodemap_add_range(nodemap_name, nid, netmask); + break; + case LCFG_NODEMAP_DEL_RANGE: + rc = nodemap_parse_range(param, nid, &netmask); + if (rc != 0) + break; + rc = nodemap_del_range(nodemap_name, nid, netmask); + break; + case LCFG_NODEMAP_ADMIN: + rc = kstrtobool(param, &bool_switch); + if (rc) + break; + rc = nodemap_set_allow_root(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_DENY_UNKNOWN: + rc = kstrtobool(param, &bool_switch); + if (rc) + break; + rc = nodemap_set_deny_unknown(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_AUDIT_MODE: + rc = kstrtobool(param, &bool_switch); + if (rc == 0) + rc = nodemap_set_audit_mode(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_FORBID_ENCRYPT: + rc = kstrtobool(param, &bool_switch); + if (rc == 0) + rc = 
nodemap_set_forbid_encryption(nodemap_name, + bool_switch); + break; + case LCFG_NODEMAP_READONLY_MOUNT: + rc = kstrtobool(param, &bool_switch); + if (rc == 0) + rc = nodemap_set_readonly_mount(nodemap_name, + bool_switch); + break; + case LCFG_NODEMAP_MAP_MODE: + { + char *p; + __u8 map_mode = 0; + + if ((p = strstr(param, "all")) != NULL) { + if ((p == param || *(p-1) == ',') && + (*(p+3) == '\0' || *(p+3) == ',')) { + map_mode = NODEMAP_MAP_ALL; + } else { + rc = -EINVAL; + break; + } + } else { + while ((p = strsep(&param, ",")) != NULL) { + if (!*p) + break; + + if (strcmp("both", p) == 0) + map_mode |= NODEMAP_MAP_BOTH; + else if (strcmp("uid_only", p) == 0 || + strcmp("uid", p) == 0) + map_mode |= NODEMAP_MAP_UID; + else if (strcmp("gid_only", p) == 0 || + strcmp("gid", p) == 0) + map_mode |= NODEMAP_MAP_GID; + else if (strcmp("projid_only", p) == 0 || + strcmp("projid", p) == 0) + map_mode |= NODEMAP_MAP_PROJID; + else + break; + } + if (p) { + rc = -EINVAL; + break; + } + } + + rc = nodemap_set_mapping_mode(nodemap_name, map_mode); + break; + } + case LCFG_NODEMAP_RBAC: + { + enum nodemap_rbac_roles rbac; + char *p; + + if (strcmp(param, "all") == 0) { + rbac = NODEMAP_RBAC_ALL; + } else if (strcmp(param, "none") == 0) { + rbac = NODEMAP_RBAC_NONE; + } else { + rbac = NODEMAP_RBAC_NONE; + while ((p = strsep(&param, ",")) != NULL) { + int i; + + if (!*p) + break; + + for (i = 0; i < ARRAY_SIZE(nodemap_rbac_names); + i++) { + if (strcmp(p, + nodemap_rbac_names[i].nrn_name) + == 0) { + rbac |= + nodemap_rbac_names[i].nrn_mode; + break; + } + } + if (i == ARRAY_SIZE(nodemap_rbac_names)) + break; + } + if (p) { + rc = -EINVAL; + break; + } + } + + rc = nodemap_set_rbac(nodemap_name, rbac); + break; + } + case LCFG_NODEMAP_TRUSTED: + rc = kstrtobool(param, &bool_switch); + if (rc) + break; + rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_SQUASH_UID: + rc = kstrtouint(param, 10, &int_id); + if (rc) + break; + rc = 
nodemap_set_squash_uid(nodemap_name, int_id); + break; + case LCFG_NODEMAP_SQUASH_GID: + rc = kstrtouint(param, 10, &int_id); + if (rc) + break; + rc = nodemap_set_squash_gid(nodemap_name, int_id); + break; + case LCFG_NODEMAP_SQUASH_PROJID: + rc = kstrtouint(param, 10, &int_id); + if (rc) + break; + rc = nodemap_set_squash_projid(nodemap_name, int_id); + break; + case LCFG_NODEMAP_ADD_UIDMAP: + case LCFG_NODEMAP_ADD_GIDMAP: + case LCFG_NODEMAP_ADD_PROJIDMAP: + rc = nodemap_parse_idmap(param, idmap); + if (rc != 0) + break; + if (cmd == LCFG_NODEMAP_ADD_UIDMAP) + rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID, + idmap); + else if (cmd == LCFG_NODEMAP_ADD_GIDMAP) + rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID, + idmap); + else if (cmd == LCFG_NODEMAP_ADD_PROJIDMAP) + rc = nodemap_add_idmap(nodemap_name, NODEMAP_PROJID, + idmap); + else + rc = -EINVAL; + break; + case LCFG_NODEMAP_DEL_UIDMAP: + case LCFG_NODEMAP_DEL_GIDMAP: + case LCFG_NODEMAP_DEL_PROJIDMAP: + rc = nodemap_parse_idmap(param, idmap); + if (rc != 0) + break; + if (cmd == LCFG_NODEMAP_DEL_UIDMAP) + rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID, + idmap); + else if (cmd == LCFG_NODEMAP_DEL_GIDMAP) + rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID, + idmap); + else if (cmd == LCFG_NODEMAP_DEL_PROJIDMAP) + rc = nodemap_del_idmap(nodemap_name, NODEMAP_PROJID, + idmap); + else + rc = -EINVAL; + break; + case LCFG_NODEMAP_SET_FILESET: + rc = nodemap_set_fileset(nodemap_name, param); + break; + case LCFG_NODEMAP_SET_SEPOL: + rc = nodemap_set_sepol(nodemap_name, param); + break; + default: + rc = -EINVAL; + } + + RETURN(rc); +} + +int server_iocontrol_nodemap(struct obd_device *obd, + struct obd_ioctl_data *data, bool dynamic) +{ + char name_buf[LUSTRE_NODEMAP_NAME_LENGTH + 1]; + struct lustre_cfg *lcfg = NULL; + const char *nodemap_name = NULL; + const char *client_idstr = NULL; + const char *idtype_str = NULL; + const char *nidstr = NULL; + unsigned long client_id; + struct lnet_nid nid; + char 
*param = NULL; + char fs_idstr[16]; + __u32 fs_id, cmd; + int idtype; + int rc = 0; + + ENTRY; + + if (data->ioc_plen1 > PAGE_SIZE) + GOTO(out, rc = -E2BIG); + + OBD_ALLOC(lcfg, data->ioc_plen1); + if (lcfg == NULL) + GOTO(out, rc = -ENOMEM); + + if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + GOTO(out_lcfg, rc = -EFAULT); + + cmd = lcfg->lcfg_command; + + switch (cmd) { + case LCFG_NODEMAP_ACTIVATE: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + param = lustre_cfg_string(lcfg, 1); + if (strcmp(param, "1") == 0) + nodemap_activate(1); + else + nodemap_activate(0); + break; + case LCFG_NODEMAP_ADD: + case LCFG_NODEMAP_DEL: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + rc = cfg_nodemap_cmd(cmd, nodemap_name, param, dynamic); + break; + case LCFG_NODEMAP_TEST_NID: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + nidstr = lustre_cfg_string(lcfg, 1); + rc = libcfs_strnid(&nid, nidstr); + if (rc < 0) + GOTO(out_lcfg, rc); + + nodemap_test_nid(&nid, name_buf, sizeof(name_buf)); + rc = copy_to_user(data->ioc_pbuf1, name_buf, + min_t(size_t, data->ioc_plen1, + sizeof(name_buf))); + if (rc != 0) + GOTO(out_lcfg, rc = -EFAULT); + break; + case LCFG_NODEMAP_TEST_ID: + if (lcfg->lcfg_bufcount != 4) + GOTO(out_lcfg, rc = -EINVAL); + nidstr = lustre_cfg_string(lcfg, 1); + idtype_str = lustre_cfg_string(lcfg, 2); + client_idstr = lustre_cfg_string(lcfg, 3); + + rc = libcfs_strnid(&nid, nidstr); + if (rc < 0) + GOTO(out_lcfg, rc); + + if (strcmp(idtype_str, "uid") == 0) + idtype = NODEMAP_UID; + else if (strcmp(idtype_str, "gid") == 0) + idtype = NODEMAP_GID; + else if (strcmp(idtype_str, "projid") == 0) + idtype = NODEMAP_PROJID; + else + GOTO(out_lcfg, rc = -EINVAL); + + rc = kstrtoul(client_idstr, 10, &client_id); + if (rc != 0) + GOTO(out_lcfg, rc = -EINVAL); + + rc = nodemap_test_id(&nid, idtype, client_id, &fs_id); + if (rc < 0) + GOTO(out_lcfg, rc = -EINVAL); + 
+ if (data->ioc_plen1 < sizeof(fs_idstr)) + GOTO(out_lcfg, rc = -EINVAL); + + snprintf(fs_idstr, sizeof(fs_idstr), "%u", fs_id); + if (copy_to_user(data->ioc_pbuf1, fs_idstr, + sizeof(fs_idstr)) != 0) + GOTO(out_lcfg, rc = -EINVAL); + break; + case LCFG_NODEMAP_ADD_RANGE: + case LCFG_NODEMAP_DEL_RANGE: + case LCFG_NODEMAP_ADD_UIDMAP: + case LCFG_NODEMAP_DEL_UIDMAP: + case LCFG_NODEMAP_ADD_GIDMAP: + case LCFG_NODEMAP_DEL_GIDMAP: + case LCFG_NODEMAP_ADD_PROJIDMAP: + case LCFG_NODEMAP_DEL_PROJIDMAP: + case LCFG_NODEMAP_SET_FILESET: + case LCFG_NODEMAP_SET_SEPOL: + if (lcfg->lcfg_bufcount != 3) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + param = lustre_cfg_string(lcfg, 2); + rc = cfg_nodemap_cmd(cmd, nodemap_name, param, dynamic); + break; + case LCFG_NODEMAP_ADMIN: + case LCFG_NODEMAP_TRUSTED: + case LCFG_NODEMAP_DENY_UNKNOWN: + case LCFG_NODEMAP_SQUASH_UID: + case LCFG_NODEMAP_SQUASH_GID: + case LCFG_NODEMAP_SQUASH_PROJID: + case LCFG_NODEMAP_MAP_MODE: + case LCFG_NODEMAP_AUDIT_MODE: + case LCFG_NODEMAP_FORBID_ENCRYPT: + case LCFG_NODEMAP_READONLY_MOUNT: + case LCFG_NODEMAP_RBAC: + if (lcfg->lcfg_bufcount != 4) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + param = lustre_cfg_string(lcfg, 3); + rc = cfg_nodemap_cmd(cmd, nodemap_name, param, dynamic); + break; + default: + rc = -ENOTTY; + } + + if (rc) { + CDEBUG_LIMIT(rc == -EEXIST ? D_INFO : D_ERROR, + "%s: OBD_IOC_NODEMAP command %X for %s: rc = %d\n", + obd->obd_name, lcfg->lcfg_command, + nodemap_name, rc); + GOTO(out_lcfg, rc); + } + +out_lcfg: + OBD_FREE(lcfg, data->ioc_plen1); +out: + RETURN(rc); +} +EXPORT_SYMBOL(server_iocontrol_nodemap); -- 1.8.3.1 From 3c17de6a840b2903d963f997ee65e55f55c5c75a Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Wed, 20 Mar 2024 09:20:52 +0100 Subject: [PATCH 10/16] LU-17431 mdt: add ioctl handler for mds Adding ioctl handler for mds allows managing dynamic nodemaps on MDS side. 
Test-Parameters: trivial Signed-off-by: Sebastien Buisson Change-Id: I6a68a17d3f12c799238a93242bbd385e6eeb1d0b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54504 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_mds.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index e94fed6..3e58bed 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -52,6 +52,7 @@ #include "mdt_internal.h" #include #include +#include #include struct mds_device { @@ -656,9 +657,35 @@ static int mds_health_check(const struct lu_env *env, struct obd_device *obd) return rc != 0 ? 1 : 0; } +/* ioctls on obd dev */ +static int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void __user *uarg) +{ + struct obd_device *obd = exp->exp_obd; + struct obd_ioctl_data *data; + int rc = 0; + + ENTRY; + CDEBUG(D_IOCTL, "%s: cmd=%x len=%u karg=%pK uarg=%pK\n", + obd->obd_name, cmd, len, karg, uarg); + + data = karg; + /* we only support nodemap ioctls, for now */ + if (cmd != OBD_IOC_NODEMAP) + GOTO(out, rc = -EINVAL); + + rc = server_iocontrol_nodemap(obd, data, true); + if (rc) + GOTO(out, rc); + +out: + RETURN(rc); +} + static const struct obd_ops mds_obd_device_ops = { .o_owner = THIS_MODULE, .o_health_check = mds_health_check, + .o_iocontrol = mds_iocontrol, }; int mds_mod_init(void) -- 1.8.3.1 From 2f74550e9c9b4dfca2cf78ae1cd86a55be7fd72f Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Wed, 20 Mar 2024 09:24:30 +0100 Subject: [PATCH 11/16] LU-17431 ost: add ioctl handler for oss Adding ioctl handler for oss allows managing dynamic nodemaps on OSS side. 
Test-Parameters: trivial Signed-off-by: Sebastien Buisson Change-Id: I90f4c6988bed2ba721e366ae088983958d484a2f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54505 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/ost/ost_handler.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index b9e8ff8..80fa454 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "ost_internal.h" @@ -411,12 +412,38 @@ static int ost_health_check(const struct lu_env *env, struct obd_device *obd) return rc != 0 ? 1 : 0; } +/* ioctls on obd dev */ +static int oss_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void __user *uarg) +{ + struct obd_device *obd = exp->exp_obd; + struct obd_ioctl_data *data; + int rc = 0; + + ENTRY; + CDEBUG(D_IOCTL, "%s: cmd=%x len=%u karg=%pK uarg=%pK\n", + obd->obd_name, cmd, len, karg, uarg); + + data = karg; + /* we only support nodemap ioctls, for now */ + if (cmd != OBD_IOC_NODEMAP) + GOTO(out, rc = -EINVAL); + + rc = server_iocontrol_nodemap(obd, data, true); + if (rc) + GOTO(out, rc); + +out: + RETURN(rc); +} + /* use obd ops to offer management infrastructure */ static const struct obd_ops ost_obd_ops = { .o_owner = THIS_MODULE, .o_setup = ost_setup, .o_cleanup = ost_cleanup, .o_health_check = ost_health_check, + .o_iocontrol = oss_iocontrol, }; static int __init ost_init(void) -- 1.8.3.1 From 27fe42d294021b1eaf12397a8ca44bf20a7198ad Mon Sep 17 00:00:00 2001 From: Arshad Hussain Date: Sun, 24 Mar 2024 06:27:22 -0400 Subject: [PATCH 12/16] LU-17667 tests: Handle more than 1 IP returned by 'ip' cmd An interface could have more than one IP address. This may be not normal and is a corner case. 
This patch handles case where 'ip' command returns more than single IP and also adds new info/debug messages. Corner Case: ip -o -4 a s enp0s8 | awk '{print $4}' | sed 's/\/.*//' 192.168.50.188 192.168.1.12 Before patch: sanity-lnet.sh line 1174: ((: 188 12: syntax error in expression (error token is "12") After patch: ... IP for enp0s8 found [2] Interface:IP are enp0s8:192.168.50.188 enp0s8:192.168.1.12 Using GW_NID:192.168.50.189@tcp ... Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Arshad Hussain Change-Id: I783a6b67508a4497d18db94b5d2bdab616b4ade5 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54547 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- lustre/tests/sanity-lnet.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 9369723..614cca9 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -1164,8 +1164,14 @@ append_net_tunables() { awk '/^\s+tunables:$/,/^\s+CPT:/' >> $TMP/sanity-lnet-$testnum-expected.yaml } -IF0_IP=$(ip -o -4 a s ${INTERFACES[0]} | - awk '{print $4}' | sed 's/\/.*//') +ARR_IF0_IP=($(ip -o -4 a s ${INTERFACES[0]} | + awk '{print $4}' | sed 's/\/.*//')) +echo "Total IP for ${INTERFACES[0]} found [${#ARR_IF0_IP[@]}]" +echo "Interface:IP are" +for i in ${ARR_IF0_IP[@]}; do + echo "${INTERFACES[0]}:$i" +done +IF0_IP=${ARR_IF0_IP[0]} IF0_NET=$(awk -F. '{print $1"."$2"."$3}'<<<"${IF0_IP}") IF0_HOSTNUM=$(awk -F. 
'{print $4}'<<<"${IF0_IP}") if (((IF0_HOSTNUM + 5) > 254)); then @@ -1174,6 +1180,8 @@ else GW_HOSTNUM=$((IF0_HOSTNUM + 1)) fi GW_NID="${IF0_NET}.${GW_HOSTNUM}@${NETTYPE}" +echo "Using GW_NID:$GW_NID" + test_100() { [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE" -- 1.8.3.1 From 2007ab4709acaef0397df15c9f4cf4387844ba9c Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 27 Mar 2024 21:18:56 -0600 Subject: [PATCH 13/16] LU-16500 utils: 'lfs migrate' should select new OSTs When migrating a file using "lfs migrate FILE" without any arguments to specify a new layout, this should migrate the file to the best OSTs available at that time based on free space, instead of keeping the file on the same OSTs (which is almost pointless otherwise). Reset the starting OST index for all components of the copied file layout so that this can happen properly. Previously, only the last component had the OST index reset, which was only partly helpful. Add llapi_layout_ost_index_reset() to handle this, since it seems likely that tools using llapi_layout_from_fd() and friends to copy an existing layout will want to do the same. Add the corresponding man page and reference it from llapi_layout_get_from_fd(). Update sanity test_56xe to check that the starting OST index of each component is not the same for all components. This check might not catch a broken "lfs migrate" every time since even before this patch the last component would be allocated on a random OST, but will still fail about once every 1/$OST_COUNT runs. Conversely, with this patch it passes hundreds of iterations without a false positive, though a small chance exists that it will have a false positive on occasion. Add a "make utils" target to simplify building only user utilities. 
Test-Parameters: testlist=sanity env=ONLY=56xe,ONLY_REPEAT=100 Fixes: 0568f4ca25 ("LU-16500 utils: set default ost index for lfs migrate") Signed-off-by: Andreas Dilger Change-Id: Ie4c68d4b2ff09560a7a13ae464723745cf968d36 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54600 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Stephane Thiell Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- autoMakefile.am | 6 ++++ lustre/doc/Makefile.am | 1 + lustre/doc/llapi_layout.7 | 1 + lustre/doc/llapi_layout_get_by_fd.3 | 48 ++++++++++++++-------------- lustre/doc/llapi_layout_ost_index_reset.3 | 52 +++++++++++++++++++++++++++++++ lustre/include/lustre/lustreapi.h | 22 ++++++++++++- lustre/tests/sanity.sh | 11 ++++++- lustre/utils/lfs.c | 3 +- lustre/utils/liblustreapi_layout.c | 40 ++++++++++++++++++++++-- 9 files changed, 155 insertions(+), 29 deletions(-) create mode 100644 lustre/doc/llapi_layout_ost_index_reset.3 diff --git a/autoMakefile.am b/autoMakefile.am index b27984a..b3c8186 100644 --- a/autoMakefile.am +++ b/autoMakefile.am @@ -44,6 +44,7 @@ help: @echo 'Generic targets:' @echo ' all - Build all modules and utilities enabled by' @echo ' autotools' + @echo ' utils - Build only userspace utilities' @echo ' checkpatch - Run checkpatch.pl on latest commit' @echo ' checkstack - Run checkstack.pl' @echo ' checkstack-update - Update checkstack.pl' @@ -60,6 +61,11 @@ help: checkpatch: @git diff HEAD~1 | ./contrib/scripts/checkpatch.pl +utils: + $(MAKE) -C libcfs/libcfs/util + $(MAKE) -C lnet/utils + $(MAKE) -C lustre/utils + # these empty rules are needed so that automake doesn't add its own # recursive rules etags-recursive: diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 22c9086..1923f0d 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -170,6 +170,7 @@ LIBMAN = \ llapi_layout_get_by_xattr.3 \ llapi_layout_ost_index_get.3 \ llapi_layout_ost_index_set.3 \ + llapi_layout_ost_index_reset.3 \ llapi_layout_pattern_get.3 \ 
llapi_layout_pattern_set.3 \ llapi_layout_pool_name_get.3 \ diff --git a/lustre/doc/llapi_layout.7 b/lustre/doc/llapi_layout.7 index 0c1a622..b7c61a7 100644 --- a/lustre/doc/llapi_layout.7 +++ b/lustre/doc/llapi_layout.7 @@ -198,6 +198,7 @@ The RAID pattern may only be set to 0. .BR llapi_layout_get_by_xattr (3), .BR llapi_layout_ost_index_get (3), .BR llapi_layout_ost_index_set (3), +.BR llapi_layout_ost_index_reset (3), .BR llapi_layout_pattern_get (3), .BR llapi_layout_pattern_set (3), .BR llapi_layout_pool_name_get (3), diff --git a/lustre/doc/llapi_layout_get_by_fd.3 b/lustre/doc/llapi_layout_get_by_fd.3 index 864bf24..f158eaa 100644 --- a/lustre/doc/llapi_layout_get_by_fd.3 +++ b/lustre/doc/llapi_layout_get_by_fd.3 @@ -22,6 +22,7 @@ obtain the layout of a Lustre file .fi .SH DESCRIPTION .PP +The functions .BR llapi_layout_get_by_xattr() , .BR llapi_layout_get_by_fd() , .BR llapi_layout_get_by_fid() , @@ -34,8 +35,8 @@ containing the layout information for the file referenced by .IR fd , .IR fid , or -.IR path . -The +.IR path , +respectively. The .B struct llapi_layout is an opaque entity containing the layout information for a file in a Lustre filesystem. Its internal structure should not be directly @@ -52,7 +53,7 @@ is a Lustre layout extended attribute (LOV EA) from a file or directory in a Lustre filesystem. The .I lov_xattr should be the raw xattr without being byte-swapped, since this function will -swap it properly. +swap it to the local machine endianness properly. .PP For .BR llapi_layout_get_by_fd() , @@ -62,12 +63,10 @@ filesystem. .PP For .BR llapi_layout_get_by_fid() , -the .I lustre_path -argument serves to identify the Lustre filesystem containing the file -represented by +identifies the Lustre filesystem containing the file represented by .IR fid . -It is typically the filesystem root, but may also be any path beneath +It is typically the filesystem root directory, but may also be any path beneath the root. 
Use the function .BR llapi_path2fid (3) to obtain a @@ -82,17 +81,15 @@ argument that names a file or directory in a Lustre filesystem. .PP Zero or more flags may be bitwise-or'd together in .I flags -or -.I xattr_flags to control how a layout is retrieved. Currently -.B llapi_layout_get_by_path() -accepts only one flag, while .B llapi_layout_get_by_fd() and .B llapi_layout_get_by_fid() -do not use any flags. The list of flags that can be used in -.I flags -is as follows: +do not accept any values for +.IR flags , +while +.B llapi_layout_get_by_path() +accepts only one flag as follows: .TP 5 .SM LLAPI_LAYOUT_GET_EXPECTED Unspecified attribute values are replaced by the literal default values @@ -128,10 +125,11 @@ since stripe size is unspecified, while reports the literal value 1048576. Both forms report a stripe count of 2, since that attribute is specified. .PP -The values that can be used by -.B llapi_layout_get_by_xattr() +Valid arguments for .I flags -argument is as follows: +with +.B llapi_layout_get_by_xattr() +are: .TP 5 .SM LLAPI_LAYOUT_GET_CHECK If the @@ -150,13 +148,16 @@ when necessary, leaving unmodified. Otherwise, the byte swapping will be done to the fields of the .I lov_xattr buffer directly. +.SH NOTE +When using these functions to copy an existing file's layout to create a +new file with +.B llapi_layout_file_open (3) +for mirroring, migration, or as the template for a new file, +.BR llapi_layout_ost_index_reset (3) +should be called to reset the OST index values for each component, so that +the file copy is not created on exactly the same OSTs as the original file. .SH RETURN VALUES -.LP -.BR llapi_layout_get_by_fd() , -.BR llapi_layout_get_by_fid() , -and -.B llapi_layout_get_by_path() -return a valid pointer on success or +These functions return a valid pointer on success or .B NULL on failure with .B errno @@ -180,6 +181,7 @@ An invalid argument was specified. The kernel returned less than the expected amount of data. 
.SH "SEE ALSO" .BR llapi_layout_file_open (3), +.BR llapi_layout_ost_index_reset (3), .BR llapi_path2fid (3), .BR llapi_layout (7), .BR lustreapi (7) diff --git a/lustre/doc/llapi_layout_ost_index_reset.3 b/lustre/doc/llapi_layout_ost_index_reset.3 new file mode 100644 index 0000000..e5bfd26 --- /dev/null +++ b/lustre/doc/llapi_layout_ost_index_reset.3 @@ -0,0 +1,52 @@ +.TH llapi_layout_ost_index_reset 3 "2024 Mar 27" "Lustre User API" +.SH NAME +llapi_layout_ost_index_reset \- reset OST index of all Lustre file components +.SH SYNOPSIS +.nf +.B #include <lustre/lustreapi.h> +.PP +.BI "int llapi_layout_ost_index_reset(struct llapi_layout *" layout ); +.fi +.SH DESCRIPTION +.PP +.B llapi_layout_ost_index_reset() +resets the starting ost_index number of all components in the specified file +.I layout +to +.BR LLAPI_LAYOUT_DEFAULT . +This allows the MDS to automatically allocate the objects for each file +component to the best OSTs available at that time. +.PP +This should be called when copying an existing file +.I layout +retrieved using one of +.BR llapi_layout_get_by_fid (3), +.BR llapi_layout_get_by_fd (3), +.BR llapi_layout_get_by_path (3), +or +.BR llapi_layout_get_by_xattr (3), +so that the OST selection is not copied exactly from the source layout if +it is used with +.BR llapi_layout_file_open (3) +to create a new file for migration, mirroring, or other replication task. +.SH RETURN VALUES +.LP +.B llapi_layout_ost_index_reset() +returns 0 on success, or a negative error if an error occurred (in which case, +errno is set appropriately). +.SH ERRORS +.TP 15 +.SM EINVAL +An invalid argument was specified. +.TP 15 +.SM ENOENT +The layout does not have any valid components. +.TP 15 +.SM ENOMEM +Insufficient memory to complete operation. 
+.SH "SEE ALSO" +.BR llapi_layout (7), +.BR llapi_layout_alloc (3), +.BR llapi_layout_file_open (3), +.BR llapi_layout_ost_index_set (3), +.BR lustreapi (7) diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index c527b1a..f5e921e 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -1002,6 +1002,18 @@ int llapi_layout_ost_index_get(const struct llapi_layout *layout, int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number, uint64_t index); +/** + * Reset the OST index on all components in \a layout to LLAPI_LAYOUT_DEFAULT. + * + * This is useful when reusing a file layout that was copied from an existing + * file and to be used for a new file (e.g. when mirroring or migrating or + * copying a file), so the objects are allocated on different OSTs. + * + * \retval 0 Success. + * \retval -ve errno Error with errno set to non-zero value. + */ +int llapi_layout_ost_index_reset(struct llapi_layout *layout); + /******************** Pool Name ********************/ /** @@ -1285,7 +1297,15 @@ enum { typedef int (*llapi_layout_iter_cb)(struct llapi_layout *layout, void *cbdata); /** - * Iterate all components in the corresponding layout + * Iterate every component in the @layout and call callback function @cb. + * + * \param[in] layout component layout list. 
+ * \param[in] cb callback function called for each component + * \param[in] cbdata callback data passed to the callback function + * + * \retval < 0 error happens during the iteration + * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error + * \retval LLAPI_LAYOUT_ITER_STOP got something, stop the iteration */ int llapi_layout_comp_iterate(struct llapi_layout *layout, llapi_layout_iter_cb cb, void *cbdata); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0c819a7..9b64b2b 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8197,7 +8197,7 @@ test_56xe() { local dir=$DIR/$tdir local f_comp=$dir/$tfile - local layout="-E 1M -S 512K -c 1 -E -1 -S 1M -c 2 -i 0" + local layout="-E 1M -S 512K -E 2M -c 2 -E 3M -c 2 -E eof -c $OSTCOUNT" local layout_before="" local layout_after="" @@ -8211,14 +8211,23 @@ test_56xe() { # 1. migrate a comp layout file by lfs_migrate $LFS_MIGRATE -y $f_comp || error "cannot migrate $f_comp by lfs_migrate" layout_after=$(SKIP_INDEX=yes get_layout_param $f_comp) + idx_before=$($LFS getstripe $f_comp | awk '$2 == "0:" { print $5 }' | + tr '\n' ' ') [ "$layout_before" == "$layout_after" ] || error "lfs_migrate: $layout_before != $layout_after" # 2. 
migrate a comp layout file by lfs migrate $LFS migrate $f_comp || error "cannot migrate $f_comp by lfs migrate" layout_after=$(SKIP_INDEX=yes get_layout_param $f_comp) + idx_after=$($LFS getstripe $f_comp | awk '$2 == "0:" { print $5 }' | + tr '\n' ' ') [ "$layout_before" == "$layout_after" ] || error "lfs migrate: $layout_before != $layout_after" + + # this may not fail every time with a broken lfs migrate, but will fail + # often enough to notice, and will not have false positives very often + [ "$idx_before" != "$idx_after" ] || + error "lfs migrate: $idx_before == $idx_after" } run_test 56xe "migrate a composite layout file" diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 36490c9..2e24d3f 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -4521,8 +4521,7 @@ static int lfs_setstripe_internal(int argc, char **argv, * Strip the source layout of specific * OST object/index values. */ - result = llapi_layout_ost_index_set(layout, 0, - LLAPI_LAYOUT_DEFAULT); + result = llapi_layout_ost_index_reset(layout); if (result) { fprintf(stderr, "%s: set default ost index failed: %s\n", diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index 332af24..05f4dae 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -1603,6 +1603,42 @@ int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number, return 0; } +static int reset_index_cb(struct llapi_layout *layout, void *cbdata) +{ + int *save_errno = (int *)cbdata; + int rc; + + rc = llapi_layout_ost_index_set(layout, 0, LLAPI_LAYOUT_DEFAULT); + + /* save the first error returned, but try to reset all components */ + if (rc && !*save_errno) + *save_errno = errno; + + return LLAPI_LAYOUT_ITER_CONT; +} + +/** + * Reset the OST index on all components in \a layout to LLAPI_LAYOUT_DEFAULT. + * + * This is useful when reusing a file layout that was copied from an existing + * file and to be used for a new file (e.g. 
when mirroring or migrating or + * copying a file), so the objects are allocated on different OSTs. + * + * \retval 0 Success. + * \retval -ve errno Error with errno set to non-zero value. + */ +int llapi_layout_ost_index_reset(struct llapi_layout *layout) +{ + int save_errno = 0; + int rc; + + rc = llapi_layout_comp_iterate(layout, reset_index_cb, &save_errno); + + if (save_errno) + errno = save_errno; + return save_errno ? -save_errno : (rc < 0 ? -errno : 0); +} + /** * Get the OST index associated with stripe \a stripe_number. * @@ -2791,8 +2827,8 @@ bool llapi_layout_is_composite(struct llapi_layout *layout) * Iterate every components in the @layout and call callback function @cb. * * \param[in] layout component layout list. - * \param[in] cb callback for each component - * \param[in] cbdata callback data + * \param[in] cb callback function called for each component + * \param[in] cbdata callback data passed to the callback function * * \retval < 0 error happens during the iteration * \retval LLAPI_LAYOUT_ITER_CONT finished the iteration w/o error -- 1.8.3.1 From 0ec4e10ce1aa60ddbc55fb1e8026856a685e8d43 Mon Sep 17 00:00:00 2001 From: Alexandre Ioffe Date: Thu, 28 Mar 2024 22:18:55 -0700 Subject: [PATCH 14/16] LU-15552 tests: skip bad result in sanity-flr test_0d Ignore bad result of sanity-flr test_0d for MDS version older than v2_14_57-72-gf468093cb6 Test-Parameters: trivial testlist=sanity-flr env=ONLY=0d Test-Parameters: trivial testlist=sanity-flr env=ONLY=0d clientversion=2.14 Signed-off-by: Alexandre Ioffe Change-Id: I0df94eea9fd11ca3f74a7df47b77de1de76c4066 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54623 Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/tests/sanity-flr.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index bdd29bd..03558df 100644 --- a/lustre/tests/sanity-flr.sh +++ 
b/lustre/tests/sanity-flr.sh @@ -548,7 +548,7 @@ test_0d() { $mirror_cmd -N $tf-3 &> /dev/null rc=$? - [[ $rc == 34 ]] || + (( $rc == 34 || $MDS1_VERSION < $(version_code v2_14_57-72-gf468093cb6) )) || error "exceeded maximum mirror count returns $rc not ERANGE(34)" } run_test 0d "lfs mirror extend with -N option" -- 1.8.3.1 From ae68041334e410a9bed297e0bd2dd5bcb8d90b8c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 1 Apr 2024 10:51:56 -0700 Subject: [PATCH 15/16] LU-16904 tests: Fix sanity test 56a and 65a when PFL layout is used Fix sanity test_56a to use correct operator order Skip sanity test_65a if PFL layout is set since it is a test of directory with no stripe info Test-Parameters: trivial testlist=sanity-compr env=ONLY="56a 65a",compr_STRIPEPARAMS="-E 1M -c1 -E eof" Test-Parameters: testlist=sanity-compr env=ONLY="56a 65a",compr_STRIPEPARAMS="-E 64k -c 1 -E eof" Test-Parameters: testlist=sanity-compr env=ONLY="56a 65a",compr_STRIPEPARAMS="-E 64k -c 1 -E eof -c 2" Test-Parameters: testlist=sanity-compr env=ONLY="56a 65a",compr_STRIPEPARAMS="-E 64k -c 1 -E 1M -c 2 -E eof -c 4 -S 4M" Signed-off-by: Wei Liu Change-Id: I0c17a0aceed7894f4eefa7336bd4a11e8fd7bc9e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54644 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/tests/sanity.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 9b64b2b..c035a66 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6529,7 +6529,7 @@ test_56a() { #test lfs getstripe with -v prints lmm_fid filenum=$($LFS getstripe -v $dir | grep -c lmm_fid) - local countfids=$((numdirs + numfiles * numcomp)) + local countfids=$(((numdirs + numfiles) * numcomp)) [[ $filenum -eq $countfids ]] || error "$LFS getstripe -v $dir: "\ "got $filenum want $countfids lmm_fid" @@ -10033,6 +10033,10 @@ run_test 64i "shrink on 
reconnect" test_65a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" + # LU-16904 check if the root is set as PFL layout + local numcomp=$($LFS getstripe --component-count $MOUNT) + [ $numcomp -eq 0 ] || skip "Skip test_65a for PFL layout" + test_mkdir $DIR/$tdir touch $DIR/$tdir/f1 $LVERIFY $DIR/$tdir $DIR/$tdir/f1 || error "lverify failed" -- 1.8.3.1 From f2868d1edfd57f81211d411728d076a10c77bcdc Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Thu, 4 Apr 2024 11:14:10 +0300 Subject: [PATCH 16/16] LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier() synchronize_rcu() does not wait for in-flight rcu callback completion, thus kmem_cache_free() can still race with kmem_cache_destroy(). Fixes: a9411a9856a ("LU-17076 nrs: wait for RCU completion") Signed-off-by: Alex Zhuravlev Change-Id: I2da668c06b532a41c8ce2fe681ea17cf6f3013ef Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54669 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger Reviewed-by: Shaun Tancheff Reviewed-by: Neil Brown Reviewed-by: James Simmons --- lustre/ldlm/ldlm_lockd.c | 2 +- lustre/ptlrpc/nrs_orr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 3a89924..c4e553c 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -3578,7 +3578,7 @@ void ldlm_exit(void) { if (ldlm_refcount) CERROR("ldlm_refcount is %d in %s\n", ldlm_refcount, __func__); - synchronize_rcu(); + rcu_barrier(); kmem_cache_destroy(ldlm_resource_slab); /* * ldlm_lock_put() use RCU to call ldlm_lock_free, so need call diff --git a/lustre/ptlrpc/nrs_orr.c b/lustre/ptlrpc/nrs_orr.c index 1aae791..6dd3213 100644 --- a/lustre/ptlrpc/nrs_orr.c +++ b/lustre/ptlrpc/nrs_orr.c @@ -623,7 +623,7 @@ static void nrs_orr_stop(struct ptlrpc_nrs_policy *policy) rhashtable_free_and_destroy(&orrd->od_obj_hash, nrs_orr_hash_exit, NULL); } - synchronize_rcu(); + rcu_barrier(); 
kmem_cache_destroy(orrd->od_cache); OBD_FREE_PTR(orrd); -- 1.8.3.1