From f6bb1b38f0814c2f8cab103fe228820aadbf2ee9 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 27 Dec 2024 18:17:16 +0100 Subject: [PATCH] LU-18357 ptlrpc: Add alternate fileset data structure handler Handling multiple future alternate filesets requires an in-memory data structure that is synchronized with the nodemap IAM storage for filesets. Specifically, it is necessary to properly handle the gaps that occur, when filesets are deleted, in the cluster idx subid range that is reserved for filesets. To easily synchronize the IAM filesets with the in-memory nodemap, this patch implements an ordered associative array (an rb tree in this case), which is naturally ordered by a node key - here the global fileset id, which can later be directly mapped to the IAM cluster idx subid range for filesets. This patch sets the maximum number of alternate filesets to 255 (+1 primary fileset). This patch only includes the management of the fileset rb tree and includes all necessary functions. The next patch connects the rb tree to the IAM and adds the API for adding and deleting filesets. 
Test-Parameters: trivial Test-Parameters: testlist=conf-sanity env=ONLY=134 Test-Parameters: testlist=sanity-hsm env=ONLY=411 Test-Parameters: testlist=sanity-quota env=ONLY=75 Test-Parameters: testlist=sanity-sec Test-Parameters: testlist=sanity-selinux env=ONLY=21 Signed-off-by: Marc Vef Change-Id: Ia532894880e5fe65692c699de14e93794c2da483 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57600 Reviewed-by: Sebastien Buisson Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/include/lustre_nodemap.h | 6 +- lustre/include/uapi/linux/lustre/lustre_disk.h | 4 +- lustre/ptlrpc/Makefile.in | 1 + lustre/ptlrpc/nodemap_fileset_alt.c | 288 +++++++++++++++++++++++++ lustre/ptlrpc/nodemap_internal.h | 31 ++- lustre/ptlrpc/nodemap_storage.c | 3 +- 6 files changed, 326 insertions(+), 7 deletions(-) create mode 100644 lustre/ptlrpc/nodemap_fileset_alt.c diff --git a/lustre/include/lustre_nodemap.h b/lustre/include/lustre_nodemap.h index c7344b5..8b23a5c 100644 --- a/lustre/include/lustre_nodemap.h +++ b/lustre/include/lustre_nodemap.h @@ -96,9 +96,13 @@ struct lu_nodemap { /* access by nodemap name */ struct hlist_node nm_hash; struct nodemap_pde *nm_pde_data; - /* fileset the nodes of this nodemap are restricted to */ + /* primary fileset this nodemap is restricted to */ char *nm_prim_fileset; unsigned int nm_prim_fileset_size; + /* lock for fileset red/black tree */ + struct rw_semaphore nm_fileset_alt_lock; + /* alternate fileset map */ + struct rb_root nm_fileset_alt; /* information about the expected SELinux policy on the nodes */ char nm_sepol[LUSTRE_NODEMAP_SEPOL_LENGTH + 1]; diff --git a/lustre/include/uapi/linux/lustre/lustre_disk.h b/lustre/include/uapi/linux/lustre/lustre_disk.h index c539878..4ea9588 100644 --- a/lustre/include/uapi/linux/lustre/lustre_disk.h +++ b/lustre/include/uapi/linux/lustre/lustre_disk.h @@ -338,6 +338,8 @@ struct nodemap_offset_rec { (sizeof(struct nodemap_cluster_rec) - (2 * 
sizeof(__u16))) /* fileset subid range to support a PATH_MAX characters fileset and header */ #define LUSTRE_NODEMAP_FILESET_SUBID_RANGE 256 +/* max number of filesets per nodemap */ +#define LUSTRE_NODEMAP_FILESET_NUM_MAX 256 struct nodemap_fileset_rec { /* 28 bytes for fileset path fragment */ @@ -370,7 +372,7 @@ enum nodemap_cluster_rec_subid { NODEMAP_FILESET = 512, /* * Depending on its length, its fragments may use several subids - * in the range 512-767. + * in the range of 512 to 66,047 (assuming max 256 filesets). */ }; diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index 252400e..1613ba6 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -21,6 +21,7 @@ nrs_server_objs := nrs_crr.o nrs_orr.o nrs_tbf.o nodemap_objs := nodemap_handler.o nodemap_lproc.o nodemap_range.o nodemap_objs += nodemap_idmap.o nodemap_member.o nodemap_storage.o +nodemap_objs += nodemap_fileset_alt.o -include $(ptlrpc_dir)/../ldlm/Makefile @SERVER_TRUE@-include $(ptlrpc_dir)/../target/Makefile diff --git a/lustre/ptlrpc/nodemap_fileset_alt.c b/lustre/ptlrpc/nodemap_fileset_alt.c new file mode 100644 index 0000000..2aac216 --- /dev/null +++ b/lustre/ptlrpc/nodemap_fileset_alt.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Marc Vef + */ + +#include +#include +#include "nodemap_internal.h" + +/** + * Allocate a lu_fileset_alt structure with a given fileset size. 
+ * + * \param fileset_size size of the fileset path + * + * \retval allocated lu_fileset_alt structure on success, NULL otherwise + */ +struct lu_fileset_alt *fileset_alt_init(unsigned int fileset_size) +{ + struct lu_fileset_alt *fileset; + + OBD_ALLOC_PTR(fileset); + if (fileset == NULL) + RETURN(NULL); + + fileset->nfa_path_size = fileset_size; + fileset->nfa_id = 0; /* is set later on tree insertion */ + + OBD_ALLOC(fileset->nfa_path, fileset->nfa_path_size); + if (fileset->nfa_path == NULL) { + OBD_FREE_PTR(fileset); + RETURN(NULL); + } + + return fileset; +} +EXPORT_SYMBOL(fileset_alt_init); + +/** + * Create a lu_fileset_alt structure with a given fileset path. + * + * \param fileset_path fileset path + * + * \retval allocated lu_fileset_alt structure on success, NULL otherwise + */ +struct lu_fileset_alt *fileset_alt_create(const char *fileset_path) +{ + struct lu_fileset_alt *fileset; + + fileset = fileset_alt_init(strlen(fileset_path) + 1); + if (fileset == NULL) + RETURN(NULL); + + memcpy(fileset->nfa_path, fileset_path, fileset->nfa_path_size); + + return fileset; +} +EXPORT_SYMBOL(fileset_alt_create); + +void fileset_alt_destroy(struct lu_fileset_alt *fileset) +{ + OBD_FREE(fileset->nfa_path, fileset->nfa_path_size); + OBD_FREE_PTR(fileset); +} +EXPORT_SYMBOL(fileset_alt_destroy); + +void fileset_alt_destroy_tree(struct rb_root *root) +{ + struct lu_fileset_alt *fileset; + struct lu_fileset_alt *tmp; + + rbtree_postorder_for_each_entry_safe(fileset, tmp, root, nfa_rb) + fileset_alt_destroy(fileset); + + *root = RB_ROOT; +} +EXPORT_SYMBOL(fileset_alt_destroy_tree); + +/** + * Helper function to find the first free id in the rb tree on insertion. + * + * \param root pointer to the root of the rb tree + * + * \retval >0 first free id + */ +static unsigned int get_first_free_id(struct rb_root *root) +{ + struct rb_node *node; + struct lu_fileset_alt *fileset; + /* start at 1. 
0 is reserved for the prim fileset in another context */ + unsigned int fset_id = 1; + + /* iterate over the tree and find gaps in the id sequence */ + for (node = rb_first(root); node; node = rb_next(node)) { + fileset = rb_entry(node, struct lu_fileset_alt, nfa_rb); + if (fileset->nfa_id != fset_id) + RETURN(fset_id); + fset_id++; + } + + /* no gaps found, return the next id after the last one in the tree */ + return fset_id; +} + +/** + * Insert a fileset into the rb tree. If fileset->nfa_id is 0, the first free + * id is assigned and used. The caller is free to set its own fileset->nfa_id + * as long as it is not 0. + * + * \param root pointer to the root of the rb tree + * \param fileset fileset to insert + * + * \retval 0 on success + * \retval -EEXIST if the fileset id already exists + * \retval -ENOSPC if the fileset id exceeds + * LUSTRE_NODEMAP_FILESET_NUM_MAX + */ +int fileset_alt_add(struct rb_root *root, struct lu_fileset_alt *fileset) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + struct lu_fileset_alt *this = NULL; + + if (fileset->nfa_id == 0) + fileset->nfa_id = get_first_free_id(root); + + if (fileset->nfa_id > LUSTRE_NODEMAP_FILESET_NUM_MAX - 1) + return -ENOSPC; + + /* determine the correct position in the tree */ + while (*new) { + this = rb_entry(*new, struct lu_fileset_alt, nfa_rb); + parent = *new; + if (fileset->nfa_id < this->nfa_id) + new = &((*new)->rb_left); + else if (fileset->nfa_id > this->nfa_id) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* insert the new node and rebalance tree */ + rb_link_node(&fileset->nfa_rb, parent, new); + rb_insert_color(&fileset->nfa_rb, root); + + return 0; +} +EXPORT_SYMBOL(fileset_alt_add); + +/** + * Delete a fileset from the rb tree. 
+ * + * \param root pointer to the root of the rb tree + * \param fileset fileset to delete + * \retval >0 id of the deleted fileset + * \retval -EINVAL fileset is NULL + */ +int fileset_alt_delete(struct rb_root *root, struct lu_fileset_alt *fileset) +{ + unsigned int fset_id; + + if (fileset == NULL) + return -EINVAL; + + fset_id = fileset->nfa_id; + rb_erase(&fileset->nfa_rb, root); + fileset_alt_destroy(fileset); + + return fset_id; +} +EXPORT_SYMBOL(fileset_alt_delete); + +static int compare_by_id(const void *key, const struct rb_node *node) +{ + const int *search_id; + struct lu_fileset_alt *fileset; + int rc; + + search_id = key; + fileset = rb_entry(node, struct lu_fileset_alt, nfa_rb); + + if (*search_id < fileset->nfa_id) + rc = -1; + else if (*search_id > fileset->nfa_id) + rc = 1; + else + rc = 0; + + return rc; +} + +/** + * Search for a fileset by its fileset id. + * + * \param root pointer to the root of the rb tree + * \param fileset_id id of the fileset to search + * + * \retval lu_fileset_alt structure on success, NULL otherwise + */ +struct lu_fileset_alt *fileset_alt_search_id(struct rb_root *root, + unsigned int fileset_id) +{ + struct lu_fileset_alt *fileset = NULL; + struct rb_node *node; + + node = rb_find(&fileset_id, root, compare_by_id); + if (node) + fileset = rb_entry(node, struct lu_fileset_alt, nfa_rb); + + return fileset; +} +EXPORT_SYMBOL(fileset_alt_search_id); + +/** + * Search for a fileset by its fileset path. 
+ * + * \param root pointer to the root of the rb tree + * \param fileset_path path of the fileset to search + * + * \retval lu_fileset_alt structure on success, NULL otherwise + */ +struct lu_fileset_alt *fileset_alt_search_path(struct rb_root *root, + const char *fileset_path) +{ + struct rb_node *node; + struct lu_fileset_alt *fileset; + bool found = false; + int rc; + + /* search the full tree for a fileset with the given path */ + for (node = rb_first(root); node; node = rb_next(node)) { + fileset = rb_entry(node, struct lu_fileset_alt, nfa_rb); + rc = strcmp(fileset_path, fileset->nfa_path); + if (!rc) { + found = true; + break; + } + } + return found ? fileset : NULL; +} +EXPORT_SYMBOL(fileset_alt_search_path); + +bool fileset_alt_path_exists(struct rb_root *root, const char *path) +{ + return fileset_alt_search_path(root, path) != NULL; +} +EXPORT_SYMBOL(fileset_alt_path_exists); + +/** + * Iterate over all rb tree entries and shrink the memory requirements + * for the fileset to the actual needed size. This is required when the + * fileset fragments are read from the nodemap IAM, and so the preallocated + * size may be larger than needed. 
+ * + * \param root pointer to the root of the rb tree + */ +void fileset_alt_resize(struct rb_root *root) +{ + struct rb_node *node; + struct lu_fileset_alt *fileset; + unsigned int fset_size_actual, fset_size_prealloc; + char *fset_tmp; + + for (node = rb_first(root); node; node = rb_next(node)) { + fileset = rb_entry(node, struct lu_fileset_alt, nfa_rb); + fset_size_prealloc = fileset->nfa_path_size; + fset_size_actual = strlen(fileset->nfa_path) + 1; + if (fset_size_actual == fset_size_prealloc) + continue; + /* Shrink fileset size to actual */ + OBD_ALLOC(fset_tmp, fset_size_actual); + if (!fset_tmp) { + CERROR("%s: Nodemaps's fileset cannot be resized: rc = %d\n", + fileset->nfa_path, -ENOMEM); + continue; + } + + memcpy(fset_tmp, fileset->nfa_path, fset_size_actual); + + OBD_FREE(fileset->nfa_path, fset_size_prealloc); + + fileset->nfa_path = fset_tmp; + fileset->nfa_path_size = fset_size_actual; + } +} +EXPORT_SYMBOL(fileset_alt_resize); diff --git a/lustre/ptlrpc/nodemap_internal.h b/lustre/ptlrpc/nodemap_internal.h index 76d2999..df846ef 100644 --- a/lustre/ptlrpc/nodemap_internal.h +++ b/lustre/ptlrpc/nodemap_internal.h @@ -67,13 +67,24 @@ struct lu_idmap { struct lu_nodemap_fileset_info { /* nodemap id */ - __u32 nfi_nm_id; + __u32 nfi_nm_id; /* starting subid of the fileset in the IAM */ - __u32 nfi_subid; + __u32 nfi_subid; /* number of fileset fragments */ - __u32 nfi_fragment_cnt; + __u32 nfi_fragment_cnt; /* the fileset */ - const char *nfi_fileset; + const char *nfi_fileset; +}; + +struct lu_fileset_alt { + /* alt fileset id */ + __u32 nfa_id; + /* fileset path */ + char *nfa_path; + /* fileset path size */ + __u32 nfa_path_size; + /* rb tree node */ + struct rb_node nfa_rb; }; static inline enum nodemap_idx_type nm_idx_get_type(unsigned int id) @@ -124,6 +135,18 @@ struct lu_idmap *idmap_search(struct lu_nodemap *nodemap, enum nodemap_tree_type, enum nodemap_id_type id_type, __u32 id); +struct lu_fileset_alt *fileset_alt_init(unsigned int 
fileset_size); +struct lu_fileset_alt *fileset_alt_create(const char *fileset_path); +void fileset_alt_destroy(struct lu_fileset_alt *fileset); +void fileset_alt_destroy_tree(struct rb_root *root); +int fileset_alt_add(struct rb_root *root, struct lu_fileset_alt *fileset); +int fileset_alt_delete(struct rb_root *root, struct lu_fileset_alt *fileset); +struct lu_fileset_alt *fileset_alt_search_id(struct rb_root *root, + unsigned int fileset_id); +struct lu_fileset_alt *fileset_alt_search_path(struct rb_root *root, + const char *fileset_path); +bool fileset_alt_path_exists(struct rb_root *root, const char *path); +void fileset_alt_resize(struct rb_root *root); int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp); void nm_member_del(struct lu_nodemap *nodemap, struct obd_export *exp); void nm_member_delete_list(struct lu_nodemap *nodemap); diff --git a/lustre/ptlrpc/nodemap_storage.c b/lustre/ptlrpc/nodemap_storage.c index ee8dbdf..b65d600 100644 --- a/lustre/ptlrpc/nodemap_storage.c +++ b/lustre/ptlrpc/nodemap_storage.c @@ -1603,7 +1603,8 @@ static int nodemap_process_keyrec(struct nodemap_config *config, } else if (cluster_idx_key >= NODEMAP_FILESET && cluster_idx_key < NODEMAP_FILESET + - LUSTRE_NODEMAP_FILESET_SUBID_RANGE) { + (LUSTRE_NODEMAP_FILESET_SUBID_RANGE * + LUSTRE_NODEMAP_FILESET_NUM_MAX)) { rc = nodemap_cluster_fileset_helper(nodemap, rec, cluster_idx_key); } else { -- 1.8.3.1