From 3277457b898c3eed62e3be3fb00970e49baaf445 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 19 Dec 2024 11:09:24 +0100 Subject: [PATCH] LU-18357 ptlrpc: Use dynamic memory allocation for filesets Currently, a nodemap's fileset uses a static-sized array which may be much larger than needed. This patch adds dynamic memory allocation for a nodemap's fileset to reduce the future memory footprint when multiple filesets per nodemap are specified. Note, when reading fileset fragments from the nodemap IAM, the needed size is not known yet. Therefore, each fileset is initially preallocated with "PATH_MAX + 1" and then shrunk when the actual size is know. This is done on the last step, when a nodemap is set active, e.g., after reading all IAM idx pages. The nodemap's fileset is further renamed as future "lctl nodemap_set_fileset" commands can only modify the primary fileset. Test-Parameters: trivial testlist=sanity-sec Signed-off-by: Marc Vef Change-Id: I39bd3355c96fcf3938aaa52c8e5ff1eb70617378 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57523 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre_nodemap.h | 3 +- lustre/ptlrpc/nodemap_handler.c | 124 ++++++++++++++++++++++++++-------------- lustre/ptlrpc/nodemap_lproc.c | 5 +- lustre/ptlrpc/nodemap_storage.c | 93 ++++++++++++++++++++++++++---- 4 files changed, 169 insertions(+), 56 deletions(-) diff --git a/lustre/include/lustre_nodemap.h b/lustre/include/lustre_nodemap.h index 4eaf632..c7344b5 100644 --- a/lustre/include/lustre_nodemap.h +++ b/lustre/include/lustre_nodemap.h @@ -97,7 +97,8 @@ struct lu_nodemap { struct hlist_node nm_hash; struct nodemap_pde *nm_pde_data; /* fileset the nodes of this nodemap are restricted to */ - char nm_fileset[PATH_MAX + 1]; + char *nm_prim_fileset; + unsigned int nm_prim_fileset_size; /* information about the expected SELinux policy on the nodes */ char nm_sepol[LUSTRE_NODEMAP_SEPOL_LENGTH + 1]; diff --git a/lustre/ptlrpc/nodemap_handler.c b/lustre/ptlrpc/nodemap_handler.c index c0908ac..6238024 100644 --- a/lustre/ptlrpc/nodemap_handler.c +++ b/lustre/ptlrpc/nodemap_handler.c @@ -49,6 +49,8 @@ static void nodemap_destroy(struct lu_nodemap *nodemap) if (nodemap->nm_pde_data != NULL) lprocfs_nodemap_remove(nodemap->nm_pde_data); + OBD_FREE(nodemap->nm_prim_fileset, nodemap->nm_prim_fileset_size); + mutex_lock(&active_config_lock); down_read(&active_config->nmc_range_tree_lock); nm_member_reclassify_nodemap(nodemap); @@ -971,7 +973,6 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst, dst->nm_squash_uid = NODEMAP_NOBODY_UID; dst->nm_squash_gid = NODEMAP_NOBODY_GID; dst->nm_squash_projid = NODEMAP_NOBODY_PROJID; - dst->nm_fileset[0] = '\0'; dst->nm_sepol[0] = '\0'; dst->nm_offset_start_uid = 0; dst->nm_offset_limit_uid = 0; @@ -979,6 +980,8 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst, dst->nm_offset_limit_gid = 0; dst->nm_offset_start_projid = 0; dst->nm_offset_limit_projid = 0; + dst->nm_prim_fileset = NULL; + dst->nm_prim_fileset_size = 0; } else { dst->nmf_trust_client_ids = src->nmf_trust_client_ids; dst->nmf_allow_root_access = src->nmf_allow_root_access; @@ -995,7 +998,6 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst, dst->nm_squash_gid = src->nm_squash_gid; dst->nm_squash_projid = src->nm_squash_projid; if (is_new) { - dst->nm_fileset[0] = '\0'; dst->nm_sepol[0] = '\0'; } dst->nm_offset_start_uid = src->nm_offset_start_uid; @@ -1004,7 +1006,9 @@ static void nodemap_inherit_properties(struct lu_nodemap *dst, dst->nm_offset_limit_gid = src->nm_offset_limit_gid; dst->nm_offset_start_projid = src->nm_offset_start_projid; dst->nm_offset_limit_projid = src->nm_offset_limit_projid; - + /* filesets cannot yet be inherited */ + dst->nm_prim_fileset = NULL; + dst->nm_prim_fileset_size = 0; } } @@ -1153,6 +1157,13 @@ out: } EXPORT_SYMBOL(nodemap_del_range); +static void nodemap_fileset_reset(struct lu_nodemap *nodemap) +{ + OBD_FREE(nodemap->nm_prim_fileset, nodemap->nm_prim_fileset_size); + nodemap->nm_prim_fileset = NULL; + nodemap->nm_prim_fileset_size = 0; +} + /** * Set a fileset on a nodemap in memory and the nodemap IAM records. * If the nodemap is dynamic, the nodemap IAM update is transparently skipped in @@ -1162,47 +1173,61 @@ EXPORT_SYMBOL(nodemap_del_range); * \param fileset string containing fileset * \retval 0 on success * \retval -EINVAL invalid fileset: Does not start with '/' - * \retval -ENAMETOOLONG fileset is too long * \retval -EIO undo operation failed during IAM update */ static int nodemap_set_fileset_iam(struct lu_nodemap *nodemap, const char *fileset) { + size_t fileset_size_new; + char *fileset_new; int rc = 0; - if (strlen(fileset) > PATH_MAX) - RETURN(-ENAMETOOLONG); - if (fileset[0] == '\0' || strcmp(fileset, "clear") == 0) { rc = nodemap_idx_fileset_clear(nodemap); - if (rc == 0) - nodemap->nm_fileset[0] = '\0'; - } else if (fileset[0] != '/') { - rc = -EINVAL; - } else { - /* - * Only update the index if the fileset is already set. - * If it was set by the params llog, it is not set in the IAM. - */ - if (nodemap->nm_fileset[0] != '\0' && - nodemap->nmf_fileset_use_iam) { + if (!rc) + nodemap_fileset_reset(nodemap); + GOTO(out, rc); + } + + if (fileset[0] != '/') + RETURN(-EINVAL); + + fileset_size_new = strlen(fileset) + 1; + + OBD_ALLOC(fileset_new, fileset_size_new); + if (!fileset_new) + GOTO(out, rc = -ENOMEM); + + memcpy(fileset_new, fileset, fileset_size_new); + + /* + * Only update the index if the fileset is already set. + * If it was set by the params llog, it is not set in the IAM. + */ + if (nodemap->nm_prim_fileset) { + if (nodemap->nmf_fileset_use_iam) { rc = nodemap_idx_fileset_update( - nodemap, nodemap->nm_fileset, fileset); - } else { - rc = nodemap_idx_fileset_add(nodemap, fileset); + nodemap, nodemap->nm_prim_fileset, fileset_new); } - if (rc < 0) - GOTO(out, rc); + if (!rc) + nodemap_fileset_reset(nodemap); + } else { + rc = nodemap_idx_fileset_add(nodemap, fileset_new); + } - memcpy(nodemap->nm_fileset, fileset, strlen(fileset) + 1); + if (!rc) { + nodemap->nm_prim_fileset = fileset_new; + nodemap->nm_prim_fileset_size = fileset_size_new; + } else { + OBD_FREE(fileset_new, fileset_size_new); } - if (!nodemap->nm_dyn && !nodemap->nmf_fileset_use_iam) { +out: + if (!nodemap->nm_dyn && !nodemap->nmf_fileset_use_iam && !rc) { nodemap->nmf_fileset_use_iam = 1; rc = nodemap_idx_nodemap_update(nodemap); } -out: return rc; } @@ -1217,12 +1242,12 @@ out: * \param fileset string containing fileset * \retval 0 on success * \retval -EINVAL invalid fileset: Does not start with '/' - * \retval -ENAMETOOLONG fileset is too long */ static int nodemap_set_fileset_local(struct lu_nodemap *nodemap, const char *fileset) { - int rc = 0; + size_t fileset_size_new; + char *fileset_new; /* Allow 'fileset=clear' in addition to 'fileset=""' to clear fileset * because either command 'lctl set_param -P *.*.fileset=""' or @@ -1234,19 +1259,29 @@ static int nodemap_set_fileset_local(struct lu_nodemap *nodemap, * 'fileset=""' is still kept for compatibility reason. */ if (fileset[0] == '\0' || strcmp(fileset, "clear") == 0) { - nodemap->nm_fileset[0] = '\0'; - } else if (fileset[0] != '/') { - rc = -EINVAL; - - } else if (strscpy(nodemap->nm_fileset, fileset, - sizeof(nodemap->nm_fileset)) < 0) { - rc = -ENAMETOOLONG; - /* function may be called from llog thread, so we CERROR here */ - CERROR("%s: fileset '%s' is too long: rc = %d\n", - nodemap->nm_name, fileset, rc); + nodemap_fileset_reset(nodemap); + RETURN(0); } - return rc; + if (fileset[0] != '/') + RETURN(-EINVAL); + + fileset_size_new = strlen(fileset) + 1; + + OBD_ALLOC(fileset_new, fileset_size_new); + if (!fileset_new) + RETURN(-ENOMEM); + + memcpy(fileset_new, fileset, fileset_size_new); + + /* free existing fileset first on update */ + if (nodemap->nm_prim_fileset) + nodemap_fileset_reset(nodemap); + + nodemap->nm_prim_fileset = fileset_new; + nodemap->nm_prim_fileset_size = fileset_size_new; + + return 0; } /** @@ -1257,6 +1292,8 @@ static int nodemap_set_fileset_local(struct lu_nodemap *nodemap, * \param checkperm true if permission check is required * \param ioctl_op true if called from ioctl nodemap functions * \retval 0 on success + * \retval -ENAMETOOLONG fileset is too long + * \retval -EINVAL name or fileset is empty or NULL */ int nodemap_set_fileset(const char *name, const char *fileset, bool checkperm, bool ioctl_op) @@ -1269,6 +1306,9 @@ int nodemap_set_fileset(const char *name, const char *fileset, bool checkperm, if (name == NULL || name[0] == '\0' || fileset == NULL) RETURN(-EINVAL); + if (strlen(fileset) > PATH_MAX) + RETURN(-ENAMETOOLONG); + mutex_lock(&active_config_lock); nodemap = nodemap_lookup(name); if (IS_ERR(nodemap)) { @@ -1316,7 +1356,7 @@ char *nodemap_get_fileset(const struct lu_nodemap *nodemap) if (!nodemap_active) return NULL; - return (char *)nodemap->nm_fileset; + return (char *)nodemap->nm_prim_fileset; } EXPORT_SYMBOL(nodemap_get_fileset); @@ -2037,12 +2077,12 @@ int nodemap_del(const char *nodemap_name) } up_write(&active_config->nmc_range_tree_lock); - if (nodemap->nm_fileset[0] != '\0') { + if (nodemap->nm_prim_fileset) { rc2 = nodemap_idx_fileset_clear(nodemap); if (rc2 < 0) rc = rc2; - nodemap->nm_fileset[0] = '\0'; + nodemap_fileset_reset(nodemap); } if (!nodemap->nm_dyn) { diff --git a/lustre/ptlrpc/nodemap_lproc.c b/lustre/ptlrpc/nodemap_lproc.c index 92ddb0d..445acf8 100644 --- a/lustre/ptlrpc/nodemap_lproc.c +++ b/lustre/ptlrpc/nodemap_lproc.c @@ -210,8 +210,11 @@ static int nodemap_fileset_seq_show(struct seq_file *m, void *data) (char *)m->private, rc); return rc; } + if (nodemap->nm_prim_fileset && nodemap->nm_prim_fileset[0] != '\0') + seq_printf(m, "%s\n", nodemap->nm_prim_fileset); + else + seq_puts(m, "\n"); - seq_printf(m, "%s\n", nodemap->nm_fileset); nodemap_putref(nodemap); return rc; } diff --git a/lustre/ptlrpc/nodemap_storage.c b/lustre/ptlrpc/nodemap_storage.c index 08e3e0c..579e822 100644 --- a/lustre/ptlrpc/nodemap_storage.c +++ b/lustre/ptlrpc/nodemap_storage.c @@ -1331,6 +1331,7 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config, struct lu_nodemap *nodemap, *old_nm; enum nm_flag_bits flags; enum nm_flag2_bits flags2; + int rc = 0; nodemap = cfs_hash_lookup(config->nmc_nodemap_hash, rec->ncr.ncr_name); if (nodemap == NULL) { @@ -1346,10 +1347,8 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config, if (nodemap_id > config->nmc_nodemap_highest_id) config->nmc_nodemap_highest_id = nodemap_id; - } else if (nodemap->nm_id != nodemap_id) { - nodemap_putref(nodemap); - return -EINVAL; - } + } else if (nodemap->nm_id != nodemap_id) + GOTO(out_nodemap, rc = -EINVAL); nodemap->nm_squash_uid = le32_to_cpu(rec->ncr.ncr_squash_uid); nodemap->nm_squash_gid = le32_to_cpu(rec->ncr.ncr_squash_gid); @@ -1384,9 +1383,21 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config, if (!nodemap->nmf_fileset_use_iam) { mutex_lock(&active_config_lock); old_nm = nodemap_lookup(rec->ncr.ncr_name); - if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0') - strscpy(nodemap->nm_fileset, old_nm->nm_fileset, - sizeof(nodemap->nm_fileset)); + if (!IS_ERR(old_nm) && old_nm->nm_prim_fileset && + old_nm->nm_prim_fileset[0] != '\0') { + OBD_ALLOC(nodemap->nm_prim_fileset, + old_nm->nm_prim_fileset_size); + if (!nodemap->nm_prim_fileset) { + mutex_unlock(&active_config_lock); + nodemap_putref(old_nm); + GOTO(out_nodemap, rc = -ENOMEM); + } + nodemap->nm_prim_fileset_size = + old_nm->nm_prim_fileset_size; + memcpy(nodemap->nm_prim_fileset, + old_nm->nm_prim_fileset, + old_nm->nm_prim_fileset_size); + } mutex_unlock(&active_config_lock); if (!IS_ERR(old_nm)) nodemap_putref(old_nm); @@ -1398,9 +1409,11 @@ static int nodemap_cluster_rec_helper(struct nodemap_config *config, } else { list_add(&nodemap->nm_list, &(*recent_nodemap)->nm_list); } + +out_nodemap: nodemap_putref(nodemap); - return 0; + return rc; } static int nodemap_cluster_roles_helper(struct lu_nodemap *nodemap, @@ -1424,20 +1437,30 @@ static int nodemap_cluster_roles_helper(struct lu_nodemap *nodemap, static int nodemap_cluster_fileset_helper(struct lu_nodemap *nodemap, const union nodemap_rec *rec) { - unsigned int fragment_id, fragment_len, fset_offset, fset_len_remain; + unsigned int fragment_id, fragment_len, fset_offset, fset_len_remain, + fset_prealloc_size; fragment_id = le16_to_cpu(rec->nfr.nfr_fragment_id); fragment_len = LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE; + fset_prealloc_size = PATH_MAX + 1; + + /* preallocate fileset for first occurring fragment with PATH_MAX + 1 */ + if (!nodemap->nm_prim_fileset) { + OBD_ALLOC(nodemap->nm_prim_fileset, fset_prealloc_size); + if (!nodemap->nm_prim_fileset) + return -ENOMEM; + nodemap->nm_prim_fileset_size = fset_prealloc_size; + } /* compute nodemap fileset position */ fset_offset = fragment_id * LUSTRE_NODEMAP_FILESET_FRAGMENT_SIZE; - fset_len_remain = sizeof(nodemap->nm_fileset) - fset_offset; + fset_len_remain = nodemap->nm_prim_fileset_size - fset_offset; if (fragment_len > fset_len_remain) fragment_len = fset_len_remain; - memcpy(nodemap->nm_fileset + fset_offset, rec->nfr.nfr_path_fragment, - fragment_len); + memcpy(nodemap->nm_prim_fileset + fset_offset, + rec->nfr.nfr_path_fragment, fragment_len); return 0; } @@ -1953,6 +1976,51 @@ void nodemap_config_set_loading_mgc(bool loading) EXPORT_SYMBOL(nodemap_config_set_loading_mgc); /** + * After all index pages are read, filesets may use more memory than necessary. + * So each nodemap's fileset is resized to its actual size. + * + * \param config current active nodemap config + */ +static void nodemap_fileset_resize(struct nodemap_config *config) +{ + struct lu_nodemap *nodemap; + unsigned int fset_size_actual, fset_size_prealloc; + char *fset_tmp; + LIST_HEAD(nodemap_list_head); + + mutex_lock(&active_config_lock); + + cfs_hash_for_each_safe(config->nmc_nodemap_hash, nm_hash_list_cb, + &nodemap_list_head); + list_for_each_entry(nodemap, &nodemap_list_head, nm_list) { + if (!nodemap->nm_prim_fileset) + continue; + + fset_size_prealloc = nodemap->nm_prim_fileset_size; + fset_size_actual = strlen(nodemap->nm_prim_fileset) + 1; + if (fset_size_actual == fset_size_prealloc) + continue; + + /* Shrink fileset size to actual */ + OBD_ALLOC(fset_tmp, fset_size_actual); + if (!fset_tmp) { + CERROR("%s: Nodemaps's fileset cannot be resized: rc = %d\n", + nodemap->nm_name, -ENOMEM); + continue; + } + + memcpy(fset_tmp, nodemap->nm_prim_fileset, fset_size_actual); + + OBD_FREE(nodemap->nm_prim_fileset, fset_size_prealloc); + + nodemap->nm_prim_fileset_size = fset_size_actual; + nodemap->nm_prim_fileset = fset_tmp; + } + + mutex_unlock(&active_config_lock); +} + +/** * Ensures that configs loaded over the wire are prioritized over those loaded * from disk. * @@ -1962,6 +2030,7 @@ void nodemap_config_set_active_mgc(struct nodemap_config *config) { mutex_lock(&nodemap_config_loaded_lock); nodemap_config_set_active(config); + nodemap_fileset_resize(config); nodemap_config_loaded = 1; nodemap_save_all_caches(); mutex_unlock(&nodemap_config_loaded_lock); -- 1.8.3.1