4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (C) 2015, Trustees of Indiana University
25 * Copyright (c) 2017, Intel Corporation.
27 * Author: Joshua Walgenbach <jjw@iu.edu>
28 * Author: Kit Westneat <cwestnea@iu.edu>
30 * Implements the storage functionality for the nodemap configuration. Functions
31 * in this file prepare, store, and load nodemap configuration data. Targets
32 * using nodemap services should register a configuration file object. Nodemap
33 * configuration changes that need to persist should call the appropriate
34 * storage function for the data being modified.
36 * There are several index types as defined in enum nodemap_idx_type:
37 * NODEMAP_CLUSTER_IDX stores the data found on the lu_nodemap struct,
38 * like root squash and config flags, as well as the nodemap name
40 * NODEMAP_RANGE_IDX stores NID range information for a nodemap
41 * NODEMAP_UIDMAP_IDX stores a fs/client UID mapping pair
42 * NODEMAP_GIDMAP_IDX stores a fs/client GID mapping pair
* NODEMAP_PROJIDMAP_IDX stores a fs/client PROJID mapping pair
43 * NODEMAP_GLOBAL_IDX stores whether or not nodemaps are active
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <uapi/linux/lnet/lnet-types.h>
54 #include <uapi/linux/lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
/*
 * Registry of nodemap config index files. Non-MGS targets are linked on
 * ncf_list_head (see nm_config_file_register_tgt/deregister_tgt); the MGS
 * index is tracked separately through nodemap_mgs_ncf.
 */
63 /* list of registered nodemap index files, except MGS */
64 static LIST_HEAD(ncf_list_head);
/* taken around every walk and modification of ncf_list_head in this file */
65 static DEFINE_MUTEX(ncf_list_lock);
67 /* MGS index is different than others, others are listeners to MGS idx */
/* set by nm_config_file_register_mgs(), reset to NULL on deregistration */
68 static struct nm_config_file *nodemap_mgs_ncf;
/*
 * Initialize the index key of a nodemap's cluster record: the nodemap ID
 * tagged with NODEMAP_CLUSTER_IDX, stored little-endian in nk_nodemap_id.
 */
70 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
72 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
73 NODEMAP_CLUSTER_IDX));
/*
 * Fill the cluster record @nr from the in-memory @nodemap: the name, the
 * squash UID/GID/PROJID (little-endian) and the NM_FL_* / NM_FL2_* flag bits
 * derived from the nmf_* fields.
 */
77 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
78 const struct lu_nodemap *nodemap)
/* both name buffers must have the same size for the bounded copy below */
80 BUILD_BUG_ON(sizeof(nr->ncr.ncr_name) != sizeof(nodemap->nm_name));
/*
 * NOTE(review): strncpy() does not NUL-terminate when the source fills the
 * whole buffer; presumably nm_name is always terminated - confirm.
 */
82 strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nr->ncr.ncr_name));
83 nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
84 nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
85 nr->ncr.ncr_squash_projid = cpu_to_le32(nodemap->nm_squash_projid);
/* each boolean nmf_* field contributes one NM_FL_* bit when it is set */
87 (nodemap->nmf_trust_client_ids ?
88 NM_FL_TRUST_CLIENT_IDS : 0) |
89 (nodemap->nmf_allow_root_access ?
90 NM_FL_ALLOW_ROOT_ACCESS : 0) |
91 (nodemap->nmf_deny_unknown ?
92 NM_FL_DENY_UNKNOWN : 0) |
/* the map mode is tested one NODEMAP_MAP_* bit at a time */
93 (nodemap->nmf_map_mode & NODEMAP_MAP_UID ?
95 (nodemap->nmf_map_mode & NODEMAP_MAP_GID ?
97 (nodemap->nmf_map_mode & NODEMAP_MAP_PROJID ?
98 NM_FL_MAP_PROJID : 0) |
99 (nodemap->nmf_enable_audit ?
100 NM_FL_ENABLE_AUDIT : 0) |
101 (nodemap->nmf_forbid_encryption ?
102 NM_FL_FORBID_ENCRYPT : 0);
/* the read-only mount setting uses the second flag set (NM_FL2_*) */
104 (nodemap->nmf_readonly_mount ?
105 NM_FL2_READONLY_MOUNT : 0);
/*
 * Initialize the key of an ID-mapping record. @id_type selects the index type
 * (UID, GID or PROJID map) that is combined with @nm_id; the client-side ID
 * is stored little-endian in nk_id_client.
 */
108 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
109 enum nodemap_id_type id_type,
112 enum nodemap_idx_type idx_type;
/* translate the ID type into the matching index type */
114 if (id_type == NODEMAP_UID)
115 idx_type = NODEMAP_UIDMAP_IDX;
116 else if (id_type == NODEMAP_GID)
117 idx_type = NODEMAP_GIDMAP_IDX;
118 else if (id_type == NODEMAP_PROJID)
119 idx_type = NODEMAP_PROJIDMAP_IDX;
/* presumably the fallback for an unrecognized id_type - confirm */
121 idx_type = NODEMAP_EMPTY_IDX;
123 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
124 nk->nk_id_client = cpu_to_le32(id_client);
/*
 * Initialize an ID-mapping record: stores the filesystem-side ID @id_fs
 * little-endian in the nir member of @nr.
 */
127 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
129 nr->nir.nir_id_fs = cpu_to_le32(id_fs);
/*
 * Initialize the key of a NID range record: the typed nodemap ID goes into
 * nk_nodemap_id and the range ID into nk_range_id, both little-endian.
 */
132 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
135 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
137 nk->nk_range_id = cpu_to_le32(rn_id);
/*
 * Initialize a NID range record from @nid: nid[0] is stored as the start NID
 * and nid[1] as the end NID, both as little-endian 64-bit values.
 */
140 static void nodemap_range_rec_init(union nodemap_rec *nr,
141 const lnet_nid_t nid[2])
143 nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
144 nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
/*
 * Initialize the key of the global record: nodemap ID 0 tagged with
 * NODEMAP_GLOBAL_IDX, stored little-endian.
 */
147 static void nodemap_global_key_init(struct nodemap_key *nk)
149 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
/* Initialize the global record: stores @active in ngr_is_active. */
153 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
155 nr->ngr.ngr_is_active = active;
/*
 * Advance the version of the nodemap index object by one, as part of the
 * transaction handle th passed to dt_version_set().
 */
158 /* should be called with dt_write lock */
159 static void nodemap_inc_version(const struct lu_env *env,
160 struct dt_object *nodemap_idx,
/* read-modify-write of the version; see the locking requirement above */
163 u64 ver = dt_version_get(env, nodemap_idx);
164 dt_version_set(env, nodemap_idx, ver + 1, th);
/*
 * Mode argument of nodemap_cache_find_create(). NCFC_CREATE_NEW is the value
 * that function tests for; the enumerator list is not visible in this
 * extract.
 */
167 enum ncfc_find_create {
/*
 * Look up, and if needed create, the nodemap index object named
 * LUSTRE_NODEMAP_NAME under the root directory of @dev.
 *
 * When @create_new is NCFC_CREATE_NEW and the lookup did not return -ENOENT,
 * the old index is unlinked before local_index_find_or_create() is called.
 * Read-only devices get ERR_PTR(-EROFS) on the paths shown below. If the
 * object has no index operations, do_index_try() is attempted; on failure
 * the mode is switched to NCFC_CREATE_NEW (presumably to retry - confirm).
 * The result kept in nm_obj is either the index object or an ERR_PTR().
 */
171 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
172 struct dt_device *dev,
173 struct local_oid_storage *los,
174 enum ncfc_find_create create_new)
177 struct dt_object *root_obj;
178 struct dt_object *nm_obj;
/* resolve the fid of the device root directory and locate its object */
181 rc = dt_root_get(env, dev, &tfid);
183 GOTO(out, nm_obj = ERR_PTR(rc));
185 root_obj = dt_locate(env, dev, &tfid);
186 if (unlikely(IS_ERR(root_obj)))
187 GOTO(out, nm_obj = root_obj);
/* look for an existing nodemap index entry in the root directory */
189 rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
192 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
194 GOTO(out_root, nm_obj = ERR_PTR(rc));
195 } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
196 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
200 /* if loading index fails the first time, create new index */
201 if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
202 CDEBUG(D_INFO, "removing old index, creating new one\n");
203 rc = local_object_unlink(env, dev, root_obj,
204 LUSTRE_NODEMAP_NAME);
206 /* XXX not sure the best way to get obd name. */
207 CERROR("cannot destroy nodemap index: rc = %d\n",
209 GOTO(out_root, nm_obj = ERR_PTR(rc));
213 nm_obj = local_index_find_or_create(env, los, root_obj,
215 S_IFREG | S_IRUGO | S_IWUSR,
216 &dt_nodemap_features);
218 GOTO(out_root, nm_obj);
/* enable index operations on an object that does not have them yet */
220 if (nm_obj->do_index_ops == NULL) {
221 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
222 &dt_nodemap_features);
223 /* even if loading from tgt fails, connecting to MGS will
227 dt_object_put(env, nm_obj);
229 if (create_new == NCFC_CREATE_NEW)
230 GOTO(out_root, nm_obj = ERR_PTR(rc));
232 CERROR("cannot load nodemap index from disk, creating "
233 "new index: rc = %d\n", rc);
234 create_new = NCFC_CREATE_NEW;
240 dt_object_put(env, root_obj);
/*
 * Insert the key/record pair @nk/@nr into index @idx within a local
 * transaction on the device backing @idx, and bump the index version.
 */
245 static int nodemap_idx_insert(const struct lu_env *env,
246 struct dt_object *idx,
247 const struct nodemap_key *nk,
248 const union nodemap_rec *nr)
251 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
/* the record union is required to be exactly 32 bytes at compile time */
254 BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
256 th = dt_trans_create(env, dev);
259 GOTO(out, rc = PTR_ERR(th));
/* declare phase: the insert and the version update */
261 rc = dt_declare_insert(env, idx,
262 (const struct dt_rec *)nr,
263 (const struct dt_key *)nk, th);
267 rc = dt_declare_version_set(env, idx, th);
271 rc = dt_trans_start_local(env, dev, th);
/* execute phase, done under the index write lock */
275 dt_write_lock(env, idx, 0);
277 rc = dt_insert(env, idx, (const struct dt_rec *)nr,
278 (const struct dt_key *)nk, th);
/* presumably only reached when the insert succeeded - confirm */
280 nodemap_inc_version(env, idx, th);
281 dt_write_unlock(env, idx);
283 dt_trans_stop(env, dev, th);
/*
 * Replace the record stored under @nk in index @idx with @nr. The update is
 * done as a delete followed by an insert inside one local transaction, and
 * the index version is bumped.
 */
288 static int nodemap_idx_update(const struct lu_env *env,
289 struct dt_object *idx,
290 const struct nodemap_key *nk,
291 const union nodemap_rec *nr)
294 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
297 th = dt_trans_create(env, dev);
300 GOTO(out, rc = PTR_ERR(th));
/* declare phase: delete, insert and version update */
302 rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
306 rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
307 (const struct dt_key *)nk, th);
311 rc = dt_declare_version_set(env, idx, th);
315 rc = dt_trans_start_local(env, dev, th);
/* execute phase, done under the index write lock */
319 dt_write_lock(env, idx, 0);
321 rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
325 rc = dt_insert(env, idx, (const struct dt_rec *)nr,
326 (const struct dt_key *)nk, th);
330 nodemap_inc_version(env, idx, th);
332 dt_write_unlock(env, idx);
334 dt_trans_stop(env, dev, th);
/*
 * Delete the record stored under @nk from index @idx inside a local
 * transaction and bump the index version. The last parameter is not used by
 * the visible code; presumably it keeps the signature parallel to the
 * insert/update helpers.
 */
339 static int nodemap_idx_delete(const struct lu_env *env,
340 struct dt_object *idx,
341 const struct nodemap_key *nk,
342 const union nodemap_rec *unused)
345 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
348 th = dt_trans_create(env, dev);
351 GOTO(out, rc = PTR_ERR(th));
/* declare phase: the delete and the version update */
353 rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
357 rc = dt_declare_version_set(env, idx, th);
361 rc = dt_trans_start_local(env, dev, th);
/* execute phase, done under the index write lock */
365 dt_write_lock(env, idx, 0);
367 rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
369 nodemap_inc_version(env, idx, th);
371 dt_write_unlock(env, idx);
373 dt_trans_stop(env, dev, th);
/*
 * Write the cluster record of @nodemap to index @idx, using a private
 * LCT_LOCAL environment. @update == NM_UPDATE replaces an existing record;
 * the other call shown inserts a new one.
 */
383 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
384 struct dt_object *idx,
385 enum nm_add_update update)
387 struct nodemap_key nk;
388 union nodemap_rec nr;
394 rc = lu_env_init(&env, LCT_LOCAL);
/* build the key/record pair describing this nodemap */
398 nodemap_cluster_key_init(&nk, nodemap->nm_id);
399 nodemap_cluster_rec_init(&nr, nodemap);
401 if (update == NM_UPDATE)
402 rc = nodemap_idx_update(&env, idx, &nk, &nr);
404 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
/*
 * Store the cluster record of @nodemap in the MGS nodemap index. Logs an
 * error when no MGS config file is registered.
 */
411 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
413 if (nodemap_mgs_ncf == NULL) {
414 CERROR("cannot add nodemap config to non-existing MGS.\n");
418 return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
/*
 * Store the cluster record of an existing @nodemap in the MGS nodemap index
 * via nodemap_idx_nodemap_add_update(). Logs an error when no MGS config
 * file is registered.
 */
422 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
424 if (nodemap_mgs_ncf == NULL) {
/* NOTE(review): the message says "add" although this is the update path */
425 CERROR("cannot add nodemap config to non-existing MGS.\n");
429 return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
/*
 * Remove @nodemap and its dependent records from the MGS nodemap index: the
 * UID, GID and PROJID mappings, the NID ranges, and finally the cluster
 * record itself. Each removal goes through nodemap_idx_delete() with its
 * result kept in rc2; how rc2 is folded into the return value is not
 * visible in this extract.
 */
433 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
436 struct lu_idmap *idmap;
437 struct lu_idmap *temp;
438 struct lu_nid_range *range;
439 struct lu_nid_range *range_temp;
440 struct nodemap_key nk;
447 if (nodemap_mgs_ncf == NULL) {
/* NOTE(review): the message says "add" although this is the delete path */
448 CERROR("cannot add nodemap config to non-existing MGS.\n");
452 rc = lu_env_init(&env, LCT_LOCAL);
/*
 * NOTE(review): the UID pass walks the fs_to_client tree while the GID and
 * PROJID passes walk the client_to_fs trees - confirm this is intentional.
 */
456 root = nodemap->nm_fs_to_client_uidmap;
457 nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
459 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
461 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* GID mappings */
467 root = nodemap->nm_client_to_fs_gidmap;
468 nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
470 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
472 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* PROJID mappings */
478 root = nodemap->nm_client_to_fs_projidmap;
479 nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
481 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_PROJID,
483 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* NID ranges attached to this nodemap */
489 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
491 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
492 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* the cluster record is removed last, after all dependent records */
498 nodemap_cluster_key_init(&nk, nodemap->nm_id);
499 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Store a NID range in the MGS nodemap index. The key is built from the
 * owning nodemap's ID and range->rn_id; the record holds nid[0] (start) and
 * nid[1] (end). Logs an error when no MGS config file is registered.
 */
508 int nodemap_idx_range_add(const struct lu_nid_range *range,
509 const lnet_nid_t nid[2])
511 struct nodemap_key nk;
512 union nodemap_rec nr;
517 if (nodemap_mgs_ncf == NULL) {
518 CERROR("cannot add nodemap config to non-existing MGS.\n");
522 rc = lu_env_init(&env, LCT_LOCAL);
526 nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
527 nodemap_range_rec_init(&nr, nid);
529 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
/*
 * Remove the record of NID range @range from the MGS nodemap index. The key
 * is built from the owning nodemap's ID and range->rn_id.
 */
535 int nodemap_idx_range_del(const struct lu_nid_range *range)
537 struct nodemap_key nk;
542 if (nodemap_mgs_ncf == NULL) {
/* NOTE(review): the message says "add" although this is a delete path */
543 CERROR("cannot add nodemap config to non-existing MGS.\n");
547 rc = lu_env_init(&env, LCT_LOCAL);
551 nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
553 rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Store one ID mapping of @nodemap in the MGS nodemap index. map[0] is used
 * as the client-side ID in the key and map[1] as the filesystem-side ID in
 * the record; @id_type selects the UID/GID/PROJID index type.
 */
559 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
560 enum nodemap_id_type id_type,
563 struct nodemap_key nk;
564 union nodemap_rec nr;
569 if (nodemap_mgs_ncf == NULL) {
570 CERROR("cannot add nodemap config to non-existing MGS.\n");
574 rc = lu_env_init(&env, LCT_LOCAL);
578 nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
579 nodemap_idmap_rec_init(&nr, map[1]);
581 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
/*
 * Remove one ID mapping of @nodemap from the MGS nodemap index. Only map[0]
 * (the client-side ID) is needed, since it is part of the key.
 */
587 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
588 enum nodemap_id_type id_type,
591 struct nodemap_key nk;
596 if (nodemap_mgs_ncf == NULL) {
/* NOTE(review): the message says "add" although this is a delete path */
597 CERROR("cannot add nodemap config to non-existing MGS.\n");
601 rc = lu_env_init(&env, LCT_LOCAL);
605 nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
607 rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Write the global record (whether nodemaps are active) to the MGS nodemap
 * index. @update == NM_UPDATE replaces the existing record; the other call
 * shown inserts a new one.
 */
613 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
615 struct nodemap_key nk;
616 union nodemap_rec nr;
621 if (nodemap_mgs_ncf == NULL) {
622 CERROR("cannot add nodemap config to non-existing MGS.\n");
626 rc = lu_env_init(&env, LCT_LOCAL);
630 nodemap_global_key_init(&nk);
631 nodemap_global_rec_init(&nr, value);
633 if (update == NM_UPDATE)
634 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
637 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
/*
 * Persist the nodemap active/inactive state @value by updating the global
 * record in the MGS index (always issued as NM_UPDATE).
 */
645 int nodemap_idx_nodemap_activate(bool value)
647 return nodemap_idx_global_add_update(value, NM_UPDATE);
/*
 * Return the index type encoded in @key: nk_nodemap_id is converted from
 * little-endian and passed to nm_idx_get_type().
 */
650 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
654 nodemap_id = le32_to_cpu(key->nk_nodemap_id);
655 return nm_idx_get_type(nodemap_id);
659 * Process a key/rec pair and modify the new configuration.
661 * \param config configuration to update with this key/rec data
662 * \param key key of the record that was loaded
663 * \param rec record that was loaded
664 * \param recent_nodemap last referenced nodemap
665 * \retval type of record processed, see enum #nodemap_idx_type
666 * \retval -ENOENT range or map loaded before nodemap record
667 * \retval -EINVAL duplicate nodemap cluster records found with
668 * different IDs, or nodemap has invalid name
/*
 * Apply one key/record pair read from a nodemap index to @config.
 * @recent_nodemap caches the last nodemap touched; it is also the entry
 * point into the list of nodemaps created so far (linked through nm_list),
 * which range and idmap records use to find their owning nodemap.
 */
671 static int nodemap_process_keyrec(struct nodemap_config *config,
672 const struct nodemap_key *key,
673 const union nodemap_rec *rec,
674 struct lu_nodemap **recent_nodemap)
676 struct lu_nodemap *nodemap = NULL;
677 enum nodemap_idx_type type;
678 enum nodemap_id_type id_type;
679 enum nm_flag_bits flags;
680 enum nm_flag2_bits flags2;
688 BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
/* split the stored ID into its index type and the plain nodemap ID */
690 nodemap_id = le32_to_cpu(key->nk_nodemap_id);
691 type = nodemap_get_key_type(key);
692 nodemap_id = nm_idx_set_type(nodemap_id, 0);
694 CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
697 /* find the correct nodemap in the load list */
698 if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
699 type == NODEMAP_GIDMAP_IDX || type == NODEMAP_PROJIDMAP_IDX) {
700 struct lu_nodemap *tmp = NULL;
/* start from the cached nodemap; search the list only on an ID mismatch */
702 nodemap = *recent_nodemap;
705 GOTO(out, rc = -ENOENT);
707 if (nodemap->nm_id != nodemap_id) {
708 list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
709 if (tmp->nm_id == nodemap_id) {
714 if (nodemap->nm_id != nodemap_id)
715 GOTO(out, rc = -ENOENT);
718 /* update most recently used nodemap if necessary */
719 if (nodemap != *recent_nodemap)
720 *recent_nodemap = nodemap;
724 case NODEMAP_EMPTY_IDX:
726 CWARN("Found nodemap config record without type field, "
727 " nodemap_id=%d. nodemap config file corrupt?\n",
730 case NODEMAP_CLUSTER_IDX: {
731 struct lu_nodemap *old_nm = NULL;
/* reuse a nodemap already present in the config hash, else create it */
733 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
735 if (nodemap == NULL) {
736 if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
737 nodemap = nodemap_create(rec->ncr.ncr_name,
740 nodemap = nodemap_create(rec->ncr.ncr_name,
744 GOTO(out, rc = PTR_ERR(nodemap));
746 /* we need to override the local ID with the saved ID */
747 nodemap->nm_id = nodemap_id;
748 if (nodemap_id > config->nmc_nodemap_highest_id)
749 config->nmc_nodemap_highest_id = nodemap_id;
/* same name found in the hash but with a different ID: reject */
751 } else if (nodemap->nm_id != nodemap_id) {
752 nodemap_putref(nodemap);
753 GOTO(out, rc = -EINVAL);
/* restore squash IDs and flags from the little-endian record */
756 nodemap->nm_squash_uid =
757 le32_to_cpu(rec->ncr.ncr_squash_uid);
758 nodemap->nm_squash_gid =
759 le32_to_cpu(rec->ncr.ncr_squash_gid);
760 nodemap->nm_squash_projid =
761 le32_to_cpu(rec->ncr.ncr_squash_projid);
763 flags = rec->ncr.ncr_flags;
764 nodemap->nmf_allow_root_access =
765 flags & NM_FL_ALLOW_ROOT_ACCESS;
766 nodemap->nmf_trust_client_ids =
767 flags & NM_FL_TRUST_CLIENT_IDS;
768 nodemap->nmf_deny_unknown =
769 flags & NM_FL_DENY_UNKNOWN;
770 nodemap->nmf_map_mode = (flags & NM_FL_MAP_UID ?
771 NODEMAP_MAP_UID : 0) |
772 (flags & NM_FL_MAP_GID ?
773 NODEMAP_MAP_GID : 0) |
774 (flags & NM_FL_MAP_PROJID ?
775 NODEMAP_MAP_PROJID : 0);
/* a record carrying the legacy "both" mode is converted to the current one */
776 if (nodemap->nmf_map_mode == NODEMAP_MAP_BOTH_LEGACY)
777 nodemap->nmf_map_mode = NODEMAP_MAP_BOTH;
778 nodemap->nmf_enable_audit =
779 flags & NM_FL_ENABLE_AUDIT;
780 nodemap->nmf_forbid_encryption =
781 flags & NM_FL_FORBID_ENCRYPT;
782 flags2 = rec->ncr.ncr_flags2;
783 nodemap->nmf_readonly_mount =
784 flags2 & NM_FL2_READONLY_MOUNT;
786 /* The fileset should be saved otherwise it will be empty
787 * every time in case of "NODEMAP_CLUSTER_IDX". */
788 mutex_lock(&active_config_lock);
789 old_nm = nodemap_lookup(rec->ncr.ncr_name);
790 if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0')
791 strlcpy(nodemap->nm_fileset, old_nm->nm_fileset,
792 sizeof(nodemap->nm_fileset));
793 mutex_unlock(&active_config_lock);
795 nodemap_putref(old_nm);
/* link the nodemap into the load list anchored at *recent_nodemap */
797 if (*recent_nodemap == NULL) {
798 *recent_nodemap = nodemap;
799 INIT_LIST_HEAD(&nodemap->nm_list);
801 list_add(&nodemap->nm_list,
802 &(*recent_nodemap)->nm_list);
804 nodemap_putref(nodemap);
807 case NODEMAP_RANGE_IDX:
808 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
809 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
811 rc = nodemap_add_range_helper(config, nodemap, nid,
812 le32_to_cpu(key->nk_range_id));
816 case NODEMAP_UIDMAP_IDX:
817 case NODEMAP_GIDMAP_IDX:
818 case NODEMAP_PROJIDMAP_IDX:
/* map[0] is the client-side ID from the key, map[1] the fs-side ID */
819 map[0] = le32_to_cpu(key->nk_id_client);
820 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
822 if (type == NODEMAP_UIDMAP_IDX)
823 id_type = NODEMAP_UID;
824 else if (type == NODEMAP_GIDMAP_IDX)
825 id_type = NODEMAP_GID;
826 else if (type == NODEMAP_PROJIDMAP_IDX)
827 id_type = NODEMAP_PROJID;
829 GOTO(out, rc = -EINVAL);
831 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
835 case NODEMAP_GLOBAL_IDX:
836 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
839 CERROR("got keyrec pair for unknown type %d\n", type);
/*
 * Passes used when walking a nodemap index: cluster records are read in the
 * first pass and all remaining record types in the second one.
 */
851 enum nm_config_passes {
852 NM_READ_CLUSTERS = 0,
853 NM_READ_ATTRIBUTES = 1,
/*
 * Load a nodemap configuration from index @nodemap_idx into a freshly
 * allocated config. The index is iterated twice under the dt read lock:
 * first only cluster records, then every other non-empty record type. After
 * the lock is dropped, a missing default nodemap and a missing global record
 * are created and written back before nodemap_config_set_active() is called.
 */
856 static int nodemap_load_entries(const struct lu_env *env,
857 struct dt_object *nodemap_idx)
859 const struct dt_it_ops *iops;
861 struct lu_nodemap *recent_nodemap = NULL;
862 struct nodemap_config *new_config = NULL;
864 bool activate_nodemap = false;
865 bool loaded_global_idx = false;
866 enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
871 iops = &nodemap_idx->do_index_ops->dio_it;
873 dt_read_lock(env, nodemap_idx, 0);
874 it = iops->init(env, nodemap_idx, 0);
876 GOTO(out, rc = PTR_ERR(it));
878 rc = iops->load(env, it, hash);
880 GOTO(out_iops_fini, rc);
882 /* rc == 0 means we need to advance to record */
884 rc = iops->next(env, it);
887 GOTO(out_iops_put, rc);
888 /* rc > 0 is eof, will be checked in while below */
890 /* rc == 1, we found initial record and can process below */
894 new_config = nodemap_config_alloc();
895 if (IS_ERR(new_config)) {
896 rc = PTR_ERR(new_config);
898 GOTO(out_iops_put, rc);
901 /* rc > 0 is eof, check initial iops->next here as well */
903 struct nodemap_key *key;
904 union nodemap_rec rec;
905 enum nodemap_idx_type key_type;
907 key = (struct nodemap_key *)iops->key(env, it);
908 key_type = nodemap_get_key_type((struct nodemap_key *)key);
/* only handle the record types that belong to the current pass */
909 if ((cur_pass == NM_READ_CLUSTERS &&
910 key_type == NODEMAP_CLUSTER_IDX) ||
911 (cur_pass == NM_READ_ATTRIBUTES &&
912 key_type != NODEMAP_CLUSTER_IDX &&
913 key_type != NODEMAP_EMPTY_IDX)) {
914 rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
917 GOTO(out_nodemap_config, rc);
918 rc = nodemap_process_keyrec(new_config, key, &rec,
921 GOTO(out_nodemap_config, rc);
/* remember that the index already holds an active/inactive record */
922 if (rc == NODEMAP_GLOBAL_IDX)
923 loaded_global_idx = true;
/* advance the iterator, retrying while it reports -ESTALE */
928 rc = iops->next(env, it);
929 while (rc == -ESTALE);
931 /* move to second pass */
932 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
933 cur_pass = NM_READ_ATTRIBUTES;
934 rc = iops->load(env, it, 0);
936 rc = iops->next(env, it);
/* presumably the error path: the partially built config is dropped */
949 nodemap_config_dealloc(new_config);
951 /* creating new default needs to be done outside dt read lock */
952 activate_nodemap = true;
958 dt_read_unlock(env, nodemap_idx);
961 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
962 nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
964 if (!activate_nodemap)
967 if (new_config->nmc_default_nodemap == NULL) {
968 /* new MGS won't have a default nm on disk, so create it here */
969 struct lu_nodemap *nodemap =
970 nodemap_create(DEFAULT_NODEMAP, new_config, 1);
971 if (IS_ERR(nodemap)) {
972 rc = PTR_ERR(nodemap);
/* persist the newly created default nodemap in the index */
974 rc = nodemap_idx_nodemap_add_update(
975 new_config->nmc_default_nodemap,
978 nodemap_putref(new_config->nmc_default_nodemap);
982 /* new nodemap config won't have an active/inactive record */
983 if (rc == 0 && loaded_global_idx == false) {
984 struct nodemap_key nk;
985 union nodemap_rec nr;
/* the global record is created with the inactive state */
987 nodemap_global_key_init(&nk);
988 nodemap_global_rec_init(&nr, false);
989 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
/* presumably activate on success, otherwise free the config - confirm */
993 nodemap_config_set_active(new_config);
995 nodemap_config_dealloc(new_config);
1001 * Step through active config and write to disk.
/*
 * Write the active nodemap configuration into a newly created index on
 * @dev. With active_config_lock held, every nodemap of the active config
 * gets its cluster record, its NID ranges (under nmc_range_tree_lock) and
 * its UID/GID/PROJID mappings inserted; the global active/inactive record is
 * written last. Individual insert results are kept in rc2; how they affect
 * the returned object is not visible in this extract.
 */
1003 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
1004 struct dt_device *dev,
1005 struct local_oid_storage *los)
1007 struct dt_object *o;
1008 struct lu_nodemap *nodemap;
1009 struct lu_nodemap *nm_tmp;
1010 struct lu_nid_range *range;
1011 struct lu_nid_range *range_temp;
1012 struct lu_idmap *idmap;
1013 struct lu_idmap *id_tmp;
1014 struct rb_root root;
1015 struct nodemap_key nk;
1016 union nodemap_rec nr;
1017 LIST_HEAD(nodemap_list_head);
1022 /* create a new index file to fill with active config */
1023 o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
1027 mutex_lock(&active_config_lock);
1029 /* convert hash to list so we don't spin */
1030 cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
1031 nm_hash_list_cb, &nodemap_list_head);
1033 list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
/* cluster record first, then the records that depend on it */
1034 nodemap_cluster_key_init(&nk, nodemap->nm_id);
1035 nodemap_cluster_rec_init(&nr, nodemap);
1037 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1043 down_read(&active_config->nmc_range_tree_lock);
1044 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
1046 lnet_nid_t nid[2] = {
1050 nodemap_range_key_init(&nk, nodemap->nm_id,
1052 nodemap_range_rec_init(&nr, nid);
1053 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1057 up_read(&active_config->nmc_range_tree_lock);
1059 /* we don't need to take nm_idmap_lock because active config
1060 * lock prevents changes from happening to nodemaps
1062 root = nodemap->nm_client_to_fs_uidmap;
1063 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1065 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1067 nodemap_idmap_rec_init(&nr, idmap->id_fs);
1068 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1073 root = nodemap->nm_client_to_fs_gidmap;
1074 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1076 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1078 nodemap_idmap_rec_init(&nr, idmap->id_fs);
1079 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1084 root = nodemap->nm_client_to_fs_projidmap;
1085 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1087 nodemap_idmap_key_init(&nk, nodemap->nm_id,
1090 nodemap_idmap_rec_init(&nr, idmap->id_fs);
1091 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1096 nodemap_global_key_init(&nk);
1097 nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1098 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1102 mutex_unlock(&active_config_lock);
1105 dt_object_put(env, o);
/*
 * Rewrite the on-disk nodemap cache of every registered non-MGS config file
 * from the active configuration. For each entry on ncf_list_head the current
 * index object is released and nodemap_save_config_cache() builds a new one;
 * failures are reported with CWARN.
 */
1112 static void nodemap_save_all_caches(void)
1114 struct nm_config_file *ncf;
1118 /* recreating nodemap cache requires fld_thread_key be in env */
1119 rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1121 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
/* the registry is walked with ncf_list_lock held */
1125 mutex_lock(&ncf_list_lock);
1126 list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
/* capture device and obd before the object reference is dropped */
1127 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1128 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1129 struct dt_object *o;
1131 /* put current config file so save conf can rewrite it */
1132 dt_object_put_nocache(&env, ncf->ncf_obj);
1133 ncf->ncf_obj = NULL;
1135 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1137 CWARN("%s: error writing to nodemap config: rc = %d\n",
1142 mutex_unlock(&ncf_list_lock);
1147 /* tracks if config still needs to be loaded, either from disk or network */
/* set to true by the register and set_active_mgc paths in this file */
1148 static bool nodemap_config_loaded;
/* held while a config is loaded or activated and the flag above is updated */
1149 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1152 * Ensures that configs loaded over the wire are prioritized over those loaded from disk.
1155 * \param config config to set as the active config
/*
 * Activate @config, mark the configuration as loaded and rewrite all
 * registered on-disk caches, all under nodemap_config_loaded_lock.
 */
1157 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1159 mutex_lock(&nodemap_config_loaded_lock);
1160 nodemap_config_set_active(config);
/* once set, nm_config_file_register_tgt() skips loading from its cache */
1161 nodemap_config_loaded = true;
1162 nodemap_save_all_caches();
1163 mutex_unlock(&nodemap_config_loaded_lock);
1165 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1168 * Register a dt_object representing the config index file. This should be
1169 * called by targets in order to load the nodemap configuration from disk. The
1170 * dt_object should be created with local_index_find_or_create and the index
1171 * features should be enabled with do_index_try.
1173 * \param obj dt_object returned by local_index_find_or_create
1175 * \retval on success: nm_config_file handle for later deregistration
1176 * \retval -ENOMEM memory allocation failure
1177 * \retval -ENOENT error loading nodemap config
1178 * \retval -EINVAL error loading nodemap config
1179 * \retval -EEXIST nodemap config already registered for MGS
/*
 * Register @obj as the MGS nodemap index. Only one MGS config file may be
 * registered at a time (-EEXIST otherwise). The configuration is loaded from
 * @obj under nodemap_config_loaded_lock; the visible code then takes a
 * reference on @obj and publishes the handle in nodemap_mgs_ncf.
 */
1181 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1182 struct dt_object *obj,
1183 struct local_oid_storage *los)
1185 struct nm_config_file *ncf;
1189 if (nodemap_mgs_ncf != NULL)
1190 GOTO(out, ncf = ERR_PTR(-EEXIST));
1194 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1196 /* if loading from cache, prevent activation of MGS config until cache
1197 * loading is done, so disk config is overwritten by MGS config.
1199 mutex_lock(&nodemap_config_loaded_lock);
1200 rc = nodemap_load_entries(env, obj);
1202 nodemap_config_loaded = true;
1203 mutex_unlock(&nodemap_config_loaded_lock);
1207 GOTO(out, ncf = ERR_PTR(rc));
1210 lu_object_get(&obj->do_lu);
1215 nodemap_mgs_ncf = ncf;
1220 EXPORT_SYMBOL(nm_config_file_register_mgs);
/*
 * Register a nodemap config cache for a non-MGS target on @dev. If no
 * configuration has been loaded yet, the cached index is looked up and
 * loaded under nodemap_config_loaded_lock. The on-disk cache is then
 * rewritten from the in-memory config and the handle is added to
 * ncf_list_head. Failures are returned as ERR_PTR() values.
 */
1222 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1223 struct dt_device *dev,
1224 struct local_oid_storage *los)
1226 struct nm_config_file *ncf;
1227 struct dt_object *config_obj = NULL;
1232 RETURN(ERR_PTR(-ENOMEM));
1234 /* don't load from cache if config already loaded */
1235 mutex_lock(&nodemap_config_loaded_lock);
1236 if (!nodemap_config_loaded) {
/* mode 0: the existing index is not discarded before it is opened */
1237 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1238 if (IS_ERR(config_obj))
1239 rc = PTR_ERR(config_obj);
1241 rc = nodemap_load_entries(env, config_obj);
1244 nodemap_config_loaded = true;
1246 mutex_unlock(&nodemap_config_loaded_lock);
1250 /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1252 config_obj = nodemap_save_config_cache(env, dev, los);
1253 if (IS_ERR(config_obj))
1254 GOTO(out_ncf, rc = PTR_ERR(config_obj));
1257 ncf->ncf_obj = config_obj;
/* publish the handle so nodemap_save_all_caches() will refresh it */
1260 mutex_lock(&ncf_list_lock);
1261 list_add(&ncf->ncf_list, &ncf_list_head);
1262 mutex_unlock(&ncf_list_lock);
1267 RETURN(ERR_PTR(rc));
1272 EXPORT_SYMBOL(nm_config_file_register_tgt);
1275 * Deregister a nm_config_file. Should be called by targets during cleanup.
1277 * \param ncf config file to deregister
/*
 * Deregister the MGS nodemap config file. @ncf must be the handle currently
 * stored in nodemap_mgs_ncf (asserted); the global pointer is cleared and
 * the reference on the index object is dropped.
 */
1279 void nm_config_file_deregister_mgs(const struct lu_env *env,
1280 struct nm_config_file *ncf)
1283 LASSERT(nodemap_mgs_ncf == ncf);
1285 nodemap_mgs_ncf = NULL;
1287 dt_object_put(env, ncf->ncf_obj);
1293 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
/*
 * Deregister a target nodemap config file: unlink @ncf from ncf_list_head
 * under ncf_list_lock and drop the reference on its index object.
 */
1295 void nm_config_file_deregister_tgt(const struct lu_env *env,
1296 struct nm_config_file *ncf)
1303 mutex_lock(&ncf_list_lock);
1304 list_del(&ncf->ncf_list);
1305 mutex_unlock(&ncf_list_lock);
1308 dt_object_put(env, ncf->ncf_obj);
1314 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
/*
 * Apply the key/record pairs packed in index page(s) @lip to @config. Each
 * entry occupies dif_keysize_max + dif_recsize_max bytes, the key first and
 * the record right after it. A page is checked against LIP_MAGIC before its
 * lip_nr entries are handed to nodemap_process_keyrec().
 */
1316 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1317 struct lu_nodemap **recent_nodemap)
1319 struct nodemap_key *key;
1320 union nodemap_rec *rec;
/* fixed stride between consecutive entries in a page */
1325 int size = dt_nodemap_features.dif_keysize_max +
1326 dt_nodemap_features.dif_recsize_max;
1329 for (j = 0; j < LU_PAGE_COUNT; j++) {
1330 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1333 /* get and process keys and records from page */
1334 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1335 entry = lip->lp_idx.lip_entries + k * size;
1336 key = (struct nodemap_key *)entry;
/* the record follows the key inside the same entry */
1338 entry += dt_nodemap_features.dif_keysize_max;
1339 rec = (union nodemap_rec *)entry;
1341 rc = nodemap_process_keyrec(config, key, rec,
1352 EXPORT_SYMBOL(nodemap_process_idx_pages);
/*
 * Callback passed to dt_index_walk() that fills one index page container
 * @lp (at most @bytes bytes) with key/record pairs read through iterator
 * @it. @arg is the struct idx_info of the request; ii_attrs holds the
 * current pass (NM_READ_CLUSTERS or NM_READ_ATTRIBUTES) and is advanced here
 * when the first pass reaches the end of the index.
 */
1354 static int nodemap_page_build(const struct lu_env *env, struct dt_object *obj,
1355 union lu_page *lp, size_t bytes,
1356 const struct dt_it_ops *iops,
1357 struct dt_it *it, __u32 attr, void *arg)
1359 struct idx_info *ii = (struct idx_info *)arg;
1360 struct lu_idxpage *lip = &lp->lp_idx;
/* each stored entry is a key immediately followed by its record */
1362 size_t size = ii->ii_keysize + ii->ii_recsize;
1366 if (bytes < LIP_HDR_SIZE)
1369 /* initialize the header of the new container */
1370 memset(lip, 0, LIP_HDR_SIZE);
1371 lip->lip_magic = LIP_MAGIC;
1372 bytes -= LIP_HDR_SIZE;
1374 entry = lip->lip_entries;
1376 char *tmp_entry = entry;
1379 enum nodemap_idx_type key_type;
1381 /* fetch 64-bit hash value */
1382 hash = iops->store(env, it);
1383 ii->ii_hash_end = hash;
/* fault-injection hook, checked through OBD_FAIL_CHECK */
1385 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1386 if (lip->lip_nr != 0)
1391 if (lip->lip_nr == 0)
1392 GOTO(out, rc = -EINVAL);
1396 key = iops->key(env, it);
1397 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1399 /* on the first pass, get only the cluster types. On second
1400 * pass, get all the rest */
1401 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1402 key_type == NODEMAP_CLUSTER_IDX) ||
1403 (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1404 key_type != NODEMAP_CLUSTER_IDX &&
1405 key_type != NODEMAP_EMPTY_IDX)) {
1406 memcpy(tmp_entry, key, ii->ii_keysize);
1407 tmp_entry += ii->ii_keysize;
1409 /* and finally the record */
1410 rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1412 if (rc != -ESTALE) {
1416 /* hash/key/record successfully copied! */
1418 if (unlikely(lip->lip_nr == 1 &&
1420 ii->ii_hash_start = hash;
/* the next entry starts right after the record just written */
1422 entry = tmp_entry + ii->ii_recsize;
1427 /* move on to the next record */
1429 rc = iops->next(env, it);
1430 } while (rc == -ESTALE);
1432 /* move to second pass */
1433 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1434 ii->ii_attrs = NM_READ_ATTRIBUTES;
1435 rc = iops->load(env, it, 0);
1437 rc = iops->next(env, it);
1448 if (rc >= 0 && lip->lip_nr > 0)
1449 /* one more container */
1452 /* no more entries */
1453 ii->ii_hash_end = II_END_OFF;
/*
 * Read nodemap index entries from the object of @ncf into the pages of
 * @rdpg, using nodemap_page_build() as the page filler. Key and record sizes
 * in @ii are taken from dt_nodemap_features. The index version is sampled
 * under the dt read lock; a continued read (rp_hash != 0) whose ii_version
 * differs from it is logged as a config change in flight. ii_version is
 * updated with the sampled version.
 */
1458 int nodemap_index_read(struct lu_env *env,
1459 struct nm_config_file *ncf,
1460 struct idx_info *ii,
1461 const struct lu_rdpg *rdpg)
1463 struct dt_object *nodemap_idx = ncf->ncf_obj;
1467 ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1468 ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1470 dt_read_lock(env, nodemap_idx, 0);
1471 version = dt_version_get(env, nodemap_idx);
/* a non-zero rp_hash means the caller is resuming an earlier read */
1472 if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1473 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1476 ii->ii_hash_end = 0;
1478 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1480 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1484 ii->ii_version = version;
1486 dt_read_unlock(env, nodemap_idx);
1489 EXPORT_SYMBOL(nodemap_index_read);
1492 * Returns the current nodemap configuration to MGC by walking the nodemap
1493 * config index and storing it in the response buffer.
1495 * \param req incoming MGS_CONFIG_READ request
1497 * \retval -EINVAL malformed request
1498 * \retval -ENOTCONN client evicted/reconnected already
1499 * \retval -ETIMEDOUT client timeout or network error
/*
 * Serve an MGS_CFG_T_NODEMAP config read request: allocate pages for the
 * requested size, fill them through nodemap_index_read() starting at
 * mcb_offset, report the resulting offset and pass in the reply, and send
 * the pages to the client as a bulk transfer. All allocated pages are freed
 * before returning.
 */
1502 int nodemap_get_config_req(struct obd_device *mgs_obd,
1503 struct ptlrpc_request *req)
1505 const struct ptlrpc_bulk_frag_ops *frag_ops = &ptlrpc_bulk_kiov_pin_ops;
1506 struct mgs_config_body *body;
1507 struct mgs_config_res *res;
1508 struct lu_rdpg rdpg;
1509 struct idx_info nodemap_ii;
1510 struct ptlrpc_bulk_desc *desc;
1511 struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1517 body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1521 if (body->mcb_type != MGS_CFG_T_NODEMAP)
/*
 * rp_count is the requested size in bytes; rp_npages rounds it up to pages.
 * NOTE(review): mcb_units and mcb_bits come from the client request and only
 * the resulting page count is bounded here - confirm the shift cannot
 * overflow.
 */
1524 rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1525 rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1527 if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1530 CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1531 body->mcb_name, rdpg.rp_count);
1533 /* allocate pages to store the containers */
1534 OBD_ALLOC_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1535 if (rdpg.rp_pages == NULL)
1537 for (i = 0; i < rdpg.rp_npages; i++) {
1538 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1539 if (rdpg.rp_pages[i] == NULL)
1540 GOTO(out, rc = -ENOMEM);
/* resume point, version and pass are taken from the request and export */
1543 rdpg.rp_hash = body->mcb_offset;
1544 nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1545 nodemap_ii.ii_flags = II_FL_NOHASH;
1546 nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1547 nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1549 bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1550 obd2obt(mgs_obd)->obt_nodemap_config_file,
1551 &nodemap_ii, &rdpg);
1553 GOTO(out, rc = bytes);
/* remember the index version this export has seen */
1555 rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1557 res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1559 GOTO(out, rc = -EINVAL);
1560 res->mcr_offset = nodemap_ii.ii_hash_end;
1561 res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1563 page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
/*
 * NOTE(review): page_count is a number of pages while rp_count is a byte
 * count; presumably rp_npages was intended here - confirm.
 */
1564 LASSERT(page_count <= rdpg.rp_count);
1565 desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1566 PTLRPC_BULK_PUT_SOURCE,
1567 MGS_BULK_PORTAL, frag_ops);
1569 GOTO(out, rc = -ENOMEM);
/* attach at most PAGE_SIZE bytes of each filled page to the bulk */
1571 for (i = 0; i < page_count && bytes > 0; i++) {
1572 frag_ops->add_kiov_frag(desc, rdpg.rp_pages[i], 0,
1573 min_t(int, bytes, PAGE_SIZE));
1577 rc = target_bulk_io(req->rq_export, desc);
1578 ptlrpc_free_bulk(desc);
/* cleanup: release every page that was allocated, then the array */
1581 if (rdpg.rp_pages != NULL) {
1582 for (i = 0; i < rdpg.rp_npages; i++)
1583 if (rdpg.rp_pages[i] != NULL)
1584 __free_page(rdpg.rp_pages[i]);
1585 OBD_FREE_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1589 EXPORT_SYMBOL(nodemap_get_config_req);