4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (C) 2015, Trustees of Indiana University
25 * Copyright (c) 2017, Intel Corporation.
27 * Author: Joshua Walgenbach <jjw@iu.edu>
28 * Author: Kit Westneat <cwestnea@iu.edu>
30 * Implements the storage functionality for the nodemap configuration. Functions
31 * in this file prepare, store, and load nodemap configuration data. Targets
32 * using nodemap services should register a configuration file object. Nodemap
33 * configuration changes that need to persist should call the appropriate
34 * storage function for the data being modified.
36 * There are several index types as defined in enum nodemap_idx_type:
37 * NODEMAP_CLUSTER_IDX stores the data found on the lu_nodemap struct,
38 * like root squash and config flags, as well as
40 * NODEMAP_RANGE_IDX stores NID range information for a nodemap
41 * NODEMAP_UIDMAP_IDX stores a fs/client UID mapping pair
42 * NODEMAP_GIDMAP_IDX stores a fs/client GID mapping pair
43 * NODEMAP_GLOBAL_IDX stores whether or not nodemaps are active
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <uapi/linux/lnet/lnet-types.h>
54 #include <uapi/linux/lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
/*
 * Registry of target-side config files. Within this file, ncf_list_lock is
 * held around every visible add, delete and walk of ncf_list_head.
 */
63 /* list of registered nodemap index files, except MGS */
64 static LIST_HEAD(ncf_list_head);
65 static DEFINE_MUTEX(ncf_list_lock);
/*
 * Single MGS config file handle; the storage entry points below test it for
 * NULL before touching the MGS index.
 */
67 /* MGS index is different than others, others are listeners to MGS idx */
68 static struct nm_config_file *nodemap_mgs_ncf;
/*
 * Bit values OR-ed into ncr_flags by nodemap_cluster_rec_init() and tested
 * against the stored flags word in nodemap_process_keyrec().
 * (The enum opener/closer lines are not visible in this excerpt.)
 */
70 /* lu_nodemap flags */
72 NM_FL_ALLOW_ROOT_ACCESS = 0x1,
73 NM_FL_TRUST_CLIENT_IDS = 0x2,
74 NM_FL_DENY_UNKNOWN = 0x4,
75 NM_FL_MAP_UID_ONLY = 0x8,
76 NM_FL_MAP_GID_ONLY = 0x10,
77 NM_FL_ENABLE_AUDIT = 0x20,
/*
 * Build the index key for a nodemap's cluster record: nm_id is tagged with
 * NODEMAP_CLUSTER_IDX through nm_idx_set_type() and stored little-endian in
 * nk->nk_nodemap_id.
 */
80 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
82 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
83 NODEMAP_CLUSTER_IDX));
/*
 * Fill the cluster record nr->ncr from an in-memory nodemap: name, squash
 * UID/GID (little-endian), and the nmf_* booleans packed into ncr_flags using
 * the NM_FL_* bits.
 *
 * The compile-time assertion guarantees ncr_name and nm_name have the same
 * size, which is the length handed to strncpy().
 */
87 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
88 const struct lu_nodemap *nodemap)
90 CLASSERT(sizeof(nr->ncr.ncr_name) == sizeof(nodemap->nm_name));
92 strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nodemap->nm_name));
93 nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
94 nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
/* each set nmf_* field contributes its NM_FL_* bit, otherwise 0 */
95 nr->ncr.ncr_flags = cpu_to_le32(
96 (nodemap->nmf_trust_client_ids ?
97 NM_FL_TRUST_CLIENT_IDS : 0) |
98 (nodemap->nmf_allow_root_access ?
99 NM_FL_ALLOW_ROOT_ACCESS : 0) |
100 (nodemap->nmf_deny_unknown ?
101 NM_FL_DENY_UNKNOWN : 0) |
102 (nodemap->nmf_map_uid_only ?
103 NM_FL_MAP_UID_ONLY : 0) |
104 (nodemap->nmf_map_gid_only ?
105 NM_FL_MAP_GID_ONLY : 0) |
106 (nodemap->nmf_enable_audit ?
107 NM_FL_ENABLE_AUDIT : 0));
/*
 * Build the index key for one ID mapping. NODEMAP_UID selects the
 * NODEMAP_UIDMAP_IDX type; the other visible assignment selects
 * NODEMAP_GIDMAP_IDX. The client-side ID becomes the second key field.
 * Both fields are stored little-endian.
 */
110 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
111 enum nodemap_id_type id_type,
114 enum nodemap_idx_type idx_type;
116 if (id_type == NODEMAP_UID)
117 idx_type = NODEMAP_UIDMAP_IDX;
119 idx_type = NODEMAP_GIDMAP_IDX;
121 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
122 nk->nk_id_client = cpu_to_le32(id_client);
/* Store the filesystem-side ID of a mapping, little-endian, in nr->nir. */
125 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
127 nr->nir.nir_id_fs = cpu_to_le32(id_fs);
/*
 * Build the index key for a NID range: the type-tagged nodemap ID plus the
 * range ID (rn_id), both little-endian. The type argument passed to
 * nm_idx_set_type() is on a line not visible in this excerpt.
 */
130 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
133 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
135 nk->nk_range_id = cpu_to_le32(rn_id);
/*
 * Fill a range record: nid[0] is stored as the start NID and nid[1] as the
 * end NID, both little-endian 64-bit.
 */
138 static void nodemap_range_rec_init(union nodemap_rec *nr,
139 const lnet_nid_t nid[2])
141 nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
142 nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
/*
 * Build the key of the single global record: ID 0 tagged with
 * NODEMAP_GLOBAL_IDX, stored little-endian.
 */
145 static void nodemap_global_key_init(struct nodemap_key *nk)
147 nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
/*
 * Fill the global record with the active/inactive state.
 *
 * NOTE(review): only ngr_is_active is written in the visible code, and the
 * visible callers pass a stack union declared without an initializer, so the
 * remaining record bytes may be indeterminate when inserted -- confirm whether
 * the record should be zeroed first.
 */
151 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
153 nr->ngr.ngr_is_active = active;
/*
 * Bump the version of the index object by one: read the current version and
 * write back ver + 1 under transaction handle th.
 */
156 /* should be called with dt_write lock */
157 static void nodemap_inc_version(const struct lu_env *env,
158 struct dt_object *nodemap_idx,
161 u64 ver = dt_version_get(env, nodemap_idx);
162 dt_version_set(env, nodemap_idx, ver + 1, th);
/*
 * Mode selector for nodemap_cache_find_create(). NCFC_CREATE_NEW is the only
 * enumerator referenced by name in this file; the enumerator list itself is
 * not visible in this excerpt.
 */
165 enum ncfc_find_create {
/*
 * Locate (or recreate) the nodemap index object under the device root.
 *
 * Visible steps: get and locate the root object, look up LUSTRE_NODEMAP_NAME
 * in it, optionally unlink an existing index when create_new is
 * NCFC_CREATE_NEW and the lookup did not return -ENOENT, then call
 * local_index_find_or_create() and, if the object has no index ops yet,
 * do_index_try(). When that load fails and the caller had not asked for a new
 * index, create_new is switched to NCFC_CREATE_NEW (presumably to retry; the
 * jump itself is not visible).
 *
 * Errors are propagated as ERR_PTR() values in nm_obj; -EROFS is used for the
 * read-only device case. The final return statement is not visible here.
 */
169 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
170 struct dt_device *dev,
171 struct local_oid_storage *los,
172 enum ncfc_find_create create_new)
175 struct dt_object *root_obj;
176 struct dt_object *nm_obj;
179 rc = dt_root_get(env, dev, &tfid);
181 GOTO(out, nm_obj = ERR_PTR(rc));
183 root_obj = dt_locate(env, dev, &tfid);
184 if (unlikely(IS_ERR(root_obj)))
185 GOTO(out, nm_obj = root_obj);
187 rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
190 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
192 GOTO(out_root, nm_obj = ERR_PTR(rc));
193 } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
194 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
198 /* if loading index fails the first time, create new index */
199 if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
200 CDEBUG(D_INFO, "removing old index, creating new one\n");
201 rc = local_object_unlink(env, dev, root_obj,
202 LUSTRE_NODEMAP_NAME);
204 /* XXX not sure the best way to get obd name. */
205 CERROR("cannot destroy nodemap index: rc = %d\n",
207 GOTO(out_root, nm_obj = ERR_PTR(rc));
211 nm_obj = local_index_find_or_create(env, los, root_obj,
213 S_IFREG | S_IRUGO | S_IWUSR,
214 &dt_nodemap_features);
216 GOTO(out_root, nm_obj);
218 if (nm_obj->do_index_ops == NULL) {
219 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
220 &dt_nodemap_features);
221 /* even if loading from tgt fails, connecting to MGS will
225 dt_object_put(env, nm_obj);
227 if (create_new == NCFC_CREATE_NEW)
228 GOTO(out_root, nm_obj = ERR_PTR(rc));
230 CERROR("cannot load nodemap index from disk, creating "
231 "new index: rc = %d\n", rc);
232 create_new = NCFC_CREATE_NEW;
238 dt_object_put(env, root_obj);
/*
 * Insert one key/record pair into the nodemap index inside a local
 * transaction.
 *
 * Visible sequence: create a transaction on the index's device, declare the
 * insert and the version update, start the transaction, take the write lock,
 * insert, bump the index version, unlock, stop the transaction.
 * The error checks between these steps and the return are not visible in
 * this excerpt.
 *
 * The CLASSERT pins union nodemap_rec at 32 bytes.
 */
243 static int nodemap_idx_insert(const struct lu_env *env,
244 struct dt_object *idx,
245 const struct nodemap_key *nk,
246 const union nodemap_rec *nr)
249 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
252 CLASSERT(sizeof(union nodemap_rec) == 32);
254 th = dt_trans_create(env, dev);
257 GOTO(out, rc = PTR_ERR(th));
259 rc = dt_declare_insert(env, idx,
260 (const struct dt_rec *)nr,
261 (const struct dt_key *)nk, th);
265 rc = dt_declare_version_set(env, idx, th);
269 rc = dt_trans_start_local(env, dev, th);
273 dt_write_lock(env, idx, 0);
275 rc = dt_insert(env, idx, (const struct dt_rec *)nr,
276 (const struct dt_key *)nk, th, 1);
278 nodemap_inc_version(env, idx, th);
279 dt_write_unlock(env, idx);
281 dt_trans_stop(env, dev, th);
/*
 * Replace the record stored under key nk: the update is implemented as a
 * delete of the existing key followed by an insert of the new record, both in
 * the same local transaction and under the index write lock, followed by a
 * version bump.
 * Error checks between steps and the return are not visible in this excerpt.
 */
286 static int nodemap_idx_update(const struct lu_env *env,
287 struct dt_object *idx,
288 const struct nodemap_key *nk,
289 const union nodemap_rec *nr)
292 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
295 th = dt_trans_create(env, dev);
298 GOTO(out, rc = PTR_ERR(th));
/* both halves of the delete+insert must be declared before starting */
300 rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
304 rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
305 (const struct dt_key *)nk, th);
309 rc = dt_declare_version_set(env, idx, th);
313 rc = dt_trans_start_local(env, dev, th);
317 dt_write_lock(env, idx, 0);
319 rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
323 rc = dt_insert(env, idx, (const struct dt_rec *)nr,
324 (const struct dt_key *)nk, th, 1);
328 nodemap_inc_version(env, idx, th);
330 dt_write_unlock(env, idx);
332 dt_trans_stop(env, dev, th);
/*
 * Delete the record stored under key nk in a local transaction, under the
 * index write lock, and bump the index version.
 *
 * The last parameter is named "unused" and is not referenced in the visible
 * body; visible callers pass NULL. It keeps the signature parallel to
 * nodemap_idx_insert()/nodemap_idx_update().
 * Error checks between steps and the return are not visible in this excerpt.
 */
337 static int nodemap_idx_delete(const struct lu_env *env,
338 struct dt_object *idx,
339 const struct nodemap_key *nk,
340 const union nodemap_rec *unused)
343 struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
346 th = dt_trans_create(env, dev);
349 GOTO(out, rc = PTR_ERR(th));
351 rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
355 rc = dt_declare_version_set(env, idx, th);
359 rc = dt_trans_start_local(env, dev, th);
363 dt_write_lock(env, idx, 0);
365 rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
367 nodemap_inc_version(env, idx, th);
369 dt_write_unlock(env, idx);
371 dt_trans_stop(env, dev, th);
/*
 * Write a nodemap's cluster record to the given index.
 *
 * Initializes a private LCT_LOCAL environment, builds the cluster key and
 * record from the nodemap, then calls nodemap_idx_update() when update is
 * NM_UPDATE and nodemap_idx_insert() otherwise.
 * Environment teardown and the return are not visible in this excerpt.
 */
381 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
382 struct dt_object *idx,
383 enum nm_add_update update)
385 struct nodemap_key nk;
386 union nodemap_rec nr;
392 rc = lu_env_init(&env, LCT_LOCAL);
396 nodemap_cluster_key_init(&nk, nodemap->nm_id);
397 nodemap_cluster_rec_init(&nr, nodemap);
399 if (update == NM_UPDATE)
400 rc = nodemap_idx_update(&env, idx, &nk, &nr);
402 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
/*
 * Add a nodemap's cluster record to the MGS index.
 * Logs an error when no MGS config file is registered (the value returned in
 * that case is on a line not visible here); otherwise forwards to
 * nodemap_idx_nodemap_add_update() with the MGS index object.
 */
409 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
411 if (nodemap_mgs_ncf == NULL) {
412 CERROR("cannot add nodemap config to non-existing MGS.\n");
416 return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
/*
 * Update a nodemap's cluster record in the MGS index.
 * Same guard and forwarding as nodemap_idx_nodemap_add(); the mode argument
 * is on a line not visible here.
 *
 * NOTE(review): the error text says "cannot add" although this is the update
 * path -- looks copied from the add function; confirm before changing.
 */
420 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
422 if (nodemap_mgs_ncf == NULL) {
423 CERROR("cannot add nodemap config to non-existing MGS.\n");
427 return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
/*
 * Remove every MGS index record belonging to a nodemap: its UID mappings,
 * its GID mappings, its NID ranges, and finally the cluster record itself.
 * Each deletion result lands in rc2; how rc2 is folded into the return value
 * is not visible in this excerpt.
 *
 * NOTE(review): the UID loop walks nm_fs_to_client_uidmap while the GID loop
 * walks nm_client_to_fs_gidmap, and nodemap_save_config_cache() walks the
 * client_to_fs trees for both -- confirm the asymmetry is intentional.
 * NOTE(review): the error text says "cannot add" in this delete path.
 */
431 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
434 struct lu_idmap *idmap;
435 struct lu_idmap *temp;
436 struct lu_nid_range *range;
437 struct lu_nid_range *range_temp;
438 struct nodemap_key nk;
445 if (nodemap_mgs_ncf == NULL) {
446 CERROR("cannot add nodemap config to non-existing MGS.\n");
450 rc = lu_env_init(&env, LCT_LOCAL);
/* UID mappings */
454 root = nodemap->nm_fs_to_client_uidmap;
455 nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
457 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
459 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* GID mappings */
465 root = nodemap->nm_client_to_fs_gidmap;
466 nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
468 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
470 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* NID ranges */
476 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
478 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
479 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
/* cluster record goes last, after everything that refers to it */
485 nodemap_cluster_key_init(&nk, nodemap->nm_id);
486 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Insert a NID range record into the MGS index.
 * The key comes from the owning nodemap's ID (range->rn_nodemap->nm_id) and
 * the range ID; the record holds nid[0]..nid[1].
 * Requires a registered MGS config file; uses a private LCT_LOCAL env.
 */
495 int nodemap_idx_range_add(const struct lu_nid_range *range,
496 const lnet_nid_t nid[2])
498 struct nodemap_key nk;
499 union nodemap_rec nr;
504 if (nodemap_mgs_ncf == NULL) {
505 CERROR("cannot add nodemap config to non-existing MGS.\n");
509 rc = lu_env_init(&env, LCT_LOCAL);
513 nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
514 nodemap_range_rec_init(&nr, nid);
516 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
/*
 * Delete a NID range record from the MGS index, keyed by the owning
 * nodemap's ID and the range ID.
 * Requires a registered MGS config file; uses a private LCT_LOCAL env.
 *
 * NOTE(review): the error text says "cannot add" in this delete path.
 */
522 int nodemap_idx_range_del(const struct lu_nid_range *range)
524 struct nodemap_key nk;
529 if (nodemap_mgs_ncf == NULL) {
530 CERROR("cannot add nodemap config to non-existing MGS.\n");
534 rc = lu_env_init(&env, LCT_LOCAL);
538 nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
540 rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Insert one ID mapping into the MGS index.
 * map[0] is used as the client-side ID (key) and map[1] as the
 * filesystem-side ID (record); id_type selects UID or GID.
 * Requires a registered MGS config file; uses a private LCT_LOCAL env.
 */
546 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
547 enum nodemap_id_type id_type,
550 struct nodemap_key nk;
551 union nodemap_rec nr;
556 if (nodemap_mgs_ncf == NULL) {
557 CERROR("cannot add nodemap config to non-existing MGS.\n");
561 rc = lu_env_init(&env, LCT_LOCAL);
565 nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
566 nodemap_idmap_rec_init(&nr, map[1]);
568 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
/*
 * Delete one ID mapping from the MGS index. Only map[0] (the client-side ID)
 * is needed, since it forms the key together with the nodemap ID and type.
 * Requires a registered MGS config file; uses a private LCT_LOCAL env.
 *
 * NOTE(review): the error text says "cannot add" in this delete path.
 */
574 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
575 enum nodemap_id_type id_type,
578 struct nodemap_key nk;
583 if (nodemap_mgs_ncf == NULL) {
584 CERROR("cannot add nodemap config to non-existing MGS.\n");
588 rc = lu_env_init(&env, LCT_LOCAL);
592 nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
594 rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
/*
 * Write the global (active/inactive) record to the MGS index, using
 * nodemap_idx_update() when update is NM_UPDATE and nodemap_idx_insert()
 * otherwise.
 * Requires a registered MGS config file; uses a private LCT_LOCAL env.
 */
600 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
602 struct nodemap_key nk;
603 union nodemap_rec nr;
608 if (nodemap_mgs_ncf == NULL) {
609 CERROR("cannot add nodemap config to non-existing MGS.\n");
613 rc = lu_env_init(&env, LCT_LOCAL);
617 nodemap_global_key_init(&nk);
618 nodemap_global_rec_init(&nr, value);
620 if (update == NM_UPDATE)
621 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
624 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
/*
 * Persist the nodemap active flag: always an NM_UPDATE of the global record,
 * so the record is expected to exist already.
 */
632 int nodemap_idx_nodemap_activate(bool value)
634 return nodemap_idx_global_add_update(value, NM_UPDATE);
/*
 * Extract the record type from an on-disk key: convert nk_nodemap_id from
 * little-endian and hand it to nm_idx_get_type().
 */
637 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
641 nodemap_id = le32_to_cpu(key->nk_nodemap_id);
642 return nm_idx_get_type(nodemap_id);
646 * Process a key/rec pair and modify the new configuration.
648 * \param config configuration to update with this key/rec data
649 * \param key key of the record that was loaded
650 * \param rec record that was loaded
651 * \param recent_nodemap last referenced nodemap
652 * \retval type of record processed, see enum #nodemap_idx_type
653 * \retval -ENOENT range or map loaded before nodemap record
654 * \retval -EINVAL duplicate nodemap cluster records found with
655 * different IDs, or nodemap has invalid name
/*
 * Apply one key/record pair read from an index to the configuration being
 * built.
 *
 * The type is taken from the key; the type bits are then cleared from
 * nodemap_id with nm_idx_set_type(nodemap_id, 0). For range and idmap records
 * the owning nodemap is searched starting from *recent_nodemap and walking
 * its nm_list; -ENOENT is used when it is not found. Cluster records look up
 * or create the nodemap and copy squash IDs and flag bits into it.
 */
658 static int nodemap_process_keyrec(struct nodemap_config *config,
659 const struct nodemap_key *key,
660 const union nodemap_rec *rec,
661 struct lu_nodemap **recent_nodemap)
663 struct lu_nodemap *nodemap = NULL;
664 enum nodemap_idx_type type;
665 enum nodemap_id_type id_type;
674 CLASSERT(sizeof(union nodemap_rec) == 32);
676 nodemap_id = le32_to_cpu(key->nk_nodemap_id);
677 type = nodemap_get_key_type(key);
678 nodemap_id = nm_idx_set_type(nodemap_id, 0);
680 CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
683 /* find the correct nodemap in the load list */
684 if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
685 type == NODEMAP_GIDMAP_IDX) {
686 struct lu_nodemap *tmp = NULL;
688 nodemap = *recent_nodemap;
691 GOTO(out, rc = -ENOENT);
693 if (nodemap->nm_id != nodemap_id) {
694 list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
695 if (tmp->nm_id == nodemap_id) {
700 if (nodemap->nm_id != nodemap_id)
701 GOTO(out, rc = -ENOENT);
704 /* update most recently used nodemap if necessary */
705 if (nodemap != *recent_nodemap)
706 *recent_nodemap = nodemap;
710 case NODEMAP_EMPTY_IDX:
712 CWARN("Found nodemap config record without type field, "
713 " nodemap_id=%d. nodemap config file corrupt?\n",
716 case NODEMAP_CLUSTER_IDX:
717 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
719 if (nodemap == NULL) {
720 if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
721 nodemap = nodemap_create(rec->ncr.ncr_name,
723 config->nmc_default_nodemap = nodemap;
725 nodemap = nodemap_create(rec->ncr.ncr_name,
729 GOTO(out, rc = PTR_ERR(nodemap));
731 /* we need to override the local ID with the saved ID */
732 nodemap->nm_id = nodemap_id;
733 if (nodemap_id > config->nmc_nodemap_highest_id)
734 config->nmc_nodemap_highest_id = nodemap_id;
736 } else if (nodemap->nm_id != nodemap_id) {
737 nodemap_putref(nodemap);
738 GOTO(out, rc = -EINVAL);
741 nodemap->nm_squash_uid =
742 le32_to_cpu(rec->ncr.ncr_squash_uid);
743 nodemap->nm_squash_gid =
744 le32_to_cpu(rec->ncr.ncr_squash_gid);
/* unpack the NM_FL_* bits written by nodemap_cluster_rec_init() */
746 flags = le32_to_cpu(rec->ncr.ncr_flags);
747 nodemap->nmf_allow_root_access =
748 flags & NM_FL_ALLOW_ROOT_ACCESS;
749 nodemap->nmf_trust_client_ids =
750 flags & NM_FL_TRUST_CLIENT_IDS;
751 nodemap->nmf_deny_unknown =
752 flags & NM_FL_DENY_UNKNOWN;
753 nodemap->nmf_map_uid_only =
754 flags & NM_FL_MAP_UID_ONLY;
755 nodemap->nmf_map_gid_only =
756 flags & NM_FL_MAP_GID_ONLY;
757 nodemap->nmf_enable_audit =
758 flags & NM_FL_ENABLE_AUDIT;
/* the first nodemap seen becomes the head of the load list */
760 if (*recent_nodemap == NULL) {
761 *recent_nodemap = nodemap;
762 INIT_LIST_HEAD(&nodemap->nm_list);
764 list_add(&nodemap->nm_list,
765 &(*recent_nodemap)->nm_list);
767 nodemap_putref(nodemap);
769 case NODEMAP_RANGE_IDX:
770 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
771 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
773 rc = nodemap_add_range_helper(config, nodemap, nid,
774 le32_to_cpu(key->nk_range_id));
778 case NODEMAP_UIDMAP_IDX:
779 case NODEMAP_GIDMAP_IDX:
/* map[0] is the client ID from the key, map[1] the fs ID from the record */
780 map[0] = le32_to_cpu(key->nk_id_client);
781 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
783 if (type == NODEMAP_UIDMAP_IDX)
784 id_type = NODEMAP_UID;
786 id_type = NODEMAP_GID;
788 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
792 case NODEMAP_GLOBAL_IDX:
793 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
796 CERROR("got keyrec pair for unknown type %d\n", type);
/*
 * Passes over the index when loading or shipping the config: cluster records
 * are handled in the first pass, all other typed records in the second, so
 * that a nodemap exists before its ranges and ID maps are processed.
 */
808 enum nm_config_passes {
809 NM_READ_CLUSTERS = 0,
810 NM_READ_ATTRIBUTES = 1,
/*
 * Load a nodemap configuration from an index object and make it active.
 *
 * The index is iterated twice under the dt read lock: the NM_READ_CLUSTERS
 * pass processes only NODEMAP_CLUSTER_IDX keys; the iterator is then reloaded
 * at 0 and the NM_READ_ATTRIBUTES pass processes every key that is neither a
 * cluster nor an empty-type key. -ESTALE from iops->next() is retried.
 *
 * After the lock is dropped: a missing default nodemap is created and written
 * with nodemap_idx_nodemap_add_update(); a missing global record is inserted
 * with active = false; then the new config is either activated or freed
 * (the condition choosing between the two is not visible in this excerpt).
 */
813 static int nodemap_load_entries(const struct lu_env *env,
814 struct dt_object *nodemap_idx)
816 const struct dt_it_ops *iops;
818 struct lu_nodemap *recent_nodemap = NULL;
819 struct nodemap_config *new_config = NULL;
821 bool activate_nodemap = false;
822 bool loaded_global_idx = false;
823 enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
828 iops = &nodemap_idx->do_index_ops->dio_it;
830 dt_read_lock(env, nodemap_idx, 0);
831 it = iops->init(env, nodemap_idx, 0);
833 GOTO(out, rc = PTR_ERR(it));
835 rc = iops->load(env, it, hash);
837 GOTO(out_iops_fini, rc);
839 /* rc == 0 means we need to advance to record */
841 rc = iops->next(env, it);
844 GOTO(out_iops_put, rc);
845 /* rc > 0 is eof, will be checked in while below */
847 /* rc == 1, we found initial record and can process below */
851 new_config = nodemap_config_alloc();
852 if (IS_ERR(new_config)) {
853 rc = PTR_ERR(new_config);
855 GOTO(out_iops_put, rc);
858 /* rc > 0 is eof, check initial iops->next here as well */
860 struct nodemap_key *key;
861 union nodemap_rec rec;
862 enum nodemap_idx_type key_type;
864 key = (struct nodemap_key *)iops->key(env, it);
865 key_type = nodemap_get_key_type((struct nodemap_key *)key);
/* only handle the record types that belong to the current pass */
866 if ((cur_pass == NM_READ_CLUSTERS &&
867 key_type == NODEMAP_CLUSTER_IDX) ||
868 (cur_pass == NM_READ_ATTRIBUTES &&
869 key_type != NODEMAP_CLUSTER_IDX &&
870 key_type != NODEMAP_EMPTY_IDX)) {
871 rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
874 GOTO(out_nodemap_config, rc);
875 rc = nodemap_process_keyrec(new_config, key, &rec,
878 GOTO(out_nodemap_config, rc);
879 if (rc == NODEMAP_GLOBAL_IDX)
880 loaded_global_idx = true;
885 rc = iops->next(env, it);
886 while (rc == -ESTALE);
888 /* move to second pass */
889 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
890 cur_pass = NM_READ_ATTRIBUTES;
891 rc = iops->load(env, it, 0);
893 rc = iops->next(env, it);
906 nodemap_config_dealloc(new_config);
908 /* creating new default needs to be done outside dt read lock */
909 activate_nodemap = true;
915 dt_read_unlock(env, nodemap_idx);
918 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
919 nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
921 if (!activate_nodemap)
924 if (new_config->nmc_default_nodemap == NULL) {
925 /* new MGS won't have a default nm on disk, so create it here */
926 new_config->nmc_default_nodemap =
927 nodemap_create(DEFAULT_NODEMAP, new_config, 1);
928 if (IS_ERR(new_config->nmc_default_nodemap)) {
929 rc = PTR_ERR(new_config->nmc_default_nodemap);
931 rc = nodemap_idx_nodemap_add_update(
932 new_config->nmc_default_nodemap,
935 nodemap_putref(new_config->nmc_default_nodemap);
939 /* new nodemap config won't have an active/inactive record */
940 if (rc == 0 && loaded_global_idx == false) {
941 struct nodemap_key nk;
942 union nodemap_rec nr;
944 nodemap_global_key_init(&nk);
945 nodemap_global_rec_init(&nr, false);
946 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
950 nodemap_config_set_active(new_config);
952 nodemap_config_dealloc(new_config);
958 * Step through active config and write to disk.
/*
 * Rewrite a target's on-disk nodemap cache from the active configuration.
 *
 * A fresh index is obtained with nodemap_cache_find_create(NCFC_CREATE_NEW).
 * With active_config_lock held, the nodemap hash is first flattened into a
 * local list; for each nodemap the cluster record, the NID ranges (under a
 * read hold of nmc_range_tree_lock), the UID maps and the GID maps are
 * inserted, followed by the single global active/inactive record.
 * Individual insert results land in rc2; how they are reported, and the
 * condition guarding the final dt_object_put(), are not visible here.
 */
960 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
961 struct dt_device *dev,
962 struct local_oid_storage *los)
965 struct lu_nodemap *nodemap;
966 struct lu_nodemap *nm_tmp;
967 struct lu_nid_range *range;
968 struct lu_nid_range *range_temp;
969 struct lu_idmap *idmap;
970 struct lu_idmap *id_tmp;
972 struct nodemap_key nk;
973 union nodemap_rec nr;
974 LIST_HEAD(nodemap_list_head);
979 /* create a new index file to fill with active config */
980 o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
984 mutex_lock(&active_config_lock);
986 /* convert hash to list so we don't spin */
987 cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
988 nm_hash_list_cb, &nodemap_list_head);
990 list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
991 nodemap_cluster_key_init(&nk, nodemap->nm_id);
992 nodemap_cluster_rec_init(&nr, nodemap);
994 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1000 down_read(&active_config->nmc_range_tree_lock);
1001 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
1003 lnet_nid_t nid[2] = {
1004 range->rn_node.in_extent.start,
1005 range->rn_node.in_extent.end
1007 nodemap_range_key_init(&nk, nodemap->nm_id,
1009 nodemap_range_rec_init(&nr, nid);
1010 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1014 up_read(&active_config->nmc_range_tree_lock);
1016 /* we don't need to take nm_idmap_lock because active config
1017 * lock prevents changes from happening to nodemaps
1019 root = nodemap->nm_client_to_fs_uidmap;
1020 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1022 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1024 nodemap_idmap_rec_init(&nr, idmap->id_fs);
1025 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1030 root = nodemap->nm_client_to_fs_gidmap;
1031 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1033 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1035 nodemap_idmap_rec_init(&nr, idmap->id_fs);
1036 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1041 nodemap_global_key_init(&nk);
1042 nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1043 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1047 mutex_unlock(&active_config_lock);
1050 dt_object_put(env, o);
/*
 * Rewrite the on-disk cache of every registered target config file.
 *
 * With ncf_list_lock held, each entry's current index object is released
 * with dt_object_put_nocache() and its pointer cleared, then
 * nodemap_save_config_cache() builds a replacement on the same device.
 * Failure to write is only warned about. Where the new object is stored back
 * into the entry is on lines not visible in this excerpt.
 */
1057 static void nodemap_save_all_caches(void)
1059 struct nm_config_file *ncf;
1063 /* recreating nodemap cache requires fld_thread_key be in env */
1064 rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1066 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1070 mutex_lock(&ncf_list_lock);
1071 list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
/* dev and obd are captured before ncf_obj is released below */
1072 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1073 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1074 struct dt_object *o;
1076 /* put current config file so save conf can rewrite it */
1077 dt_object_put_nocache(&env, ncf->ncf_obj);
1078 ncf->ncf_obj = NULL;
1080 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1082 CWARN("%s: error writing to nodemap config: rc = %d\n",
1087 mutex_unlock(&ncf_list_lock);
/*
 * nodemap_config_loaded is set to true by the MGC activation path and by the
 * register functions in this file; every visible access to it happens with
 * nodemap_config_loaded_lock held.
 */
1092 /* tracks if config still needs to be loaded, either from disk or network */
1093 static bool nodemap_config_loaded;
1094 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1097 * Ensures that configs loaded over the wire are prioritized over those loaded
1100 * \param config config to set as the active config
/*
 * Activate a configuration received through the MGC and flush it to every
 * registered target cache. Under nodemap_config_loaded_lock: set the config
 * active, mark the config as loaded, then rewrite all caches via
 * nodemap_save_all_caches().
 */
1102 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1104 mutex_lock(&nodemap_config_loaded_lock);
1105 nodemap_config_set_active(config);
1106 nodemap_config_loaded = true;
1107 nodemap_save_all_caches();
1108 mutex_unlock(&nodemap_config_loaded_lock);
1110 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1113 * Register a dt_object representing the config index file. This should be
1114 * called by targets in order to load the nodemap configuration from disk. The
1115 * dt_object should be created with local_index_find_or_create and the index
1116 * features should be enabled with do_index_try.
1118 * \param obj dt_object returned by local_index_find_or_create
1120 * \retval on success: nm_config_file handle for later deregistration
1121 * \retval -ENOMEM memory allocation failure
1122 * \retval -ENOENT error loading nodemap config
1123 * \retval -EINVAL error loading nodemap config
1124 * \retval -EEXIST nodemap config already registered for MGS
/*
 * Register the MGS nodemap index and load the configuration from it.
 *
 * Fails with ERR_PTR(-EEXIST) when an MGS config file is already registered
 * and ERR_PTR(-ENOMEM) on the visible allocation-failure path. The entries
 * are loaded with nodemap_config_loaded_lock held; a load error is returned
 * as ERR_PTR(rc). On success a reference is taken on obj and the handle is
 * published in nodemap_mgs_ncf. Allocation and field assignments of ncf are
 * on lines not visible in this excerpt.
 */
1126 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1127 struct dt_object *obj,
1128 struct local_oid_storage *los)
1130 struct nm_config_file *ncf;
1134 if (nodemap_mgs_ncf != NULL)
1135 GOTO(out, ncf = ERR_PTR(-EEXIST));
1139 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1141 /* if loading from cache, prevent activation of MGS config until cache
1142 * loading is done, so disk config is overwritten by MGS config.
1144 mutex_lock(&nodemap_config_loaded_lock);
1145 rc = nodemap_load_entries(env, obj);
1147 nodemap_config_loaded = true;
1148 mutex_unlock(&nodemap_config_loaded_lock);
1152 GOTO(out, ncf = ERR_PTR(rc));
1155 lu_object_get(&obj->do_lu);
1160 nodemap_mgs_ncf = ncf;
1165 EXPORT_SYMBOL(nm_config_file_register_mgs);
/*
 * Register a target's nodemap cache file.
 *
 * If no configuration has been loaded yet, the cache index is looked up with
 * create mode 0 and loaded under nodemap_config_loaded_lock. Afterwards the
 * cache is rewritten from the in-memory config with
 * nodemap_save_config_cache(), whose returned object becomes ncf->ncf_obj,
 * and the handle is added to ncf_list_head under ncf_list_lock.
 * Errors are returned as ERR_PTR(); -ENOMEM is the visible allocation-failure
 * result. Several conditions and cleanup labels are not visible here.
 */
1167 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1168 struct dt_device *dev,
1169 struct local_oid_storage *los)
1171 struct nm_config_file *ncf;
1172 struct dt_object *config_obj = NULL;
1177 RETURN(ERR_PTR(-ENOMEM));
1179 /* don't load from cache if config already loaded */
1180 mutex_lock(&nodemap_config_loaded_lock);
1181 if (!nodemap_config_loaded) {
1182 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1183 if (IS_ERR(config_obj))
1184 rc = PTR_ERR(config_obj);
1186 rc = nodemap_load_entries(env, config_obj);
1189 nodemap_config_loaded = true;
1191 mutex_unlock(&nodemap_config_loaded_lock);
1195 /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1197 config_obj = nodemap_save_config_cache(env, dev, los);
1198 if (IS_ERR(config_obj))
1199 GOTO(out_ncf, rc = PTR_ERR(config_obj));
1202 ncf->ncf_obj = config_obj;
1205 mutex_lock(&ncf_list_lock);
1206 list_add(&ncf->ncf_list, &ncf_list_head);
1207 mutex_unlock(&ncf_list_lock);
1212 RETURN(ERR_PTR(rc));
1217 EXPORT_SYMBOL(nm_config_file_register_tgt);
1220 * Deregister a nm_config_file. Should be called by targets during cleanup.
1222 * \param ncf config file to deregister
/*
 * Deregister the MGS config file: asserts that ncf is the registered MGS
 * handle, clears nodemap_mgs_ncf, and drops the reference on the index
 * object. Freeing of ncf itself is not visible in this excerpt.
 */
1224 void nm_config_file_deregister_mgs(const struct lu_env *env,
1225 struct nm_config_file *ncf)
1228 LASSERT(nodemap_mgs_ncf == ncf);
1230 nodemap_mgs_ncf = NULL;
1232 dt_object_put(env, ncf->ncf_obj);
1238 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
/*
 * Deregister a target config file: unlink it from ncf_list_head under
 * ncf_list_lock and drop the reference on its index object. Any guards and
 * the freeing of ncf are on lines not visible in this excerpt.
 */
1240 void nm_config_file_deregister_tgt(const struct lu_env *env,
1241 struct nm_config_file *ncf)
1248 mutex_lock(&ncf_list_lock);
1249 list_del(&ncf->ncf_list);
1250 mutex_unlock(&ncf_list_lock);
1253 dt_object_put(env, ncf->ncf_obj);
1259 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
/*
 * Decode index pages received over the wire and feed every entry to
 * nodemap_process_keyrec().
 *
 * Up to LU_PAGE_COUNT containers are examined; each must carry LIP_MAGIC.
 * Entries are fixed-size slots of dif_keysize_max + dif_recsize_max bytes:
 * the key sits at the start of the slot and the record immediately after the
 * key area. Handling of a bad magic, of errors from the keyrec call, and the
 * per-iteration advance of lip are on lines not visible in this excerpt.
 */
1261 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1262 struct lu_nodemap **recent_nodemap)
1264 struct nodemap_key *key;
1265 union nodemap_rec *rec;
1270 int size = dt_nodemap_features.dif_keysize_max +
1271 dt_nodemap_features.dif_recsize_max;
1274 for (j = 0; j < LU_PAGE_COUNT; j++) {
1275 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1278 /* get and process keys and records from page */
1279 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1280 entry = lip->lp_idx.lip_entries + k * size;
1281 key = (struct nodemap_key *)entry;
1283 entry += dt_nodemap_features.dif_keysize_max;
1284 rec = (union nodemap_rec *)entry;
1286 rc = nodemap_process_keyrec(config, key, rec,
1297 EXPORT_SYMBOL(nodemap_process_idx_pages);
/*
 * Fill one index container (lu_idxpage) with key/record pairs; passed as the
 * page-building callback to dt_index_walk() by nodemap_index_read().
 *
 * arg is the struct idx_info describing the transfer. ii_attrs carries the
 * current pass (NM_READ_CLUSTERS or NM_READ_ATTRIBUTES) and is advanced here
 * when the first pass reaches the end of the index, at which point the
 * iterator is reloaded at 0. ii_hash_end is updated with the iterator
 * position for each record and set to II_END_OFF when nothing is left.
 * Only records whose type matches the current pass are copied; each copied
 * entry is the key (ii_keysize bytes) followed by the record.
 */
1299 static int nodemap_page_build(const struct lu_env *env, union lu_page *lp,
1300 size_t nob, const struct dt_it_ops *iops,
1301 struct dt_it *it, __u32 attr, void *arg)
1303 struct idx_info *ii = (struct idx_info *)arg;
1304 struct lu_idxpage *lip = &lp->lp_idx;
1306 size_t size = ii->ii_keysize + ii->ii_recsize;
1310 if (nob < LIP_HDR_SIZE)
1313 /* initialize the header of the new container */
1314 memset(lip, 0, LIP_HDR_SIZE);
1315 lip->lip_magic = LIP_MAGIC;
1316 nob -= LIP_HDR_SIZE;
1318 entry = lip->lip_entries;
1320 char *tmp_entry = entry;
1323 enum nodemap_idx_type key_type;
1325 /* fetch 64-bit hash value */
1326 hash = iops->store(env, it);
1327 ii->ii_hash_end = hash;
1329 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1330 if (lip->lip_nr != 0)
1335 if (lip->lip_nr == 0)
1336 GOTO(out, rc = -EINVAL);
1340 key = iops->key(env, it);
1341 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1343 /* on the first pass, get only the cluster types. On second
1344 * pass, get all the rest */
1345 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1346 key_type == NODEMAP_CLUSTER_IDX) ||
1347 (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1348 key_type != NODEMAP_CLUSTER_IDX &&
1349 key_type != NODEMAP_EMPTY_IDX)) {
1350 memcpy(tmp_entry, key, ii->ii_keysize);
1351 tmp_entry += ii->ii_keysize;
1353 /* and finally the record */
1354 rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1356 if (rc != -ESTALE) {
1360 /* hash/key/record successfully copied! */
1362 if (unlikely(lip->lip_nr == 1 &&
1364 ii->ii_hash_start = hash;
1366 entry = tmp_entry + ii->ii_recsize;
1371 /* move on to the next record */
1373 rc = iops->next(env, it);
1374 } while (rc == -ESTALE);
1376 /* move to second pass */
1377 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1378 ii->ii_attrs = NM_READ_ATTRIBUTES;
1379 rc = iops->load(env, it, 0);
1381 rc = iops->next(env, it);
1392 if (rc >= 0 && lip->lip_nr > 0)
1393 /* one more container */
1396 /* no more entries */
1397 ii->ii_hash_end = II_END_OFF;
/*
 * Walk the nodemap index of a config file into the pages described by rdpg.
 *
 * Key and record sizes in ii are set from dt_nodemap_features. Under the dt
 * read lock the current index version is compared with ii->ii_version for
 * continuation reads (rp_hash != 0); on mismatch a debug message is emitted
 * and ii_hash_end is reset to 0. The visible success path walks the index
 * with nodemap_page_build() and records the version in ii->ii_version.
 * The branch structure around these statements and the return value are not
 * fully visible in this excerpt.
 */
1402 int nodemap_index_read(struct lu_env *env,
1403 struct nm_config_file *ncf,
1404 struct idx_info *ii,
1405 const struct lu_rdpg *rdpg)
1407 struct dt_object *nodemap_idx = ncf->ncf_obj;
1411 ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1412 ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1414 dt_read_lock(env, nodemap_idx, 0);
1415 version = dt_version_get(env, nodemap_idx);
1416 if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1417 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1420 ii->ii_hash_end = 0;
1422 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1424 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1428 ii->ii_version = version;
1430 dt_read_unlock(env, nodemap_idx);
1433 EXPORT_SYMBOL(nodemap_index_read);
1436 * Returns the current nodemap configuration to MGC by walking the nodemap
1437 * config index and storing it in the response buffer.
1439 * \param req incoming MGS_CONFIG_READ request
1441 * \retval -EINVAL malformed request
1442 * \retval -ENOTCONN client evicted/reconnected already
1443 * \retval -ETIMEDOUT client timeout or network error
/*
 * Serve a nodemap config read request: read a chunk of the MGS nodemap index
 * into freshly allocated pages and send it to the client as a bulk PUT.
 *
 * The request body supplies the transfer size (mcb_units << mcb_bits), the
 * starting offset (mcb_offset) and the current pass (mcb_nm_cur_pass); the
 * reply carries the end offset and the possibly advanced pass. The index
 * version last seen by this export is kept in ted_nodemap_version so that
 * nodemap_index_read() can notice a config change between chunks.
 * All pages and the page array are released before returning.
 */
1446 int nodemap_get_config_req(struct obd_device *mgs_obd,
1447 struct ptlrpc_request *req)
1449 struct mgs_config_body *body;
1450 struct mgs_config_res *res;
1451 struct lu_rdpg rdpg;
1452 struct idx_info nodemap_ii;
1453 struct ptlrpc_bulk_desc *desc;
1454 struct l_wait_info lwi;
1455 struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1461 body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1465 if (body->mcb_type != CONFIG_T_NODEMAP)
/* rp_count is a byte count; rp_npages is that count rounded up to pages */
1468 rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1469 rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1471 if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1474 CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1475 body->mcb_name, rdpg.rp_count);
1477 /* allocate pages to store the containers */
1478 OBD_ALLOC(rdpg.rp_pages, sizeof(*rdpg.rp_pages) * rdpg.rp_npages);
1479 if (rdpg.rp_pages == NULL)
1481 for (i = 0; i < rdpg.rp_npages; i++) {
1482 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1483 if (rdpg.rp_pages[i] == NULL)
1484 GOTO(out, rc = -ENOMEM);
1487 rdpg.rp_hash = body->mcb_offset;
1488 nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1489 nodemap_ii.ii_flags = II_FL_NOHASH;
1490 nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1491 nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1493 bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1494 mgs_obd->u.obt.obt_nodemap_config_file,
1495 &nodemap_ii, &rdpg);
1497 GOTO(out, rc = bytes);
1499 rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1501 res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1503 GOTO(out, rc = -EINVAL);
1504 res->mcr_offset = nodemap_ii.ii_hash_end;
1505 res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1507 page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
/*
 * NOTE(review): page_count is in pages but rp_count is in bytes, so this
 * assertion is very loose; rp_npages looks like the intended bound -- confirm.
 */
1508 LASSERT(page_count <= rdpg.rp_count);
1509 desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1510 PTLRPC_BULK_PUT_SOURCE |
1511 PTLRPC_BULK_BUF_KIOV,
1513 &ptlrpc_bulk_kiov_pin_ops);
1515 GOTO(out, rc = -ENOMEM);
1517 for (i = 0; i < page_count && bytes > 0; i++) {
1518 ptlrpc_prep_bulk_page_pin(desc, rdpg.rp_pages[i], 0,
1519 min_t(int, bytes, PAGE_SIZE));
1523 rc = target_bulk_io(req->rq_export, desc, &lwi);
1524 ptlrpc_free_bulk(desc);
/* cleanup tolerates a partially populated page array */
1527 if (rdpg.rp_pages != NULL) {
1528 for (i = 0; i < rdpg.rp_npages; i++)
1529 if (rdpg.rp_pages[i] != NULL)
1530 __free_page(rdpg.rp_pages[i]);
1531 OBD_FREE(rdpg.rp_pages,
1532 rdpg.rp_npages * sizeof(rdpg.rp_pages[0]));
1536 EXPORT_SYMBOL(nodemap_get_config_req);