Whamcloud - gitweb
LU-8851 nodemap: add uid/gid only flags to control mapping
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2014, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <lnet/types.h>
54 #include <lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
62
/* list of registered nodemap index files, except MGS */
static LIST_HEAD(ncf_list_head);
/* NOTE(review): presumably serializes access to ncf_list_head - confirm
 * against the code that walks the list
 */
static DEFINE_MUTEX(ncf_list_lock);

/* MGS index is different than others, others are listeners to MGS idx.
 * Starts out NULL (static storage); the nodemap_idx_* writers in this file
 * refuse to run with -EINVAL while it is NULL.
 */
static struct nm_config_file *nodemap_mgs_ncf;
69
/*
 * lu_nodemap flags: bit masks OR-ed together into the ncr_flags word of a
 * cluster record, one bit per nmf_* boolean of the nodemap.
 */
enum nm_flag_shifts {
        NM_FL_ALLOW_ROOT_ACCESS = 1 << 0,
        NM_FL_TRUST_CLIENT_IDS  = 1 << 1,
        NM_FL_DENY_UNKNOWN      = 1 << 2,
        NM_FL_MAP_UID_ONLY      = 1 << 3,
        NM_FL_MAP_GID_ONLY      = 1 << 4,
};
78
79 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
80 {
81         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
82                                                         NODEMAP_CLUSTER_IDX));
83         nk->nk_unused = 0;
84 }
85
86 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
87                                      const struct lu_nodemap *nodemap)
88 {
89         CLASSERT(sizeof(nr->ncr.ncr_name) == sizeof(nodemap->nm_name));
90
91         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nodemap->nm_name));
92         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
93         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
94         nr->ncr.ncr_flags = cpu_to_le32(
95                 (nodemap->nmf_trust_client_ids ?
96                         NM_FL_TRUST_CLIENT_IDS : 0) |
97                 (nodemap->nmf_allow_root_access ?
98                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
99                 (nodemap->nmf_deny_unknown ?
100                         NM_FL_DENY_UNKNOWN : 0) |
101                 (nodemap->nmf_map_uid_only ?
102                         NM_FL_MAP_UID_ONLY : 0) |
103                 (nodemap->nmf_map_gid_only ?
104                         NM_FL_MAP_GID_ONLY : 0));
105 }
106
107 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
108                                    enum nodemap_id_type id_type,
109                                    u32 id_client)
110 {
111         enum nodemap_idx_type idx_type;
112
113         if (id_type == NODEMAP_UID)
114                 idx_type = NODEMAP_UIDMAP_IDX;
115         else
116                 idx_type = NODEMAP_GIDMAP_IDX;
117
118         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
119         nk->nk_id_client = cpu_to_le32(id_client);
120 }
121
122 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
123 {
124         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
125 }
126
127 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
128                                    unsigned int rn_id)
129 {
130         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
131                                                         NODEMAP_RANGE_IDX));
132         nk->nk_range_id = cpu_to_le32(rn_id);
133 }
134
135 static void nodemap_range_rec_init(union nodemap_rec *nr,
136                                    const lnet_nid_t nid[2])
137 {
138         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
139         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
140 }
141
142 static void nodemap_global_key_init(struct nodemap_key *nk)
143 {
144         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
145         nk->nk_unused = 0;
146 }
147
148 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
149 {
150         nr->ngr.ngr_is_active = active;
151 }
152
153 /* should be called with dt_write lock */
154 static void nodemap_inc_version(const struct lu_env *env,
155                                 struct dt_object *nodemap_idx,
156                                 struct thandle *th)
157 {
158         u64 ver = dt_version_get(env, nodemap_idx);
159         dt_version_set(env, nodemap_idx, ver + 1, th);
160 }
161
/* Mode argument of nodemap_cache_find_create(). */
enum ncfc_find_create {
        /* remove any existing index file and create a new one */
        NCFC_CREATE_NEW = 0x1,
};
165
/**
 * Find the nodemap index object named LUSTRE_NODEMAP_NAME under the root of
 * \a dev, creating it if needed.
 *
 * If an existing index cannot be set up as an index (do_index_try() fails)
 * and \a create_new was not NCFC_CREATE_NEW, the old file is unlinked and the
 * function retries once as if NCFC_CREATE_NEW had been passed.
 *
 * \param       env             execution environment
 * \param       dev             device holding the index
 * \param       los             local OID storage used to find/create the index
 * \param       create_new      NCFC_CREATE_NEW to discard an existing index
 * \retval      index object on success, released with dt_object_put() in this
 *              function's own error path
 * \retval      ERR_PTR(-EROFS) device is read-only and the index is missing
 *                              or a new one was requested
 * \retval      ERR_PTR(-ve)    other lookup, unlink, create or load failure
 */
static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
                                                   struct dt_device *dev,
                                                   struct local_oid_storage *los,
                                                   enum ncfc_find_create create_new)
{
        struct lu_fid tfid;
        struct dt_object *root_obj;
        struct dt_object *nm_obj;
        int rc = 0;

        rc = dt_root_get(env, dev, &tfid);
        if (rc < 0)
                GOTO(out, nm_obj = ERR_PTR(rc));

        root_obj = dt_locate(env, dev, &tfid);
        if (unlikely(IS_ERR(root_obj)))
                GOTO(out, nm_obj = root_obj);

        /* rc is kept: -ENOENT below means there is no old index to remove */
        rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
        if (rc == -ENOENT) {
                /* index would have to be created, impossible if read-only */
                if (dev->dd_rdonly)
                        GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
        } else if (rc) {
                GOTO(out_root, nm_obj = ERR_PTR(rc));
        } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
                /* index exists but replacing it needs a writable device */
                GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
        }

again:
        /* if loading index fails the first time, create new index */
        if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
                CDEBUG(D_INFO, "removing old index, creating new one\n");
                rc = local_object_unlink(env, dev, root_obj,
                                         LUSTRE_NODEMAP_NAME);
                if (rc < 0) {
                        /* XXX not sure the best way to get obd name. */
                        CERROR("cannot destroy nodemap index: rc = %d\n",
                               rc);
                        GOTO(out_root, nm_obj = ERR_PTR(rc));
                }
        }

        nm_obj = local_index_find_or_create(env, los, root_obj,
                                                LUSTRE_NODEMAP_NAME,
                                                S_IFREG | S_IRUGO | S_IWUSR,
                                                &dt_nodemap_features);
        if (IS_ERR(nm_obj))
                GOTO(out_root, nm_obj);

        /* index operations not set up yet: try to attach them now */
        if (nm_obj->do_index_ops == NULL) {
                rc = nm_obj->do_ops->do_index_try(env, nm_obj,
                                                      &dt_nodemap_features);
                /* even if loading from tgt fails, connecting to MGS will
                 * rewrite the config
                 */
                if (rc < 0) {
                        dt_object_put(env, nm_obj);

                        /* already working on a fresh index: give up */
                        if (create_new == NCFC_CREATE_NEW)
                                GOTO(out_root, nm_obj = ERR_PTR(rc));

                        CERROR("cannot load nodemap index from disk, creating "
                               "new index: rc = %d\n", rc);
                        /* NOTE(review): dd_rdonly is not re-checked before
                         * this retry unlinks the old index - confirm intended
                         */
                        create_new = NCFC_CREATE_NEW;
                        goto again;
                }
        }

out_root:
        dt_object_put(env, root_obj);
out:
        return nm_obj;
}
239
240 static int nodemap_idx_insert(const struct lu_env *env,
241                               struct dt_object *idx,
242                               const struct nodemap_key *nk,
243                               const union nodemap_rec *nr)
244 {
245         struct thandle          *th;
246         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
247         int                      rc;
248
249         CLASSERT(sizeof(union nodemap_rec) == 32);
250
251         th = dt_trans_create(env, dev);
252
253         if (IS_ERR(th))
254                 GOTO(out, rc = PTR_ERR(th));
255
256         rc = dt_declare_insert(env, idx,
257                                (const struct dt_rec *)nr,
258                                (const struct dt_key *)nk, th);
259         if (rc != 0)
260                 GOTO(out, rc);
261
262         rc = dt_declare_version_set(env, idx, th);
263         if (rc != 0)
264                 GOTO(out, rc);
265
266         rc = dt_trans_start_local(env, dev, th);
267         if (rc != 0)
268                 GOTO(out, rc);
269
270         dt_write_lock(env, idx, 0);
271
272         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
273                        (const struct dt_key *)nk, th, 1);
274
275         nodemap_inc_version(env, idx, th);
276         dt_write_unlock(env, idx);
277 out:
278         dt_trans_stop(env, dev, th);
279
280         return rc;
281 }
282
283 static int nodemap_idx_update(const struct lu_env *env,
284                               struct dt_object *idx,
285                               const struct nodemap_key *nk,
286                               const union nodemap_rec *nr)
287 {
288         struct thandle          *th;
289         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
290         int                      rc = 0;
291
292         th = dt_trans_create(env, dev);
293
294         if (IS_ERR(th))
295                 GOTO(out, rc = PTR_ERR(th));
296
297         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
298         if (rc != 0)
299                 GOTO(out, rc);
300
301         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
302                                (const struct dt_key *)nk, th);
303         if (rc != 0)
304                 GOTO(out, rc);
305
306         rc = dt_declare_version_set(env, idx, th);
307         if (rc != 0)
308                 GOTO(out, rc);
309
310         rc = dt_trans_start_local(env, dev, th);
311         if (rc != 0)
312                 GOTO(out, rc);
313
314         dt_write_lock(env, idx, 0);
315
316         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
317         if (rc != 0)
318                 GOTO(out_lock, rc);
319
320         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
321                        (const struct dt_key *)nk, th, 1);
322         if (rc != 0)
323                 GOTO(out_lock, rc);
324
325         nodemap_inc_version(env, idx, th);
326 out_lock:
327         dt_write_unlock(env, idx);
328 out:
329         dt_trans_stop(env, dev, th);
330
331         return rc;
332 }
333
334 static int nodemap_idx_delete(const struct lu_env *env,
335                               struct dt_object *idx,
336                               const struct nodemap_key *nk,
337                               const union nodemap_rec *unused)
338 {
339         struct thandle          *th;
340         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
341         int                      rc = 0;
342
343         th = dt_trans_create(env, dev);
344
345         if (IS_ERR(th))
346                 GOTO(out, rc = PTR_ERR(th));
347
348         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
349         if (rc != 0)
350                 GOTO(out, rc);
351
352         rc = dt_declare_version_set(env, idx, th);
353         if (rc != 0)
354                 GOTO(out, rc);
355
356         rc = dt_trans_start_local(env, dev, th);
357         if (rc != 0)
358                 GOTO(out, rc);
359
360         dt_write_lock(env, idx, 0);
361
362         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
363
364         nodemap_inc_version(env, idx, th);
365
366         dt_write_unlock(env, idx);
367 out:
368         dt_trans_stop(env, dev, th);
369
370         return rc;
371 }
372
/* Selects between inserting a new record and replacing an existing one. */
enum nm_add_update {
        NM_ADD,                 /* insert a new record */
        NM_UPDATE,              /* delete and re-insert an existing record */
};
377
378 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
379                                           struct dt_object *idx,
380                                           enum nm_add_update update)
381 {
382         struct nodemap_key nk;
383         union nodemap_rec nr;
384         struct lu_env env;
385         int rc = 0;
386
387         ENTRY;
388
389         rc = lu_env_init(&env, LCT_LOCAL);
390         if (rc)
391                 RETURN(rc);
392
393         nodemap_cluster_key_init(&nk, nodemap->nm_id);
394         nodemap_cluster_rec_init(&nr, nodemap);
395
396         if (update == NM_UPDATE)
397                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
398         else
399                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
400
401         lu_env_fini(&env);
402
403         RETURN(rc);
404 }
405
406 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
407 {
408         if (nodemap_mgs_ncf == NULL) {
409                 CERROR("cannot add nodemap config to non-existing MGS.\n");
410                 return -EINVAL;
411         }
412
413         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
414                                               NM_ADD);
415 }
416
417 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
418 {
419         if (nodemap_mgs_ncf == NULL) {
420                 CERROR("cannot add nodemap config to non-existing MGS.\n");
421                 return -EINVAL;
422         }
423
424         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
425                                               NM_UPDATE);
426 }
427
428 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
429 {
430         struct rb_root           root;
431         struct lu_idmap         *idmap;
432         struct lu_idmap         *temp;
433         struct lu_nid_range     *range;
434         struct lu_nid_range     *range_temp;
435         struct nodemap_key       nk;
436         struct lu_env            env;
437         int                      rc = 0;
438         int                      rc2 = 0;
439
440         ENTRY;
441
442         if (nodemap_mgs_ncf == NULL) {
443                 CERROR("cannot add nodemap config to non-existing MGS.\n");
444                 return -EINVAL;
445         }
446
447         rc = lu_env_init(&env, LCT_LOCAL);
448         if (rc != 0)
449                 RETURN(rc);
450
451         root = nodemap->nm_fs_to_client_uidmap;
452         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
453                                                 id_fs_to_client) {
454                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
455                                        idmap->id_client);
456                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
457                                          &nk, NULL);
458                 if (rc2 < 0)
459                         rc = rc2;
460         }
461
462         root = nodemap->nm_client_to_fs_gidmap;
463         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
464                                                 id_client_to_fs) {
465                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
466                                        idmap->id_client);
467                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
468                                          &nk, NULL);
469                 if (rc2 < 0)
470                         rc = rc2;
471         }
472
473         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
474                                  rn_list) {
475                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
476                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
477                                          &nk, NULL);
478                 if (rc2 < 0)
479                         rc = rc2;
480         }
481
482         nodemap_cluster_key_init(&nk, nodemap->nm_id);
483         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
484         if (rc2 < 0)
485                 rc = rc2;
486
487         lu_env_fini(&env);
488
489         RETURN(rc);
490 }
491
492 int nodemap_idx_range_add(const struct lu_nid_range *range,
493                           const lnet_nid_t nid[2])
494 {
495         struct nodemap_key       nk;
496         union nodemap_rec        nr;
497         struct lu_env            env;
498         int                      rc = 0;
499         ENTRY;
500
501         if (nodemap_mgs_ncf == NULL) {
502                 CERROR("cannot add nodemap config to non-existing MGS.\n");
503                 return -EINVAL;
504         }
505
506         rc = lu_env_init(&env, LCT_LOCAL);
507         if (rc != 0)
508                 RETURN(rc);
509
510         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
511         nodemap_range_rec_init(&nr, nid);
512
513         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
514         lu_env_fini(&env);
515
516         RETURN(rc);
517 }
518
519 int nodemap_idx_range_del(const struct lu_nid_range *range)
520 {
521         struct nodemap_key       nk;
522         struct lu_env            env;
523         int                      rc = 0;
524         ENTRY;
525
526         if (nodemap_mgs_ncf == NULL) {
527                 CERROR("cannot add nodemap config to non-existing MGS.\n");
528                 return -EINVAL;
529         }
530
531         rc = lu_env_init(&env, LCT_LOCAL);
532         if (rc != 0)
533                 RETURN(rc);
534
535         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
536
537         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
538         lu_env_fini(&env);
539
540         RETURN(rc);
541 }
542
543 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
544                           enum nodemap_id_type id_type,
545                           const u32 map[2])
546 {
547         struct nodemap_key       nk;
548         union nodemap_rec        nr;
549         struct lu_env            env;
550         int                      rc = 0;
551         ENTRY;
552
553         if (nodemap_mgs_ncf == NULL) {
554                 CERROR("cannot add nodemap config to non-existing MGS.\n");
555                 return -EINVAL;
556         }
557
558         rc = lu_env_init(&env, LCT_LOCAL);
559         if (rc != 0)
560                 RETURN(rc);
561
562         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
563         nodemap_idmap_rec_init(&nr, map[1]);
564
565         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
566         lu_env_fini(&env);
567
568         RETURN(rc);
569 }
570
571 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
572                           enum nodemap_id_type id_type,
573                           const u32 map[2])
574 {
575         struct nodemap_key       nk;
576         struct lu_env            env;
577         int                      rc = 0;
578         ENTRY;
579
580         if (nodemap_mgs_ncf == NULL) {
581                 CERROR("cannot add nodemap config to non-existing MGS.\n");
582                 return -EINVAL;
583         }
584
585         rc = lu_env_init(&env, LCT_LOCAL);
586         if (rc != 0)
587                 RETURN(rc);
588
589         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
590
591         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
592         lu_env_fini(&env);
593
594         RETURN(rc);
595 }
596
597 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
598 {
599         struct nodemap_key       nk;
600         union nodemap_rec        nr;
601         struct lu_env            env;
602         int                      rc = 0;
603         ENTRY;
604
605         if (nodemap_mgs_ncf == NULL) {
606                 CERROR("cannot add nodemap config to non-existing MGS.\n");
607                 return -EINVAL;
608         }
609
610         rc = lu_env_init(&env, LCT_LOCAL);
611         if (rc != 0)
612                 RETURN(rc);
613
614         nodemap_global_key_init(&nk);
615         nodemap_global_rec_init(&nr, value);
616
617         if (update == NM_UPDATE)
618                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
619                                         &nk, &nr);
620         else
621                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
622                                         &nk, &nr);
623
624         lu_env_fini(&env);
625
626         RETURN(rc);
627 }
628
629 int nodemap_idx_nodemap_activate(bool value)
630 {
631         return nodemap_idx_global_add_update(value, NM_UPDATE);
632 }
633
634 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
635 {
636         u32                      nodemap_id;
637
638         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
639         return nm_idx_get_type(nodemap_id);
640 }
641
642 /**
643  * Process a key/rec pair and modify the new configuration.
644  *
645  * \param       config          configuration to update with this key/rec data
646  * \param       key             key of the record that was loaded
647  * \param       rec             record that was loaded
648  * \param       recent_nodemap  last referenced nodemap
649  * \retval      type of record processed, see enum #nodemap_idx_type
650  * \retval      -ENOENT         range or map loaded before nodemap record
651  * \retval      -EINVAL         duplicate nodemap cluster records found with
652  *                              different IDs, or nodemap has invalid name
653  * \retval      -ENOMEM
654  */
655 static int nodemap_process_keyrec(struct nodemap_config *config,
656                                   const struct nodemap_key *key,
657                                   const union nodemap_rec *rec,
658                                   struct lu_nodemap **recent_nodemap)
659 {
660         struct lu_nodemap       *nodemap = NULL;
661         enum nodemap_idx_type    type;
662         enum nodemap_id_type     id_type;
663         u8                       flags;
664         u32                      nodemap_id;
665         lnet_nid_t               nid[2];
666         u32                      map[2];
667         int                      rc;
668
669         ENTRY;
670
671         CLASSERT(sizeof(union nodemap_rec) == 32);
672
673         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
674         type = nodemap_get_key_type(key);
675         nodemap_id = nm_idx_set_type(nodemap_id, 0);
676
677         CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
678                nodemap_id, type);
679
680         /* find the correct nodemap in the load list */
681         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
682             type == NODEMAP_GIDMAP_IDX) {
683                 struct lu_nodemap *tmp = NULL;
684
685                 nodemap = *recent_nodemap;
686
687                 if (nodemap == NULL)
688                         GOTO(out, rc = -ENOENT);
689
690                 if (nodemap->nm_id != nodemap_id) {
691                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
692                                 if (tmp->nm_id == nodemap_id) {
693                                         nodemap = tmp;
694                                         break;
695                                 }
696
697                         if (nodemap->nm_id != nodemap_id)
698                                 GOTO(out, rc = -ENOENT);
699                 }
700
701                 /* update most recently used nodemap if necessay */
702                 if (nodemap != *recent_nodemap)
703                         *recent_nodemap = nodemap;
704         }
705
706         switch (type) {
707         case NODEMAP_EMPTY_IDX:
708                 if (nodemap_id != 0)
709                         CWARN("Found nodemap config record without type field, "
710                               " nodemap_id=%d. nodemap config file corrupt?\n",
711                               nodemap_id);
712                 break;
713         case NODEMAP_CLUSTER_IDX:
714                 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
715                                           rec->ncr.ncr_name);
716                 if (nodemap == NULL) {
717                         if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
718                                 nodemap = nodemap_create(rec->ncr.ncr_name,
719                                                          config, 1);
720                                 config->nmc_default_nodemap = nodemap;
721                         } else {
722                                 nodemap = nodemap_create(rec->ncr.ncr_name,
723                                                          config, 0);
724                         }
725                         if (IS_ERR(nodemap))
726                                 GOTO(out, rc = PTR_ERR(nodemap));
727
728                         /* we need to override the local ID with the saved ID */
729                         nodemap->nm_id = nodemap_id;
730                         if (nodemap_id > config->nmc_nodemap_highest_id)
731                                 config->nmc_nodemap_highest_id = nodemap_id;
732
733                 } else if (nodemap->nm_id != nodemap_id) {
734                         nodemap_putref(nodemap);
735                         GOTO(out, rc = -EINVAL);
736                 }
737
738                 nodemap->nm_squash_uid =
739                                 le32_to_cpu(rec->ncr.ncr_squash_uid);
740                 nodemap->nm_squash_gid =
741                                 le32_to_cpu(rec->ncr.ncr_squash_gid);
742
743                 flags = le32_to_cpu(rec->ncr.ncr_flags);
744                 nodemap->nmf_allow_root_access =
745                                         flags & NM_FL_ALLOW_ROOT_ACCESS;
746                 nodemap->nmf_trust_client_ids =
747                                         flags & NM_FL_TRUST_CLIENT_IDS;
748                 nodemap->nmf_deny_unknown =
749                                         flags & NM_FL_DENY_UNKNOWN;
750                 nodemap->nmf_map_uid_only =
751                                         flags & NM_FL_MAP_UID_ONLY;
752                 nodemap->nmf_map_gid_only =
753                                         flags & NM_FL_MAP_GID_ONLY;
754
755                 if (*recent_nodemap == NULL) {
756                         *recent_nodemap = nodemap;
757                         INIT_LIST_HEAD(&nodemap->nm_list);
758                 } else {
759                         list_add(&nodemap->nm_list,
760                                  &(*recent_nodemap)->nm_list);
761                 }
762                 nodemap_putref(nodemap);
763                 break;
764         case NODEMAP_RANGE_IDX:
765                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
766                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
767
768                 rc = nodemap_add_range_helper(config, nodemap, nid,
769                                         le32_to_cpu(key->nk_range_id));
770                 if (rc != 0)
771                         GOTO(out, rc);
772                 break;
773         case NODEMAP_UIDMAP_IDX:
774         case NODEMAP_GIDMAP_IDX:
775                 map[0] = le32_to_cpu(key->nk_id_client);
776                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
777
778                 if (type == NODEMAP_UIDMAP_IDX)
779                         id_type = NODEMAP_UID;
780                 else
781                         id_type = NODEMAP_GID;
782
783                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
784                 if (rc != 0)
785                         GOTO(out, rc);
786                 break;
787         case NODEMAP_GLOBAL_IDX:
788                 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
789                 break;
790         default:
791                 CERROR("got keyrec pair for unknown type %d\n", type);
792                 break;
793         }
794
795         rc = type;
796
797         EXIT;
798
799 out:
800         return rc;
801 }
802
/* Passes made over the index by nodemap_load_entries(). */
enum nm_config_passes {
        /* only cluster records are processed */
        NM_READ_CLUSTERS,
        /* every record except cluster and empty ones is processed */
        NM_READ_ATTRIBUTES,
};
807
808 static int nodemap_load_entries(const struct lu_env *env,
809                                 struct dt_object *nodemap_idx)
810 {
811         const struct dt_it_ops *iops;
812         struct dt_it *it;
813         struct lu_nodemap *recent_nodemap = NULL;
814         struct nodemap_config *new_config = NULL;
815         u64 hash = 0;
816         bool activate_nodemap = false;
817         bool loaded_global_idx = false;
818         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
819         int rc = 0;
820
821         ENTRY;
822
823         iops = &nodemap_idx->do_index_ops->dio_it;
824
825         dt_read_lock(env, nodemap_idx, 0);
826         it = iops->init(env, nodemap_idx, 0);
827         if (IS_ERR(it))
828                 GOTO(out, rc = PTR_ERR(it));
829
830         rc = iops->load(env, it, hash);
831         if (rc < 0)
832                 GOTO(out_iops_fini, rc);
833
834         /* rc == 0 means we need to advance to record */
835         if (rc == 0) {
836                 rc = iops->next(env, it);
837
838                 if (rc < 0)
839                         GOTO(out_iops_put, rc);
840                 /* rc > 0 is eof, will be checked in while below */
841         } else {
842                 /* rc == 1, we found initial record and can process below */
843                 rc = 0;
844         }
845
846         new_config = nodemap_config_alloc();
847         if (IS_ERR(new_config)) {
848                 rc = PTR_ERR(new_config);
849                 new_config = NULL;
850                 GOTO(out_iops_put, rc);
851         }
852
853         /* rc > 0 is eof, check initial iops->next here as well */
854         while (rc == 0) {
855                 struct nodemap_key *key;
856                 union nodemap_rec rec;
857                 enum nodemap_idx_type key_type;
858
859                 key = (struct nodemap_key *)iops->key(env, it);
860                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
861                 if ((cur_pass == NM_READ_CLUSTERS &&
862                                 key_type == NODEMAP_CLUSTER_IDX) ||
863                     (cur_pass == NM_READ_ATTRIBUTES &&
864                                 key_type != NODEMAP_CLUSTER_IDX &&
865                                 key_type != NODEMAP_EMPTY_IDX)) {
866                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
867                         if (rc != -ESTALE) {
868                                 if (rc != 0)
869                                         GOTO(out_nodemap_config, rc);
870                                 rc = nodemap_process_keyrec(new_config, key, &rec,
871                                                             &recent_nodemap);
872                                 if (rc < 0)
873                                         GOTO(out_nodemap_config, rc);
874                                 if (rc == NODEMAP_GLOBAL_IDX)
875                                         loaded_global_idx = true;
876                         }
877                 }
878
879                 do
880                         rc = iops->next(env, it);
881                 while (rc == -ESTALE);
882
883                 /* move to second pass */
884                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
885                         cur_pass = NM_READ_ATTRIBUTES;
886                         rc = iops->load(env, it, 0);
887                         if (rc == 0)
888                                 rc = iops->next(env, it);
889                         else if (rc > 0)
890                                 rc = 0;
891                         else
892                                 GOTO(out, rc);
893                 }
894         }
895
896         if (rc > 0)
897                 rc = 0;
898
899 out_nodemap_config:
900         if (rc != 0)
901                 nodemap_config_dealloc(new_config);
902         else
903                 /* creating new default needs to be done outside dt read lock */
904                 activate_nodemap = true;
905 out_iops_put:
906         iops->put(env, it);
907 out_iops_fini:
908         iops->fini(env, it);
909 out:
910         dt_read_unlock(env, nodemap_idx);
911
912         if (rc != 0)
913                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
914                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
915
916         if (!activate_nodemap)
917                 RETURN(rc);
918
919         if (new_config->nmc_default_nodemap == NULL) {
920                 /* new MGS won't have a default nm on disk, so create it here */
921                 new_config->nmc_default_nodemap =
922                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
923                 if (IS_ERR(new_config->nmc_default_nodemap)) {
924                         rc = PTR_ERR(new_config->nmc_default_nodemap);
925                 } else {
926                         rc = nodemap_idx_nodemap_add_update(
927                                         new_config->nmc_default_nodemap,
928                                         nodemap_idx,
929                                         NM_ADD);
930                         nodemap_putref(new_config->nmc_default_nodemap);
931                 }
932         }
933
934         /* new nodemap config won't have an active/inactive record */
935         if (rc == 0 && loaded_global_idx == false) {
936                 struct nodemap_key       nk;
937                 union nodemap_rec        nr;
938
939                 nodemap_global_key_init(&nk);
940                 nodemap_global_rec_init(&nr, false);
941                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
942         }
943
944         if (rc == 0)
945                 nodemap_config_set_active(new_config);
946         else
947                 nodemap_config_dealloc(new_config);
948
949         RETURN(rc);
950 }
951
952 /**
953  * Step through active config and write to disk.
954  */
955 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
956                                             struct dt_device *dev,
957                                             struct local_oid_storage *los)
958 {
959         struct dt_object *o;
960         struct lu_nodemap *nodemap;
961         struct lu_nodemap *nm_tmp;
962         struct lu_nid_range *range;
963         struct lu_nid_range *range_temp;
964         struct lu_idmap *idmap;
965         struct lu_idmap *id_tmp;
966         struct rb_root root;
967         struct nodemap_key nk;
968         union nodemap_rec nr;
969         LIST_HEAD(nodemap_list_head);
970         int rc = 0, rc2;
971
972         ENTRY;
973
974         /* create a new index file to fill with active config */
975         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
976         if (IS_ERR(o))
977                 RETURN(o);
978
979         mutex_lock(&active_config_lock);
980
981         /* convert hash to list so we don't spin */
982         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
983                                nm_hash_list_cb, &nodemap_list_head);
984
985         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
986                 nodemap_cluster_key_init(&nk, nodemap->nm_id);
987                 nodemap_cluster_rec_init(&nr, nodemap);
988
989                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
990                 if (rc2 < 0) {
991                         rc = rc2;
992                         continue;
993                 }
994
995                 down_read(&active_config->nmc_range_tree_lock);
996                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
997                                          rn_list) {
998                         lnet_nid_t nid[2] = {
999                                 range->rn_node.in_extent.start,
1000                                 range->rn_node.in_extent.end
1001                         };
1002                         nodemap_range_key_init(&nk, nodemap->nm_id,
1003                                                range->rn_id);
1004                         nodemap_range_rec_init(&nr, nid);
1005                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1006                         if (rc2 < 0)
1007                                 rc = rc2;
1008                 }
1009                 up_read(&active_config->nmc_range_tree_lock);
1010
1011                 /* we don't need to take nm_idmap_lock because active config
1012                  * lock prevents changes from happening to nodemaps
1013                  */
1014                 root = nodemap->nm_client_to_fs_uidmap;
1015                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1016                                                         id_client_to_fs) {
1017                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1018                                                idmap->id_client);
1019                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1020                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1021                         if (rc2 < 0)
1022                                 rc = rc2;
1023                 }
1024
1025                 root = nodemap->nm_client_to_fs_gidmap;
1026                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1027                                                         id_client_to_fs) {
1028                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1029                                                idmap->id_client);
1030                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1031                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1032                         if (rc2 < 0)
1033                                 rc = rc2;
1034                 }
1035         }
1036         nodemap_global_key_init(&nk);
1037         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1038         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1039         if (rc2 < 0)
1040                 rc = rc2;
1041
1042         mutex_unlock(&active_config_lock);
1043
1044         if (rc < 0) {
1045                 dt_object_put(env, o);
1046                 o = ERR_PTR(rc);
1047         }
1048
1049         RETURN(o);
1050 }
1051
1052 static void nodemap_save_all_caches(void)
1053 {
1054         struct nm_config_file   *ncf;
1055         struct lu_env            env;
1056         int                      rc = 0;
1057
1058         /* recreating nodemap cache requires fld_thread_key be in env */
1059         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1060         if (rc != 0) {
1061                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1062                 return;
1063         }
1064
1065         mutex_lock(&ncf_list_lock);
1066         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1067                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1068                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1069                 struct dt_object *o;
1070
1071                 /* put current config file so save conf can rewrite it */
1072                 dt_object_put_nocache(&env, ncf->ncf_obj);
1073                 ncf->ncf_obj = NULL;
1074
1075                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1076                 if (IS_ERR(o))
1077                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1078                               obd->obd_name, rc);
1079                 else
1080                         ncf->ncf_obj = o;
1081         }
1082         mutex_unlock(&ncf_list_lock);
1083
1084         lu_env_fini(&env);
1085 }
1086
1087 /* tracks if config still needs to be loaded, either from disk or network */
1088 static bool nodemap_config_loaded;
1089 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1090
1091 /**
1092  * Ensures that configs loaded over the wire are prioritized over those loaded
1093  * from disk.
1094  *
1095  * \param config        config to set as the active config
1096  */
1097 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1098 {
1099         mutex_lock(&nodemap_config_loaded_lock);
1100         nodemap_config_set_active(config);
1101         nodemap_config_loaded = true;
1102         nodemap_save_all_caches();
1103         mutex_unlock(&nodemap_config_loaded_lock);
1104 }
1105 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1106
1107 /**
1108  * Register a dt_object representing the config index file. This should be
1109  * called by targets in order to load the nodemap configuration from disk. The
1110  * dt_object should be created with local_index_find_or_create and the index
1111  * features should be enabled with do_index_try.
1112  *
1113  * \param obj   dt_object returned by local_index_find_or_create
1114  *
1115  * \retval      on success: nm_config_file handle for later deregistration
1116  * \retval      -ENOMEM         memory allocation failure
1117  * \retval      -ENOENT         error loading nodemap config
1118  * \retval      -EINVAL         error loading nodemap config
1119  * \retval      -EEXIST         nodemap config already registered for MGS
1120  */
1121 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1122                                                    struct dt_object *obj,
1123                                                    struct local_oid_storage *los)
1124 {
1125         struct nm_config_file *ncf;
1126         int rc = 0;
1127         ENTRY;
1128
1129         if (nodemap_mgs_ncf != NULL)
1130                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1131
1132         OBD_ALLOC_PTR(ncf);
1133         if (ncf == NULL)
1134                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1135
1136         /* if loading from cache, prevent activation of MGS config until cache
1137          * loading is done, so disk config is overwritten by MGS config.
1138          */
1139         mutex_lock(&nodemap_config_loaded_lock);
1140         rc = nodemap_load_entries(env, obj);
1141         if (!rc)
1142                 nodemap_config_loaded = true;
1143         mutex_unlock(&nodemap_config_loaded_lock);
1144
1145         if (rc) {
1146                 OBD_FREE_PTR(ncf);
1147                 GOTO(out, ncf = ERR_PTR(rc));
1148         }
1149
1150         lu_object_get(&obj->do_lu);
1151
1152         ncf->ncf_obj = obj;
1153         ncf->ncf_los = los;
1154
1155         nodemap_mgs_ncf = ncf;
1156
1157 out:
1158         return ncf;
1159 }
1160 EXPORT_SYMBOL(nm_config_file_register_mgs);
1161
1162 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1163                                                    struct dt_device *dev,
1164                                                    struct local_oid_storage *los)
1165 {
1166         struct nm_config_file *ncf;
1167         struct dt_object *config_obj = NULL;
1168         int rc = 0;
1169
1170         OBD_ALLOC_PTR(ncf);
1171         if (ncf == NULL)
1172                 RETURN(ERR_PTR(-ENOMEM));
1173
1174         /* don't load from cache if config already loaded */
1175         mutex_lock(&nodemap_config_loaded_lock);
1176         if (!nodemap_config_loaded) {
1177                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1178                 if (IS_ERR(config_obj))
1179                         rc = PTR_ERR(config_obj);
1180                 else
1181                         rc = nodemap_load_entries(env, config_obj);
1182
1183                 if (!rc)
1184                         nodemap_config_loaded = true;
1185         }
1186         mutex_unlock(&nodemap_config_loaded_lock);
1187         if (rc)
1188                 GOTO(out_ncf, rc);
1189
1190         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1191         if (!config_obj) {
1192                 config_obj = nodemap_save_config_cache(env, dev, los);
1193                 if (IS_ERR(config_obj))
1194                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1195         }
1196
1197         ncf->ncf_obj = config_obj;
1198         ncf->ncf_los = los;
1199
1200         mutex_lock(&ncf_list_lock);
1201         list_add(&ncf->ncf_list, &ncf_list_head);
1202         mutex_unlock(&ncf_list_lock);
1203
1204 out_ncf:
1205         if (rc) {
1206                 OBD_FREE_PTR(ncf);
1207                 RETURN(ERR_PTR(rc));
1208         }
1209
1210         RETURN(ncf);
1211 }
1212 EXPORT_SYMBOL(nm_config_file_register_tgt);
1213
1214 /**
1215  * Deregister a nm_config_file. Should be called by targets during cleanup.
1216  *
1217  * \param ncf   config file to deregister
1218  */
1219 void nm_config_file_deregister_mgs(const struct lu_env *env,
1220                                    struct nm_config_file *ncf)
1221 {
1222         ENTRY;
1223         LASSERT(nodemap_mgs_ncf == ncf);
1224
1225         nodemap_mgs_ncf = NULL;
1226         if (ncf->ncf_obj)
1227                 dt_object_put(env, ncf->ncf_obj);
1228
1229         OBD_FREE_PTR(ncf);
1230
1231         EXIT;
1232 }
1233 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1234
1235 void nm_config_file_deregister_tgt(const struct lu_env *env,
1236                                    struct nm_config_file *ncf)
1237 {
1238         ENTRY;
1239
1240         if (ncf == NULL)
1241                 return;
1242
1243         mutex_lock(&ncf_list_lock);
1244         list_del(&ncf->ncf_list);
1245         mutex_unlock(&ncf_list_lock);
1246
1247         if (ncf->ncf_obj)
1248                 dt_object_put(env, ncf->ncf_obj);
1249
1250         OBD_FREE_PTR(ncf);
1251
1252         EXIT;
1253 }
1254 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1255
1256 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1257                               struct lu_nodemap **recent_nodemap)
1258 {
1259         struct nodemap_key *key;
1260         union nodemap_rec *rec;
1261         char *entry;
1262         int j;
1263         int k;
1264         int rc = 0;
1265         int size = dt_nodemap_features.dif_keysize_max +
1266                    dt_nodemap_features.dif_recsize_max;
1267         ENTRY;
1268
1269         for (j = 0; j < LU_PAGE_COUNT; j++) {
1270                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1271                         return -EINVAL;
1272
1273                 /* get and process keys and records from page */
1274                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1275                         entry = lip->lp_idx.lip_entries + k * size;
1276                         key = (struct nodemap_key *)entry;
1277
1278                         entry += dt_nodemap_features.dif_keysize_max;
1279                         rec = (union nodemap_rec *)entry;
1280
1281                         rc = nodemap_process_keyrec(config, key, rec,
1282                                                     recent_nodemap);
1283                         if (rc < 0)
1284                                 return rc;
1285                 }
1286                 lip++;
1287         }
1288
1289         EXIT;
1290         return 0;
1291 }
1292 EXPORT_SYMBOL(nodemap_process_idx_pages);
1293
1294 static int nodemap_page_build(const struct lu_env *env, union lu_page *lp,
1295                               size_t nob, const struct dt_it_ops *iops,
1296                               struct dt_it *it, __u32 attr, void *arg)
1297 {
1298         struct idx_info *ii = (struct idx_info *)arg;
1299         struct lu_idxpage *lip = &lp->lp_idx;
1300         char *entry;
1301         size_t size = ii->ii_keysize + ii->ii_recsize;
1302         int rc;
1303         ENTRY;
1304
1305         if (nob < LIP_HDR_SIZE)
1306                 return -EINVAL;
1307
1308         /* initialize the header of the new container */
1309         memset(lip, 0, LIP_HDR_SIZE);
1310         lip->lip_magic = LIP_MAGIC;
1311         nob           -= LIP_HDR_SIZE;
1312
1313         entry = lip->lip_entries;
1314         do {
1315                 char            *tmp_entry = entry;
1316                 struct dt_key   *key;
1317                 __u64           hash;
1318                 enum nodemap_idx_type key_type;
1319
1320                 /* fetch 64-bit hash value */
1321                 hash = iops->store(env, it);
1322                 ii->ii_hash_end = hash;
1323
1324                 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1325                         if (lip->lip_nr != 0)
1326                                 GOTO(out, rc = 0);
1327                 }
1328
1329                 if (nob < size) {
1330                         if (lip->lip_nr == 0)
1331                                 GOTO(out, rc = -EINVAL);
1332                         GOTO(out, rc = 0);
1333                 }
1334
1335                 key = iops->key(env, it);
1336                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1337
1338                 /* on the first pass, get only the cluster types. On second
1339                  * pass, get all the rest */
1340                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1341                                 key_type == NODEMAP_CLUSTER_IDX) ||
1342                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1343                                 key_type != NODEMAP_CLUSTER_IDX &&
1344                                 key_type != NODEMAP_EMPTY_IDX)) {
1345                         memcpy(tmp_entry, key, ii->ii_keysize);
1346                         tmp_entry += ii->ii_keysize;
1347
1348                         /* and finally the record */
1349                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1350                                        attr);
1351                         if (rc != -ESTALE) {
1352                                 if (rc != 0)
1353                                         GOTO(out, rc);
1354
1355                                 /* hash/key/record successfully copied! */
1356                                 lip->lip_nr++;
1357                                 if (unlikely(lip->lip_nr == 1 &&
1358                                     ii->ii_count == 0))
1359                                         ii->ii_hash_start = hash;
1360
1361                                 entry = tmp_entry + ii->ii_recsize;
1362                                 nob -= size;
1363                         }
1364                 }
1365
1366                 /* move on to the next record */
1367                 do {
1368                         rc = iops->next(env, it);
1369                 } while (rc == -ESTALE);
1370
1371                 /* move to second pass */
1372                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1373                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1374                         rc = iops->load(env, it, 0);
1375                         if (rc == 0)
1376                                 rc = iops->next(env, it);
1377                         else if (rc > 0)
1378                                 rc = 0;
1379                         else
1380                                 GOTO(out, rc);
1381                 }
1382
1383         } while (rc == 0);
1384
1385         GOTO(out, rc);
1386 out:
1387         if (rc >= 0 && lip->lip_nr > 0)
1388                 /* one more container */
1389                 ii->ii_count++;
1390         if (rc > 0)
1391                 /* no more entries */
1392                 ii->ii_hash_end = II_END_OFF;
1393         return rc;
1394 }
1395
1396
1397 int nodemap_index_read(struct lu_env *env,
1398                        struct nm_config_file *ncf,
1399                        struct idx_info *ii,
1400                        const struct lu_rdpg *rdpg)
1401 {
1402         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1403         __u64                    version;
1404         int                      rc = 0;
1405
1406         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1407         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1408
1409         dt_read_lock(env, nodemap_idx, 0);
1410         version = dt_version_get(env, nodemap_idx);
1411         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1412                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1413                        ii->ii_version,
1414                        version);
1415                 ii->ii_hash_end = 0;
1416         } else {
1417                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1418                                    ii);
1419                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1420         }
1421
1422         if (rc >= 0)
1423                 ii->ii_version = version;
1424
1425         dt_read_unlock(env, nodemap_idx);
1426         return rc;
1427 }
1428 EXPORT_SYMBOL(nodemap_index_read);
1429
1430 /**
1431  * Returns the current nodemap configuration to MGC by walking the nodemap
1432  * config index and storing it in the response buffer.
1433  *
1434  * \param       req             incoming MGS_CONFIG_READ request
1435  * \retval      0               success
1436  * \retval      -EINVAL         malformed request
1437  * \retval      -ENOTCONN       client evicted/reconnected already
1438  * \retval      -ETIMEDOUT      client timeout or network error
1439  * \retval      -ENOMEM
1440  */
1441 int nodemap_get_config_req(struct obd_device *mgs_obd,
1442                            struct ptlrpc_request *req)
1443 {
1444         struct mgs_config_body *body;
1445         struct mgs_config_res *res;
1446         struct lu_rdpg rdpg;
1447         struct idx_info nodemap_ii;
1448         struct ptlrpc_bulk_desc *desc;
1449         struct l_wait_info lwi;
1450         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1451         int i;
1452         int page_count;
1453         int bytes = 0;
1454         int rc = 0;
1455
1456         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1457         if (!body)
1458                 RETURN(-EINVAL);
1459
1460         if (body->mcb_type != CONFIG_T_NODEMAP)
1461                 RETURN(-EINVAL);
1462
1463         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1464         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1465                 PAGE_SHIFT;
1466         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1467                 RETURN(-EINVAL);
1468
1469         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1470                body->mcb_name, rdpg.rp_count);
1471
1472         /* allocate pages to store the containers */
1473         OBD_ALLOC(rdpg.rp_pages, sizeof(*rdpg.rp_pages) * rdpg.rp_npages);
1474         if (rdpg.rp_pages == NULL)
1475                 RETURN(-ENOMEM);
1476         for (i = 0; i < rdpg.rp_npages; i++) {
1477                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1478                 if (rdpg.rp_pages[i] == NULL)
1479                         GOTO(out, rc = -ENOMEM);
1480         }
1481
1482         rdpg.rp_hash = body->mcb_offset;
1483         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1484         nodemap_ii.ii_flags = II_FL_NOHASH;
1485         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1486         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1487
1488         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1489                                    mgs_obd->u.obt.obt_nodemap_config_file,
1490                                    &nodemap_ii, &rdpg);
1491         if (bytes < 0)
1492                 GOTO(out, rc = bytes);
1493
1494         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1495
1496         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1497         if (res == NULL)
1498                 GOTO(out, rc = -EINVAL);
1499         res->mcr_offset = nodemap_ii.ii_hash_end;
1500         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1501
1502         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1503         LASSERT(page_count <= rdpg.rp_count);
1504         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1505                                     PTLRPC_BULK_PUT_SOURCE |
1506                                         PTLRPC_BULK_BUF_KIOV,
1507                                     MGS_BULK_PORTAL,
1508                                     &ptlrpc_bulk_kiov_pin_ops);
1509         if (desc == NULL)
1510                 GOTO(out, rc = -ENOMEM);
1511
1512         for (i = 0; i < page_count && bytes > 0; i++) {
1513                 ptlrpc_prep_bulk_page_pin(desc, rdpg.rp_pages[i], 0,
1514                                           min_t(int, bytes, PAGE_SIZE));
1515                 bytes -= PAGE_SIZE;
1516         }
1517
1518         rc = target_bulk_io(req->rq_export, desc, &lwi);
1519         ptlrpc_free_bulk(desc);
1520
1521 out:
1522         if (rdpg.rp_pages != NULL) {
1523                 for (i = 0; i < rdpg.rp_npages; i++)
1524                         if (rdpg.rp_pages[i] != NULL)
1525                                 __free_page(rdpg.rp_pages[i]);
1526                 OBD_FREE(rdpg.rp_pages,
1527                          rdpg.rp_npages * sizeof(rdpg.rp_pages[0]));
1528         }
1529         return rc;
1530 }
1531 EXPORT_SYMBOL(nodemap_get_config_req);