Whamcloud - gitweb
LU-8726 osd-ldiskfs: bypass read for benchmarking
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2014, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <lnet/types.h>
54 #include <lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
62
63 /* list of registered nodemap index files, except MGS */
64 static LIST_HEAD(ncf_list_head);
/* NOTE(review): presumably serializes access to ncf_list_head; no user of the
 * lock is visible in this part of the file -- confirm against the code that
 * registers and deregisters config files.
 */
65 static DEFINE_MUTEX(ncf_list_lock);
66
67 /* MGS index is different than others, others are listeners to MGS idx */
/* The nodemap_idx_*() writers in this file return -EINVAL while this pointer
 * is NULL; otherwise they store records into its ncf_obj index object.
 */
68 static struct nm_config_file *nodemap_mgs_ncf;
69
/*
 * lu_nodemap flags as packed into the ncr_flags word of a cluster record.
 * Each enumerator is a bit mask (not a shift count, despite the enum's name)
 * that is OR-ed in when the matching nmf_* boolean of the nodemap is set.
 */
enum nm_flag_shifts {
        NM_FL_ALLOW_ROOT_ACCESS = 1 << 0,
        NM_FL_TRUST_CLIENT_IDS  = 1 << 1,
        NM_FL_DENY_UNKNOWN      = 1 << 2,
};
76
77 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
78 {
79         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
80                                                         NODEMAP_CLUSTER_IDX));
81         nk->nk_unused = 0;
82 }
83
84 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
85                                      const struct lu_nodemap *nodemap)
86 {
87         CLASSERT(sizeof(nr->ncr.ncr_name) == sizeof(nodemap->nm_name));
88
89         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nodemap->nm_name));
90         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
91         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
92         nr->ncr.ncr_flags = cpu_to_le32(
93                 (nodemap->nmf_trust_client_ids ?
94                         NM_FL_TRUST_CLIENT_IDS : 0) |
95                 (nodemap->nmf_allow_root_access ?
96                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
97                 (nodemap->nmf_deny_unknown ?
98                         NM_FL_DENY_UNKNOWN : 0));
99 }
100
101 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
102                                    enum nodemap_id_type id_type,
103                                    u32 id_client)
104 {
105         enum nodemap_idx_type idx_type;
106
107         if (id_type == NODEMAP_UID)
108                 idx_type = NODEMAP_UIDMAP_IDX;
109         else
110                 idx_type = NODEMAP_GIDMAP_IDX;
111
112         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
113         nk->nk_id_client = cpu_to_le32(id_client);
114 }
115
116 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
117 {
118         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
119 }
120
121 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
122                                    unsigned int rn_id)
123 {
124         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
125                                                         NODEMAP_RANGE_IDX));
126         nk->nk_range_id = cpu_to_le32(rn_id);
127 }
128
129 static void nodemap_range_rec_init(union nodemap_rec *nr,
130                                    const lnet_nid_t nid[2])
131 {
132         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
133         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
134 }
135
136 static void nodemap_global_key_init(struct nodemap_key *nk)
137 {
138         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
139         nk->nk_unused = 0;
140 }
141
142 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
143 {
144         nr->ngr.ngr_is_active = active;
145 }
146
147 /* should be called with dt_write lock */
148 static void nodemap_inc_version(const struct lu_env *env,
149                                 struct dt_object *nodemap_idx,
150                                 struct thandle *th)
151 {
152         u64 ver = dt_version_get(env, nodemap_idx);
153         dt_version_set(env, nodemap_idx, ver + 1, th);
154 }
155
156 enum ncfc_find_create {
157         NCFC_CREATE_NEW = 1,
158 };
159
160 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
161                                                    struct dt_device *dev,
162                                                    struct local_oid_storage *los,
163                                                    enum ncfc_find_create create_new)
164 {
165         struct lu_fid root_fid;
166         struct dt_object *root_obj;
167         struct dt_object *nm_obj;
168         int rc = 0;
169
170         rc = dt_root_get(env, dev, &root_fid);
171         if (rc < 0)
172                 GOTO(out, nm_obj = ERR_PTR(rc));
173
174         root_obj = dt_locate(env, dev, &root_fid);
175         if (unlikely(IS_ERR(root_obj)))
176                 GOTO(out, nm_obj = root_obj);
177
178 again:
179         /* if loading index fails the first time, create new index */
180         if (create_new == NCFC_CREATE_NEW) {
181                 CDEBUG(D_INFO, "removing old index, creating new one\n");
182                 rc = local_object_unlink(env, dev, root_obj,
183                                          LUSTRE_NODEMAP_NAME);
184                 if (rc < 0) {
185                         /* XXX not sure the best way to get obd name. */
186                         CERROR("cannot destroy nodemap index: rc = %d\n",
187                                rc);
188                         GOTO(out_root, nm_obj = ERR_PTR(rc));
189                 }
190         }
191
192         nm_obj = local_index_find_or_create(env, los, root_obj,
193                                                 LUSTRE_NODEMAP_NAME,
194                                                 S_IFREG | S_IRUGO | S_IWUSR,
195                                                 &dt_nodemap_features);
196         if (IS_ERR(nm_obj))
197                 GOTO(out_root, nm_obj);
198
199         if (nm_obj->do_index_ops == NULL) {
200                 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
201                                                       &dt_nodemap_features);
202                 /* even if loading from tgt fails, connecting to MGS will
203                  * rewrite the config
204                  */
205                 if (rc < 0) {
206                         lu_object_put(env, &nm_obj->do_lu);
207
208                         if (create_new == NCFC_CREATE_NEW)
209                                 GOTO(out_root, nm_obj = ERR_PTR(rc));
210
211                         CERROR("cannot load nodemap index from disk, creating "
212                                "new index: rc = %d\n", rc);
213                         create_new = NCFC_CREATE_NEW;
214                         goto again;
215                 }
216         }
217
218 out_root:
219         lu_object_put(env, &root_obj->do_lu);
220 out:
221         return nm_obj;
222 }
223
224 static int nodemap_idx_insert(const struct lu_env *env,
225                               struct dt_object *idx,
226                               const struct nodemap_key *nk,
227                               const union nodemap_rec *nr)
228 {
229         struct thandle          *th;
230         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
231         int                      rc;
232
233         CLASSERT(sizeof(union nodemap_rec) == 32);
234
235         th = dt_trans_create(env, dev);
236
237         if (IS_ERR(th))
238                 GOTO(out, rc = PTR_ERR(th));
239
240         rc = dt_declare_insert(env, idx,
241                                (const struct dt_rec *)nr,
242                                (const struct dt_key *)nk, th);
243         if (rc != 0)
244                 GOTO(out, rc);
245
246         rc = dt_declare_version_set(env, idx, th);
247         if (rc != 0)
248                 GOTO(out, rc);
249
250         rc = dt_trans_start_local(env, dev, th);
251         if (rc != 0)
252                 GOTO(out, rc);
253
254         dt_write_lock(env, idx, 0);
255
256         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
257                        (const struct dt_key *)nk, th, 1);
258
259         nodemap_inc_version(env, idx, th);
260         dt_write_unlock(env, idx);
261 out:
262         dt_trans_stop(env, dev, th);
263
264         return rc;
265 }
266
267 static int nodemap_idx_update(const struct lu_env *env,
268                               struct dt_object *idx,
269                               const struct nodemap_key *nk,
270                               const union nodemap_rec *nr)
271 {
272         struct thandle          *th;
273         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
274         int                      rc = 0;
275
276         th = dt_trans_create(env, dev);
277
278         if (IS_ERR(th))
279                 GOTO(out, rc = PTR_ERR(th));
280
281         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
282         if (rc != 0)
283                 GOTO(out, rc);
284
285         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
286                                (const struct dt_key *)nk, th);
287         if (rc != 0)
288                 GOTO(out, rc);
289
290         rc = dt_declare_version_set(env, idx, th);
291         if (rc != 0)
292                 GOTO(out, rc);
293
294         rc = dt_trans_start_local(env, dev, th);
295         if (rc != 0)
296                 GOTO(out, rc);
297
298         dt_write_lock(env, idx, 0);
299
300         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
301         if (rc != 0)
302                 GOTO(out_lock, rc);
303
304         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
305                        (const struct dt_key *)nk, th, 1);
306         if (rc != 0)
307                 GOTO(out_lock, rc);
308
309         nodemap_inc_version(env, idx, th);
310 out_lock:
311         dt_write_unlock(env, idx);
312 out:
313         dt_trans_stop(env, dev, th);
314
315         return rc;
316 }
317
318 static int nodemap_idx_delete(const struct lu_env *env,
319                               struct dt_object *idx,
320                               const struct nodemap_key *nk,
321                               const union nodemap_rec *unused)
322 {
323         struct thandle          *th;
324         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
325         int                      rc = 0;
326
327         th = dt_trans_create(env, dev);
328
329         if (IS_ERR(th))
330                 GOTO(out, rc = PTR_ERR(th));
331
332         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
333         if (rc != 0)
334                 GOTO(out, rc);
335
336         rc = dt_declare_version_set(env, idx, th);
337         if (rc != 0)
338                 GOTO(out, rc);
339
340         rc = dt_trans_start_local(env, dev, th);
341         if (rc != 0)
342                 GOTO(out, rc);
343
344         dt_write_lock(env, idx, 0);
345
346         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
347
348         nodemap_inc_version(env, idx, th);
349
350         dt_write_unlock(env, idx);
351 out:
352         dt_trans_stop(env, dev, th);
353
354         return rc;
355 }
356
357 enum nm_add_update {
358         NM_ADD = 0,
359         NM_UPDATE = 1,
360 };
361
362 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
363                                           struct dt_object *idx,
364                                           enum nm_add_update update)
365 {
366         struct nodemap_key nk;
367         union nodemap_rec nr;
368         struct lu_env env;
369         int rc = 0;
370
371         ENTRY;
372
373         rc = lu_env_init(&env, LCT_LOCAL);
374         if (rc)
375                 RETURN(rc);
376
377         nodemap_cluster_key_init(&nk, nodemap->nm_id);
378         nodemap_cluster_rec_init(&nr, nodemap);
379
380         if (update == NM_UPDATE)
381                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
382         else
383                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
384
385         lu_env_fini(&env);
386
387         RETURN(rc);
388 }
389
390 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
391 {
392         if (nodemap_mgs_ncf == NULL) {
393                 CERROR("cannot add nodemap config to non-existing MGS.\n");
394                 return -EINVAL;
395         }
396
397         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
398                                               NM_ADD);
399 }
400
401 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
402 {
403         if (nodemap_mgs_ncf == NULL) {
404                 CERROR("cannot add nodemap config to non-existing MGS.\n");
405                 return -EINVAL;
406         }
407
408         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
409                                               NM_UPDATE);
410 }
411
412 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
413 {
414         struct rb_root           root;
415         struct lu_idmap         *idmap;
416         struct lu_idmap         *temp;
417         struct lu_nid_range     *range;
418         struct lu_nid_range     *range_temp;
419         struct nodemap_key       nk;
420         struct lu_env            env;
421         int                      rc = 0;
422         int                      rc2 = 0;
423
424         ENTRY;
425
426         if (nodemap_mgs_ncf == NULL) {
427                 CERROR("cannot add nodemap config to non-existing MGS.\n");
428                 return -EINVAL;
429         }
430
431         rc = lu_env_init(&env, LCT_LOCAL);
432         if (rc != 0)
433                 RETURN(rc);
434
435         root = nodemap->nm_fs_to_client_uidmap;
436         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
437                                                 id_fs_to_client) {
438                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
439                                        idmap->id_client);
440                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
441                                          &nk, NULL);
442                 if (rc2 < 0)
443                         rc = rc2;
444         }
445
446         root = nodemap->nm_client_to_fs_gidmap;
447         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
448                                                 id_client_to_fs) {
449                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
450                                        idmap->id_client);
451                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
452                                          &nk, NULL);
453                 if (rc2 < 0)
454                         rc = rc2;
455         }
456
457         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
458                                  rn_list) {
459                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
460                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
461                                          &nk, NULL);
462                 if (rc2 < 0)
463                         rc = rc2;
464         }
465
466         nodemap_cluster_key_init(&nk, nodemap->nm_id);
467         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
468         if (rc2 < 0)
469                 rc = rc2;
470
471         lu_env_fini(&env);
472
473         RETURN(rc);
474 }
475
476 int nodemap_idx_range_add(const struct lu_nid_range *range,
477                           const lnet_nid_t nid[2])
478 {
479         struct nodemap_key       nk;
480         union nodemap_rec        nr;
481         struct lu_env            env;
482         int                      rc = 0;
483         ENTRY;
484
485         if (nodemap_mgs_ncf == NULL) {
486                 CERROR("cannot add nodemap config to non-existing MGS.\n");
487                 return -EINVAL;
488         }
489
490         rc = lu_env_init(&env, LCT_LOCAL);
491         if (rc != 0)
492                 RETURN(rc);
493
494         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
495         nodemap_range_rec_init(&nr, nid);
496
497         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
498         lu_env_fini(&env);
499
500         RETURN(rc);
501 }
502
503 int nodemap_idx_range_del(const struct lu_nid_range *range)
504 {
505         struct nodemap_key       nk;
506         struct lu_env            env;
507         int                      rc = 0;
508         ENTRY;
509
510         if (nodemap_mgs_ncf == NULL) {
511                 CERROR("cannot add nodemap config to non-existing MGS.\n");
512                 return -EINVAL;
513         }
514
515         rc = lu_env_init(&env, LCT_LOCAL);
516         if (rc != 0)
517                 RETURN(rc);
518
519         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
520
521         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
522         lu_env_fini(&env);
523
524         RETURN(rc);
525 }
526
527 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
528                           enum nodemap_id_type id_type,
529                           const u32 map[2])
530 {
531         struct nodemap_key       nk;
532         union nodemap_rec        nr;
533         struct lu_env            env;
534         int                      rc = 0;
535         ENTRY;
536
537         if (nodemap_mgs_ncf == NULL) {
538                 CERROR("cannot add nodemap config to non-existing MGS.\n");
539                 return -EINVAL;
540         }
541
542         rc = lu_env_init(&env, LCT_LOCAL);
543         if (rc != 0)
544                 RETURN(rc);
545
546         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
547         nodemap_idmap_rec_init(&nr, map[1]);
548
549         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
550         lu_env_fini(&env);
551
552         RETURN(rc);
553 }
554
555 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
556                           enum nodemap_id_type id_type,
557                           const u32 map[2])
558 {
559         struct nodemap_key       nk;
560         struct lu_env            env;
561         int                      rc = 0;
562         ENTRY;
563
564         if (nodemap_mgs_ncf == NULL) {
565                 CERROR("cannot add nodemap config to non-existing MGS.\n");
566                 return -EINVAL;
567         }
568
569         rc = lu_env_init(&env, LCT_LOCAL);
570         if (rc != 0)
571                 RETURN(rc);
572
573         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
574
575         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
576         lu_env_fini(&env);
577
578         RETURN(rc);
579 }
580
581 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
582 {
583         struct nodemap_key       nk;
584         union nodemap_rec        nr;
585         struct lu_env            env;
586         int                      rc = 0;
587         ENTRY;
588
589         if (nodemap_mgs_ncf == NULL) {
590                 CERROR("cannot add nodemap config to non-existing MGS.\n");
591                 return -EINVAL;
592         }
593
594         rc = lu_env_init(&env, LCT_LOCAL);
595         if (rc != 0)
596                 RETURN(rc);
597
598         nodemap_global_key_init(&nk);
599         nodemap_global_rec_init(&nr, value);
600
601         if (update == NM_UPDATE)
602                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
603                                         &nk, &nr);
604         else
605                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
606                                         &nk, &nr);
607
608         lu_env_fini(&env);
609
610         RETURN(rc);
611 }
612
613 int nodemap_idx_nodemap_activate(bool value)
614 {
615         return nodemap_idx_global_add_update(value, NM_UPDATE);
616 }
617
618 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
619 {
620         u32                      nodemap_id;
621
622         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
623         return nm_idx_get_type(nodemap_id);
624 }
625
626 /**
627  * Process a key/rec pair and modify the new configuration.
628  *
629  * \param       config          configuration to update with this key/rec data
630  * \param       key             key of the record that was loaded
631  * \param       rec             record that was loaded
632  * \param       recent_nodemap  last referenced nodemap
633  * \retval      type of record processed, see enum #nodemap_idx_type
634  * \retval      -ENOENT         range or map loaded before nodemap record
635  * \retval      -EINVAL         duplicate nodemap cluster records found with
636  *                              different IDs, or nodemap has invalid name
637  * \retval      -ENOMEM
638  */
639 static int nodemap_process_keyrec(struct nodemap_config *config,
640                                   const struct nodemap_key *key,
641                                   const union nodemap_rec *rec,
642                                   struct lu_nodemap **recent_nodemap)
643 {
644         struct lu_nodemap       *nodemap = NULL;
645         enum nodemap_idx_type    type;
646         enum nodemap_id_type     id_type;
647         u8                       flags;
648         u32                      nodemap_id;
649         lnet_nid_t               nid[2];
650         u32                      map[2];
651         int                      rc;
652
653         ENTRY;
654
655         CLASSERT(sizeof(union nodemap_rec) == 32);
656
657         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
658         type = nodemap_get_key_type(key);
659         nodemap_id = nm_idx_set_type(nodemap_id, 0);
660
661         CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
662                nodemap_id, type);
663
664         /* find the correct nodemap in the load list */
665         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
666             type == NODEMAP_GIDMAP_IDX) {
667                 struct lu_nodemap *tmp = NULL;
668
669                 nodemap = *recent_nodemap;
670
671                 if (nodemap == NULL)
672                         GOTO(out, rc = -ENOENT);
673
674                 if (nodemap->nm_id != nodemap_id) {
675                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
676                                 if (tmp->nm_id == nodemap_id) {
677                                         nodemap = tmp;
678                                         break;
679                                 }
680
681                         if (nodemap->nm_id != nodemap_id)
682                                 GOTO(out, rc = -ENOENT);
683                 }
684
685                 /* update most recently used nodemap if necessay */
686                 if (nodemap != *recent_nodemap)
687                         *recent_nodemap = nodemap;
688         }
689
690         switch (type) {
691         case NODEMAP_EMPTY_IDX:
692                 if (nodemap_id != 0)
693                         CWARN("Found nodemap config record without type field, "
694                               " nodemap_id=%d. nodemap config file corrupt?\n",
695                               nodemap_id);
696                 break;
697         case NODEMAP_CLUSTER_IDX:
698                 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
699                                           rec->ncr.ncr_name);
700                 if (nodemap == NULL) {
701                         if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
702                                 nodemap = nodemap_create(rec->ncr.ncr_name,
703                                                          config, 1);
704                                 config->nmc_default_nodemap = nodemap;
705                         } else {
706                                 nodemap = nodemap_create(rec->ncr.ncr_name,
707                                                          config, 0);
708                         }
709                         if (IS_ERR(nodemap))
710                                 GOTO(out, rc = PTR_ERR(nodemap));
711
712                         /* we need to override the local ID with the saved ID */
713                         nodemap->nm_id = nodemap_id;
714                         if (nodemap_id > config->nmc_nodemap_highest_id)
715                                 config->nmc_nodemap_highest_id = nodemap_id;
716
717                 } else if (nodemap->nm_id != nodemap_id) {
718                         nodemap_putref(nodemap);
719                         GOTO(out, rc = -EINVAL);
720                 }
721
722                 nodemap->nm_squash_uid =
723                                 le32_to_cpu(rec->ncr.ncr_squash_uid);
724                 nodemap->nm_squash_gid =
725                                 le32_to_cpu(rec->ncr.ncr_squash_gid);
726
727                 flags = le32_to_cpu(rec->ncr.ncr_flags);
728                 nodemap->nmf_allow_root_access =
729                                         flags & NM_FL_ALLOW_ROOT_ACCESS;
730                 nodemap->nmf_trust_client_ids =
731                                         flags & NM_FL_TRUST_CLIENT_IDS;
732                 nodemap->nmf_deny_unknown =
733                                         flags & NM_FL_DENY_UNKNOWN;
734
735                 if (*recent_nodemap == NULL) {
736                         *recent_nodemap = nodemap;
737                         INIT_LIST_HEAD(&nodemap->nm_list);
738                 } else {
739                         list_add(&nodemap->nm_list,
740                                  &(*recent_nodemap)->nm_list);
741                 }
742                 nodemap_putref(nodemap);
743                 break;
744         case NODEMAP_RANGE_IDX:
745                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
746                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
747
748                 rc = nodemap_add_range_helper(config, nodemap, nid,
749                                         le32_to_cpu(key->nk_range_id));
750                 if (rc != 0)
751                         GOTO(out, rc);
752                 break;
753         case NODEMAP_UIDMAP_IDX:
754         case NODEMAP_GIDMAP_IDX:
755                 map[0] = le32_to_cpu(key->nk_id_client);
756                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
757
758                 if (type == NODEMAP_UIDMAP_IDX)
759                         id_type = NODEMAP_UID;
760                 else
761                         id_type = NODEMAP_GID;
762
763                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
764                 if (rc != 0)
765                         GOTO(out, rc);
766                 break;
767         case NODEMAP_GLOBAL_IDX:
768                 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
769                 break;
770         default:
771                 CERROR("got keyrec pair for unknown type %d\n", type);
772                 break;
773         }
774
775         rc = type;
776
777         EXIT;
778
779 out:
780         return rc;
781 }
782
/*
 * Passes made over the index when loading the configuration: the first one
 * handles cluster records only, the second one every record that is neither
 * a cluster record nor an empty one.
 */
enum nm_config_passes {
        NM_READ_CLUSTERS,
        NM_READ_ATTRIBUTES,
};
787
788 static int nodemap_load_entries(const struct lu_env *env,
789                                 struct dt_object *nodemap_idx)
790 {
791         const struct dt_it_ops *iops;
792         struct dt_it *it;
793         struct lu_nodemap *recent_nodemap = NULL;
794         struct nodemap_config *new_config = NULL;
795         u64 hash = 0;
796         bool activate_nodemap = false;
797         bool loaded_global_idx = false;
798         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
799         int rc = 0;
800
801         ENTRY;
802
803         iops = &nodemap_idx->do_index_ops->dio_it;
804
805         dt_read_lock(env, nodemap_idx, 0);
806         it = iops->init(env, nodemap_idx, 0);
807         if (IS_ERR(it))
808                 GOTO(out, rc = PTR_ERR(it));
809
810         rc = iops->load(env, it, hash);
811         if (rc < 0)
812                 GOTO(out_iops_fini, rc);
813
814         /* rc == 0 means we need to advance to record */
815         if (rc == 0) {
816                 rc = iops->next(env, it);
817
818                 if (rc < 0)
819                         GOTO(out_iops_put, rc);
820                 /* rc > 0 is eof, will be checked in while below */
821         } else {
822                 /* rc == 1, we found initial record and can process below */
823                 rc = 0;
824         }
825
826         new_config = nodemap_config_alloc();
827         if (IS_ERR(new_config)) {
828                 rc = PTR_ERR(new_config);
829                 new_config = NULL;
830                 GOTO(out_iops_put, rc);
831         }
832
833         /* rc > 0 is eof, check initial iops->next here as well */
834         while (rc == 0) {
835                 struct nodemap_key *key;
836                 union nodemap_rec rec;
837                 enum nodemap_idx_type key_type;
838
839                 key = (struct nodemap_key *)iops->key(env, it);
840                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
841                 if ((cur_pass == NM_READ_CLUSTERS &&
842                                 key_type == NODEMAP_CLUSTER_IDX) ||
843                     (cur_pass == NM_READ_ATTRIBUTES &&
844                                 key_type != NODEMAP_CLUSTER_IDX &&
845                                 key_type != NODEMAP_EMPTY_IDX)) {
846                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
847                         if (rc != -ESTALE) {
848                                 if (rc != 0)
849                                         GOTO(out_nodemap_config, rc);
850                                 rc = nodemap_process_keyrec(new_config, key, &rec,
851                                                             &recent_nodemap);
852                                 if (rc < 0)
853                                         GOTO(out_nodemap_config, rc);
854                                 if (rc == NODEMAP_GLOBAL_IDX)
855                                         loaded_global_idx = true;
856                         }
857                 }
858
859                 do
860                         rc = iops->next(env, it);
861                 while (rc == -ESTALE);
862
863                 /* move to second pass */
864                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
865                         cur_pass = NM_READ_ATTRIBUTES;
866                         rc = iops->load(env, it, 0);
867                         if (rc == 0)
868                                 rc = iops->next(env, it);
869                         else if (rc > 0)
870                                 rc = 0;
871                         else
872                                 GOTO(out, rc);
873                 }
874         }
875
876         if (rc > 0)
877                 rc = 0;
878
879 out_nodemap_config:
880         if (rc != 0)
881                 nodemap_config_dealloc(new_config);
882         else
883                 /* creating new default needs to be done outside dt read lock */
884                 activate_nodemap = true;
885 out_iops_put:
886         iops->put(env, it);
887 out_iops_fini:
888         iops->fini(env, it);
889 out:
890         dt_read_unlock(env, nodemap_idx);
891
892         if (rc != 0)
893                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
894                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
895
896         if (!activate_nodemap)
897                 RETURN(rc);
898
899         if (new_config->nmc_default_nodemap == NULL) {
900                 /* new MGS won't have a default nm on disk, so create it here */
901                 new_config->nmc_default_nodemap =
902                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
903                 if (IS_ERR(new_config->nmc_default_nodemap)) {
904                         rc = PTR_ERR(new_config->nmc_default_nodemap);
905                 } else {
906                         rc = nodemap_idx_nodemap_add_update(
907                                         new_config->nmc_default_nodemap,
908                                         nodemap_idx,
909                                         NM_ADD);
910                         nodemap_putref(new_config->nmc_default_nodemap);
911                 }
912         }
913
914         /* new nodemap config won't have an active/inactive record */
915         if (rc == 0 && loaded_global_idx == false) {
916                 struct nodemap_key       nk;
917                 union nodemap_rec        nr;
918
919                 nodemap_global_key_init(&nk);
920                 nodemap_global_rec_init(&nr, false);
921                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
922         }
923
924         if (rc == 0)
925                 nodemap_config_set_active(new_config);
926         else
927                 nodemap_config_dealloc(new_config);
928
929         RETURN(rc);
930 }
931
932 /**
933  * Step through active config and write to disk.
934  */
935 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
936                                             struct dt_device *dev,
937                                             struct local_oid_storage *los)
938 {
939         struct dt_object *o;
940         struct lu_nodemap *nodemap;
941         struct lu_nodemap *nm_tmp;
942         struct lu_nid_range *range;
943         struct lu_nid_range *range_temp;
944         struct lu_idmap *idmap;
945         struct lu_idmap *id_tmp;
946         struct rb_root root;
947         struct nodemap_key nk;
948         union nodemap_rec nr;
949         LIST_HEAD(nodemap_list_head);
950         int rc = 0, rc2;
951
952         ENTRY;
953
954         /* create a new index file to fill with active config */
955         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
956         if (IS_ERR(o))
957                 GOTO(out, o);
958
959         mutex_lock(&active_config_lock);
960
961         /* convert hash to list so we don't spin */
962         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
963                                nm_hash_list_cb, &nodemap_list_head);
964
965         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
966                 nodemap_cluster_key_init(&nk, nodemap->nm_id);
967                 nodemap_cluster_rec_init(&nr, nodemap);
968
969                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
970                 if (rc2 < 0) {
971                         rc = rc2;
972                         continue;
973                 }
974
975                 down_read(&active_config->nmc_range_tree_lock);
976                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
977                                          rn_list) {
978                         lnet_nid_t nid[2] = {
979                                 range->rn_node.in_extent.start,
980                                 range->rn_node.in_extent.end
981                         };
982                         nodemap_range_key_init(&nk, nodemap->nm_id,
983                                                range->rn_id);
984                         nodemap_range_rec_init(&nr, nid);
985                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
986                         if (rc2 < 0)
987                                 rc = rc2;
988                 }
989                 up_read(&active_config->nmc_range_tree_lock);
990
991                 /* we don't need to take nm_idmap_lock because active config
992                  * lock prevents changes from happening to nodemaps
993                  */
994                 root = nodemap->nm_client_to_fs_uidmap;
995                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
996                                                         id_client_to_fs) {
997                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
998                                                idmap->id_client);
999                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1000                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1001                         if (rc2 < 0)
1002                                 rc = rc2;
1003                 }
1004
1005                 root = nodemap->nm_client_to_fs_gidmap;
1006                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1007                                                         id_client_to_fs) {
1008                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1009                                                idmap->id_client);
1010                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1011                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1012                         if (rc2 < 0)
1013                                 rc = rc2;
1014                 }
1015         }
1016         nodemap_global_key_init(&nk);
1017         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1018         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1019         if (rc2 < 0)
1020                 rc = rc2;
1021
1022 out:
1023         mutex_unlock(&active_config_lock);
1024
1025         if (rc < 0) {
1026                 lu_object_put(env, &o->do_lu);
1027                 o = ERR_PTR(rc);
1028         }
1029
1030         RETURN(o);
1031 }
1032
1033 static void nodemap_save_all_caches(void)
1034 {
1035         struct nm_config_file   *ncf;
1036         struct lu_env            env;
1037         int                      rc = 0;
1038
1039         /* recreating nodemap cache requires fld_thread_key be in env */
1040         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1041         if (rc != 0) {
1042                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1043                 return;
1044         }
1045
1046         mutex_lock(&ncf_list_lock);
1047         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1048                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1049                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1050                 struct dt_object *o;
1051
1052                 /* put current config file so save conf can rewrite it */
1053                 lu_object_put_nocache(&env, &ncf->ncf_obj->do_lu);
1054                 ncf->ncf_obj = NULL;
1055
1056                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1057                 if (IS_ERR(o))
1058                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1059                               obd->obd_name, rc);
1060                 else
1061                         ncf->ncf_obj = o;
1062         }
1063         mutex_unlock(&ncf_list_lock);
1064
1065         lu_env_fini(&env);
1066 }
1067
1068 /* tracks if config still needs to be loaded, either from disk or network */
1069 static bool nodemap_config_loaded;
1070 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1071
1072 /**
1073  * Ensures that configs loaded over the wire are prioritized over those loaded
1074  * from disk.
1075  *
1076  * \param config        config to set as the active config
1077  */
1078 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1079 {
1080         mutex_lock(&nodemap_config_loaded_lock);
1081         nodemap_config_set_active(config);
1082         nodemap_config_loaded = true;
1083         nodemap_save_all_caches();
1084         mutex_unlock(&nodemap_config_loaded_lock);
1085 }
1086 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1087
1088 /**
1089  * Register a dt_object representing the config index file. This should be
1090  * called by targets in order to load the nodemap configuration from disk. The
1091  * dt_object should be created with local_index_find_or_create and the index
1092  * features should be enabled with do_index_try.
1093  *
1094  * \param obj   dt_object returned by local_index_find_or_create
1095  *
1096  * \retval      on success: nm_config_file handle for later deregistration
1097  * \retval      -ENOMEM         memory allocation failure
1098  * \retval      -ENOENT         error loading nodemap config
1099  * \retval      -EINVAL         error loading nodemap config
1100  * \retval      -EEXIST         nodemap config already registered for MGS
1101  */
1102 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1103                                                    struct dt_object *obj,
1104                                                    struct local_oid_storage *los)
1105 {
1106         struct nm_config_file *ncf;
1107         int rc = 0;
1108         ENTRY;
1109
1110         if (nodemap_mgs_ncf != NULL)
1111                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1112
1113         OBD_ALLOC_PTR(ncf);
1114         if (ncf == NULL)
1115                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1116
1117         /* if loading from cache, prevent activation of MGS config until cache
1118          * loading is done, so disk config is overwritten by MGS config.
1119          */
1120         mutex_lock(&nodemap_config_loaded_lock);
1121         rc = nodemap_load_entries(env, obj);
1122         if (!rc)
1123                 nodemap_config_loaded = true;
1124         mutex_unlock(&nodemap_config_loaded_lock);
1125
1126         if (rc) {
1127                 OBD_FREE_PTR(ncf);
1128                 GOTO(out, ncf = ERR_PTR(rc));
1129         }
1130
1131         lu_object_get(&obj->do_lu);
1132
1133         ncf->ncf_obj = obj;
1134         ncf->ncf_los = los;
1135
1136         nodemap_mgs_ncf = ncf;
1137
1138 out:
1139         return ncf;
1140 }
1141 EXPORT_SYMBOL(nm_config_file_register_mgs);
1142
1143 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1144                                                    struct dt_device *dev,
1145                                                    struct local_oid_storage *los)
1146 {
1147         struct nm_config_file *ncf;
1148         struct dt_object *config_obj = NULL;
1149         int rc = 0;
1150
1151         OBD_ALLOC_PTR(ncf);
1152         if (ncf == NULL)
1153                 RETURN(ERR_PTR(-ENOMEM));
1154
1155         /* don't load from cache if config already loaded */
1156         mutex_lock(&nodemap_config_loaded_lock);
1157         if (!nodemap_config_loaded) {
1158                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1159                 if (IS_ERR(config_obj))
1160                         rc = PTR_ERR(config_obj);
1161                 else
1162                         rc = nodemap_load_entries(env, config_obj);
1163
1164                 if (!rc)
1165                         nodemap_config_loaded = true;
1166         }
1167         mutex_unlock(&nodemap_config_loaded_lock);
1168         if (rc)
1169                 GOTO(out_ncf, rc);
1170
1171         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1172         if (!config_obj) {
1173                 config_obj = nodemap_save_config_cache(env, dev, los);
1174                 if (IS_ERR(config_obj))
1175                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1176         }
1177
1178         ncf->ncf_obj = config_obj;
1179         ncf->ncf_los = los;
1180
1181         mutex_lock(&ncf_list_lock);
1182         list_add(&ncf->ncf_list, &ncf_list_head);
1183         mutex_unlock(&ncf_list_lock);
1184
1185 out_ncf:
1186         if (rc) {
1187                 OBD_FREE_PTR(ncf);
1188                 RETURN(ERR_PTR(rc));
1189         }
1190
1191         RETURN(ncf);
1192 }
1193 EXPORT_SYMBOL(nm_config_file_register_tgt);
1194
1195 /**
1196  * Deregister a nm_config_file. Should be called by targets during cleanup.
1197  *
1198  * \param ncf   config file to deregister
1199  */
1200 void nm_config_file_deregister_mgs(const struct lu_env *env,
1201                                    struct nm_config_file *ncf)
1202 {
1203         ENTRY;
1204         LASSERT(nodemap_mgs_ncf == ncf);
1205
1206         nodemap_mgs_ncf = NULL;
1207         if (ncf->ncf_obj)
1208                 lu_object_put(env, &ncf->ncf_obj->do_lu);
1209
1210         OBD_FREE_PTR(ncf);
1211
1212         EXIT;
1213 }
1214 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1215
1216 void nm_config_file_deregister_tgt(const struct lu_env *env,
1217                                    struct nm_config_file *ncf)
1218 {
1219         ENTRY;
1220
1221         if (ncf == NULL)
1222                 return;
1223
1224         mutex_lock(&ncf_list_lock);
1225         list_del(&ncf->ncf_list);
1226         mutex_unlock(&ncf_list_lock);
1227
1228         if (ncf->ncf_obj)
1229                 lu_object_put(env, &ncf->ncf_obj->do_lu);
1230
1231         OBD_FREE_PTR(ncf);
1232
1233         EXIT;
1234 }
1235 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1236
1237 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1238                               struct lu_nodemap **recent_nodemap)
1239 {
1240         struct nodemap_key *key;
1241         union nodemap_rec *rec;
1242         char *entry;
1243         int j;
1244         int k;
1245         int rc = 0;
1246         int size = dt_nodemap_features.dif_keysize_max +
1247                    dt_nodemap_features.dif_recsize_max;
1248         ENTRY;
1249
1250         for (j = 0; j < LU_PAGE_COUNT; j++) {
1251                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1252                         return -EINVAL;
1253
1254                 /* get and process keys and records from page */
1255                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1256                         entry = lip->lp_idx.lip_entries + k * size;
1257                         key = (struct nodemap_key *)entry;
1258
1259                         entry += dt_nodemap_features.dif_keysize_max;
1260                         rec = (union nodemap_rec *)entry;
1261
1262                         rc = nodemap_process_keyrec(config, key, rec,
1263                                                     recent_nodemap);
1264                         if (rc < 0)
1265                                 return rc;
1266                 }
1267                 lip++;
1268         }
1269
1270         EXIT;
1271         return 0;
1272 }
1273 EXPORT_SYMBOL(nodemap_process_idx_pages);
1274
1275 static int nodemap_page_build(const struct lu_env *env, union lu_page *lp,
1276                               size_t nob, const struct dt_it_ops *iops,
1277                               struct dt_it *it, __u32 attr, void *arg)
1278 {
1279         struct idx_info *ii = (struct idx_info *)arg;
1280         struct lu_idxpage *lip = &lp->lp_idx;
1281         char *entry;
1282         size_t size = ii->ii_keysize + ii->ii_recsize;
1283         int rc;
1284         ENTRY;
1285
1286         if (nob < LIP_HDR_SIZE)
1287                 return -EINVAL;
1288
1289         /* initialize the header of the new container */
1290         memset(lip, 0, LIP_HDR_SIZE);
1291         lip->lip_magic = LIP_MAGIC;
1292         nob           -= LIP_HDR_SIZE;
1293
1294         entry = lip->lip_entries;
1295         do {
1296                 char            *tmp_entry = entry;
1297                 struct dt_key   *key;
1298                 __u64           hash;
1299                 enum nodemap_idx_type key_type;
1300
1301                 /* fetch 64-bit hash value */
1302                 hash = iops->store(env, it);
1303                 ii->ii_hash_end = hash;
1304
1305                 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1306                         if (lip->lip_nr != 0)
1307                                 GOTO(out, rc = 0);
1308                 }
1309
1310                 if (nob < size) {
1311                         if (lip->lip_nr == 0)
1312                                 GOTO(out, rc = -EINVAL);
1313                         GOTO(out, rc = 0);
1314                 }
1315
1316                 key = iops->key(env, it);
1317                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1318
1319                 /* on the first pass, get only the cluster types. On second
1320                  * pass, get all the rest */
1321                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1322                                 key_type == NODEMAP_CLUSTER_IDX) ||
1323                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1324                                 key_type != NODEMAP_CLUSTER_IDX &&
1325                                 key_type != NODEMAP_EMPTY_IDX)) {
1326                         memcpy(tmp_entry, key, ii->ii_keysize);
1327                         tmp_entry += ii->ii_keysize;
1328
1329                         /* and finally the record */
1330                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1331                                        attr);
1332                         if (rc != -ESTALE) {
1333                                 if (rc != 0)
1334                                         GOTO(out, rc);
1335
1336                                 /* hash/key/record successfully copied! */
1337                                 lip->lip_nr++;
1338                                 if (unlikely(lip->lip_nr == 1 &&
1339                                     ii->ii_count == 0))
1340                                         ii->ii_hash_start = hash;
1341
1342                                 entry = tmp_entry + ii->ii_recsize;
1343                                 nob -= size;
1344                         }
1345                 }
1346
1347                 /* move on to the next record */
1348                 do {
1349                         rc = iops->next(env, it);
1350                 } while (rc == -ESTALE);
1351
1352                 /* move to second pass */
1353                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1354                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1355                         rc = iops->load(env, it, 0);
1356                         if (rc == 0)
1357                                 rc = iops->next(env, it);
1358                         else if (rc > 0)
1359                                 rc = 0;
1360                         else
1361                                 GOTO(out, rc);
1362                 }
1363
1364         } while (rc == 0);
1365
1366         GOTO(out, rc);
1367 out:
1368         if (rc >= 0 && lip->lip_nr > 0)
1369                 /* one more container */
1370                 ii->ii_count++;
1371         if (rc > 0)
1372                 /* no more entries */
1373                 ii->ii_hash_end = II_END_OFF;
1374         return rc;
1375 }
1376
1377
1378 int nodemap_index_read(struct lu_env *env,
1379                        struct nm_config_file *ncf,
1380                        struct idx_info *ii,
1381                        const struct lu_rdpg *rdpg)
1382 {
1383         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1384         __u64                    version;
1385         int                      rc = 0;
1386
1387         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1388         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1389
1390         dt_read_lock(env, nodemap_idx, 0);
1391         version = dt_version_get(env, nodemap_idx);
1392         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1393                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1394                        ii->ii_version,
1395                        version);
1396                 ii->ii_hash_end = 0;
1397         } else {
1398                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1399                                    ii);
1400                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1401         }
1402
1403         if (rc >= 0)
1404                 ii->ii_version = version;
1405
1406         dt_read_unlock(env, nodemap_idx);
1407         return rc;
1408 }
1409 EXPORT_SYMBOL(nodemap_index_read);
1410
1411 /**
1412  * Returns the current nodemap configuration to MGC by walking the nodemap
1413  * config index and storing it in the response buffer.
1414  *
1415  * \param       req             incoming MGS_CONFIG_READ request
1416  * \retval      0               success
1417  * \retval      -EINVAL         malformed request
1418  * \retval      -ENOTCONN       client evicted/reconnected already
1419  * \retval      -ETIMEDOUT      client timeout or network error
1420  * \retval      -ENOMEM
1421  */
1422 int nodemap_get_config_req(struct obd_device *mgs_obd,
1423                            struct ptlrpc_request *req)
1424 {
1425         struct mgs_config_body *body;
1426         struct mgs_config_res *res;
1427         struct lu_rdpg rdpg;
1428         struct idx_info nodemap_ii;
1429         struct ptlrpc_bulk_desc *desc;
1430         struct l_wait_info lwi;
1431         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1432         int i;
1433         int page_count;
1434         int bytes = 0;
1435         int rc = 0;
1436
1437         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1438         if (!body)
1439                 RETURN(-EINVAL);
1440
1441         if (body->mcb_type != CONFIG_T_NODEMAP)
1442                 RETURN(-EINVAL);
1443
1444         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1445         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1446                 PAGE_SHIFT;
1447         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1448                 RETURN(-EINVAL);
1449
1450         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1451                body->mcb_name, rdpg.rp_count);
1452
1453         /* allocate pages to store the containers */
1454         OBD_ALLOC(rdpg.rp_pages, sizeof(*rdpg.rp_pages) * rdpg.rp_npages);
1455         if (rdpg.rp_pages == NULL)
1456                 RETURN(-ENOMEM);
1457         for (i = 0; i < rdpg.rp_npages; i++) {
1458                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1459                 if (rdpg.rp_pages[i] == NULL)
1460                         GOTO(out, rc = -ENOMEM);
1461         }
1462
1463         rdpg.rp_hash = body->mcb_offset;
1464         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1465         nodemap_ii.ii_flags = II_FL_NOHASH;
1466         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1467         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1468
1469         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1470                                    mgs_obd->u.obt.obt_nodemap_config_file,
1471                                    &nodemap_ii, &rdpg);
1472         if (bytes < 0)
1473                 GOTO(out, rc = bytes);
1474
1475         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1476
1477         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1478         if (res == NULL)
1479                 GOTO(out, rc = -EINVAL);
1480         res->mcr_offset = nodemap_ii.ii_hash_end;
1481         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1482
1483         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1484         LASSERT(page_count <= rdpg.rp_count);
1485         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1486                                     PTLRPC_BULK_PUT_SOURCE |
1487                                         PTLRPC_BULK_BUF_KIOV,
1488                                     MGS_BULK_PORTAL,
1489                                     &ptlrpc_bulk_kiov_pin_ops);
1490         if (desc == NULL)
1491                 GOTO(out, rc = -ENOMEM);
1492
1493         for (i = 0; i < page_count && bytes > 0; i++) {
1494                 ptlrpc_prep_bulk_page_pin(desc, rdpg.rp_pages[i], 0,
1495                                           min_t(int, bytes, PAGE_SIZE));
1496                 bytes -= PAGE_SIZE;
1497         }
1498
1499         rc = target_bulk_io(req->rq_export, desc, &lwi);
1500         ptlrpc_free_bulk(desc);
1501
1502 out:
1503         if (rdpg.rp_pages != NULL) {
1504                 for (i = 0; i < rdpg.rp_npages; i++)
1505                         if (rdpg.rp_pages[i] != NULL)
1506                                 __free_page(rdpg.rp_pages[i]);
1507                 OBD_FREE(rdpg.rp_pages,
1508                          rdpg.rp_npages * sizeof(rdpg.rp_pages[0]));
1509         }
1510         return rc;
1511 }
1512 EXPORT_SYMBOL(nodemap_get_config_req);