Whamcloud - gitweb
LU-13783 procfs: fix improper prop_ops fields
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2017, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <uapi/linux/lnet/lnet-types.h>
54 #include <uapi/linux/lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
62
/* lock paired (by name) with ncf_list_head; its users are outside this chunk */
static DEFINE_MUTEX(ncf_list_lock);
/* registered nodemap index files, the MGS one excluded */
static LIST_HEAD(ncf_list_head);

/* the MGS index is special: every other index is a listener of it */
static struct nm_config_file *nodemap_mgs_ncf;
69
/*
 * Bit masks used to pack the lu_nodemap boolean flags into the ncr_flags
 * word of a cluster record (see nodemap_cluster_rec_init()).
 */
enum nm_flag_shifts {
	NM_FL_ALLOW_ROOT_ACCESS	= 1 << 0,
	NM_FL_TRUST_CLIENT_IDS	= 1 << 1,
	NM_FL_DENY_UNKNOWN	= 1 << 2,
	NM_FL_MAP_UID_ONLY	= 1 << 3,
	NM_FL_MAP_GID_ONLY	= 1 << 4,
	NM_FL_ENABLE_AUDIT	= 1 << 5,
	NM_FL_FORBID_ENCRYPT	= 1 << 6,
};
80
81 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
82 {
83         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
84                                                         NODEMAP_CLUSTER_IDX));
85         nk->nk_unused = 0;
86 }
87
88 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
89                                      const struct lu_nodemap *nodemap)
90 {
91         BUILD_BUG_ON(sizeof(nr->ncr.ncr_name) != sizeof(nodemap->nm_name));
92
93         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nr->ncr.ncr_name));
94         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
95         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
96         nr->ncr.ncr_flags = cpu_to_le32(
97                 (nodemap->nmf_trust_client_ids ?
98                         NM_FL_TRUST_CLIENT_IDS : 0) |
99                 (nodemap->nmf_allow_root_access ?
100                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
101                 (nodemap->nmf_deny_unknown ?
102                         NM_FL_DENY_UNKNOWN : 0) |
103                 (nodemap->nmf_map_uid_only ?
104                         NM_FL_MAP_UID_ONLY : 0) |
105                 (nodemap->nmf_map_gid_only ?
106                         NM_FL_MAP_GID_ONLY : 0) |
107                 (nodemap->nmf_enable_audit ?
108                         NM_FL_ENABLE_AUDIT : 0) |
109                 (nodemap->nmf_forbid_encryption ?
110                         NM_FL_FORBID_ENCRYPT : 0));
111 }
112
113 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
114                                    enum nodemap_id_type id_type,
115                                    u32 id_client)
116 {
117         enum nodemap_idx_type idx_type;
118
119         if (id_type == NODEMAP_UID)
120                 idx_type = NODEMAP_UIDMAP_IDX;
121         else
122                 idx_type = NODEMAP_GIDMAP_IDX;
123
124         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
125         nk->nk_id_client = cpu_to_le32(id_client);
126 }
127
128 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
129 {
130         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
131 }
132
133 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
134                                    unsigned int rn_id)
135 {
136         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
137                                                         NODEMAP_RANGE_IDX));
138         nk->nk_range_id = cpu_to_le32(rn_id);
139 }
140
141 static void nodemap_range_rec_init(union nodemap_rec *nr,
142                                    const lnet_nid_t nid[2])
143 {
144         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
145         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
146 }
147
148 static void nodemap_global_key_init(struct nodemap_key *nk)
149 {
150         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
151         nk->nk_unused = 0;
152 }
153
154 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
155 {
156         nr->ngr.ngr_is_active = active;
157 }
158
/*
 * Bump the version of the index object \a nodemap_idx by one inside
 * transaction \a th. The caller must hold the dt write lock on the object.
 */
static void nodemap_inc_version(const struct lu_env *env,
				struct dt_object *nodemap_idx,
				struct thandle *th)
{
	dt_version_set(env, nodemap_idx,
		       dt_version_get(env, nodemap_idx) + 1, th);
}
167
/* mode argument of nodemap_cache_find_create() */
enum ncfc_find_create {
	/* discard any index already on disk and start from a fresh one */
	NCFC_CREATE_NEW = 1,
};
171
172 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
173                                                    struct dt_device *dev,
174                                                    struct local_oid_storage *los,
175                                                    enum ncfc_find_create create_new)
176 {
177         struct lu_fid tfid;
178         struct dt_object *root_obj;
179         struct dt_object *nm_obj;
180         int rc = 0;
181
182         rc = dt_root_get(env, dev, &tfid);
183         if (rc < 0)
184                 GOTO(out, nm_obj = ERR_PTR(rc));
185
186         root_obj = dt_locate(env, dev, &tfid);
187         if (unlikely(IS_ERR(root_obj)))
188                 GOTO(out, nm_obj = root_obj);
189
190         rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
191         if (rc == -ENOENT) {
192                 if (dev->dd_rdonly)
193                         GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
194         } else if (rc) {
195                 GOTO(out_root, nm_obj = ERR_PTR(rc));
196         } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
197                 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
198         }
199
200 again:
201         /* if loading index fails the first time, create new index */
202         if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
203                 CDEBUG(D_INFO, "removing old index, creating new one\n");
204                 rc = local_object_unlink(env, dev, root_obj,
205                                          LUSTRE_NODEMAP_NAME);
206                 if (rc < 0) {
207                         /* XXX not sure the best way to get obd name. */
208                         CERROR("cannot destroy nodemap index: rc = %d\n",
209                                rc);
210                         GOTO(out_root, nm_obj = ERR_PTR(rc));
211                 }
212         }
213
214         nm_obj = local_index_find_or_create(env, los, root_obj,
215                                                 LUSTRE_NODEMAP_NAME,
216                                                 S_IFREG | S_IRUGO | S_IWUSR,
217                                                 &dt_nodemap_features);
218         if (IS_ERR(nm_obj))
219                 GOTO(out_root, nm_obj);
220
221         if (nm_obj->do_index_ops == NULL) {
222                 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
223                                                       &dt_nodemap_features);
224                 /* even if loading from tgt fails, connecting to MGS will
225                  * rewrite the config
226                  */
227                 if (rc < 0) {
228                         dt_object_put(env, nm_obj);
229
230                         if (create_new == NCFC_CREATE_NEW)
231                                 GOTO(out_root, nm_obj = ERR_PTR(rc));
232
233                         CERROR("cannot load nodemap index from disk, creating "
234                                "new index: rc = %d\n", rc);
235                         create_new = NCFC_CREATE_NEW;
236                         goto again;
237                 }
238         }
239
240 out_root:
241         dt_object_put(env, root_obj);
242 out:
243         return nm_obj;
244 }
245
246 static int nodemap_idx_insert(const struct lu_env *env,
247                               struct dt_object *idx,
248                               const struct nodemap_key *nk,
249                               const union nodemap_rec *nr)
250 {
251         struct thandle *th;
252         struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
253         int rc;
254
255         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
256
257         th = dt_trans_create(env, dev);
258
259         if (IS_ERR(th))
260                 GOTO(out, rc = PTR_ERR(th));
261
262         rc = dt_declare_insert(env, idx,
263                                (const struct dt_rec *)nr,
264                                (const struct dt_key *)nk, th);
265         if (rc != 0)
266                 GOTO(out, rc);
267
268         rc = dt_declare_version_set(env, idx, th);
269         if (rc != 0)
270                 GOTO(out, rc);
271
272         rc = dt_trans_start_local(env, dev, th);
273         if (rc != 0)
274                 GOTO(out, rc);
275
276         dt_write_lock(env, idx, 0);
277
278         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
279                        (const struct dt_key *)nk, th);
280
281         nodemap_inc_version(env, idx, th);
282         dt_write_unlock(env, idx);
283 out:
284         dt_trans_stop(env, dev, th);
285
286         return rc;
287 }
288
289 static int nodemap_idx_update(const struct lu_env *env,
290                               struct dt_object *idx,
291                               const struct nodemap_key *nk,
292                               const union nodemap_rec *nr)
293 {
294         struct thandle          *th;
295         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
296         int                      rc = 0;
297
298         th = dt_trans_create(env, dev);
299
300         if (IS_ERR(th))
301                 GOTO(out, rc = PTR_ERR(th));
302
303         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
304         if (rc != 0)
305                 GOTO(out, rc);
306
307         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
308                                (const struct dt_key *)nk, th);
309         if (rc != 0)
310                 GOTO(out, rc);
311
312         rc = dt_declare_version_set(env, idx, th);
313         if (rc != 0)
314                 GOTO(out, rc);
315
316         rc = dt_trans_start_local(env, dev, th);
317         if (rc != 0)
318                 GOTO(out, rc);
319
320         dt_write_lock(env, idx, 0);
321
322         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
323         if (rc != 0)
324                 GOTO(out_lock, rc);
325
326         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
327                        (const struct dt_key *)nk, th);
328         if (rc != 0)
329                 GOTO(out_lock, rc);
330
331         nodemap_inc_version(env, idx, th);
332 out_lock:
333         dt_write_unlock(env, idx);
334 out:
335         dt_trans_stop(env, dev, th);
336
337         return rc;
338 }
339
340 static int nodemap_idx_delete(const struct lu_env *env,
341                               struct dt_object *idx,
342                               const struct nodemap_key *nk,
343                               const union nodemap_rec *unused)
344 {
345         struct thandle          *th;
346         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
347         int                      rc = 0;
348
349         th = dt_trans_create(env, dev);
350
351         if (IS_ERR(th))
352                 GOTO(out, rc = PTR_ERR(th));
353
354         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
355         if (rc != 0)
356                 GOTO(out, rc);
357
358         rc = dt_declare_version_set(env, idx, th);
359         if (rc != 0)
360                 GOTO(out, rc);
361
362         rc = dt_trans_start_local(env, dev, th);
363         if (rc != 0)
364                 GOTO(out, rc);
365
366         dt_write_lock(env, idx, 0);
367
368         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
369
370         nodemap_inc_version(env, idx, th);
371
372         dt_write_unlock(env, idx);
373 out:
374         dt_trans_stop(env, dev, th);
375
376         return rc;
377 }
378
/* selects between inserting a new index record and replacing an existing one */
enum nm_add_update {
	NM_ADD = 0,	/* insert a record that is not in the index yet */
	NM_UPDATE,	/* replace the record already stored under the key */
};
383
384 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
385                                           struct dt_object *idx,
386                                           enum nm_add_update update)
387 {
388         struct nodemap_key nk;
389         union nodemap_rec nr;
390         struct lu_env env;
391         int rc = 0;
392
393         ENTRY;
394
395         rc = lu_env_init(&env, LCT_LOCAL);
396         if (rc)
397                 RETURN(rc);
398
399         nodemap_cluster_key_init(&nk, nodemap->nm_id);
400         nodemap_cluster_rec_init(&nr, nodemap);
401
402         if (update == NM_UPDATE)
403                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
404         else
405                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
406
407         lu_env_fini(&env);
408
409         RETURN(rc);
410 }
411
412 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
413 {
414         if (nodemap_mgs_ncf == NULL) {
415                 CERROR("cannot add nodemap config to non-existing MGS.\n");
416                 return -EINVAL;
417         }
418
419         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
420                                               NM_ADD);
421 }
422
423 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
424 {
425         if (nodemap_mgs_ncf == NULL) {
426                 CERROR("cannot add nodemap config to non-existing MGS.\n");
427                 return -EINVAL;
428         }
429
430         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
431                                               NM_UPDATE);
432 }
433
434 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
435 {
436         struct rb_root           root;
437         struct lu_idmap         *idmap;
438         struct lu_idmap         *temp;
439         struct lu_nid_range     *range;
440         struct lu_nid_range     *range_temp;
441         struct nodemap_key       nk;
442         struct lu_env            env;
443         int                      rc = 0;
444         int                      rc2 = 0;
445
446         ENTRY;
447
448         if (nodemap_mgs_ncf == NULL) {
449                 CERROR("cannot add nodemap config to non-existing MGS.\n");
450                 return -EINVAL;
451         }
452
453         rc = lu_env_init(&env, LCT_LOCAL);
454         if (rc != 0)
455                 RETURN(rc);
456
457         root = nodemap->nm_fs_to_client_uidmap;
458         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
459                                                 id_fs_to_client) {
460                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
461                                        idmap->id_client);
462                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
463                                          &nk, NULL);
464                 if (rc2 < 0)
465                         rc = rc2;
466         }
467
468         root = nodemap->nm_client_to_fs_gidmap;
469         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
470                                                 id_client_to_fs) {
471                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
472                                        idmap->id_client);
473                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
474                                          &nk, NULL);
475                 if (rc2 < 0)
476                         rc = rc2;
477         }
478
479         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
480                                  rn_list) {
481                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
482                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
483                                          &nk, NULL);
484                 if (rc2 < 0)
485                         rc = rc2;
486         }
487
488         nodemap_cluster_key_init(&nk, nodemap->nm_id);
489         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
490         if (rc2 < 0)
491                 rc = rc2;
492
493         lu_env_fini(&env);
494
495         RETURN(rc);
496 }
497
498 int nodemap_idx_range_add(const struct lu_nid_range *range,
499                           const lnet_nid_t nid[2])
500 {
501         struct nodemap_key       nk;
502         union nodemap_rec        nr;
503         struct lu_env            env;
504         int                      rc = 0;
505         ENTRY;
506
507         if (nodemap_mgs_ncf == NULL) {
508                 CERROR("cannot add nodemap config to non-existing MGS.\n");
509                 return -EINVAL;
510         }
511
512         rc = lu_env_init(&env, LCT_LOCAL);
513         if (rc != 0)
514                 RETURN(rc);
515
516         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
517         nodemap_range_rec_init(&nr, nid);
518
519         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
520         lu_env_fini(&env);
521
522         RETURN(rc);
523 }
524
525 int nodemap_idx_range_del(const struct lu_nid_range *range)
526 {
527         struct nodemap_key       nk;
528         struct lu_env            env;
529         int                      rc = 0;
530         ENTRY;
531
532         if (nodemap_mgs_ncf == NULL) {
533                 CERROR("cannot add nodemap config to non-existing MGS.\n");
534                 return -EINVAL;
535         }
536
537         rc = lu_env_init(&env, LCT_LOCAL);
538         if (rc != 0)
539                 RETURN(rc);
540
541         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
542
543         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
544         lu_env_fini(&env);
545
546         RETURN(rc);
547 }
548
549 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
550                           enum nodemap_id_type id_type,
551                           const u32 map[2])
552 {
553         struct nodemap_key       nk;
554         union nodemap_rec        nr;
555         struct lu_env            env;
556         int                      rc = 0;
557         ENTRY;
558
559         if (nodemap_mgs_ncf == NULL) {
560                 CERROR("cannot add nodemap config to non-existing MGS.\n");
561                 return -EINVAL;
562         }
563
564         rc = lu_env_init(&env, LCT_LOCAL);
565         if (rc != 0)
566                 RETURN(rc);
567
568         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
569         nodemap_idmap_rec_init(&nr, map[1]);
570
571         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
572         lu_env_fini(&env);
573
574         RETURN(rc);
575 }
576
577 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
578                           enum nodemap_id_type id_type,
579                           const u32 map[2])
580 {
581         struct nodemap_key       nk;
582         struct lu_env            env;
583         int                      rc = 0;
584         ENTRY;
585
586         if (nodemap_mgs_ncf == NULL) {
587                 CERROR("cannot add nodemap config to non-existing MGS.\n");
588                 return -EINVAL;
589         }
590
591         rc = lu_env_init(&env, LCT_LOCAL);
592         if (rc != 0)
593                 RETURN(rc);
594
595         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
596
597         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
598         lu_env_fini(&env);
599
600         RETURN(rc);
601 }
602
603 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
604 {
605         struct nodemap_key       nk;
606         union nodemap_rec        nr;
607         struct lu_env            env;
608         int                      rc = 0;
609         ENTRY;
610
611         if (nodemap_mgs_ncf == NULL) {
612                 CERROR("cannot add nodemap config to non-existing MGS.\n");
613                 return -EINVAL;
614         }
615
616         rc = lu_env_init(&env, LCT_LOCAL);
617         if (rc != 0)
618                 RETURN(rc);
619
620         nodemap_global_key_init(&nk);
621         nodemap_global_rec_init(&nr, value);
622
623         if (update == NM_UPDATE)
624                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
625                                         &nk, &nr);
626         else
627                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
628                                         &nk, &nr);
629
630         lu_env_fini(&env);
631
632         RETURN(rc);
633 }
634
635 int nodemap_idx_nodemap_activate(bool value)
636 {
637         return nodemap_idx_global_add_update(value, NM_UPDATE);
638 }
639
640 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
641 {
642         u32                      nodemap_id;
643
644         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
645         return nm_idx_get_type(nodemap_id);
646 }
647
648 /**
649  * Process a key/rec pair and modify the new configuration.
650  *
651  * \param       config          configuration to update with this key/rec data
652  * \param       key             key of the record that was loaded
653  * \param       rec             record that was loaded
654  * \param       recent_nodemap  last referenced nodemap
655  * \retval      type of record processed, see enum #nodemap_idx_type
656  * \retval      -ENOENT         range or map loaded before nodemap record
657  * \retval      -EINVAL         duplicate nodemap cluster records found with
658  *                              different IDs, or nodemap has invalid name
659  * \retval      -ENOMEM
660  */
661 static int nodemap_process_keyrec(struct nodemap_config *config,
662                                   const struct nodemap_key *key,
663                                   const union nodemap_rec *rec,
664                                   struct lu_nodemap **recent_nodemap)
665 {
666         struct lu_nodemap *nodemap = NULL;
667         enum nodemap_idx_type type;
668         enum nodemap_id_type id_type;
669         u8 flags;
670         u32 nodemap_id;
671         lnet_nid_t nid[2];
672         u32 map[2];
673         int rc;
674
675         ENTRY;
676
677         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
678
679         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
680         type = nodemap_get_key_type(key);
681         nodemap_id = nm_idx_set_type(nodemap_id, 0);
682
683         CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
684                nodemap_id, type);
685
686         /* find the correct nodemap in the load list */
687         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
688             type == NODEMAP_GIDMAP_IDX) {
689                 struct lu_nodemap *tmp = NULL;
690
691                 nodemap = *recent_nodemap;
692
693                 if (nodemap == NULL)
694                         GOTO(out, rc = -ENOENT);
695
696                 if (nodemap->nm_id != nodemap_id) {
697                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
698                                 if (tmp->nm_id == nodemap_id) {
699                                         nodemap = tmp;
700                                         break;
701                                 }
702
703                         if (nodemap->nm_id != nodemap_id)
704                                 GOTO(out, rc = -ENOENT);
705                 }
706
707                 /* update most recently used nodemap if necessay */
708                 if (nodemap != *recent_nodemap)
709                         *recent_nodemap = nodemap;
710         }
711
712         switch (type) {
713         case NODEMAP_EMPTY_IDX:
714                 if (nodemap_id != 0)
715                         CWARN("Found nodemap config record without type field, "
716                               " nodemap_id=%d. nodemap config file corrupt?\n",
717                               nodemap_id);
718                 break;
719         case NODEMAP_CLUSTER_IDX: {
720                 struct lu_nodemap *old_nm = NULL;
721
722                 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
723                                           rec->ncr.ncr_name);
724                 if (nodemap == NULL) {
725                         if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
726                                 nodemap = nodemap_create(rec->ncr.ncr_name,
727                                                          config, 1);
728                         } else {
729                                 nodemap = nodemap_create(rec->ncr.ncr_name,
730                                                          config, 0);
731                         }
732                         if (IS_ERR(nodemap))
733                                 GOTO(out, rc = PTR_ERR(nodemap));
734
735                         /* we need to override the local ID with the saved ID */
736                         nodemap->nm_id = nodemap_id;
737                         if (nodemap_id > config->nmc_nodemap_highest_id)
738                                 config->nmc_nodemap_highest_id = nodemap_id;
739
740                 } else if (nodemap->nm_id != nodemap_id) {
741                         nodemap_putref(nodemap);
742                         GOTO(out, rc = -EINVAL);
743                 }
744
745                 nodemap->nm_squash_uid =
746                                 le32_to_cpu(rec->ncr.ncr_squash_uid);
747                 nodemap->nm_squash_gid =
748                                 le32_to_cpu(rec->ncr.ncr_squash_gid);
749
750                 flags = le32_to_cpu(rec->ncr.ncr_flags);
751                 nodemap->nmf_allow_root_access =
752                                         flags & NM_FL_ALLOW_ROOT_ACCESS;
753                 nodemap->nmf_trust_client_ids =
754                                         flags & NM_FL_TRUST_CLIENT_IDS;
755                 nodemap->nmf_deny_unknown =
756                                         flags & NM_FL_DENY_UNKNOWN;
757                 nodemap->nmf_map_uid_only =
758                                         flags & NM_FL_MAP_UID_ONLY;
759                 nodemap->nmf_map_gid_only =
760                                         flags & NM_FL_MAP_GID_ONLY;
761                 nodemap->nmf_enable_audit =
762                                         flags & NM_FL_ENABLE_AUDIT;
763                 nodemap->nmf_forbid_encryption =
764                                         flags & NM_FL_FORBID_ENCRYPT;
765
766                 /* The fileset should be saved otherwise it will be empty
767                  * every time in case of "NODEMAP_CLUSTER_IDX". */
768                 mutex_lock(&active_config_lock);
769                 old_nm = nodemap_lookup(rec->ncr.ncr_name);
770                 if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0')
771                         strlcpy(nodemap->nm_fileset, old_nm->nm_fileset,
772                                 sizeof(nodemap->nm_fileset));
773                 mutex_unlock(&active_config_lock);
774                 if (!IS_ERR(old_nm))
775                         nodemap_putref(old_nm);
776
777                 if (*recent_nodemap == NULL) {
778                         *recent_nodemap = nodemap;
779                         INIT_LIST_HEAD(&nodemap->nm_list);
780                 } else {
781                         list_add(&nodemap->nm_list,
782                                  &(*recent_nodemap)->nm_list);
783                 }
784                 nodemap_putref(nodemap);
785                 break;
786         }
787         case NODEMAP_RANGE_IDX:
788                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
789                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
790
791                 rc = nodemap_add_range_helper(config, nodemap, nid,
792                                         le32_to_cpu(key->nk_range_id));
793                 if (rc != 0)
794                         GOTO(out, rc);
795                 break;
796         case NODEMAP_UIDMAP_IDX:
797         case NODEMAP_GIDMAP_IDX:
798                 map[0] = le32_to_cpu(key->nk_id_client);
799                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
800
801                 if (type == NODEMAP_UIDMAP_IDX)
802                         id_type = NODEMAP_UID;
803                 else
804                         id_type = NODEMAP_GID;
805
806                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
807                 if (rc != 0)
808                         GOTO(out, rc);
809                 break;
810         case NODEMAP_GLOBAL_IDX:
811                 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
812                 break;
813         default:
814                 CERROR("got keyrec pair for unknown type %d\n", type);
815                 break;
816         }
817
818         rc = type;
819
820         EXIT;
821
822 out:
823         return rc;
824 }
825
/* passes made over the index by nodemap_load_entries() */
enum nm_config_passes {
	NM_READ_CLUSTERS = 0,	/* only cluster records are processed */
	NM_READ_ATTRIBUTES,	/* every other non-empty record type */
};
830
831 static int nodemap_load_entries(const struct lu_env *env,
832                                 struct dt_object *nodemap_idx)
833 {
834         const struct dt_it_ops *iops;
835         struct dt_it *it;
836         struct lu_nodemap *recent_nodemap = NULL;
837         struct nodemap_config *new_config = NULL;
838         u64 hash = 0;
839         bool activate_nodemap = false;
840         bool loaded_global_idx = false;
841         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
842         int rc = 0;
843
844         ENTRY;
845
846         iops = &nodemap_idx->do_index_ops->dio_it;
847
848         dt_read_lock(env, nodemap_idx, 0);
849         it = iops->init(env, nodemap_idx, 0);
850         if (IS_ERR(it))
851                 GOTO(out, rc = PTR_ERR(it));
852
853         rc = iops->load(env, it, hash);
854         if (rc < 0)
855                 GOTO(out_iops_fini, rc);
856
857         /* rc == 0 means we need to advance to record */
858         if (rc == 0) {
859                 rc = iops->next(env, it);
860
861                 if (rc < 0)
862                         GOTO(out_iops_put, rc);
863                 /* rc > 0 is eof, will be checked in while below */
864         } else {
865                 /* rc == 1, we found initial record and can process below */
866                 rc = 0;
867         }
868
869         new_config = nodemap_config_alloc();
870         if (IS_ERR(new_config)) {
871                 rc = PTR_ERR(new_config);
872                 new_config = NULL;
873                 GOTO(out_iops_put, rc);
874         }
875
876         /* rc > 0 is eof, check initial iops->next here as well */
877         while (rc == 0) {
878                 struct nodemap_key *key;
879                 union nodemap_rec rec;
880                 enum nodemap_idx_type key_type;
881
882                 key = (struct nodemap_key *)iops->key(env, it);
883                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
884                 if ((cur_pass == NM_READ_CLUSTERS &&
885                                 key_type == NODEMAP_CLUSTER_IDX) ||
886                     (cur_pass == NM_READ_ATTRIBUTES &&
887                                 key_type != NODEMAP_CLUSTER_IDX &&
888                                 key_type != NODEMAP_EMPTY_IDX)) {
889                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
890                         if (rc != -ESTALE) {
891                                 if (rc != 0)
892                                         GOTO(out_nodemap_config, rc);
893                                 rc = nodemap_process_keyrec(new_config, key, &rec,
894                                                             &recent_nodemap);
895                                 if (rc < 0)
896                                         GOTO(out_nodemap_config, rc);
897                                 if (rc == NODEMAP_GLOBAL_IDX)
898                                         loaded_global_idx = true;
899                         }
900                 }
901
902                 do
903                         rc = iops->next(env, it);
904                 while (rc == -ESTALE);
905
906                 /* move to second pass */
907                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
908                         cur_pass = NM_READ_ATTRIBUTES;
909                         rc = iops->load(env, it, 0);
910                         if (rc == 0)
911                                 rc = iops->next(env, it);
912                         else if (rc > 0)
913                                 rc = 0;
914                         else
915                                 GOTO(out, rc);
916                 }
917         }
918
919         if (rc > 0)
920                 rc = 0;
921
922 out_nodemap_config:
923         if (rc != 0)
924                 nodemap_config_dealloc(new_config);
925         else
926                 /* creating new default needs to be done outside dt read lock */
927                 activate_nodemap = true;
928 out_iops_put:
929         iops->put(env, it);
930 out_iops_fini:
931         iops->fini(env, it);
932 out:
933         dt_read_unlock(env, nodemap_idx);
934
935         if (rc != 0)
936                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
937                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
938
939         if (!activate_nodemap)
940                 RETURN(rc);
941
942         if (new_config->nmc_default_nodemap == NULL) {
943                 /* new MGS won't have a default nm on disk, so create it here */
944                 struct lu_nodemap *nodemap =
945                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
946                 if (IS_ERR(nodemap)) {
947                         rc = PTR_ERR(nodemap);
948                 } else {
949                         rc = nodemap_idx_nodemap_add_update(
950                                         new_config->nmc_default_nodemap,
951                                         nodemap_idx,
952                                         NM_ADD);
953                         nodemap_putref(new_config->nmc_default_nodemap);
954                 }
955         }
956
957         /* new nodemap config won't have an active/inactive record */
958         if (rc == 0 && loaded_global_idx == false) {
959                 struct nodemap_key       nk;
960                 union nodemap_rec        nr;
961
962                 nodemap_global_key_init(&nk);
963                 nodemap_global_rec_init(&nr, false);
964                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
965         }
966
967         if (rc == 0)
968                 nodemap_config_set_active(new_config);
969         else
970                 nodemap_config_dealloc(new_config);
971
972         RETURN(rc);
973 }
974
975 /**
976  * Step through active config and write to disk.
977  */
978 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
979                                             struct dt_device *dev,
980                                             struct local_oid_storage *los)
981 {
982         struct dt_object *o;
983         struct lu_nodemap *nodemap;
984         struct lu_nodemap *nm_tmp;
985         struct lu_nid_range *range;
986         struct lu_nid_range *range_temp;
987         struct lu_idmap *idmap;
988         struct lu_idmap *id_tmp;
989         struct rb_root root;
990         struct nodemap_key nk;
991         union nodemap_rec nr;
992         LIST_HEAD(nodemap_list_head);
993         int rc = 0, rc2;
994
995         ENTRY;
996
997         /* create a new index file to fill with active config */
998         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
999         if (IS_ERR(o))
1000                 RETURN(o);
1001
1002         mutex_lock(&active_config_lock);
1003
1004         /* convert hash to list so we don't spin */
1005         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
1006                                nm_hash_list_cb, &nodemap_list_head);
1007
1008         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
1009                 nodemap_cluster_key_init(&nk, nodemap->nm_id);
1010                 nodemap_cluster_rec_init(&nr, nodemap);
1011
1012                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1013                 if (rc2 < 0) {
1014                         rc = rc2;
1015                         continue;
1016                 }
1017
1018                 down_read(&active_config->nmc_range_tree_lock);
1019                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
1020                                          rn_list) {
1021                         lnet_nid_t nid[2] = {
1022                                 range->rn_start,
1023                                 range->rn_end
1024                         };
1025                         nodemap_range_key_init(&nk, nodemap->nm_id,
1026                                                range->rn_id);
1027                         nodemap_range_rec_init(&nr, nid);
1028                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1029                         if (rc2 < 0)
1030                                 rc = rc2;
1031                 }
1032                 up_read(&active_config->nmc_range_tree_lock);
1033
1034                 /* we don't need to take nm_idmap_lock because active config
1035                  * lock prevents changes from happening to nodemaps
1036                  */
1037                 root = nodemap->nm_client_to_fs_uidmap;
1038                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1039                                                         id_client_to_fs) {
1040                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1041                                                idmap->id_client);
1042                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1043                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1044                         if (rc2 < 0)
1045                                 rc = rc2;
1046                 }
1047
1048                 root = nodemap->nm_client_to_fs_gidmap;
1049                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1050                                                         id_client_to_fs) {
1051                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1052                                                idmap->id_client);
1053                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1054                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1055                         if (rc2 < 0)
1056                                 rc = rc2;
1057                 }
1058         }
1059         nodemap_global_key_init(&nk);
1060         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1061         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1062         if (rc2 < 0)
1063                 rc = rc2;
1064
1065         mutex_unlock(&active_config_lock);
1066
1067         if (rc < 0) {
1068                 dt_object_put(env, o);
1069                 o = ERR_PTR(rc);
1070         }
1071
1072         RETURN(o);
1073 }
1074
1075 static void nodemap_save_all_caches(void)
1076 {
1077         struct nm_config_file   *ncf;
1078         struct lu_env            env;
1079         int                      rc = 0;
1080
1081         /* recreating nodemap cache requires fld_thread_key be in env */
1082         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1083         if (rc != 0) {
1084                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1085                 return;
1086         }
1087
1088         mutex_lock(&ncf_list_lock);
1089         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1090                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1091                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1092                 struct dt_object *o;
1093
1094                 /* put current config file so save conf can rewrite it */
1095                 dt_object_put_nocache(&env, ncf->ncf_obj);
1096                 ncf->ncf_obj = NULL;
1097
1098                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1099                 if (IS_ERR(o))
1100                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1101                               obd->obd_name, rc);
1102                 else
1103                         ncf->ncf_obj = o;
1104         }
1105         mutex_unlock(&ncf_list_lock);
1106
1107         lu_env_fini(&env);
1108 }
1109
1110 /* tracks if config still needs to be loaded, either from disk or network */
1111 static bool nodemap_config_loaded;
1112 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1113
1114 /**
1115  * Ensures that configs loaded over the wire are prioritized over those loaded
1116  * from disk.
1117  *
1118  * \param config        config to set as the active config
1119  */
1120 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1121 {
1122         mutex_lock(&nodemap_config_loaded_lock);
1123         nodemap_config_set_active(config);
1124         nodemap_config_loaded = true;
1125         nodemap_save_all_caches();
1126         mutex_unlock(&nodemap_config_loaded_lock);
1127 }
1128 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1129
1130 /**
1131  * Register a dt_object representing the config index file. This should be
1132  * called by targets in order to load the nodemap configuration from disk. The
1133  * dt_object should be created with local_index_find_or_create and the index
1134  * features should be enabled with do_index_try.
1135  *
1136  * \param obj   dt_object returned by local_index_find_or_create
1137  *
1138  * \retval      on success: nm_config_file handle for later deregistration
1139  * \retval      -ENOMEM         memory allocation failure
1140  * \retval      -ENOENT         error loading nodemap config
1141  * \retval      -EINVAL         error loading nodemap config
1142  * \retval      -EEXIST         nodemap config already registered for MGS
1143  */
1144 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1145                                                    struct dt_object *obj,
1146                                                    struct local_oid_storage *los)
1147 {
1148         struct nm_config_file *ncf;
1149         int rc = 0;
1150         ENTRY;
1151
1152         if (nodemap_mgs_ncf != NULL)
1153                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1154
1155         OBD_ALLOC_PTR(ncf);
1156         if (ncf == NULL)
1157                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1158
1159         /* if loading from cache, prevent activation of MGS config until cache
1160          * loading is done, so disk config is overwritten by MGS config.
1161          */
1162         mutex_lock(&nodemap_config_loaded_lock);
1163         rc = nodemap_load_entries(env, obj);
1164         if (!rc)
1165                 nodemap_config_loaded = true;
1166         mutex_unlock(&nodemap_config_loaded_lock);
1167
1168         if (rc) {
1169                 OBD_FREE_PTR(ncf);
1170                 GOTO(out, ncf = ERR_PTR(rc));
1171         }
1172
1173         lu_object_get(&obj->do_lu);
1174
1175         ncf->ncf_obj = obj;
1176         ncf->ncf_los = los;
1177
1178         nodemap_mgs_ncf = ncf;
1179
1180 out:
1181         return ncf;
1182 }
1183 EXPORT_SYMBOL(nm_config_file_register_mgs);
1184
1185 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1186                                                    struct dt_device *dev,
1187                                                    struct local_oid_storage *los)
1188 {
1189         struct nm_config_file *ncf;
1190         struct dt_object *config_obj = NULL;
1191         int rc = 0;
1192
1193         OBD_ALLOC_PTR(ncf);
1194         if (ncf == NULL)
1195                 RETURN(ERR_PTR(-ENOMEM));
1196
1197         /* don't load from cache if config already loaded */
1198         mutex_lock(&nodemap_config_loaded_lock);
1199         if (!nodemap_config_loaded) {
1200                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1201                 if (IS_ERR(config_obj))
1202                         rc = PTR_ERR(config_obj);
1203                 else
1204                         rc = nodemap_load_entries(env, config_obj);
1205
1206                 if (!rc)
1207                         nodemap_config_loaded = true;
1208         }
1209         mutex_unlock(&nodemap_config_loaded_lock);
1210         if (rc)
1211                 GOTO(out_ncf, rc);
1212
1213         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1214         if (!config_obj) {
1215                 config_obj = nodemap_save_config_cache(env, dev, los);
1216                 if (IS_ERR(config_obj))
1217                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1218         }
1219
1220         ncf->ncf_obj = config_obj;
1221         ncf->ncf_los = los;
1222
1223         mutex_lock(&ncf_list_lock);
1224         list_add(&ncf->ncf_list, &ncf_list_head);
1225         mutex_unlock(&ncf_list_lock);
1226
1227 out_ncf:
1228         if (rc) {
1229                 OBD_FREE_PTR(ncf);
1230                 RETURN(ERR_PTR(rc));
1231         }
1232
1233         RETURN(ncf);
1234 }
1235 EXPORT_SYMBOL(nm_config_file_register_tgt);
1236
1237 /**
1238  * Deregister a nm_config_file. Should be called by targets during cleanup.
1239  *
1240  * \param ncf   config file to deregister
1241  */
1242 void nm_config_file_deregister_mgs(const struct lu_env *env,
1243                                    struct nm_config_file *ncf)
1244 {
1245         ENTRY;
1246         LASSERT(nodemap_mgs_ncf == ncf);
1247
1248         nodemap_mgs_ncf = NULL;
1249         if (ncf->ncf_obj)
1250                 dt_object_put(env, ncf->ncf_obj);
1251
1252         OBD_FREE_PTR(ncf);
1253
1254         EXIT;
1255 }
1256 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1257
1258 void nm_config_file_deregister_tgt(const struct lu_env *env,
1259                                    struct nm_config_file *ncf)
1260 {
1261         ENTRY;
1262
1263         if (ncf == NULL)
1264                 return;
1265
1266         mutex_lock(&ncf_list_lock);
1267         list_del(&ncf->ncf_list);
1268         mutex_unlock(&ncf_list_lock);
1269
1270         if (ncf->ncf_obj)
1271                 dt_object_put(env, ncf->ncf_obj);
1272
1273         OBD_FREE_PTR(ncf);
1274
1275         EXIT;
1276 }
1277 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1278
1279 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1280                               struct lu_nodemap **recent_nodemap)
1281 {
1282         struct nodemap_key *key;
1283         union nodemap_rec *rec;
1284         char *entry;
1285         int j;
1286         int k;
1287         int rc = 0;
1288         int size = dt_nodemap_features.dif_keysize_max +
1289                    dt_nodemap_features.dif_recsize_max;
1290         ENTRY;
1291
1292         for (j = 0; j < LU_PAGE_COUNT; j++) {
1293                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1294                         return -EINVAL;
1295
1296                 /* get and process keys and records from page */
1297                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1298                         entry = lip->lp_idx.lip_entries + k * size;
1299                         key = (struct nodemap_key *)entry;
1300
1301                         entry += dt_nodemap_features.dif_keysize_max;
1302                         rec = (union nodemap_rec *)entry;
1303
1304                         rc = nodemap_process_keyrec(config, key, rec,
1305                                                     recent_nodemap);
1306                         if (rc < 0)
1307                                 return rc;
1308                 }
1309                 lip++;
1310         }
1311
1312         EXIT;
1313         return 0;
1314 }
1315 EXPORT_SYMBOL(nodemap_process_idx_pages);
1316
1317 static int nodemap_page_build(const struct lu_env *env, union lu_page *lp,
1318                               size_t nob, const struct dt_it_ops *iops,
1319                               struct dt_it *it, __u32 attr, void *arg)
1320 {
1321         struct idx_info *ii = (struct idx_info *)arg;
1322         struct lu_idxpage *lip = &lp->lp_idx;
1323         char *entry;
1324         size_t size = ii->ii_keysize + ii->ii_recsize;
1325         int rc;
1326         ENTRY;
1327
1328         if (nob < LIP_HDR_SIZE)
1329                 return -EINVAL;
1330
1331         /* initialize the header of the new container */
1332         memset(lip, 0, LIP_HDR_SIZE);
1333         lip->lip_magic = LIP_MAGIC;
1334         nob           -= LIP_HDR_SIZE;
1335
1336         entry = lip->lip_entries;
1337         do {
1338                 char            *tmp_entry = entry;
1339                 struct dt_key   *key;
1340                 __u64           hash;
1341                 enum nodemap_idx_type key_type;
1342
1343                 /* fetch 64-bit hash value */
1344                 hash = iops->store(env, it);
1345                 ii->ii_hash_end = hash;
1346
1347                 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1348                         if (lip->lip_nr != 0)
1349                                 GOTO(out, rc = 0);
1350                 }
1351
1352                 if (nob < size) {
1353                         if (lip->lip_nr == 0)
1354                                 GOTO(out, rc = -EINVAL);
1355                         GOTO(out, rc = 0);
1356                 }
1357
1358                 key = iops->key(env, it);
1359                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1360
1361                 /* on the first pass, get only the cluster types. On second
1362                  * pass, get all the rest */
1363                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1364                                 key_type == NODEMAP_CLUSTER_IDX) ||
1365                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1366                                 key_type != NODEMAP_CLUSTER_IDX &&
1367                                 key_type != NODEMAP_EMPTY_IDX)) {
1368                         memcpy(tmp_entry, key, ii->ii_keysize);
1369                         tmp_entry += ii->ii_keysize;
1370
1371                         /* and finally the record */
1372                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1373                                        attr);
1374                         if (rc != -ESTALE) {
1375                                 if (rc != 0)
1376                                         GOTO(out, rc);
1377
1378                                 /* hash/key/record successfully copied! */
1379                                 lip->lip_nr++;
1380                                 if (unlikely(lip->lip_nr == 1 &&
1381                                     ii->ii_count == 0))
1382                                         ii->ii_hash_start = hash;
1383
1384                                 entry = tmp_entry + ii->ii_recsize;
1385                                 nob -= size;
1386                         }
1387                 }
1388
1389                 /* move on to the next record */
1390                 do {
1391                         rc = iops->next(env, it);
1392                 } while (rc == -ESTALE);
1393
1394                 /* move to second pass */
1395                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1396                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1397                         rc = iops->load(env, it, 0);
1398                         if (rc == 0)
1399                                 rc = iops->next(env, it);
1400                         else if (rc > 0)
1401                                 rc = 0;
1402                         else
1403                                 GOTO(out, rc);
1404                 }
1405
1406         } while (rc == 0);
1407
1408         GOTO(out, rc);
1409 out:
1410         if (rc >= 0 && lip->lip_nr > 0)
1411                 /* one more container */
1412                 ii->ii_count++;
1413         if (rc > 0)
1414                 /* no more entries */
1415                 ii->ii_hash_end = II_END_OFF;
1416         return rc;
1417 }
1418
1419
1420 int nodemap_index_read(struct lu_env *env,
1421                        struct nm_config_file *ncf,
1422                        struct idx_info *ii,
1423                        const struct lu_rdpg *rdpg)
1424 {
1425         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1426         __u64                    version;
1427         int                      rc = 0;
1428
1429         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1430         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1431
1432         dt_read_lock(env, nodemap_idx, 0);
1433         version = dt_version_get(env, nodemap_idx);
1434         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1435                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1436                        ii->ii_version,
1437                        version);
1438                 ii->ii_hash_end = 0;
1439         } else {
1440                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1441                                    ii);
1442                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1443         }
1444
1445         if (rc >= 0)
1446                 ii->ii_version = version;
1447
1448         dt_read_unlock(env, nodemap_idx);
1449         return rc;
1450 }
1451 EXPORT_SYMBOL(nodemap_index_read);
1452
1453 /**
1454  * Returns the current nodemap configuration to MGC by walking the nodemap
1455  * config index and storing it in the response buffer.
1456  *
1457  * \param       req             incoming MGS_CONFIG_READ request
1458  * \retval      0               success
1459  * \retval      -EINVAL         malformed request
1460  * \retval      -ENOTCONN       client evicted/reconnected already
1461  * \retval      -ETIMEDOUT      client timeout or network error
1462  * \retval      -ENOMEM
1463  */
1464 int nodemap_get_config_req(struct obd_device *mgs_obd,
1465                            struct ptlrpc_request *req)
1466 {
1467         const struct ptlrpc_bulk_frag_ops *frag_ops = &ptlrpc_bulk_kiov_pin_ops;
1468         struct mgs_config_body *body;
1469         struct mgs_config_res *res;
1470         struct lu_rdpg rdpg;
1471         struct idx_info nodemap_ii;
1472         struct ptlrpc_bulk_desc *desc;
1473         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1474         int i;
1475         int page_count;
1476         int bytes = 0;
1477         int rc = 0;
1478
1479         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1480         if (!body)
1481                 RETURN(-EINVAL);
1482
1483         if (body->mcb_type != MGS_CFG_T_NODEMAP)
1484                 RETURN(-EINVAL);
1485
1486         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1487         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1488                 PAGE_SHIFT;
1489         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1490                 RETURN(-EINVAL);
1491
1492         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1493                body->mcb_name, rdpg.rp_count);
1494
1495         /* allocate pages to store the containers */
1496         OBD_ALLOC_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1497         if (rdpg.rp_pages == NULL)
1498                 RETURN(-ENOMEM);
1499         for (i = 0; i < rdpg.rp_npages; i++) {
1500                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1501                 if (rdpg.rp_pages[i] == NULL)
1502                         GOTO(out, rc = -ENOMEM);
1503         }
1504
1505         rdpg.rp_hash = body->mcb_offset;
1506         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1507         nodemap_ii.ii_flags = II_FL_NOHASH;
1508         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1509         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1510
1511         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1512                                    mgs_obd->u.obt.obt_nodemap_config_file,
1513                                    &nodemap_ii, &rdpg);
1514         if (bytes < 0)
1515                 GOTO(out, rc = bytes);
1516
1517         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1518
1519         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1520         if (res == NULL)
1521                 GOTO(out, rc = -EINVAL);
1522         res->mcr_offset = nodemap_ii.ii_hash_end;
1523         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1524
1525         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1526         LASSERT(page_count <= rdpg.rp_count);
1527         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1528                                     PTLRPC_BULK_PUT_SOURCE,
1529                                     MGS_BULK_PORTAL, frag_ops);
1530         if (desc == NULL)
1531                 GOTO(out, rc = -ENOMEM);
1532
1533         for (i = 0; i < page_count && bytes > 0; i++) {
1534                 frag_ops->add_kiov_frag(desc, rdpg.rp_pages[i], 0,
1535                                         min_t(int, bytes, PAGE_SIZE));
1536                 bytes -= PAGE_SIZE;
1537         }
1538
1539         rc = target_bulk_io(req->rq_export, desc);
1540         ptlrpc_free_bulk(desc);
1541
1542 out:
1543         if (rdpg.rp_pages != NULL) {
1544                 for (i = 0; i < rdpg.rp_npages; i++)
1545                         if (rdpg.rp_pages[i] != NULL)
1546                                 __free_page(rdpg.rp_pages[i]);
1547                 OBD_FREE_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1548         }
1549         return rc;
1550 }
1551 EXPORT_SYMBOL(nodemap_get_config_req);