Whamcloud - gitweb
efed19e0870e6180f713bfdfac84cdc088db2f66
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2014, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <lnet/types.h>
54 #include <lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
62
/*
 * The MGS nodemap config file is tracked on its own rather than on
 * ncf_list_head; the other registered index files are listeners to the
 * MGS index.
 */
static struct nm_config_file *nodemap_mgs_ncf;

/*
 * Registered nodemap index files for everything except the MGS.
 * ncf_list_lock presumably serializes access to ncf_list_head -- confirm
 * against the users of the list.
 */
static DEFINE_MUTEX(ncf_list_lock);
static LIST_HEAD(ncf_list_head);
69
/*
 * Bit masks for the lu_nodemap boolean settings as packed into the
 * ncr_flags field of a cluster record.
 */
enum nm_flag_shifts {
	NM_FL_ALLOW_ROOT_ACCESS	= 1 << 0,
	NM_FL_TRUST_CLIENT_IDS	= 1 << 1,
	NM_FL_DENY_UNKNOWN	= 1 << 2,
};
76
77 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
78 {
79         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
80                                                         NODEMAP_CLUSTER_IDX));
81         nk->nk_unused = 0;
82 }
83
84 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
85                                      const struct lu_nodemap *nodemap)
86 {
87         CLASSERT(sizeof(nr->ncr.ncr_name) == sizeof(nodemap->nm_name));
88
89         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nodemap->nm_name));
90         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
91         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
92         nr->ncr.ncr_flags = cpu_to_le32(
93                 (nodemap->nmf_trust_client_ids ?
94                         NM_FL_TRUST_CLIENT_IDS : 0) |
95                 (nodemap->nmf_allow_root_access ?
96                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
97                 (nodemap->nmf_deny_unknown ?
98                         NM_FL_DENY_UNKNOWN : 0));
99 }
100
101 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
102                                    enum nodemap_id_type id_type,
103                                    u32 id_client)
104 {
105         enum nodemap_idx_type idx_type;
106
107         if (id_type == NODEMAP_UID)
108                 idx_type = NODEMAP_UIDMAP_IDX;
109         else
110                 idx_type = NODEMAP_GIDMAP_IDX;
111
112         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
113         nk->nk_id_client = cpu_to_le32(id_client);
114 }
115
116 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
117 {
118         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
119 }
120
121 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
122                                    unsigned int rn_id)
123 {
124         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
125                                                         NODEMAP_RANGE_IDX));
126         nk->nk_range_id = cpu_to_le32(rn_id);
127 }
128
129 static void nodemap_range_rec_init(union nodemap_rec *nr,
130                                    const lnet_nid_t nid[2])
131 {
132         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
133         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
134 }
135
136 static void nodemap_global_key_init(struct nodemap_key *nk)
137 {
138         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
139         nk->nk_unused = 0;
140 }
141
142 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
143 {
144         nr->ngr.ngr_is_active = active;
145 }
146
147 /* should be called with dt_write lock */
148 static void nodemap_inc_version(const struct lu_env *env,
149                                 struct dt_object *nodemap_idx,
150                                 struct thandle *th)
151 {
152         u64 ver = dt_version_get(env, nodemap_idx);
153         dt_version_set(env, nodemap_idx, ver + 1, th);
154 }
155
/*
 * Mode argument of nodemap_cache_find_create(): NCFC_CREATE_NEW requests
 * that an existing index be removed and a fresh one created.
 */
enum ncfc_find_create {
	NCFC_CREATE_NEW	= 1,
};
159
160 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
161                                                    struct dt_device *dev,
162                                                    struct local_oid_storage *los,
163                                                    enum ncfc_find_create create_new)
164 {
165         struct lu_fid tfid;
166         struct dt_object *root_obj;
167         struct dt_object *nm_obj;
168         int rc = 0;
169
170         rc = dt_root_get(env, dev, &tfid);
171         if (rc < 0)
172                 GOTO(out, nm_obj = ERR_PTR(rc));
173
174         root_obj = dt_locate(env, dev, &tfid);
175         if (unlikely(IS_ERR(root_obj)))
176                 GOTO(out, nm_obj = root_obj);
177
178         rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
179         if (rc == -ENOENT) {
180                 if (dev->dd_rdonly)
181                         GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
182         } else if (rc) {
183                 GOTO(out_root, nm_obj = ERR_PTR(rc));
184         } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
185                 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
186         }
187
188 again:
189         /* if loading index fails the first time, create new index */
190         if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
191                 CDEBUG(D_INFO, "removing old index, creating new one\n");
192                 rc = local_object_unlink(env, dev, root_obj,
193                                          LUSTRE_NODEMAP_NAME);
194                 if (rc < 0) {
195                         /* XXX not sure the best way to get obd name. */
196                         CERROR("cannot destroy nodemap index: rc = %d\n",
197                                rc);
198                         GOTO(out_root, nm_obj = ERR_PTR(rc));
199                 }
200         }
201
202         nm_obj = local_index_find_or_create(env, los, root_obj,
203                                                 LUSTRE_NODEMAP_NAME,
204                                                 S_IFREG | S_IRUGO | S_IWUSR,
205                                                 &dt_nodemap_features);
206         if (IS_ERR(nm_obj))
207                 GOTO(out_root, nm_obj);
208
209         if (nm_obj->do_index_ops == NULL) {
210                 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
211                                                       &dt_nodemap_features);
212                 /* even if loading from tgt fails, connecting to MGS will
213                  * rewrite the config
214                  */
215                 if (rc < 0) {
216                         dt_object_put(env, nm_obj);
217
218                         if (create_new == NCFC_CREATE_NEW)
219                                 GOTO(out_root, nm_obj = ERR_PTR(rc));
220
221                         CERROR("cannot load nodemap index from disk, creating "
222                                "new index: rc = %d\n", rc);
223                         create_new = NCFC_CREATE_NEW;
224                         goto again;
225                 }
226         }
227
228 out_root:
229         dt_object_put(env, root_obj);
230 out:
231         return nm_obj;
232 }
233
234 static int nodemap_idx_insert(const struct lu_env *env,
235                               struct dt_object *idx,
236                               const struct nodemap_key *nk,
237                               const union nodemap_rec *nr)
238 {
239         struct thandle          *th;
240         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
241         int                      rc;
242
243         CLASSERT(sizeof(union nodemap_rec) == 32);
244
245         th = dt_trans_create(env, dev);
246
247         if (IS_ERR(th))
248                 GOTO(out, rc = PTR_ERR(th));
249
250         rc = dt_declare_insert(env, idx,
251                                (const struct dt_rec *)nr,
252                                (const struct dt_key *)nk, th);
253         if (rc != 0)
254                 GOTO(out, rc);
255
256         rc = dt_declare_version_set(env, idx, th);
257         if (rc != 0)
258                 GOTO(out, rc);
259
260         rc = dt_trans_start_local(env, dev, th);
261         if (rc != 0)
262                 GOTO(out, rc);
263
264         dt_write_lock(env, idx, 0);
265
266         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
267                        (const struct dt_key *)nk, th, 1);
268
269         nodemap_inc_version(env, idx, th);
270         dt_write_unlock(env, idx);
271 out:
272         dt_trans_stop(env, dev, th);
273
274         return rc;
275 }
276
277 static int nodemap_idx_update(const struct lu_env *env,
278                               struct dt_object *idx,
279                               const struct nodemap_key *nk,
280                               const union nodemap_rec *nr)
281 {
282         struct thandle          *th;
283         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
284         int                      rc = 0;
285
286         th = dt_trans_create(env, dev);
287
288         if (IS_ERR(th))
289                 GOTO(out, rc = PTR_ERR(th));
290
291         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
292         if (rc != 0)
293                 GOTO(out, rc);
294
295         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
296                                (const struct dt_key *)nk, th);
297         if (rc != 0)
298                 GOTO(out, rc);
299
300         rc = dt_declare_version_set(env, idx, th);
301         if (rc != 0)
302                 GOTO(out, rc);
303
304         rc = dt_trans_start_local(env, dev, th);
305         if (rc != 0)
306                 GOTO(out, rc);
307
308         dt_write_lock(env, idx, 0);
309
310         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
311         if (rc != 0)
312                 GOTO(out_lock, rc);
313
314         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
315                        (const struct dt_key *)nk, th, 1);
316         if (rc != 0)
317                 GOTO(out_lock, rc);
318
319         nodemap_inc_version(env, idx, th);
320 out_lock:
321         dt_write_unlock(env, idx);
322 out:
323         dt_trans_stop(env, dev, th);
324
325         return rc;
326 }
327
328 static int nodemap_idx_delete(const struct lu_env *env,
329                               struct dt_object *idx,
330                               const struct nodemap_key *nk,
331                               const union nodemap_rec *unused)
332 {
333         struct thandle          *th;
334         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
335         int                      rc = 0;
336
337         th = dt_trans_create(env, dev);
338
339         if (IS_ERR(th))
340                 GOTO(out, rc = PTR_ERR(th));
341
342         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
343         if (rc != 0)
344                 GOTO(out, rc);
345
346         rc = dt_declare_version_set(env, idx, th);
347         if (rc != 0)
348                 GOTO(out, rc);
349
350         rc = dt_trans_start_local(env, dev, th);
351         if (rc != 0)
352                 GOTO(out, rc);
353
354         dt_write_lock(env, idx, 0);
355
356         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
357
358         nodemap_inc_version(env, idx, th);
359
360         dt_write_unlock(env, idx);
361 out:
362         dt_trans_stop(env, dev, th);
363
364         return rc;
365 }
366
/* Selects whether a record is inserted as new or replaces an existing one. */
enum nm_add_update {
	NM_ADD,
	NM_UPDATE,
};
371
372 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
373                                           struct dt_object *idx,
374                                           enum nm_add_update update)
375 {
376         struct nodemap_key nk;
377         union nodemap_rec nr;
378         struct lu_env env;
379         int rc = 0;
380
381         ENTRY;
382
383         rc = lu_env_init(&env, LCT_LOCAL);
384         if (rc)
385                 RETURN(rc);
386
387         nodemap_cluster_key_init(&nk, nodemap->nm_id);
388         nodemap_cluster_rec_init(&nr, nodemap);
389
390         if (update == NM_UPDATE)
391                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
392         else
393                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
394
395         lu_env_fini(&env);
396
397         RETURN(rc);
398 }
399
400 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
401 {
402         if (nodemap_mgs_ncf == NULL) {
403                 CERROR("cannot add nodemap config to non-existing MGS.\n");
404                 return -EINVAL;
405         }
406
407         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
408                                               NM_ADD);
409 }
410
411 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
412 {
413         if (nodemap_mgs_ncf == NULL) {
414                 CERROR("cannot add nodemap config to non-existing MGS.\n");
415                 return -EINVAL;
416         }
417
418         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
419                                               NM_UPDATE);
420 }
421
422 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
423 {
424         struct rb_root           root;
425         struct lu_idmap         *idmap;
426         struct lu_idmap         *temp;
427         struct lu_nid_range     *range;
428         struct lu_nid_range     *range_temp;
429         struct nodemap_key       nk;
430         struct lu_env            env;
431         int                      rc = 0;
432         int                      rc2 = 0;
433
434         ENTRY;
435
436         if (nodemap_mgs_ncf == NULL) {
437                 CERROR("cannot add nodemap config to non-existing MGS.\n");
438                 return -EINVAL;
439         }
440
441         rc = lu_env_init(&env, LCT_LOCAL);
442         if (rc != 0)
443                 RETURN(rc);
444
445         root = nodemap->nm_fs_to_client_uidmap;
446         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
447                                                 id_fs_to_client) {
448                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
449                                        idmap->id_client);
450                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
451                                          &nk, NULL);
452                 if (rc2 < 0)
453                         rc = rc2;
454         }
455
456         root = nodemap->nm_client_to_fs_gidmap;
457         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
458                                                 id_client_to_fs) {
459                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
460                                        idmap->id_client);
461                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
462                                          &nk, NULL);
463                 if (rc2 < 0)
464                         rc = rc2;
465         }
466
467         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
468                                  rn_list) {
469                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
470                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
471                                          &nk, NULL);
472                 if (rc2 < 0)
473                         rc = rc2;
474         }
475
476         nodemap_cluster_key_init(&nk, nodemap->nm_id);
477         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
478         if (rc2 < 0)
479                 rc = rc2;
480
481         lu_env_fini(&env);
482
483         RETURN(rc);
484 }
485
486 int nodemap_idx_range_add(const struct lu_nid_range *range,
487                           const lnet_nid_t nid[2])
488 {
489         struct nodemap_key       nk;
490         union nodemap_rec        nr;
491         struct lu_env            env;
492         int                      rc = 0;
493         ENTRY;
494
495         if (nodemap_mgs_ncf == NULL) {
496                 CERROR("cannot add nodemap config to non-existing MGS.\n");
497                 return -EINVAL;
498         }
499
500         rc = lu_env_init(&env, LCT_LOCAL);
501         if (rc != 0)
502                 RETURN(rc);
503
504         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
505         nodemap_range_rec_init(&nr, nid);
506
507         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
508         lu_env_fini(&env);
509
510         RETURN(rc);
511 }
512
513 int nodemap_idx_range_del(const struct lu_nid_range *range)
514 {
515         struct nodemap_key       nk;
516         struct lu_env            env;
517         int                      rc = 0;
518         ENTRY;
519
520         if (nodemap_mgs_ncf == NULL) {
521                 CERROR("cannot add nodemap config to non-existing MGS.\n");
522                 return -EINVAL;
523         }
524
525         rc = lu_env_init(&env, LCT_LOCAL);
526         if (rc != 0)
527                 RETURN(rc);
528
529         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
530
531         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
532         lu_env_fini(&env);
533
534         RETURN(rc);
535 }
536
537 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
538                           enum nodemap_id_type id_type,
539                           const u32 map[2])
540 {
541         struct nodemap_key       nk;
542         union nodemap_rec        nr;
543         struct lu_env            env;
544         int                      rc = 0;
545         ENTRY;
546
547         if (nodemap_mgs_ncf == NULL) {
548                 CERROR("cannot add nodemap config to non-existing MGS.\n");
549                 return -EINVAL;
550         }
551
552         rc = lu_env_init(&env, LCT_LOCAL);
553         if (rc != 0)
554                 RETURN(rc);
555
556         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
557         nodemap_idmap_rec_init(&nr, map[1]);
558
559         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
560         lu_env_fini(&env);
561
562         RETURN(rc);
563 }
564
565 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
566                           enum nodemap_id_type id_type,
567                           const u32 map[2])
568 {
569         struct nodemap_key       nk;
570         struct lu_env            env;
571         int                      rc = 0;
572         ENTRY;
573
574         if (nodemap_mgs_ncf == NULL) {
575                 CERROR("cannot add nodemap config to non-existing MGS.\n");
576                 return -EINVAL;
577         }
578
579         rc = lu_env_init(&env, LCT_LOCAL);
580         if (rc != 0)
581                 RETURN(rc);
582
583         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
584
585         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
586         lu_env_fini(&env);
587
588         RETURN(rc);
589 }
590
591 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
592 {
593         struct nodemap_key       nk;
594         union nodemap_rec        nr;
595         struct lu_env            env;
596         int                      rc = 0;
597         ENTRY;
598
599         if (nodemap_mgs_ncf == NULL) {
600                 CERROR("cannot add nodemap config to non-existing MGS.\n");
601                 return -EINVAL;
602         }
603
604         rc = lu_env_init(&env, LCT_LOCAL);
605         if (rc != 0)
606                 RETURN(rc);
607
608         nodemap_global_key_init(&nk);
609         nodemap_global_rec_init(&nr, value);
610
611         if (update == NM_UPDATE)
612                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
613                                         &nk, &nr);
614         else
615                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
616                                         &nk, &nr);
617
618         lu_env_fini(&env);
619
620         RETURN(rc);
621 }
622
623 int nodemap_idx_nodemap_activate(bool value)
624 {
625         return nodemap_idx_global_add_update(value, NM_UPDATE);
626 }
627
628 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
629 {
630         u32                      nodemap_id;
631
632         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
633         return nm_idx_get_type(nodemap_id);
634 }
635
636 /**
637  * Process a key/rec pair and modify the new configuration.
638  *
639  * \param       config          configuration to update with this key/rec data
640  * \param       key             key of the record that was loaded
641  * \param       rec             record that was loaded
642  * \param       recent_nodemap  last referenced nodemap
643  * \retval      type of record processed, see enum #nodemap_idx_type
644  * \retval      -ENOENT         range or map loaded before nodemap record
645  * \retval      -EINVAL         duplicate nodemap cluster records found with
646  *                              different IDs, or nodemap has invalid name
647  * \retval      -ENOMEM
648  */
649 static int nodemap_process_keyrec(struct nodemap_config *config,
650                                   const struct nodemap_key *key,
651                                   const union nodemap_rec *rec,
652                                   struct lu_nodemap **recent_nodemap)
653 {
654         struct lu_nodemap       *nodemap = NULL;
655         enum nodemap_idx_type    type;
656         enum nodemap_id_type     id_type;
657         u8                       flags;
658         u32                      nodemap_id;
659         lnet_nid_t               nid[2];
660         u32                      map[2];
661         int                      rc;
662
663         ENTRY;
664
665         CLASSERT(sizeof(union nodemap_rec) == 32);
666
667         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
668         type = nodemap_get_key_type(key);
669         nodemap_id = nm_idx_set_type(nodemap_id, 0);
670
671         CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
672                nodemap_id, type);
673
674         /* find the correct nodemap in the load list */
675         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
676             type == NODEMAP_GIDMAP_IDX) {
677                 struct lu_nodemap *tmp = NULL;
678
679                 nodemap = *recent_nodemap;
680
681                 if (nodemap == NULL)
682                         GOTO(out, rc = -ENOENT);
683
684                 if (nodemap->nm_id != nodemap_id) {
685                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
686                                 if (tmp->nm_id == nodemap_id) {
687                                         nodemap = tmp;
688                                         break;
689                                 }
690
691                         if (nodemap->nm_id != nodemap_id)
692                                 GOTO(out, rc = -ENOENT);
693                 }
694
695                 /* update most recently used nodemap if necessay */
696                 if (nodemap != *recent_nodemap)
697                         *recent_nodemap = nodemap;
698         }
699
700         switch (type) {
701         case NODEMAP_EMPTY_IDX:
702                 if (nodemap_id != 0)
703                         CWARN("Found nodemap config record without type field, "
704                               " nodemap_id=%d. nodemap config file corrupt?\n",
705                               nodemap_id);
706                 break;
707         case NODEMAP_CLUSTER_IDX:
708                 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
709                                           rec->ncr.ncr_name);
710                 if (nodemap == NULL) {
711                         if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
712                                 nodemap = nodemap_create(rec->ncr.ncr_name,
713                                                          config, 1);
714                                 config->nmc_default_nodemap = nodemap;
715                         } else {
716                                 nodemap = nodemap_create(rec->ncr.ncr_name,
717                                                          config, 0);
718                         }
719                         if (IS_ERR(nodemap))
720                                 GOTO(out, rc = PTR_ERR(nodemap));
721
722                         /* we need to override the local ID with the saved ID */
723                         nodemap->nm_id = nodemap_id;
724                         if (nodemap_id > config->nmc_nodemap_highest_id)
725                                 config->nmc_nodemap_highest_id = nodemap_id;
726
727                 } else if (nodemap->nm_id != nodemap_id) {
728                         nodemap_putref(nodemap);
729                         GOTO(out, rc = -EINVAL);
730                 }
731
732                 nodemap->nm_squash_uid =
733                                 le32_to_cpu(rec->ncr.ncr_squash_uid);
734                 nodemap->nm_squash_gid =
735                                 le32_to_cpu(rec->ncr.ncr_squash_gid);
736
737                 flags = le32_to_cpu(rec->ncr.ncr_flags);
738                 nodemap->nmf_allow_root_access =
739                                         flags & NM_FL_ALLOW_ROOT_ACCESS;
740                 nodemap->nmf_trust_client_ids =
741                                         flags & NM_FL_TRUST_CLIENT_IDS;
742                 nodemap->nmf_deny_unknown =
743                                         flags & NM_FL_DENY_UNKNOWN;
744
745                 if (*recent_nodemap == NULL) {
746                         *recent_nodemap = nodemap;
747                         INIT_LIST_HEAD(&nodemap->nm_list);
748                 } else {
749                         list_add(&nodemap->nm_list,
750                                  &(*recent_nodemap)->nm_list);
751                 }
752                 nodemap_putref(nodemap);
753                 break;
754         case NODEMAP_RANGE_IDX:
755                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
756                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
757
758                 rc = nodemap_add_range_helper(config, nodemap, nid,
759                                         le32_to_cpu(key->nk_range_id));
760                 if (rc != 0)
761                         GOTO(out, rc);
762                 break;
763         case NODEMAP_UIDMAP_IDX:
764         case NODEMAP_GIDMAP_IDX:
765                 map[0] = le32_to_cpu(key->nk_id_client);
766                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
767
768                 if (type == NODEMAP_UIDMAP_IDX)
769                         id_type = NODEMAP_UID;
770                 else
771                         id_type = NODEMAP_GID;
772
773                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
774                 if (rc != 0)
775                         GOTO(out, rc);
776                 break;
777         case NODEMAP_GLOBAL_IDX:
778                 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
779                 break;
780         default:
781                 CERROR("got keyrec pair for unknown type %d\n", type);
782                 break;
783         }
784
785         rc = type;
786
787         EXIT;
788
789 out:
790         return rc;
791 }
792
/*
 * Passes made over the index when loading a configuration: cluster records
 * are read first, all remaining record types in the second pass.
 */
enum nm_config_passes {
	NM_READ_CLUSTERS,
	NM_READ_ATTRIBUTES,
};
797
798 static int nodemap_load_entries(const struct lu_env *env,
799                                 struct dt_object *nodemap_idx)
800 {
801         const struct dt_it_ops *iops;
802         struct dt_it *it;
803         struct lu_nodemap *recent_nodemap = NULL;
804         struct nodemap_config *new_config = NULL;
805         u64 hash = 0;
806         bool activate_nodemap = false;
807         bool loaded_global_idx = false;
808         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
809         int rc = 0;
810
811         ENTRY;
812
813         iops = &nodemap_idx->do_index_ops->dio_it;
814
815         dt_read_lock(env, nodemap_idx, 0);
816         it = iops->init(env, nodemap_idx, 0);
817         if (IS_ERR(it))
818                 GOTO(out, rc = PTR_ERR(it));
819
820         rc = iops->load(env, it, hash);
821         if (rc < 0)
822                 GOTO(out_iops_fini, rc);
823
824         /* rc == 0 means we need to advance to record */
825         if (rc == 0) {
826                 rc = iops->next(env, it);
827
828                 if (rc < 0)
829                         GOTO(out_iops_put, rc);
830                 /* rc > 0 is eof, will be checked in while below */
831         } else {
832                 /* rc == 1, we found initial record and can process below */
833                 rc = 0;
834         }
835
836         new_config = nodemap_config_alloc();
837         if (IS_ERR(new_config)) {
838                 rc = PTR_ERR(new_config);
839                 new_config = NULL;
840                 GOTO(out_iops_put, rc);
841         }
842
843         /* rc > 0 is eof, check initial iops->next here as well */
844         while (rc == 0) {
845                 struct nodemap_key *key;
846                 union nodemap_rec rec;
847                 enum nodemap_idx_type key_type;
848
849                 key = (struct nodemap_key *)iops->key(env, it);
850                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
851                 if ((cur_pass == NM_READ_CLUSTERS &&
852                                 key_type == NODEMAP_CLUSTER_IDX) ||
853                     (cur_pass == NM_READ_ATTRIBUTES &&
854                                 key_type != NODEMAP_CLUSTER_IDX &&
855                                 key_type != NODEMAP_EMPTY_IDX)) {
856                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
857                         if (rc != -ESTALE) {
858                                 if (rc != 0)
859                                         GOTO(out_nodemap_config, rc);
860                                 rc = nodemap_process_keyrec(new_config, key, &rec,
861                                                             &recent_nodemap);
862                                 if (rc < 0)
863                                         GOTO(out_nodemap_config, rc);
864                                 if (rc == NODEMAP_GLOBAL_IDX)
865                                         loaded_global_idx = true;
866                         }
867                 }
868
869                 do
870                         rc = iops->next(env, it);
871                 while (rc == -ESTALE);
872
873                 /* move to second pass */
874                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
875                         cur_pass = NM_READ_ATTRIBUTES;
876                         rc = iops->load(env, it, 0);
877                         if (rc == 0)
878                                 rc = iops->next(env, it);
879                         else if (rc > 0)
880                                 rc = 0;
881                         else
882                                 GOTO(out, rc);
883                 }
884         }
885
886         if (rc > 0)
887                 rc = 0;
888
889 out_nodemap_config:
890         if (rc != 0)
891                 nodemap_config_dealloc(new_config);
892         else
893                 /* creating new default needs to be done outside dt read lock */
894                 activate_nodemap = true;
895 out_iops_put:
896         iops->put(env, it);
897 out_iops_fini:
898         iops->fini(env, it);
899 out:
900         dt_read_unlock(env, nodemap_idx);
901
902         if (rc != 0)
903                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
904                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
905
906         if (!activate_nodemap)
907                 RETURN(rc);
908
909         if (new_config->nmc_default_nodemap == NULL) {
910                 /* new MGS won't have a default nm on disk, so create it here */
911                 new_config->nmc_default_nodemap =
912                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
913                 if (IS_ERR(new_config->nmc_default_nodemap)) {
914                         rc = PTR_ERR(new_config->nmc_default_nodemap);
915                 } else {
916                         rc = nodemap_idx_nodemap_add_update(
917                                         new_config->nmc_default_nodemap,
918                                         nodemap_idx,
919                                         NM_ADD);
920                         nodemap_putref(new_config->nmc_default_nodemap);
921                 }
922         }
923
924         /* new nodemap config won't have an active/inactive record */
925         if (rc == 0 && loaded_global_idx == false) {
926                 struct nodemap_key       nk;
927                 union nodemap_rec        nr;
928
929                 nodemap_global_key_init(&nk);
930                 nodemap_global_rec_init(&nr, false);
931                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
932         }
933
934         if (rc == 0)
935                 nodemap_config_set_active(new_config);
936         else
937                 nodemap_config_dealloc(new_config);
938
939         RETURN(rc);
940 }
941
942 /**
943  * Step through active config and write to disk.
944  */
945 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
946                                             struct dt_device *dev,
947                                             struct local_oid_storage *los)
948 {
949         struct dt_object *o;
950         struct lu_nodemap *nodemap;
951         struct lu_nodemap *nm_tmp;
952         struct lu_nid_range *range;
953         struct lu_nid_range *range_temp;
954         struct lu_idmap *idmap;
955         struct lu_idmap *id_tmp;
956         struct rb_root root;
957         struct nodemap_key nk;
958         union nodemap_rec nr;
959         LIST_HEAD(nodemap_list_head);
960         int rc = 0, rc2;
961
962         ENTRY;
963
964         /* create a new index file to fill with active config */
965         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
966         if (IS_ERR(o))
967                 RETURN(o);
968
969         mutex_lock(&active_config_lock);
970
971         /* convert hash to list so we don't spin */
972         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
973                                nm_hash_list_cb, &nodemap_list_head);
974
975         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
976                 nodemap_cluster_key_init(&nk, nodemap->nm_id);
977                 nodemap_cluster_rec_init(&nr, nodemap);
978
979                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
980                 if (rc2 < 0) {
981                         rc = rc2;
982                         continue;
983                 }
984
985                 down_read(&active_config->nmc_range_tree_lock);
986                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
987                                          rn_list) {
988                         lnet_nid_t nid[2] = {
989                                 range->rn_node.in_extent.start,
990                                 range->rn_node.in_extent.end
991                         };
992                         nodemap_range_key_init(&nk, nodemap->nm_id,
993                                                range->rn_id);
994                         nodemap_range_rec_init(&nr, nid);
995                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
996                         if (rc2 < 0)
997                                 rc = rc2;
998                 }
999                 up_read(&active_config->nmc_range_tree_lock);
1000
1001                 /* we don't need to take nm_idmap_lock because active config
1002                  * lock prevents changes from happening to nodemaps
1003                  */
1004                 root = nodemap->nm_client_to_fs_uidmap;
1005                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1006                                                         id_client_to_fs) {
1007                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1008                                                idmap->id_client);
1009                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1010                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1011                         if (rc2 < 0)
1012                                 rc = rc2;
1013                 }
1014
1015                 root = nodemap->nm_client_to_fs_gidmap;
1016                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1017                                                         id_client_to_fs) {
1018                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1019                                                idmap->id_client);
1020                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1021                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1022                         if (rc2 < 0)
1023                                 rc = rc2;
1024                 }
1025         }
1026         nodemap_global_key_init(&nk);
1027         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1028         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1029         if (rc2 < 0)
1030                 rc = rc2;
1031
1032         mutex_unlock(&active_config_lock);
1033
1034         if (rc < 0) {
1035                 dt_object_put(env, o);
1036                 o = ERR_PTR(rc);
1037         }
1038
1039         RETURN(o);
1040 }
1041
1042 static void nodemap_save_all_caches(void)
1043 {
1044         struct nm_config_file   *ncf;
1045         struct lu_env            env;
1046         int                      rc = 0;
1047
1048         /* recreating nodemap cache requires fld_thread_key be in env */
1049         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1050         if (rc != 0) {
1051                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1052                 return;
1053         }
1054
1055         mutex_lock(&ncf_list_lock);
1056         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1057                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1058                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1059                 struct dt_object *o;
1060
1061                 /* put current config file so save conf can rewrite it */
1062                 dt_object_put_nocache(&env, ncf->ncf_obj);
1063                 ncf->ncf_obj = NULL;
1064
1065                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1066                 if (IS_ERR(o))
1067                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1068                               obd->obd_name, rc);
1069                 else
1070                         ncf->ncf_obj = o;
1071         }
1072         mutex_unlock(&ncf_list_lock);
1073
1074         lu_env_fini(&env);
1075 }
1076
1077 /* tracks if config still needs to be loaded, either from disk or network */
1078 static bool nodemap_config_loaded;
1079 static DEFINE_MUTEX(nodemap_config_loaded_lock);
1080
1081 /**
1082  * Ensures that configs loaded over the wire are prioritized over those loaded
1083  * from disk.
1084  *
1085  * \param config        config to set as the active config
1086  */
1087 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1088 {
1089         mutex_lock(&nodemap_config_loaded_lock);
1090         nodemap_config_set_active(config);
1091         nodemap_config_loaded = true;
1092         nodemap_save_all_caches();
1093         mutex_unlock(&nodemap_config_loaded_lock);
1094 }
1095 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1096
1097 /**
1098  * Register a dt_object representing the config index file. This should be
1099  * called by targets in order to load the nodemap configuration from disk. The
1100  * dt_object should be created with local_index_find_or_create and the index
1101  * features should be enabled with do_index_try.
1102  *
1103  * \param obj   dt_object returned by local_index_find_or_create
1104  *
1105  * \retval      on success: nm_config_file handle for later deregistration
1106  * \retval      -ENOMEM         memory allocation failure
1107  * \retval      -ENOENT         error loading nodemap config
1108  * \retval      -EINVAL         error loading nodemap config
1109  * \retval      -EEXIST         nodemap config already registered for MGS
1110  */
1111 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1112                                                    struct dt_object *obj,
1113                                                    struct local_oid_storage *los)
1114 {
1115         struct nm_config_file *ncf;
1116         int rc = 0;
1117         ENTRY;
1118
1119         if (nodemap_mgs_ncf != NULL)
1120                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1121
1122         OBD_ALLOC_PTR(ncf);
1123         if (ncf == NULL)
1124                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1125
1126         /* if loading from cache, prevent activation of MGS config until cache
1127          * loading is done, so disk config is overwritten by MGS config.
1128          */
1129         mutex_lock(&nodemap_config_loaded_lock);
1130         rc = nodemap_load_entries(env, obj);
1131         if (!rc)
1132                 nodemap_config_loaded = true;
1133         mutex_unlock(&nodemap_config_loaded_lock);
1134
1135         if (rc) {
1136                 OBD_FREE_PTR(ncf);
1137                 GOTO(out, ncf = ERR_PTR(rc));
1138         }
1139
1140         lu_object_get(&obj->do_lu);
1141
1142         ncf->ncf_obj = obj;
1143         ncf->ncf_los = los;
1144
1145         nodemap_mgs_ncf = ncf;
1146
1147 out:
1148         return ncf;
1149 }
1150 EXPORT_SYMBOL(nm_config_file_register_mgs);
1151
1152 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1153                                                    struct dt_device *dev,
1154                                                    struct local_oid_storage *los)
1155 {
1156         struct nm_config_file *ncf;
1157         struct dt_object *config_obj = NULL;
1158         int rc = 0;
1159
1160         OBD_ALLOC_PTR(ncf);
1161         if (ncf == NULL)
1162                 RETURN(ERR_PTR(-ENOMEM));
1163
1164         /* don't load from cache if config already loaded */
1165         mutex_lock(&nodemap_config_loaded_lock);
1166         if (!nodemap_config_loaded) {
1167                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1168                 if (IS_ERR(config_obj))
1169                         rc = PTR_ERR(config_obj);
1170                 else
1171                         rc = nodemap_load_entries(env, config_obj);
1172
1173                 if (!rc)
1174                         nodemap_config_loaded = true;
1175         }
1176         mutex_unlock(&nodemap_config_loaded_lock);
1177         if (rc)
1178                 GOTO(out_ncf, rc);
1179
1180         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1181         if (!config_obj) {
1182                 config_obj = nodemap_save_config_cache(env, dev, los);
1183                 if (IS_ERR(config_obj))
1184                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1185         }
1186
1187         ncf->ncf_obj = config_obj;
1188         ncf->ncf_los = los;
1189
1190         mutex_lock(&ncf_list_lock);
1191         list_add(&ncf->ncf_list, &ncf_list_head);
1192         mutex_unlock(&ncf_list_lock);
1193
1194 out_ncf:
1195         if (rc) {
1196                 OBD_FREE_PTR(ncf);
1197                 RETURN(ERR_PTR(rc));
1198         }
1199
1200         RETURN(ncf);
1201 }
1202 EXPORT_SYMBOL(nm_config_file_register_tgt);
1203
1204 /**
1205  * Deregister a nm_config_file. Should be called by targets during cleanup.
1206  *
1207  * \param ncf   config file to deregister
1208  */
1209 void nm_config_file_deregister_mgs(const struct lu_env *env,
1210                                    struct nm_config_file *ncf)
1211 {
1212         ENTRY;
1213         LASSERT(nodemap_mgs_ncf == ncf);
1214
1215         nodemap_mgs_ncf = NULL;
1216         if (ncf->ncf_obj)
1217                 dt_object_put(env, ncf->ncf_obj);
1218
1219         OBD_FREE_PTR(ncf);
1220
1221         EXIT;
1222 }
1223 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1224
1225 void nm_config_file_deregister_tgt(const struct lu_env *env,
1226                                    struct nm_config_file *ncf)
1227 {
1228         ENTRY;
1229
1230         if (ncf == NULL)
1231                 return;
1232
1233         mutex_lock(&ncf_list_lock);
1234         list_del(&ncf->ncf_list);
1235         mutex_unlock(&ncf_list_lock);
1236
1237         if (ncf->ncf_obj)
1238                 dt_object_put(env, ncf->ncf_obj);
1239
1240         OBD_FREE_PTR(ncf);
1241
1242         EXIT;
1243 }
1244 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1245
1246 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1247                               struct lu_nodemap **recent_nodemap)
1248 {
1249         struct nodemap_key *key;
1250         union nodemap_rec *rec;
1251         char *entry;
1252         int j;
1253         int k;
1254         int rc = 0;
1255         int size = dt_nodemap_features.dif_keysize_max +
1256                    dt_nodemap_features.dif_recsize_max;
1257         ENTRY;
1258
1259         for (j = 0; j < LU_PAGE_COUNT; j++) {
1260                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1261                         return -EINVAL;
1262
1263                 /* get and process keys and records from page */
1264                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1265                         entry = lip->lp_idx.lip_entries + k * size;
1266                         key = (struct nodemap_key *)entry;
1267
1268                         entry += dt_nodemap_features.dif_keysize_max;
1269                         rec = (union nodemap_rec *)entry;
1270
1271                         rc = nodemap_process_keyrec(config, key, rec,
1272                                                     recent_nodemap);
1273                         if (rc < 0)
1274                                 return rc;
1275                 }
1276                 lip++;
1277         }
1278
1279         EXIT;
1280         return 0;
1281 }
1282 EXPORT_SYMBOL(nodemap_process_idx_pages);
1283
1284 static int nodemap_page_build(const struct lu_env *env, union lu_page *lp,
1285                               size_t nob, const struct dt_it_ops *iops,
1286                               struct dt_it *it, __u32 attr, void *arg)
1287 {
1288         struct idx_info *ii = (struct idx_info *)arg;
1289         struct lu_idxpage *lip = &lp->lp_idx;
1290         char *entry;
1291         size_t size = ii->ii_keysize + ii->ii_recsize;
1292         int rc;
1293         ENTRY;
1294
1295         if (nob < LIP_HDR_SIZE)
1296                 return -EINVAL;
1297
1298         /* initialize the header of the new container */
1299         memset(lip, 0, LIP_HDR_SIZE);
1300         lip->lip_magic = LIP_MAGIC;
1301         nob           -= LIP_HDR_SIZE;
1302
1303         entry = lip->lip_entries;
1304         do {
1305                 char            *tmp_entry = entry;
1306                 struct dt_key   *key;
1307                 __u64           hash;
1308                 enum nodemap_idx_type key_type;
1309
1310                 /* fetch 64-bit hash value */
1311                 hash = iops->store(env, it);
1312                 ii->ii_hash_end = hash;
1313
1314                 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1315                         if (lip->lip_nr != 0)
1316                                 GOTO(out, rc = 0);
1317                 }
1318
1319                 if (nob < size) {
1320                         if (lip->lip_nr == 0)
1321                                 GOTO(out, rc = -EINVAL);
1322                         GOTO(out, rc = 0);
1323                 }
1324
1325                 key = iops->key(env, it);
1326                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1327
1328                 /* on the first pass, get only the cluster types. On second
1329                  * pass, get all the rest */
1330                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1331                                 key_type == NODEMAP_CLUSTER_IDX) ||
1332                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1333                                 key_type != NODEMAP_CLUSTER_IDX &&
1334                                 key_type != NODEMAP_EMPTY_IDX)) {
1335                         memcpy(tmp_entry, key, ii->ii_keysize);
1336                         tmp_entry += ii->ii_keysize;
1337
1338                         /* and finally the record */
1339                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1340                                        attr);
1341                         if (rc != -ESTALE) {
1342                                 if (rc != 0)
1343                                         GOTO(out, rc);
1344
1345                                 /* hash/key/record successfully copied! */
1346                                 lip->lip_nr++;
1347                                 if (unlikely(lip->lip_nr == 1 &&
1348                                     ii->ii_count == 0))
1349                                         ii->ii_hash_start = hash;
1350
1351                                 entry = tmp_entry + ii->ii_recsize;
1352                                 nob -= size;
1353                         }
1354                 }
1355
1356                 /* move on to the next record */
1357                 do {
1358                         rc = iops->next(env, it);
1359                 } while (rc == -ESTALE);
1360
1361                 /* move to second pass */
1362                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1363                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1364                         rc = iops->load(env, it, 0);
1365                         if (rc == 0)
1366                                 rc = iops->next(env, it);
1367                         else if (rc > 0)
1368                                 rc = 0;
1369                         else
1370                                 GOTO(out, rc);
1371                 }
1372
1373         } while (rc == 0);
1374
1375         GOTO(out, rc);
1376 out:
1377         if (rc >= 0 && lip->lip_nr > 0)
1378                 /* one more container */
1379                 ii->ii_count++;
1380         if (rc > 0)
1381                 /* no more entries */
1382                 ii->ii_hash_end = II_END_OFF;
1383         return rc;
1384 }
1385
1386
1387 int nodemap_index_read(struct lu_env *env,
1388                        struct nm_config_file *ncf,
1389                        struct idx_info *ii,
1390                        const struct lu_rdpg *rdpg)
1391 {
1392         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1393         __u64                    version;
1394         int                      rc = 0;
1395
1396         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1397         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1398
1399         dt_read_lock(env, nodemap_idx, 0);
1400         version = dt_version_get(env, nodemap_idx);
1401         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1402                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1403                        ii->ii_version,
1404                        version);
1405                 ii->ii_hash_end = 0;
1406         } else {
1407                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1408                                    ii);
1409                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1410         }
1411
1412         if (rc >= 0)
1413                 ii->ii_version = version;
1414
1415         dt_read_unlock(env, nodemap_idx);
1416         return rc;
1417 }
1418 EXPORT_SYMBOL(nodemap_index_read);
1419
1420 /**
1421  * Returns the current nodemap configuration to MGC by walking the nodemap
1422  * config index and storing it in the response buffer.
1423  *
1424  * \param       req             incoming MGS_CONFIG_READ request
1425  * \retval      0               success
1426  * \retval      -EINVAL         malformed request
1427  * \retval      -ENOTCONN       client evicted/reconnected already
1428  * \retval      -ETIMEDOUT      client timeout or network error
1429  * \retval      -ENOMEM
1430  */
1431 int nodemap_get_config_req(struct obd_device *mgs_obd,
1432                            struct ptlrpc_request *req)
1433 {
1434         struct mgs_config_body *body;
1435         struct mgs_config_res *res;
1436         struct lu_rdpg rdpg;
1437         struct idx_info nodemap_ii;
1438         struct ptlrpc_bulk_desc *desc;
1439         struct l_wait_info lwi;
1440         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1441         int i;
1442         int page_count;
1443         int bytes = 0;
1444         int rc = 0;
1445
1446         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1447         if (!body)
1448                 RETURN(-EINVAL);
1449
1450         if (body->mcb_type != CONFIG_T_NODEMAP)
1451                 RETURN(-EINVAL);
1452
1453         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1454         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1455                 PAGE_SHIFT;
1456         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1457                 RETURN(-EINVAL);
1458
1459         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1460                body->mcb_name, rdpg.rp_count);
1461
1462         /* allocate pages to store the containers */
1463         OBD_ALLOC(rdpg.rp_pages, sizeof(*rdpg.rp_pages) * rdpg.rp_npages);
1464         if (rdpg.rp_pages == NULL)
1465                 RETURN(-ENOMEM);
1466         for (i = 0; i < rdpg.rp_npages; i++) {
1467                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1468                 if (rdpg.rp_pages[i] == NULL)
1469                         GOTO(out, rc = -ENOMEM);
1470         }
1471
1472         rdpg.rp_hash = body->mcb_offset;
1473         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1474         nodemap_ii.ii_flags = II_FL_NOHASH;
1475         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1476         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1477
1478         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1479                                    mgs_obd->u.obt.obt_nodemap_config_file,
1480                                    &nodemap_ii, &rdpg);
1481         if (bytes < 0)
1482                 GOTO(out, rc = bytes);
1483
1484         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1485
1486         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1487         if (res == NULL)
1488                 GOTO(out, rc = -EINVAL);
1489         res->mcr_offset = nodemap_ii.ii_hash_end;
1490         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1491
1492         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1493         LASSERT(page_count <= rdpg.rp_count);
1494         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1495                                     PTLRPC_BULK_PUT_SOURCE |
1496                                         PTLRPC_BULK_BUF_KIOV,
1497                                     MGS_BULK_PORTAL,
1498                                     &ptlrpc_bulk_kiov_pin_ops);
1499         if (desc == NULL)
1500                 GOTO(out, rc = -ENOMEM);
1501
1502         for (i = 0; i < page_count && bytes > 0; i++) {
1503                 ptlrpc_prep_bulk_page_pin(desc, rdpg.rp_pages[i], 0,
1504                                           min_t(int, bytes, PAGE_SIZE));
1505                 bytes -= PAGE_SIZE;
1506         }
1507
1508         rc = target_bulk_io(req->rq_export, desc, &lwi);
1509         ptlrpc_free_bulk(desc);
1510
1511 out:
1512         if (rdpg.rp_pages != NULL) {
1513                 for (i = 0; i < rdpg.rp_npages; i++)
1514                         if (rdpg.rp_pages[i] != NULL)
1515                                 __free_page(rdpg.rp_pages[i]);
1516                 OBD_FREE(rdpg.rp_pages,
1517                          rdpg.rp_npages * sizeof(rdpg.rp_pages[0]));
1518         }
1519         return rc;
1520 }
1521 EXPORT_SYMBOL(nodemap_get_config_req);