Whamcloud - gitweb
LU-15451 sec: read-only nodemap flag
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2017, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <uapi/linux/lnet/lnet-types.h>
54 #include <uapi/linux/lustre/lustre_idl.h>
55 #include <dt_object.h>
56 #include <lu_object.h>
57 #include <lustre_net.h>
58 #include <lustre_nodemap.h>
59 #include <obd_class.h>
60 #include <obd_support.h>
61 #include "nodemap_internal.h"
62
63 /* list of registered nodemap index files, except MGS */
64 static LIST_HEAD(ncf_list_head);
65 static DEFINE_MUTEX(ncf_list_lock);
66
67 /* MGS index is different than others, others are listeners to MGS idx */
68 static struct nm_config_file *nodemap_mgs_ncf;
69
70 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id)
71 {
72         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
73                                                         NODEMAP_CLUSTER_IDX));
74         nk->nk_unused = 0;
75 }
76
77 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
78                                      const struct lu_nodemap *nodemap)
79 {
80         BUILD_BUG_ON(sizeof(nr->ncr.ncr_name) != sizeof(nodemap->nm_name));
81
82         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nr->ncr.ncr_name));
83         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
84         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
85         nr->ncr.ncr_squash_projid = cpu_to_le32(nodemap->nm_squash_projid);
86         nr->ncr.ncr_flags =
87                 (nodemap->nmf_trust_client_ids ?
88                         NM_FL_TRUST_CLIENT_IDS : 0) |
89                 (nodemap->nmf_allow_root_access ?
90                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
91                 (nodemap->nmf_deny_unknown ?
92                         NM_FL_DENY_UNKNOWN : 0) |
93                 (nodemap->nmf_map_mode & NODEMAP_MAP_UID ?
94                         NM_FL_MAP_UID : 0) |
95                 (nodemap->nmf_map_mode & NODEMAP_MAP_GID ?
96                         NM_FL_MAP_GID : 0) |
97                 (nodemap->nmf_map_mode & NODEMAP_MAP_PROJID ?
98                         NM_FL_MAP_PROJID : 0) |
99                 (nodemap->nmf_enable_audit ?
100                         NM_FL_ENABLE_AUDIT : 0) |
101                 (nodemap->nmf_forbid_encryption ?
102                         NM_FL_FORBID_ENCRYPT : 0);
103         nr->ncr.ncr_flags2 =
104                 (nodemap->nmf_readonly_mount ?
105                         NM_FL2_READONLY_MOUNT : 0);
106 }
107
108 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
109                                    enum nodemap_id_type id_type,
110                                    u32 id_client)
111 {
112         enum nodemap_idx_type idx_type;
113
114         if (id_type == NODEMAP_UID)
115                 idx_type = NODEMAP_UIDMAP_IDX;
116         else if (id_type == NODEMAP_GID)
117                 idx_type = NODEMAP_GIDMAP_IDX;
118         else if (id_type == NODEMAP_PROJID)
119                 idx_type = NODEMAP_PROJIDMAP_IDX;
120         else
121                 idx_type = NODEMAP_EMPTY_IDX;
122
123         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
124         nk->nk_id_client = cpu_to_le32(id_client);
125 }
126
127 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
128 {
129         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
130 }
131
132 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
133                                    unsigned int rn_id)
134 {
135         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
136                                                         NODEMAP_RANGE_IDX));
137         nk->nk_range_id = cpu_to_le32(rn_id);
138 }
139
140 static void nodemap_range_rec_init(union nodemap_rec *nr,
141                                    const lnet_nid_t nid[2])
142 {
143         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
144         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
145 }
146
147 static void nodemap_global_key_init(struct nodemap_key *nk)
148 {
149         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
150         nk->nk_unused = 0;
151 }
152
153 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
154 {
155         nr->ngr.ngr_is_active = active;
156 }
157
/*
 * Increment the version of the nodemap index object by one as part of
 * transaction \a th.
 *
 * Should be called with dt_write lock held on \a nodemap_idx.
 */
static void nodemap_inc_version(const struct lu_env *env,
                                struct dt_object *nodemap_idx,
                                struct thandle *th)
{
        dt_version_set(env, nodemap_idx,
                       dt_version_get(env, nodemap_idx) + 1, th);
}
166
/* Mode selector for nodemap_cache_find_create(). */
enum ncfc_find_create {
        /* remove any existing on-disk index and create a fresh one */
        NCFC_CREATE_NEW = 1,
};
170
171 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
172                                                    struct dt_device *dev,
173                                                    struct local_oid_storage *los,
174                                                    enum ncfc_find_create create_new)
175 {
176         struct lu_fid tfid;
177         struct dt_object *root_obj;
178         struct dt_object *nm_obj;
179         int rc = 0;
180
181         rc = dt_root_get(env, dev, &tfid);
182         if (rc < 0)
183                 GOTO(out, nm_obj = ERR_PTR(rc));
184
185         root_obj = dt_locate(env, dev, &tfid);
186         if (unlikely(IS_ERR(root_obj)))
187                 GOTO(out, nm_obj = root_obj);
188
189         rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
190         if (rc == -ENOENT) {
191                 if (dev->dd_rdonly)
192                         GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
193         } else if (rc) {
194                 GOTO(out_root, nm_obj = ERR_PTR(rc));
195         } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
196                 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
197         }
198
199 again:
200         /* if loading index fails the first time, create new index */
201         if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
202                 CDEBUG(D_INFO, "removing old index, creating new one\n");
203                 rc = local_object_unlink(env, dev, root_obj,
204                                          LUSTRE_NODEMAP_NAME);
205                 if (rc < 0) {
206                         /* XXX not sure the best way to get obd name. */
207                         CERROR("cannot destroy nodemap index: rc = %d\n",
208                                rc);
209                         GOTO(out_root, nm_obj = ERR_PTR(rc));
210                 }
211         }
212
213         nm_obj = local_index_find_or_create(env, los, root_obj,
214                                                 LUSTRE_NODEMAP_NAME,
215                                                 S_IFREG | S_IRUGO | S_IWUSR,
216                                                 &dt_nodemap_features);
217         if (IS_ERR(nm_obj))
218                 GOTO(out_root, nm_obj);
219
220         if (nm_obj->do_index_ops == NULL) {
221                 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
222                                                       &dt_nodemap_features);
223                 /* even if loading from tgt fails, connecting to MGS will
224                  * rewrite the config
225                  */
226                 if (rc < 0) {
227                         dt_object_put(env, nm_obj);
228
229                         if (create_new == NCFC_CREATE_NEW)
230                                 GOTO(out_root, nm_obj = ERR_PTR(rc));
231
232                         CERROR("cannot load nodemap index from disk, creating "
233                                "new index: rc = %d\n", rc);
234                         create_new = NCFC_CREATE_NEW;
235                         goto again;
236                 }
237         }
238
239 out_root:
240         dt_object_put(env, root_obj);
241 out:
242         return nm_obj;
243 }
244
245 static int nodemap_idx_insert(const struct lu_env *env,
246                               struct dt_object *idx,
247                               const struct nodemap_key *nk,
248                               const union nodemap_rec *nr)
249 {
250         struct thandle *th;
251         struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
252         int rc;
253
254         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
255
256         th = dt_trans_create(env, dev);
257
258         if (IS_ERR(th))
259                 GOTO(out, rc = PTR_ERR(th));
260
261         rc = dt_declare_insert(env, idx,
262                                (const struct dt_rec *)nr,
263                                (const struct dt_key *)nk, th);
264         if (rc != 0)
265                 GOTO(out, rc);
266
267         rc = dt_declare_version_set(env, idx, th);
268         if (rc != 0)
269                 GOTO(out, rc);
270
271         rc = dt_trans_start_local(env, dev, th);
272         if (rc != 0)
273                 GOTO(out, rc);
274
275         dt_write_lock(env, idx, 0);
276
277         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
278                        (const struct dt_key *)nk, th);
279
280         nodemap_inc_version(env, idx, th);
281         dt_write_unlock(env, idx);
282 out:
283         dt_trans_stop(env, dev, th);
284
285         return rc;
286 }
287
288 static int nodemap_idx_update(const struct lu_env *env,
289                               struct dt_object *idx,
290                               const struct nodemap_key *nk,
291                               const union nodemap_rec *nr)
292 {
293         struct thandle          *th;
294         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
295         int                      rc = 0;
296
297         th = dt_trans_create(env, dev);
298
299         if (IS_ERR(th))
300                 GOTO(out, rc = PTR_ERR(th));
301
302         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
303         if (rc != 0)
304                 GOTO(out, rc);
305
306         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
307                                (const struct dt_key *)nk, th);
308         if (rc != 0)
309                 GOTO(out, rc);
310
311         rc = dt_declare_version_set(env, idx, th);
312         if (rc != 0)
313                 GOTO(out, rc);
314
315         rc = dt_trans_start_local(env, dev, th);
316         if (rc != 0)
317                 GOTO(out, rc);
318
319         dt_write_lock(env, idx, 0);
320
321         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
322         if (rc != 0)
323                 GOTO(out_lock, rc);
324
325         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
326                        (const struct dt_key *)nk, th);
327         if (rc != 0)
328                 GOTO(out_lock, rc);
329
330         nodemap_inc_version(env, idx, th);
331 out_lock:
332         dt_write_unlock(env, idx);
333 out:
334         dt_trans_stop(env, dev, th);
335
336         return rc;
337 }
338
339 static int nodemap_idx_delete(const struct lu_env *env,
340                               struct dt_object *idx,
341                               const struct nodemap_key *nk,
342                               const union nodemap_rec *unused)
343 {
344         struct thandle          *th;
345         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
346         int                      rc = 0;
347
348         th = dt_trans_create(env, dev);
349
350         if (IS_ERR(th))
351                 GOTO(out, rc = PTR_ERR(th));
352
353         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
354         if (rc != 0)
355                 GOTO(out, rc);
356
357         rc = dt_declare_version_set(env, idx, th);
358         if (rc != 0)
359                 GOTO(out, rc);
360
361         rc = dt_trans_start_local(env, dev, th);
362         if (rc != 0)
363                 GOTO(out, rc);
364
365         dt_write_lock(env, idx, 0);
366
367         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
368
369         nodemap_inc_version(env, idx, th);
370
371         dt_write_unlock(env, idx);
372 out:
373         dt_trans_stop(env, dev, th);
374
375         return rc;
376 }
377
/* Selects whether a record is inserted as new or replaces an existing one. */
enum nm_add_update {
        NM_ADD,                 /* insert a new record */
        NM_UPDATE,              /* replace an existing record */
};
382
383 static int nodemap_idx_nodemap_add_update(const struct lu_nodemap *nodemap,
384                                           struct dt_object *idx,
385                                           enum nm_add_update update)
386 {
387         struct nodemap_key nk;
388         union nodemap_rec nr;
389         struct lu_env env;
390         int rc = 0;
391
392         ENTRY;
393
394         rc = lu_env_init(&env, LCT_LOCAL);
395         if (rc)
396                 RETURN(rc);
397
398         nodemap_cluster_key_init(&nk, nodemap->nm_id);
399         nodemap_cluster_rec_init(&nr, nodemap);
400
401         if (update == NM_UPDATE)
402                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
403         else
404                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
405
406         lu_env_fini(&env);
407
408         RETURN(rc);
409 }
410
411 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
412 {
413         if (nodemap_mgs_ncf == NULL) {
414                 CERROR("cannot add nodemap config to non-existing MGS.\n");
415                 return -EINVAL;
416         }
417
418         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
419                                               NM_ADD);
420 }
421
422 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
423 {
424         if (nodemap_mgs_ncf == NULL) {
425                 CERROR("cannot add nodemap config to non-existing MGS.\n");
426                 return -EINVAL;
427         }
428
429         return nodemap_idx_nodemap_add_update(nodemap, nodemap_mgs_ncf->ncf_obj,
430                                               NM_UPDATE);
431 }
432
433 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
434 {
435         struct rb_root           root;
436         struct lu_idmap         *idmap;
437         struct lu_idmap         *temp;
438         struct lu_nid_range     *range;
439         struct lu_nid_range     *range_temp;
440         struct nodemap_key       nk;
441         struct lu_env            env;
442         int                      rc = 0;
443         int                      rc2 = 0;
444
445         ENTRY;
446
447         if (nodemap_mgs_ncf == NULL) {
448                 CERROR("cannot add nodemap config to non-existing MGS.\n");
449                 return -EINVAL;
450         }
451
452         rc = lu_env_init(&env, LCT_LOCAL);
453         if (rc != 0)
454                 RETURN(rc);
455
456         root = nodemap->nm_fs_to_client_uidmap;
457         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
458                                                 id_fs_to_client) {
459                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
460                                        idmap->id_client);
461                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
462                                          &nk, NULL);
463                 if (rc2 < 0)
464                         rc = rc2;
465         }
466
467         root = nodemap->nm_client_to_fs_gidmap;
468         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
469                                                 id_client_to_fs) {
470                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
471                                        idmap->id_client);
472                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
473                                          &nk, NULL);
474                 if (rc2 < 0)
475                         rc = rc2;
476         }
477
478         root = nodemap->nm_client_to_fs_projidmap;
479         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
480                                                 id_client_to_fs) {
481                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_PROJID,
482                                        idmap->id_client);
483                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
484                                          &nk, NULL);
485                 if (rc2 < 0)
486                         rc = rc2;
487         }
488
489         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
490                                  rn_list) {
491                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
492                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
493                                          &nk, NULL);
494                 if (rc2 < 0)
495                         rc = rc2;
496         }
497
498         nodemap_cluster_key_init(&nk, nodemap->nm_id);
499         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
500         if (rc2 < 0)
501                 rc = rc2;
502
503         lu_env_fini(&env);
504
505         RETURN(rc);
506 }
507
508 int nodemap_idx_range_add(const struct lu_nid_range *range,
509                           const lnet_nid_t nid[2])
510 {
511         struct nodemap_key       nk;
512         union nodemap_rec        nr;
513         struct lu_env            env;
514         int                      rc = 0;
515         ENTRY;
516
517         if (nodemap_mgs_ncf == NULL) {
518                 CERROR("cannot add nodemap config to non-existing MGS.\n");
519                 return -EINVAL;
520         }
521
522         rc = lu_env_init(&env, LCT_LOCAL);
523         if (rc != 0)
524                 RETURN(rc);
525
526         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
527         nodemap_range_rec_init(&nr, nid);
528
529         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
530         lu_env_fini(&env);
531
532         RETURN(rc);
533 }
534
535 int nodemap_idx_range_del(const struct lu_nid_range *range)
536 {
537         struct nodemap_key       nk;
538         struct lu_env            env;
539         int                      rc = 0;
540         ENTRY;
541
542         if (nodemap_mgs_ncf == NULL) {
543                 CERROR("cannot add nodemap config to non-existing MGS.\n");
544                 return -EINVAL;
545         }
546
547         rc = lu_env_init(&env, LCT_LOCAL);
548         if (rc != 0)
549                 RETURN(rc);
550
551         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
552
553         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
554         lu_env_fini(&env);
555
556         RETURN(rc);
557 }
558
559 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
560                           enum nodemap_id_type id_type,
561                           const u32 map[2])
562 {
563         struct nodemap_key       nk;
564         union nodemap_rec        nr;
565         struct lu_env            env;
566         int                      rc = 0;
567         ENTRY;
568
569         if (nodemap_mgs_ncf == NULL) {
570                 CERROR("cannot add nodemap config to non-existing MGS.\n");
571                 return -EINVAL;
572         }
573
574         rc = lu_env_init(&env, LCT_LOCAL);
575         if (rc != 0)
576                 RETURN(rc);
577
578         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
579         nodemap_idmap_rec_init(&nr, map[1]);
580
581         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
582         lu_env_fini(&env);
583
584         RETURN(rc);
585 }
586
587 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
588                           enum nodemap_id_type id_type,
589                           const u32 map[2])
590 {
591         struct nodemap_key       nk;
592         struct lu_env            env;
593         int                      rc = 0;
594         ENTRY;
595
596         if (nodemap_mgs_ncf == NULL) {
597                 CERROR("cannot add nodemap config to non-existing MGS.\n");
598                 return -EINVAL;
599         }
600
601         rc = lu_env_init(&env, LCT_LOCAL);
602         if (rc != 0)
603                 RETURN(rc);
604
605         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
606
607         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
608         lu_env_fini(&env);
609
610         RETURN(rc);
611 }
612
613 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
614 {
615         struct nodemap_key       nk;
616         union nodemap_rec        nr;
617         struct lu_env            env;
618         int                      rc = 0;
619         ENTRY;
620
621         if (nodemap_mgs_ncf == NULL) {
622                 CERROR("cannot add nodemap config to non-existing MGS.\n");
623                 return -EINVAL;
624         }
625
626         rc = lu_env_init(&env, LCT_LOCAL);
627         if (rc != 0)
628                 RETURN(rc);
629
630         nodemap_global_key_init(&nk);
631         nodemap_global_rec_init(&nr, value);
632
633         if (update == NM_UPDATE)
634                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
635                                         &nk, &nr);
636         else
637                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
638                                         &nk, &nr);
639
640         lu_env_fini(&env);
641
642         RETURN(rc);
643 }
644
645 int nodemap_idx_nodemap_activate(bool value)
646 {
647         return nodemap_idx_global_add_update(value, NM_UPDATE);
648 }
649
650 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
651 {
652         u32                      nodemap_id;
653
654         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
655         return nm_idx_get_type(nodemap_id);
656 }
657
658 /**
659  * Process a key/rec pair and modify the new configuration.
660  *
661  * \param       config          configuration to update with this key/rec data
662  * \param       key             key of the record that was loaded
663  * \param       rec             record that was loaded
664  * \param       recent_nodemap  last referenced nodemap
665  * \retval      type of record processed, see enum #nodemap_idx_type
666  * \retval      -ENOENT         range or map loaded before nodemap record
667  * \retval      -EINVAL         duplicate nodemap cluster records found with
668  *                              different IDs, or nodemap has invalid name
669  * \retval      -ENOMEM
670  */
671 static int nodemap_process_keyrec(struct nodemap_config *config,
672                                   const struct nodemap_key *key,
673                                   const union nodemap_rec *rec,
674                                   struct lu_nodemap **recent_nodemap)
675 {
676         struct lu_nodemap *nodemap = NULL;
677         enum nodemap_idx_type type;
678         enum nodemap_id_type id_type;
679         enum nm_flag_bits flags;
680         enum nm_flag2_bits flags2;
681         u32 nodemap_id;
682         lnet_nid_t nid[2];
683         u32 map[2];
684         int rc;
685
686         ENTRY;
687
688         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
689
690         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
691         type = nodemap_get_key_type(key);
692         nodemap_id = nm_idx_set_type(nodemap_id, 0);
693
694         CDEBUG(D_INFO, "found config entry, nm_id %d type %d\n",
695                nodemap_id, type);
696
697         /* find the correct nodemap in the load list */
698         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
699             type == NODEMAP_GIDMAP_IDX || type == NODEMAP_PROJIDMAP_IDX) {
700                 struct lu_nodemap *tmp = NULL;
701
702                 nodemap = *recent_nodemap;
703
704                 if (nodemap == NULL)
705                         GOTO(out, rc = -ENOENT);
706
707                 if (nodemap->nm_id != nodemap_id) {
708                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
709                                 if (tmp->nm_id == nodemap_id) {
710                                         nodemap = tmp;
711                                         break;
712                                 }
713
714                         if (nodemap->nm_id != nodemap_id)
715                                 GOTO(out, rc = -ENOENT);
716                 }
717
718                 /* update most recently used nodemap if necessay */
719                 if (nodemap != *recent_nodemap)
720                         *recent_nodemap = nodemap;
721         }
722
723         switch (type) {
724         case NODEMAP_EMPTY_IDX:
725                 if (nodemap_id != 0)
726                         CWARN("Found nodemap config record without type field, "
727                               " nodemap_id=%d. nodemap config file corrupt?\n",
728                               nodemap_id);
729                 break;
730         case NODEMAP_CLUSTER_IDX: {
731                 struct lu_nodemap *old_nm = NULL;
732
733                 nodemap = cfs_hash_lookup(config->nmc_nodemap_hash,
734                                           rec->ncr.ncr_name);
735                 if (nodemap == NULL) {
736                         if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID) {
737                                 nodemap = nodemap_create(rec->ncr.ncr_name,
738                                                          config, 1);
739                         } else {
740                                 nodemap = nodemap_create(rec->ncr.ncr_name,
741                                                          config, 0);
742                         }
743                         if (IS_ERR(nodemap))
744                                 GOTO(out, rc = PTR_ERR(nodemap));
745
746                         /* we need to override the local ID with the saved ID */
747                         nodemap->nm_id = nodemap_id;
748                         if (nodemap_id > config->nmc_nodemap_highest_id)
749                                 config->nmc_nodemap_highest_id = nodemap_id;
750
751                 } else if (nodemap->nm_id != nodemap_id) {
752                         nodemap_putref(nodemap);
753                         GOTO(out, rc = -EINVAL);
754                 }
755
756                 nodemap->nm_squash_uid =
757                                 le32_to_cpu(rec->ncr.ncr_squash_uid);
758                 nodemap->nm_squash_gid =
759                                 le32_to_cpu(rec->ncr.ncr_squash_gid);
760                 nodemap->nm_squash_projid =
761                         le32_to_cpu(rec->ncr.ncr_squash_projid);
762
763                 flags = rec->ncr.ncr_flags;
764                 nodemap->nmf_allow_root_access =
765                                         flags & NM_FL_ALLOW_ROOT_ACCESS;
766                 nodemap->nmf_trust_client_ids =
767                                         flags & NM_FL_TRUST_CLIENT_IDS;
768                 nodemap->nmf_deny_unknown =
769                                         flags & NM_FL_DENY_UNKNOWN;
770                 nodemap->nmf_map_mode = (flags & NM_FL_MAP_UID ?
771                                          NODEMAP_MAP_UID : 0) |
772                                         (flags & NM_FL_MAP_GID ?
773                                          NODEMAP_MAP_GID : 0) |
774                                         (flags & NM_FL_MAP_PROJID ?
775                                          NODEMAP_MAP_PROJID : 0);
776                 if (nodemap->nmf_map_mode == NODEMAP_MAP_BOTH_LEGACY)
777                         nodemap->nmf_map_mode = NODEMAP_MAP_BOTH;
778                 nodemap->nmf_enable_audit =
779                                         flags & NM_FL_ENABLE_AUDIT;
780                 nodemap->nmf_forbid_encryption =
781                                         flags & NM_FL_FORBID_ENCRYPT;
782                 flags2 = rec->ncr.ncr_flags2;
783                 nodemap->nmf_readonly_mount =
784                                         flags2 & NM_FL2_READONLY_MOUNT;
785
786                 /* The fileset should be saved otherwise it will be empty
787                  * every time in case of "NODEMAP_CLUSTER_IDX". */
788                 mutex_lock(&active_config_lock);
789                 old_nm = nodemap_lookup(rec->ncr.ncr_name);
790                 if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0')
791                         strlcpy(nodemap->nm_fileset, old_nm->nm_fileset,
792                                 sizeof(nodemap->nm_fileset));
793                 mutex_unlock(&active_config_lock);
794                 if (!IS_ERR(old_nm))
795                         nodemap_putref(old_nm);
796
797                 if (*recent_nodemap == NULL) {
798                         *recent_nodemap = nodemap;
799                         INIT_LIST_HEAD(&nodemap->nm_list);
800                 } else {
801                         list_add(&nodemap->nm_list,
802                                  &(*recent_nodemap)->nm_list);
803                 }
804                 nodemap_putref(nodemap);
805                 break;
806         }
807         case NODEMAP_RANGE_IDX:
808                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
809                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
810
811                 rc = nodemap_add_range_helper(config, nodemap, nid,
812                                         le32_to_cpu(key->nk_range_id));
813                 if (rc != 0)
814                         GOTO(out, rc);
815                 break;
816         case NODEMAP_UIDMAP_IDX:
817         case NODEMAP_GIDMAP_IDX:
818         case NODEMAP_PROJIDMAP_IDX:
819                 map[0] = le32_to_cpu(key->nk_id_client);
820                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
821
822                 if (type == NODEMAP_UIDMAP_IDX)
823                         id_type = NODEMAP_UID;
824                 else if (type == NODEMAP_GIDMAP_IDX)
825                         id_type = NODEMAP_GID;
826                 else if (type == NODEMAP_PROJIDMAP_IDX)
827                         id_type = NODEMAP_PROJID;
828                 else
829                         GOTO(out, rc = -EINVAL);
830
831                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
832                 if (rc != 0)
833                         GOTO(out, rc);
834                 break;
835         case NODEMAP_GLOBAL_IDX:
836                 config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
837                 break;
838         default:
839                 CERROR("got keyrec pair for unknown type %d\n", type);
840                 break;
841         }
842
843         rc = type;
844
845         EXIT;
846
847 out:
848         return rc;
849 }
850
/*
 * Passes made over the nodemap index. Cluster records are consumed first so
 * that the nodemaps exist before the records that refer to them are read.
 */
enum nm_config_passes {
	/* first pass: only NODEMAP_CLUSTER_IDX records are consumed */
	NM_READ_CLUSTERS	= 0,
	/* second pass: all other records except NODEMAP_EMPTY_IDX */
	NM_READ_ATTRIBUTES	= 1,
};
855
856 static int nodemap_load_entries(const struct lu_env *env,
857                                 struct dt_object *nodemap_idx)
858 {
859         const struct dt_it_ops *iops;
860         struct dt_it *it;
861         struct lu_nodemap *recent_nodemap = NULL;
862         struct nodemap_config *new_config = NULL;
863         u64 hash = 0;
864         bool activate_nodemap = false;
865         bool loaded_global_idx = false;
866         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
867         int rc = 0;
868
869         ENTRY;
870
871         iops = &nodemap_idx->do_index_ops->dio_it;
872
873         dt_read_lock(env, nodemap_idx, 0);
874         it = iops->init(env, nodemap_idx, 0);
875         if (IS_ERR(it))
876                 GOTO(out, rc = PTR_ERR(it));
877
878         rc = iops->load(env, it, hash);
879         if (rc < 0)
880                 GOTO(out_iops_fini, rc);
881
882         /* rc == 0 means we need to advance to record */
883         if (rc == 0) {
884                 rc = iops->next(env, it);
885
886                 if (rc < 0)
887                         GOTO(out_iops_put, rc);
888                 /* rc > 0 is eof, will be checked in while below */
889         } else {
890                 /* rc == 1, we found initial record and can process below */
891                 rc = 0;
892         }
893
894         new_config = nodemap_config_alloc();
895         if (IS_ERR(new_config)) {
896                 rc = PTR_ERR(new_config);
897                 new_config = NULL;
898                 GOTO(out_iops_put, rc);
899         }
900
901         /* rc > 0 is eof, check initial iops->next here as well */
902         while (rc == 0) {
903                 struct nodemap_key *key;
904                 union nodemap_rec rec;
905                 enum nodemap_idx_type key_type;
906
907                 key = (struct nodemap_key *)iops->key(env, it);
908                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
909                 if ((cur_pass == NM_READ_CLUSTERS &&
910                                 key_type == NODEMAP_CLUSTER_IDX) ||
911                     (cur_pass == NM_READ_ATTRIBUTES &&
912                                 key_type != NODEMAP_CLUSTER_IDX &&
913                                 key_type != NODEMAP_EMPTY_IDX)) {
914                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
915                         if (rc != -ESTALE) {
916                                 if (rc != 0)
917                                         GOTO(out_nodemap_config, rc);
918                                 rc = nodemap_process_keyrec(new_config, key, &rec,
919                                                             &recent_nodemap);
920                                 if (rc < 0)
921                                         GOTO(out_nodemap_config, rc);
922                                 if (rc == NODEMAP_GLOBAL_IDX)
923                                         loaded_global_idx = true;
924                         }
925                 }
926
927                 do
928                         rc = iops->next(env, it);
929                 while (rc == -ESTALE);
930
931                 /* move to second pass */
932                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
933                         cur_pass = NM_READ_ATTRIBUTES;
934                         rc = iops->load(env, it, 0);
935                         if (rc == 0)
936                                 rc = iops->next(env, it);
937                         else if (rc > 0)
938                                 rc = 0;
939                         else
940                                 GOTO(out, rc);
941                 }
942         }
943
944         if (rc > 0)
945                 rc = 0;
946
947 out_nodemap_config:
948         if (rc != 0)
949                 nodemap_config_dealloc(new_config);
950         else
951                 /* creating new default needs to be done outside dt read lock */
952                 activate_nodemap = true;
953 out_iops_put:
954         iops->put(env, it);
955 out_iops_fini:
956         iops->fini(env, it);
957 out:
958         dt_read_unlock(env, nodemap_idx);
959
960         if (rc != 0)
961                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
962                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
963
964         if (!activate_nodemap)
965                 RETURN(rc);
966
967         if (new_config->nmc_default_nodemap == NULL) {
968                 /* new MGS won't have a default nm on disk, so create it here */
969                 struct lu_nodemap *nodemap =
970                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
971                 if (IS_ERR(nodemap)) {
972                         rc = PTR_ERR(nodemap);
973                 } else {
974                         rc = nodemap_idx_nodemap_add_update(
975                                         new_config->nmc_default_nodemap,
976                                         nodemap_idx,
977                                         NM_ADD);
978                         nodemap_putref(new_config->nmc_default_nodemap);
979                 }
980         }
981
982         /* new nodemap config won't have an active/inactive record */
983         if (rc == 0 && loaded_global_idx == false) {
984                 struct nodemap_key       nk;
985                 union nodemap_rec        nr;
986
987                 nodemap_global_key_init(&nk);
988                 nodemap_global_rec_init(&nr, false);
989                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
990         }
991
992         if (rc == 0)
993                 nodemap_config_set_active(new_config);
994         else
995                 nodemap_config_dealloc(new_config);
996
997         RETURN(rc);
998 }
999
1000 /**
1001  * Step through active config and write to disk.
1002  */
1003 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
1004                                             struct dt_device *dev,
1005                                             struct local_oid_storage *los)
1006 {
1007         struct dt_object *o;
1008         struct lu_nodemap *nodemap;
1009         struct lu_nodemap *nm_tmp;
1010         struct lu_nid_range *range;
1011         struct lu_nid_range *range_temp;
1012         struct lu_idmap *idmap;
1013         struct lu_idmap *id_tmp;
1014         struct rb_root root;
1015         struct nodemap_key nk;
1016         union nodemap_rec nr;
1017         LIST_HEAD(nodemap_list_head);
1018         int rc = 0, rc2;
1019
1020         ENTRY;
1021
1022         /* create a new index file to fill with active config */
1023         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
1024         if (IS_ERR(o))
1025                 RETURN(o);
1026
1027         mutex_lock(&active_config_lock);
1028
1029         /* convert hash to list so we don't spin */
1030         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
1031                                nm_hash_list_cb, &nodemap_list_head);
1032
1033         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
1034                 nodemap_cluster_key_init(&nk, nodemap->nm_id);
1035                 nodemap_cluster_rec_init(&nr, nodemap);
1036
1037                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1038                 if (rc2 < 0) {
1039                         rc = rc2;
1040                         continue;
1041                 }
1042
1043                 down_read(&active_config->nmc_range_tree_lock);
1044                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
1045                                          rn_list) {
1046                         lnet_nid_t nid[2] = {
1047                                 range->rn_start,
1048                                 range->rn_end
1049                         };
1050                         nodemap_range_key_init(&nk, nodemap->nm_id,
1051                                                range->rn_id);
1052                         nodemap_range_rec_init(&nr, nid);
1053                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1054                         if (rc2 < 0)
1055                                 rc = rc2;
1056                 }
1057                 up_read(&active_config->nmc_range_tree_lock);
1058
1059                 /* we don't need to take nm_idmap_lock because active config
1060                  * lock prevents changes from happening to nodemaps
1061                  */
1062                 root = nodemap->nm_client_to_fs_uidmap;
1063                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1064                                                         id_client_to_fs) {
1065                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1066                                                idmap->id_client);
1067                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1068                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1069                         if (rc2 < 0)
1070                                 rc = rc2;
1071                 }
1072
1073                 root = nodemap->nm_client_to_fs_gidmap;
1074                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1075                                                         id_client_to_fs) {
1076                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1077                                                idmap->id_client);
1078                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1079                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1080                         if (rc2 < 0)
1081                                 rc = rc2;
1082                 }
1083
1084                 root = nodemap->nm_client_to_fs_projidmap;
1085                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1086                                                         id_client_to_fs) {
1087                         nodemap_idmap_key_init(&nk, nodemap->nm_id,
1088                                                NODEMAP_PROJID,
1089                                                idmap->id_client);
1090                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1091                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1092                         if (rc2 < 0)
1093                                 rc = rc2;
1094                 }
1095         }
1096         nodemap_global_key_init(&nk);
1097         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1098         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1099         if (rc2 < 0)
1100                 rc = rc2;
1101
1102         mutex_unlock(&active_config_lock);
1103
1104         if (rc < 0) {
1105                 dt_object_put(env, o);
1106                 o = ERR_PTR(rc);
1107         }
1108
1109         RETURN(o);
1110 }
1111
1112 static void nodemap_save_all_caches(void)
1113 {
1114         struct nm_config_file   *ncf;
1115         struct lu_env            env;
1116         int                      rc = 0;
1117
1118         /* recreating nodemap cache requires fld_thread_key be in env */
1119         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1120         if (rc != 0) {
1121                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1122                 return;
1123         }
1124
1125         mutex_lock(&ncf_list_lock);
1126         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1127                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1128                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1129                 struct dt_object *o;
1130
1131                 /* put current config file so save conf can rewrite it */
1132                 dt_object_put_nocache(&env, ncf->ncf_obj);
1133                 ncf->ncf_obj = NULL;
1134
1135                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1136                 if (IS_ERR(o))
1137                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1138                               obd->obd_name, rc);
1139                 else
1140                         ncf->ncf_obj = o;
1141         }
1142         mutex_unlock(&ncf_list_lock);
1143
1144         lu_env_fini(&env);
1145 }
1146
/* tracks if config still needs to be loaded, either from disk or network;
 * set to true once a config has been loaded from an index file or set active
 * on behalf of the MGC
 */
static bool nodemap_config_loaded;
/* held while nodemap_config_loaded is tested or changed, and across the
 * config load / activation that goes with it
 */
static DEFINE_MUTEX(nodemap_config_loaded_lock);
1150
1151 /**
1152  * Ensures that configs loaded over the wire are prioritized over those loaded
1153  * from disk.
1154  *
1155  * \param config        config to set as the active config
1156  */
1157 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1158 {
1159         mutex_lock(&nodemap_config_loaded_lock);
1160         nodemap_config_set_active(config);
1161         nodemap_config_loaded = true;
1162         nodemap_save_all_caches();
1163         mutex_unlock(&nodemap_config_loaded_lock);
1164 }
1165 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1166
1167 /**
1168  * Register a dt_object representing the config index file. This should be
1169  * called by targets in order to load the nodemap configuration from disk. The
1170  * dt_object should be created with local_index_find_or_create and the index
1171  * features should be enabled with do_index_try.
1172  *
1173  * \param obj   dt_object returned by local_index_find_or_create
1174  *
1175  * \retval      on success: nm_config_file handle for later deregistration
1176  * \retval      -ENOMEM         memory allocation failure
1177  * \retval      -ENOENT         error loading nodemap config
1178  * \retval      -EINVAL         error loading nodemap config
1179  * \retval      -EEXIST         nodemap config already registered for MGS
1180  */
1181 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1182                                                    struct dt_object *obj,
1183                                                    struct local_oid_storage *los)
1184 {
1185         struct nm_config_file *ncf;
1186         int rc = 0;
1187         ENTRY;
1188
1189         if (nodemap_mgs_ncf != NULL)
1190                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1191
1192         OBD_ALLOC_PTR(ncf);
1193         if (ncf == NULL)
1194                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1195
1196         /* if loading from cache, prevent activation of MGS config until cache
1197          * loading is done, so disk config is overwritten by MGS config.
1198          */
1199         mutex_lock(&nodemap_config_loaded_lock);
1200         rc = nodemap_load_entries(env, obj);
1201         if (!rc)
1202                 nodemap_config_loaded = true;
1203         mutex_unlock(&nodemap_config_loaded_lock);
1204
1205         if (rc) {
1206                 OBD_FREE_PTR(ncf);
1207                 GOTO(out, ncf = ERR_PTR(rc));
1208         }
1209
1210         lu_object_get(&obj->do_lu);
1211
1212         ncf->ncf_obj = obj;
1213         ncf->ncf_los = los;
1214
1215         nodemap_mgs_ncf = ncf;
1216
1217 out:
1218         return ncf;
1219 }
1220 EXPORT_SYMBOL(nm_config_file_register_mgs);
1221
1222 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1223                                                    struct dt_device *dev,
1224                                                    struct local_oid_storage *los)
1225 {
1226         struct nm_config_file *ncf;
1227         struct dt_object *config_obj = NULL;
1228         int rc = 0;
1229
1230         OBD_ALLOC_PTR(ncf);
1231         if (ncf == NULL)
1232                 RETURN(ERR_PTR(-ENOMEM));
1233
1234         /* don't load from cache if config already loaded */
1235         mutex_lock(&nodemap_config_loaded_lock);
1236         if (!nodemap_config_loaded) {
1237                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1238                 if (IS_ERR(config_obj))
1239                         rc = PTR_ERR(config_obj);
1240                 else
1241                         rc = nodemap_load_entries(env, config_obj);
1242
1243                 if (!rc)
1244                         nodemap_config_loaded = true;
1245         }
1246         mutex_unlock(&nodemap_config_loaded_lock);
1247         if (rc)
1248                 GOTO(out_ncf, rc);
1249
1250         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1251         if (!config_obj) {
1252                 config_obj = nodemap_save_config_cache(env, dev, los);
1253                 if (IS_ERR(config_obj))
1254                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1255         }
1256
1257         ncf->ncf_obj = config_obj;
1258         ncf->ncf_los = los;
1259
1260         mutex_lock(&ncf_list_lock);
1261         list_add(&ncf->ncf_list, &ncf_list_head);
1262         mutex_unlock(&ncf_list_lock);
1263
1264 out_ncf:
1265         if (rc) {
1266                 OBD_FREE_PTR(ncf);
1267                 RETURN(ERR_PTR(rc));
1268         }
1269
1270         RETURN(ncf);
1271 }
1272 EXPORT_SYMBOL(nm_config_file_register_tgt);
1273
1274 /**
1275  * Deregister a nm_config_file. Should be called by targets during cleanup.
1276  *
1277  * \param ncf   config file to deregister
1278  */
1279 void nm_config_file_deregister_mgs(const struct lu_env *env,
1280                                    struct nm_config_file *ncf)
1281 {
1282         ENTRY;
1283         LASSERT(nodemap_mgs_ncf == ncf);
1284
1285         nodemap_mgs_ncf = NULL;
1286         if (ncf->ncf_obj)
1287                 dt_object_put(env, ncf->ncf_obj);
1288
1289         OBD_FREE_PTR(ncf);
1290
1291         EXIT;
1292 }
1293 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1294
1295 void nm_config_file_deregister_tgt(const struct lu_env *env,
1296                                    struct nm_config_file *ncf)
1297 {
1298         ENTRY;
1299
1300         if (ncf == NULL)
1301                 return;
1302
1303         mutex_lock(&ncf_list_lock);
1304         list_del(&ncf->ncf_list);
1305         mutex_unlock(&ncf_list_lock);
1306
1307         if (ncf->ncf_obj)
1308                 dt_object_put(env, ncf->ncf_obj);
1309
1310         OBD_FREE_PTR(ncf);
1311
1312         EXIT;
1313 }
1314 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1315
1316 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1317                               struct lu_nodemap **recent_nodemap)
1318 {
1319         struct nodemap_key *key;
1320         union nodemap_rec *rec;
1321         char *entry;
1322         int j;
1323         int k;
1324         int rc = 0;
1325         int size = dt_nodemap_features.dif_keysize_max +
1326                    dt_nodemap_features.dif_recsize_max;
1327         ENTRY;
1328
1329         for (j = 0; j < LU_PAGE_COUNT; j++) {
1330                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1331                         return -EINVAL;
1332
1333                 /* get and process keys and records from page */
1334                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1335                         entry = lip->lp_idx.lip_entries + k * size;
1336                         key = (struct nodemap_key *)entry;
1337
1338                         entry += dt_nodemap_features.dif_keysize_max;
1339                         rec = (union nodemap_rec *)entry;
1340
1341                         rc = nodemap_process_keyrec(config, key, rec,
1342                                                     recent_nodemap);
1343                         if (rc < 0)
1344                                 return rc;
1345                 }
1346                 lip++;
1347         }
1348
1349         EXIT;
1350         return 0;
1351 }
1352 EXPORT_SYMBOL(nodemap_process_idx_pages);
1353
1354 static int nodemap_page_build(const struct lu_env *env, struct dt_object *obj,
1355                               union lu_page *lp, size_t bytes,
1356                               const struct dt_it_ops *iops,
1357                               struct dt_it *it, __u32 attr, void *arg)
1358 {
1359         struct idx_info *ii = (struct idx_info *)arg;
1360         struct lu_idxpage *lip = &lp->lp_idx;
1361         char *entry;
1362         size_t size = ii->ii_keysize + ii->ii_recsize;
1363         int rc;
1364         ENTRY;
1365
1366         if (bytes < LIP_HDR_SIZE)
1367                 return -EINVAL;
1368
1369         /* initialize the header of the new container */
1370         memset(lip, 0, LIP_HDR_SIZE);
1371         lip->lip_magic = LIP_MAGIC;
1372         bytes -= LIP_HDR_SIZE;
1373
1374         entry = lip->lip_entries;
1375         do {
1376                 char *tmp_entry = entry;
1377                 struct dt_key *key;
1378                 __u64 hash;
1379                 enum nodemap_idx_type key_type;
1380
1381                 /* fetch 64-bit hash value */
1382                 hash = iops->store(env, it);
1383                 ii->ii_hash_end = hash;
1384
1385                 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1386                         if (lip->lip_nr != 0)
1387                                 GOTO(out, rc = 0);
1388                 }
1389
1390                 if (bytes < size) {
1391                         if (lip->lip_nr == 0)
1392                                 GOTO(out, rc = -EINVAL);
1393                         GOTO(out, rc = 0);
1394                 }
1395
1396                 key = iops->key(env, it);
1397                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1398
1399                 /* on the first pass, get only the cluster types. On second
1400                  * pass, get all the rest */
1401                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1402                                 key_type == NODEMAP_CLUSTER_IDX) ||
1403                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1404                                 key_type != NODEMAP_CLUSTER_IDX &&
1405                                 key_type != NODEMAP_EMPTY_IDX)) {
1406                         memcpy(tmp_entry, key, ii->ii_keysize);
1407                         tmp_entry += ii->ii_keysize;
1408
1409                         /* and finally the record */
1410                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1411                                        attr);
1412                         if (rc != -ESTALE) {
1413                                 if (rc != 0)
1414                                         GOTO(out, rc);
1415
1416                                 /* hash/key/record successfully copied! */
1417                                 lip->lip_nr++;
1418                                 if (unlikely(lip->lip_nr == 1 &&
1419                                     ii->ii_count == 0))
1420                                         ii->ii_hash_start = hash;
1421
1422                                 entry = tmp_entry + ii->ii_recsize;
1423                                 bytes -= size;
1424                         }
1425                 }
1426
1427                 /* move on to the next record */
1428                 do {
1429                         rc = iops->next(env, it);
1430                 } while (rc == -ESTALE);
1431
1432                 /* move to second pass */
1433                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1434                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1435                         rc = iops->load(env, it, 0);
1436                         if (rc == 0)
1437                                 rc = iops->next(env, it);
1438                         else if (rc > 0)
1439                                 rc = 0;
1440                         else
1441                                 GOTO(out, rc);
1442                 }
1443
1444         } while (rc == 0);
1445
1446         GOTO(out, rc);
1447 out:
1448         if (rc >= 0 && lip->lip_nr > 0)
1449                 /* one more container */
1450                 ii->ii_count++;
1451         if (rc > 0)
1452                 /* no more entries */
1453                 ii->ii_hash_end = II_END_OFF;
1454         return rc;
1455 }
1456
1457
1458 int nodemap_index_read(struct lu_env *env,
1459                        struct nm_config_file *ncf,
1460                        struct idx_info *ii,
1461                        const struct lu_rdpg *rdpg)
1462 {
1463         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1464         __u64                    version;
1465         int                      rc = 0;
1466
1467         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1468         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1469
1470         dt_read_lock(env, nodemap_idx, 0);
1471         version = dt_version_get(env, nodemap_idx);
1472         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1473                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1474                        ii->ii_version,
1475                        version);
1476                 ii->ii_hash_end = 0;
1477         } else {
1478                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1479                                    ii);
1480                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1481         }
1482
1483         if (rc >= 0)
1484                 ii->ii_version = version;
1485
1486         dt_read_unlock(env, nodemap_idx);
1487         return rc;
1488 }
1489 EXPORT_SYMBOL(nodemap_index_read);
1490
1491 /**
1492  * Returns the current nodemap configuration to MGC by walking the nodemap
1493  * config index and storing it in the response buffer.
1494  *
1495  * \param       req             incoming MGS_CONFIG_READ request
1496  * \retval      0               success
1497  * \retval      -EINVAL         malformed request
1498  * \retval      -ENOTCONN       client evicted/reconnected already
1499  * \retval      -ETIMEDOUT      client timeout or network error
1500  * \retval      -ENOMEM
1501  */
1502 int nodemap_get_config_req(struct obd_device *mgs_obd,
1503                            struct ptlrpc_request *req)
1504 {
1505         const struct ptlrpc_bulk_frag_ops *frag_ops = &ptlrpc_bulk_kiov_pin_ops;
1506         struct mgs_config_body *body;
1507         struct mgs_config_res *res;
1508         struct lu_rdpg rdpg;
1509         struct idx_info nodemap_ii;
1510         struct ptlrpc_bulk_desc *desc;
1511         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1512         int i;
1513         int page_count;
1514         int bytes = 0;
1515         int rc = 0;
1516
1517         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1518         if (!body)
1519                 RETURN(-EINVAL);
1520
1521         if (body->mcb_type != MGS_CFG_T_NODEMAP)
1522                 RETURN(-EINVAL);
1523
1524         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1525         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1526                 PAGE_SHIFT;
1527         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1528                 RETURN(-EINVAL);
1529
1530         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1531                body->mcb_name, rdpg.rp_count);
1532
1533         /* allocate pages to store the containers */
1534         OBD_ALLOC_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1535         if (rdpg.rp_pages == NULL)
1536                 RETURN(-ENOMEM);
1537         for (i = 0; i < rdpg.rp_npages; i++) {
1538                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1539                 if (rdpg.rp_pages[i] == NULL)
1540                         GOTO(out, rc = -ENOMEM);
1541         }
1542
1543         rdpg.rp_hash = body->mcb_offset;
1544         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1545         nodemap_ii.ii_flags = II_FL_NOHASH;
1546         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1547         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1548
1549         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1550                                    mgs_obd->u.obt.obt_nodemap_config_file,
1551                                    &nodemap_ii, &rdpg);
1552         if (bytes < 0)
1553                 GOTO(out, rc = bytes);
1554
1555         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1556
1557         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1558         if (res == NULL)
1559                 GOTO(out, rc = -EINVAL);
1560         res->mcr_offset = nodemap_ii.ii_hash_end;
1561         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1562
1563         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1564         LASSERT(page_count <= rdpg.rp_count);
1565         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1566                                     PTLRPC_BULK_PUT_SOURCE,
1567                                     MGS_BULK_PORTAL, frag_ops);
1568         if (desc == NULL)
1569                 GOTO(out, rc = -ENOMEM);
1570
1571         for (i = 0; i < page_count && bytes > 0; i++) {
1572                 frag_ops->add_kiov_frag(desc, rdpg.rp_pages[i], 0,
1573                                         min_t(int, bytes, PAGE_SIZE));
1574                 bytes -= PAGE_SIZE;
1575         }
1576
1577         rc = target_bulk_io(req->rq_export, desc);
1578         ptlrpc_free_bulk(desc);
1579
1580 out:
1581         if (rdpg.rp_pages != NULL) {
1582                 for (i = 0; i < rdpg.rp_npages; i++)
1583                         if (rdpg.rp_pages[i] != NULL)
1584                                 __free_page(rdpg.rp_pages[i]);
1585                 OBD_FREE_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1586         }
1587         return rc;
1588 }
1589 EXPORT_SYMBOL(nodemap_get_config_req);