Whamcloud - gitweb
f06ee82a908de545b5561a6fc48682911a0e98e9
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_storage.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2015, Trustees of Indiana University
24  *
25  * Copyright (c) 2017, Intel Corporation.
26  *
27  * Author: Joshua Walgenbach <jjw@iu.edu>
28  * Author: Kit Westneat <cwestnea@iu.edu>
29  *
30  * Implements the storage functionality for the nodemap configuration. Functions
31  * in this file prepare, store, and load nodemap configuration data. Targets
32  * using nodemap services should register a configuration file object. Nodemap
33  * configuration changes that need to persist should call the appropriate
34  * storage function for the data being modified.
35  *
36  * There are several index types as defined in enum nodemap_idx_type:
37  *      NODEMAP_CLUSTER_IDX     stores the data found on the lu_nodemap struct,
38  *                              like root squash and config flags, as well as
39  *                              the name.
40  *      NODEMAP_RANGE_IDX       stores NID range information for a nodemap
41  *      NODEMAP_UIDMAP_IDX      stores a fs/client UID mapping pair
42  *      NODEMAP_GIDMAP_IDX      stores a fs/client GID mapping pair
43  *      NODEMAP_GLOBAL_IDX      stores whether or not nodemaps are active
44  */
45
46 #include <libcfs/libcfs.h>
47 #include <linux/err.h>
48 #include <linux/kernel.h>
49 #include <linux/list.h>
50 #include <linux/mutex.h>
51 #include <linux/string.h>
52 #include <linux/types.h>
53 #include <uapi/linux/lnet/lnet-types.h>
54 #include <uapi/linux/lustre/lustre_idl.h>
55 #include <uapi/linux/lustre/lustre_disk.h>
56 #include <dt_object.h>
57 #include <lu_object.h>
58 #include <lustre_net.h>
59 #include <lustre_nodemap.h>
60 #include <obd_class.h>
61 #include <obd_support.h>
62 #include "nodemap_internal.h"
63
64 /* list of registered nodemap index files, except MGS
 * NOTE(review): ncf_list_lock presumably serializes access to this list; its
 * users are outside this view -- confirm.
 */
65 static LIST_HEAD(ncf_list_head);
66 static DEFINE_MUTEX(ncf_list_lock);
67
68 /* MGS index is different than others, others are listeners to MGS idx.
 * When this pointer is NULL the storage functions below refuse to run and
 * return -EINVAL; otherwise its ncf_obj is the index they modify.
 */
69 static struct nm_config_file *nodemap_mgs_ncf;
70
71 static void nodemap_cluster_key_init(struct nodemap_key *nk, unsigned int nm_id,
72                                      enum nodemap_cluster_rec_subid subid)
73 {
74         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
75                                                         NODEMAP_CLUSTER_IDX));
76         nk->nk_cluster_subid = subid;
77 }
78
79 static void nodemap_cluster_rec_init(union nodemap_rec *nr,
80                                      const struct lu_nodemap *nodemap)
81 {
82         BUILD_BUG_ON(sizeof(nr->ncr.ncr_name) != sizeof(nodemap->nm_name));
83
84         strncpy(nr->ncr.ncr_name, nodemap->nm_name, sizeof(nr->ncr.ncr_name));
85         nr->ncr.ncr_squash_uid = cpu_to_le32(nodemap->nm_squash_uid);
86         nr->ncr.ncr_squash_gid = cpu_to_le32(nodemap->nm_squash_gid);
87         nr->ncr.ncr_squash_projid = cpu_to_le32(nodemap->nm_squash_projid);
88         nr->ncr.ncr_flags =
89                 (nodemap->nmf_trust_client_ids ?
90                         NM_FL_TRUST_CLIENT_IDS : 0) |
91                 (nodemap->nmf_allow_root_access ?
92                         NM_FL_ALLOW_ROOT_ACCESS : 0) |
93                 (nodemap->nmf_deny_unknown ?
94                         NM_FL_DENY_UNKNOWN : 0) |
95                 (nodemap->nmf_map_mode & NODEMAP_MAP_UID ?
96                         NM_FL_MAP_UID : 0) |
97                 (nodemap->nmf_map_mode & NODEMAP_MAP_GID ?
98                         NM_FL_MAP_GID : 0) |
99                 (nodemap->nmf_map_mode & NODEMAP_MAP_PROJID ?
100                         NM_FL_MAP_PROJID : 0) |
101                 (nodemap->nmf_enable_audit ?
102                         NM_FL_ENABLE_AUDIT : 0) |
103                 (nodemap->nmf_forbid_encryption ?
104                         NM_FL_FORBID_ENCRYPT : 0);
105         nr->ncr.ncr_flags2 =
106                 (nodemap->nmf_readonly_mount ?
107                         NM_FL2_READONLY_MOUNT : 0);
108 }
109
110 static void nodemap_cluster_roles_rec_init(union nodemap_rec *nr,
111                                            const struct lu_nodemap *nodemap)
112 {
113         struct nodemap_cluster_roles_rec *ncrr = &nr->ncrr;
114
115         memset(ncrr, 0, sizeof(struct nodemap_cluster_roles_rec));
116         ncrr->ncrr_roles = cpu_to_le64(nodemap->nmf_rbac);
117 }
118
119 static void nodemap_idmap_key_init(struct nodemap_key *nk, unsigned int nm_id,
120                                    enum nodemap_id_type id_type,
121                                    u32 id_client)
122 {
123         enum nodemap_idx_type idx_type;
124
125         if (id_type == NODEMAP_UID)
126                 idx_type = NODEMAP_UIDMAP_IDX;
127         else if (id_type == NODEMAP_GID)
128                 idx_type = NODEMAP_GIDMAP_IDX;
129         else if (id_type == NODEMAP_PROJID)
130                 idx_type = NODEMAP_PROJIDMAP_IDX;
131         else
132                 idx_type = NODEMAP_EMPTY_IDX;
133
134         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id, idx_type));
135         nk->nk_id_client = cpu_to_le32(id_client);
136 }
137
138 static void nodemap_idmap_rec_init(union nodemap_rec *nr, u32 id_fs)
139 {
140         nr->nir.nir_id_fs = cpu_to_le32(id_fs);
141 }
142
143 static void nodemap_range_key_init(struct nodemap_key *nk, unsigned int nm_id,
144                                    unsigned int rn_id)
145 {
146         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(nm_id,
147                                                         NODEMAP_RANGE_IDX));
148         nk->nk_range_id = cpu_to_le32(rn_id);
149 }
150
151 static void nodemap_range_rec_init(union nodemap_rec *nr,
152                                    const lnet_nid_t nid[2])
153 {
154         nr->nrr.nrr_start_nid = cpu_to_le64(nid[0]);
155         nr->nrr.nrr_end_nid = cpu_to_le64(nid[1]);
156 }
157
158 static void nodemap_global_key_init(struct nodemap_key *nk)
159 {
160         nk->nk_nodemap_id = cpu_to_le32(nm_idx_set_type(0, NODEMAP_GLOBAL_IDX));
161         nk->nk_unused = 0;
162 }
163
164 static void nodemap_global_rec_init(union nodemap_rec *nr, bool active)
165 {
166         nr->ngr.ngr_is_active = active;
167 }
168
/**
 * Advance the version of a nodemap index object by one.
 *
 * Should be called with the dt_write lock held on \a nodemap_idx.
 *
 * \param[in]  env          execution environment
 * \param[in]  nodemap_idx  index object whose version is incremented
 * \param[in]  th           transaction handle the version update belongs to
 */
static void nodemap_inc_version(const struct lu_env *env,
                                struct dt_object *nodemap_idx,
                                struct thandle *th)
{
        dt_version_set(env, nodemap_idx,
                       dt_version_get(env, nodemap_idx) + 1, th);
}
177
/* Mode selector for nodemap_cache_find_create(). */
enum ncfc_find_create {
        /* remove any existing index file and create a new one */
        NCFC_CREATE_NEW = 1,
};
181
182 static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
183                                                    struct dt_device *dev,
184                                                    struct local_oid_storage *los,
185                                                    enum ncfc_find_create create_new)
186 {
187         struct lu_fid tfid;
188         struct dt_object *root_obj;
189         struct dt_object *nm_obj;
190         int rc = 0;
191
192         rc = dt_root_get(env, dev, &tfid);
193         if (rc < 0)
194                 GOTO(out, nm_obj = ERR_PTR(rc));
195
196         root_obj = dt_locate(env, dev, &tfid);
197         if (unlikely(IS_ERR(root_obj)))
198                 GOTO(out, nm_obj = root_obj);
199
200         rc = dt_lookup_dir(env, root_obj, LUSTRE_NODEMAP_NAME, &tfid);
201         if (rc == -ENOENT) {
202                 if (dev->dd_rdonly)
203                         GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
204         } else if (rc) {
205                 GOTO(out_root, nm_obj = ERR_PTR(rc));
206         } else if (dev->dd_rdonly && create_new == NCFC_CREATE_NEW) {
207                 GOTO(out_root, nm_obj = ERR_PTR(-EROFS));
208         }
209
210 again:
211         /* if loading index fails the first time, create new index */
212         if (create_new == NCFC_CREATE_NEW && rc != -ENOENT) {
213                 CDEBUG(D_INFO, "removing old index, creating new one\n");
214                 rc = local_object_unlink(env, dev, root_obj,
215                                          LUSTRE_NODEMAP_NAME);
216                 if (rc < 0) {
217                         /* XXX not sure the best way to get obd name. */
218                         CERROR("cannot destroy nodemap index: rc = %d\n",
219                                rc);
220                         GOTO(out_root, nm_obj = ERR_PTR(rc));
221                 }
222         }
223
224         nm_obj = local_index_find_or_create(env, los, root_obj,
225                                                 LUSTRE_NODEMAP_NAME,
226                                                 S_IFREG | S_IRUGO | S_IWUSR,
227                                                 &dt_nodemap_features);
228         if (IS_ERR(nm_obj))
229                 GOTO(out_root, nm_obj);
230
231         if (nm_obj->do_index_ops == NULL) {
232                 rc = nm_obj->do_ops->do_index_try(env, nm_obj,
233                                                       &dt_nodemap_features);
234                 /* even if loading from tgt fails, connecting to MGS will
235                  * rewrite the config
236                  */
237                 if (rc < 0) {
238                         dt_object_put(env, nm_obj);
239
240                         if (create_new == NCFC_CREATE_NEW)
241                                 GOTO(out_root, nm_obj = ERR_PTR(rc));
242
243                         CERROR("cannot load nodemap index from disk, creating "
244                                "new index: rc = %d\n", rc);
245                         create_new = NCFC_CREATE_NEW;
246                         goto again;
247                 }
248         }
249
250 out_root:
251         dt_object_put(env, root_obj);
252 out:
253         return nm_obj;
254 }
255
256 static int nodemap_idx_insert(const struct lu_env *env,
257                               struct dt_object *idx,
258                               const struct nodemap_key *nk,
259                               const union nodemap_rec *nr)
260 {
261         struct thandle *th;
262         struct dt_device *dev = lu2dt_dev(idx->do_lu.lo_dev);
263         int rc;
264
265         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
266
267         th = dt_trans_create(env, dev);
268
269         if (IS_ERR(th))
270                 GOTO(out, rc = PTR_ERR(th));
271
272         rc = dt_declare_insert(env, idx,
273                                (const struct dt_rec *)nr,
274                                (const struct dt_key *)nk, th);
275         if (rc != 0)
276                 GOTO(out, rc);
277
278         rc = dt_declare_version_set(env, idx, th);
279         if (rc != 0)
280                 GOTO(out, rc);
281
282         rc = dt_trans_start_local(env, dev, th);
283         if (rc != 0)
284                 GOTO(out, rc);
285
286         dt_write_lock(env, idx, 0);
287
288         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
289                        (const struct dt_key *)nk, th);
290
291         nodemap_inc_version(env, idx, th);
292         dt_write_unlock(env, idx);
293 out:
294         dt_trans_stop(env, dev, th);
295
296         return rc;
297 }
298
299 static int nodemap_idx_update(const struct lu_env *env,
300                               struct dt_object *idx,
301                               const struct nodemap_key *nk,
302                               const union nodemap_rec *nr)
303 {
304         struct thandle          *th;
305         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
306         int                      rc = 0;
307
308         th = dt_trans_create(env, dev);
309
310         if (IS_ERR(th))
311                 GOTO(out, rc = PTR_ERR(th));
312
313         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
314         if (rc != 0)
315                 GOTO(out, rc);
316
317         rc = dt_declare_insert(env, idx, (const struct dt_rec *)nr,
318                                (const struct dt_key *)nk, th);
319         if (rc != 0)
320                 GOTO(out, rc);
321
322         rc = dt_declare_version_set(env, idx, th);
323         if (rc != 0)
324                 GOTO(out, rc);
325
326         rc = dt_trans_start_local(env, dev, th);
327         if (rc != 0)
328                 GOTO(out, rc);
329
330         dt_write_lock(env, idx, 0);
331
332         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
333         if (rc != 0)
334                 GOTO(out_lock, rc);
335
336         rc = dt_insert(env, idx, (const struct dt_rec *)nr,
337                        (const struct dt_key *)nk, th);
338         if (rc != 0)
339                 GOTO(out_lock, rc);
340
341         nodemap_inc_version(env, idx, th);
342 out_lock:
343         dt_write_unlock(env, idx);
344 out:
345         dt_trans_stop(env, dev, th);
346
347         return rc;
348 }
349
350 static int nodemap_idx_delete(const struct lu_env *env,
351                               struct dt_object *idx,
352                               const struct nodemap_key *nk,
353                               const union nodemap_rec *unused)
354 {
355         struct thandle          *th;
356         struct dt_device        *dev = lu2dt_dev(idx->do_lu.lo_dev);
357         int                      rc = 0;
358
359         th = dt_trans_create(env, dev);
360
361         if (IS_ERR(th))
362                 GOTO(out, rc = PTR_ERR(th));
363
364         rc = dt_declare_delete(env, idx, (const struct dt_key *)nk, th);
365         if (rc != 0)
366                 GOTO(out, rc);
367
368         rc = dt_declare_version_set(env, idx, th);
369         if (rc != 0)
370                 GOTO(out, rc);
371
372         rc = dt_trans_start_local(env, dev, th);
373         if (rc != 0)
374                 GOTO(out, rc);
375
376         dt_write_lock(env, idx, 0);
377
378         rc = dt_delete(env, idx, (const struct dt_key *)nk, th);
379
380         nodemap_inc_version(env, idx, th);
381
382         dt_write_unlock(env, idx);
383 out:
384         dt_trans_stop(env, dev, th);
385
386         return rc;
387 }
388
/* Selects whether a record is inserted as new or replaces an existing one. */
enum nm_add_update {
        /* insert the record with nodemap_idx_insert() */
        NM_ADD = 0,
        /* replace the record with nodemap_idx_update() */
        NM_UPDATE = 1,
};
393
394 static int nodemap_idx_cluster_add_update(const struct lu_nodemap *nodemap,
395                                           struct dt_object *idx,
396                                           enum nm_add_update update,
397                                           enum nodemap_cluster_rec_subid subid)
398 {
399         struct nodemap_key nk;
400         union nodemap_rec nr;
401         struct lu_env env;
402         int rc = 0;
403
404         ENTRY;
405
406         if (idx == NULL) {
407                 if (nodemap_mgs_ncf == NULL) {
408                         CERROR("cannot add nodemap config to non-existing MGS.\n");
409                         return -EINVAL;
410                 }
411                 idx = nodemap_mgs_ncf->ncf_obj;
412         }
413
414         rc = lu_env_init(&env, LCT_LOCAL);
415         if (rc)
416                 RETURN(rc);
417
418         nodemap_cluster_key_init(&nk, nodemap->nm_id, subid);
419         switch (subid) {
420         case NODEMAP_CLUSTER_REC:
421                 nodemap_cluster_rec_init(&nr, nodemap);
422                 break;
423         case NODEMAP_CLUSTER_ROLES:
424                 nodemap_cluster_roles_rec_init(&nr, nodemap);
425                 break;
426         default:
427                 CWARN("%s: unknown subtype %u\n", nodemap->nm_name, subid);
428                 GOTO(fini, rc = -EINVAL);
429         }
430
431         if (update == NM_UPDATE)
432                 rc = nodemap_idx_update(&env, idx, &nk, &nr);
433         else
434                 rc = nodemap_idx_insert(&env, idx, &nk, &nr);
435
436 fini:
437         lu_env_fini(&env);
438         RETURN(rc);
439 }
440
441 int nodemap_idx_nodemap_add(const struct lu_nodemap *nodemap)
442 {
443         return nodemap_idx_cluster_add_update(nodemap, NULL,
444                                               NM_ADD, NODEMAP_CLUSTER_REC);
445 }
446
447 int nodemap_idx_nodemap_update(const struct lu_nodemap *nodemap)
448 {
449         return nodemap_idx_cluster_add_update(nodemap, NULL,
450                                               NM_UPDATE, NODEMAP_CLUSTER_REC);
451 }
452
453 int nodemap_idx_nodemap_del(const struct lu_nodemap *nodemap)
454 {
455         struct rb_root           root;
456         struct lu_idmap         *idmap;
457         struct lu_idmap         *temp;
458         struct lu_nid_range     *range;
459         struct lu_nid_range     *range_temp;
460         struct nodemap_key       nk;
461         struct lu_env            env;
462         int                      rc = 0;
463         int                      rc2 = 0;
464
465         ENTRY;
466
467         if (nodemap_mgs_ncf == NULL) {
468                 CERROR("cannot add nodemap config to non-existing MGS.\n");
469                 return -EINVAL;
470         }
471
472         rc = lu_env_init(&env, LCT_LOCAL);
473         if (rc != 0)
474                 RETURN(rc);
475
476         nodemap_cluster_key_init(&nk, nodemap->nm_id, NODEMAP_CLUSTER_ROLES);
477         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
478         if (rc2 < 0 && rc2 != -ENOENT)
479                 rc = rc2;
480
481         root = nodemap->nm_fs_to_client_uidmap;
482         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
483                                                 id_fs_to_client) {
484                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
485                                        idmap->id_client);
486                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
487                                          &nk, NULL);
488                 if (rc2 < 0)
489                         rc = rc2;
490         }
491
492         root = nodemap->nm_client_to_fs_gidmap;
493         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
494                                                 id_client_to_fs) {
495                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
496                                        idmap->id_client);
497                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
498                                          &nk, NULL);
499                 if (rc2 < 0)
500                         rc = rc2;
501         }
502
503         root = nodemap->nm_client_to_fs_projidmap;
504         nm_rbtree_postorder_for_each_entry_safe(idmap, temp, &root,
505                                                 id_client_to_fs) {
506                 nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_PROJID,
507                                        idmap->id_client);
508                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
509                                          &nk, NULL);
510                 if (rc2 < 0)
511                         rc = rc2;
512         }
513
514         list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
515                                  rn_list) {
516                 nodemap_range_key_init(&nk, nodemap->nm_id, range->rn_id);
517                 rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj,
518                                          &nk, NULL);
519                 if (rc2 < 0)
520                         rc = rc2;
521         }
522
523         nodemap_cluster_key_init(&nk, nodemap->nm_id, NODEMAP_CLUSTER_REC);
524         rc2 = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
525         if (rc2 < 0)
526                 rc = rc2;
527
528         lu_env_fini(&env);
529
530         RETURN(rc);
531 }
532
533 int nodemap_idx_cluster_roles_add(const struct lu_nodemap *nodemap)
534 {
535         return nodemap_idx_cluster_add_update(nodemap, NULL, NM_ADD,
536                                               NODEMAP_CLUSTER_ROLES);
537 }
538
539 int nodemap_idx_cluster_roles_update(const struct lu_nodemap *nodemap)
540 {
541         return nodemap_idx_cluster_add_update(nodemap, NULL, NM_UPDATE,
542                                               NODEMAP_CLUSTER_ROLES);
543 }
544
545 int nodemap_idx_cluster_roles_del(const struct lu_nodemap *nodemap)
546 {
547         struct nodemap_key nk;
548         struct lu_env env;
549         int rc = 0;
550
551         ENTRY;
552
553         if (nodemap_mgs_ncf == NULL) {
554                 CERROR("cannot add nodemap config to non-existing MGS.\n");
555                 return -EINVAL;
556         }
557
558         rc = lu_env_init(&env, LCT_LOCAL);
559         if (rc != 0)
560                 RETURN(rc);
561
562         nodemap_cluster_key_init(&nk, nodemap->nm_id, NODEMAP_CLUSTER_ROLES);
563         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
564
565         lu_env_fini(&env);
566         RETURN(rc);
567 }
568
569 int nodemap_idx_range_add(const struct lu_nid_range *range,
570                           const lnet_nid_t nid[2])
571 {
572         struct nodemap_key       nk;
573         union nodemap_rec        nr;
574         struct lu_env            env;
575         int                      rc = 0;
576         ENTRY;
577
578         if (nodemap_mgs_ncf == NULL) {
579                 CERROR("cannot add nodemap config to non-existing MGS.\n");
580                 return -EINVAL;
581         }
582
583         rc = lu_env_init(&env, LCT_LOCAL);
584         if (rc != 0)
585                 RETURN(rc);
586
587         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
588         nodemap_range_rec_init(&nr, nid);
589
590         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
591         lu_env_fini(&env);
592
593         RETURN(rc);
594 }
595
596 int nodemap_idx_range_del(const struct lu_nid_range *range)
597 {
598         struct nodemap_key       nk;
599         struct lu_env            env;
600         int                      rc = 0;
601         ENTRY;
602
603         if (nodemap_mgs_ncf == NULL) {
604                 CERROR("cannot add nodemap config to non-existing MGS.\n");
605                 return -EINVAL;
606         }
607
608         rc = lu_env_init(&env, LCT_LOCAL);
609         if (rc != 0)
610                 RETURN(rc);
611
612         nodemap_range_key_init(&nk, range->rn_nodemap->nm_id, range->rn_id);
613
614         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
615         lu_env_fini(&env);
616
617         RETURN(rc);
618 }
619
620 int nodemap_idx_idmap_add(const struct lu_nodemap *nodemap,
621                           enum nodemap_id_type id_type,
622                           const u32 map[2])
623 {
624         struct nodemap_key       nk;
625         union nodemap_rec        nr;
626         struct lu_env            env;
627         int                      rc = 0;
628         ENTRY;
629
630         if (nodemap_mgs_ncf == NULL) {
631                 CERROR("cannot add nodemap config to non-existing MGS.\n");
632                 return -EINVAL;
633         }
634
635         rc = lu_env_init(&env, LCT_LOCAL);
636         if (rc != 0)
637                 RETURN(rc);
638
639         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
640         nodemap_idmap_rec_init(&nr, map[1]);
641
642         rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj, &nk, &nr);
643         lu_env_fini(&env);
644
645         RETURN(rc);
646 }
647
648 int nodemap_idx_idmap_del(const struct lu_nodemap *nodemap,
649                           enum nodemap_id_type id_type,
650                           const u32 map[2])
651 {
652         struct nodemap_key       nk;
653         struct lu_env            env;
654         int                      rc = 0;
655         ENTRY;
656
657         if (nodemap_mgs_ncf == NULL) {
658                 CERROR("cannot add nodemap config to non-existing MGS.\n");
659                 return -EINVAL;
660         }
661
662         rc = lu_env_init(&env, LCT_LOCAL);
663         if (rc != 0)
664                 RETURN(rc);
665
666         nodemap_idmap_key_init(&nk, nodemap->nm_id, id_type, map[0]);
667
668         rc = nodemap_idx_delete(&env, nodemap_mgs_ncf->ncf_obj, &nk, NULL);
669         lu_env_fini(&env);
670
671         RETURN(rc);
672 }
673
674 static int nodemap_idx_global_add_update(bool value, enum nm_add_update update)
675 {
676         struct nodemap_key       nk;
677         union nodemap_rec        nr;
678         struct lu_env            env;
679         int                      rc = 0;
680         ENTRY;
681
682         if (nodemap_mgs_ncf == NULL) {
683                 CERROR("cannot add nodemap config to non-existing MGS.\n");
684                 return -EINVAL;
685         }
686
687         rc = lu_env_init(&env, LCT_LOCAL);
688         if (rc != 0)
689                 RETURN(rc);
690
691         nodemap_global_key_init(&nk);
692         nodemap_global_rec_init(&nr, value);
693
694         if (update == NM_UPDATE)
695                 rc = nodemap_idx_update(&env, nodemap_mgs_ncf->ncf_obj,
696                                         &nk, &nr);
697         else
698                 rc = nodemap_idx_insert(&env, nodemap_mgs_ncf->ncf_obj,
699                                         &nk, &nr);
700
701         lu_env_fini(&env);
702
703         RETURN(rc);
704 }
705
706 int nodemap_idx_nodemap_activate(bool value)
707 {
708         return nodemap_idx_global_add_update(value, NM_UPDATE);
709 }
710
711 static enum nodemap_idx_type nodemap_get_key_type(const struct nodemap_key *key)
712 {
713         u32                      nodemap_id;
714
715         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
716         return nm_idx_get_type(nodemap_id);
717 }
718
719 static int nodemap_get_key_subtype(const struct nodemap_key *key)
720 {
721         enum nodemap_idx_type type = nodemap_get_key_type(key);
722
723         return type == NODEMAP_CLUSTER_IDX ? key->nk_cluster_subid : -1;
724 }
725
726 static int nodemap_cluster_rec_helper(struct nodemap_config *config,
727                                       u32 nodemap_id,
728                                       const union nodemap_rec *rec,
729                                       struct lu_nodemap **recent_nodemap)
730 {
731         struct lu_nodemap *nodemap, *old_nm;
732         enum nm_flag_bits flags;
733         enum nm_flag2_bits flags2;
734
735         nodemap = cfs_hash_lookup(config->nmc_nodemap_hash, rec->ncr.ncr_name);
736         if (nodemap == NULL) {
737                 if (nodemap_id == LUSTRE_NODEMAP_DEFAULT_ID)
738                         nodemap = nodemap_create(rec->ncr.ncr_name, config, 1);
739                 else
740                         nodemap = nodemap_create(rec->ncr.ncr_name, config, 0);
741                 if (IS_ERR(nodemap))
742                         return PTR_ERR(nodemap);
743
744                 /* we need to override the local ID with the saved ID */
745                 nodemap->nm_id = nodemap_id;
746                 if (nodemap_id > config->nmc_nodemap_highest_id)
747                         config->nmc_nodemap_highest_id = nodemap_id;
748
749         } else if (nodemap->nm_id != nodemap_id) {
750                 nodemap_putref(nodemap);
751                 return -EINVAL;
752         }
753
754         nodemap->nm_squash_uid = le32_to_cpu(rec->ncr.ncr_squash_uid);
755         nodemap->nm_squash_gid = le32_to_cpu(rec->ncr.ncr_squash_gid);
756         nodemap->nm_squash_projid = le32_to_cpu(rec->ncr.ncr_squash_projid);
757
758         flags = rec->ncr.ncr_flags;
759         nodemap->nmf_allow_root_access = flags & NM_FL_ALLOW_ROOT_ACCESS;
760         nodemap->nmf_trust_client_ids = flags & NM_FL_TRUST_CLIENT_IDS;
761         nodemap->nmf_deny_unknown = flags & NM_FL_DENY_UNKNOWN;
762         nodemap->nmf_map_mode =
763                 (flags & NM_FL_MAP_UID ? NODEMAP_MAP_UID : 0) |
764                 (flags & NM_FL_MAP_GID ? NODEMAP_MAP_GID : 0) |
765                 (flags & NM_FL_MAP_PROJID ? NODEMAP_MAP_PROJID : 0);
766         if (nodemap->nmf_map_mode == NODEMAP_MAP_BOTH_LEGACY)
767                 nodemap->nmf_map_mode = NODEMAP_MAP_BOTH;
768         nodemap->nmf_enable_audit = flags & NM_FL_ENABLE_AUDIT;
769         nodemap->nmf_forbid_encryption = flags & NM_FL_FORBID_ENCRYPT;
770         flags2 = rec->ncr.ncr_flags2;
771         nodemap->nmf_readonly_mount = flags2 & NM_FL2_READONLY_MOUNT;
772         /* by default, and in the absence of cluster_roles, grant all roles */
773         nodemap->nmf_rbac = NODEMAP_RBAC_ALL;
774
775         /* The fileset should be saved otherwise it will be empty
776          * every time in case of "NODEMAP_CLUSTER_IDX".
777          */
778         mutex_lock(&active_config_lock);
779         old_nm = nodemap_lookup(rec->ncr.ncr_name);
780         if (!IS_ERR(old_nm) && old_nm->nm_fileset[0] != '\0')
781                 strlcpy(nodemap->nm_fileset, old_nm->nm_fileset,
782                         sizeof(nodemap->nm_fileset));
783         mutex_unlock(&active_config_lock);
784         if (!IS_ERR(old_nm))
785                 nodemap_putref(old_nm);
786
787         if (*recent_nodemap == NULL) {
788                 *recent_nodemap = nodemap;
789                 INIT_LIST_HEAD(&nodemap->nm_list);
790         } else {
791                 list_add(&nodemap->nm_list, &(*recent_nodemap)->nm_list);
792         }
793         nodemap_putref(nodemap);
794
795         return 0;
796 }
797
798 static int nodemap_cluster_roles_helper(struct lu_nodemap *nodemap,
799                                         const union nodemap_rec *rec)
800 {
801         nodemap->nmf_rbac = le64_to_cpu(rec->ncrr.ncrr_roles);
802
803         return 0;
804 }
805
806 /**
807  * Process a key/rec pair and modify the new configuration.
808  *
809  * \param       config          configuration to update with this key/rec data
810  * \param       key             key of the record that was loaded
811  * \param       rec             record that was loaded
812  * \param       recent_nodemap  last referenced nodemap
813  * \retval      type of record processed, see enum #nodemap_idx_type
814  * \retval      -ENOENT         range or map loaded before nodemap record
815  * \retval      -EINVAL         duplicate nodemap cluster records found with
816  *                              different IDs, or nodemap has invalid name
817  * \retval      -ENOMEM
818  */
819 static int nodemap_process_keyrec(struct nodemap_config *config,
820                                   const struct nodemap_key *key,
821                                   const union nodemap_rec *rec,
822                                   struct lu_nodemap **recent_nodemap)
823 {
824         struct lu_nodemap *nodemap = NULL;
825         enum nodemap_idx_type type;
826         enum nodemap_id_type id_type;
827         int subtype;
828         u32 nodemap_id;
829         lnet_nid_t nid[2];
830         u32 map[2];
831         int rc;
832
833         ENTRY;
834
835         BUILD_BUG_ON(sizeof(union nodemap_rec) != 32);
836
837         nodemap_id = le32_to_cpu(key->nk_nodemap_id);
838         type = nodemap_get_key_type(key);
839         subtype = nodemap_get_key_subtype(key);
840         nodemap_id = nm_idx_set_type(nodemap_id, 0);
841
842         CDEBUG(D_INFO, "found config entry, nm_id %d type %d subtype %d\n",
843                nodemap_id, type, subtype);
844
845         /* find the correct nodemap in the load list */
846         if (type == NODEMAP_RANGE_IDX || type == NODEMAP_UIDMAP_IDX ||
847             type == NODEMAP_GIDMAP_IDX || type == NODEMAP_PROJIDMAP_IDX ||
848             (type == NODEMAP_CLUSTER_IDX && subtype != NODEMAP_CLUSTER_REC)) {
849                 struct lu_nodemap *tmp = NULL;
850
851                 nodemap = *recent_nodemap;
852
853                 if (nodemap == NULL)
854                         GOTO(out, rc = -ENOENT);
855
856                 if (nodemap->nm_id != nodemap_id) {
857                         list_for_each_entry(tmp, &nodemap->nm_list, nm_list)
858                                 if (tmp->nm_id == nodemap_id) {
859                                         nodemap = tmp;
860                                         break;
861                                 }
862
863                         if (nodemap->nm_id != nodemap_id)
864                                 GOTO(out, rc = -ENOENT);
865                 }
866
867                 /* update most recently used nodemap if necessay */
868                 if (nodemap != *recent_nodemap)
869                         *recent_nodemap = nodemap;
870         }
871
872         switch (type) {
873         case NODEMAP_EMPTY_IDX:
874                 if (nodemap_id != 0)
875                         CWARN("Found nodemap config record without type field, "
876                               " nodemap_id=%d. nodemap config file corrupt?\n",
877                               nodemap_id);
878                 break;
879         case NODEMAP_CLUSTER_IDX:
880                 switch (nodemap_get_key_subtype(key)) {
881                 case NODEMAP_CLUSTER_REC:
882                         rc = nodemap_cluster_rec_helper(config, nodemap_id, rec,
883                                                         recent_nodemap);
884                         if (rc != 0)
885                                 GOTO(out, rc);
886                         break;
887                 case NODEMAP_CLUSTER_ROLES:
888                         rc = nodemap_cluster_roles_helper(nodemap, rec);
889                         if (rc != 0)
890                                 GOTO(out, rc);
891                         break;
892                 default:
893                         CWARN("%s: ignoring keyrec of type %d with subtype %u\n",
894                               nodemap->nm_name, NODEMAP_CLUSTER_IDX,
895                               nodemap_get_key_subtype(key));
896                         break;
897                 }
898                 break;
899         case NODEMAP_RANGE_IDX:
900                 nid[0] = le64_to_cpu(rec->nrr.nrr_start_nid);
901                 nid[1] = le64_to_cpu(rec->nrr.nrr_end_nid);
902
903                 rc = nodemap_add_range_helper(config, nodemap, nid,
904                                         le32_to_cpu(key->nk_range_id));
905                 if (rc != 0)
906                         GOTO(out, rc);
907                 break;
908         case NODEMAP_UIDMAP_IDX:
909         case NODEMAP_GIDMAP_IDX:
910         case NODEMAP_PROJIDMAP_IDX:
911                 map[0] = le32_to_cpu(key->nk_id_client);
912                 map[1] = le32_to_cpu(rec->nir.nir_id_fs);
913
914                 if (type == NODEMAP_UIDMAP_IDX)
915                         id_type = NODEMAP_UID;
916                 else if (type == NODEMAP_GIDMAP_IDX)
917                         id_type = NODEMAP_GID;
918                 else if (type == NODEMAP_PROJIDMAP_IDX)
919                         id_type = NODEMAP_PROJID;
920                 else
921                         GOTO(out, rc = -EINVAL);
922
923                 rc = nodemap_add_idmap_helper(nodemap, id_type, map);
924                 if (rc != 0)
925                         GOTO(out, rc);
926                 break;
927         case NODEMAP_GLOBAL_IDX:
928                 switch (key->nk_unused) {
929                 case 0:
930                         config->nmc_nodemap_is_active = rec->ngr.ngr_is_active;
931                         break;
932                 default:
933                         CWARN("%s: ignoring keyrec of type %d with subtype %u\n",
934                               recent_nodemap ?
935                                (*recent_nodemap)->nm_name : "nodemap",
936                               NODEMAP_GLOBAL_IDX, key->nk_unused);
937                         break;
938                 }
939                 break;
940         default:
941                 CWARN("%s: ignoring key %u:%u for unknown type %u\n",
942                       recent_nodemap ? (*recent_nodemap)->nm_name : "nodemap",
943                       key->nk_nodemap_id & 0x0FFFFFFF, key->nk_unused, type);
944                 break;
945         }
946
947         rc = type;
948
949         EXIT;
950
951 out:
952         return rc;
953 }
954
/*
 * Passes made over the nodemap index. Cluster records are read before
 * everything else so that attribute records find their nodemap loaded.
 */
enum nm_config_passes {
        /* first pass: NODEMAP_CLUSTER_IDX keys of subtype NODEMAP_CLUSTER_REC */
        NM_READ_CLUSTERS        = 0,
        /* second pass: all remaining keys except NODEMAP_EMPTY_IDX */
        NM_READ_ATTRIBUTES      = 1,
};
959
960 static int nodemap_load_entries(const struct lu_env *env,
961                                 struct dt_object *nodemap_idx)
962 {
963         const struct dt_it_ops *iops;
964         struct dt_it *it;
965         struct lu_nodemap *recent_nodemap = NULL;
966         struct nodemap_config *new_config = NULL;
967         u64 hash = 0;
968         bool activate_nodemap = false;
969         bool loaded_global_idx = false;
970         enum nm_config_passes cur_pass = NM_READ_CLUSTERS;
971         int rc = 0;
972
973         ENTRY;
974
975         iops = &nodemap_idx->do_index_ops->dio_it;
976
977         dt_read_lock(env, nodemap_idx, 0);
978         it = iops->init(env, nodemap_idx, 0);
979         if (IS_ERR(it))
980                 GOTO(out, rc = PTR_ERR(it));
981
982         rc = iops->load(env, it, hash);
983         if (rc < 0)
984                 GOTO(out_iops_fini, rc);
985
986         /* rc == 0 means we need to advance to record */
987         if (rc == 0) {
988                 rc = iops->next(env, it);
989
990                 if (rc < 0)
991                         GOTO(out_iops_put, rc);
992                 /* rc > 0 is eof, will be checked in while below */
993         } else {
994                 /* rc == 1, we found initial record and can process below */
995                 rc = 0;
996         }
997
998         new_config = nodemap_config_alloc();
999         if (IS_ERR(new_config)) {
1000                 rc = PTR_ERR(new_config);
1001                 new_config = NULL;
1002                 GOTO(out_iops_put, rc);
1003         }
1004
1005         /* rc > 0 is eof, check initial iops->next here as well */
1006         while (rc == 0) {
1007                 struct nodemap_key *key;
1008                 union nodemap_rec rec;
1009                 enum nodemap_idx_type key_type;
1010                 int sub_type;
1011
1012                 key = (struct nodemap_key *)iops->key(env, it);
1013                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1014                 sub_type = nodemap_get_key_subtype((struct nodemap_key *)key);
1015                 if ((cur_pass == NM_READ_CLUSTERS &&
1016                      key_type == NODEMAP_CLUSTER_IDX &&
1017                      sub_type == NODEMAP_CLUSTER_REC) ||
1018                     (cur_pass == NM_READ_ATTRIBUTES &&
1019                      (key_type != NODEMAP_CLUSTER_IDX ||
1020                       sub_type != NODEMAP_CLUSTER_REC) &&
1021                      key_type != NODEMAP_EMPTY_IDX)) {
1022                         rc = iops->rec(env, it, (struct dt_rec *)&rec, 0);
1023                         if (rc != -ESTALE) {
1024                                 if (rc != 0)
1025                                         GOTO(out_nodemap_config, rc);
1026                                 rc = nodemap_process_keyrec(new_config, key, &rec,
1027                                                             &recent_nodemap);
1028                                 if (rc < 0)
1029                                         GOTO(out_nodemap_config, rc);
1030                                 if (rc == NODEMAP_GLOBAL_IDX)
1031                                         loaded_global_idx = true;
1032                         }
1033                 }
1034
1035                 do
1036                         rc = iops->next(env, it);
1037                 while (rc == -ESTALE);
1038
1039                 /* move to second pass */
1040                 if (rc > 0 && cur_pass == NM_READ_CLUSTERS) {
1041                         cur_pass = NM_READ_ATTRIBUTES;
1042                         rc = iops->load(env, it, 0);
1043                         if (rc == 0)
1044                                 rc = iops->next(env, it);
1045                         else if (rc > 0)
1046                                 rc = 0;
1047                         else
1048                                 GOTO(out, rc);
1049                 }
1050         }
1051
1052         if (rc > 0)
1053                 rc = 0;
1054
1055 out_nodemap_config:
1056         if (rc != 0)
1057                 nodemap_config_dealloc(new_config);
1058         else
1059                 /* creating new default needs to be done outside dt read lock */
1060                 activate_nodemap = true;
1061 out_iops_put:
1062         iops->put(env, it);
1063 out_iops_fini:
1064         iops->fini(env, it);
1065 out:
1066         dt_read_unlock(env, nodemap_idx);
1067
1068         if (rc != 0)
1069                 CWARN("%s: failed to load nodemap configuration: rc = %d\n",
1070                       nodemap_idx->do_lu.lo_dev->ld_obd->obd_name, rc);
1071
1072         if (!activate_nodemap)
1073                 RETURN(rc);
1074
1075         if (new_config->nmc_default_nodemap == NULL) {
1076                 /* new MGS won't have a default nm on disk, so create it here */
1077                 struct lu_nodemap *nodemap =
1078                         nodemap_create(DEFAULT_NODEMAP, new_config, 1);
1079                 if (IS_ERR(nodemap)) {
1080                         rc = PTR_ERR(nodemap);
1081                 } else {
1082                         rc = nodemap_idx_cluster_add_update(
1083                                         new_config->nmc_default_nodemap,
1084                                         nodemap_idx,
1085                                         NM_ADD, NODEMAP_CLUSTER_REC);
1086                         nodemap_putref(new_config->nmc_default_nodemap);
1087                 }
1088         }
1089
1090         /* new nodemap config won't have an active/inactive record */
1091         if (rc == 0 && loaded_global_idx == false) {
1092                 struct nodemap_key       nk;
1093                 union nodemap_rec        nr;
1094
1095                 nodemap_global_key_init(&nk);
1096                 nodemap_global_rec_init(&nr, false);
1097                 rc = nodemap_idx_insert(env, nodemap_idx, &nk, &nr);
1098         }
1099
1100         if (rc == 0)
1101                 nodemap_config_set_active(new_config);
1102         else
1103                 nodemap_config_dealloc(new_config);
1104
1105         RETURN(rc);
1106 }
1107
1108 /**
1109  * Step through active config and write to disk.
1110  */
1111 struct dt_object *nodemap_save_config_cache(const struct lu_env *env,
1112                                             struct dt_device *dev,
1113                                             struct local_oid_storage *los)
1114 {
1115         struct dt_object *o;
1116         struct lu_nodemap *nodemap;
1117         struct lu_nodemap *nm_tmp;
1118         struct lu_nid_range *range;
1119         struct lu_nid_range *range_temp;
1120         struct lu_idmap *idmap;
1121         struct lu_idmap *id_tmp;
1122         struct rb_root root;
1123         struct nodemap_key nk;
1124         union nodemap_rec nr;
1125         LIST_HEAD(nodemap_list_head);
1126         int rc = 0, rc2;
1127
1128         ENTRY;
1129
1130         /* create a new index file to fill with active config */
1131         o = nodemap_cache_find_create(env, dev, los, NCFC_CREATE_NEW);
1132         if (IS_ERR(o))
1133                 RETURN(o);
1134
1135         mutex_lock(&active_config_lock);
1136
1137         /* convert hash to list so we don't spin */
1138         cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
1139                                nm_hash_list_cb, &nodemap_list_head);
1140
1141         list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
1142                 nodemap_cluster_key_init(&nk, nodemap->nm_id,
1143                                          NODEMAP_CLUSTER_REC);
1144                 nodemap_cluster_rec_init(&nr, nodemap);
1145
1146                 rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1147                 if (rc2 < 0) {
1148                         rc = rc2;
1149                         continue;
1150                 }
1151
1152                 /* only insert NODEMAP_CLUSTER_ROLES idx in saved config cache
1153                  * if nmf_rbac is not default value NODEMAP_RBAC_ALL
1154                  */
1155                 if (nodemap->nmf_rbac != NODEMAP_RBAC_ALL) {
1156                         nodemap_cluster_key_init(&nk, nodemap->nm_id,
1157                                                  NODEMAP_CLUSTER_ROLES);
1158                         nodemap_cluster_roles_rec_init(&nr, nodemap);
1159                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1160                         if (rc2 < 0)
1161                                 rc = rc2;
1162                 }
1163
1164                 down_read(&active_config->nmc_range_tree_lock);
1165                 list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
1166                                          rn_list) {
1167                         lnet_nid_t nid[2] = {
1168                                 range->rn_start,
1169                                 range->rn_end
1170                         };
1171                         nodemap_range_key_init(&nk, nodemap->nm_id,
1172                                                range->rn_id);
1173                         nodemap_range_rec_init(&nr, nid);
1174                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1175                         if (rc2 < 0)
1176                                 rc = rc2;
1177                 }
1178                 up_read(&active_config->nmc_range_tree_lock);
1179
1180                 /* we don't need to take nm_idmap_lock because active config
1181                  * lock prevents changes from happening to nodemaps
1182                  */
1183                 root = nodemap->nm_client_to_fs_uidmap;
1184                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1185                                                         id_client_to_fs) {
1186                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
1187                                                idmap->id_client);
1188                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1189                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1190                         if (rc2 < 0)
1191                                 rc = rc2;
1192                 }
1193
1194                 root = nodemap->nm_client_to_fs_gidmap;
1195                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1196                                                         id_client_to_fs) {
1197                         nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
1198                                                idmap->id_client);
1199                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1200                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1201                         if (rc2 < 0)
1202                                 rc = rc2;
1203                 }
1204
1205                 root = nodemap->nm_client_to_fs_projidmap;
1206                 nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
1207                                                         id_client_to_fs) {
1208                         nodemap_idmap_key_init(&nk, nodemap->nm_id,
1209                                                NODEMAP_PROJID,
1210                                                idmap->id_client);
1211                         nodemap_idmap_rec_init(&nr, idmap->id_fs);
1212                         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1213                         if (rc2 < 0)
1214                                 rc = rc2;
1215                 }
1216         }
1217         nodemap_global_key_init(&nk);
1218         nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
1219         rc2 = nodemap_idx_insert(env, o, &nk, &nr);
1220         if (rc2 < 0)
1221                 rc = rc2;
1222
1223         mutex_unlock(&active_config_lock);
1224
1225         if (rc < 0) {
1226                 dt_object_put(env, o);
1227                 o = ERR_PTR(rc);
1228         }
1229
1230         RETURN(o);
1231 }
1232
1233 static void nodemap_save_all_caches(void)
1234 {
1235         struct nm_config_file   *ncf;
1236         struct lu_env            env;
1237         int                      rc = 0;
1238
1239         /* recreating nodemap cache requires fld_thread_key be in env */
1240         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
1241         if (rc != 0) {
1242                 CWARN("cannot init env for nodemap config: rc = %d\n", rc);
1243                 return;
1244         }
1245
1246         mutex_lock(&ncf_list_lock);
1247         list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
1248                 struct dt_device *dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
1249                 struct obd_device *obd = ncf->ncf_obj->do_lu.lo_dev->ld_obd;
1250                 struct dt_object *o;
1251
1252                 /* put current config file so save conf can rewrite it */
1253                 dt_object_put_nocache(&env, ncf->ncf_obj);
1254                 ncf->ncf_obj = NULL;
1255
1256                 o = nodemap_save_config_cache(&env, dev, ncf->ncf_los);
1257                 if (IS_ERR(o))
1258                         CWARN("%s: error writing to nodemap config: rc = %d\n",
1259                               obd->obd_name, rc);
1260                 else
1261                         ncf->ncf_obj = o;
1262         }
1263         mutex_unlock(&ncf_list_lock);
1264
1265         lu_env_fini(&env);
1266 }
1267
/* tracks if config still needs to be loaded, either from disk or network.
 * Set to true, with nodemap_config_loaded_lock held, once a config has been
 * activated from the MGC or successfully loaded from an index; target
 * registration skips loading from its cache when this is already true.
 */
static bool nodemap_config_loaded;
static DEFINE_MUTEX(nodemap_config_loaded_lock);
1271
1272 /**
1273  * Ensures that configs loaded over the wire are prioritized over those loaded
1274  * from disk.
1275  *
1276  * \param config        config to set as the active config
1277  */
1278 void nodemap_config_set_active_mgc(struct nodemap_config *config)
1279 {
1280         mutex_lock(&nodemap_config_loaded_lock);
1281         nodemap_config_set_active(config);
1282         nodemap_config_loaded = true;
1283         nodemap_save_all_caches();
1284         mutex_unlock(&nodemap_config_loaded_lock);
1285 }
1286 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
1287
1288 /**
1289  * Register a dt_object representing the config index file. This should be
1290  * called by targets in order to load the nodemap configuration from disk. The
1291  * dt_object should be created with local_index_find_or_create and the index
1292  * features should be enabled with do_index_try.
1293  *
1294  * \param obj   dt_object returned by local_index_find_or_create
1295  *
1296  * \retval      on success: nm_config_file handle for later deregistration
1297  * \retval      -ENOMEM         memory allocation failure
1298  * \retval      -ENOENT         error loading nodemap config
1299  * \retval      -EINVAL         error loading nodemap config
1300  * \retval      -EEXIST         nodemap config already registered for MGS
1301  */
1302 struct nm_config_file *nm_config_file_register_mgs(const struct lu_env *env,
1303                                                    struct dt_object *obj,
1304                                                    struct local_oid_storage *los)
1305 {
1306         struct nm_config_file *ncf;
1307         int rc = 0;
1308         ENTRY;
1309
1310         if (nodemap_mgs_ncf != NULL)
1311                 GOTO(out, ncf = ERR_PTR(-EEXIST));
1312
1313         OBD_ALLOC_PTR(ncf);
1314         if (ncf == NULL)
1315                 GOTO(out, ncf = ERR_PTR(-ENOMEM));
1316
1317         /* if loading from cache, prevent activation of MGS config until cache
1318          * loading is done, so disk config is overwritten by MGS config.
1319          */
1320         mutex_lock(&nodemap_config_loaded_lock);
1321         rc = nodemap_load_entries(env, obj);
1322         if (!rc)
1323                 nodemap_config_loaded = true;
1324         mutex_unlock(&nodemap_config_loaded_lock);
1325
1326         if (rc) {
1327                 OBD_FREE_PTR(ncf);
1328                 GOTO(out, ncf = ERR_PTR(rc));
1329         }
1330
1331         lu_object_get(&obj->do_lu);
1332
1333         ncf->ncf_obj = obj;
1334         ncf->ncf_los = los;
1335
1336         nodemap_mgs_ncf = ncf;
1337
1338 out:
1339         return ncf;
1340 }
1341 EXPORT_SYMBOL(nm_config_file_register_mgs);
1342
1343 struct nm_config_file *nm_config_file_register_tgt(const struct lu_env *env,
1344                                                    struct dt_device *dev,
1345                                                    struct local_oid_storage *los)
1346 {
1347         struct nm_config_file *ncf;
1348         struct dt_object *config_obj = NULL;
1349         int rc = 0;
1350
1351         OBD_ALLOC_PTR(ncf);
1352         if (ncf == NULL)
1353                 RETURN(ERR_PTR(-ENOMEM));
1354
1355         /* don't load from cache if config already loaded */
1356         mutex_lock(&nodemap_config_loaded_lock);
1357         if (!nodemap_config_loaded) {
1358                 config_obj = nodemap_cache_find_create(env, dev, los, 0);
1359                 if (IS_ERR(config_obj))
1360                         rc = PTR_ERR(config_obj);
1361                 else
1362                         rc = nodemap_load_entries(env, config_obj);
1363
1364                 if (!rc)
1365                         nodemap_config_loaded = true;
1366         }
1367         mutex_unlock(&nodemap_config_loaded_lock);
1368         if (rc)
1369                 GOTO(out_ncf, rc);
1370
1371         /* sync on disk caches w/ loaded config in memory, ncf_obj may change */
1372         if (!config_obj) {
1373                 config_obj = nodemap_save_config_cache(env, dev, los);
1374                 if (IS_ERR(config_obj))
1375                         GOTO(out_ncf, rc = PTR_ERR(config_obj));
1376         }
1377
1378         ncf->ncf_obj = config_obj;
1379         ncf->ncf_los = los;
1380
1381         mutex_lock(&ncf_list_lock);
1382         list_add(&ncf->ncf_list, &ncf_list_head);
1383         mutex_unlock(&ncf_list_lock);
1384
1385 out_ncf:
1386         if (rc) {
1387                 OBD_FREE_PTR(ncf);
1388                 RETURN(ERR_PTR(rc));
1389         }
1390
1391         RETURN(ncf);
1392 }
1393 EXPORT_SYMBOL(nm_config_file_register_tgt);
1394
1395 /**
1396  * Deregister a nm_config_file. Should be called by targets during cleanup.
1397  *
1398  * \param ncf   config file to deregister
1399  */
1400 void nm_config_file_deregister_mgs(const struct lu_env *env,
1401                                    struct nm_config_file *ncf)
1402 {
1403         ENTRY;
1404         LASSERT(nodemap_mgs_ncf == ncf);
1405
1406         nodemap_mgs_ncf = NULL;
1407         if (ncf->ncf_obj)
1408                 dt_object_put(env, ncf->ncf_obj);
1409
1410         OBD_FREE_PTR(ncf);
1411
1412         EXIT;
1413 }
1414 EXPORT_SYMBOL(nm_config_file_deregister_mgs);
1415
1416 void nm_config_file_deregister_tgt(const struct lu_env *env,
1417                                    struct nm_config_file *ncf)
1418 {
1419         ENTRY;
1420
1421         if (ncf == NULL)
1422                 return;
1423
1424         mutex_lock(&ncf_list_lock);
1425         list_del(&ncf->ncf_list);
1426         mutex_unlock(&ncf_list_lock);
1427
1428         if (ncf->ncf_obj)
1429                 dt_object_put(env, ncf->ncf_obj);
1430
1431         OBD_FREE_PTR(ncf);
1432
1433         EXIT;
1434 }
1435 EXPORT_SYMBOL(nm_config_file_deregister_tgt);
1436
1437 int nodemap_process_idx_pages(struct nodemap_config *config, union lu_page *lip,
1438                               struct lu_nodemap **recent_nodemap)
1439 {
1440         struct nodemap_key *key;
1441         union nodemap_rec *rec;
1442         char *entry;
1443         int j;
1444         int k;
1445         int rc = 0;
1446         int size = dt_nodemap_features.dif_keysize_max +
1447                    dt_nodemap_features.dif_recsize_max;
1448         ENTRY;
1449
1450         for (j = 0; j < LU_PAGE_COUNT; j++) {
1451                 if (lip->lp_idx.lip_magic != LIP_MAGIC)
1452                         return -EINVAL;
1453
1454                 /* get and process keys and records from page */
1455                 for (k = 0; k < lip->lp_idx.lip_nr; k++) {
1456                         entry = lip->lp_idx.lip_entries + k * size;
1457                         key = (struct nodemap_key *)entry;
1458
1459                         entry += dt_nodemap_features.dif_keysize_max;
1460                         rec = (union nodemap_rec *)entry;
1461
1462                         rc = nodemap_process_keyrec(config, key, rec,
1463                                                     recent_nodemap);
1464                         if (rc < 0)
1465                                 return rc;
1466                 }
1467                 lip++;
1468         }
1469
1470         EXIT;
1471         return 0;
1472 }
1473 EXPORT_SYMBOL(nodemap_process_idx_pages);
1474
1475 static int nodemap_page_build(const struct lu_env *env, struct dt_object *obj,
1476                               union lu_page *lp, size_t bytes,
1477                               const struct dt_it_ops *iops,
1478                               struct dt_it *it, __u32 attr, void *arg)
1479 {
1480         struct idx_info *ii = (struct idx_info *)arg;
1481         struct lu_idxpage *lip = &lp->lp_idx;
1482         char *entry;
1483         size_t size = ii->ii_keysize + ii->ii_recsize;
1484         int rc;
1485         ENTRY;
1486
1487         if (bytes < LIP_HDR_SIZE)
1488                 return -EINVAL;
1489
1490         /* initialize the header of the new container */
1491         memset(lip, 0, LIP_HDR_SIZE);
1492         lip->lip_magic = LIP_MAGIC;
1493         bytes -= LIP_HDR_SIZE;
1494
1495         entry = lip->lip_entries;
1496         do {
1497                 char *tmp_entry = entry;
1498                 struct dt_key *key;
1499                 __u64 hash;
1500                 enum nodemap_idx_type key_type;
1501                 int sub_type;
1502
1503                 /* fetch 64-bit hash value */
1504                 hash = iops->store(env, it);
1505                 ii->ii_hash_end = hash;
1506
1507                 if (CFS_FAIL_CHECK(OBD_FAIL_OBD_IDX_READ_BREAK)) {
1508                         if (lip->lip_nr != 0)
1509                                 GOTO(out, rc = 0);
1510                 }
1511
1512                 if (bytes < size) {
1513                         if (lip->lip_nr == 0)
1514                                 GOTO(out, rc = -EINVAL);
1515                         GOTO(out, rc = 0);
1516                 }
1517
1518                 key = iops->key(env, it);
1519                 key_type = nodemap_get_key_type((struct nodemap_key *)key);
1520                 sub_type = nodemap_get_key_subtype((struct nodemap_key *)key);
1521
1522                 /* on the first pass, get only the cluster types. On second
1523                  * pass, get all the rest */
1524                 if ((ii->ii_attrs == NM_READ_CLUSTERS &&
1525                      key_type == NODEMAP_CLUSTER_IDX &&
1526                      sub_type == NODEMAP_CLUSTER_REC) ||
1527                     (ii->ii_attrs == NM_READ_ATTRIBUTES &&
1528                      (key_type != NODEMAP_CLUSTER_IDX ||
1529                       sub_type != NODEMAP_CLUSTER_REC) &&
1530                      key_type != NODEMAP_EMPTY_IDX)) {
1531                         memcpy(tmp_entry, key, ii->ii_keysize);
1532                         tmp_entry += ii->ii_keysize;
1533
1534                         /* and finally the record */
1535                         rc = iops->rec(env, it, (struct dt_rec *)tmp_entry,
1536                                        attr);
1537                         if (rc != -ESTALE) {
1538                                 if (rc != 0)
1539                                         GOTO(out, rc);
1540
1541                                 /* hash/key/record successfully copied! */
1542                                 lip->lip_nr++;
1543                                 if (unlikely(lip->lip_nr == 1 &&
1544                                     ii->ii_count == 0))
1545                                         ii->ii_hash_start = hash;
1546
1547                                 entry = tmp_entry + ii->ii_recsize;
1548                                 bytes -= size;
1549                         }
1550                 }
1551
1552                 /* move on to the next record */
1553                 do {
1554                         rc = iops->next(env, it);
1555                 } while (rc == -ESTALE);
1556
1557                 /* move to second pass */
1558                 if (rc > 0 && ii->ii_attrs == NM_READ_CLUSTERS) {
1559                         ii->ii_attrs = NM_READ_ATTRIBUTES;
1560                         rc = iops->load(env, it, 0);
1561                         if (rc == 0)
1562                                 rc = iops->next(env, it);
1563                         else if (rc > 0)
1564                                 rc = 0;
1565                         else
1566                                 GOTO(out, rc);
1567                 }
1568
1569         } while (rc == 0);
1570
1571         GOTO(out, rc);
1572 out:
1573         if (rc >= 0 && lip->lip_nr > 0)
1574                 /* one more container */
1575                 ii->ii_count++;
1576         if (rc > 0)
1577                 /* no more entries */
1578                 ii->ii_hash_end = II_END_OFF;
1579         return rc;
1580 }
1581
1582
1583 int nodemap_index_read(struct lu_env *env,
1584                        struct nm_config_file *ncf,
1585                        struct idx_info *ii,
1586                        const struct lu_rdpg *rdpg)
1587 {
1588         struct dt_object        *nodemap_idx = ncf->ncf_obj;
1589         __u64                    version;
1590         int                      rc = 0;
1591
1592         ii->ii_keysize = dt_nodemap_features.dif_keysize_max;
1593         ii->ii_recsize = dt_nodemap_features.dif_recsize_max;
1594
1595         dt_read_lock(env, nodemap_idx, 0);
1596         version = dt_version_get(env, nodemap_idx);
1597         if (rdpg->rp_hash != 0 && ii->ii_version != version) {
1598                 CDEBUG(D_INFO, "nodemap config changed inflight, old %llu, new %llu\n",
1599                        ii->ii_version,
1600                        version);
1601                 ii->ii_hash_end = 0;
1602         } else {
1603                 rc = dt_index_walk(env, nodemap_idx, rdpg, nodemap_page_build,
1604                                    ii);
1605                 CDEBUG(D_INFO, "walked index, hashend %llx\n", ii->ii_hash_end);
1606         }
1607
1608         if (rc >= 0)
1609                 ii->ii_version = version;
1610
1611         dt_read_unlock(env, nodemap_idx);
1612         return rc;
1613 }
1614 EXPORT_SYMBOL(nodemap_index_read);
1615
1616 /**
1617  * Returns the current nodemap configuration to MGC by walking the nodemap
1618  * config index and storing it in the response buffer.
1619  *
1620  * \param       req             incoming MGS_CONFIG_READ request
1621  * \retval      0               success
1622  * \retval      -EINVAL         malformed request
1623  * \retval      -ENOTCONN       client evicted/reconnected already
1624  * \retval      -ETIMEDOUT      client timeout or network error
1625  * \retval      -ENOMEM
1626  */
1627 int nodemap_get_config_req(struct obd_device *mgs_obd,
1628                            struct ptlrpc_request *req)
1629 {
1630         const struct ptlrpc_bulk_frag_ops *frag_ops = &ptlrpc_bulk_kiov_pin_ops;
1631         struct mgs_config_body *body;
1632         struct mgs_config_res *res;
1633         struct lu_rdpg rdpg;
1634         struct idx_info nodemap_ii;
1635         struct ptlrpc_bulk_desc *desc;
1636         struct tg_export_data *rqexp_ted = &req->rq_export->exp_target_data;
1637         int i;
1638         int page_count;
1639         int bytes = 0;
1640         int rc = 0;
1641
1642         body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
1643         if (!body)
1644                 RETURN(-EINVAL);
1645
1646         if (body->mcb_type != MGS_CFG_T_NODEMAP)
1647                 RETURN(-EINVAL);
1648
1649         rdpg.rp_count = (body->mcb_units << body->mcb_bits);
1650         rdpg.rp_npages = (rdpg.rp_count + PAGE_SIZE - 1) >>
1651                 PAGE_SHIFT;
1652         if (rdpg.rp_npages > PTLRPC_MAX_BRW_PAGES)
1653                 RETURN(-EINVAL);
1654
1655         CDEBUG(D_INFO, "reading nodemap log, name '%s', size = %u\n",
1656                body->mcb_name, rdpg.rp_count);
1657
1658         /* allocate pages to store the containers */
1659         OBD_ALLOC_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1660         if (rdpg.rp_pages == NULL)
1661                 RETURN(-ENOMEM);
1662         for (i = 0; i < rdpg.rp_npages; i++) {
1663                 rdpg.rp_pages[i] = alloc_page(GFP_NOFS);
1664                 if (rdpg.rp_pages[i] == NULL)
1665                         GOTO(out, rc = -ENOMEM);
1666         }
1667
1668         rdpg.rp_hash = body->mcb_offset;
1669         nodemap_ii.ii_magic = IDX_INFO_MAGIC;
1670         nodemap_ii.ii_flags = II_FL_NOHASH;
1671         nodemap_ii.ii_version = rqexp_ted->ted_nodemap_version;
1672         nodemap_ii.ii_attrs = body->mcb_nm_cur_pass;
1673
1674         bytes = nodemap_index_read(req->rq_svc_thread->t_env,
1675                                    obd2obt(mgs_obd)->obt_nodemap_config_file,
1676                                    &nodemap_ii, &rdpg);
1677         if (bytes < 0)
1678                 GOTO(out, rc = bytes);
1679
1680         rqexp_ted->ted_nodemap_version = nodemap_ii.ii_version;
1681
1682         res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
1683         if (res == NULL)
1684                 GOTO(out, rc = -EINVAL);
1685         res->mcr_offset = nodemap_ii.ii_hash_end;
1686         res->mcr_nm_cur_pass = nodemap_ii.ii_attrs;
1687
1688         page_count = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
1689         LASSERT(page_count <= rdpg.rp_count);
1690         desc = ptlrpc_prep_bulk_exp(req, page_count, 1,
1691                                     PTLRPC_BULK_PUT_SOURCE,
1692                                     MGS_BULK_PORTAL, frag_ops);
1693         if (desc == NULL)
1694                 GOTO(out, rc = -ENOMEM);
1695
1696         for (i = 0; i < page_count && bytes > 0; i++) {
1697                 frag_ops->add_kiov_frag(desc, rdpg.rp_pages[i], 0,
1698                                         min_t(int, bytes, PAGE_SIZE));
1699                 bytes -= PAGE_SIZE;
1700         }
1701
1702         rc = target_bulk_io(req->rq_export, desc);
1703         ptlrpc_free_bulk(desc);
1704
1705 out:
1706         if (rdpg.rp_pages != NULL) {
1707                 for (i = 0; i < rdpg.rp_npages; i++)
1708                         if (rdpg.rp_pages[i] != NULL)
1709                                 __free_page(rdpg.rp_pages[i]);
1710                 OBD_FREE_PTR_ARRAY(rdpg.rp_pages, rdpg.rp_npages);
1711         }
1712         return rc;
1713 }
1714 EXPORT_SYMBOL(nodemap_get_config_req);