Whamcloud - gitweb
land b_hd_sec on HEAD. various security fixes.
[fs/lustre-release.git] / lustre / mds / mds_lib.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #define DEBUG_SUBSYSTEM S_MDS
23
24 #include <linux/config.h>
25 #include <linux/module.h>
26 #include <linux/kernel.h>
27 #include <linux/mm.h>
28 #include <linux/string.h>
29 #include <linux/stat.h>
30 #include <linux/errno.h>
31 #include <linux/version.h>
32 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
33 # include <linux/locks.h>   // for wait_on_buffer
34 #else
35 # include <linux/buffer_head.h>   // for wait_on_buffer
36 #endif
37 #include <linux/unistd.h>
38
39 #include <asm/system.h>
40 #include <asm/uaccess.h>
41
42 #include <linux/fs.h>
43 #include <linux/stat.h>
44 #include <asm/uaccess.h>
45 #include <linux/slab.h>
46 #include <asm/segment.h>
47
48 #include <linux/obd_support.h>
49 #include <linux/lustre_lib.h>
50 #include <linux/lustre_sec.h>
51 #include <linux/lustre_ucache.h>
52 #include "mds_internal.h"
53
54 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
55 struct group_info *groups_alloc(int ngroups)
56 {
57         struct group_info *ginfo;
58
59         LASSERT(ngroups <= NGROUPS_SMALL);
60
61         OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
62         if (!ginfo)
63                 return NULL;
64         ginfo->ngroups = ngroups;
65         ginfo->nblocks = 1;
66         ginfo->blocks[0] = ginfo->small_block;
67         atomic_set(&ginfo->usage, 1);
68
69         return ginfo;
70 }
71
72 void groups_free(struct group_info *ginfo)
73 {
74         LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
75         LASSERT(ginfo->nblocks == 1);
76         LASSERT(ginfo->blocks[0] == ginfo->small_block);
77
78         OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
79 }
80
81 /* for 2.4 the group number is small, so simply search the
82  * whole array.
83  */
84 int groups_search(struct group_info *ginfo, gid_t grp)
85 {
86         int i;
87
88         if (!ginfo)
89                 return 0;
90
91         for (i = 0; i < ginfo->ngroups; i++)
92                 if (GROUP_AT(ginfo, i) == grp)
93                         return 1;
94         return 0;
95 }
96
97 #else /* >= 2.6.4 */
98
99 void groups_sort(struct group_info *ginfo)
100 {
101         int base, max, stride;
102         int gidsetsize = ginfo->ngroups;
103
104         for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
105                 ; /* nothing */
106         stride /= 3;
107
108         while (stride) {
109                 max = gidsetsize - stride;
110                 for (base = 0; base < max; base++) {
111                         int left = base;
112                         int right = left + stride;
113                         gid_t tmp = GROUP_AT(ginfo, right);
114                                                                                                     
115                         while (left >= 0 && GROUP_AT(ginfo, left) > tmp) {
116                                 GROUP_AT(ginfo, right) =
117                                     GROUP_AT(ginfo, left);
118                                 right = left;
119                                 left -= stride;
120                         }
121                         GROUP_AT(ginfo, right) = tmp;
122                 }
123                 stride /= 3;
124         }
125 }
126
127 int groups_search(struct group_info *ginfo, gid_t grp)
128 {
129         int left, right;
130
131         if (!ginfo)
132                 return 0;
133
134         left = 0;
135         right = ginfo->ngroups;
136         while (left < right) {
137                 int mid = (left + right) / 2;
138                 int cmp = grp - GROUP_AT(ginfo, mid);
139                 if (cmp > 0)
140                         left = mid + 1;
141                 else if (cmp < 0)
142                         right = mid;
143                 else
144                         return 1;
145         }
146         return 0;
147 }
148 #endif
149
150 void groups_from_buffer(struct group_info *ginfo, __u32 *gids)
151 {
152         int i, ngroups = ginfo->ngroups;
153
154         for (i = 0; i < ginfo->nblocks; i++) {
155                 int count = min(NGROUPS_PER_BLOCK, ngroups);
156
157                 memcpy(ginfo->blocks[i], gids, count * sizeof(__u32));
158                 gids += NGROUPS_PER_BLOCK;
159                 ngroups -= count;
160         }
161 }
162
163 void mds_pack_dentry2id(struct obd_device *obd,
164                         struct lustre_id *id,
165                         struct dentry *dentry,
166                         int fid)
167 {
168         id_ino(id) = dentry->d_inum;
169         id_gen(id) = dentry->d_generation;
170         
171         if (fid) {
172                 id_fid(id) = dentry->d_fid;
173                 id_group(id) = dentry->d_mdsnum;
174         }
175 }
176
177 void mds_pack_dentry2body(struct obd_device *obd,
178                           struct mds_body *b,
179                           struct dentry *dentry,
180                           int fid)
181 {
182         b->valid |= OBD_MD_FLID | OBD_MD_FLGENER |
183                 OBD_MD_MDS;
184
185         if (fid)
186                 b->valid |= OBD_MD_FID;
187         
188         mds_pack_dentry2id(obd, &b->id1, dentry, fid);
189 }
190
191 int mds_pack_inode2id(struct obd_device *obd,
192                       struct lustre_id *id,
193                       struct inode *inode,
194                       int fid)
195 {
196         int rc = 0;
197         ENTRY;
198
199         if (fid) {
200                 /* we have to avoid deadlock. */
201                 if (!down_trylock(&inode->i_sem)) {
202                         rc = mds_read_inode_sid(obd, inode, id);
203                         up(&inode->i_sem);
204                 } else {
205                         rc = mds_read_inode_sid(obd, inode, id);
206                 }
207         }
208
209         if (rc == 0) {
210                 id_ino(id) = inode->i_ino;
211                 id_gen(id) = inode->i_generation;
212                 id_type(id) = (S_IFMT & inode->i_mode);
213         }
214         RETURN(rc);
215 }
216
217 /* Note that we can copy all of the fields, just some will not be "valid" */
218 void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b,
219                          struct inode *inode, int fid)
220 {
221         b->valid |= OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID |
222                 OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE |
223                 OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER |
224                 OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */
225
226         if (!S_ISREG(inode->i_mode)) {
227                 b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
228                         OBD_MD_FLATIME | OBD_MD_FLMTIME |
229                         OBD_MD_FLRDEV;
230         }
231         b->atime = LTIME_S(inode->i_atime);
232         b->mtime = LTIME_S(inode->i_mtime);
233         b->ctime = LTIME_S(inode->i_ctime);
234         b->mode = inode->i_mode;
235         b->size = inode->i_size;
236         b->blocks = inode->i_blocks;
237         b->uid = inode->i_uid;
238         b->gid = inode->i_gid;
239         b->flags = inode->i_flags;
240         b->rdev = inode->i_rdev;
241         
242         /* Return the correct link count for orphan inodes */
243         if (mds_inode_is_orphan(inode)) {
244                 b->nlink = 0;
245         } else if (S_ISDIR(inode->i_mode)) {
246                 b->nlink = 1;
247         } else {
248                 b->nlink = inode->i_nlink;
249         }
250
251         if (fid)
252                 b->valid |= OBD_MD_FID;
253         
254         mds_pack_inode2id(obd, &b->id1, inode, fid);
255 }
256
257 /* unpacking */
258 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
259                               struct mds_update_record *r)
260 {
261         struct iattr *attr = &r->ur_iattr;
262         struct mds_rec_setattr *rec;
263         ENTRY;
264
265         rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
266                                  lustre_swab_mds_rec_setattr);
267         if (rec == NULL)
268                 RETURN (-EFAULT);
269
270         r->ur_id1 = &rec->sa_id;
271         attr->ia_valid = rec->sa_valid;
272         attr->ia_mode = rec->sa_mode;
273         attr->ia_uid = rec->sa_uid;
274         attr->ia_gid = rec->sa_gid;
275         attr->ia_size = rec->sa_size;
276         LTIME_S(attr->ia_atime) = rec->sa_atime;
277         LTIME_S(attr->ia_mtime) = rec->sa_mtime;
278         LTIME_S(attr->ia_ctime) = rec->sa_ctime;
279         attr->ia_attr_flags = rec->sa_attr_flags;
280
281         LASSERT_REQSWAB (req, offset + 1);
282         if (req->rq_reqmsg->bufcount > offset + 1) {
283                 r->ur_eadata = lustre_msg_buf (req->rq_reqmsg,
284                                                offset + 1, 0);
285                 if (r->ur_eadata == NULL)
286                         RETURN (-EFAULT);
287                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
288         }
289
290         if (req->rq_reqmsg->bufcount > offset + 2) {
291                 r->ur_ea2data = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
292                 if (r->ur_ea2data == NULL)
293                         RETURN (-EFAULT);
294
295                 r->ur_ea2datalen = req->rq_reqmsg->buflens[offset + 2];
296         }
297
298         RETURN(0);
299 }
300
301 static int mds_create_unpack(struct ptlrpc_request *req, int offset,
302                              struct mds_update_record *r)
303 {
304         struct mds_rec_create *rec;
305         ENTRY;
306
307         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
308                                   lustre_swab_mds_rec_create);
309         if (rec == NULL)
310                 RETURN (-EFAULT);
311
312         r->ur_id1 = &rec->cr_id;
313         r->ur_id2 = &rec->cr_replayid;
314         r->ur_mode = rec->cr_mode;
315         r->ur_rdev = rec->cr_rdev;
316         r->ur_time = rec->cr_time;
317         r->ur_flags = rec->cr_flags;
318
319         LASSERT_REQSWAB (req, offset + 1);
320         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
321         if (r->ur_name == NULL)
322                 RETURN (-EFAULT);
323         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
324
325         LASSERT_REQSWAB (req, offset + 2);
326         if (req->rq_reqmsg->bufcount > offset + 2) {
327                 if (S_ISLNK(r->ur_mode)) {
328                         r->ur_tgt = lustre_msg_string(req->rq_reqmsg,
329                                                       offset + 2, 0);
330                         if (r->ur_tgt == NULL)
331                                 RETURN (-EFAULT);
332                         r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
333                 } else if (S_ISDIR(r->ur_mode) ) {
334                         /* Stripe info for mkdir - just a 16bit integer */
335                         if (req->rq_reqmsg->buflens[offset + 2] != 2) {
336                                 CERROR("mkdir stripe info does not match "
337                                        "expected size %d vs 2\n",
338                                        req->rq_reqmsg->buflens[offset + 2]);
339                                 RETURN (-EINVAL);
340                         }
341                         r->ur_eadata = lustre_swab_buf (req->rq_reqmsg,
342                                                offset + 2, 2, __swab16s);
343                         r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
344                 } else if (S_ISREG(r->ur_mode)){
345                         r->ur_eadata = lustre_msg_buf (req->rq_reqmsg, 
346                                                        offset + 2, 0);
347                         r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
348                 } else {
349                         /* Hm, no other users so far? */
350                         LBUG();
351                 }
352         }
353         RETURN(0);
354 }
355
356 static int mds_link_unpack(struct ptlrpc_request *req, int offset,
357                            struct mds_update_record *r)
358 {
359         struct mds_rec_link *rec;
360         ENTRY;
361
362         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
363                                   lustre_swab_mds_rec_link);
364         if (rec == NULL)
365                 RETURN (-EFAULT);
366
367         r->ur_id1 = &rec->lk_id1;
368         r->ur_id2 = &rec->lk_id2;
369         r->ur_time = rec->lk_time;
370
371         LASSERT_REQSWAB (req, offset + 1);
372         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
373         if (r->ur_name == NULL)
374                 RETURN (-EFAULT);
375         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
376         RETURN(0);
377 }
378
379 static int mds_unlink_unpack(struct ptlrpc_request *req, int offset,
380                              struct mds_update_record *r)
381 {
382         struct mds_rec_unlink *rec;
383         ENTRY;
384
385         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
386                                   lustre_swab_mds_rec_unlink);
387         if (rec == NULL)
388                 RETURN(-EFAULT);
389
390         r->ur_mode = rec->ul_mode;
391         r->ur_id1 = &rec->ul_id1;
392         r->ur_id2 = &rec->ul_id2;
393         r->ur_time = rec->ul_time;
394
395         LASSERT_REQSWAB (req, offset + 1);
396         r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
397         if (r->ur_name == NULL)
398                 RETURN(-EFAULT);
399         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
400         RETURN(0);
401 }
402
403 static int mds_rename_unpack(struct ptlrpc_request *req, int offset,
404                              struct mds_update_record *r)
405 {
406         struct mds_rec_rename *rec;
407         ENTRY;
408
409         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
410                                   lustre_swab_mds_rec_rename);
411         if (rec == NULL)
412                 RETURN(-EFAULT);
413
414         r->ur_id1 = &rec->rn_id1;
415         r->ur_id2 = &rec->rn_id2;
416         r->ur_time = rec->rn_time;
417
418         LASSERT_REQSWAB (req, offset + 1);
419         r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
420         if (r->ur_name == NULL)
421                 RETURN(-EFAULT);
422         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
423
424         LASSERT_REQSWAB (req, offset + 2);
425         r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0);
426         if (r->ur_tgt == NULL)
427                 RETURN(-EFAULT);
428         r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
429         RETURN(0);
430 }
431
432 static int mds_open_unpack(struct ptlrpc_request *req, int offset,
433                            struct mds_update_record *r)
434 {
435         struct mds_rec_create *rec;
436         ENTRY;
437
438         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
439                                   lustre_swab_mds_rec_create);
440         if (rec == NULL)
441                 RETURN(-EFAULT);
442
443         r->ur_id1 = &rec->cr_id;
444         r->ur_id2 = &rec->cr_replayid;
445         r->ur_mode = rec->cr_mode;
446         r->ur_rdev = rec->cr_rdev;
447         r->ur_time = rec->cr_time;
448         r->ur_flags = rec->cr_flags;
449
450         LASSERT_REQSWAB (req, offset + 1);
451         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
452         if (r->ur_name == NULL)
453                 RETURN (-EFAULT);
454         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
455
456         LASSERT_REQSWAB (req, offset + 2);
457         if (req->rq_reqmsg->bufcount > offset + 2) {
458                 r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
459                 if (r->ur_eadata == NULL)
460                         RETURN(-EFAULT);
461                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
462         }
463         RETURN(0);
464 }
465
466 typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset,
467                                struct mds_update_record *r);
468
469 static update_unpacker mds_unpackers[REINT_MAX + 1] = {
470         [REINT_SETATTR] mds_setattr_unpack,
471         [REINT_CREATE] mds_create_unpack,
472         [REINT_LINK] mds_link_unpack,
473         [REINT_UNLINK] mds_unlink_unpack,
474         [REINT_RENAME] mds_rename_unpack,
475         [REINT_OPEN] mds_open_unpack,
476 };
477
478 int mds_update_unpack(struct ptlrpc_request *req, int offset,
479                       struct mds_update_record *rec)
480 {
481         __u32 *opcodep;
482         __u32  opcode;
483         int rc;
484         ENTRY;
485
486         /*
487          * NB don't lustre_swab_reqbuf() here. We're just taking a peek and we
488          * want to leave it to the specific unpacker once we've identified the
489          * message type.
490          */
491         opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof(*opcodep));
492         if (opcodep == NULL)
493                 RETURN(-EFAULT);
494
495         opcode = *opcodep;
496         if (lustre_msg_swabbed (req->rq_reqmsg))
497                 __swab32s (&opcode);
498
499         if (opcode > REINT_MAX ||
500             mds_unpackers[opcode] == NULL) {
501                 CERROR ("Unexpected opcode %d\n", opcode);
502                 RETURN(-EFAULT);
503         }
504
505         rec->ur_id1 = NULL;
506         rec->ur_id2 = NULL;
507         rec->ur_opcode = opcode;
508
509         rc = mds_unpackers[opcode](req, offset, rec);
510         
511 #if CRAY_PORTALS
512         rec->ur_fsuid = req->rq_uid;
513 #endif
514         RETURN(rc);
515 }
516
517 /********************************
518  * MDS uid/gid mapping handling *
519  ********************************/
520
521 static
522 struct mds_idmap_entry* idmap_alloc_entry(__u32 rmt_id, __u32 lcl_id)
523 {
524         struct mds_idmap_entry *e;
525
526         OBD_ALLOC(e, sizeof(*e));
527         if (!e)
528                 return NULL;
529
530         INIT_LIST_HEAD(&e->rmt_hash);
531         INIT_LIST_HEAD(&e->lcl_hash);
532         atomic_set(&e->refcount, 1);
533         e->rmt_id = rmt_id;
534         e->lcl_id = lcl_id;
535
536         return e;
537 }
538
539 void idmap_free_entry(struct mds_idmap_entry *e)
540 {
541         if (!list_empty(&e->rmt_hash))
542                 list_del(&e->rmt_hash);
543         if (!list_empty(&e->lcl_hash))
544                 list_del(&e->lcl_hash);
545         OBD_FREE(e, sizeof(*e));
546 }
547
548 static
549 int idmap_insert_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
550                        struct mds_idmap_entry *new, const char *warn_msg)
551 {
552         struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(new->rmt_id)];
553         struct list_head *lcl_head = &lcl_hash[MDS_IDMAP_HASHFUNC(new->lcl_id)];
554         struct mds_idmap_entry *e;
555
556         list_for_each_entry(e, rmt_head, rmt_hash) {
557                 if (e->rmt_id == new->rmt_id &&
558                     e->lcl_id == new->lcl_id) {
559                         atomic_inc(&e->refcount);
560                         return 1;
561                 }
562                 if (e->rmt_id == new->rmt_id && warn_msg)
563                         CWARN("%s: rmt id %u already map to %u (new %u)\n",
564                               warn_msg, e->rmt_id, e->lcl_id, new->lcl_id);
565                 if (e->lcl_id == new->lcl_id && warn_msg)
566                         CWARN("%s: lcl id %u already be mapped from %u "
567                               "(new %u)\n", warn_msg,
568                               e->lcl_id, e->rmt_id, new->rmt_id);
569         }
570
571         list_add_tail(rmt_head, &new->rmt_hash);
572         list_add_tail(lcl_head, &new->lcl_hash);
573         return 0;
574 }
575
576 static
577 int idmap_remove_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
578                        __u32 rmt_id, __u32 lcl_id)
579 {
580         struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(rmt_id)];
581         struct mds_idmap_entry *e;
582
583         list_for_each_entry(e, rmt_head, rmt_hash) {
584                 if (e->rmt_id == rmt_id && e->lcl_id == lcl_id) {
585                         if (atomic_dec_and_test(&e->refcount)) {
586                                 list_del(&e->rmt_hash);
587                                 list_del(&e->lcl_hash);
588                                 OBD_FREE(e, sizeof(*e));
589                                 return 0;
590                         } else
591                                 return 1;
592                 }
593         }
594         return -ENOENT;
595 }
596
597 int mds_idmap_add(struct mds_idmap_table *tbl,
598                   uid_t rmt_uid, uid_t lcl_uid,
599                   gid_t rmt_gid, gid_t lcl_gid)
600 {
601         struct mds_idmap_entry *ue, *ge;
602         ENTRY;
603
604         if (!tbl)
605                 RETURN(-EPERM);
606
607         ue = idmap_alloc_entry(rmt_uid, lcl_uid);
608         if (!ue)
609                 RETURN(-ENOMEM);
610         ge = idmap_alloc_entry(rmt_gid, lcl_gid);
611         if (!ge) {
612                 idmap_free_entry(ue);
613                 RETURN(-ENOMEM);
614         }
615
616         spin_lock(&tbl->mit_lock);
617
618         if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
619                                tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
620                                ue, "UID mapping")) {
621                 idmap_free_entry(ue);
622         }
623
624         if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
625                                tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
626                                ge, "GID mapping")) {
627                 idmap_free_entry(ge);
628         }
629
630         spin_unlock(&tbl->mit_lock);
631         RETURN(0);
632 }
633
634 int mds_idmap_del(struct mds_idmap_table *tbl,
635                   uid_t rmt_uid, uid_t lcl_uid,
636                   gid_t rmt_gid, gid_t lcl_gid)
637 {
638         ENTRY;
639
640         if (!tbl)
641                 RETURN(0);
642
643         spin_lock(&tbl->mit_lock);
644         idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
645                            tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
646                            rmt_uid, lcl_uid);
647         idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
648                            tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
649                            rmt_gid, lcl_gid);
650         spin_unlock(&tbl->mit_lock);
651         RETURN(0);
652 }
653
654 static
655 __u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id)
656 {
657         struct list_head *head = &hash[MDS_IDMAP_HASHFUNC(id)];
658         struct mds_idmap_entry *e;
659
660         if (!reverse) {
661                 list_for_each_entry(e, head, rmt_hash) {
662                         if (e->rmt_id == id)
663                                 return e->lcl_id;
664                 }
665                 return MDS_IDMAP_NOTFOUND;
666         } else {
667                 list_for_each_entry(e, head, lcl_hash) {
668                         if (e->lcl_id == id)
669                                 return e->rmt_id;
670                 }
671                 return MDS_IDMAP_NOTFOUND;
672         }
673 }
674
675 int mds_idmap_lookup_uid(struct mds_idmap_table *tbl, int reverse, uid_t uid)
676 {
677         struct list_head *hash;
678
679         if (!tbl)
680                 return MDS_IDMAP_NOTFOUND;
681
682         if (!reverse)
683                 hash = tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX];
684         else
685                 hash = tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX];
686
687         spin_lock(&tbl->mit_lock);
688         uid = idmap_lookup_id(hash, reverse, uid);
689         spin_unlock(&tbl->mit_lock);
690
691         return uid;
692 }
693
694 int mds_idmap_lookup_gid(struct mds_idmap_table *tbl, int reverse, gid_t gid)
695 {
696         struct list_head *hash;
697
698         if (!tbl)
699                 return MDS_IDMAP_NOTFOUND;
700
701         if (!reverse)
702                 hash = tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX];
703         else
704                 hash = tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX];
705
706         spin_lock(&tbl->mit_lock);
707         gid = idmap_lookup_id(hash, reverse, gid);
708         spin_unlock(&tbl->mit_lock);
709
710         return gid;
711 }
712
713 struct mds_idmap_table *mds_idmap_alloc()
714 {
715         struct mds_idmap_table *tbl;
716         int i, j;
717
718         OBD_ALLOC(tbl, sizeof(*tbl));
719         if (!tbl)
720                 return NULL;
721
722         spin_lock_init(&tbl->mit_lock);
723         for (i = 0; i < MDS_IDMAP_N_HASHES; i++)
724                 for (j = 0; j < MDS_IDMAP_HASHSIZE; j++)
725                         INIT_LIST_HEAD(&tbl->mit_idmaps[i][j]);
726
727         return tbl;
728 }
729
730 static void idmap_clear_rmt_hash(struct list_head *list)
731 {
732         struct mds_idmap_entry *e;
733         int i;
734
735         for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) {
736                 while (!list_empty(&list[i])) {
737                         e = list_entry(list[i].next, struct mds_idmap_entry,
738                                        rmt_hash);
739                         idmap_free_entry(e);
740                 }
741         }
742 }
743
744 void mds_idmap_free(struct mds_idmap_table *tbl)
745 {
746         int i;
747
748         spin_lock(&tbl->mit_lock);
749         idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX]);
750         idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX]);
751
752         /* paranoid checking */
753         for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) {
754                 LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX][i]));
755                 LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX][i]));
756         }
757         spin_unlock(&tbl->mit_lock);
758
759         OBD_FREE(tbl, sizeof(*tbl));
760 }
761
762 /*********************************
763  * helpers doing mapping for MDS *
764  *********************************/
765
766 /*
767  * we allow remote setuid/setgid to an "authencated" one,
768  * this policy probably change later.
769  */
770 static
771 int mds_req_secdesc_do_map(struct mds_export_data *med,
772                            struct mds_req_sec_desc *rsd)
773 {
774         struct mds_idmap_table *idmap = med->med_idmap;
775         uid_t uid, fsuid;
776         gid_t gid, fsgid;
777
778         uid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_uid);
779         if (uid == MDS_IDMAP_NOTFOUND) {
780                 CERROR("can't find map for uid %u\n", rsd->rsd_uid);
781                 return -EPERM;
782         }
783
784         if (rsd->rsd_uid == rsd->rsd_fsuid)
785                 fsuid = uid;
786         else {
787                 fsuid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_fsuid);
788                 if (fsuid == MDS_IDMAP_NOTFOUND) {
789                         CERROR("can't find map for fsuid %u\n", rsd->rsd_fsuid);
790                         return -EPERM;
791                 }
792         }
793
794         gid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_gid);
795         if (gid == MDS_IDMAP_NOTFOUND) {
796                 CERROR("can't find map for gid %u\n", rsd->rsd_gid);
797                 return -EPERM;
798         }
799
800         if (rsd->rsd_gid == rsd->rsd_fsgid)
801                 fsgid = gid;
802         else {
803                 fsgid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_fsgid);
804                 if (fsgid == MDS_IDMAP_NOTFOUND) {
805                         CERROR("can't find map for fsgid %u\n", rsd->rsd_fsgid);
806                         return -EPERM;
807                 }
808         }
809
810         rsd->rsd_uid = uid;
811         rsd->rsd_gid = gid;
812         rsd->rsd_fsuid = fsuid;
813         rsd->rsd_fsgid = fsgid;
814
815         return 0;
816 }
817
818 void mds_body_do_reverse_map(struct mds_export_data *med,
819                              struct mds_body *body)
820 {
821         uid_t uid;
822         gid_t gid;
823
824         if (!med->med_remote)
825                 return;
826
827         ENTRY;
828         if (body->valid & OBD_MD_FLUID) {
829                 uid = mds_idmap_lookup_uid(med->med_idmap, 1, body->uid);
830                 if (uid == MDS_IDMAP_NOTFOUND) {
831                         uid = med->med_nllu;
832                         if (body->valid & OBD_MD_FLMODE) {
833                                 body->mode = (body->mode & ~S_IRWXU) |
834                                              ((body->mode & S_IRWXO) << 6);
835                         }
836                 }
837                 body->uid = uid;
838         }
839         if (body->valid & OBD_MD_FLGID) {
840                 gid = mds_idmap_lookup_gid(med->med_idmap, 1, body->gid);
841                 if (gid == MDS_IDMAP_NOTFOUND) {
842                         gid = med->med_nllg;
843                         if (body->valid & OBD_MD_FLMODE) {
844                                 body->mode = (body->mode & ~S_IRWXG) |
845                                              ((body->mode & S_IRWXO) << 3);
846                         }
847                 }
848                 body->gid = gid;
849         }
850
851         EXIT;
852 }
853
854 /**********************
855  * MDS ucred handling *
856  **********************/
857
858 static inline void drop_ucred_ginfo(struct lvfs_ucred *ucred)
859 {
860         if (ucred->luc_ginfo) {
861                 put_group_info(ucred->luc_ginfo);
862                 ucred->luc_ginfo = NULL;
863         }
864 }
865
866 static inline void drop_ucred_lsd(struct lvfs_ucred *ucred)
867 {
868         if (ucred->luc_lsd) {
869                 mds_put_lsd(ucred->luc_lsd);
870                 ucred->luc_lsd = NULL;
871         }
872 }
873
874 /*
875  * the heart of the uid/gid handling and security checking.
876  *
877  * root could set any group_info if we allowed setgroups, while
878  * normal user only could 'reduce' their group members -- which
879  * is somewhat expensive.
880  *
881  * authenticated as mds user (using mds service credential) could
882  * bypass all checkings.
883  */
884 int mds_init_ucred(struct lvfs_ucred *ucred,
885                    struct ptlrpc_request *req,
886                    struct mds_req_sec_desc *rsd)
887 {
888         struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
889         struct mds_export_data *med = &req->rq_export->u.eu_mds_data;
890         struct lustre_sec_desc *lsd;
891         ptl_nid_t peernid = req->rq_peer.peer_id.nid;
892         struct group_info *gnew;
893         unsigned int setuid, setgid, strong_sec, root_squashed;
894         __u32 lsd_perms;
895         ENTRY;
896
897         LASSERT(ucred);
898         LASSERT(rsd);
899         LASSERT(rsd->rsd_ngroups <= LUSTRE_MAX_GROUPS);
900
901         if (SEC_FLAVOR_MAJOR(req->rq_req_secflvr) == PTLRPCS_FLVR_MAJOR_GSS &&
902             (SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_AUTH ||
903              SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_PRIV))
904                 strong_sec = 1;
905         else
906                 strong_sec = 0;
907
908         LASSERT(!(req->rq_remote_realm && !strong_sec));
909
910         if (strong_sec && req->rq_auth_uid == -1) {
911                 CWARN("user not authenticated, deny access\n");
912                 RETURN(-EPERM);
913         }
914
915         if (req->rq_auth_usr_mds)
916                 goto get_lsd;
917
918         /* if we use strong authentication, we expect the uid which
919          * client claimed is true.
920          */
921         if (strong_sec) {
922                 if (!med->med_remote) {
923                         if (req->rq_auth_uid != rsd->rsd_uid) {
924                                 CERROR("local client "LPU64": auth uid %u "
925                                        "while client claim %u:%u/%u:%u\n",
926                                        peernid, req->rq_auth_uid,
927                                        rsd->rsd_uid, rsd->rsd_gid,
928                                        rsd->rsd_fsuid, rsd->rsd_fsgid);
929                                 RETURN(-EPERM);
930                         }
931                 } else {
932                         if (req->rq_mapped_uid == MDS_IDMAP_NOTFOUND) {
933                                 CWARN("no mapping found, deny\n");
934                                 RETURN(-EPERM);
935                         }
936
937                         if (mds_req_secdesc_do_map(med, rsd))
938                                 RETURN(-EPERM);
939
940                         if (req->rq_mapped_uid != rsd->rsd_uid) {
941                                 CERROR("remote client "LPU64": auth uid %u "
942                                        "while client claim %u:%u/%u:%u\n",
943                                        peernid, req->rq_auth_uid,
944                                        rsd->rsd_uid, rsd->rsd_gid,
945                                        rsd->rsd_fsuid, rsd->rsd_fsgid);
946                         }
947                 }
948         }
949
950 get_lsd:
951         /* now lsd come into play */
952         ucred->luc_ginfo = NULL;
953         ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid);
954
955         if (!lsd) {
956                 CERROR("Deny access without LSD: uid %d\n", rsd->rsd_uid);
957                 RETURN(-EPERM);
958         }
959
960         lsd_perms = mds_lsd_get_perms(lsd, med->med_remote, 0, peernid);
961
962         if (req->rq_auth_usr_mds)
963                 goto squash_root;
964
965         /* find out the setuid/setgid attempt */
966         setuid = (rsd->rsd_uid != rsd->rsd_fsuid);
967         setgid = (rsd->rsd_gid != rsd->rsd_fsgid ||
968                   rsd->rsd_gid != lsd->lsd_gid);
969
970         /* check permission of setuid */
971         if (setuid && !(lsd_perms & LSD_PERM_SETUID)) {
972                 CWARN("mds blocked setuid attempt (%u -> %u) from "LPU64"\n",
973                       rsd->rsd_uid, rsd->rsd_fsuid, peernid);
974                 RETURN(-EPERM);
975         }
976
977         /* check permission of setgid */
978         if (setgid && !(lsd_perms & LSD_PERM_SETGID)) {
979                 CWARN("mds blocked setgid attempt (%u:%u/%u:%u -> %u) from "
980                       LPU64"\n", rsd->rsd_uid, rsd->rsd_gid,
981                       rsd->rsd_fsuid, rsd->rsd_fsgid, lsd->lsd_gid, peernid);
982                 RETURN(-EPERM);
983         }
984
985 squash_root:
986         root_squashed = mds_squash_root(mds, rsd, &peernid); 
987
988         /* remove privilege for non-root user */
989         if (rsd->rsd_fsuid)
990                 rsd->rsd_cap &= ~CAP_FS_MASK;
991
992         /* by now every fields other than groups in rsd have been granted */
993         ucred->luc_uid = rsd->rsd_uid;
994         ucred->luc_gid = rsd->rsd_gid;
995         ucred->luc_fsuid = rsd->rsd_fsuid;
996         ucred->luc_fsgid = rsd->rsd_fsgid;
997         ucred->luc_cap = rsd->rsd_cap;
998
999         /* don't use any supplementary group for remote client or
1000          * we squashed root */
1001         if (med->med_remote || root_squashed)
1002                 RETURN(0);
1003
1004         /* install groups from LSD */
1005         if (lsd->lsd_ginfo) {
1006                 ucred->luc_ginfo = lsd->lsd_ginfo;
1007                 get_group_info(ucred->luc_ginfo);
1008         }
1009
1010         /* everything is done if we don't allow setgroups */
1011         if (!(lsd_perms & LSD_PERM_SETGRP))
1012                 RETURN(0);
1013
1014         /* root could set any groups as he want (if allowed), normal
1015          * users only could reduce his group array.
1016          */
1017         if (ucred->luc_uid == 0) {
1018                 drop_ucred_ginfo(ucred);
1019
1020                 if (rsd->rsd_ngroups == 0)
1021                         RETURN(0);
1022
1023                 gnew = groups_alloc(rsd->rsd_ngroups);
1024                 if (!gnew) {
1025                         CERROR("out of memory\n");
1026                         drop_ucred_lsd(ucred);
1027                         RETURN(-ENOMEM);
1028                 }
1029                 groups_from_buffer(gnew, rsd->rsd_groups);
1030                 groups_sort(gnew); /* don't rely on client doing this */
1031
1032                 ucred->luc_ginfo = gnew;
1033         } else {
1034                 __u32 set = 0, cur = 0;
1035                 struct group_info *ginfo = ucred->luc_ginfo;
1036
1037                 if (!ginfo)
1038                         RETURN(0);
1039
1040                 /* Note: freeing a group_info count on 'nblocks' instead of
1041                  * 'ngroups', thus we can safely alloc enough buffer and reduce
1042                  * and ngroups number later.
1043                  */
1044                 gnew = groups_alloc(rsd->rsd_ngroups);
1045                 if (!gnew) {
1046                         CERROR("out of memory\n");
1047                         drop_ucred_ginfo(ucred);
1048                         drop_ucred_lsd(ucred);
1049                         RETURN(-ENOMEM);
1050                 }
1051
1052                 while (cur < rsd->rsd_ngroups) {
1053                         if (groups_search(ginfo, rsd->rsd_groups[cur])) {
1054                                 GROUP_AT(gnew, set) = rsd->rsd_groups[cur];
1055                                 set++;
1056                         }
1057                         cur++;
1058                 }
1059                 gnew->ngroups = set;
1060
1061                 put_group_info(ucred->luc_ginfo);
1062                 ucred->luc_ginfo = gnew;
1063         }
1064         RETURN(0);
1065 }
1066
1067 void mds_exit_ucred(struct lvfs_ucred *ucred)
1068 {
1069         ENTRY;
1070         drop_ucred_ginfo(ucred);
1071         drop_ucred_lsd(ucred);
1072         EXIT;
1073 }