Whamcloud - gitweb
land b_hd_sec: perm/acl authorization for remote users.
[fs/lustre-release.git] / lustre / mds / mds_lib.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #define DEBUG_SUBSYSTEM S_MDS
23
24 #include <linux/config.h>
25 #include <linux/module.h>
26 #include <linux/kernel.h>
27 #include <linux/mm.h>
28 #include <linux/string.h>
29 #include <linux/stat.h>
30 #include <linux/errno.h>
31 #include <linux/version.h>
32 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
33 # include <linux/locks.h>   // for wait_on_buffer
34 #else
35 # include <linux/buffer_head.h>   // for wait_on_buffer
36 #endif
37 #include <linux/unistd.h>
38
39 #include <asm/system.h>
40 #include <asm/uaccess.h>
41
42 #include <linux/fs.h>
43 #include <linux/stat.h>
44 #include <asm/uaccess.h>
45 #include <linux/slab.h>
46 #include <asm/segment.h>
47
48 #include <linux/obd_support.h>
49 #include <linux/lustre_lib.h>
50 #include <linux/lustre_sec.h>
51 #include <linux/lustre_ucache.h>
52 #include "mds_internal.h"
53
54 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
55 struct group_info *groups_alloc(int ngroups)
56 {
57         struct group_info *ginfo;
58
59         LASSERT(ngroups <= NGROUPS_SMALL);
60
61         OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
62         if (!ginfo)
63                 return NULL;
64         ginfo->ngroups = ngroups;
65         ginfo->nblocks = 1;
66         ginfo->blocks[0] = ginfo->small_block;
67         atomic_set(&ginfo->usage, 1);
68
69         return ginfo;
70 }
71
72 void groups_free(struct group_info *ginfo)
73 {
74         LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
75         LASSERT(ginfo->nblocks == 1);
76         LASSERT(ginfo->blocks[0] == ginfo->small_block);
77
78         OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
79 }
80
81 /* for 2.4 the group number is small, so simply search the
82  * whole array.
83  */
84 int groups_search(struct group_info *ginfo, gid_t grp)
85 {
86         int i;
87
88         if (!ginfo)
89                 return 0;
90
91         for (i = 0; i < ginfo->ngroups; i++)
92                 if (GROUP_AT(ginfo, i) == grp)
93                         return 1;
94         return 0;
95 }
96
97 #else /* >= 2.6.4 */
98
99 void groups_sort(struct group_info *ginfo)
100 {
101         int base, max, stride;
102         int gidsetsize = ginfo->ngroups;
103
104         for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
105                 ; /* nothing */
106         stride /= 3;
107
108         while (stride) {
109                 max = gidsetsize - stride;
110                 for (base = 0; base < max; base++) {
111                         int left = base;
112                         int right = left + stride;
113                         gid_t tmp = GROUP_AT(ginfo, right);
114                                                                                                     
115                         while (left >= 0 && GROUP_AT(ginfo, left) > tmp) {
116                                 GROUP_AT(ginfo, right) =
117                                     GROUP_AT(ginfo, left);
118                                 right = left;
119                                 left -= stride;
120                         }
121                         GROUP_AT(ginfo, right) = tmp;
122                 }
123                 stride /= 3;
124         }
125 }
126
127 int groups_search(struct group_info *ginfo, gid_t grp)
128 {
129         int left, right;
130
131         if (!ginfo)
132                 return 0;
133
134         left = 0;
135         right = ginfo->ngroups;
136         while (left < right) {
137                 int mid = (left + right) / 2;
138                 int cmp = grp - GROUP_AT(ginfo, mid);
139                 if (cmp > 0)
140                         left = mid + 1;
141                 else if (cmp < 0)
142                         right = mid;
143                 else
144                         return 1;
145         }
146         return 0;
147 }
148 #endif
149
150 void groups_from_buffer(struct group_info *ginfo, __u32 *gids)
151 {
152         int i, ngroups = ginfo->ngroups;
153
154         for (i = 0; i < ginfo->nblocks; i++) {
155                 int count = min(NGROUPS_PER_BLOCK, ngroups);
156
157                 memcpy(ginfo->blocks[i], gids, count * sizeof(__u32));
158                 gids += NGROUPS_PER_BLOCK;
159                 ngroups -= count;
160         }
161 }
162
163 void mds_pack_dentry2id(struct obd_device *obd,
164                         struct lustre_id *id,
165                         struct dentry *dentry,
166                         int fid)
167 {
168         id_ino(id) = dentry->d_inum;
169         id_gen(id) = dentry->d_generation;
170         
171         if (fid) {
172                 id_fid(id) = dentry->d_fid;
173                 id_group(id) = dentry->d_mdsnum;
174         }
175 }
176
177 void mds_pack_dentry2body(struct obd_device *obd,
178                           struct mds_body *b,
179                           struct dentry *dentry,
180                           int fid)
181 {
182         b->valid |= OBD_MD_FLID | OBD_MD_FLGENER |
183                 OBD_MD_MDS;
184
185         if (fid)
186                 b->valid |= OBD_MD_FID;
187         
188         mds_pack_dentry2id(obd, &b->id1, dentry, fid);
189 }
190
191 int mds_pack_inode2id(struct obd_device *obd,
192                       struct lustre_id *id,
193                       struct inode *inode,
194                       int fid)
195 {
196         int rc = 0;
197         ENTRY;
198
199         if (fid) {
200                 /* we have to avoid deadlock. */
201                 if (!down_trylock(&inode->i_sem)) {
202                         rc = mds_read_inode_sid(obd, inode, id);
203                         up(&inode->i_sem);
204                 } else {
205                         rc = mds_read_inode_sid(obd, inode, id);
206                 }
207         }
208
209         if (rc == 0) {
210                 id_ino(id) = inode->i_ino;
211                 id_gen(id) = inode->i_generation;
212                 id_type(id) = (S_IFMT & inode->i_mode);
213         }
214         RETURN(rc);
215 }
216
217 /* Note that we can copy all of the fields, just some will not be "valid" */
218 void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b,
219                          struct inode *inode, int fid)
220 {
221         b->valid |= OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID |
222                 OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE |
223                 OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER |
224                 OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */
225
226         if (!S_ISREG(inode->i_mode)) {
227                 b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
228                         OBD_MD_FLATIME | OBD_MD_FLMTIME |
229                         OBD_MD_FLRDEV;
230         }
231         b->atime = LTIME_S(inode->i_atime);
232         b->mtime = LTIME_S(inode->i_mtime);
233         b->ctime = LTIME_S(inode->i_ctime);
234         b->mode = inode->i_mode;
235         b->size = inode->i_size;
236         b->blocks = inode->i_blocks;
237         b->uid = inode->i_uid;
238         b->gid = inode->i_gid;
239         b->flags = inode->i_flags;
240         b->rdev = inode->i_rdev;
241         
242         /* Return the correct link count for orphan inodes */
243         if (mds_inode_is_orphan(inode)) {
244                 b->nlink = 0;
245         } else if (S_ISDIR(inode->i_mode)) {
246                 b->nlink = 1;
247         } else {
248                 b->nlink = inode->i_nlink;
249         }
250
251         if (fid)
252                 b->valid |= OBD_MD_FID;
253         
254         mds_pack_inode2id(obd, &b->id1, inode, fid);
255 }
256
257 /* unpacking */
258 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
259                               struct mds_update_record *r)
260 {
261         struct iattr *attr = &r->ur_iattr;
262         struct mds_rec_setattr *rec;
263         ENTRY;
264
265         rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
266                                  lustre_swab_mds_rec_setattr);
267         if (rec == NULL)
268                 RETURN (-EFAULT);
269
270         r->ur_id1 = &rec->sa_id;
271         attr->ia_valid = rec->sa_valid;
272         attr->ia_mode = rec->sa_mode;
273         attr->ia_uid = rec->sa_uid;
274         attr->ia_gid = rec->sa_gid;
275         attr->ia_size = rec->sa_size;
276         LTIME_S(attr->ia_atime) = rec->sa_atime;
277         LTIME_S(attr->ia_mtime) = rec->sa_mtime;
278         LTIME_S(attr->ia_ctime) = rec->sa_ctime;
279         attr->ia_attr_flags = rec->sa_attr_flags;
280
281         LASSERT_REQSWAB (req, offset + 1);
282         if (req->rq_reqmsg->bufcount > offset + 1) {
283                 r->ur_eadata = lustre_msg_buf (req->rq_reqmsg,
284                                                offset + 1, 0);
285                 if (r->ur_eadata == NULL)
286                         RETURN (-EFAULT);
287                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
288         }
289
290         if (req->rq_reqmsg->bufcount > offset + 2) {
291                 r->ur_ea2data = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
292                 if (r->ur_ea2data == NULL)
293                         RETURN (-EFAULT);
294
295                 r->ur_ea2datalen = req->rq_reqmsg->buflens[offset + 2];
296         }
297
298         RETURN(0);
299 }
300
301 static int mds_create_unpack(struct ptlrpc_request *req, int offset,
302                              struct mds_update_record *r)
303 {
304         struct mds_rec_create *rec;
305         ENTRY;
306
307         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
308                                   lustre_swab_mds_rec_create);
309         if (rec == NULL)
310                 RETURN (-EFAULT);
311
312         r->ur_id1 = &rec->cr_id;
313         r->ur_id2 = &rec->cr_replayid;
314         r->ur_mode = rec->cr_mode;
315         r->ur_rdev = rec->cr_rdev;
316         r->ur_time = rec->cr_time;
317         r->ur_flags = rec->cr_flags;
318
319         LASSERT_REQSWAB (req, offset + 1);
320         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
321         if (r->ur_name == NULL)
322                 RETURN (-EFAULT);
323         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
324
325         LASSERT_REQSWAB (req, offset + 2);
326         if (req->rq_reqmsg->bufcount > offset + 2) {
327                 if (S_ISLNK(r->ur_mode)) {
328                         r->ur_tgt = lustre_msg_string(req->rq_reqmsg,
329                                                       offset + 2, 0);
330                         if (r->ur_tgt == NULL)
331                                 RETURN (-EFAULT);
332                         r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
333                 } else if (S_ISDIR(r->ur_mode) ) {
334                         /* Stripe info for mkdir - just a 16bit integer */
335                         if (req->rq_reqmsg->buflens[offset + 2] != 2) {
336                                 CERROR("mkdir stripe info does not match "
337                                        "expected size %d vs 2\n",
338                                        req->rq_reqmsg->buflens[offset + 2]);
339                                 RETURN (-EINVAL);
340                         }
341                         r->ur_eadata = lustre_swab_buf (req->rq_reqmsg,
342                                                offset + 2, 2, __swab16s);
343                         r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
344                 } else if (S_ISREG(r->ur_mode)){
345                         r->ur_eadata = lustre_msg_buf (req->rq_reqmsg, 
346                                                        offset + 2, 0);
347                         r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
348                 } else {
349                         /* Hm, no other users so far? */
350                         LBUG();
351                 }
352         }
353         RETURN(0);
354 }
355
356 static int mds_link_unpack(struct ptlrpc_request *req, int offset,
357                            struct mds_update_record *r)
358 {
359         struct mds_rec_link *rec;
360         ENTRY;
361
362         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
363                                   lustre_swab_mds_rec_link);
364         if (rec == NULL)
365                 RETURN (-EFAULT);
366
367         r->ur_id1 = &rec->lk_id1;
368         r->ur_id2 = &rec->lk_id2;
369         r->ur_time = rec->lk_time;
370
371         LASSERT_REQSWAB (req, offset + 1);
372         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
373         if (r->ur_name == NULL)
374                 RETURN (-EFAULT);
375         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
376         RETURN(0);
377 }
378
379 static int mds_unlink_unpack(struct ptlrpc_request *req, int offset,
380                              struct mds_update_record *r)
381 {
382         struct mds_rec_unlink *rec;
383         ENTRY;
384
385         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
386                                   lustre_swab_mds_rec_unlink);
387         if (rec == NULL)
388                 RETURN(-EFAULT);
389
390         r->ur_mode = rec->ul_mode;
391         r->ur_id1 = &rec->ul_id1;
392         r->ur_id2 = &rec->ul_id2;
393         r->ur_time = rec->ul_time;
394
395         LASSERT_REQSWAB (req, offset + 1);
396         r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
397         if (r->ur_name == NULL)
398                 RETURN(-EFAULT);
399         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
400         RETURN(0);
401 }
402
403 static int mds_rename_unpack(struct ptlrpc_request *req, int offset,
404                              struct mds_update_record *r)
405 {
406         struct mds_rec_rename *rec;
407         ENTRY;
408
409         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
410                                   lustre_swab_mds_rec_rename);
411         if (rec == NULL)
412                 RETURN(-EFAULT);
413
414         r->ur_id1 = &rec->rn_id1;
415         r->ur_id2 = &rec->rn_id2;
416         r->ur_time = rec->rn_time;
417
418         LASSERT_REQSWAB (req, offset + 1);
419         r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
420         if (r->ur_name == NULL)
421                 RETURN(-EFAULT);
422         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
423
424         LASSERT_REQSWAB (req, offset + 2);
425         r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0);
426         if (r->ur_tgt == NULL)
427                 RETURN(-EFAULT);
428         r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
429         RETURN(0);
430 }
431
432 static int mds_open_unpack(struct ptlrpc_request *req, int offset,
433                            struct mds_update_record *r)
434 {
435         struct mds_rec_create *rec;
436         ENTRY;
437
438         rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
439                                   lustre_swab_mds_rec_create);
440         if (rec == NULL)
441                 RETURN(-EFAULT);
442
443         r->ur_id1 = &rec->cr_id;
444         r->ur_id2 = &rec->cr_replayid;
445         r->ur_mode = rec->cr_mode;
446         r->ur_rdev = rec->cr_rdev;
447         r->ur_time = rec->cr_time;
448         r->ur_flags = rec->cr_flags;
449
450         LASSERT_REQSWAB (req, offset + 1);
451         r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
452         if (r->ur_name == NULL)
453                 RETURN (-EFAULT);
454         r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
455
456         LASSERT_REQSWAB (req, offset + 2);
457         if (req->rq_reqmsg->bufcount > offset + 2) {
458                 r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
459                 if (r->ur_eadata == NULL)
460                         RETURN(-EFAULT);
461                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
462         }
463         RETURN(0);
464 }
465
466 typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset,
467                                struct mds_update_record *r);
468
469 static update_unpacker mds_unpackers[REINT_MAX + 1] = {
470         [REINT_SETATTR] mds_setattr_unpack,
471         [REINT_CREATE] mds_create_unpack,
472         [REINT_LINK] mds_link_unpack,
473         [REINT_UNLINK] mds_unlink_unpack,
474         [REINT_RENAME] mds_rename_unpack,
475         [REINT_OPEN] mds_open_unpack,
476 };
477
478 int mds_update_unpack(struct ptlrpc_request *req, int offset,
479                       struct mds_update_record *rec)
480 {
481         __u32 *opcodep;
482         __u32  opcode;
483         int rc;
484         ENTRY;
485
486         /*
487          * NB don't lustre_swab_reqbuf() here. We're just taking a peek and we
488          * want to leave it to the specific unpacker once we've identified the
489          * message type.
490          */
491         opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof(*opcodep));
492         if (opcodep == NULL)
493                 RETURN(-EFAULT);
494
495         opcode = *opcodep;
496         if (lustre_msg_swabbed (req->rq_reqmsg))
497                 __swab32s (&opcode);
498
499         if (opcode > REINT_MAX ||
500             mds_unpackers[opcode] == NULL) {
501                 CERROR ("Unexpected opcode %d\n", opcode);
502                 RETURN(-EFAULT);
503         }
504
505         rec->ur_id1 = NULL;
506         rec->ur_id2 = NULL;
507         rec->ur_opcode = opcode;
508
509         rc = mds_unpackers[opcode](req, offset, rec);
510         
511 #if CRAY_PORTALS
512         rec->ur_fsuid = req->rq_uid;
513 #endif
514         RETURN(rc);
515 }
516
517 /* 
518  * here we take simple rule: once uid/fsuid is root, we also squash
519  * the gid/fsgid, don't care setuid/setgid attributes.
520  */
521 static
522 int mds_squash_root(struct mds_obd *mds, struct mds_req_sec_desc *rsd,
523                     ptl_nid_t *peernid)
524 {
525         if (!mds->mds_squash_uid || *peernid == mds->mds_nosquash_nid)
526                 return 0;
527
528         if (rsd->rsd_uid && rsd->rsd_fsuid)
529                 return 0;
530
531         CDEBUG(D_SEC, "squash req from "LPX64":"
532                "(%u:%u-%u:%u/%x)=>(%u:%u-%u:%u/%x)\n", *peernid,
533                 rsd->rsd_uid, rsd->rsd_gid,
534                 rsd->rsd_fsuid, rsd->rsd_fsgid, rsd->rsd_cap,
535                 rsd->rsd_uid ? rsd->rsd_uid : mds->mds_squash_uid,
536                 rsd->rsd_uid ? rsd->rsd_gid : mds->mds_squash_gid,
537                 rsd->rsd_fsuid ? rsd->rsd_fsuid : mds->mds_squash_uid,
538                 rsd->rsd_fsuid ? rsd->rsd_fsgid : mds->mds_squash_gid,
539                 rsd->rsd_cap & ~CAP_FS_MASK);
540
541         if (rsd->rsd_uid == 0) {
542                 rsd->rsd_uid = mds->mds_squash_uid;
543                 rsd->rsd_gid = mds->mds_squash_gid;
544         }
545         if (rsd->rsd_fsuid == 0) {
546                 rsd->rsd_fsuid = mds->mds_squash_uid;
547                 rsd->rsd_fsgid = mds->mds_squash_gid;
548         }
549         rsd->rsd_cap &= ~CAP_FS_MASK;
550
551         return 1;
552 }
553
554 /********************************
555  * MDS uid/gid mapping handling *
556  ********************************/
557
558 static
559 struct mds_idmap_entry* idmap_alloc_entry(__u32 rmt_id, __u32 lcl_id)
560 {
561         struct mds_idmap_entry *e;
562
563         OBD_ALLOC(e, sizeof(*e));
564         if (!e)
565                 return NULL;
566
567         INIT_LIST_HEAD(&e->rmt_hash);
568         INIT_LIST_HEAD(&e->lcl_hash);
569         atomic_set(&e->refcount, 1);
570         e->rmt_id = rmt_id;
571         e->lcl_id = lcl_id;
572
573         return e;
574 }
575
576 void idmap_free_entry(struct mds_idmap_entry *e)
577 {
578         if (!list_empty(&e->rmt_hash))
579                 list_del(&e->rmt_hash);
580         if (!list_empty(&e->lcl_hash))
581                 list_del(&e->lcl_hash);
582         OBD_FREE(e, sizeof(*e));
583 }
584
585 static
586 int idmap_insert_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
587                        struct mds_idmap_entry *new, const char *warn_msg)
588 {
589         struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(new->rmt_id)];
590         struct list_head *lcl_head = &lcl_hash[MDS_IDMAP_HASHFUNC(new->lcl_id)];
591         struct mds_idmap_entry *e;
592
593         list_for_each_entry(e, rmt_head, rmt_hash) {
594                 if (e->rmt_id == new->rmt_id &&
595                     e->lcl_id == new->lcl_id) {
596                         atomic_inc(&e->refcount);
597                         return 1;
598                 }
599                 if (e->rmt_id == new->rmt_id && warn_msg)
600                         CWARN("%s: rmt id %u already map to %u (new %u)\n",
601                               warn_msg, e->rmt_id, e->lcl_id, new->lcl_id);
602                 if (e->lcl_id == new->lcl_id && warn_msg)
603                         CWARN("%s: lcl id %u already be mapped from %u "
604                               "(new %u)\n", warn_msg,
605                               e->lcl_id, e->rmt_id, new->rmt_id);
606         }
607
608         list_add_tail(rmt_head, &new->rmt_hash);
609         list_add_tail(lcl_head, &new->lcl_hash);
610         return 0;
611 }
612
613 static
614 int idmap_remove_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
615                        __u32 rmt_id, __u32 lcl_id)
616 {
617         struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(rmt_id)];
618         struct mds_idmap_entry *e;
619
620         list_for_each_entry(e, rmt_head, rmt_hash) {
621                 if (e->rmt_id == rmt_id && e->lcl_id == lcl_id) {
622                         if (atomic_dec_and_test(&e->refcount)) {
623                                 list_del(&e->rmt_hash);
624                                 list_del(&e->lcl_hash);
625                                 OBD_FREE(e, sizeof(*e));
626                                 return 0;
627                         } else
628                                 return 1;
629                 }
630         }
631         return -ENOENT;
632 }
633
634 int mds_idmap_add(struct mds_idmap_table *tbl,
635                   uid_t rmt_uid, uid_t lcl_uid,
636                   gid_t rmt_gid, gid_t lcl_gid)
637 {
638         struct mds_idmap_entry *ue, *ge;
639         ENTRY;
640
641         if (!tbl)
642                 RETURN(-EPERM);
643
644         ue = idmap_alloc_entry(rmt_uid, lcl_uid);
645         if (!ue)
646                 RETURN(-ENOMEM);
647         ge = idmap_alloc_entry(rmt_gid, lcl_gid);
648         if (!ge) {
649                 idmap_free_entry(ue);
650                 RETURN(-ENOMEM);
651         }
652
653         spin_lock(&tbl->mit_lock);
654
655         if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
656                                tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
657                                ue, "UID mapping")) {
658                 idmap_free_entry(ue);
659         }
660
661         if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
662                                tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
663                                ge, "GID mapping")) {
664                 idmap_free_entry(ge);
665         }
666
667         spin_unlock(&tbl->mit_lock);
668         RETURN(0);
669 }
670
671 int mds_idmap_del(struct mds_idmap_table *tbl,
672                   uid_t rmt_uid, uid_t lcl_uid,
673                   gid_t rmt_gid, gid_t lcl_gid)
674 {
675         ENTRY;
676
677         if (!tbl)
678                 RETURN(0);
679
680         spin_lock(&tbl->mit_lock);
681         idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
682                            tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
683                            rmt_uid, lcl_uid);
684         idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
685                            tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
686                            rmt_gid, lcl_gid);
687         spin_unlock(&tbl->mit_lock);
688         RETURN(0);
689 }
690
691 static
692 __u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id)
693 {
694         struct list_head *head = &hash[MDS_IDMAP_HASHFUNC(id)];
695         struct mds_idmap_entry *e;
696
697         if (!reverse) {
698                 list_for_each_entry(e, head, rmt_hash) {
699                         if (e->rmt_id == id)
700                                 return e->lcl_id;
701                 }
702                 return MDS_IDMAP_NOTFOUND;
703         } else {
704                 list_for_each_entry(e, head, lcl_hash) {
705                         if (e->lcl_id == id)
706                                 return e->rmt_id;
707                 }
708                 return MDS_IDMAP_NOTFOUND;
709         }
710 }
711
712 int mds_idmap_lookup_uid(struct mds_idmap_table *tbl, int reverse, uid_t uid)
713 {
714         struct list_head *hash;
715
716         if (!tbl)
717                 return MDS_IDMAP_NOTFOUND;
718
719         if (!reverse)
720                 hash = tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX];
721         else
722                 hash = tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX];
723
724         spin_lock(&tbl->mit_lock);
725         uid = idmap_lookup_id(hash, reverse, uid);
726         spin_unlock(&tbl->mit_lock);
727
728         return uid;
729 }
730
731 int mds_idmap_lookup_gid(struct mds_idmap_table *tbl, int reverse, gid_t gid)
732 {
733         struct list_head *hash;
734
735         if (!tbl)
736                 return MDS_IDMAP_NOTFOUND;
737
738         if (!reverse)
739                 hash = tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX];
740         else
741                 hash = tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX];
742
743         spin_lock(&tbl->mit_lock);
744         gid = idmap_lookup_id(hash, reverse, gid);
745         spin_unlock(&tbl->mit_lock);
746
747         return gid;
748 }
749
750 struct mds_idmap_table *mds_idmap_alloc()
751 {
752         struct mds_idmap_table *tbl;
753         int i, j;
754
755         OBD_ALLOC(tbl, sizeof(*tbl));
756         if (!tbl)
757                 return NULL;
758
759         spin_lock_init(&tbl->mit_lock);
760         for (i = 0; i < MDS_IDMAP_N_HASHES; i++)
761                 for (j = 0; j < MDS_IDMAP_HASHSIZE; j++)
762                         INIT_LIST_HEAD(&tbl->mit_idmaps[i][j]);
763
764         return tbl;
765 }
766
767 static void idmap_clear_rmt_hash(struct list_head *list)
768 {
769         struct mds_idmap_entry *e;
770         int i;
771
772         for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) {
773                 while (!list_empty(&list[i])) {
774                         e = list_entry(list[i].next, struct mds_idmap_entry,
775                                        rmt_hash);
776                         idmap_free_entry(e);
777                 }
778         }
779 }
780
781 void mds_idmap_free(struct mds_idmap_table *tbl)
782 {
783         int i;
784
785         spin_lock(&tbl->mit_lock);
786         idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX]);
787         idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX]);
788
789         /* paranoid checking */
790         for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) {
791                 LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX][i]));
792                 LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX][i]));
793         }
794         spin_unlock(&tbl->mit_lock);
795
796         OBD_FREE(tbl, sizeof(*tbl));
797 }
798
799 /*********************************
800  * helpers doing mapping for MDS *
801  *********************************/
802
803 /*
804  * we allow remote setuid/setgid to an "authencated" one,
805  * this policy probably change later.
806  */
807 static
808 int mds_req_secdesc_do_map(struct mds_export_data *med,
809                            struct mds_req_sec_desc *rsd)
810 {
811         struct mds_idmap_table *idmap = med->med_idmap;
812         uid_t uid, fsuid;
813         gid_t gid, fsgid;
814
815         uid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_uid);
816         if (uid == MDS_IDMAP_NOTFOUND) {
817                 CERROR("can't find map for uid %u\n", rsd->rsd_uid);
818                 return -EPERM;
819         }
820
821         if (rsd->rsd_uid == rsd->rsd_fsuid)
822                 fsuid = uid;
823         else {
824                 fsuid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_fsuid);
825                 if (fsuid == MDS_IDMAP_NOTFOUND) {
826                         CERROR("can't find map for fsuid %u\n", rsd->rsd_fsuid);
827                         return -EPERM;
828                 }
829         }
830
831         gid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_gid);
832         if (gid == MDS_IDMAP_NOTFOUND) {
833                 CERROR("can't find map for gid %u\n", rsd->rsd_gid);
834                 return -EPERM;
835         }
836
837         if (rsd->rsd_gid == rsd->rsd_fsgid)
838                 fsgid = gid;
839         else {
840                 fsgid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_fsgid);
841                 if (fsgid == MDS_IDMAP_NOTFOUND) {
842                         CERROR("can't find map for fsgid %u\n", rsd->rsd_fsgid);
843                         return -EPERM;
844                 }
845         }
846
847         rsd->rsd_uid = uid;
848         rsd->rsd_gid = gid;
849         rsd->rsd_fsuid = fsuid;
850         rsd->rsd_fsgid = fsgid;
851
852         return 0;
853 }
854
855 void mds_body_do_reverse_map(struct mds_export_data *med,
856                              struct mds_body *body)
857 {
858         uid_t uid;
859         gid_t gid;
860
861         if (!med->med_remote)
862                 return;
863
864         ENTRY;
865         if (body->valid & OBD_MD_FLUID) {
866                 uid = mds_idmap_lookup_uid(med->med_idmap, 1, body->uid);
867                 if (uid == MDS_IDMAP_NOTFOUND) {
868                         uid = med->med_nllu;
869                         if (body->valid & OBD_MD_FLMODE) {
870                                 body->mode = (body->mode & ~S_IRWXU) |
871                                              ((body->mode & S_IRWXO) << 6);
872                         }
873                 }
874                 body->uid = uid;
875         }
876         if (body->valid & OBD_MD_FLGID) {
877                 gid = mds_idmap_lookup_gid(med->med_idmap, 1, body->gid);
878                 if (gid == MDS_IDMAP_NOTFOUND) {
879                         gid = med->med_nllg;
880                         if (body->valid & OBD_MD_FLMODE) {
881                                 body->mode = (body->mode & ~S_IRWXG) |
882                                              ((body->mode & S_IRWXO) << 3);
883                         }
884                 }
885                 body->gid = gid;
886         }
887
888         EXIT;
889 }
890
891 /**********************
892  * MDS ucred handling *
893  **********************/
894
895 static inline void drop_ucred_ginfo(struct lvfs_ucred *ucred)
896 {
897         if (ucred->luc_ginfo) {
898                 put_group_info(ucred->luc_ginfo);
899                 ucred->luc_ginfo = NULL;
900         }
901 }
902
903 static inline void drop_ucred_lsd(struct lvfs_ucred *ucred)
904 {
905         if (ucred->luc_lsd) {
906                 mds_put_lsd(ucred->luc_lsd);
907                 ucred->luc_lsd = NULL;
908         }
909 }
910
911 /*
912  * the heart of the uid/gid handling and security checking.
913  *
914  * root could set any group_info if we allowed setgroups, while
915  * normal user only could 'reduce' their group members -- which
916  * is somewhat expensive.
917  *
918  * authenticated as mds user (using mds service credential) could
919  * bypass all checkings.
920  */
921 int mds_init_ucred(struct lvfs_ucred *ucred,
922                    struct ptlrpc_request *req,
923                    struct mds_req_sec_desc *rsd)
924 {
925         struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
926         struct mds_export_data *med = &req->rq_export->u.eu_mds_data;
927         struct lustre_sec_desc *lsd;
928         ptl_nid_t peernid = req->rq_peer.peer_id.nid;
929         struct group_info *gnew;
930         unsigned int setuid, setgid, strong_sec, root_squashed;
931         __u32 lsd_perms;
932         ENTRY;
933
934         LASSERT(ucred);
935         LASSERT(rsd);
936         LASSERT(rsd->rsd_ngroups <= LUSTRE_MAX_GROUPS);
937
938         if (SEC_FLAVOR_MAJOR(req->rq_req_secflvr) == PTLRPCS_FLVR_MAJOR_GSS &&
939             (SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_AUTH ||
940              SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_PRIV))
941                 strong_sec = 1;
942         else
943                 strong_sec = 0;
944
945         LASSERT(!(req->rq_remote_realm && !strong_sec));
946
947         if (strong_sec && req->rq_auth_uid == -1) {
948                 CWARN("user not authenticated, deny access\n");
949                 RETURN(-EPERM);
950         }
951
952         /* sanity check: if we use strong authentication, we expect the
953          * uid which client claimed is true.
954          * not apply to special mds user .
955          */
956         if (!req->rq_auth_usr_mds && strong_sec) {
957                 if (!med->med_remote) {
958                         if (req->rq_auth_uid != rsd->rsd_uid) {
959                                 CERROR("local client "LPU64": auth uid %u "
960                                        "while client claim %u:%u/%u:%u\n",
961                                        peernid, req->rq_auth_uid,
962                                        rsd->rsd_uid, rsd->rsd_gid,
963                                        rsd->rsd_fsuid, rsd->rsd_fsgid);
964                                 RETURN(-EPERM);
965                         }
966                 } else {
967                         if (req->rq_mapped_uid == MDS_IDMAP_NOTFOUND) {
968                                 CWARN("no mapping found, deny\n");
969                                 RETURN(-EPERM);
970                         }
971
972                         if (mds_req_secdesc_do_map(med, rsd))
973                                 RETURN(-EPERM);
974
975                         if (req->rq_mapped_uid != rsd->rsd_uid) {
976                                 CERROR("remote client "LPU64": auth uid %u "
977                                        "while client claim %u:%u/%u:%u\n",
978                                        peernid, req->rq_auth_uid,
979                                        rsd->rsd_uid, rsd->rsd_gid,
980                                        rsd->rsd_fsuid, rsd->rsd_fsgid);
981                                 RETURN(-EPERM);
982                         }
983                 }
984         }
985
986         /* now LSD come into play */
987         ucred->luc_ginfo = NULL;
988         ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid);
989
990         if (!lsd) {
991                 CERROR("Deny access without LSD: uid %d\n", rsd->rsd_uid);
992                 RETURN(-EPERM);
993         }
994
995         lsd_perms = mds_lsd_get_perms(lsd, med->med_remote, 0, peernid);
996
997         /* check setuid/setgid permissions.
998          * again not apply to special mds user.
999          */
1000         if (!req->rq_auth_usr_mds) {
1001                 /* find out the setuid/setgid attempt */
1002                 setuid = (rsd->rsd_uid != rsd->rsd_fsuid);
1003                 setgid = (rsd->rsd_gid != rsd->rsd_fsgid ||
1004                           rsd->rsd_gid != lsd->lsd_gid);
1005
1006                 /* check permission of setuid */
1007                 if (setuid && !(lsd_perms & LSD_PERM_SETUID)) {
1008                         CWARN("mds blocked setuid attempt (%u -> %u) "
1009                               "from "LPU64"\n", rsd->rsd_uid, rsd->rsd_fsuid,
1010                               peernid);
1011                         RETURN(-EPERM);
1012                 }
1013
1014                 /* check permission of setgid */
1015                 if (setgid && !(lsd_perms & LSD_PERM_SETGID)) {
1016                         CWARN("mds blocked setgid attempt (%u:%u/%u:%u -> %u) "
1017                               "from "LPU64"\n", rsd->rsd_uid, rsd->rsd_gid,
1018                               rsd->rsd_fsuid, rsd->rsd_fsgid, lsd->lsd_gid,
1019                               peernid);
1020                         RETURN(-EPERM);
1021                 }
1022         }
1023
1024         root_squashed = mds_squash_root(mds, rsd, &peernid); 
1025
1026         /* remove privilege for non-root user */
1027         if (rsd->rsd_fsuid)
1028                 rsd->rsd_cap &= ~CAP_FS_MASK;
1029
1030         /* by now every fields other than groups in rsd have been granted */
1031         ucred->luc_uid = rsd->rsd_uid;
1032         ucred->luc_gid = rsd->rsd_gid;
1033         ucred->luc_fsuid = rsd->rsd_fsuid;
1034         ucred->luc_fsgid = rsd->rsd_fsgid;
1035         ucred->luc_cap = rsd->rsd_cap;
1036
1037         /* don't use any supplementary group if we squashed root.
1038          * XXX The exact behavior of root_squash is not defined, we just
1039          * keep the reminder here */
1040         if (root_squashed)
1041                 RETURN(0);
1042
1043         /* install groups from LSD */
1044         if (lsd->lsd_ginfo) {
1045                 ucred->luc_ginfo = lsd->lsd_ginfo;
1046                 get_group_info(ucred->luc_ginfo);
1047         }
1048
1049         /* everything is done if we don't allow setgroups, or it is
1050          * from remote client (which implies forced to be no-setgroups).
1051          *
1052          * Note: remote user's supplementary groups sent along the request
1053          * (if any) are all ignored, but we make the mapped local user's
1054          * supplementary groups take effect.
1055          */
1056         if (med->med_remote || !(lsd_perms & LSD_PERM_SETGRP))
1057                 RETURN(0);
1058
1059         /* root could set any groups as he want (if allowed), normal
1060          * users only could reduce his group array.
1061          */
1062         if (ucred->luc_uid == 0) {
1063                 drop_ucred_ginfo(ucred);
1064
1065                 if (rsd->rsd_ngroups == 0)
1066                         RETURN(0);
1067
1068                 gnew = groups_alloc(rsd->rsd_ngroups);
1069                 if (!gnew) {
1070                         CERROR("out of memory\n");
1071                         drop_ucred_lsd(ucred);
1072                         RETURN(-ENOMEM);
1073                 }
1074                 groups_from_buffer(gnew, rsd->rsd_groups);
1075                 groups_sort(gnew); /* don't rely on client doing this */
1076
1077                 ucred->luc_ginfo = gnew;
1078         } else {
1079                 __u32 set = 0, cur = 0;
1080                 struct group_info *ginfo = ucred->luc_ginfo;
1081
1082                 if (!ginfo)
1083                         RETURN(0);
1084
1085                 /* Note: freeing a group_info count on 'nblocks' instead of
1086                  * 'ngroups', thus we can safely alloc enough buffer and reduce
1087                  * and ngroups number later.
1088                  */
1089                 gnew = groups_alloc(rsd->rsd_ngroups);
1090                 if (!gnew) {
1091                         CERROR("out of memory\n");
1092                         drop_ucred_ginfo(ucred);
1093                         drop_ucred_lsd(ucred);
1094                         RETURN(-ENOMEM);
1095                 }
1096
1097                 while (cur < rsd->rsd_ngroups) {
1098                         if (groups_search(ginfo, rsd->rsd_groups[cur])) {
1099                                 GROUP_AT(gnew, set) = rsd->rsd_groups[cur];
1100                                 set++;
1101                         }
1102                         cur++;
1103                 }
1104                 gnew->ngroups = set;
1105
1106                 put_group_info(ucred->luc_ginfo);
1107                 ucred->luc_ginfo = gnew;
1108         }
1109         RETURN(0);
1110 }
1111
1112 void mds_exit_ucred(struct lvfs_ucred *ucred)
1113 {
1114         ENTRY;
1115         drop_ucred_ginfo(ucred);
1116         drop_ucred_lsd(ucred);
1117         EXIT;
1118 }