1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #define DEBUG_SUBSYSTEM S_MDS
24 #include <linux/config.h>
25 #include <linux/module.h>
26 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/stat.h>
30 #include <linux/errno.h>
31 #include <linux/version.h>
32 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
33 # include <linux/locks.h> // for wait_on_buffer
35 # include <linux/buffer_head.h> // for wait_on_buffer
37 #include <linux/unistd.h>
39 #include <asm/system.h>
40 #include <asm/uaccess.h>
43 #include <linux/stat.h>
44 #include <asm/uaccess.h>
45 #include <linux/slab.h>
46 #include <asm/segment.h>
48 #include <linux/obd_support.h>
49 #include <linux/lustre_lib.h>
50 #include "mds_internal.h"
52 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
/*
 * groups_alloc() - allocate a group_info for @ngroups group ids.
 *
 * This definition directly follows the "LINUX_VERSION_CODE < 2.6.4" #if, so
 * it appears to be a compatibility version for older kernels.
 *
 * Visible behaviour: asserts that @ngroups fits in NGROUPS_SMALL, allocates
 * the structure plus room for one block pointer, records @ngroups, points
 * blocks[0] at the embedded small_block and sets the usage count to 1.
 *
 * NOTE(review): this listing omits some original lines (the embedded
 * numbering has gaps), so allocation-failure handling, any nblocks
 * assignment and the return statement are not visible here.
 */
53 struct group_info *groups_alloc(int ngroups)
55 struct group_info *ginfo;
57 LASSERT(ngroups <= NGROUPS_SMALL);
59 OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
62 ginfo->ngroups = ngroups;
64 ginfo->blocks[0] = ginfo->small_block;
65 atomic_set(&ginfo->usage, 1);
/*
 * groups_free() - release a group_info obtained from groups_alloc().
 *
 * Asserts the layout groups_alloc() sets up (at most NGROUPS_SMALL groups,
 * exactly one block, blocks[0] aliasing small_block) and then frees the
 * structure using the same size expression that groups_alloc() passes to
 * OBD_ALLOC, so the two must be kept in sync.
 */
70 void groups_free(struct group_info *ginfo)
72 LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
73 LASSERT(ginfo->nblocks == 1);
74 LASSERT(ginfo->blocks[0] == ginfo->small_block);
76 OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
79 /* for 2.4 the group number is small, so simply search the
/*
 * groups_search() - linear-scan variant: look for @grp in @ginfo.
 *
 * Walks all ginfo->ngroups entries via GROUP_AT() and compares each one with
 * @grp.  The original comment says the group count is small on 2.4, which is
 * why a plain scan is used here.
 *
 * NOTE(review): the declaration of 'i', any NULL check on @ginfo and the
 * return statements are on lines not present in this listing.
 */
82 int groups_search(struct group_info *ginfo, gid_t grp)
89 for (i = 0; i < ginfo->ngroups; i++)
90 if (GROUP_AT(ginfo, i) == grp)
/*
 * groups_sort() - sort the gids stored in @ginfo in place.
 *
 * Visible structure: a stride is first grown through 1, 4, 13, ...
 * (stride = 3 * stride + 1) while it stays below the number of groups.  Then,
 * for each 'base', the element at 'right' (= left + stride) is saved in 'tmp'
 * and larger elements found at 'left' are copied up to 'right' until the
 * slot for 'tmp' is found -- i.e. an insertion step over elements that are
 * 'stride' apart.
 *
 * NOTE(review): the lines that initialise 'left', step 'left'/'right' inside
 * the while loop and shrink the stride are not present in this listing.
 */
97 void groups_sort(struct group_info *ginfo)
99 int base, max, stride;
100 int gidsetsize = ginfo->ngroups;
102 for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
107 max = gidsetsize - stride;
108 for (base = 0; base < max; base++) {
110 int right = left + stride;
111 gid_t tmp = GROUP_AT(ginfo, right);
113 while (left >= 0 && GROUP_AT(ginfo, left) > tmp) {
114 GROUP_AT(ginfo, right) =
115 GROUP_AT(ginfo, left);
119 GROUP_AT(ginfo, right) = tmp;
/*
 * groups_search() - bisection variant: look for @grp in @ginfo.
 *
 * Repeatedly halves the range [left, right), with 'right' starting at
 * ginfo->ngroups, and compares @grp against the middle element.
 * Presumably requires the gids to be in ascending order (see groups_sort())
 * -- confirm against callers.
 *
 * NOTE(review): 'cmp' is an int holding "grp - GROUP_AT(...)".  If gid_t is
 * an unsigned 32-bit type, two gids that differ by 2^31 or more give a
 * difference whose sign is wrong once converted to int; an explicit
 * three-way comparison would be safer.  How 'cmp' is consumed, the initial
 * value of 'left' and the return statements are on lines not present in
 * this listing.
 */
125 int groups_search(struct group_info *ginfo, gid_t grp)
133 right = ginfo->ngroups;
134 while (left < right) {
135 int mid = (left + right) / 2;
136 int cmp = grp - GROUP_AT(ginfo, mid);
/*
 * groups_from_buffer() - fill @ginfo's blocks from a flat array of gids.
 *
 * @ginfo: destination; its ngroups/nblocks/blocks[] must already be set up.
 * @gids:  source array of __u32 group ids.
 *
 * For every block, copies min(NGROUPS_PER_BLOCK, ngroups) entries from @gids
 * and then advances @gids by a full NGROUPS_PER_BLOCK.
 *
 * NOTE(review): the memcpy() sizes entries as __u32, so this assumes the
 * block element type has the same width -- TODO confirm gid_t is 32-bit on
 * all supported kernels.  The update of the remaining 'ngroups' count is on
 * a line not present in this listing.
 */
148 void groups_from_buffer(struct group_info *ginfo, __u32 *gids)
150 int i, ngroups = ginfo->ngroups;
152 for (i = 0; i < ginfo->nblocks; i++) {
153 int count = min(NGROUPS_PER_BLOCK, ngroups);
155 memcpy(ginfo->blocks[i], gids, count * sizeof(__u32));
156 gids += NGROUPS_PER_BLOCK;
/*
 * mds_pack_dentry2id() - fill a lustre_id from fields cached on a dentry.
 *
 * Copies dentry->d_inum and dentry->d_generation into the inode-number and
 * generation parts of @id, and dentry->d_fid / dentry->d_mdsnum into the
 * fid and group parts.
 *
 * NOTE(review): the last parameter and the lines between the generation and
 * fid assignments are not present in this listing, so the fid/group
 * assignments may be conditional -- check the full source.
 */
161 void mds_pack_dentry2id(struct obd_device *obd,
162 struct lustre_id *id,
163 struct dentry *dentry,
166 id_ino(id) = dentry->d_inum;
167 id_gen(id) = dentry->d_generation;
170 id_fid(id) = dentry->d_fid;
171 id_group(id) = dentry->d_mdsnum;
/*
 * mds_pack_dentry2body() - fill the identity part of a reply body from a
 * dentry.
 *
 * Marks the id-related fields of 'b' as valid (OBD_MD_FLID, OBD_MD_FLGENER
 * and whatever follows on the continuation line, plus OBD_MD_FID) and
 * delegates the actual packing of b->id1 to mds_pack_dentry2id().
 *
 * NOTE(review): the declaration of 'b', the trailing parameter(s) and any
 * condition guarding the OBD_MD_FID update are on lines not present in this
 * listing.
 */
175 void mds_pack_dentry2body(struct obd_device *obd,
177 struct dentry *dentry,
180 b->valid |= OBD_MD_FLID | OBD_MD_FLGENER |
184 b->valid |= OBD_MD_FID;
186 mds_pack_dentry2id(obd, &b->id1, dentry,
/*
 * mds_pack_inode2id() - fill a lustre_id from an inode.
 *
 * The store-id part is read with mds_read_inode_sid(); its result is kept in
 * 'rc'.  inode->i_sem is acquired with down_trylock() rather than a blocking
 * down(), which the original comment explains as deadlock avoidance.
 * mds_read_inode_sid() is called both on the path where the trylock
 * succeeded and on a second path (presumably the one where the semaphore
 * could not be taken -- confirm in the full source).
 *
 * Afterwards the inode number, generation and file-type bits (i_mode masked
 * with S_IFMT) are copied into @id.
 *
 * NOTE(review): the remaining parameters, the conditions around the sid
 * read, the matching up() and the return statement are on lines not present
 * in this listing.
 */
190 int mds_pack_inode2id(struct obd_device *obd,
191 struct lustre_id *id,
199 /* we have to avoid deadlock. */
200 if (!down_trylock(&inode->i_sem)) {
201 rc = mds_read_inode_sid(obd, inode, id);
204 rc = mds_read_inode_sid(obd, inode, id);
208 id_ino(id) = inode->i_ino;
209 id_gen(id) = inode->i_generation;
210 id_type(id) = (S_IFMT & inode->i_mode);
215 /* Note that we can copy all of the fields, just some will not be "valid" */
/*
 * mds_pack_inode2body() - copy inode attributes into a reply body.
 *
 * @obd:      device, passed through to mds_pack_inode2id().
 * @b:        body to fill; b->valid is OR-ed, never reset, here.
 * @inode:    source of the attributes.
 * @read_fid: passed through to mds_pack_inode2id().
 *
 * All attribute fields are copied unconditionally; b->valid says which of
 * them the receiver may trust.  Size and block count are only flagged valid
 * for non-regular files.
 */
216 void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b,
217 struct inode *inode, int read_fid)
219 b->valid |= OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID |
220 OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE |
221 OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER |
222 OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */
/* ATIME/MTIME are already set above, so repeating them below is redundant. */
224 if (!S_ISREG(inode->i_mode)) {
225 b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
226 OBD_MD_FLATIME | OBD_MD_FLMTIME |
229 b->atime = LTIME_S(inode->i_atime);
230 b->mtime = LTIME_S(inode->i_mtime);
231 b->ctime = LTIME_S(inode->i_ctime);
232 b->mode = inode->i_mode;
233 b->size = inode->i_size;
234 b->blocks = inode->i_blocks;
235 b->uid = inode->i_uid;
236 b->gid = inode->i_gid;
237 b->flags = inode->i_flags;
238 b->rdev = inode->i_rdev;
/*
 * NOTE(review): the values assigned in the orphan and directory branches
 * are on lines not present in this listing; the i_nlink assignment below is
 * presumably the final else branch.
 */
240 /* Return the correct link count for orphan inodes */
241 if (mds_inode_is_orphan(inode)) {
243 } else if (S_ISDIR(inode->i_mode)) {
246 b->nlink = inode->i_nlink;
/* Finally pack the inode's identity into b->id1 and flag it valid. */
249 b->valid |= OBD_MD_FID;
250 mds_pack_inode2id(obd, &b->id1, inode, read_fid);
/*
 * mds_setattr_unpack() - unpack a REINT_SETATTR request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding struct mds_rec_setattr.
 * @r:      update record to fill.
 *
 * The record at @offset is byte-swapped if needed and its fields are copied
 * into r->ur_iattr; r->ur_id1 points at the id inside the record rather than
 * at a copy.  Two optional buffers may follow:
 *   offset + 1: EA data       -> r->ur_eadata / r->ur_eadatalen
 *   offset + 2: log cookies   -> r->ur_logcookies / r->ur_cookielen
 *
 * NOTE(review): the error returns taken when a buffer is missing, and the
 * final return, are on lines not present in this listing.
 */
254 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
255 struct mds_update_record *r)
257 struct iattr *attr = &r->ur_iattr;
258 struct mds_rec_setattr *rec;
261 rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
262 lustre_swab_mds_rec_setattr);
266 r->ur_id1 = &rec->sa_id;
267 attr->ia_valid = rec->sa_valid;
268 attr->ia_mode = rec->sa_mode;
269 attr->ia_uid = rec->sa_uid;
270 attr->ia_gid = rec->sa_gid;
271 attr->ia_size = rec->sa_size;
272 LTIME_S(attr->ia_atime) = rec->sa_atime;
273 LTIME_S(attr->ia_mtime) = rec->sa_mtime;
274 LTIME_S(attr->ia_ctime) = rec->sa_ctime;
275 attr->ia_attr_flags = rec->sa_attr_flags;
277 LASSERT_REQSWAB (req, offset + 1);
278 if (req->rq_reqmsg->bufcount > offset + 1) {
279 r->ur_eadata = lustre_msg_buf (req->rq_reqmsg,
281 if (r->ur_eadata == NULL)
283 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
286 if (req->rq_reqmsg->bufcount > offset + 2) {
287 r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
/*
 * Validate the buffer that was just fetched.  The previous code re-tested
 * ur_eadata here, so a missing cookie buffer went unnoticed.
 */
288 if (r->ur_logcookies == NULL)
291 r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
/*
 * mds_create_unpack() - unpack a REINT_CREATE request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding struct mds_rec_create.
 * @r:      update record to fill.
 *
 * r->ur_id1 / r->ur_id2 point at the ids inside the swabbed record; mode,
 * rdev, time and flags are copied by value.  Buffer offset + 1 holds the
 * name.  If a buffer offset + 2 exists its meaning depends on the mode:
 *   - symlink:   target path string -> r->ur_tgt / r->ur_tgtlen
 *   - directory: stripe info, which must be exactly 2 bytes; it is swabbed
 *                as a 16-bit value -> r->ur_eadata / r->ur_eadatalen
 *   - otherwise: see the original "no other users" remark; the handling
 *                itself is on lines not present in this listing.
 *
 * NOTE(review): no NULL check on the lustre_swab_buf() result is visible
 * before ur_eadatalen is set.  Error returns and the final return are on
 * lines not present in this listing.
 */
297 static int mds_create_unpack(struct ptlrpc_request *req, int offset,
298 struct mds_update_record *r)
300 struct mds_rec_create *rec;
303 rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
304 lustre_swab_mds_rec_create);
308 r->ur_id1 = &rec->cr_id;
309 r->ur_id2 = &rec->cr_replayid;
310 r->ur_mode = rec->cr_mode;
311 r->ur_rdev = rec->cr_rdev;
312 r->ur_time = rec->cr_time;
313 r->ur_flags = rec->cr_flags;
315 LASSERT_REQSWAB (req, offset + 1);
316 r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
317 if (r->ur_name == NULL)
319 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
321 LASSERT_REQSWAB (req, offset + 2);
322 if (req->rq_reqmsg->bufcount > offset + 2) {
323 if (S_ISLNK(r->ur_mode)) {
324 r->ur_tgt = lustre_msg_string(req->rq_reqmsg,
326 if (r->ur_tgt == NULL)
328 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
329 } else if (S_ISDIR(r->ur_mode)) {
330 /* Stripe info for mkdir - just a 16bit integer */
331 if (req->rq_reqmsg->buflens[offset + 2] != 2) {
332 CERROR("mkdir stripe info does not match "
333 "expected size %d vs 2\n",
334 req->rq_reqmsg->buflens[offset + 2]);
337 r->ur_eadata = lustre_swab_buf (req->rq_reqmsg,
338 offset + 2, 2, __swab16s);
339 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
341 /* Hm, no other users so far? */
/*
 * mds_link_unpack() - unpack a REINT_LINK request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding struct mds_rec_link.
 * @r:      update record to fill.
 *
 * r->ur_id1 / r->ur_id2 point at lk_id1 / lk_id2 inside the swabbed record,
 * r->ur_time is copied by value, and the name string is taken from buffer
 * offset + 1 with its length read from the message's buflens[].
 *
 * NOTE(review): the error returns (NULL record / NULL name) and the final
 * return are on lines not present in this listing.
 */
348 static int mds_link_unpack(struct ptlrpc_request *req, int offset,
349 struct mds_update_record *r)
351 struct mds_rec_link *rec;
354 rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
355 lustre_swab_mds_rec_link);
359 r->ur_id1 = &rec->lk_id1;
360 r->ur_id2 = &rec->lk_id2;
361 r->ur_time = rec->lk_time;
363 LASSERT_REQSWAB (req, offset + 1);
364 r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
365 if (r->ur_name == NULL)
367 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
/*
 * mds_unlink_unpack() - unpack a REINT_UNLINK request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding struct mds_rec_unlink.
 * @r:      update record to fill.
 *
 * Copies ul_mode and ul_time by value, points r->ur_id1 / r->ur_id2 at the
 * ids inside the swabbed record, and takes the name string from buffer
 * offset + 1 with its length read from the message's buflens[].
 *
 * NOTE(review): the error returns (NULL record / NULL name) and the final
 * return are on lines not present in this listing.
 */
371 static int mds_unlink_unpack(struct ptlrpc_request *req, int offset,
372 struct mds_update_record *r)
374 struct mds_rec_unlink *rec;
377 rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
378 lustre_swab_mds_rec_unlink);
382 r->ur_mode = rec->ul_mode;
383 r->ur_id1 = &rec->ul_id1;
384 r->ur_id2 = &rec->ul_id2;
385 r->ur_time = rec->ul_time;
387 LASSERT_REQSWAB (req, offset + 1);
388 r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
389 if (r->ur_name == NULL)
391 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
/*
 * mds_rename_unpack() - unpack a REINT_RENAME request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding struct mds_rec_rename.
 * @r:      update record to fill.
 *
 * r->ur_id1 / r->ur_id2 point at rn_id1 / rn_id2 inside the swabbed record
 * and rn_time is copied by value.  Unlike the create/open unpackers, both
 * string buffers are fetched unconditionally:
 *   offset + 1 -> r->ur_name / r->ur_namelen
 *   offset + 2 -> r->ur_tgt  / r->ur_tgtlen
 *
 * NOTE(review): the error returns (NULL record / NULL strings) and the
 * final return are on lines not present in this listing.
 */
395 static int mds_rename_unpack(struct ptlrpc_request *req, int offset,
396 struct mds_update_record *r)
398 struct mds_rec_rename *rec;
401 rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
402 lustre_swab_mds_rec_rename);
406 r->ur_id1 = &rec->rn_id1;
407 r->ur_id2 = &rec->rn_id2;
408 r->ur_time = rec->rn_time;
410 LASSERT_REQSWAB (req, offset + 1);
411 r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
412 if (r->ur_name == NULL)
414 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
416 LASSERT_REQSWAB (req, offset + 2);
417 r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0);
418 if (r->ur_tgt == NULL)
420 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
/*
 * mds_open_unpack() - unpack a REINT_OPEN request into @r.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding the record.
 * @r:      update record to fill.
 *
 * Open reuses the create record: the buffer is swabbed as a struct
 * mds_rec_create and the same fields as in mds_create_unpack() are copied
 * (ids by pointer; mode, rdev, time and flags by value).  The name comes
 * from buffer offset + 1.  An optional buffer offset + 2 is taken as opaque
 * EA data -> r->ur_eadata / r->ur_eadatalen, regardless of the mode.
 *
 * NOTE(review): the error returns (NULL record / name / EA buffer) and the
 * final return are on lines not present in this listing.
 */
424 static int mds_open_unpack(struct ptlrpc_request *req, int offset,
425 struct mds_update_record *r)
427 struct mds_rec_create *rec;
430 rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
431 lustre_swab_mds_rec_create);
435 r->ur_id1 = &rec->cr_id;
436 r->ur_id2 = &rec->cr_replayid;
437 r->ur_mode = rec->cr_mode;
438 r->ur_rdev = rec->cr_rdev;
439 r->ur_time = rec->cr_time;
440 r->ur_flags = rec->cr_flags;
442 LASSERT_REQSWAB (req, offset + 1);
443 r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
444 if (r->ur_name == NULL)
446 r->ur_namelen = req->rq_reqmsg->buflens[offset + 1];
448 LASSERT_REQSWAB (req, offset + 2);
449 if (req->rq_reqmsg->bufcount > offset + 2) {
450 r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
451 if (r->ur_eadata == NULL)
453 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
/*
 * Signature shared by all per-opcode unpackers: decode the reint record
 * found at buffer index 'offset' of 'req' into 'r' and return an int status.
 */
458 typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset,
459 struct mds_update_record *r);
/*
 * Dispatch table indexed by REINT_* opcode.  Slots without an explicit
 * initialiser are NULL, which mds_update_unpack() treats as an unexpected
 * opcode.
 *
 * Uses the standard C99 "[index] = value" designated-initialiser form; the
 * older "[index] value" spelling without '=' is an obsolete GNU extension.
 */
461 static update_unpacker mds_unpackers[REINT_MAX + 1] = {
462 [REINT_SETATTR] = mds_setattr_unpack,
463 [REINT_CREATE] = mds_create_unpack,
464 [REINT_LINK] = mds_link_unpack,
465 [REINT_UNLINK] = mds_unlink_unpack,
466 [REINT_RENAME] = mds_rename_unpack,
467 [REINT_OPEN] = mds_open_unpack,
/*
 * mds_update_unpack() - decode a reint request by dispatching on its opcode.
 *
 * @req:    incoming request.
 * @offset: index of the request buffer holding the reint record.
 * @rec:    update record to fill.
 *
 * The opcode is read from the start of the buffer at @offset without
 * swabbing the whole record (the specific unpacker does that later, per the
 * original note in the body).  Whether the message was swabbed is checked
 * via lustre_msg_swabbed(); the statement it guards is on a line not
 * present in this listing (presumably a byte swap of the opcode -- confirm).
 *
 * An opcode above REINT_MAX, or one with no entry in mds_unpackers[], is
 * logged with CERROR.  Otherwise the opcode is stored in rec->ur_opcode and
 * the matching unpacker is called; its result is kept in 'rc'.
 *
 * NOTE(review): the local declarations, the NULL check on 'opcodep' and the
 * return statements are on lines not present in this listing.
 */
470 int mds_update_unpack(struct ptlrpc_request *req, int offset,
471 struct mds_update_record *rec)
479 * NB don't lustre_swab_reqbuf() here. We're just taking a peek and we
480 * want to leave it to the specific unpacker once we've identified the
483 opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof(*opcodep));
488 if (lustre_msg_swabbed (req->rq_reqmsg))
491 if (opcode > REINT_MAX ||
492 mds_unpackers[opcode] == NULL) {
493 CERROR ("Unexpected opcode %d\n", opcode);
499 rec->ur_opcode = opcode;
501 rc = mds_unpackers[opcode](req, offset, rec);
/*
 * drop_ucred_ginfo() - release the group_info reference held by @ucred, if
 * there is one, and clear the pointer so it cannot be put a second time.
 */
505 static inline void drop_ucred_ginfo(struct lvfs_ucred *ucred)
507 if (ucred->luc_ginfo) {
508 put_group_info(ucred->luc_ginfo);
509 ucred->luc_ginfo = NULL;
514 * root could set any group_info if we allowed setgroups, while
515 * a normal user could only 'reduce' their group members -- which
516 * is somewhat expensive.
/*
 * mds_init_ucred() - build the server-side credentials for a request.
 *
 * @ucred: credentials to fill.
 * @rsd:   security descriptor sent by the client.
 *
 * fsuid, fsgid, capabilities and uid are copied from @rsd.  The group hash
 * entry for the uid is looked up with mds_get_group_entry(); if it carries a
 * group_info, @ucred takes its own reference on it.
 *
 * When mds_allow_setgroups() is false nothing more is done with the
 * client-supplied groups.  Otherwise, after rejecting more than
 * LUSTRE_MAX_GROUPS groups:
 *   - uid 0: the client's list replaces any cached group_info (a new one is
 *     allocated and filled with groups_from_buffer(); zero groups just
 *     drops the cached one).
 *   - other uids: only client groups that groups_search() finds in the
 *     cached group_info are copied into a newly allocated one, i.e. the
 *     client can narrow but not extend its groups.  With no cached
 *     group_info this step is skipped.
 *
 * The reference taken here is released by mds_exit_ucred().
 *
 * NOTE(review): the error paths call mds_put_group_entry() on
 * ucred->luc_ghash without the NULL guard that mds_exit_ucred() uses, and
 * do not visibly clear luc_ghash afterwards -- confirm that
 * mds_put_group_entry() tolerates NULL and that callers do not also call
 * mds_exit_ucred() after a failure.  Return statements, the sort call
 * announced by the "can't rely on client" comment and the final ngroups
 * adjustment are on lines not present in this listing.
 */
518 int mds_init_ucred(struct lvfs_ucred *ucred, struct mds_req_sec_desc *rsd)
520 struct group_info *gnew;
526 ucred->luc_fsuid = rsd->rsd_fsuid;
527 ucred->luc_fsgid = rsd->rsd_fsgid;
528 ucred->luc_cap = rsd->rsd_cap;
529 ucred->luc_uid = rsd->rsd_uid;
530 ucred->luc_ghash = mds_get_group_entry(NULL, rsd->rsd_uid);
531 ucred->luc_ginfo = NULL;
533 if (ucred->luc_ghash && ucred->luc_ghash->ge_group_info) {
534 ucred->luc_ginfo = ucred->luc_ghash->ge_group_info;
535 get_group_info(ucred->luc_ginfo);
538 /* everything is done if we don't allow set groups */
539 if (!mds_allow_setgroups())
542 if (rsd->rsd_ngroups > LUSTRE_MAX_GROUPS) {
543 CERROR("client provide too many groups: %d\n",
545 drop_ucred_ginfo(ucred);
546 mds_put_group_entry(NULL, ucred->luc_ghash);
550 if (ucred->luc_uid == 0) {
551 if (rsd->rsd_ngroups == 0) {
552 drop_ucred_ginfo(ucred);
556 gnew = groups_alloc(rsd->rsd_ngroups);
558 CERROR("out of memory\n");
559 drop_ucred_ginfo(ucred);
560 mds_put_group_entry(NULL, ucred->luc_ghash);
563 groups_from_buffer(gnew, rsd->rsd_groups);
564 /* can't rely on client to sort them */
567 drop_ucred_ginfo(ucred);
568 ucred->luc_ginfo = gnew;
570 __u32 set = 0, cur = 0;
571 struct group_info *ginfo;
573 /* if no group info in hash, we don't
574 * bother createing new
576 if (!ucred->luc_ginfo)
579 /* Note: freeing a group_info count on 'nblocks' instead of
580 * 'ngroups', thus we can safely alloc enough buffer and reduce
581 * and ngroups number later.
583 gnew = groups_alloc(rsd->rsd_ngroups);
585 CERROR("out of memory\n");
586 drop_ucred_ginfo(ucred);
587 mds_put_group_entry(NULL, ucred->luc_ghash);
591 ginfo = ucred->luc_ginfo;
592 while (cur < rsd->rsd_ngroups) {
593 if (groups_search(ginfo, rsd->rsd_groups[cur])) {
594 GROUP_AT(gnew, set) = rsd->rsd_groups[cur];
601 put_group_info(ucred->luc_ginfo);
602 ucred->luc_ginfo = gnew;
/*
 * mds_exit_ucred() - release what mds_init_ucred() attached to @ucred.
 *
 * Puts the group_info reference and the group hash entry, each only if the
 * corresponding pointer is set.  The pointers are not cleared on the lines
 * visible here, so the function should not be called twice on the same
 * @ucred -- TODO confirm against the full source.
 */
607 void mds_exit_ucred(struct lvfs_ucred *ucred)
611 if (ucred->luc_ginfo)
612 put_group_info(ucred->luc_ginfo);
613 if (ucred->luc_ghash)
614 mds_put_group_entry(NULL, ucred->luc_ghash);