X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmds%2Fmds_lib.c;h=2da62bf5192b9a0dbfdf392506841af919f1efc9;hb=f0cf9fa9e22717eb407bea671b99b5c420d43325;hp=81107a1ad6a8676278a0c60d871d988f0f65507e;hpb=4801dab391279a180ac7601424f79298bdbd0fb3;p=fs%2Flustre-release.git diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 81107a1..2da62bf 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -47,55 +47,186 @@ #include #include -#include +#include +#include "mds_internal.h" -#include -#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) +struct group_info *groups_alloc(int ngroups) +{ + struct group_info *ginfo; + + LASSERT(ngroups <= NGROUPS_SMALL); + + OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *)); + if (!ginfo) + return NULL; + ginfo->ngroups = ngroups; + ginfo->nblocks = 1; + ginfo->blocks[0] = ginfo->small_block; + atomic_set(&ginfo->usage, 1); + + return ginfo; +} -void mds_pack_dentry2fid(struct ll_fid *fid, struct dentry *dentry) +void groups_free(struct group_info *ginfo) { - fid->id = dentry->d_inum; - fid->generation = dentry->d_generation; - fid->mds = dentry->d_mdsnum; + LASSERT(ginfo->ngroups <= NGROUPS_SMALL); + LASSERT(ginfo->nblocks == 1); + LASSERT(ginfo->blocks[0] == ginfo->small_block); + + OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *)); } -void mds_pack_dentry2body(struct mds_body *b, struct dentry *dentry) +/* for 2.4 the group number is small, so simply search the + * whole array. + */ +int groups_search(struct group_info *ginfo, gid_t grp) { - b->valid |= OBD_MD_FLID | OBD_MD_FLGENER; - b->ino = dentry->d_inum; - b->generation = dentry->d_generation; - b->mds = dentry->d_mdsnum; + int i; + + if (!ginfo) + return 0; + + for (i = 0; i < ginfo->ngroups; i++) + if (GROUP_AT(ginfo, i) == grp) + return 1; + return 0; } -void mds_pack_inode2fid(struct obd_device *obd, struct ll_fid *fid, - struct inode *inode) +#else /* >= 2.6.4 */ + +void groups_sort(struct group_info *ginfo) { -#ifdef CONFIG_SNAPFS - if (is_smfs_sb(inode->i_sb)) { - struct smfs_inode_info *sm_info = I2SMI(inode); - fid->snap_index = sm_info->sm_sninfo.sn_index; + int base, max, stride; + int gidsetsize = ginfo->ngroups; + + for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1) + ; /* nothing */ + stride /= 3; + + while (stride) { + max = gidsetsize - stride; + for (base = 0; base < max; base++) { + int left = base; + int right = left + stride; + gid_t tmp = GROUP_AT(ginfo, right); + + while (left >= 0 && GROUP_AT(ginfo, left) > tmp) { + GROUP_AT(ginfo, right) = + GROUP_AT(ginfo, left); + right = left; + left -= stride; + } + GROUP_AT(ginfo, right) = tmp; + } + stride /= 3; } +} + +int groups_search(struct group_info *ginfo, gid_t grp) +{ + int left, right; + + if (!ginfo) + return 0; + + left = 0; + right = ginfo->ngroups; + while (left < right) { + int mid = (left + right) / 2; + int cmp = grp - GROUP_AT(ginfo, mid); + if (cmp > 0) + left = mid + 1; + else if (cmp < 0) + right = mid; + else + return 1; + } + return 0; +} #endif - fid->id = inode->i_ino; - fid->generation = inode->i_generation; - fid->f_type = (S_IFMT & inode->i_mode); - fid->mds = obd->u.mds.mds_num; + +void groups_from_buffer(struct group_info *ginfo, __u32 *gids) +{ + int i, ngroups = ginfo->ngroups; + + for (i = 0; i < ginfo->nblocks; i++) { + int count = min(NGROUPS_PER_BLOCK, ngroups); + + memcpy(ginfo->blocks[i], gids, count * sizeof(__u32)); + gids += NGROUPS_PER_BLOCK; + ngroups -= count; + } +} + +void mds_pack_dentry2id(struct obd_device *obd, + struct lustre_id *id, + struct dentry *dentry, + int fid) +{ + id_ino(id) = dentry->d_inum; + id_gen(id) = dentry->d_generation; + + if (fid) { + id_fid(id) = dentry->d_fid; + id_group(id) = dentry->d_mdsnum; + } +} + +void mds_pack_dentry2body(struct obd_device *obd, + struct mds_body *b, + struct dentry *dentry, + int fid) +{ + b->valid |= OBD_MD_FLID | OBD_MD_FLGENER | + OBD_MD_MDS; + + if (fid) + b->valid |= OBD_MD_FID; + + mds_pack_dentry2id(obd, &b->id1, dentry, fid); +} + +int mds_pack_inode2id(struct obd_device *obd, + struct lustre_id *id, + struct inode *inode, + int fid) +{ + int rc = 0; + ENTRY; + + if (fid) { + /* we have to avoid deadlock. */ + if (!down_trylock(&inode->i_sem)) { + rc = mds_read_inode_sid(obd, inode, id); + up(&inode->i_sem); + } else { + rc = mds_read_inode_sid(obd, inode, id); + } + } + + if (rc == 0) { + id_ino(id) = inode->i_ino; + id_gen(id) = inode->i_generation; + id_type(id) = (S_IFMT & inode->i_mode); + } + RETURN(rc); } /* Note that we can copy all of the fields, just some will not be "valid" */ void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b, - struct inode *inode) + struct inode *inode, int fid) { b->valid |= OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | - OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE | - OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER | - OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */ - - if (!S_ISREG(inode->i_mode)) - b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLRDEV; + OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE | + OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER | + OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */ - b->ino = inode->i_ino; + if (!S_ISREG(inode->i_mode)) { + b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLRDEV; + } b->atime = LTIME_S(inode->i_atime); b->mtime = LTIME_S(inode->i_mtime); b->ctime = LTIME_S(inode->i_ctime); @@ -106,6 +237,7 @@ void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b, b->gid = inode->i_gid; b->flags = inode->i_flags; b->rdev = inode->i_rdev; + /* Return the correct link count for orphan inodes */ if (mds_inode_is_orphan(inode)) { b->nlink = 0; @@ -114,9 +246,11 @@ void mds_pack_inode2body(struct obd_device *obd, struct mds_body *b, } else { b->nlink = inode->i_nlink; } - b->generation = inode->i_generation; - b->suppgid = -1; - b->mds = obd->u.mds.mds_num; + + if (fid) + b->valid |= OBD_MD_FID; + + mds_pack_inode2id(obd, &b->id1, inode, fid); } /* unpacking */ @@ -132,12 +266,7 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, if (rec == NULL) RETURN (-EFAULT); - r->_ur_fsuid = rec->sa_fsuid; - r->_ur_fsgid = rec->sa_fsgid; - r->_ur_cap = rec->sa_cap; - r->_ur_suppgid1 = rec->sa_suppgid; - r->_ur_suppgid2 = -1; - r->ur_fid1 = &rec->sa_fid; + r->ur_id1 = &rec->sa_id; attr->ia_valid = rec->sa_valid; attr->ia_mode = rec->sa_mode; attr->ia_uid = rec->sa_uid; @@ -158,11 +287,11 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, } if (req->rq_reqmsg->bufcount > offset + 2) { - r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0); - if (r->ur_eadata == NULL) + r->ur_ea2data = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0); + if (r->ur_ea2data == NULL) RETURN (-EFAULT); - r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2]; + r->ur_ea2datalen = req->rq_reqmsg->buflens[offset + 2]; } RETURN(0); @@ -179,17 +308,12 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset, if (rec == NULL) RETURN (-EFAULT); - r->_ur_fsuid = rec->cr_fsuid; - r->_ur_fsgid = rec->cr_fsgid; - r->_ur_cap = rec->cr_cap; - r->ur_fid1 = &rec->cr_fid; - r->ur_fid2 = &rec->cr_replayfid; + r->ur_id1 = &rec->cr_id; + r->ur_id2 = &rec->cr_replayid; r->ur_mode = rec->cr_mode; r->ur_rdev = rec->cr_rdev; r->ur_time = rec->cr_time; r->ur_flags = rec->cr_flags; - r->_ur_suppgid1 = rec->cr_suppgid; - r->_ur_suppgid2 = -1; LASSERT_REQSWAB (req, offset + 1); r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); @@ -235,13 +359,8 @@ static int mds_link_unpack(struct ptlrpc_request *req, int offset, if (rec == NULL) RETURN (-EFAULT); - r->_ur_fsuid = rec->lk_fsuid; - r->_ur_fsgid = rec->lk_fsgid; - r->_ur_cap = rec->lk_cap; - r->_ur_suppgid1 = rec->lk_suppgid1; - r->_ur_suppgid2 = rec->lk_suppgid2; - r->ur_fid1 = &rec->lk_fid1; - r->ur_fid2 = &rec->lk_fid2; + r->ur_id1 = &rec->lk_id1; + r->ur_id2 = &rec->lk_id2; r->ur_time = rec->lk_time; LASSERT_REQSWAB (req, offset + 1); @@ -263,14 +382,9 @@ static int mds_unlink_unpack(struct ptlrpc_request *req, int offset, if (rec == NULL) RETURN(-EFAULT); - r->_ur_fsuid = rec->ul_fsuid; - r->_ur_fsgid = rec->ul_fsgid; - r->_ur_cap = rec->ul_cap; r->ur_mode = rec->ul_mode; - r->_ur_suppgid1 = rec->ul_suppgid; - r->_ur_suppgid2 = -1; - r->ur_fid1 = &rec->ul_fid1; - r->ur_fid2 = &rec->ul_fid2; + r->ur_id1 = &rec->ul_id1; + r->ur_id2 = &rec->ul_id2; r->ur_time = rec->ul_time; LASSERT_REQSWAB (req, offset + 1); @@ -288,17 +402,12 @@ static int mds_rename_unpack(struct ptlrpc_request *req, int offset, ENTRY; rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_unlink); + lustre_swab_mds_rec_rename); if (rec == NULL) RETURN(-EFAULT); - r->_ur_fsuid = rec->rn_fsuid; - r->_ur_fsgid = rec->rn_fsgid; - r->_ur_cap = rec->rn_cap; - r->_ur_suppgid1 = rec->rn_suppgid1; - r->_ur_suppgid2 = rec->rn_suppgid2; - r->ur_fid1 = &rec->rn_fid1; - r->ur_fid2 = &rec->rn_fid2; + r->ur_id1 = &rec->rn_id1; + r->ur_id2 = &rec->rn_id2; r->ur_time = rec->rn_time; LASSERT_REQSWAB (req, offset + 1); @@ -324,19 +433,14 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), lustre_swab_mds_rec_create); if (rec == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); - r->_ur_fsuid = rec->cr_fsuid; - r->_ur_fsgid = rec->cr_fsgid; - r->_ur_cap = rec->cr_cap; - r->ur_fid1 = &rec->cr_fid; - r->ur_fid2 = &rec->cr_replayfid; + r->ur_id1 = &rec->cr_id; + r->ur_id2 = &rec->cr_replayid; r->ur_mode = rec->cr_mode; r->ur_rdev = rec->cr_rdev; r->ur_time = rec->cr_time; r->ur_flags = rec->cr_flags; - r->_ur_suppgid1 = rec->cr_suppgid; - r->_ur_suppgid2 = -1; LASSERT_REQSWAB (req, offset + 1); r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); @@ -348,7 +452,7 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, if (req->rq_reqmsg->bufcount > offset + 2) { r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0); if (r->ur_eadata == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2]; } RETURN(0); @@ -374,10 +478,12 @@ int mds_update_unpack(struct ptlrpc_request *req, int offset, int rc; ENTRY; - /* NB don't lustre_swab_reqbuf() here. We're just taking a peek - * and we want to leave it to the specific unpacker once we've - * identified the message type */ - opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof (*opcodep)); + /* + * NB don't lustre_swab_reqbuf() here. We're just taking a peek and we + * want to leave it to the specific unpacker once we've identified the + * message type. + */ + opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof(*opcodep)); if (opcodep == NULL) RETURN(-EFAULT); @@ -391,7 +497,554 @@ int mds_update_unpack(struct ptlrpc_request *req, int offset, RETURN(-EFAULT); } + rec->ur_id1 = NULL; + rec->ur_id2 = NULL; rec->ur_opcode = opcode; + rc = mds_unpackers[opcode](req, offset, rec); + +#if CRAY_PORTALS + rec->ur_fsuid = req->rq_uid; +#endif RETURN(rc); } + +/******************************** + * MDS uid/gid mapping handling * + ********************************/ + +static +struct mds_idmap_entry* idmap_alloc_entry(__u32 rmt_id, __u32 lcl_id) +{ + struct mds_idmap_entry *e; + + OBD_ALLOC(e, sizeof(*e)); + if (!e) + return NULL; + + INIT_LIST_HEAD(&e->rmt_hash); + INIT_LIST_HEAD(&e->lcl_hash); + atomic_set(&e->refcount, 1); + e->rmt_id = rmt_id; + e->lcl_id = lcl_id; + + return e; +} + +void idmap_free_entry(struct mds_idmap_entry *e) +{ + if (!list_empty(&e->rmt_hash)) + list_del(&e->rmt_hash); + if (!list_empty(&e->lcl_hash)) + list_del(&e->lcl_hash); + OBD_FREE(e, sizeof(*e)); +} + +static +int idmap_insert_entry(struct list_head *rmt_hash, struct list_head *lcl_hash, + struct mds_idmap_entry *new, const char *warn_msg) +{ + struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(new->rmt_id)]; + struct list_head *lcl_head = &lcl_hash[MDS_IDMAP_HASHFUNC(new->lcl_id)]; + struct mds_idmap_entry *e; + + list_for_each_entry(e, rmt_head, rmt_hash) { + if (e->rmt_id == new->rmt_id && + e->lcl_id == new->lcl_id) { + atomic_inc(&e->refcount); + return 1; + } + if (e->rmt_id == new->rmt_id && warn_msg) + CWARN("%s: rmt id %u already map to %u (new %u)\n", + warn_msg, e->rmt_id, e->lcl_id, new->lcl_id); + if (e->lcl_id == new->lcl_id && warn_msg) + CWARN("%s: lcl id %u already be mapped from %u " + "(new %u)\n", warn_msg, + e->lcl_id, e->rmt_id, new->rmt_id); + } + + list_add_tail(rmt_head, &new->rmt_hash); + list_add_tail(lcl_head, &new->lcl_hash); + return 0; +} + +static +int idmap_remove_entry(struct list_head *rmt_hash, struct list_head *lcl_hash, + __u32 rmt_id, __u32 lcl_id) +{ + struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(rmt_id)]; + struct mds_idmap_entry *e; + + list_for_each_entry(e, rmt_head, rmt_hash) { + if (e->rmt_id == rmt_id && e->lcl_id == lcl_id) { + if (atomic_dec_and_test(&e->refcount)) { + list_del(&e->rmt_hash); + list_del(&e->lcl_hash); + OBD_FREE(e, sizeof(*e)); + return 0; + } else + return 1; + } + } + return -ENOENT; +} + +int mds_idmap_add(struct mds_idmap_table *tbl, + uid_t rmt_uid, uid_t lcl_uid, + gid_t rmt_gid, gid_t lcl_gid) +{ + struct mds_idmap_entry *ue, *ge; + ENTRY; + + if (!tbl) + RETURN(-EPERM); + + ue = idmap_alloc_entry(rmt_uid, lcl_uid); + if (!ue) + RETURN(-ENOMEM); + ge = idmap_alloc_entry(rmt_gid, lcl_gid); + if (!ge) { + idmap_free_entry(ue); + RETURN(-ENOMEM); + } + + spin_lock(&tbl->mit_lock); + + if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX], + tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX], + ue, "UID mapping")) { + idmap_free_entry(ue); + } + + if (idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX], + tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX], + ge, "GID mapping")) { + idmap_free_entry(ge); + } + + spin_unlock(&tbl->mit_lock); + RETURN(0); +} + +int mds_idmap_del(struct mds_idmap_table *tbl, + uid_t rmt_uid, uid_t lcl_uid, + gid_t rmt_gid, gid_t lcl_gid) +{ + ENTRY; + + if (!tbl) + RETURN(0); + + spin_lock(&tbl->mit_lock); + idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX], + tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX], + rmt_uid, lcl_uid); + idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX], + tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX], + rmt_gid, lcl_gid); + spin_unlock(&tbl->mit_lock); + RETURN(0); +} + +static +__u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id) +{ + struct list_head *head = &hash[MDS_IDMAP_HASHFUNC(id)]; + struct mds_idmap_entry *e; + + if (!reverse) { + list_for_each_entry(e, head, rmt_hash) { + if (e->rmt_id == id) + return e->lcl_id; + } + return MDS_IDMAP_NOTFOUND; + } else { + list_for_each_entry(e, head, lcl_hash) { + if (e->lcl_id == id) + return e->rmt_id; + } + return MDS_IDMAP_NOTFOUND; + } +} + +int mds_idmap_lookup_uid(struct mds_idmap_table *tbl, int reverse, uid_t uid) +{ + struct list_head *hash; + + if (!tbl) + return MDS_IDMAP_NOTFOUND; + + if (!reverse) + hash = tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX]; + else + hash = tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + uid = idmap_lookup_id(hash, reverse, uid); + spin_unlock(&tbl->mit_lock); + + return uid; +} + +int mds_idmap_lookup_gid(struct mds_idmap_table *tbl, int reverse, gid_t gid) +{ + struct list_head *hash; + + if (!tbl) + return MDS_IDMAP_NOTFOUND; + + if (!reverse) + hash = tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX]; + else + hash = tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX]; + + spin_lock(&tbl->mit_lock); + gid = idmap_lookup_id(hash, reverse, gid); + spin_unlock(&tbl->mit_lock); + + return gid; +} + +struct mds_idmap_table *mds_idmap_alloc() +{ + struct mds_idmap_table *tbl; + int i, j; + + OBD_ALLOC(tbl, sizeof(*tbl)); + if (!tbl) + return NULL; + + spin_lock_init(&tbl->mit_lock); + for (i = 0; i < MDS_IDMAP_N_HASHES; i++) + for (j = 0; j < MDS_IDMAP_HASHSIZE; j++) + INIT_LIST_HEAD(&tbl->mit_idmaps[i][j]); + + return tbl; +} + +static void idmap_clear_rmt_hash(struct list_head *list) +{ + struct mds_idmap_entry *e; + int i; + + for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) { + while (!list_empty(&list[i])) { + e = list_entry(list[i].next, struct mds_idmap_entry, + rmt_hash); + idmap_free_entry(e); + } + } +} + +void mds_idmap_free(struct mds_idmap_table *tbl) +{ + int i; + + spin_lock(&tbl->mit_lock); + idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX]); + idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX]); + + /* paranoid checking */ + for (i = 0; i < MDS_IDMAP_HASHSIZE; i++) { + LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX][i])); + LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX][i])); + } + spin_unlock(&tbl->mit_lock); + + OBD_FREE(tbl, sizeof(*tbl)); +} + +/********************************* + * helpers doing mapping for MDS * + *********************************/ + +/* + * we allow remote setuid/setgid to an "authencated" one, + * this policy probably change later. + */ +static +int mds_req_secdesc_do_map(struct mds_export_data *med, + struct mds_req_sec_desc *rsd) +{ + struct mds_idmap_table *idmap = med->med_idmap; + uid_t uid, fsuid; + gid_t gid, fsgid; + + uid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_uid); + if (uid == MDS_IDMAP_NOTFOUND) { + CERROR("can't find map for uid %u\n", rsd->rsd_uid); + return -EPERM; + } + + if (rsd->rsd_uid == rsd->rsd_fsuid) + fsuid = uid; + else { + fsuid = mds_idmap_lookup_uid(idmap, 0, rsd->rsd_fsuid); + if (fsuid == MDS_IDMAP_NOTFOUND) { + CERROR("can't find map for fsuid %u\n", rsd->rsd_fsuid); + return -EPERM; + } + } + + gid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_gid); + if (gid == MDS_IDMAP_NOTFOUND) { + CERROR("can't find map for gid %u\n", rsd->rsd_gid); + return -EPERM; + } + + if (rsd->rsd_gid == rsd->rsd_fsgid) + fsgid = gid; + else { + fsgid = mds_idmap_lookup_gid(idmap, 0, rsd->rsd_fsgid); + if (fsgid == MDS_IDMAP_NOTFOUND) { + CERROR("can't find map for fsgid %u\n", rsd->rsd_fsgid); + return -EPERM; + } + } + + rsd->rsd_uid = uid; + rsd->rsd_gid = gid; + rsd->rsd_fsuid = fsuid; + rsd->rsd_fsgid = fsgid; + + return 0; +} + +void mds_body_do_reverse_map(struct mds_export_data *med, + struct mds_body *body) +{ + uid_t uid; + gid_t gid; + + if (!med->med_remote) + return; + + ENTRY; + if (body->valid & OBD_MD_FLUID) { + uid = mds_idmap_lookup_uid(med->med_idmap, 1, body->uid); + if (uid == MDS_IDMAP_NOTFOUND) { + uid = med->med_nllu; + if (body->valid & OBD_MD_FLMODE) { + body->mode = (body->mode & ~S_IRWXU) | + ((body->mode & S_IRWXO) << 6); + } + } + body->uid = uid; + } + if (body->valid & OBD_MD_FLGID) { + gid = mds_idmap_lookup_gid(med->med_idmap, 1, body->gid); + if (gid == MDS_IDMAP_NOTFOUND) { + gid = med->med_nllg; + if (body->valid & OBD_MD_FLMODE) { + body->mode = (body->mode & ~S_IRWXG) | + ((body->mode & S_IRWXO) << 3); + } + } + body->gid = gid; + } + + EXIT; +} + +/********************** + * MDS ucred handling * + **********************/ + +static inline void drop_ucred_ginfo(struct lvfs_ucred *ucred) +{ + if (ucred->luc_ginfo) { + put_group_info(ucred->luc_ginfo); + ucred->luc_ginfo = NULL; + } +} + +static inline void drop_ucred_lsd(struct lvfs_ucred *ucred) +{ + if (ucred->luc_lsd) { + mds_put_lsd(ucred->luc_lsd); + ucred->luc_lsd = NULL; + } +} + +/* + * the heart of the uid/gid handling and security checking. + * + * root could set any group_info if we allowed setgroups, while + * normal user only could 'reduce' their group members -- which + * is somewhat expensive. + */ +int mds_init_ucred(struct lvfs_ucred *ucred, + struct ptlrpc_request *req, + struct mds_req_sec_desc *rsd) +{ + struct mds_obd *mds = &req->rq_export->exp_obd->u.mds; + struct mds_export_data *med = &req->rq_export->u.eu_mds_data; + struct lustre_sec_desc *lsd; + ptl_nid_t peernid = req->rq_peer.peer_id.nid; + struct group_info *gnew; + unsigned int setuid, setgid, strong_sec, root_squashed; + __u32 lsd_perms; + ENTRY; + + LASSERT(ucred); + LASSERT(rsd); + LASSERT(rsd->rsd_ngroups <= LUSTRE_MAX_GROUPS); + + /* XXX We'v no dedicated bits indicating whether GSS is used, + * and authenticated/mapped uid is valid. currently we suppose + * gss must initialize rq_sec_svcdata. + */ + if (req->rq_sec_svcdata && req->rq_auth_uid == -1) { + CWARN("user not authenticated, deny access\n"); + RETURN(-EPERM); + } + + strong_sec = (req->rq_auth_uid != -1); + LASSERT(!(req->rq_remote_realm && !strong_sec)); + + /* if we use strong authentication for a local client, we + * expect the uid which client claimed is true. + */ + if (!med->med_remote && strong_sec && + req->rq_auth_uid != rsd->rsd_uid) { + CWARN("nid "LPX64": UID %u was authenticated while client " + "claimed %u, enforce to be %u\n", + peernid, req->rq_auth_uid, rsd->rsd_uid, + req->rq_auth_uid); + if (rsd->rsd_uid != rsd->rsd_fsuid) + rsd->rsd_uid = req->rq_auth_uid; + else + rsd->rsd_uid = rsd->rsd_fsuid = req->rq_auth_uid; + } + + if (med->med_remote) { + int rc; + + if (req->rq_mapped_uid == MDS_IDMAP_NOTFOUND) { + CWARN("no mapping found, deny\n"); + RETURN(-EPERM); + } + + rc = mds_req_secdesc_do_map(med, rsd); + if (rc) + RETURN(rc); + } + + /* now lsd come into play */ + ucred->luc_ginfo = NULL; + ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid); + + if (!lsd) { + CERROR("Deny access without LSD: uid %d\n", rsd->rsd_uid); + RETURN(-EPERM); + } + + /* find out the setuid/setgid attempt */ + setuid = (rsd->rsd_uid != rsd->rsd_fsuid); + setgid = (rsd->rsd_gid != rsd->rsd_fsgid || + rsd->rsd_gid != lsd->lsd_gid); + + lsd_perms = mds_lsd_get_perms(lsd, med->med_remote, 0, peernid); + + /* check permission of setuid */ + if (setuid && !(lsd_perms & LSD_PERM_SETUID)) { + CWARN("mds blocked setuid attempt: %u -> %u\n", + rsd->rsd_uid, rsd->rsd_fsuid); + RETURN(-EPERM); + } + + /* check permission of setgid */ + if (setgid && !(lsd_perms & LSD_PERM_SETGID)) { + CWARN("mds blocked setgid attempt: %u -> %u\n", + rsd->rsd_gid, rsd->rsd_fsgid); + RETURN(-EPERM); + } + + root_squashed = mds_squash_root(mds, rsd, &peernid); + + /* remove privilege for non-root user */ + if (rsd->rsd_fsuid) + rsd->rsd_cap &= ~CAP_FS_MASK; + + /* by now every fields other than groups in rsd have been granted */ + ucred->luc_uid = rsd->rsd_uid; + ucred->luc_gid = rsd->rsd_gid; + ucred->luc_fsuid = rsd->rsd_fsuid; + ucred->luc_fsgid = rsd->rsd_fsgid; + ucred->luc_cap = rsd->rsd_cap; + + /* don't use any supplementary group for remote client or + * we squashed root */ + if (med->med_remote || root_squashed) + RETURN(0); + + /* install groups from LSD */ + if (lsd->lsd_ginfo) { + ucred->luc_ginfo = lsd->lsd_ginfo; + get_group_info(ucred->luc_ginfo); + } + + /* everything is done if we don't allow setgroups */ + if (!(lsd_perms & LSD_PERM_SETGRP)) + RETURN(0); + + /* root could set any groups as he want (if allowed), normal + * users only could reduce his group array. + */ + if (ucred->luc_uid == 0) { + drop_ucred_ginfo(ucred); + + if (rsd->rsd_ngroups == 0) + RETURN(0); + + gnew = groups_alloc(rsd->rsd_ngroups); + if (!gnew) { + CERROR("out of memory\n"); + drop_ucred_lsd(ucred); + RETURN(-ENOMEM); + } + groups_from_buffer(gnew, rsd->rsd_groups); + groups_sort(gnew); /* don't rely on client doing this */ + + ucred->luc_ginfo = gnew; + } else { + __u32 set = 0, cur = 0; + struct group_info *ginfo = ucred->luc_ginfo; + + if (!ginfo) + RETURN(0); + + /* Note: freeing a group_info count on 'nblocks' instead of + * 'ngroups', thus we can safely alloc enough buffer and reduce + * and ngroups number later. + */ + gnew = groups_alloc(rsd->rsd_ngroups); + if (!gnew) { + CERROR("out of memory\n"); + drop_ucred_ginfo(ucred); + drop_ucred_lsd(ucred); + RETURN(-ENOMEM); + } + + while (cur < rsd->rsd_ngroups) { + if (groups_search(ginfo, rsd->rsd_groups[cur])) { + GROUP_AT(gnew, set) = rsd->rsd_groups[cur]; + set++; + } + cur++; + } + gnew->ngroups = set; + + put_group_info(ucred->luc_ginfo); + ucred->luc_ginfo = gnew; + } + RETURN(0); +} + +void mds_exit_ucred(struct lvfs_ucred *ucred) +{ + ENTRY; + drop_ucred_ginfo(ucred); + drop_ucred_lsd(ucred); + EXIT; +}