+
+/*
+ * Root squashing.  The rule is deliberately simple: whenever the uid or
+ * the fsuid of a request is root, the matching gid/fsgid is squashed along
+ * with it; setuid/setgid attributes are not taken into account.
+ *
+ * Returns 1 if @rsd was rewritten, 0 if it was left untouched (no squash
+ * uid configured, the peer is the no-squash NID, or neither id is root).
+ * Filesystem capabilities are cleared whenever a squash takes place.
+ */
+static
+int mds_squash_root(struct mds_obd *mds, struct mds_req_sec_desc *rsd,
+                    ptl_nid_t *peernid)
+{
+        int uid_is_root, fsuid_is_root;
+
+        /* nothing to do without a squash uid, or for the exempt peer */
+        if (mds->mds_squash_uid == 0 || mds->mds_nosquash_nid == *peernid)
+                return 0;
+
+        uid_is_root = (rsd->rsd_uid == 0);
+        fsuid_is_root = (rsd->rsd_fsuid == 0);
+        if (!uid_is_root && !fsuid_is_root)
+                return 0;
+
+        /* log the identity before and after squashing */
+        CDEBUG(D_SEC, "squash req from "LPX64":"
+               "(%u:%u-%u:%u/%x)=>(%u:%u-%u:%u/%x)\n", *peernid,
+               rsd->rsd_uid, rsd->rsd_gid,
+               rsd->rsd_fsuid, rsd->rsd_fsgid, rsd->rsd_cap,
+               uid_is_root ? mds->mds_squash_uid : rsd->rsd_uid,
+               uid_is_root ? mds->mds_squash_gid : rsd->rsd_gid,
+               fsuid_is_root ? mds->mds_squash_uid : rsd->rsd_fsuid,
+               fsuid_is_root ? mds->mds_squash_gid : rsd->rsd_fsgid,
+               rsd->rsd_cap & ~CAP_FS_MASK);
+
+        if (uid_is_root) {
+                rsd->rsd_uid = mds->mds_squash_uid;
+                rsd->rsd_gid = mds->mds_squash_gid;
+        }
+        if (fsuid_is_root) {
+                rsd->rsd_fsuid = mds->mds_squash_uid;
+                rsd->rsd_fsgid = mds->mds_squash_gid;
+        }
+        rsd->rsd_cap &= ~CAP_FS_MASK;
+
+        return 1;
+}
+
+/********************************
+ * MDS uid/gid mapping handling *
+ ********************************/
+
+/*
+ * Allocate a mapping entry for the pair (@rmt_id, @lcl_id).  The entry
+ * starts with a reference count of 1 and with both hash links initialised
+ * as empty lists (i.e. not yet inserted anywhere).
+ *
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static
+struct mds_idmap_entry* idmap_alloc_entry(__u32 rmt_id, __u32 lcl_id)
+{
+        struct mds_idmap_entry *entry;
+
+        OBD_ALLOC(entry, sizeof(*entry));
+        if (entry != NULL) {
+                INIT_LIST_HEAD(&entry->rmt_hash);
+                INIT_LIST_HEAD(&entry->lcl_hash);
+                atomic_set(&entry->refcount, 1);
+                entry->rmt_id = rmt_id;
+                entry->lcl_id = lcl_id;
+        }
+
+        return entry;
+}
+
+/*
+ * Release a mapping entry: unlink it from each hash chain it is currently
+ * on (a link that is still an empty list is skipped), then free it.
+ * The reference count is not consulted here.
+ */
+void idmap_free_entry(struct mds_idmap_entry *e)
+{
+        struct list_head *links[2];
+        int n;
+
+        links[0] = &e->rmt_hash;
+        links[1] = &e->lcl_hash;
+
+        for (n = 0; n < 2; n++) {
+                if (list_empty(links[n]))
+                        continue;
+                list_del(links[n]);
+        }
+
+        OBD_FREE(e, sizeof(*e));
+}
+
+/*
+ * Insert @new into a pair of hash tables: @rmt_hash is indexed by remote
+ * id, @lcl_hash by local id.
+ *
+ * Only the remote-id bucket of @new is scanned for existing entries:
+ *  - if an identical (rmt_id, lcl_id) mapping is found, its reference
+ *    count is bumped and 1 is returned; @new is NOT linked and remains
+ *    owned by the caller;
+ *  - if an entry in that bucket shares only one of the two ids, a warning
+ *    prefixed by @warn_msg is printed (when @warn_msg is non-NULL) and the
+ *    scan continues.
+ * Otherwise @new is appended to both buckets and 0 is returned.
+ *
+ * No locking is done here.
+ */
+static
+int idmap_insert_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
+                       struct mds_idmap_entry *new, const char *warn_msg)
+{
+        struct list_head *rmt_head = &rmt_hash[MDS_IDMAP_HASHFUNC(new->rmt_id)];
+        struct list_head *lcl_head = &lcl_hash[MDS_IDMAP_HASHFUNC(new->lcl_id)];
+        struct mds_idmap_entry *e;
+
+        list_for_each_entry(e, rmt_head, rmt_hash) {
+                if (e->rmt_id == new->rmt_id &&
+                    e->lcl_id == new->lcl_id) {
+                        atomic_inc(&e->refcount);
+                        return 1;
+                }
+                if (e->rmt_id == new->rmt_id && warn_msg)
+                        CWARN("%s: rmt id %u already map to %u (new %u)\n",
+                              warn_msg, e->rmt_id, e->lcl_id, new->lcl_id);
+                if (e->lcl_id == new->lcl_id && warn_msg)
+                        CWARN("%s: lcl id %u already be mapped from %u "
+                              "(new %u)\n", warn_msg,
+                              e->lcl_id, e->rmt_id, new->rmt_id);
+        }
+
+        /*
+         * list_add_tail() takes (new node, list head).  With the arguments
+         * swapped the bucket head would be spliced into the new entry's
+         * private list, dropping every entry already in the bucket.
+         */
+        list_add_tail(&new->rmt_hash, rmt_head);
+        list_add_tail(&new->lcl_hash, lcl_head);
+        return 0;
+}
+
+/*
+ * Drop one reference on the mapping (@rmt_id, @lcl_id), looked up through
+ * the remote-id hash @rmt_hash.  @lcl_hash is accepted for symmetry with
+ * the insert helper but is not used for the lookup.
+ *
+ * Returns 0 if the last reference went away and the entry was unlinked
+ * from both chains and freed, 1 if other references remain, or -ENOENT
+ * if no such mapping exists.
+ */
+static
+int idmap_remove_entry(struct list_head *rmt_hash, struct list_head *lcl_hash,
+                       __u32 rmt_id, __u32 lcl_id)
+{
+        struct list_head *bucket = &rmt_hash[MDS_IDMAP_HASHFUNC(rmt_id)];
+        struct mds_idmap_entry *cur;
+
+        list_for_each_entry(cur, bucket, rmt_hash) {
+                if (cur->rmt_id != rmt_id || cur->lcl_id != lcl_id)
+                        continue;
+
+                /* still referenced by someone else: leave it linked */
+                if (!atomic_dec_and_test(&cur->refcount))
+                        return 1;
+
+                list_del(&cur->rmt_hash);
+                list_del(&cur->lcl_hash);
+                OBD_FREE(cur, sizeof(*cur));
+                return 0;
+        }
+
+        return -ENOENT;
+}
+
+/*
+ * Register a uid mapping (@rmt_uid -> @lcl_uid) and a gid mapping
+ * (@rmt_gid -> @lcl_gid) in @tbl.
+ *
+ * Both entries are allocated before the table lock is taken.  When the
+ * insert helper reports that the mapping was already present (non-zero
+ * return), the freshly allocated duplicate is released.
+ *
+ * Returns 0 on success, -EPERM if @tbl is NULL, -ENOMEM if an entry could
+ * not be allocated.
+ */
+int mds_idmap_add(struct mds_idmap_table *tbl,
+                  uid_t rmt_uid, uid_t lcl_uid,
+                  gid_t rmt_gid, gid_t lcl_gid)
+{
+        struct mds_idmap_entry *uid_ent, *gid_ent;
+        int existed;
+        ENTRY;
+
+        if (tbl == NULL)
+                RETURN(-EPERM);
+
+        uid_ent = idmap_alloc_entry(rmt_uid, lcl_uid);
+        if (uid_ent == NULL)
+                RETURN(-ENOMEM);
+
+        gid_ent = idmap_alloc_entry(rmt_gid, lcl_gid);
+        if (gid_ent == NULL) {
+                idmap_free_entry(uid_ent);
+                RETURN(-ENOMEM);
+        }
+
+        spin_lock(&tbl->mit_lock);
+
+        existed = idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
+                                     tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
+                                     uid_ent, "UID mapping");
+        if (existed)
+                idmap_free_entry(uid_ent);
+
+        existed = idmap_insert_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
+                                     tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
+                                     gid_ent, "GID mapping");
+        if (existed)
+                idmap_free_entry(gid_ent);
+
+        spin_unlock(&tbl->mit_lock);
+        RETURN(0);
+}
+
+/*
+ * Drop one reference on the uid mapping (@rmt_uid, @lcl_uid) and on the
+ * gid mapping (@rmt_gid, @lcl_gid) of @tbl, under the table lock.
+ *
+ * The results of the individual removals are ignored; the function always
+ * returns 0, including when @tbl is NULL.
+ */
+int mds_idmap_del(struct mds_idmap_table *tbl,
+                  uid_t rmt_uid, uid_t lcl_uid,
+                  gid_t rmt_gid, gid_t lcl_gid)
+{
+        ENTRY;
+
+        if (tbl != NULL) {
+                spin_lock(&tbl->mit_lock);
+                idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX],
+                                   tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX],
+                                   rmt_uid, lcl_uid);
+                idmap_remove_entry(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX],
+                                   tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX],
+                                   rmt_gid, lcl_gid);
+                spin_unlock(&tbl->mit_lock);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * Look up @id in the hash table @hash.
+ *
+ * With @reverse == 0 the table is walked through the rmt_hash links, @id
+ * is matched against the remote id and the local id is returned.  With
+ * @reverse != 0 the table is walked through the lcl_hash links, @id is
+ * matched against the local id and the remote id is returned.
+ *
+ * Returns MDS_IDMAP_NOTFOUND when no entry matches.  No locking is done
+ * here.
+ */
+static
+__u32 idmap_lookup_id(struct list_head *hash, int reverse, __u32 id)
+{
+        struct list_head *bucket = &hash[MDS_IDMAP_HASHFUNC(id)];
+        struct mds_idmap_entry *ent;
+
+        if (reverse) {
+                list_for_each_entry(ent, bucket, lcl_hash) {
+                        if (ent->lcl_id == id)
+                                return ent->rmt_id;
+                }
+        } else {
+                list_for_each_entry(ent, bucket, rmt_hash) {
+                        if (ent->rmt_id == id)
+                                return ent->lcl_id;
+                }
+        }
+
+        return MDS_IDMAP_NOTFOUND;
+}
+
+/*
+ * Translate @uid through @tbl under the table lock.
+ *
+ * @reverse == 0 maps a remote uid to the local one (remote-uid hash);
+ * @reverse != 0 maps a local uid back to the remote one (local-uid hash).
+ *
+ * Returns the mapped uid, or MDS_IDMAP_NOTFOUND if @tbl is NULL or no
+ * mapping exists.
+ */
+int mds_idmap_lookup_uid(struct mds_idmap_table *tbl, int reverse, uid_t uid)
+{
+        struct list_head *hash;
+        uid_t mapped;
+
+        if (tbl == NULL)
+                return MDS_IDMAP_NOTFOUND;
+
+        hash = reverse ? tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX] :
+                         tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX];
+
+        spin_lock(&tbl->mit_lock);
+        mapped = idmap_lookup_id(hash, reverse, uid);
+        spin_unlock(&tbl->mit_lock);
+
+        return mapped;
+}
+
+/*
+ * Translate @gid through @tbl under the table lock.
+ *
+ * @reverse == 0 maps a remote gid to the local one (remote-gid hash);
+ * @reverse != 0 maps a local gid back to the remote one (local-gid hash).
+ *
+ * Returns the mapped gid, or MDS_IDMAP_NOTFOUND if @tbl is NULL or no
+ * mapping exists.
+ */
+int mds_idmap_lookup_gid(struct mds_idmap_table *tbl, int reverse, gid_t gid)
+{
+        struct list_head *hash;
+        gid_t mapped;
+
+        if (tbl == NULL)
+                return MDS_IDMAP_NOTFOUND;
+
+        hash = reverse ? tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX] :
+                         tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX];
+
+        spin_lock(&tbl->mit_lock);
+        mapped = idmap_lookup_id(hash, reverse, gid);
+        spin_unlock(&tbl->mit_lock);
+
+        return mapped;
+}
+
+/*
+ * Allocate an id mapping table with its lock initialised and every bucket
+ * of every hash set up as an empty list.
+ *
+ * Returns the new table, or NULL if the allocation failed.
+ */
+struct mds_idmap_table *mds_idmap_alloc()
+{
+        struct mds_idmap_table *table;
+        int map, slot;
+
+        OBD_ALLOC(table, sizeof(*table));
+        if (table != NULL) {
+                spin_lock_init(&table->mit_lock);
+                for (map = 0; map < MDS_IDMAP_N_HASHES; map++) {
+                        for (slot = 0; slot < MDS_IDMAP_HASHSIZE; slot++)
+                                INIT_LIST_HEAD(&table->mit_idmaps[map][slot]);
+                }
+        }
+
+        return table;
+}
+
+/*
+ * Empty a remote-id hash: repeatedly take the first entry of each bucket
+ * (found through its rmt_hash link) and release it with
+ * idmap_free_entry(), which also unlinks it from its local-id chain.
+ */
+static void idmap_clear_rmt_hash(struct list_head *list)
+{
+        struct list_head *bucket;
+        int slot;
+
+        for (slot = 0; slot < MDS_IDMAP_HASHSIZE; slot++) {
+                bucket = &list[slot];
+                while (!list_empty(bucket))
+                        idmap_free_entry(list_entry(bucket->next,
+                                                    struct mds_idmap_entry,
+                                                    rmt_hash));
+        }
+}
+
+/*
+ * Destroy a mapping table: release every entry reachable from the
+ * remote-uid and remote-gid hashes, assert that the local-id hashes ended
+ * up empty as a consequence, then free the table itself.
+ *
+ * @tbl is dereferenced unconditionally and must not be NULL.
+ */
+void mds_idmap_free(struct mds_idmap_table *tbl)
+{
+        int slot;
+
+        spin_lock(&tbl->mit_lock);
+
+        idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_UIDMAP_IDX]);
+        idmap_clear_rmt_hash(tbl->mit_idmaps[MDS_RMT_GIDMAP_IDX]);
+
+        /* paranoia: every entry sits on both chains, so nothing may remain */
+        for (slot = 0; slot < MDS_IDMAP_HASHSIZE; slot++) {
+                LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_UIDMAP_IDX][slot]));
+                LASSERT(list_empty(&tbl->mit_idmaps[MDS_LCL_GIDMAP_IDX][slot]));
+        }
+
+        spin_unlock(&tbl->mit_lock);
+
+        OBD_FREE(tbl, sizeof(*tbl));
+}
+
+/*********************************
+ * helpers doing mapping for MDS *
+ *********************************/
+
+/*
+ * Map the four ids (uid, gid, fsuid, fsgid) of a remote request security
+ * descriptor to local ids using the export's idmap table.
+ *
+ * We allow a remote setuid/setgid as long as the target is itself an
+ * "authenticated" (i.e. mapped) id; this policy will probably change
+ * later.
+ *
+ * @rsd is only updated once all lookups have succeeded.  Returns 0 on
+ * success, -EPERM if any of the ids has no mapping.
+ */
+static
+int mds_req_secdesc_do_map(struct mds_export_data *med,
+                           struct mds_req_sec_desc *rsd)
+{
+        struct mds_idmap_table *tbl = med->med_idmap;
+        uid_t new_uid, new_fsuid;
+        gid_t new_gid, new_fsgid;
+
+        new_uid = mds_idmap_lookup_uid(tbl, 0, rsd->rsd_uid);
+        if (new_uid == MDS_IDMAP_NOTFOUND) {
+                CERROR("can't find map for uid %u\n", rsd->rsd_uid);
+                return -EPERM;
+        }
+
+        /* fsuid shares the uid mapping unless it differs from uid */
+        new_fsuid = new_uid;
+        if (rsd->rsd_uid != rsd->rsd_fsuid) {
+                new_fsuid = mds_idmap_lookup_uid(tbl, 0, rsd->rsd_fsuid);
+                if (new_fsuid == MDS_IDMAP_NOTFOUND) {
+                        CERROR("can't find map for fsuid %u\n", rsd->rsd_fsuid);
+                        return -EPERM;
+                }
+        }
+
+        new_gid = mds_idmap_lookup_gid(tbl, 0, rsd->rsd_gid);
+        if (new_gid == MDS_IDMAP_NOTFOUND) {
+                CERROR("can't find map for gid %u\n", rsd->rsd_gid);
+                return -EPERM;
+        }
+
+        /* likewise for fsgid and gid */
+        new_fsgid = new_gid;
+        if (rsd->rsd_gid != rsd->rsd_fsgid) {
+                new_fsgid = mds_idmap_lookup_gid(tbl, 0, rsd->rsd_fsgid);
+                if (new_fsgid == MDS_IDMAP_NOTFOUND) {
+                        CERROR("can't find map for fsgid %u\n", rsd->rsd_fsgid);
+                        return -EPERM;
+                }
+        }
+
+        rsd->rsd_uid = new_uid;
+        rsd->rsd_gid = new_gid;
+        rsd->rsd_fsuid = new_fsuid;
+        rsd->rsd_fsgid = new_fsgid;
+
+        return 0;
+}
+
+/*
+ * Translate the owner ids of a reply body from local to remote ids for a
+ * remote export; does nothing for a non-remote export.
+ *
+ * For each of uid/gid flagged valid in @body: if a reverse mapping exists
+ * it replaces the id; otherwise the id becomes the export's med_nllu /
+ * med_nllg value and, when the mode is valid too, the owner (resp. group)
+ * permission bits are overwritten with a copy of the "other" bits.
+ */
+void mds_body_do_reverse_map(struct mds_export_data *med,
+                             struct mds_body *body)
+{
+        uid_t rmt_uid;
+        gid_t rmt_gid;
+
+        if (!med->med_remote)
+                return;
+
+        ENTRY;
+        if (body->valid & OBD_MD_FLUID) {
+                rmt_uid = mds_idmap_lookup_uid(med->med_idmap, 1, body->uid);
+                if (rmt_uid == MDS_IDMAP_NOTFOUND) {
+                        /* unmapped owner: owner bits := other bits */
+                        if (body->valid & OBD_MD_FLMODE)
+                                body->mode = ((body->mode & S_IRWXO) << 6) |
+                                             (body->mode & ~S_IRWXU);
+                        rmt_uid = med->med_nllu;
+                }
+                body->uid = rmt_uid;
+        }
+        if (body->valid & OBD_MD_FLGID) {
+                rmt_gid = mds_idmap_lookup_gid(med->med_idmap, 1, body->gid);
+                if (rmt_gid == MDS_IDMAP_NOTFOUND) {
+                        /* unmapped group: group bits := other bits */
+                        if (body->valid & OBD_MD_FLMODE)
+                                body->mode = ((body->mode & S_IRWXO) << 3) |
+                                             (body->mode & ~S_IRWXG);
+                        rmt_gid = med->med_nllg;
+                }
+                body->gid = rmt_gid;
+        }
+
+        EXIT;
+}
+
+/*
+ * Translate the authenticated uid/gid stored in @perm from local ids back
+ * to the remote ids of a remote export (asserted via med_remote).
+ *
+ * Unlike the reply-body mapping, a missing mapping here is an error and
+ * must not fall back to nllu/nllg.  @perm is only updated when both
+ * lookups succeed.
+ *
+ * Returns 0 on success, -EPERM if the uid or the gid has no mapping.
+ */
+int mds_remote_perm_do_reverse_map(struct mds_export_data *med,
+                                   struct mds_remote_perm *perm)
+{
+        uid_t uid;
+        gid_t gid;
+
+        LASSERT(med->med_remote);
+
+        uid = mds_idmap_lookup_uid(med->med_idmap, 1, perm->mrp_auth_uid);
+        if (uid == MDS_IDMAP_NOTFOUND) {
+                CERROR("no map for uid %u\n", perm->mrp_auth_uid);
+                return -EPERM;
+        }
+        gid = mds_idmap_lookup_gid(med->med_idmap, 1, perm->mrp_auth_gid);
+        if (gid == MDS_IDMAP_NOTFOUND) {
+                /* report the id that actually failed to map */
+                CERROR("no map for gid %u\n", perm->mrp_auth_gid);
+                return -EPERM;
+        }
+
+        perm->mrp_auth_uid = uid;
+        perm->mrp_auth_gid = gid;
+        return 0;
+}
+
+/**********************
+ * MDS ucred handling *
+ **********************/
+
+/*
+ * Release the group_info reference held by @ucred, if any, and clear the
+ * pointer so that a repeated call is a no-op.
+ */
+static inline void drop_ucred_ginfo(struct lvfs_ucred *ucred)
+{
+        if (ucred->luc_ginfo == NULL)
+                return;
+
+        put_group_info(ucred->luc_ginfo);
+        ucred->luc_ginfo = NULL;
+}
+
+/*
+ * Release the LSD reference held by @ucred, if any, and clear the pointer
+ * so that a repeated call is a no-op.
+ */
+static inline void drop_ucred_lsd(struct lvfs_ucred *ucred)
+{
+        if (ucred->luc_lsd == NULL)
+                return;
+
+        mds_put_lsd(ucred->luc_lsd);
+        ucred->luc_lsd = NULL;
+}
+
+/*
+ * The heart of the uid/gid handling and security checking: validate the
+ * identity claimed in @rsd for request @req and fill in @ucred from it.
+ *
+ * Steps, in order:
+ *  - with strong (GSS auth/priv) security, the authenticated uid must
+ *    match the claimed uid; for a remote export the ids in @rsd are first
+ *    mapped to local ids (@rsd is modified in place);
+ *  - the LSD of the (possibly mapped) uid is fetched and setuid/setgid
+ *    attempts are checked against its permissions;
+ *  - root squashing is applied and filesystem capabilities are removed
+ *    for a non-root fsuid;
+ *  - supplementary groups are installed: root could set any group_info
+ *    if setgroups is allowed, while a normal user can only 'reduce' the
+ *    group list of the LSD -- which is somewhat expensive.
+ *
+ * A request authenticated as the mds user (using the mds service
+ * credential) bypasses the uid sanity check and the setuid/setgid checks.
+ *
+ * Returns 0 on success; @ucred then holds an LSD reference and possibly a
+ * group_info reference, both released by mds_exit_ucred().  Returns
+ * -EPERM when access is denied and -ENOMEM when a group array cannot be
+ * allocated; on every failure @ucred holds no references and its
+ * luc_ginfo/luc_lsd pointers are NULL.
+ */
+int mds_init_ucred(struct lvfs_ucred *ucred,
+                   struct ptlrpc_request *req,
+                   struct mds_req_sec_desc *rsd)
+{
+        struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
+        struct mds_export_data *med = &req->rq_export->u.eu_mds_data;
+        struct lustre_sec_desc *lsd;
+        ptl_nid_t peernid = req->rq_peer.peer_id.nid;
+        struct group_info *gnew;
+        unsigned int setuid, setgid, strong_sec, root_squashed;
+        __u32 lsd_perms;
+        ENTRY;
+
+        LASSERT(ucred);
+        LASSERT(rsd);
+        LASSERT(rsd->rsd_ngroups <= LUSTRE_MAX_GROUPS);
+
+        /* start from a defined state so that every failure return below
+         * leaves nothing dangling in ucred */
+        ucred->luc_ginfo = NULL;
+        ucred->luc_lsd = NULL;
+
+        if (SEC_FLAVOR_MAJOR(req->rq_req_secflvr) == PTLRPCS_FLVR_MAJOR_GSS &&
+            (SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_AUTH ||
+             SEC_FLAVOR_SVC(req->rq_req_secflvr) == PTLRPCS_SVC_PRIV))
+                strong_sec = 1;
+        else
+                strong_sec = 0;
+
+        /* a request from a remote realm must use strong security */
+        LASSERT(!(req->rq_remote_realm && !strong_sec));
+
+        if (strong_sec && req->rq_auth_uid == -1) {
+                CWARN("user not authenticated, deny access\n");
+                RETURN(-EPERM);
+        }
+
+        /* sanity check: if we use strong authentication, we expect the
+         * uid which client claimed is true.
+         * not apply to special mds user .
+         */
+        if (!req->rq_auth_usr_mds && strong_sec) {
+                if (!med->med_remote) {
+                        if (req->rq_auth_uid != rsd->rsd_uid) {
+                                CERROR("local client "LPU64": auth uid %u "
+                                       "while client claim %u:%u/%u:%u\n",
+                                       peernid, req->rq_auth_uid,
+                                       rsd->rsd_uid, rsd->rsd_gid,
+                                       rsd->rsd_fsuid, rsd->rsd_fsgid);
+                                RETURN(-EPERM);
+                        }
+                } else {
+                        if (req->rq_mapped_uid == MDS_IDMAP_NOTFOUND) {
+                                CWARN("no mapping found, deny\n");
+                                RETURN(-EPERM);
+                        }
+
+                        /* rsd holds local ids from here on */
+                        if (mds_req_secdesc_do_map(med, rsd))
+                                RETURN(-EPERM);
+
+                        if (req->rq_mapped_uid != rsd->rsd_uid) {
+                                CERROR("remote client "LPU64": auth uid %u "
+                                       "while client claim %u:%u/%u:%u\n",
+                                       peernid, req->rq_auth_uid,
+                                       rsd->rsd_uid, rsd->rsd_gid,
+                                       rsd->rsd_fsuid, rsd->rsd_fsgid);
+                                RETURN(-EPERM);
+                        }
+                }
+        }
+
+        /* now LSD come into play */
+        ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid);
+
+        if (!lsd) {
+                CERROR("Deny access without LSD: uid %d\n", rsd->rsd_uid);
+                RETURN(-EPERM);
+        }
+
+        lsd_perms = mds_lsd_get_perms(lsd, med->med_remote, 0, peernid);
+
+        /* check setuid/setgid permissions.
+         * again not apply to special mds user.
+         */
+        if (!req->rq_auth_usr_mds) {
+                /* find out the setuid/setgid attempt */
+                setuid = (rsd->rsd_uid != rsd->rsd_fsuid);
+                setgid = (rsd->rsd_gid != rsd->rsd_fsgid ||
+                          rsd->rsd_gid != lsd->lsd_gid);
+
+                /* check permission of setuid */
+                if (setuid && !(lsd_perms & LSD_PERM_SETUID)) {
+                        CWARN("mds blocked setuid attempt (%u -> %u) "
+                              "from "LPU64"\n", rsd->rsd_uid, rsd->rsd_fsuid,
+                              peernid);
+                        /* don't leak the LSD reference taken above */
+                        drop_ucred_lsd(ucred);
+                        RETURN(-EPERM);
+                }
+
+                /* check permission of setgid */
+                if (setgid && !(lsd_perms & LSD_PERM_SETGID)) {
+                        CWARN("mds blocked setgid attempt (%u:%u/%u:%u -> %u) "
+                              "from "LPU64"\n", rsd->rsd_uid, rsd->rsd_gid,
+                              rsd->rsd_fsuid, rsd->rsd_fsgid, lsd->lsd_gid,
+                              peernid);
+                        /* don't leak the LSD reference taken above */
+                        drop_ucred_lsd(ucred);
+                        RETURN(-EPERM);
+                }
+        }
+
+        root_squashed = mds_squash_root(mds, rsd, &peernid);
+
+        /* remove privilege for non-root user */
+        if (rsd->rsd_fsuid)
+                rsd->rsd_cap &= ~CAP_FS_MASK;
+
+        /* by now every fields other than groups in rsd have been granted */
+        ucred->luc_nid = peernid;
+        ucred->luc_uid = rsd->rsd_uid;
+        ucred->luc_gid = rsd->rsd_gid;
+        ucred->luc_fsuid = rsd->rsd_fsuid;
+        ucred->luc_fsgid = rsd->rsd_fsgid;
+        ucred->luc_cap = rsd->rsd_cap;
+
+        /* don't use any supplementary group if we squashed root.
+         * XXX The exact behavior of root_squash is not defined, we just
+         * keep the reminder here */
+        if (root_squashed)
+                RETURN(0);
+
+        /* install groups from LSD */
+        if (lsd->lsd_ginfo) {
+                ucred->luc_ginfo = lsd->lsd_ginfo;
+                get_group_info(ucred->luc_ginfo);
+        }
+
+        /* everything is done if we don't allow setgroups, or it is
+         * from remote client (which implies forced to be no-setgroups).
+         *
+         * Note: remote user's supplementary groups sent along the request
+         * (if any) are all ignored, but we make the mapped local user's
+         * supplementary groups take effect.
+         */
+        if (med->med_remote || !(lsd_perms & LSD_PERM_SETGRP))
+                RETURN(0);
+
+        /* root could set any groups as he want (if allowed), normal
+         * users only could reduce his group array.
+         */
+        if (ucred->luc_uid == 0) {
+                drop_ucred_ginfo(ucred);
+
+                if (rsd->rsd_ngroups == 0)
+                        RETURN(0);
+
+                gnew = groups_alloc(rsd->rsd_ngroups);
+                if (!gnew) {
+                        CERROR("out of memory\n");
+                        drop_ucred_lsd(ucred);
+                        RETURN(-ENOMEM);
+                }
+                groups_from_buffer(gnew, rsd->rsd_groups);
+                groups_sort(gnew); /* don't rely on client doing this */
+
+                ucred->luc_ginfo = gnew;
+        } else {
+                __u32 set = 0, cur = 0;
+                struct group_info *ginfo = ucred->luc_ginfo;
+
+                if (!ginfo)
+                        RETURN(0);
+
+                /* Note: freeing a group_info count on 'nblocks' instead of
+                 * 'ngroups', thus we can safely alloc enough buffer and reduce
+                 * and ngroups number later.
+                 */
+                gnew = groups_alloc(rsd->rsd_ngroups);
+                if (!gnew) {
+                        CERROR("out of memory\n");
+                        drop_ucred_ginfo(ucred);
+                        drop_ucred_lsd(ucred);
+                        RETURN(-ENOMEM);
+                }
+
+                /* keep only the requested groups the user really belongs to */
+                while (cur < rsd->rsd_ngroups) {
+                        if (groups_search(ginfo, rsd->rsd_groups[cur])) {
+                                GROUP_AT(gnew, set) = rsd->rsd_groups[cur];
+                                set++;
+                        }
+                        cur++;
+                }
+                gnew->ngroups = set;
+
+                /* the surviving groups are still in client order; sort them
+                 * for the same reason as in the root branch (the client is
+                 * not trusted to have done it) */
+                groups_sort(gnew);
+
+                put_group_info(ucred->luc_ginfo);
+                ucred->luc_ginfo = gnew;
+        }
+        RETURN(0);
+}
+
+/*
+ * Release whatever @ucred still holds: first the group_info reference,
+ * then the LSD reference.  Each pointer is cleared once dropped.
+ */
+void mds_exit_ucred(struct lvfs_ucred *ucred)
+{
+        ENTRY;
+
+        if (ucred->luc_ginfo != NULL) {
+                put_group_info(ucred->luc_ginfo);
+                ucred->luc_ginfo = NULL;
+        }
+
+        if (ucred->luc_lsd != NULL) {
+                mds_put_lsd(ucred->luc_lsd);
+                ucred->luc_lsd = NULL;
+        }
+
+        EXIT;
+}
+