struct lu_ucred;
extern void lustre_groups_from_list(struct group_info *ginfo, gid_t *glist);
+extern void lustre_list_from_groups(gid_t *glist, struct group_info *ginfo);
extern void lustre_groups_sort(struct group_info *group_info);
extern int lustre_groups_search(struct group_info *group_info, gid_t grp);
extern int lustre_in_group_p(struct lu_ucred *mu, gid_t grp);
#include <obd.h>
#include <lustre_sec.h>
+/* The special identity_upcall value "INTERNAL" implements a specific behavior
+ * that does not involve an actual upcall. Instead, the cache is filled with
+ * the supplementary groups read from the user's credentials provided as input
+ * (usually taken from the client request), cumulatively at each request.
+ */
+#define IDENTITY_UPCALL_INTERNAL "INTERNAL"
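+
+/* Illustration only (not part of this patch): like other identity upcall
+ * handlers, the internal one is expected to be selected through the
+ * existing identity_upcall tunable, e.g.:
+ *   lctl set_param mdt.<fsname>-MDT0000.identity_upcall=INTERNAL
+ */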
+
/** \defgroup ucache ucache
*
* @{
CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_OPEN_DELAY, cfs_fail_val);
- rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
- &ll_md_blocking_ast, 0);
+ rc = ll_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
+ &ll_md_blocking_ast, 0, true);
kfree(name);
ll_finish_md_op_data(op_data);
if (rc == -ESTALE) {
it.it_open_flags = fmode | open_flags;
it.it_open_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
+ rc = ll_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
&ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * broken;
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked.
	 */
- LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+ LDLM_FL_NO_LRU | LDLM_FL_EXCL,
+ true);
ll_finish_md_op_data(op_data);
ptlrpc_req_put(req);
if (rc < 0)
RETURN(PTR_ERR(op_data));
op_data->op_flags |= flags;
- rc = md_intent_lock(exp, op_data, &oit, &req, &ll_md_blocking_ast, 0);
+ rc = ll_intent_lock(exp, op_data, &oit, &req,
+ &ll_md_blocking_ast, 0, true);
ll_finish_md_op_data(op_data);
if (rc < 0) {
rc = ll_inode_revalidate_fini(inode, rc);
LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
sbi->ll_fsname, PFID(&lli->lli_fid), inode);
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
- &ll_md_blocking_ast, 0);
+ rc = ll_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
+ &ll_md_blocking_ast, 0, true);
if (it.it_request != NULL)
ptlrpc_req_put(it.it_request);
it.it_request = NULL;
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
int ll_rmdir_entry(struct inode *dir, char *name, int namelen);
void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
+int ll_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
+ struct lookup_intent *it, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags,
+ bool tryagain);
/* llite/rw.c */
int ll_writepage(struct page *page, struct writeback_control *wbc);
return rc;
}
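+
+/* Extract the ACL (if any) from the MDS reply in @req: locate the ACL buffer
+ * advertised by the mdt_body, convert it to a struct posix_acl and validate
+ * it. *acl is set to NULL when the reply carries no ACL.
+ */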
+static int get_acl_from_req(struct ptlrpc_request *req, struct posix_acl **acl)
+{
+ struct mdt_body *body;
+ void *buf;
+ int rc;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ if (!body->mbo_aclsize) {
+ *acl = NULL;
+ return 0;
+ }
+
+ buf = req_capsule_server_sized_get(&req->rq_pill, &RMF_ACL,
+ body->mbo_aclsize);
+ if (!buf)
+ return -EPROTO;
+
+ *acl = posix_acl_from_xattr(&init_user_ns, buf, body->mbo_aclsize);
+ if (IS_ERR_OR_NULL(*acl)) {
+ rc = *acl ? PTR_ERR(*acl) : 0;
+ CDEBUG(D_SEC, "convert xattr to acl: %d\n", rc);
+ return rc;
+ }
+
+ rc = posix_acl_valid(&init_user_ns, *acl);
+ if (rc) {
+ CDEBUG(D_SEC, "validate acl: %d\n", rc);
+ posix_acl_release(*acl);
+ return rc;
+ }
+
+ return 0;
+}
+
+static inline int accmode_from_openflags(u64 open_flags)
+{
+ unsigned int may_mask = 0;
+
+ if (open_flags & (FMODE_READ | FMODE_PREAD))
+ may_mask |= MAY_READ;
+ if (open_flags & (FMODE_WRITE | FMODE_PWRITE))
+ may_mask |= MAY_WRITE;
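+	/* an exec open only requires exec permission (cf. the VFS, which
+	 * sets acc_mode to MAY_EXEC for exec opens), so it overrides
+	 * read/write
+	 */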
+ if (open_flags & FMODE_EXEC)
+ may_mask = MAY_EXEC;
+
+ return may_mask;
+}
+
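+/* Walk @acl looking for a group entry that the current task belongs to and
+ * whose permissions cover @want; return that gid, or INVALID_GID if no
+ * matching group entry is found.
+ */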
+static __u32 get_uc_group_from_acl(const struct posix_acl *acl, int want)
+{
+ const struct posix_acl_entry *pa, *pe;
+
+ FOREACH_ACL_ENTRY(pa, acl, pe) {
+ switch (pa->e_tag) {
+ case ACL_GROUP_OBJ:
+ case ACL_GROUP:
+ if (in_group_p(pa->e_gid) &&
+ (pa->e_perm & want) == want)
+ return (__u32)from_kgid(&init_user_ns,
+ pa->e_gid);
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+ }
+
+ return (__u32)__kgid_val(INVALID_GID);
+}
+
+/* This function implements a retry mechanism on top of md_intent_lock().
+ * This is useful because the client can provide at most 2 supplementary
+ * groups in the request sent to the MDS, but it sometimes does not know
+ * which ones are useful for credentials calculation on the server side.
+ * For instance in case of a lookup, the client does not have the child
+ * inode yet when it sends the intent lock request.
+ * Fortunately, the server can hint at the useful groups by putting the
+ * target inode's GID, and also its ACL, in the request reply.
+ * So in case the server replies -EACCES, we check the user's credentials
+ * against those, and retry the intent lock request if we find a matching
+ * supplementary group.
+ */
+int ll_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
+ struct lookup_intent *it, struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags,
+ bool tryagain)
+{
+ int rc;
+
+ ENTRY;
+
+intent:
+ rc = md_intent_lock(exp, op_data, it, reqp, cb_blocking,
+ extra_lock_flags);
+ CDEBUG(D_VFSTRACE,
+ "intent lock %d on i1 "DFID" suppgids %d %d: rc %d\n",
+ it->it_op, PFID(&op_data->op_fid1),
+ op_data->op_suppgids[0], op_data->op_suppgids[1], rc);
+ if (rc == -EACCES && tryagain && it->it_op & IT_OPEN &&
+ it_disposition(it, DISP_OPEN_DENY) && *reqp) {
+ struct mdt_body *body;
+ __u32 new_suppgid;
+
+ body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+ new_suppgid = body->mbo_gid;
+ CDEBUG(D_SEC, "new suppgid from body: %d\n", new_suppgid);
+ if (op_data->op_suppgids[0] == body->mbo_gid ||
+ op_data->op_suppgids[1] == body->mbo_gid ||
+ !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
+ int accmode = accmode_from_openflags(it->it_open_flags);
+ struct posix_acl *acl;
+
+ rc = get_acl_from_req(*reqp, &acl);
+ if (rc || !acl)
+ GOTO(out, rc = -EACCES);
+
+ new_suppgid = get_uc_group_from_acl(acl, accmode);
+ posix_acl_release(acl);
+ CDEBUG(D_SEC, "new suppgid from acl: %d\n",
+ new_suppgid);
+
+ if (new_suppgid == (__u32)__kgid_val(INVALID_GID))
+ GOTO(out, rc = -EACCES);
+ }
+
+ if (!(it->it_open_flags & MDS_OPEN_BY_FID))
+ fid_zero(&op_data->op_fid2);
+ op_data->op_suppgids[1] = new_suppgid;
+ ptlrpc_req_put(*reqp);
+ *reqp = NULL;
+ ll_intent_release(it);
+ tryagain = false;
+ goto intent;
+ }
+
+out:
+ RETURN(rc);
+}
+
static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
struct lookup_intent *it,
void **secctx, __u32 *secctxlen,
it->it_open_flags |= MDS_OPEN_PCC;
}
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- &ll_md_blocking_ast, 0);
/* If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
* client does not know which suppgid should be sent to the MDS, or
 * some other process(es) changed the target file's GID after this RPC
 * was sent to the MDS with the suppgid as the original GID, then we
 * should try again with the right suppgid.
*/
- if (rc == -EACCES && it->it_op & IT_OPEN &&
- it_disposition(it, DISP_OPEN_DENY)) {
- struct mdt_body *body;
-
- LASSERT(req != NULL);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (op_data->op_suppgids[0] == body->mbo_gid ||
- op_data->op_suppgids[1] == body->mbo_gid ||
- !in_group_p(make_kgid(&init_user_ns, body->mbo_gid)))
- GOTO(out, retval = ERR_PTR(-EACCES));
-
- fid_zero(&op_data->op_fid2);
- op_data->op_suppgids[1] = body->mbo_gid;
- ptlrpc_req_put(req);
- req = NULL;
- ll_intent_release(it);
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- &ll_md_blocking_ast, 0);
- }
-
+ rc = ll_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
+ &ll_md_blocking_ast, 0, true);
if (rc < 0)
GOTO(out, retval = ERR_PTR(rc));
op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
- rc = md_intent_lock(exp, op_data, oit, req, &ll_md_blocking_ast, 0);
+ rc = ll_intent_lock(exp, op_data, oit, req,
+ &ll_md_blocking_ast, 0, true);
ll_finish_md_op_data(op_data);
*req = oit->it_request;
mdt_tunables_fini(m);
upcall_cache_cleanup(m->mdt_identity_cache);
m->mdt_identity_cache = NULL;
+ upcall_cache_cleanup(m->mdt_identity_cache_int);
+ m->mdt_identity_cache_int = NULL;
tgt_fini(env, &m->mdt_lut);
struct lu_site *s;
struct seq_server_site *ss_site;
const char *identity_upcall = "NONE";
+ char cache_internal[NAME_MAX + 1] = { 0 };
struct md_device *next;
struct lu_fid fid;
int rc;
*/
if (m->mdt_opts.mo_acl)
identity_upcall = MDT_IDENTITY_UPCALL_PATH;
-
m->mdt_identity_cache = upcall_cache_init(mdt_obd_name(m),
identity_upcall,
UC_IDCACHE_HASH_SIZE,
GOTO(err_free_hsm, rc);
}
+ snprintf(cache_internal, sizeof(cache_internal), "%s_int",
+ mdt_obd_name(m));
+ m->mdt_identity_cache_int = upcall_cache_init(cache_internal,
+ IDENTITY_UPCALL_INTERNAL,
+ UC_IDCACHE_HASH_SIZE,
+						      1200, /* entry expire: 20 min */
+ 30, /* acquire expire: 30 s */
+ true, /* acquire can replay */
+ &mdt_identity_upcall_cache_ops);
+ if (IS_ERR(m->mdt_identity_cache_int)) {
+ rc = PTR_ERR(m->mdt_identity_cache_int);
+ m->mdt_identity_cache_int = NULL;
+ GOTO(err_cache, rc);
+ }
+
rc = mdt_tunables_init(m, dev);
if (rc) {
CERROR("Can't init MDT lprocfs, rc %d\n", rc);
err_procfs:
mdt_tunables_fini(m);
err_recovery:
+ upcall_cache_cleanup(m->mdt_identity_cache_int);
+ m->mdt_identity_cache_int = NULL;
+err_cache:
upcall_cache_cleanup(m->mdt_identity_cache);
m->mdt_identity_cache = NULL;
err_free_hsm:
static void mdt_identity_entry_init(struct upcall_cache_entry *entry,
void *unused)
{
- entry->u.identity.mi_uc_entry = entry;
+ struct md_identity *identity = &entry->u.identity;
+
+ memset(identity, 0, sizeof(*identity));
+ identity->mi_uc_entry = entry;
}
static void mdt_identity_entry_free(struct upcall_cache *cache,
RETURN(rc);
}
-struct md_identity *mdt_identity_get(struct upcall_cache *cache, __u32 uid)
+struct md_identity *mdt_identity_get(struct upcall_cache *cache, __u32 uid,
+ struct mdt_thread_info *info)
{
struct upcall_cache_entry *entry;
if (!cache)
return ERR_PTR(-ENOENT);
- entry = upcall_cache_get_entry(cache, (__u64)uid, NULL);
+ entry = upcall_cache_get_entry(cache, (__u64)uid,
+ info ? mdt_ucred(info) : NULL);
if (unlikely(!entry))
return ERR_PTR(-ENOENT);
if (IS_ERR(entry))
__u32 mdt_brw_size;
struct upcall_cache *mdt_identity_cache;
+ struct upcall_cache *mdt_identity_cache_int;
unsigned int mdt_evict_tgt_nids:1,
mdt_dom_read_open:1,
#define UC_IDCACHE_HASH_SIZE 128
extern struct upcall_cache_ops mdt_identity_upcall_cache_ops;
-struct md_identity *mdt_identity_get(struct upcall_cache *, __u32);
+struct md_identity *mdt_identity_get(struct upcall_cache *cache, __u32 uid,
+ struct mdt_thread_info *info);
void mdt_identity_put(struct upcall_cache *, struct md_identity *);
if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
identity = mdt_identity_get(mdt->mdt_identity_cache,
- pud->pud_uid);
+ pud->pud_uid, info);
if (IS_ERR(identity)) {
if (unlikely(PTR_ERR(identity) == -EREMCHG ||
cap_raised(ucred->uc_cap,
if (is_identity_get_disabled(mdt->mdt_identity_cache))
RETURN(0);
- identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
+ identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid,
+ info);
if (IS_ERR(identity)) {
if (unlikely(PTR_ERR(identity) == -EREMCHG)) {
RETURN(0);
if (!is_identity_get_disabled(mdt->mdt_identity_cache)) {
identity = mdt_identity_get(mdt->mdt_identity_cache,
- uc->uc_fsuid);
+ uc->uc_fsuid, info);
if (IS_ERR(identity)) {
if (unlikely(PTR_ERR(identity) == -EREMCHG ||
cap_raised(uc->uc_cap,
mdt->mdt_identity_cache->uc_entry_expire);
}
-static ssize_t identity_expire_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t entry_expire_store(struct upcall_cache *cache,
+ const char *buffer, size_t count)
{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
time64_t val;
int rc;
if (val < 0)
return -ERANGE;
- mdt->mdt_identity_cache->uc_entry_expire = val;
+ cache->uc_entry_expire = val;
return count;
}
+
+static ssize_t identity_expire_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return entry_expire_store(mdt->mdt_identity_cache, buffer, count);
+}
LUSTRE_RW_ATTR(identity_expire);
static ssize_t identity_acquire_expire_show(struct kobject *kobj,
mdt->mdt_identity_cache->uc_acquire_expire);
}
-static ssize_t identity_acquire_expire_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t acquire_expire_store(struct upcall_cache *cache,
+ const char *buffer, size_t count)
{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
time64_t val;
int rc;
if (val < 0 || val > INT_MAX)
return -ERANGE;
- mdt->mdt_identity_cache->uc_acquire_expire = val;
+ cache->uc_acquire_expire = val;
return count;
}
+
+static ssize_t identity_acquire_expire_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return acquire_expire_store(mdt->mdt_identity_cache, buffer, count);
+}
LUSTRE_RW_ATTR(identity_acquire_expire);
static ssize_t identity_upcall_show(struct kobject *kobj,
}
LUSTRE_RW_ATTR(identity_upcall);
-static ssize_t identity_flush_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer, size_t count)
+static ssize_t flush_store(struct upcall_cache *cache,
+ const char *buffer, size_t count)
{
- struct obd_device *obd = container_of(kobj, struct obd_device,
- obd_kset.kobj);
- struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
int uid;
int rc;
if (rc)
return rc;
- mdt_flush_identity(mdt->mdt_identity_cache, uid);
+ mdt_flush_identity(cache, uid);
return count;
}
+
+static ssize_t identity_flush_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return flush_store(mdt->mdt_identity_cache, buffer, count);
+}
LUSTRE_WO_ATTR(identity_flush);
static ssize_t
}
LPROC_SEQ_FOPS_WR_ONLY(mdt, identity_info);
+static ssize_t identity_int_expire_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%lld\n",
+ mdt->mdt_identity_cache_int->uc_entry_expire);
+}
+
+static ssize_t identity_int_expire_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return entry_expire_store(mdt->mdt_identity_cache_int, buffer, count);
+}
+LUSTRE_RW_ATTR(identity_int_expire);
+
+static ssize_t identity_int_acquire_expire_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%lld\n",
+ mdt->mdt_identity_cache_int->uc_acquire_expire);
+}
+
+static ssize_t identity_int_acquire_expire_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return acquire_expire_store(mdt->mdt_identity_cache_int, buffer, count);
+}
+LUSTRE_RW_ATTR(identity_int_acquire_expire);
+
+static ssize_t identity_int_flush_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+ return flush_store(mdt->mdt_identity_cache_int, buffer, count);
+}
+LUSTRE_WO_ATTR(identity_int_flush);
+
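+/* Usage sketch (illustrative, not part of this patch): the attributes above
+ * surface next to the existing identity tunables, e.g.:
+ *   lctl set_param mdt.<fsname>-MDT0000.identity_int_expire=600
+ *   lctl set_param mdt.<fsname>-MDT0000.identity_int_acquire_expire=15
+ *   lctl set_param mdt.<fsname>-MDT0000.identity_int_flush=500
+ */
+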
static int mdt_site_stats_seq_show(struct seq_file *m, void *data)
{
struct obd_device *obd = m->private;
&lustre_attr_identity_acquire_expire.attr,
&lustre_attr_identity_upcall.attr,
&lustre_attr_identity_flush.attr,
+ &lustre_attr_identity_int_expire.attr,
+ &lustre_attr_identity_int_acquire_expire.attr,
+ &lustre_attr_identity_int_flush.attr,
&lustre_attr_evict_tgt_nids.attr,
&lustre_attr_enable_cap_mask.attr,
&lustre_attr_enable_chprojid_gid.attr,
obdclass-all-objs += page_pools.o
@SERVER_TRUE@obdclass-all-objs += idmap.o
+@SERVER_TRUE@obdclass-all-objs += upcall_cache_internal.o
@SERVER_TRUE@obdclass-all-objs += lprocfs_jobstats.o
@SERVER_TRUE@obdclass-all-objs += lprocfs_status_server.o
@SERVER_TRUE@obdclass-all-objs += lu_ucred.o
EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
EXTRA_DIST = $(obdclass-all-objs:.o=.c) llog_internal.h
-EXTRA_DIST += cl_internal.h local_storage.h
+EXTRA_DIST += cl_internal.h local_storage.h upcall_cache_internal.h
EXTRA_DIST += range_lock.c
@SERVER_FALSE@EXTRA_DIST += idmap.c
+@SERVER_FALSE@EXTRA_DIST += upcall_cache_internal.c
@SERVER_FALSE@EXTRA_DIST += lprocfs_jobstats.c
@SERVER_FALSE@EXTRA_DIST += lprocfs_status_server.c
@SERVER_FALSE@EXTRA_DIST += lu_ucred.c
}
EXPORT_SYMBOL(lustre_groups_from_list);
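+
+/* Inverse of lustre_groups_from_list(): copy the gids held in @ginfo out to
+ * the @glist array, which the caller must have sized to at least
+ * ginfo->ngroups entries.
+ */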
+void lustre_list_from_groups(gid_t *glist, struct group_info *ginfo)
+{
+#ifdef HAVE_GROUP_INFO_GID
+ memcpy(glist, ginfo->gid, ginfo->ngroups * sizeof(__u32));
+#else
+ int i;
+ int count = ginfo->ngroups;
+
+ /* fill in gid array from group_info */
+ for (i = 0; i < ginfo->nblocks && count > 0; i++) {
+ int cp_count = min(CFS_NGROUPS_PER_BLOCK, count);
+ int off = i * CFS_NGROUPS_PER_BLOCK;
+ int len = cp_count * sizeof(*glist);
+
+ memcpy(glist + off, ginfo->blocks[i], len);
+ count -= cp_count;
+ }
+#endif
+}
+EXPORT_SYMBOL(lustre_list_from_groups);
+
/* groups_sort() is copied from linux kernel! */
/* a simple shell-metzner sort */
void lustre_groups_sort(struct group_info *group_info)
#include <libcfs/libcfs.h>
#include <uapi/linux/lnet/lnet-types.h>
#include <upcall_cache.h>
+#include "upcall_cache_internal.h"
static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache,
__u64 key, void *args)
return entry;
}
-/* protected by cache lock */
-static void free_entry(struct upcall_cache *cache,
- struct upcall_cache_entry *entry)
-{
- if (cache->uc_ops->free_entry)
- cache->uc_ops->free_entry(cache, entry);
-
- list_del(&entry->ue_hash);
- CDEBUG(D_OTHER, "destroy cache entry %p for key %llu\n",
- entry, entry->ue_key);
- LIBCFS_FREE(entry, sizeof(*entry));
-}
-
static inline int upcall_compare(struct upcall_cache *cache,
struct upcall_cache_entry *entry,
__u64 key, void *args)
return 0;
}
-static inline void get_entry(struct upcall_cache_entry *entry)
-{
- atomic_inc(&entry->ue_refcount);
-}
-
-static inline void put_entry(struct upcall_cache *cache,
- struct upcall_cache_entry *entry)
-{
- if (atomic_dec_and_test(&entry->ue_refcount) &&
- (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
- free_entry(cache, entry);
- }
-}
-
static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
{
if (!*writelock) {
EXPORT_SYMBOL(upcall_cache_set_upcall);
static inline int refresh_entry(struct upcall_cache *cache,
- struct upcall_cache_entry *entry)
+ struct upcall_cache_entry *entry, __u32 fsgid)
{
LASSERT(cache->uc_ops->do_upcall);
return cache->uc_ops->do_upcall(cache, entry);
__u64 key, void *args)
{
struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
+ gid_t fsgid = (__u32)__kgid_val(INVALID_GID);
+ struct group_info *ginfo = NULL;
bool failedacquiring = false;
struct list_head *head;
wait_queue_entry_t wait;
bool writelock;
- int rc, found;
+ int rc = 0, found;
ENTRY;
/* now we hold a write lock */
get_entry(entry);
+ /* special processing of supp groups for identity upcall */
+ if (strcmp(cache->uc_upcall, IDENTITY_UPCALL_INTERNAL) == 0) {
+ write_unlock(&cache->uc_lock);
+ rc = upcall_cache_get_entry_internal(cache, entry, args,
+ &fsgid, &ginfo);
+ write_lock(&cache->uc_lock);
+ if (rc)
+ GOTO(out, entry = ERR_PTR(rc));
+ }
+
/* acquire for new one */
if (UC_CACHE_IS_NEW(entry)) {
- UC_CACHE_SET_ACQUIRING(entry);
UC_CACHE_CLEAR_NEW(entry);
- write_unlock(&cache->uc_lock);
- rc = refresh_entry(cache, entry);
- write_lock(&cache->uc_lock);
+ if (strcmp(cache->uc_upcall, IDENTITY_UPCALL_INTERNAL) == 0) {
+ refresh_entry_internal(cache, entry, fsgid, &ginfo);
+ } else {
+ UC_CACHE_SET_ACQUIRING(entry);
+ write_unlock(&cache->uc_lock);
+ rc = refresh_entry(cache, entry, fsgid);
+ write_lock(&cache->uc_lock);
+ }
entry->ue_acquire_expire = ktime_get_seconds() +
cache->uc_acquire_expire;
if (rc < 0) {
write_unlock(&cache->uc_lock);
else
read_unlock(&cache->uc_lock);
+ if (ginfo)
+ groups_free(ginfo);
RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);
--- /dev/null
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2023, Whamcloud.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ */
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <lustre_idmap.h>
+#include <md_object.h>
+#include <upcall_cache.h>
+#include "upcall_cache_internal.h"
+
+inline void refresh_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ __u32 fsgid, struct group_info **ginfo)
+{
+ get_entry(entry);
+ entry->u.identity.mi_uid = entry->ue_key;
+ entry->u.identity.mi_gid = fsgid;
+ if (*ginfo)
+ entry->u.identity.mi_ginfo = *ginfo;
+ entry->u.identity.mi_nperms = 0;
+ entry->u.identity.mi_perms = NULL;
+ entry->ue_expire = ktime_get_seconds() + cache->uc_entry_expire;
+ UC_CACHE_SET_VALID(entry);
+ put_entry(cache, entry);
+
+ CDEBUG(D_OTHER,
+ "%s: INTERNAL refreshed entry for '%llu' with %d groups\n",
+ cache->uc_name, entry->ue_key,
+ *ginfo ? (*ginfo)->ngroups : 0);
+
+ *ginfo = NULL;
+}
+
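+/* Merge the supplementary groups provided in the request credentials (@args,
+ * a struct lu_ucred) with the groups already present in the entry's cached
+ * identity. The merged set is returned via @pginfo so that the caller can
+ * install it in the entry. Runs without cache->uc_lock held.
+ */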
+int upcall_cache_get_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ void *args, gid_t *fsgid,
+ struct group_info **pginfo)
+{
+ struct lu_ucred *uc = (struct lu_ucred *)args;
+ gid_t inval = (__u32)__kgid_val(INVALID_GID);
+ struct md_identity *identity;
+ bool supp_in_ginfo[2];
+ gid_t *groups = NULL, *glist_p;
+ int i, groups_num, ginfo_ngroups = 0, rc = 0;
+
+ if (*pginfo || !uc)
+ /* ginfo already built, or no creds provided
+ * => return immediately
+ */
+ goto out;
+
+restart:
+ groups_num = 0;
+ /* We just deal with NEW and VALID entries. Other states will
+ * be handled by the caller, no need to return an error.
+ */
+ if (!UC_CACHE_IS_NEW(entry) && !UC_CACHE_IS_VALID(entry))
+ goto out;
+
+ identity = &entry->u.identity;
+ *fsgid = uc->uc_fsgid;
+ supp_in_ginfo[0] = (uc->uc_suppgids[0] == inval);
+ supp_in_ginfo[1] = (uc->uc_suppgids[1] == inval);
+ if (identity->mi_ginfo && identity->mi_ginfo->ngroups)
+ ginfo_ngroups = identity->mi_ginfo->ngroups;
+
+ /* check if provided supp groups are already in cache */
+ for (i = 0; i < 2 && uc->uc_suppgids[i] != inval; i++) {
+		if (unlikely(uc->uc_suppgids[i] == uc->uc_fsgid)) {
+ /* Do not place user's group ID in group list */
+ supp_in_ginfo[i] = true;
+ } else if (ginfo_ngroups) {
+ atomic_inc(&identity->mi_ginfo->usage);
+ supp_in_ginfo[i] =
+ lustre_groups_search(identity->mi_ginfo,
+ uc->uc_suppgids[i]);
+ atomic_dec(&identity->mi_ginfo->usage);
+ }
+ }
+
+ /* build new list of groups, which is a merge of provided supp
+ * groups and all other groups already in cache
+ */
+ if (!supp_in_ginfo[0] || !supp_in_ginfo[1]) {
+ CDEBUG(D_OTHER,
+ "%s: INTERNAL might add suppgids %d,%d for entry '%llu'\n",
+ cache->uc_name, uc->uc_suppgids[0],
+ uc->uc_suppgids[1], entry->ue_key);
+
+ if (!supp_in_ginfo[0])
+ groups_num++;
+ if (!supp_in_ginfo[1])
+ groups_num++;
+ CFS_ALLOC_PTR_ARRAY(groups, groups_num + ginfo_ngroups);
+ if (groups == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ glist_p = groups;
+ for (i = 0; i < 2; i++) {
+ if (!supp_in_ginfo[i])
+ *(glist_p++) = uc->uc_suppgids[i];
+ }
+
+ /* An existing entry is never modified once it is marked as
+ * VALID. But it can change when updated from NEW to VALID,
+ * for instance the mi_ginfo can be set. This means the number
+ * of groups can only grow from 0 (mi_ginfo not set) to
+ * mi_ginfo->ngroups.
+ * So only copy mi_ginfo to the groups array if necessary space
+ * was allocated for it.
+ * In case we detect a concurrent change in mi_ginfo->ngroups,
+ * just start over.
+ */
+ if (ginfo_ngroups) {
+ atomic_inc(&identity->mi_ginfo->usage);
+ lustre_list_from_groups(glist_p, identity->mi_ginfo);
+ atomic_dec(&identity->mi_ginfo->usage);
+ } else if (identity->mi_ginfo && identity->mi_ginfo->ngroups) {
+ CFS_FREE_PTR_ARRAY(groups, groups_num + ginfo_ngroups);
+ groups = NULL;
+ goto restart;
+ }
+
+		if (!UC_CACHE_IS_NEW(entry)) {
+			/* Force refresh, as an existing cache entry cannot
+			 * be modified once it is VALID. We are called from
+			 * upcall_cache_get_entry() after the write lock has
+			 * been dropped.
+			 */
+ write_lock(&cache->uc_lock);
+ entry->ue_expire = ktime_get_seconds();
+ write_unlock(&cache->uc_lock);
+ }
+ }
+
+out:
+ if (groups) {
+ int ngroups = groups_num + ginfo_ngroups;
+ struct group_info *ginfo;
+
+ ginfo = groups_alloc(ngroups);
+ if (ginfo) {
+ lustre_groups_from_list(ginfo, groups);
+ lustre_groups_sort(ginfo);
+ *pginfo = ginfo;
+ } else {
+ CDEBUG(D_OTHER,
+ "failed to alloc %d groups: rc = %d\n",
+ ngroups, -ENOMEM);
+ rc = -ENOMEM;
+ }
+ CFS_FREE_PTR_ARRAY(groups, ngroups);
+ }
+ return rc;
+}
--- /dev/null
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2023, Whamcloud.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ */
+
+#ifndef _UPCALL_CACHE_INTERNAL_H
+#define _UPCALL_CACHE_INTERNAL_H
+
+#include <upcall_cache.h>
+
+/* protected by cache lock */
+static inline void free_entry(struct upcall_cache *cache,
+			      struct upcall_cache_entry *entry)
+{
+ if (cache->uc_ops->free_entry)
+ cache->uc_ops->free_entry(cache, entry);
+
+ list_del(&entry->ue_hash);
+ CDEBUG(D_OTHER, "destroy cache entry %p for key %llu\n",
+ entry, entry->ue_key);
+ LIBCFS_FREE(entry, sizeof(*entry));
+}
+
+static inline void get_entry(struct upcall_cache_entry *entry)
+{
+ atomic_inc(&entry->ue_refcount);
+}
+
+static inline void put_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ if (atomic_dec_and_test(&entry->ue_refcount) &&
+ (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
+ free_entry(cache, entry);
+ }
+}
+
+#ifdef HAVE_SERVER_SUPPORT
+void refresh_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ __u32 fsgid, struct group_info **ginfo);
+int upcall_cache_get_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ void *args, gid_t *fsgid,
+ struct group_info **ginfo);
+#else /* HAVE_SERVER_SUPPORT */
+static inline
+void refresh_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ __u32 fsgid, struct group_info **ginfo)
+{ }
+static inline int upcall_cache_get_entry_internal(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry,
+ void *args, gid_t *fsgid,
+ struct group_info **ginfo)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif /* _UPCALL_CACHE_INTERNAL_H */
#endif
static const char usage[] =
-"Usage: %s -u user_id [-g grp_id] [-v euid] [-j egid] [-G[gid0,gid1,...]] command\n"
-" -u user_id switch to UID user_id\n"
-" -g grp_id switch to GID grp_id\n"
-" -v euid switch euid to UID\n"
-" -j egid switch egid to GID\n"
-" -G[gid0,gid1,...] set supplementary groups\n";
+"Usage: %s -u USER_ID [-g GRP_ID] [-v EUID] [-j EGID] [-G <GID0,GID1,...>] command\n"
+" -u USER_ID switch to uid USER_ID\n"
+" -g GRP_ID switch to gid GRP_ID\n"
+" -v EUID switch euid to EUID\n"
+" -j EGID switch egid to EGID\n"
+" -G <GID0,GID1,...> set supplementary groups\n";
static void Usage_and_abort(const char *name)
{
}
/* get UID and GID */
- while ((c = getopt(argc, argv, "+u:g:v:j:hG::")) != -1) {
+ while ((c = getopt(argc, argv, "+u:g:v:j:hG:")) != -1) {
switch (c) {
case 'u':
if (!isdigit(optarg[0])) {
break;
case 'G':
- num_supp = 0;
- if (!optarg || !isdigit(optarg[0]))
+ if (!optarg || !isdigit(optarg[0])) {
+ fprintf(stderr,
+				"Invalid parameter '%s' for option '-G'\n",
+ optarg);
+ Usage_and_abort(name);
break;
+ }
+ num_supp = 0;
while ((grp = strsep(&optarg, ",")) != NULL) {
printf("adding supp group %d\n", atoi(grp));
supp_groups[num_supp++] = atoi(grp);
exit(-1);
}
- if (num_supp >= 0) {
- status = setgroups(num_supp, supp_groups);
- if (status == -1) {
- perror("setting supplementary groups");
- exit(-1);
- }
+ if (num_supp == -1) {
+ /* at least one supp group needs to be provided,
+ * so take the gid
+ */
+ num_supp = 1;
+ supp_groups[0] = grp_id;
+ }
+ status = setgroups(num_supp, supp_groups);
+ if (status == -1) {
+ perror("setting supplementary groups");
+ exit(-1);
}
/* set UID */
check_times_61 $file "${tim[@]}"
echo "normal user migrate $tfile and test timestamps"
- $RUNAS $LFS migrate -n $file || error "cannot migrate $file"
+ $RUNAS -G0 $LFS migrate -n $file || error "cannot migrate $file"
check_times_61 $file "${tim[@]}"
}
run_test 61a "mirror extend and migrate preserve timestamps"
check_times_61 $file "${tim[@]}"
echo "normal user mirror extend $tfile and test timestamps"
- $RUNAS $LFS mirror extend -N -c1 -i1 $file ||
+ $RUNAS -G0 $LFS mirror extend -N -c1 -i1 $file ||
error "cannot extend mirror $file"
check_times_61 $file "${tim[@]}"
}
local hsm_root="$mntpt/$tdir"
local file=$DIR/$tfile
local fsuuid=$($LFS getname $MOUNT | awk '{print $1}')
+ local runascmd="$RUNAS -G0"
$LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
skip "Server does not support PCC-RO"
$LCTL pcc list $MOUNT
local mode=$($LCTL get_param -n llite.$fsuuid.pcc_mode)
- $RUNAS id
+ $runascmd id
echo "Mode: $mode"
echo "QQQQQ" > $file || error "write $file failed"
$LCTL set_param llite.$fsuuid.pcc_mode="0" ||
error "Set PCC mode failed"
stack_trap "$LCTL set_param llite.$fsuuid.pcc_mode=$mode" EXIT
- $RUNAS $LFS pcc attach -r $file &&
+ $runascmd $LFS pcc attach -r $file &&
error "User should not attach $file"
- $RUNAS cat $file || error "cat $file failed"
+ $runascmd cat $file || error "cat $file failed"
check_lpcc_state $file "none" client
$LCTL set_param llite.$fsuuid.pcc_mode="0400" ||
error "Set PCC mode failed"
stack_trap "$LCTL set_param llite.$fsuuid.pcc_mode=$mode" EXIT
- $RUNAS $LFS pcc attach -r $file &&
+ $runascmd $LFS pcc attach -r $file &&
error "User should not attach $file"
- $RUNAS cat $file || error "cat $file failed"
+ $runascmd cat $file || error "cat $file failed"
check_lpcc_state $file "none" client
$LCTL set_param llite.$fsuuid.pcc_mode="0004" ||
error "Set PCC mode failed"
- $RUNAS cat $file || error "cat $file failed"
+ $runascmd cat $file || error "cat $file failed"
$LFS pcc state $file
check_lpcc_state $file "readonly" client
- $RUNAS $LFS pcc detach $file || error "Detach $file failed"
+ $runascmd $LFS pcc detach $file || error "Detach $file failed"
- $RUNAS stat $file || error "stat $file failed"
+ $runascmd stat $file || error "stat $file failed"
$LFS pcc attach -r $file || error "failed to attach $file"
check_lpcc_state $file "readonly" client
- $RUNAS $LFS pcc detach $file || error "failed to detach $file"
+ $runascmd $LFS pcc detach $file || error "failed to detach $file"
$LCTL set_param llite.$fsuuid.pcc_mode="0040" ||
error "Set PCC mode failed"
chmod 660 $file || error "chmod $file failed"
- $RUNAS cat $file || error "cat $file failed"
+ $runascmd cat $file || error "cat $file failed"
$LFS pcc state $file
check_lpcc_state $file "readonly" client
- $RUNAS $LFS pcc detach $file || error "failed to detach $file"
+ $runascmd $LFS pcc detach $file || error "failed to detach $file"
- $RUNAS $LFS pcc attach -r $file || error "attach $file failed"
+ $runascmd $LFS pcc attach -r $file || error "attach $file failed"
stat $file || error "stat $file failed"
$LFS pcc state $file
check_lpcc_state $file "readonly" client
- $RUNAS $LFS pcc detach $file || error "Detach $file failed"
+ $runascmd $LFS pcc detach $file || error "Detach $file failed"
}
run_test 46 "Verify PCC mode setting works correctly"
local duration=""
[ "$SLOW" = "no" ] && duration=" -t 120"
- $RUNAS bash rundbench -D $DIR/$tdir 3 $duration ||
+ $RUNAS -G0 bash rundbench -D $DIR/$tdir 3 $duration ||
quota_error a $TSTUSR "dbench failed!"
is_project_quota_supported && change_project -C $DIR/$tdir
chown -R $RUNAS_ID $dir
echo "testing non-root lfs migrate mode when not all links are in xattr"
- check_migrate_links "$dir" 101 100 "$RUNAS"
+ check_migrate_links "$dir" 101 100 "$RUNAS -G0"
# clean up
rm -rf $dir