From f6cd6a880cd03fd1d3fd3d8e03133f0835ad275d Mon Sep 17 00:00:00 2001 From: "Mr. NeilBrown" Date: Thu, 18 Mar 2021 17:57:03 -0400 Subject: [PATCH] LU-9859 libcfs: remove linux-curproc.c The only real functionality remaining here is cfs_curproc_cap_pack(), and it can be trivially implemented as an inline in curproc.h. So do that and remove the file. The rest can be moved to jobid.c Linux-commit: 37d3b407dc14a13ec8bba3a4d7737c92f996e9c0 Change-Id: I3546841fa44accb19d0867099c17b16ede48228e Signed-off-by: Mr. NeilBrown Signed-off-by: Greg Kroah-Hartman Reviewed-on: https://review.whamcloud.com/41938 Tested-by: jenkins Reviewed-by: Arshad Hussain Tested-by: Maloo Reviewed-by: Oleg Drokin --- libcfs/include/libcfs/curproc.h | 7 +- libcfs/include/libcfs/linux/linux-misc.h | 2 - libcfs/libcfs/Makefile.in | 1 - libcfs/libcfs/linux/Makefile.am | 1 - libcfs/libcfs/linux/linux-curproc.c | 275 ------------------------------- lustre/mdt/mdt_coordinator.c | 5 +- lustre/mdt/mdt_lib.c | 15 +- lustre/mdt/mdt_restripe.c | 3 +- lustre/obdclass/jobid.c | 166 +++++++++++++++++++ lustre/obdecho/echo_client.c | 8 +- 10 files changed, 194 insertions(+), 289 deletions(-) delete mode 100644 libcfs/libcfs/linux/linux-curproc.c diff --git a/libcfs/include/libcfs/curproc.h b/libcfs/include/libcfs/curproc.h index 8501041..296e2d0 100644 --- a/libcfs/include/libcfs/curproc.h +++ b/libcfs/include/libcfs/curproc.h @@ -51,8 +51,11 @@ typedef __u32 cfs_cap_t; BIT(CAP_SYS_BOOT) | \ BIT(CAP_SYS_RESOURCE)) -cfs_cap_t cfs_curproc_cap_pack(void); -void cfs_curproc_cap_unpack(cfs_cap_t cap); +static inline cfs_cap_t cfs_curproc_cap_pack(void) +{ + /* cfs_cap_t is only the first word of kernel_cap_t */ + return (cfs_cap_t)(current_cap().cap[0]); +} /* __LIBCFS_CURPROC_H__ */ #endif diff --git a/libcfs/include/libcfs/linux/linux-misc.h b/libcfs/include/libcfs/linux/linux-misc.h index 0f230d3..2309c6d 100644 --- a/libcfs/include/libcfs/linux/linux-misc.h +++ b/libcfs/include/libcfs/linux/linux-misc.h @@ 
-58,8 +58,6 @@ #endif #endif /* HAVE_IOV_ITER_TYPE */ -int cfs_get_environ(const char *key, char *value, int *val_len); - int cfs_kernel_write(struct file *filp, const void *buf, size_t count, loff_t *pos); ssize_t cfs_kernel_read(struct file *file, void *buf, size_t count, diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in index 3587ba6..7a1c6cf 100644 --- a/libcfs/libcfs/Makefile.in +++ b/libcfs/libcfs/Makefile.in @@ -1,7 +1,6 @@ MODULES = libcfs libcfs-linux-objs := linux-prim.o -libcfs-linux-objs += linux-curproc.o libcfs-linux-objs += linux-hash.o libcfs-linux-objs += linux-wait.o libcfs-linux-objs += xarray.o diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am index cb1d0d2..ef34750 100644 --- a/libcfs/libcfs/linux/Makefile.am +++ b/libcfs/libcfs/linux/Makefile.am @@ -1,5 +1,4 @@ EXTRA_DIST = linux-prim.c \ - linux-curproc.c \ linux-hash.c \ linux-wait.c \ xarray.c diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c deleted file mode 100644 index da2b107..0000000 --- a/libcfs/libcfs/linux/linux-curproc.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2017, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/linux/linux-curproc.c - * - * Lustre curproc API implementation for Linux kernel - * - * Author: Nikita Danilov - */ - -#include -#ifdef HAVE_SCHED_HEADERS -#include -#include -#endif -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. - */ - -/* Currently all the CFS_CAP_* defines match CAP_* ones. */ -#define cfs_cap_pack(cap) (cap) -#define cfs_cap_unpack(cap) (cap) - -static void cfs_kernel_cap_pack(kernel_cap_t kcap, cfs_cap_t *cap) -{ -#if defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x19980330 - *cap = cfs_cap_pack(kcap); -#elif defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x20071026 - *cap = cfs_cap_pack(kcap[0]); -#elif defined(_KERNEL_CAPABILITY_VERSION) && _KERNEL_CAPABILITY_VERSION == 0x20080522 - /* XXX lost high byte */ - *cap = cfs_cap_pack(kcap.cap[0]); -#else - #error "need correct _KERNEL_CAPABILITY_VERSION " -#endif -} - -static void cfs_kernel_cap_unpack(kernel_cap_t *kcap, cfs_cap_t cap) -{ -#if defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x19980330 - *kcap = cfs_cap_unpack(cap); -#elif defined (_LINUX_CAPABILITY_VERSION) && _LINUX_CAPABILITY_VERSION == 0x20071026 - (*kcap)[0] = cfs_cap_unpack(cap); -#elif defined(_KERNEL_CAPABILITY_VERSION) && _KERNEL_CAPABILITY_VERSION == 0x20080522 - kcap->cap[0] = cfs_cap_unpack(cap); -#else - #error "need 
correct _KERNEL_CAPABILITY_VERSION " -#endif -} - -cfs_cap_t cfs_curproc_cap_pack(void) -{ - cfs_cap_t cap; - cfs_kernel_cap_pack(current_cap(), &cap); - return cap; -} - -void cfs_curproc_cap_unpack(cfs_cap_t cap) -{ - struct cred *cred; - if ((cred = prepare_creds())) { - cfs_kernel_cap_unpack(&cred->cap_effective, cap); - commit_creds(cred); - } -} - -static int cfs_access_process_vm(struct task_struct *tsk, - struct mm_struct *mm, - unsigned long addr, - void *buf, int len, int write) -{ - /* Just copied from kernel for the kernels which doesn't - * have access_process_vm() exported */ - struct vm_area_struct *vma; - struct page *page; - void *old_buf = buf; - - /* Avoid deadlocks on mmap_lock if called from sys_mmap_pgoff(), - * which is already holding mmap_lock for writes. If some other - * thread gets the write lock in the meantime, this thread will - * block, but at least it won't deadlock on itself. LU-1735 */ - if (!mmap_read_trylock(mm)) - return -EDEADLK; - - /* ignore errors, just check how much was successfully transferred */ - while (len) { - int bytes, rc, offset; - void *maddr; - -#if defined(HAVE_GET_USER_PAGES_GUP_FLAGS) - rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page, &vma); -#elif defined(HAVE_GET_USER_PAGES_6ARG) - rc = get_user_pages(addr, 1, write, 1, &page, &vma); -#else - rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); -#endif - if (rc <= 0) - break; - - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; - - maddr = kmap(page); - if (write) { - copy_to_user_page(vma, page, addr, - maddr + offset, buf, bytes); - set_page_dirty_lock(page); - } else { - copy_from_user_page(vma, page, addr, - buf, maddr + offset, bytes); - } - kunmap(page); - put_page(page); - len -= bytes; - buf += bytes; - addr += bytes; - } - mmap_read_unlock(mm); - - return buf - old_buf; -} - -/* Read the environment variable of current process specified by @key. 
*/ -int cfs_get_environ(const char *key, char *value, int *val_len) -{ - struct mm_struct *mm; - char *buffer; - int buf_len = PAGE_SIZE; - int key_len = strlen(key); - unsigned long addr; - int rc; - bool skip = false; - ENTRY; - - buffer = kmalloc(buf_len, GFP_USER); - if (!buffer) - RETURN(-ENOMEM); - - mm = get_task_mm(current); - if (!mm) { - kfree(buffer); - RETURN(-EINVAL); - } - - addr = mm->env_start; - while (addr < mm->env_end) { - int this_len, retval, scan_len; - char *env_start, *env_end; - - memset(buffer, 0, buf_len); - - this_len = min_t(int, mm->env_end - addr, buf_len); - retval = cfs_access_process_vm(current, mm, addr, buffer, - this_len, 0); - if (retval < 0) - GOTO(out, rc = retval); - else if (retval != this_len) - break; - - addr += retval; - - /* Parse the buffer to find out the specified key/value pair. - * The "key=value" entries are separated by '\0'. */ - env_start = buffer; - scan_len = this_len; - while (scan_len) { - char *entry; - int entry_len; - - env_end = memscan(env_start, '\0', scan_len); - LASSERT(env_end >= env_start && - env_end <= env_start + scan_len); - - /* The last entry of this buffer cross the buffer - * boundary, reread it in next cycle. */ - if (unlikely(env_end - env_start == scan_len)) { - /* Just skip the entry larger than page size, - * it can't be jobID env variable. */ - if (unlikely(scan_len == this_len)) - skip = true; - else - addr -= scan_len; - break; - } else if (unlikely(skip)) { - skip = false; - goto skip; - } - - entry = env_start; - entry_len = env_end - env_start; - CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry); - - /* Key length + length of '=' */ - if (entry_len > key_len + 1 && - entry[key_len] == '=' && - !memcmp(entry, key, key_len)) { - entry += key_len + 1; - entry_len -= key_len + 1; - - /* The 'value' buffer passed in is too small. - * Copy what fits, but return -EOVERFLOW. 
*/ - if (entry_len >= *val_len) { - memcpy(value, entry, *val_len); - value[*val_len - 1] = 0; - GOTO(out, rc = -EOVERFLOW); - } - - memcpy(value, entry, entry_len); - *val_len = entry_len; - GOTO(out, rc = 0); - } -skip: - scan_len -= (env_end - env_start + 1); - env_start = env_end + 1; - } - } - GOTO(out, rc = -ENOENT); - -out: - mmput(mm); - kfree((void *)buffer); - return rc; -} -EXPORT_SYMBOL(cfs_get_environ); - -EXPORT_SYMBOL(cfs_curproc_cap_pack); - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 9f8372a..ada7c9f 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -922,8 +922,9 @@ static int mdt_hsm_pending_restore(struct mdt_thread_info *mti) int hsm_init_ucred(struct lu_ucred *uc) { - ENTRY; + kernel_cap_t kcap = cap_combine(CAP_FS_SET, CAP_NFSD_SET); + ENTRY; uc->uc_valid = UCRED_OLD; uc->uc_o_uid = 0; uc->uc_o_gid = 0; @@ -935,7 +936,7 @@ int hsm_init_ucred(struct lu_ucred *uc) uc->uc_fsgid = 0; uc->uc_suppgids[0] = -1; uc->uc_suppgids[1] = -1; - uc->uc_cap = CFS_CAP_FS_MASK; + uc->uc_cap = kcap.cap[0]; uc->uc_umask = 0777; uc->uc_ginfo = NULL; uc->uc_identity = NULL; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 74102aa..0c6ddb2 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -480,8 +480,12 @@ static int old_init_ucred_common(struct mdt_thread_info *info, identity = mdt_identity_get(mdt->mdt_identity_cache, uc->uc_fsuid); if (IS_ERR(identity)) { + kernel_cap_t kcap = cap_combine(CAP_FS_SET, + CAP_NFSD_SET); + u32 cap_mask = kcap.cap[0]; + if (unlikely(PTR_ERR(identity) == -EREMCHG || - uc->uc_cap & CFS_CAP_FS_MASK)) { + uc->uc_cap & cap_mask)) { identity = NULL; } else { CDEBUG(D_SEC, "Deny access without identity: " @@ -607,8 +611,13 @@ int mdt_init_ucred_reint(struct mdt_thread_info *info) /* LU-5564: for normal 
close request, skip permission check */ if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE && - !(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP))) - uc->uc_cap |= CFS_CAP_FS_MASK; + !(ma->ma_attr_flags & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP))) { + kernel_cap_t kcap = { { uc->uc_cap, } }; + + kcap = cap_raise_nfsd_set(kcap, CAP_FULL_SET); + kcap = cap_raise_fs_set(kcap, CAP_FULL_SET); + uc->uc_cap = kcap.cap[0]; + } mdt_exit_ucred(info); diff --git a/lustre/mdt/mdt_restripe.c b/lustre/mdt/mdt_restripe.c index cddf47c..9d5abea 100644 --- a/lustre/mdt/mdt_restripe.c +++ b/lustre/mdt/mdt_restripe.c @@ -877,6 +877,7 @@ static int mdt_restriper_main(void *arg) int mdt_restriper_start(struct mdt_device *mdt) { struct mdt_dir_restriper *restriper = &mdt->mdt_restriper; + kernel_cap_t kcap = cap_combine(CAP_FS_SET, CAP_NFSD_SET); struct task_struct *task; struct mdt_thread_info *info; struct lu_ucred *uc; @@ -924,7 +925,7 @@ int mdt_restriper_start(struct mdt_device *mdt) uc->uc_fsgid = 0; uc->uc_suppgids[0] = -1; uc->uc_suppgids[1] = -1; - uc->uc_cap = CFS_CAP_FS_MASK; + uc->uc_cap = kcap.cap[0]; uc->uc_umask = 0644; uc->uc_ginfo = NULL; uc->uc_identity = NULL; diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index e29c51c..dc40eb9 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -209,6 +209,172 @@ static void jobid_prune_expedite(void) } } +static int cfs_access_process_vm(struct task_struct *tsk, + struct mm_struct *mm, + unsigned long addr, + void *buf, int len, int write) +{ + /* Just copied from kernel for the kernels which don't + * have access_process_vm() exported + */ + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + /* Avoid deadlocks on mmap_lock if called from sys_mmap_pgoff(), + * which is already holding mmap_lock for writes. If some other + * thread gets the write lock in the meantime, this thread will + * block, but at least it won't deadlock on itself.
LU-1735 + */ + if (!mmap_read_trylock(mm)) + return -EDEADLK; + + /* ignore errors, just check how much was successfully transferred */ + while (len) { + int bytes, rc, offset; + void *maddr; + +#if defined(HAVE_GET_USER_PAGES_GUP_FLAGS) + rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page, + &vma); +#elif defined(HAVE_GET_USER_PAGES_6ARG) + rc = get_user_pages(addr, 1, write, 1, &page, &vma); +#else + rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); +#endif + if (rc <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + put_page(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + mmap_read_unlock(mm); + + return buf - old_buf; +} + +/* Read the environment variable of current process specified by @key. */ +static int cfs_get_environ(const char *key, char *value, int *val_len) +{ + struct mm_struct *mm; + char *buffer; + int buf_len = PAGE_SIZE; + int key_len = strlen(key); + unsigned long addr; + int rc; + bool skip = false; + + ENTRY; + buffer = kmalloc(buf_len, GFP_USER); + if (!buffer) + RETURN(-ENOMEM); + + mm = get_task_mm(current); + if (!mm) { + kfree(buffer); + RETURN(-EINVAL); + } + + addr = mm->env_start; + while (addr < mm->env_end) { + int this_len, retval, scan_len; + char *env_start, *env_end; + + memset(buffer, 0, buf_len); + + this_len = min_t(int, mm->env_end - addr, buf_len); + retval = cfs_access_process_vm(current, mm, addr, buffer, + this_len, 0); + if (retval < 0) + GOTO(out, rc = retval); + else if (retval != this_len) + break; + + addr += retval; + + /* Parse the buffer to find out the specified key/value pair. + * The "key=value" entries are separated by '\0'. 
+ */ + env_start = buffer; + scan_len = this_len; + while (scan_len) { + char *entry; + int entry_len; + + env_end = memscan(env_start, '\0', scan_len); + LASSERT(env_end >= env_start && + env_end <= env_start + scan_len); + + /* The last entry of this buffer crosses the buffer + * boundary; reread it in the next cycle. + */ + if (unlikely(env_end - env_start == scan_len)) { + /* Just skip an entry larger than the page size, + * it can't be the jobID env variable. + */ + if (unlikely(scan_len == this_len)) + skip = true; + else + addr -= scan_len; + break; + } else if (unlikely(skip)) { + skip = false; + goto skip; + } + entry = env_start; + entry_len = env_end - env_start; + CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry); + + /* Key length + length of '=' */ + if (entry_len > key_len + 1 && + entry[key_len] == '=' && + !memcmp(entry, key, key_len)) { + entry += key_len + 1; + entry_len -= key_len + 1; + + /* The 'value' buffer passed in is too small. + * Copy what fits, but return -EOVERFLOW. + */ + if (entry_len >= *val_len) { + memcpy(value, entry, *val_len); + value[*val_len - 1] = 0; + GOTO(out, rc = -EOVERFLOW); + } + + memcpy(value, entry, entry_len); + *val_len = entry_len; + GOTO(out, rc = 0); + } +skip: + scan_len -= (env_end - env_start + 1); + env_start = env_end + 1; + } + } + GOTO(out, rc = -ENOENT); + +out: + mmput(mm); + kfree((void *)buffer); + return rc; +} + /* * Get jobid of current process by reading the environment variable * stored in between the "env_start" & "env_end" of task struct.
diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 1ef99a5..d35fbb9 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -2246,6 +2246,7 @@ static struct lu_object *echo_resolve_path(const struct lu_env *env, static void echo_ucred_init(struct lu_env *env) { struct lu_ucred *ucred = lu_ucred(env); + kernel_cap_t kcap = current_cap(); ucred->uc_valid = UCRED_INVALID; @@ -2263,8 +2264,11 @@ static void echo_ucred_init(struct lu_env *env) ucred->uc_cap = cfs_curproc_cap_pack(); /* remove fs privilege for non-root user. */ - if (ucred->uc_fsuid) - ucred->uc_cap &= ~CFS_CAP_FS_MASK; + if (ucred->uc_fsuid) { + kcap = cap_drop_nfsd_set(kcap); + kcap = cap_drop_fs_set(kcap); + } + ucred->uc_cap = kcap.cap[0]; ucred->uc_valid = UCRED_NEW; } -- 1.8.3.1