From: Frank Zago Date: Mon, 30 Mar 2015 20:23:21 +0000 (-0500) Subject: LU-6245 utils: split kernel comm between user and kernel X-Git-Tag: 2.7.53~46 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=fd44a6d8c71e59ed54b36b2b15e2bdd2b9a74d38;ds=sidebyside LU-6245 utils: split kernel comm between user and kernel The kernel communication code used for HSM and changelog is entangled. Move the user space bits into the liblustreapi. This will also help for a possible relicensing. The kernel portion is also moved from libcfs to obdclass. The original libcfs_kernelcomm.h header is split into three parts: * lustre_kernelcomm.h, a new header for the kernel parts; * uapi_kernelcomm.h, a new header for the data structures shared between userspace and kernelspace; * lustreapi_internal.h receives the private liblustreapi prototypes. The original code in kernel_user_comm.c is split into two parts: * obdclass/kernelcomm.c for the kernel part. filp_user_write() was moved there, and linux-fs.c deleted; * liblustreapi_kernelconn.c for the user part. The calls to CDEBUG have been removed, and calls to CERROR have been transformed to llapi_err_noerrno. The type lustre_kernelcomm has been removed and replaced by struct lustre_kernelcomm. Various names and filenames have been harmonized to *kernelcomm*. The unused symbol KUC_FL_BLOCK has been removed. Signed-off-by: frank zago Change-Id: Id15ab6ea23a160e0aa0517a1a890266b2e971982 Reviewed-on: http://review.whamcloud.com/14270 Tested-by: Jenkins Reviewed-by: Nathan Rutman Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Dmitry Eremin Reviewed-by: John L. 
Hammond Reviewed-by: Oleg Drokin --- diff --git a/libcfs/include/libcfs/Makefile.am b/libcfs/include/libcfs/Makefile.am index d4cf82e..8d73baa 100644 --- a/libcfs/include/libcfs/Makefile.am +++ b/libcfs/include/libcfs/Makefile.am @@ -20,7 +20,6 @@ EXTRA_DIST = \ libcfs_hash.h \ libcfs_heap.h \ libcfs_ioctl.h \ - libcfs_kernelcomm.h \ libcfs_prim.h \ libcfs_private.h \ libcfs_string.h \ diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h index 9f3d814..d32d061 100644 --- a/libcfs/include/libcfs/libcfs.h +++ b/libcfs/include/libcfs/libcfs.h @@ -254,8 +254,7 @@ void cfs_get_random_bytes(void *buf, int size); #include #ifdef __KERNEL__ # include -#endif /* __KERNEL__ */ -#include +#endif #include #ifdef __KERNEL__ # include diff --git a/libcfs/include/libcfs/libcfs_kernelcomm.h b/libcfs/include/libcfs/libcfs_kernelcomm.h deleted file mode 100644 index b930fce..0000000 --- a/libcfs/include/libcfs/libcfs_kernelcomm.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. 
- * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * libcfs/include/libcfs/libcfs_kernelcomm.h - * - * Kernel <-> userspace communication routines. - * The definitions below are used in the kernel and userspace. - * - */ - -#ifndef __LIBCFS_KERNELCOMM_H__ -#define __LIBCFS_KERNELCOMM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include instead -#endif - - -/* KUC message header. - * All current and future KUC messages should use this header. - * To avoid having to include Lustre headers from libcfs, define this here. - */ -struct kuc_hdr { - __u16 kuc_magic; - __u8 kuc_transport; /* Each new Lustre feature should use a different - transport */ - __u8 kuc_flags; - __u16 kuc_msgtype; /* Message type or opcode, transport-specific */ - __u16 kuc_msglen; /* Including header */ -} __attribute__((aligned(sizeof(__u64)))); - -#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr)+CR_MAXSIZE) - -#define KUC_MAGIC 0x191C /*Lustre9etLinC */ -#define KUC_FL_BLOCK 0x01 /* Wait for send */ - -/* kuc_msgtype values are defined in each transport */ -enum kuc_transport_type { - KUC_TRANSPORT_GENERIC = 1, - KUC_TRANSPORT_HSM = 2, - KUC_TRANSPORT_CHANGELOG = 3, -}; - -enum kuc_generic_message_type { - KUC_MSG_SHUTDOWN = 1, -}; - -/* prototype for callback function on kuc groups */ -typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg); - -/* KUC Broadcast Groups. This determines which userspace process hears which - * messages. Mutliple transports may be used within a group, or multiple - * groups may use the same transport. Broadcast - * groups need not be used if e.g. a UID is specified instead; - * use group 0 to signify unicast. 
- */ -#define KUC_GRP_HSM 0x02 -#define KUC_GRP_MAX KUC_GRP_HSM - -/* Kernel methods */ -extern int libcfs_kkuc_msg_put(struct file *fp, void *payload); -extern int libcfs_kkuc_group_put(int group, void *payload); -extern int libcfs_kkuc_group_add(struct file *fp, int uid, int group, - void *data); -extern int libcfs_kkuc_group_rem(int uid, int group, void **pdata); -extern int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func, - void *cb_arg); - -#define LK_FLG_STOP 0x01 -#define LK_NOFD -1U - -/* kernelcomm control structure, passed from userspace to kernel */ -typedef struct lustre_kernelcomm { - __u32 lk_wfd; - __u32 lk_rfd; - __u32 lk_uid; - __u32 lk_group; - __u32 lk_data; - __u32 lk_flags; -} __attribute__((packed)) lustre_kernelcomm; - -/* Userspace methods */ -extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups, int rfd_flags); -extern int libcfs_ukuc_stop(lustre_kernelcomm *l); -int libcfs_ukuc_get_rfd(lustre_kernelcomm *link); -extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize, - int transport); - -#endif /* __LIBCFS_KERNELCOMM_H__ */ - diff --git a/libcfs/include/libcfs/linux/linux-fs.h b/libcfs/include/libcfs/linux/linux-fs.h index 9051491..6e0d8b0 100644 --- a/libcfs/include/libcfs/linux/linux-fs.h +++ b/libcfs/include/libcfs/linux/linux-fs.h @@ -83,9 +83,6 @@ #define flock_end(fl) ((fl)->fl_end) #define flock_set_end(fl, end) do { (fl)->fl_end = (end); } while (0) -ssize_t filp_user_write(struct file *filp, const void *buf, size_t count, - loff_t *offset); - #ifndef IFSHIFT #define IFSHIFT 12 #endif diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in index 9d5cd2d..177f54c 100644 --- a/libcfs/libcfs/Makefile.in +++ b/libcfs/libcfs/Makefile.in @@ -2,7 +2,7 @@ MODULES = libcfs libcfs-linux-objs := linux-tracefile.o linux-debug.o libcfs-linux-objs += linux-prim.o linux-mem.o linux-cpu.o -libcfs-linux-objs += linux-proc.o linux-curproc.o linux-fs.o +libcfs-linux-objs += linux-proc.o 
linux-curproc.o libcfs-linux-objs += linux-utils.o linux-module.o libcfs-linux-objs += linux-crypto.o linux-crypto-adler.o @HAVE_CRC32_TRUE@libcfs-linux-objs += linux-crypto-crc32.o @@ -14,7 +14,7 @@ default: all libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) libcfs-all-objs := debug.o fail.o module.o tracefile.o watchdog.o \ - libcfs_string.o hash.o kernel_user_comm.o \ + libcfs_string.o hash.o \ prng.o workitem.o libcfs_cpu.o \ libcfs_mem.o libcfs_lock.o heap.o diff --git a/libcfs/libcfs/autoMakefile.am b/libcfs/libcfs/autoMakefile.am index 1bbe71c..4a04674 100644 --- a/libcfs/libcfs/autoMakefile.am +++ b/libcfs/libcfs/autoMakefile.am @@ -62,5 +62,5 @@ endif # MODULES MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o libcfs EXTRA_DIST := $(libcfs-all-objs:%.o=%.c) tracefile.h prng.c \ - workitem.c kernel_user_comm.c fail.c libcfs_cpu.c \ + workitem.c fail.c libcfs_cpu.c \ heap.c libcfs_mem.c libcfs_lock.c diff --git a/libcfs/libcfs/kernel_user_comm.c b/libcfs/libcfs/kernel_user_comm.c deleted file mode 100644 index cb8f320..0000000 --- a/libcfs/libcfs/kernel_user_comm.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2013, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * Kernel <-> userspace communication routines. - * Using pipes for all arches. - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#define D_KUC D_OTHER - -#include - -#ifdef LUSTRE_UTILS -/* This is the userspace side. */ - -/** Start the userspace side of a KUC pipe. - * @param link Private descriptor for pipe/socket. - * @param groups KUC broadcast group to listen to - * (can be null for unicast to this pid) - * @param rfd_flags flags for read side of pipe (e.g. O_NONBLOCK) - */ -int libcfs_ukuc_start(lustre_kernelcomm *link, int group, int rfd_flags) -{ - int pfd[2]; - int rc; - - link->lk_rfd = link->lk_wfd = LK_NOFD; - - if (pipe(pfd) < 0) - return -errno; - - if (fcntl(pfd[0], F_SETFL, rfd_flags) < 0) { - rc = -errno; - close(pfd[0]); - close(pfd[1]); - return rc; - } - - memset(link, 0, sizeof(*link)); - link->lk_rfd = pfd[0]; - link->lk_wfd = pfd[1]; - link->lk_group = group; - link->lk_uid = getpid(); - return 0; -} - -int libcfs_ukuc_stop(lustre_kernelcomm *link) -{ - int rc; - - if (link->lk_wfd != LK_NOFD) - close(link->lk_wfd); - rc = close(link->lk_rfd); - link->lk_rfd = link->lk_wfd = LK_NOFD; - return rc; -} - -/** Returns the file descriptor for the read side of the pipe, - * to be used with poll/select. 
- * @param link Private descriptor for pipe/socket. - */ -int libcfs_ukuc_get_rfd(lustre_kernelcomm *link) -{ - return link->lk_rfd; -} - -#define lhsz sizeof(*kuch) - -/** Read a message from the link. - * Allocates memory, returns handle - * - * @param link Private descriptor for pipe/socket. - * @param buf Buffer to read into, must include size for kuc_hdr - * @param maxsize Maximum message size allowed - * @param transport Only listen to messages on this transport - * (and the generic transport) - */ -int libcfs_ukuc_msg_get(lustre_kernelcomm *link, char *buf, int maxsize, - int transport) -{ - struct kuc_hdr *kuch; - int rc = 0; - - memset(buf, 0, maxsize); - - CDEBUG(D_KUC, "Waiting for message from kernel on fd %d\n", - link->lk_rfd); - - while (1) { - /* Read header first to get message size */ - rc = read(link->lk_rfd, buf, lhsz); - if (rc <= 0) { - rc = -errno; - break; - } - kuch = (struct kuc_hdr *)buf; - - CDEBUG(D_KUC, "Received message mg=%x t=%d m=%d l=%d\n", - kuch->kuc_magic, kuch->kuc_transport, kuch->kuc_msgtype, - kuch->kuc_msglen); - - if (kuch->kuc_magic != KUC_MAGIC) { - CERROR("bad message magic %x != %x\n", - kuch->kuc_magic, KUC_MAGIC); - rc = -EPROTO; - break; - } - - if (kuch->kuc_msglen > maxsize) { - rc = -EMSGSIZE; - break; - } - - /* Read payload */ - rc = read(link->lk_rfd, buf + lhsz, kuch->kuc_msglen - lhsz); - if (rc < 0) { - rc = -errno; - break; - } - if (rc < (kuch->kuc_msglen - lhsz)) { - CERROR("short read: got %d of %d bytes\n", - rc, kuch->kuc_msglen); - rc = -EPROTO; - break; - } - - if (kuch->kuc_transport == transport || - kuch->kuc_transport == KUC_TRANSPORT_GENERIC) { - return 0; - } - /* Drop messages for other transports */ - } - return rc; -} - -#else /* LUSTRE_UTILS */ -/* This is the kernel side (liblustre as well). */ - -/** - * libcfs_kkuc_msg_put - send an message from kernel to userspace - * @param fp to send the message to - * @param payload Payload data. 
First field of payload is always - * struct kuc_hdr - */ -int libcfs_kkuc_msg_put(struct file *filp, void *payload) -{ - struct kuc_hdr *kuch = (struct kuc_hdr *)payload; - int rc = -ENOSYS; - - if (filp == NULL || IS_ERR(filp)) - return -EBADF; - - if (kuch->kuc_magic != KUC_MAGIC) { - CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic); - return -ENOSYS; - } - -#ifdef __KERNEL__ - { - loff_t offset = 0; - rc = filp_user_write(filp, payload, kuch->kuc_msglen, - &offset); - } -#endif - - if (rc < 0) - CWARN("message send failed (%d)\n", rc); - else - CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp); - - return rc; -} -EXPORT_SYMBOL(libcfs_kkuc_msg_put); - -/* Broadcast groups are global across all mounted filesystems; - * i.e. registering for a group on 1 fs will get messages for that - * group from any fs */ -/** A single group registration has a uid and a file pointer */ -struct kkuc_reg { - struct list_head kr_chain; - int kr_uid; - struct file *kr_fp; - void *kr_data; -}; - -static struct list_head kkuc_groups[KUC_GRP_MAX+1] = {}; -/* Protect message sending against remove and adds */ -static DECLARE_RWSEM(kg_sem); - -/** Add a receiver to a broadcast group - * @param filp pipe to write into - * @param uid identifier for this receiver - * @param group group number - * @param data user data - */ -int libcfs_kkuc_group_add(struct file *filp, int uid, int group, void *data) -{ - struct kkuc_reg *reg; - - if (group > KUC_GRP_MAX) { - CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group); - return -EINVAL; - } - - /* fput in group_rem */ - if (filp == NULL) - return -EBADF; - - /* freed in group_rem */ - reg = kmalloc(sizeof(*reg), 0); - if (reg == NULL) - return -ENOMEM; - - reg->kr_fp = filp; - reg->kr_uid = uid; - reg->kr_data = data; - - down_write(&kg_sem); - if (kkuc_groups[group].next == NULL) - INIT_LIST_HEAD(&kkuc_groups[group]); - list_add(®->kr_chain, &kkuc_groups[group]); - up_write(&kg_sem); - - CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", 
uid, filp, group); - - return 0; -} -EXPORT_SYMBOL(libcfs_kkuc_group_add); - -int libcfs_kkuc_group_rem(int uid, int group, void **pdata) -{ - struct kkuc_reg *reg, *next; - ENTRY; - - if (kkuc_groups[group].next == NULL) - RETURN(0); - - if (uid == 0) { - /* Broadcast a shutdown message */ - struct kuc_hdr lh; - - lh.kuc_magic = KUC_MAGIC; - lh.kuc_transport = KUC_TRANSPORT_GENERIC; - lh.kuc_msgtype = KUC_MSG_SHUTDOWN; - lh.kuc_msglen = sizeof(lh); - libcfs_kkuc_group_put(group, &lh); - } - - down_write(&kg_sem); - list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) { - if ((uid == 0) || (uid == reg->kr_uid)) { - list_del(®->kr_chain); - CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n", - reg->kr_uid, reg->kr_fp, group); - if (reg->kr_fp != NULL) - fput(reg->kr_fp); - if (pdata != NULL) - *pdata = reg->kr_data; - kfree(reg); - } - } - up_write(&kg_sem); - - RETURN(0); -} -EXPORT_SYMBOL(libcfs_kkuc_group_rem); - -int libcfs_kkuc_group_put(int group, void *payload) -{ - struct kkuc_reg *reg; - int rc = 0; - int one_success = 0; - ENTRY; - - down_write(&kg_sem); - list_for_each_entry(reg, &kkuc_groups[group], kr_chain) { - if (reg->kr_fp != NULL) { - rc = libcfs_kkuc_msg_put(reg->kr_fp, payload); - if (rc == 0) - one_success = 1; - else if (rc == -EPIPE) { - fput(reg->kr_fp); - reg->kr_fp = NULL; - } - } - } - up_write(&kg_sem); - - /* don't return an error if the message has been delivered - * at least to one agent */ - if (one_success) - rc = 0; - - RETURN(rc); -} -EXPORT_SYMBOL(libcfs_kkuc_group_put); - -/** - * Calls a callback function for each link of the given kuc group. - * @param group the group to call the function on. - * @param cb_func the function to be called. - * @param cb_arg extra argument to be passed to the callback function. 
- */ -int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func, - void *cb_arg) -{ - struct kkuc_reg *reg; - int rc = 0; - ENTRY; - - if (group > KUC_GRP_MAX) { - CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group); - RETURN(-EINVAL); - } - - /* no link for this group */ - if (kkuc_groups[group].next == NULL) - RETURN(0); - - down_read(&kg_sem); - list_for_each_entry(reg, &kkuc_groups[group], kr_chain) { - if (reg->kr_fp != NULL) { - rc = cb_func(reg->kr_data, cb_arg); - } - } - up_read(&kg_sem); - - RETURN(rc); -} -EXPORT_SYMBOL(libcfs_kkuc_group_foreach); - -#endif /* LUSTRE_UTILS */ - diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am index fffb4b3..247bfb1 100644 --- a/libcfs/libcfs/linux/Makefile.am +++ b/libcfs/libcfs/linux/Makefile.am @@ -1,5 +1,5 @@ EXTRA_DIST = linux-debug.c linux-prim.c linux-tracefile.c \ - linux-fs.c linux-mem.c linux-proc.c linux-utils.c \ + linux-mem.c linux-proc.c linux-utils.c \ linux-module.c linux-curproc.c linux-cpu.c \ linux-crypto.c linux-crypto-crc32.c linux-crypto-adler.c\ linux-crypto-crc32pclmul.c linux-crypto-crc32c-pclmul.c \ diff --git a/libcfs/libcfs/linux/linux-fs.c b/libcfs/libcfs/linux/linux-fs.c deleted file mode 100644 index a460fe0..0000000 --- a/libcfs/libcfs/linux/linux-fs.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2014, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include - -#include - -/* write a userspace buffer to disk. - * NOTE: this returns 0 on success, not the number of bytes written. */ -ssize_t -filp_user_write(struct file *filp, const void *buf, size_t count, - loff_t *offset) -{ - mm_segment_t fs; - ssize_t size = 0; - - fs = get_fs(); - set_fs(KERNEL_DS); - while ((ssize_t)count > 0) { - size = vfs_write(filp, (const void __user *)buf, count, offset); - if (size < 0) - break; - count -= size; - buf += size; - size = 0; - } - set_fs(fs); - - return size; -} -EXPORT_SYMBOL(filp_user_write); diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index 98c0756..9e3d3fa 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -92,4 +92,6 @@ EXTRA_DIST = \ obd.h \ obd_support.h \ obd_target.h \ - upcall_cache.h + upcall_cache.h \ + lustre_kernelcomm.h \ + uapi_kernelcomm.h diff --git a/lustre/include/lustre_kernelcomm.h b/lustre/include/lustre_kernelcomm.h new file mode 100644 index 0000000..d9955b8 --- /dev/null +++ b/lustre/include/lustre_kernelcomm.h @@ -0,0 +1,56 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Nathan Rutman + * + * Kernel <-> userspace communication routines. + * The definitions below are used in the kernel and userspace. 
+ */ + +#ifndef __LUSTRE_KERNELCOMM_H__ +#define __LUSTRE_KERNELCOMM_H__ + +/* For declarations shared with userspace */ +#include + +/* prototype for callback function on kuc groups */ +typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg); + +/* Kernel methods */ +int libcfs_kkuc_msg_put(struct file *fp, void *payload); +int libcfs_kkuc_group_put(int group, void *payload); +int libcfs_kkuc_group_add(struct file *fp, int uid, int group, + void *data); +int libcfs_kkuc_group_rem(int uid, int group, void **pdata); +int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func, + void *cb_arg); + +#endif /* __LUSTRE_KERNELCOMM_H__ */ + diff --git a/lustre/include/uapi_kernelcomm.h b/lustre/include/uapi_kernelcomm.h new file mode 100644 index 0000000..2df058a --- /dev/null +++ b/lustre/include/uapi_kernelcomm.h @@ -0,0 +1,92 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Nathan Rutman + * + * Kernel <-> userspace communication routines. 
+ * The definitions below are used in the kernel and userspace. + */ + +#ifndef __UAPI_KERNELCOMM_H__ +#define __UAPI_KERNELCOMM_H__ + +#include + +/* KUC message header. + * All current and future KUC messages should use this header. + * To avoid having to include Lustre headers from libcfs, define this here. + */ +struct kuc_hdr { + __u16 kuc_magic; + __u8 kuc_transport; /* Each new Lustre feature should use a different + transport */ + __u8 kuc_flags; + __u16 kuc_msgtype; /* Message type or opcode, transport-specific */ + __u16 kuc_msglen; /* Including header */ +} __attribute__((aligned(sizeof(__u64)))); + +#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr)+CR_MAXSIZE) + +#define KUC_MAGIC 0x191C /*Lustre9etLinC */ + +/* kuc_msgtype values are defined in each transport */ +enum kuc_transport_type { + KUC_TRANSPORT_GENERIC = 1, + KUC_TRANSPORT_HSM = 2, + KUC_TRANSPORT_CHANGELOG = 3, +}; + +enum kuc_generic_message_type { + KUC_MSG_SHUTDOWN = 1, +}; + +/* KUC Broadcast Groups. This determines which userspace process hears which + * messages. Mutliple transports may be used within a group, or multiple + * groups may use the same transport. Broadcast + * groups need not be used if e.g. a UID is specified instead; + * use group 0 to signify unicast. 
+ */ +#define KUC_GRP_HSM 0x02 +#define KUC_GRP_MAX KUC_GRP_HSM + +#define LK_FLG_STOP 0x01 +#define LK_NOFD -1U + +/* kernelcomm control structure, passed from userspace to kernel */ +struct lustre_kernelcomm { + __u32 lk_wfd; + __u32 lk_rfd; + __u32 lk_uid; + __u32 lk_group; + __u32 lk_data; + __u32 lk_flags; +} __attribute__((packed)); + +#endif /* __UAPI_KERNELCOMM_H__ */ diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index dc9312e..8fb5a7c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "llite_internal.h" /* diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 6e99b70..fb990f8 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "lmv_internal.h" static void lmv_activate_target(struct lmv_obd *lmv, diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index e354272..f0cc9fd 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "mdc_internal.h" diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 10932f2..f6bc081 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "mdt_internal.h" static struct lprocfs_vars lprocfs_mdt_hsm_vars[]; diff --git a/lustre/mdt/mdt_hsm_cdt_agent.c b/lustre/mdt/mdt_hsm_cdt_agent.c index 622b1ca..5c099eb 100644 --- a/lustre/mdt/mdt_hsm_cdt_agent.c +++ b/lustre/mdt/mdt_hsm_cdt_agent.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "mdt_internal.h" /* diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 8a48593..3370b8c 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -14,6 +14,7 @@ obdclass-all-objs += lu_object.o dt_object.o capa.o obdclass-all-objs += cl_object.o cl_page.o cl_lock.o cl_io.o lu_ref.o 
obdclass-all-objs += acl.o obdclass-all-objs += linkea.o +obdclass-all-objs += kernelcomm.o @SERVER_TRUE@obdclass-all-objs += idmap.o @SERVER_TRUE@obdclass-all-objs += upcall_cache.o diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index d38619e..89f7c4b 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -42,6 +42,7 @@ #define DEBUG_SUBSYSTEM S_CLASS #include #include +#include spinlock_t obd_types_lock; diff --git a/lustre/obdclass/kernelcomm.c b/lustre/obdclass/kernelcomm.c new file mode 100644 index 0000000..f9763f1 --- /dev/null +++ b/lustre/obdclass/kernelcomm.c @@ -0,0 +1,251 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Nathan Rutman + * + * Kernel <-> userspace communication routines. + * Using pipes for all arches. + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#define D_KUC D_OTHER + +#include +#include + +/* write a userspace buffer to disk. 
+ * NOTE: this returns 0 on success, not the number of bytes written. */ +static ssize_t +filp_user_write(struct file *filp, const void *buf, size_t count, + loff_t *offset) +{ + mm_segment_t fs; + ssize_t size = 0; + + fs = get_fs(); + set_fs(KERNEL_DS); + while ((ssize_t)count > 0) { + size = vfs_write(filp, (const void __user *)buf, count, offset); + if (size < 0) + break; + count -= size; + buf += size; + size = 0; + } + set_fs(fs); + + return size; +} + +/** + * libcfs_kkuc_msg_put - send an message from kernel to userspace + * @param fp to send the message to + * @param payload Payload data. First field of payload is always + * struct kuc_hdr + */ +int libcfs_kkuc_msg_put(struct file *filp, void *payload) +{ + struct kuc_hdr *kuch = (struct kuc_hdr *)payload; + int rc = -ENOSYS; + loff_t offset = 0; + + if (filp == NULL || IS_ERR(filp)) + return -EBADF; + + if (kuch->kuc_magic != KUC_MAGIC) { + CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic); + return -ENOSYS; + } + + rc = filp_user_write(filp, payload, kuch->kuc_msglen, &offset); + if (rc < 0) + CWARN("message send failed (%d)\n", rc); + else + CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp); + + return rc; +} +EXPORT_SYMBOL(libcfs_kkuc_msg_put); + +/* Broadcast groups are global across all mounted filesystems; + * i.e. 
registering for a group on 1 fs will get messages for that
+ * group from any fs */
+/** A single group registration has a uid and a file pointer */
+struct kkuc_reg {
+	struct list_head kr_chain;	/* linkage into kkuc_groups[group] */
+	int		 kr_uid;	/* identifier given at registration */
+	struct file	*kr_fp;		/* file the messages are written to;
+					 * set to NULL once a send hit -EPIPE */
+	void		*kr_data;	/* opaque data given at registration */
+};
+
+/* One list head per group. The heads start out zero-filled and are only
+ * initialized on the first registration, hence the ".next == NULL" tests. */
+static struct list_head kkuc_groups[KUC_GRP_MAX+1] = {};
+/* Protect message sending against remove and adds */
+static DECLARE_RWSEM(kg_sem);
+
+/** Add a receiver to a broadcast group
+ * @param filp pipe to write into
+ * @param uid identifier for this receiver
+ * @param group group number
+ * @param data user data
+ * @return 0 on success, -EINVAL for a group outside [0, KUC_GRP_MAX],
+ *	-EBADF if \a filp is NULL, -ENOMEM if the allocation failed
+ */
+int libcfs_kkuc_group_add(struct file *filp, int uid, int group, void *data)
+{
+	struct kkuc_reg *reg;
+
+	/* group indexes kkuc_groups[]: reject both ends of the range */
+	if (group < 0 || group > KUC_GRP_MAX) {
+		CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+		return -EINVAL;
+	}
+
+	/* fput in group_rem */
+	if (filp == NULL)
+		return -EBADF;
+
+	/* freed in group_rem */
+	/* NOTE(review): the gfp mask is 0 here - confirm whether a regular
+	 * mask such as GFP_KERNEL or GFP_NOFS was intended. */
+	reg = kmalloc(sizeof(*reg), 0);
+	if (reg == NULL)
+		return -ENOMEM;
+
+	reg->kr_fp = filp;
+	reg->kr_uid = uid;
+	reg->kr_data = data;
+
+	down_write(&kg_sem);
+	/* first registration for this group: set up the list head */
+	if (kkuc_groups[group].next == NULL)
+		INIT_LIST_HEAD(&kkuc_groups[group]);
+	list_add(&reg->kr_chain, &kkuc_groups[group]);
+	up_write(&kg_sem);
+
+	CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group);
+
+	return 0;
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_add);
+
+/** Remove receivers from a broadcast group
+ * @param uid identifier of the receiver to remove; 0 removes every
+ *	receiver of the group after broadcasting a shutdown message
+ * @param group group number
+ * @param pdata if not NULL, receives the user data of the removed
+ *	registration (of the last one removed when there are several)
+ * @return 0 on success (including when nothing was registered),
+ *	-EINVAL for a group outside [0, KUC_GRP_MAX]
+ */
+int libcfs_kkuc_group_rem(int uid, int group, void **pdata)
+{
+	struct kkuc_reg *reg, *next;
+	ENTRY;
+
+	/* group indexes kkuc_groups[]: reject out-of-range values */
+	if (group < 0 || group > KUC_GRP_MAX) {
+		CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+		RETURN(-EINVAL);
+	}
+
+	/* nothing was ever registered for this group */
+	if (kkuc_groups[group].next == NULL)
+		RETURN(0);
+
+	if (uid == 0) {
+		/* Broadcast a shutdown message. Members not named in the
+		 * initializer are zero-filled, so no stale stack content
+		 * is written to the receivers. */
+		struct kuc_hdr lh = {
+			.kuc_magic	= KUC_MAGIC,
+			.kuc_transport	= KUC_TRANSPORT_GENERIC,
+			.kuc_msgtype	= KUC_MSG_SHUTDOWN,
+			.kuc_msglen	= sizeof(lh),
+		};
+
+		libcfs_kkuc_group_put(group, &lh);
+	}
+
+	down_write(&kg_sem);
+	list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) {
+		if ((uid == 0) || (uid == reg->kr_uid)) {
+			list_del(&reg->kr_chain);
+			CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n",
+			       reg->kr_uid, reg->kr_fp, group);
+			/* kr_fp is NULL if a previous send failed with -EPIPE
+			 * and already dropped the reference */
+			if (reg->kr_fp != NULL)
+				fput(reg->kr_fp);
+			if (pdata != NULL)
+				*pdata = reg->kr_data;
+			kfree(reg);
+		}
+	}
+	up_write(&kg_sem);
+
+	RETURN(0);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_rem);
+
+/** Send a message to every receiver of a broadcast group
+ * @param group group number
+ * @param payload message to send, starting with a struct kuc_hdr
+ * @return 0 if at least one receiver got the message or if the group has
+ *	no receiver, -EINVAL for a group outside [0, KUC_GRP_MAX],
+ *	otherwise the result of the last send attempted
+ */
+int libcfs_kkuc_group_put(int group, void *payload)
+{
+	struct kkuc_reg *reg;
+	int rc = 0;
+	int one_success = 0;
+	ENTRY;
+
+	/* group indexes kkuc_groups[]: reject out-of-range values */
+	if (group < 0 || group > KUC_GRP_MAX) {
+		CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+		RETURN(-EINVAL);
+	}
+
+	/* write lock: kr_fp is cleared below when a receiver went away */
+	down_write(&kg_sem);
+	/* A group nobody ever registered for still has a zero-filled list
+	 * head, which must not be walked. Treat it as an empty group. */
+	if (kkuc_groups[group].next == NULL) {
+		up_write(&kg_sem);
+		RETURN(0);
+	}
+
+	list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
+		if (reg->kr_fp != NULL) {
+			rc = libcfs_kkuc_msg_put(reg->kr_fp, payload);
+			if (rc == 0)
+				one_success = 1;
+			else if (rc == -EPIPE) {
+				/* drop the file reference now; the
+				 * registration itself stays until group_rem */
+				fput(reg->kr_fp);
+				reg->kr_fp = NULL;
+			}
+		}
+	}
+	up_write(&kg_sem);
+
+	/* don't return an error if the message has been delivered
+	 * at least to one agent */
+	if (one_success)
+		rc = 0;
+
+	RETURN(rc);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_put);
+
+/**
+ * Calls a callback function for each link of the given kuc group.
+ * @param group the group to call the function on.
+ * @param cb_func the function to be called.
+ * @param cb_arg extra argument to be passed to the callback function.
+ */
+int libcfs_kkuc_group_foreach(int group, libcfs_kkuc_cb_t cb_func,
+			      void *cb_arg)
+{
+	struct kkuc_reg *reg;
+	int rc = 0;
+	ENTRY;
+
+	/* group indexes kkuc_groups[]: reject both ends of the range */
+	if (group < 0 || group > KUC_GRP_MAX) {
+		CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+		RETURN(-EINVAL);
+	}
+
+	/* no link for this group */
+	if (kkuc_groups[group].next == NULL)
+		RETURN(0);
+
+	/* The list is only read here, so the read lock is enough. */
+	down_read(&kg_sem);
+	list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
+		/* Skip registrations without a file. Each call overwrites
+		 * rc, so the value returned is the one from the last
+		 * callback invoked (0 if none was). */
+		if (reg->kr_fp != NULL)
+			rc = cb_func(reg->kr_data, cb_arg);
+	}
+	up_read(&kg_sem);
+
+	RETURN(rc);
+}
+EXPORT_SYMBOL(libcfs_kkuc_group_foreach);
diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 1faf1cf..536b434 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -82,12 +82,11 @@ llverfs_LDADD := $(EXT2FSLIB) $(E2PLIB) llverdev_LDADD := $(EXT2FSLIB) $(BLKIDLIB) L_STRING := $(top_builddir)/libcfs/libcfs/util/string.c -L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/kernel_user_comm.c liblustreapitmp_a_SOURCES = liblustreapi.c liblustreapi_hsm.c \ liblustreapi_nodemap.c lustreapi_internal.h \ liblustreapi_json.c liblustreapi_layout.c \ liblustreapi_lease.c liblustreapi_util.c \ - $(L_KERNELCOMM) $(L_STRING) + liblustreapi_kernelconn.c $(L_STRING) if UTILS # build static and shared lib lustreapi diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 5827603..309e8c5 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -4223,7 +4223,7 @@ static int changelog_ioctl(const char *mdtname, int opc, int id, struct changelog_private { int magic; enum changelog_send_flag flags; - lustre_kernelcomm kuc; + struct lustre_kernelcomm kuc; }; /** Start reading from a changelog diff --git a/lustre/utils/liblustreapi_hsm.c b/lustre/utils/liblustreapi_hsm.c index 4c3c5b9..492a3b1 100644 --- a/lustre/utils/liblustreapi_hsm.c +++ b/lustre/utils/liblustreapi_hsm.c @@ -71,7 +71,7 @@ struct hsm_copytool_private { struct kuc_hdr *kuch; int mnt_fd; int open_by_fid_fd; - lustre_kernelcomm
kuc; + struct lustre_kernelcomm kuc; __u32 archives; }; diff --git a/lustre/utils/liblustreapi_kernelconn.c b/lustre/utils/liblustreapi_kernelconn.c new file mode 100644 index 0000000..e93fa9e --- /dev/null +++ b/lustre/utils/liblustreapi_kernelconn.c @@ -0,0 +1,161 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Author: Nathan Rutman + * + * Kernel <-> userspace communication routines. + * Using pipes for all arches. + */ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <lustre/lustreapi.h> + +#include "lustreapi_internal.h" + +/** Start the userspace side of a KUC pipe. + * @param link Private descriptor for pipe/socket. + * @param group KUC broadcast group to listen to + * (can be null for unicast to this pid) + * @param rfd_flags flags for read side of pipe (e.g.
O_NONBLOCK)
+ */
+int libcfs_ukuc_start(struct lustre_kernelcomm *link,
+		      int group, int rfd_flags)
+{
+	int pfd[2];
+	int rc;
+
+	/* Leave the link marked as closed on every failure path below */
+	link->lk_rfd = link->lk_wfd = LK_NOFD;
+
+	if (pipe(pfd) < 0)
+		return -errno;
+
+	/* Only the read side gets the caller's flags */
+	if (fcntl(pfd[0], F_SETFL, rfd_flags) < 0) {
+		/* save errno before close() can overwrite it */
+		rc = -errno;
+		close(pfd[0]);
+		close(pfd[1]);
+		return rc;
+	}
+
+	memset(link, 0, sizeof(*link));
+	link->lk_rfd = pfd[0];
+	link->lk_wfd = pfd[1];
+	link->lk_group = group;
+	link->lk_uid = getpid();
+	return 0;
+}
+
+/** Close both sides of the pipe and mark the link as closed.
+ * Descriptors already set to LK_NOFD are skipped, so the function can be
+ * called after a failed libcfs_ukuc_start() or a second time.
+ * @param link Private descriptor for pipe/socket.
+ * @return the result of close() on the read side, 0 if it was not open
+ */
+int libcfs_ukuc_stop(struct lustre_kernelcomm *link)
+{
+	int rc = 0;
+
+	if (link->lk_wfd != LK_NOFD)
+		close(link->lk_wfd);
+	if (link->lk_rfd != LK_NOFD)
+		rc = close(link->lk_rfd);
+	link->lk_rfd = link->lk_wfd = LK_NOFD;
+	return rc;
+}
+
+/** Returns the file descriptor for the read side of the pipe,
+ * to be used with poll/select.
+ * @param link Private descriptor for pipe/socket.
+ */
+int libcfs_ukuc_get_rfd(struct lustre_kernelcomm *link)
+{
+	return link->lk_rfd;
+}
+
+/* size of the message header; needs a "kuch" variable in scope */
+#define lhsz sizeof(*kuch)
+
+/** Read a message from the link.
+ * The message is stored in the caller-supplied buffer; nothing is
+ * allocated here.
+ *
+ * @param link Private descriptor for pipe/socket.
+ * @param buf Buffer to read into, must include size for kuc_hdr
+ * @param maxsize Maximum message size allowed
+ * @param transport Only listen to messages on this transport
+ *	(and the generic transport)
+ * @return 0 once a message for \a transport or the generic transport
+ *	has been read into \a buf, a negative errno value otherwise:
+ *	-EINVAL if \a maxsize cannot hold the header, -EPIPE on end of
+ *	file, -EPROTO on a malformed or truncated message, -EMSGSIZE if
+ *	the message does not fit in \a maxsize, or -errno from read()
+ */
+int libcfs_ukuc_msg_get(struct lustre_kernelcomm *link, char *buf, int maxsize,
+			int transport)
+{
+	struct kuc_hdr *kuch;
+	int hdr_len = lhsz;
+	int payload_len;
+	int rc = 0;
+
+	/* The header is always read, so the buffer must be able to hold it */
+	if (maxsize < hdr_len)
+		return -EINVAL;
+
+	memset(buf, 0, maxsize);
+
+	while (1) {
+		/* Read header first to get message size */
+		rc = read(link->lk_rfd, buf, lhsz);
+		if (rc < 0) {
+			rc = -errno;
+			break;
+		}
+		if (rc == 0) {
+			/* End of file: read() does not set errno in that
+			 * case, so -errno could be stale or even 0 and make
+			 * the caller believe a message was received. */
+			rc = -EPIPE;
+			break;
+		}
+		if (rc < hdr_len) {
+			llapi_err_noerrno(LLAPI_MSG_ERROR,
+					  "short read: got %d of %d bytes\n",
+					  rc, hdr_len);
+			rc = -EPROTO;
+			break;
+		}
+		kuch = (struct kuc_hdr *)buf;
+
+		if (kuch->kuc_magic != KUC_MAGIC) {
+			llapi_err_noerrno(LLAPI_MSG_ERROR,
+					  "bad message magic %x != %x\n",
+					  kuch->kuc_magic, KUC_MAGIC);
+			rc = -EPROTO;
+			break;
+		}
+
+		/* kuc_msglen includes the header: anything shorter is
+		 * malformed and would make the payload length negative */
+		if (kuch->kuc_msglen < hdr_len) {
+			llapi_err_noerrno(LLAPI_MSG_ERROR,
+					  "bad message length %d < %d\n",
+					  (int)kuch->kuc_msglen, hdr_len);
+			rc = -EPROTO;
+			break;
+		}
+
+		if (kuch->kuc_msglen > maxsize) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		/* Read payload */
+		payload_len = kuch->kuc_msglen - hdr_len;
+		rc = read(link->lk_rfd, buf + lhsz, payload_len);
+		if (rc < 0) {
+			rc = -errno;
+			break;
+		}
+		if (rc < payload_len) {
+			/* both numbers count payload bytes only */
+			llapi_err_noerrno(LLAPI_MSG_ERROR,
+					  "short read: got %d of %d bytes\n",
+					  rc, payload_len);
+			rc = -EPROTO;
+			break;
+		}
+
+		if (kuch->kuc_transport == transport ||
+		    kuch->kuc_transport == KUC_TRANSPORT_GENERIC) {
+			return 0;
+		}
+		/* Drop messages for other transports */
+	}
+	return rc;
+}
+
diff --git a/lustre/utils/lustreapi_internal.h b/lustre/utils/lustreapi_internal.h index 646d440..a5d7ac1 100644 --- a/lustre/utils/lustreapi_internal.h +++ b/lustre/utils/lustreapi_internal.h @@ -35,6 +35,8 @@ #ifndef _LUSTREAPI_INTERNAL_H_ #define _LUSTREAPI_INTERNAL_H_ +#include + #define WANT_PATH 0x1 #define WANT_FSNAME 0x2 #define WANT_FD 0x4 @@ -74,4 +76,13 @@ static inline bool llapi_stripe_index_is_valid(int64_t index) * terminology instead of the preferred "index". */ #define llapi_stripe_offset_is_valid(os) llapi_stripe_index_is_valid(os) +/* + * Kernel communication for Changelogs and HSM requests.
+ */
+/* Create the pipe and fill in \a link; 0 on success, negative errno on
+ * failure. */
+int libcfs_ukuc_start(struct lustre_kernelcomm *link, int group,
+		      int rfd_flags);
+/* Close the descriptors held by \a link. */
+int libcfs_ukuc_stop(struct lustre_kernelcomm *link);
+/* Return the read side of the pipe, for use with poll/select. */
+int libcfs_ukuc_get_rfd(struct lustre_kernelcomm *link);
+/* Read the next message for \a transport (or the generic transport) into
+ * \a buf; 0 on success, negative errno on failure. */
+int libcfs_ukuc_msg_get(struct lustre_kernelcomm *link, char *buf, int maxsize,
+			int transport);
+
 #endif /* _LUSTREAPI_INTERNAL_H_ */