From d70db3335f52cc49f5e01858d27b0ccd61036c62 Mon Sep 17 00:00:00 2001 From: Nathan Rutman Date: Fri, 12 Mar 2010 09:46:01 -0800 Subject: [PATCH] b=20878 change kernelcomms from netlink to pipes i=alexg i=emoly.liu for portability to Solaris, change kernel-userspace comm mechanism from NetLink to pipes --- libcfs/ChangeLog | 4 + libcfs/autoconf/lustre-libcfs.m4 | 1 - libcfs/include/libcfs/libcfs_debug.h | 2 + libcfs/include/libcfs/libcfs_kernelcomm.h | 89 ++++--- libcfs/include/libcfs/linux/linux-fs.h | 1 + libcfs/include/libcfs/posix/Makefile.am | 2 +- libcfs/include/libcfs/posix/libcfs.h | 4 +- libcfs/include/libcfs/posix/posix-kernelcomm.h | 59 ----- libcfs/include/libcfs/user-lock.h | 1 + libcfs/include/libcfs/user-prim.h | 3 +- libcfs/libcfs/Makefile.in | 4 +- libcfs/libcfs/autoMakefile.am | 10 +- libcfs/libcfs/kernel_user_comm.c | 283 ++++++++++++++++++++++ libcfs/libcfs/linux/Makefile.am | 4 +- libcfs/libcfs/linux/linux-fs.c | 3 +- libcfs/libcfs/linux/linux-kernelcomm.c | 226 ------------------ libcfs/libcfs/ulinux/.cvsignore | 2 - libcfs/libcfs/ulinux/.gitignore | 2 - libcfs/libcfs/ulinux/Makefile.am | 2 - libcfs/libcfs/ulinux/ulinux-kernelcomm.c | 183 --------------- lustre/include/liblustre.h | 1 - lustre/include/lustre/liblustreapi.h | 4 +- lustre/include/lustre/lustre_idl.h | 8 +- lustre/include/lustre/lustre_user.h | 7 +- lustre/include/lustre_lib.h | 1 + lustre/include/lustre_log.h | 8 + lustre/ldlm/ldlm_lockd.c | 2 +- lustre/llite/dir.c | 54 +++-- lustre/lmv/lmv_obd.c | 3 +- lustre/mdc/lproc_mdc.c | 153 ++---------- lustre/mdc/mdc_request.c | 226 ++++++++++++++++-- lustre/mdt/mdt_handler.c | 10 +- lustre/obdclass/llog.c | 15 +- lustre/obdclass/llog_cat.c | 27 ++- lustre/obdclass/llog_internal.h | 1 + lustre/ptlrpc/pack_generic.c | 12 +- lustre/tests/copytool.c | 27 ++- lustre/tests/lustre-rsync-test.sh | 5 +- lustre/tests/sanity.sh | 11 +- lustre/utils/Makefile.am | 2 +- lustre/utils/liblustreapi.c | 312 ++++++++++++++----------- 41 files changed, 
877 insertions(+), 897 deletions(-) delete mode 100644 libcfs/include/libcfs/posix/posix-kernelcomm.h create mode 100644 libcfs/libcfs/kernel_user_comm.c delete mode 100644 libcfs/libcfs/linux/linux-kernelcomm.c delete mode 100644 libcfs/libcfs/ulinux/.cvsignore delete mode 100644 libcfs/libcfs/ulinux/.gitignore delete mode 100644 libcfs/libcfs/ulinux/Makefile.am delete mode 100644 libcfs/libcfs/ulinux/ulinux-kernelcomm.c diff --git a/libcfs/ChangeLog b/libcfs/ChangeLog index a7b9e08..d17ecbd 100644 --- a/libcfs/ChangeLog +++ b/libcfs/ChangeLog @@ -6,6 +6,10 @@ Bugzilla : 19856 Description: Add LustreNetLink, a kernel-userspace communcation path. Add ulinux dir for Linux userspace tools. +Severity : normal +Bugzilla : 20878 +Description: Replace LustreNetLink with kernel_user_comm based on pipes + ------------------------------------------------------------------------------- 2008-07-15 Sun Microsystems, Inc. diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index c908fc3..4c8e400 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -917,7 +917,6 @@ libcfs/include/libcfs/util/Makefile libcfs/libcfs/Makefile libcfs/libcfs/autoMakefile libcfs/libcfs/linux/Makefile -libcfs/libcfs/ulinux/Makefile libcfs/libcfs/posix/Makefile libcfs/libcfs/util/Makefile libcfs/include/libcfs/darwin/Makefile diff --git a/libcfs/include/libcfs/libcfs_debug.h b/libcfs/include/libcfs/libcfs_debug.h index 5380c61..66f2f2c 100644 --- a/libcfs/include/libcfs/libcfs_debug.h +++ b/libcfs/include/libcfs/libcfs_debug.h @@ -163,6 +163,8 @@ struct ptldebug_header { #define D_SEC 0x08000000 /* keep these in sync with lnet/{utils,libcfs}/debug.c */ +#define D_HSM D_TRACE + #define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) #ifndef DEBUG_SUBSYSTEM diff --git a/libcfs/include/libcfs/libcfs_kernelcomm.h b/libcfs/include/libcfs/libcfs_kernelcomm.h index 5f31d1d..59eb194 100644 --- a/libcfs/include/libcfs/libcfs_kernelcomm.h +++ 
b/libcfs/include/libcfs/libcfs_kernelcomm.h @@ -37,9 +37,7 @@ * * libcfs/include/libcfs/libcfs_kernelcomm.h * - * Kernel <-> userspace communication routines. We'll use a shorthand term - * "lnl" (Lustre NetLink) for this interface name for all arches, even though - * an implemtation may not use NetLink. + * Kernel <-> userspace communication routines. * The definitions below are used in the kernel and userspace. * */ @@ -51,67 +49,66 @@ #error Do not #include this file directly. #include instead #endif -/* LNL message header. - * All current and future LNL messages should use this header. + +/* KUC message header. + * All current and future KUC messages should use this header. * To avoid having to include Lustre headers from libcfs, define this here. */ -struct lnl_hdr { - __u16 lnl_magic; - __u8 lnl_transport; /* Each new Lustre feature should use a different +struct kuc_hdr { + __u16 kuc_magic; + __u8 kuc_transport; /* Each new Lustre feature should use a different transport */ - __u8 lnl_flags; - __u16 lnl_msgtype; /* Message type or opcode, transport-specific */ - __u16 lnl_msglen; + __u8 kuc_flags; + __u16 kuc_msgtype; /* Message type or opcode, transport-specific */ + __u16 kuc_msglen; /* Including header */ } __attribute__((aligned(sizeof(__u64)))); -#define LNL_MAGIC 0x191C /*Lustre9etLinC */ -#define LNL_FL_BLOCK 0x01 /* Wait for send */ +#define KUC_MAGIC 0x191C /*Lustre9etLinC */ +#define KUC_FL_BLOCK 0x01 /* Wait for send */ -/* lnl_msgtype values are defined in each transport */ -enum lnl_transport_type { - LNL_TRANSPORT_GENERIC = 1, - LNL_TRANSPORT_HSM = 2, - LNL_TRANSPORT_CHANGELOG = 3, +/* kuc_msgtype values are defined in each transport */ +enum kuc_transport_type { + KUC_TRANSPORT_GENERIC = 1, + KUC_TRANSPORT_HSM = 2, + KUC_TRANSPORT_CHANGELOG = 3, }; -enum lnl_generic_message_type { - LNL_MSG_SHUTDOWN = 1, +enum kuc_generic_message_type { + KUC_MSG_SHUTDOWN = 1, }; -/* LNL Broadcast Groups. 
This determines which userspace process hears which +/* KUC Broadcast Groups. This determines which userspace process hears which * messages. Mutliple transports may be used within a group, or multiple * groups may use the same transport. Broadcast - * groups need not be used if e.g. a PID is specified instead; + * groups need not be used if e.g. a UID is specified instead; * use group 0 to signify unicast. */ -#define LNL_GRP_HSM 0x02 -#define LNL_GRP_CNT 2 +#define KUC_GRP_HSM 0x02 +#define KUC_GRP_MAX KUC_GRP_HSM +/* Kernel methods */ +extern int libcfs_kkuc_msg_put(cfs_file_t *fp, void *payload); +extern int libcfs_kkuc_group_put(int group, void *payload); +extern int libcfs_kkuc_group_add(cfs_file_t *fp, int uid, int group); +extern int libcfs_kkuc_group_rem(int uid, int group); -#if defined(HAVE_NETLINK) && defined (__KERNEL__) -extern int libcfs_klnl_start(int transport); -extern int libcfs_klnl_stop(int transport, int group); -extern int libcfs_klnl_msg_put(int pid, int group, void *payload); -#else -static inline int libcfs_klnl_start(int transport) { - return -ENOSYS; -} -static inline int libcfs_klnl_stop(int transport, int group) { - return 0; -} -static inline int libcfs_klnl_msg_put(int pid, int group, void *payload) { - return -ENOSYS; -} -#endif +#define LK_FLG_STOP 0x01 -/* - * NetLink socket number, see include/linux/netlink.h - * All LNL users share a single netlink socket. This actually is NetLink - * specific, but is not to be used outside of the Linux implementation - * (linux-kernelcomm.c and posix-kernelcomm.c). 
- */ -#define LNL_SOCKET 26 +/* kernelcomm control structure, passed from userspace to kernel */ +typedef struct lustre_kernelcomm { + __u32 lk_wfd; + __u32 lk_rfd; + __u32 lk_uid; + __u32 lk_group; + __u32 lk_data; + __u32 lk_flags; +} __attribute__((packed)) lustre_kernelcomm; +/* Userspace methods */ +extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups); +extern int libcfs_ukuc_stop(lustre_kernelcomm *l); +extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize, + int transport); #endif /* __LIBCFS_KERNELCOMM_H__ */ diff --git a/libcfs/include/libcfs/linux/linux-fs.h b/libcfs/include/libcfs/linux/linux-fs.h index 64d8936..2e740f5 100644 --- a/libcfs/include/libcfs/linux/linux-fs.h +++ b/libcfs/include/libcfs/linux/linux-fs.h @@ -71,6 +71,7 @@ cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err); #define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1) #define cfs_get_file(f) get_file(f) +#define cfs_get_fd(x) fget(x) #define cfs_put_file(f) fput(f) #define cfs_file_count(f) file_count(f) diff --git a/libcfs/include/libcfs/posix/Makefile.am b/libcfs/include/libcfs/posix/Makefile.am index 3aca75b..feba1d5 100644 --- a/libcfs/include/libcfs/posix/Makefile.am +++ b/libcfs/include/libcfs/posix/Makefile.am @@ -1 +1 @@ -EXTRA_DIST := libcfs.h posix-wordsize.h posix-types.h posix-kernelcomm.h +EXTRA_DIST := libcfs.h posix-wordsize.h posix-types.h diff --git a/libcfs/include/libcfs/posix/libcfs.h b/libcfs/include/libcfs/posix/libcfs.h index c16d8d9..c0d5913 100644 --- a/libcfs/include/libcfs/posix/libcfs.h +++ b/libcfs/include/libcfs/posix/libcfs.h @@ -104,7 +104,6 @@ #include #include #include -#include # define cfs_gettimeofday(tv) gettimeofday(tv, NULL); typedef unsigned long long cfs_cycles_t; @@ -126,6 +125,9 @@ typedef struct dentry cfs_dentry_t; typedef struct dirent64 cfs_dirent_t; #endif +#define cfs_get_fd(x) NULL +#define cfs_put_file(f) do {} while (0) + #ifdef __linux__ /* Userpace byte 
flipping */ # include diff --git a/libcfs/include/libcfs/posix/posix-kernelcomm.h b/libcfs/include/libcfs/posix/posix-kernelcomm.h deleted file mode 100644 index 7275631..0000000 --- a/libcfs/include/libcfs/posix/posix-kernelcomm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * libcfs/include/libcfs/posix-kernelcomm.h - * - * kernel - userspace communications. - */ - -#ifndef __LIBCFS_POSIX_KERNELCOMM_H__ -#define __LIBCFS_POSIX_KERNELCOMM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include instead -#endif - -typedef int lustre_netlink; -int libcfs_ulnl_start(lustre_netlink *l, int groups); -int libcfs_ulnl_stop(lustre_netlink *l); -struct lnl_hdr; -int libcfs_ulnl_msg_get(lustre_netlink *l, int maxsize, int transport, - struct lnl_hdr **lnlhh); -int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh); - -#endif - diff --git a/libcfs/include/libcfs/user-lock.h b/libcfs/include/libcfs/user-lock.h index 7c7a80c..bd89217 100644 --- a/libcfs/include/libcfs/user-lock.h +++ b/libcfs/include/libcfs/user-lock.h @@ -177,6 +177,7 @@ int cfs_down_write_trylock(cfs_rw_semaphore_t *s); void cfs_up_read(cfs_rw_semaphore_t *s); void cfs_up_write(cfs_rw_semaphore_t *s); void cfs_fini_rwsem(cfs_rw_semaphore_t *s); +#define CFS_DECLARE_RWSEM(name) cfs_rw_semaphore_t name = { } /* * read-write lock : Need to be investigated more!! diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h index b32db0a..7aa0258 100644 --- a/libcfs/include/libcfs/user-prim.h +++ b/libcfs/include/libcfs/user-prim.h @@ -141,7 +141,8 @@ static inline int cfs_psdev_deregister(cfs_psdev_t *foo) #define cfs_lock_kernel() do {} while (0) #define cfs_sigfillset(l) do {} while (0) #define cfs_recalc_sigpending(l) do {} while (0) -#define cfs_kernel_thread(l,m,n) LBUG() +/* Fine, crash, but stop giving me compile warnings */ +#define cfs_kernel_thread(l,m,n) (LBUG(), (int)l) #define cfs_kthread_run(fn,d,fmt,...) 
LBUG() #ifdef HAVE_LIBPTHREAD diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in index b0b3f9e..0f7913b 100644 --- a/libcfs/libcfs/Makefile.in +++ b/libcfs/libcfs/Makefile.in @@ -4,7 +4,7 @@ libcfs-linux-objs := linux-tracefile.o linux-debug.o libcfs-linux-objs += linux-prim.o linux-mem.o libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o -libcfs-linux-objs += linux-utils.o linux-module.o linux-kernelcomm.o +libcfs-linux-objs += linux-utils.o linux-module.o ifeq ($(PATCHLEVEL),6) libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) @@ -25,7 +25,7 @@ sources: endif libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o \ - libcfs_string.o hash.o + libcfs_string.o hash.o kernel_user_comm.o libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/libcfs/libcfs/autoMakefile.am b/libcfs/libcfs/autoMakefile.am index b70e72e..2182377 100644 --- a/libcfs/libcfs/autoMakefile.am +++ b/libcfs/libcfs/autoMakefile.am @@ -34,7 +34,7 @@ # Lustre is a trademark of Sun Microsystems, Inc. 
# -SUBDIRS := linux util posix ulinux +SUBDIRS := linux util posix if DARWIN SUBDIRS += darwin endif @@ -43,7 +43,7 @@ DIST_SUBDIRS := linux util posix ulinux darwin if LIBLUSTRE noinst_LIBRARIES= libcfs.a libcfs_a_SOURCES= posix/posix-debug.c user-prim.c user-lock.c user-tcpip.c \ - user-bitops.c user-mem.c hash.c ulinux/ulinux-kernelcomm.c + user-bitops.c user-mem.c hash.c kernel_user_comm.c libcfs_a_CPPFLAGS = $(LLCPPFLAGS) libcfs_a_CFLAGS = $(LLCFLAGS) endif @@ -68,7 +68,7 @@ nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \ darwin/darwin-debug.c darwin/darwin-proc.c \ darwin/darwin-tracefile.c darwin/darwin-module.c \ posix/posix-debug.c module.c tracefile.c nidstrings.c watchdog.c \ - ulinux/ulinux-kernelcomm.c hash.c + kernel_user_comm.c hash.c libcfs_CFLAGS := $(EXTRA_KCFLAGS) libcfs_LDFLAGS := $(EXTRA_KLDFLAGS) @@ -88,5 +88,5 @@ EXTRA_DIST := Info.plist MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \ - user-lock.c user-tcpip.c user-bitops.c\ - user-mem.c linux/linux-tracefile.h + user-lock.c user-tcpip.c user-bitops.c\ + user-mem.c kernel_user_comm.c linux/linux-tracefile.h diff --git a/libcfs/libcfs/kernel_user_comm.c b/libcfs/libcfs/kernel_user_comm.c new file mode 100644 index 0000000..7ac1f9e --- /dev/null +++ b/libcfs/libcfs/kernel_user_comm.c @@ -0,0 +1,283 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * Author: Nathan Rutman + * + * Kernel <-> userspace communication routines. + * Using pipes for all arches. + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#define D_KUC 0 + +#include <libcfs/libcfs.h> + +#ifdef LUSTRE_UTILS +/* This is the userspace side. */ + +/** Start the userspace side of a KUC pipe. + * @param link Private descriptor for pipe/socket. + * @param group KUC broadcast group to listen to + * (can be null for unicast to this pid) + */ +int libcfs_ukuc_start(lustre_kernelcomm *link, int group) +{ + int pfd[2]; + + if (pipe(pfd) < 0) + return -errno; + + link->lk_rfd = pfd[0]; + link->lk_wfd = pfd[1]; + link->lk_group = group; + link->lk_uid = getpid(); + return 0; +} + +int libcfs_ukuc_stop(lustre_kernelcomm *link) +{ + if (link->lk_wfd > 0) + close(link->lk_wfd); + return close(link->lk_rfd); +} + +#define lhsz sizeof(*kuch) + +/** Read a message from the link. + * Reads into the caller-supplied buffer; no memory is allocated + * + * @param link Private descriptor for pipe/socket.
+ * @param buf Buffer to read into + * @param maxsize Maximum message size allowed + * @param transport Only listen to messages on this transport + * (and the generic transport) + */ +int libcfs_ukuc_msg_get(lustre_kernelcomm *link, char *buf, int maxsize, + int transport) +{ + struct kuc_hdr *kuch; + int rc = 0; + + memset(buf, 0, maxsize); + + CDEBUG(D_KUC, "Waiting for message from kernel on fd %d\n", + link->lk_rfd); + + while (1) { + /* Read header first to get message size */ + rc = read(link->lk_rfd, buf, lhsz); + if (rc <= 0) { + rc = -errno; + break; + } + kuch = (struct kuc_hdr *)buf; + + CDEBUG(D_KUC, " Received message mg=%x t=%d m=%d l=%d\n", + kuch->kuc_magic, kuch->kuc_transport, kuch->kuc_msgtype, + kuch->kuc_msglen); + + if (kuch->kuc_magic != KUC_MAGIC) { + CERROR("bad message magic %x != %x\n", + kuch->kuc_magic, KUC_MAGIC); + rc = -EPROTO; + break; + } + + if (kuch->kuc_msglen > maxsize) { + rc = -EMSGSIZE; + break; + } + + /* Read payload */ + rc = read(link->lk_rfd, buf + lhsz, kuch->kuc_msglen - lhsz); + if (rc < 0) { + rc = -errno; + break; + } + if (rc < (kuch->kuc_msglen - lhsz)) { + CERROR("short read: got %d of %d bytes\n", + rc, kuch->kuc_msglen); + rc = -EPROTO; + break; + } + + if (kuch->kuc_transport == transport || + kuch->kuc_transport == KUC_TRANSPORT_GENERIC) { + return 0; + } + /* Drop messages for other transports */ + } + return rc; +} + +#else /* LUSTRE_UTILS */ +/* This is the kernel side (liblustre as well). */ + +/** + * libcfs_kkuc_msg_put - send a message from kernel to userspace + * @param filp to send the message to + * @param payload Payload data.
First field of payload is always + * struct kuc_hdr + */ +int libcfs_kkuc_msg_put(cfs_file_t *filp, void *payload) +{ + struct kuc_hdr *kuch = (struct kuc_hdr *)payload; + int rc = -ENOSYS; + + if (filp == NULL || IS_ERR(filp)) + return -EBADF; + + if (kuch->kuc_magic != KUC_MAGIC) { + CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic); + return -ENOSYS; + } + +#ifdef __KERNEL__ + rc = cfs_user_write(filp, (char *)payload, kuch->kuc_msglen, 0); +#endif + + if (rc < 0) + CWARN("message send failed (%d)\n", rc); + else + CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp); + + return rc; +} +CFS_EXPORT_SYMBOL(libcfs_kkuc_msg_put); + +/* Broadcast groups are global across all mounted filesystems; + * i.e. registering for a group on 1 fs will get messages for that + * group from any fs */ +/** A single group registration has a uid and a file pointer */ +struct kkuc_reg { + cfs_list_t kr_chain; + int kr_uid; + cfs_file_t *kr_fp; +}; +static cfs_list_t kkuc_groups[KUC_GRP_MAX+1] = {}; +/* Protect message sending against remove and adds */ +static CFS_DECLARE_RWSEM(kg_sem); + +/** Add a receiver to a broadcast group + * @param filp pipe to write into + * @param uid identifier for this receiver + * @param group group number + */ +int libcfs_kkuc_group_add(cfs_file_t *filp, int uid, int group) +{ + struct kkuc_reg *reg; + + if (group > KUC_GRP_MAX) { + CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group); + return -EINVAL; + } + + /* fput in group_rem */ + if (filp == NULL) + return -EBADF; + + /* freed in group_rem */ + reg = cfs_alloc(sizeof(*reg), 0); + if (reg == NULL) + return -ENOMEM; + + reg->kr_fp = filp; + reg->kr_uid = uid; + + cfs_down_write(&kg_sem); + if (kkuc_groups[group].next == NULL) + CFS_INIT_LIST_HEAD(&kkuc_groups[group]); + cfs_list_add(&reg->kr_chain, &kkuc_groups[group]); + cfs_up_write(&kg_sem); + + CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group); + + return 0; +} +CFS_EXPORT_SYMBOL(libcfs_kkuc_group_add); + +int
libcfs_kkuc_group_rem(int uid, int group) +{ + struct kkuc_reg *reg, *next; + ENTRY; + + if (kkuc_groups[group].next == NULL) + RETURN(0); + + if (uid == 0) { + /* Broadcast a shutdown message */ + struct kuc_hdr lh; + + lh.kuc_magic = KUC_MAGIC; + lh.kuc_transport = KUC_TRANSPORT_GENERIC; + lh.kuc_msgtype = KUC_MSG_SHUTDOWN; + lh.kuc_msglen = sizeof(lh); + libcfs_kkuc_group_put(group, &lh); + } + + cfs_down_write(&kg_sem); + cfs_list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) { + if ((uid == 0) || (uid == reg->kr_uid)) { + cfs_list_del(&reg->kr_chain); + CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n", + reg->kr_uid, reg->kr_fp, group); + cfs_put_file(reg->kr_fp); + cfs_free(reg); + } + } + cfs_up_write(&kg_sem); + + RETURN(0); +} +CFS_EXPORT_SYMBOL(libcfs_kkuc_group_rem); + +int libcfs_kkuc_group_put(int group, void *payload) +{ + struct kkuc_reg *reg; + int rc = 0; + ENTRY; + + cfs_down_read(&kg_sem); + cfs_list_for_each_entry(reg, &kkuc_groups[group], kr_chain) { + rc = libcfs_kkuc_msg_put(reg->kr_fp, payload); + } + cfs_up_read(&kg_sem); + + RETURN(rc); +} +CFS_EXPORT_SYMBOL(libcfs_kkuc_group_put); + +#endif /* LUSTRE_UTILS */ + diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am index 43d0581..48c4dfd 100644 --- a/libcfs/libcfs/linux/Makefile.am +++ b/libcfs/libcfs/linux/Makefile.am @@ -1,5 +1,5 @@ EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ - linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c \ - linux-kernelcomm.c + linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c + diff --git a/libcfs/libcfs/linux/linux-fs.c b/libcfs/libcfs/linux/linux-fs.c index d5d50e7..4c661fe 100644 --- a/libcfs/libcfs/linux/linux-fs.c +++ b/libcfs/libcfs/linux/linux-fs.c @@ -75,11 +75,12 @@ cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset) fs = get_fs(); set_fs(KERNEL_DS); - while (count > 0) { +
while ((ssize_t)count > 0) { size = filp->f_op->write(filp, (char *)buf, count, offset); if (size < 0) break; count -= size; + buf += size; size = 0; } set_fs(fs); diff --git a/libcfs/libcfs/linux/linux-kernelcomm.c b/libcfs/libcfs/linux/linux-kernelcomm.c deleted file mode 100644 index 98a34a0..0000000 --- a/libcfs/libcfs/linux/linux-kernelcomm.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * Kernel <-> userspace communication routines. We'll use a shorthand term - * "lnl" (Lustre NetLink) for the interface names for all arches (even though - * implemtation may not use NetLink). 
- * For Linux, we use Netlink sockets. - */ - -#define DEBUG_SUBSYSTEM S_CLASS - - -/* This is the kernel side. - * See libcfs/ulinux/ulinux-kernelcomm.c for the user side. - */ - -#if defined(HAVE_NETLINK) && defined(__KERNEL__) - -#include -#include -#include -#include - -#include - -/* Single Netlink Message type to send all Lustre messages */ -#define LNL_MSG 26 - -static struct sock *lnl_socket = NULL; -static atomic_t lnl_start_count = ATOMIC_INIT(0); -static spinlock_t lnl_lock = SPIN_LOCK_UNLOCKED; - -/** Start the netlink socket for this transport - * @param transport lnl_transport - */ -int libcfs_klnl_start(int transport) -{ - int rc = 0; - ENTRY; - - /* If anyone needs it, we can add per-transport incoming message - callbacks. Add the callback as a param here. Store the transport - and callback in a table. Include a generalized incoming msg - callback here to dispatch messages to the appropriate - per-transport callback. */ - - spin_lock(&lnl_lock); - if (atomic_inc_return(&lnl_start_count) > 1) - GOTO(out, rc = 0); - - lnl_socket = netlink_kernel_create( -#ifdef HAVE_NETLINK_NS - DEFAULT_NET, -#endif - LNL_SOCKET, LNL_GRP_CNT, - NULL /* incoming cb */, -#ifdef HAVE_NETLINK_CBMUTEX - NULL, -#endif - THIS_MODULE); - if (lnl_socket == NULL) { - CERROR("Cannot open socket %d\n", LNL_SOCKET); - atomic_dec(&lnl_start_count); - GOTO(out, rc = -ENODEV); - } - -out: - spin_unlock(&lnl_lock); - RETURN(rc); -} -EXPORT_SYMBOL(libcfs_klnl_start); - -static void send_shutdown_msg(int transport, int group) { - struct lnl_hdr lh; - - lh.lnl_magic = LNL_MAGIC; - lh.lnl_transport = LNL_TRANSPORT_GENERIC; - lh.lnl_msgtype = LNL_MSG_SHUTDOWN; - lh.lnl_msglen = sizeof(lh); - - libcfs_klnl_msg_put(0, group, &lh); -} - -/* This should be called once per (started) transport - * @param transport lnl_transport - * @param group Broadcast group for shutdown message */ -int libcfs_klnl_stop(int transport, int group) -{ - if (group) - send_shutdown_msg(transport, group); - - 
spin_lock(&lnl_lock); - - if (atomic_dec_and_test(&lnl_start_count)) { - sock_release(lnl_socket->sk_socket); - lnl_socket = NULL; - } - - spin_unlock(&lnl_lock); - return 0; -} -EXPORT_SYMBOL(libcfs_klnl_stop); - -static struct sk_buff *netlink_make_msg(int pid, int seq, void *payload, - int size) -{ - struct sk_buff *skb; - struct nlmsghdr *nlh; - int len = NLMSG_SPACE(size); - void *data; - -#ifdef HAVE_NETLINK_NL2 - skb = nlmsg_new(len, GFP_KERNEL); -#else /* old */ - skb = nlmsg_new(len); -#endif - - if (!skb) - return NULL; - - nlh = nlmsg_put(skb, pid, seq, LNL_MSG, size, 0); - if (!nlh) { - nlmsg_free(skb); - return NULL; - } - - data = nlmsg_data(nlh); - memcpy(data, payload, size); - return skb; -} - -/** - * libcfs_klnl_msg_put - send an message from kernel to userspace - * @param pid Process id to send message to for unicast messages; must be 0 for - * broadcast - * @param group Broadcast group; 0 for unicast messages - * @param payload Payload data. First field of payload is always struct lnl_hdr - * - * Allocates an skb, builds the netlink message, and sends it to the pid. - */ -int libcfs_klnl_msg_put(int pid, int group, void *payload) -{ - struct lnl_hdr *lnlh = (struct lnl_hdr *)payload; - struct sk_buff *skb; - int rc; - - if (lnl_socket == NULL) { - CERROR("LustreNetLink: not running\n"); - return -ENOSYS; - } - - if (lnlh->lnl_magic != LNL_MAGIC) { - CERROR("LustreNetLink: bad magic %x\n", lnlh->lnl_magic); - return -ENOSYS; - } - - if ((pid != 0) && (group != 0)) { - CERROR("LustreNetLink: pid=%d or group=%d must be 0\n", - pid, group); - return -EINVAL; - } - - skb = netlink_make_msg(pid, 0, payload, lnlh->lnl_msglen); - if (!skb) - return -ENOMEM; - - if (pid) { - rc = netlink_unicast(lnl_socket, skb, pid, - lnlh->lnl_flags & LNL_FL_BLOCK ? 
0 : MSG_DONTWAIT); - if (rc > 0) - rc = 0; - } else { -#ifdef HAVE_NLMSG_MULTICAST_5ARGS - rc = nlmsg_multicast(lnl_socket, skb, 0, group, GFP_KERNEL); -#else - rc = nlmsg_multicast(lnl_socket, skb, 0, group); -#endif - } - - CDEBUG(0, "Sent message pid=%d, group=%d, rc=%d\n", pid, group, rc); - - if (rc < 0) - CWARN("message send failed (%d) [pid=%d,group=%d]\n", rc, - pid, group); - - return rc; -} -EXPORT_SYMBOL(libcfs_klnl_msg_put); - - -#endif - diff --git a/libcfs/libcfs/ulinux/.cvsignore b/libcfs/libcfs/ulinux/.cvsignore deleted file mode 100644 index 282522d..0000000 --- a/libcfs/libcfs/ulinux/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/libcfs/libcfs/ulinux/.gitignore b/libcfs/libcfs/ulinux/.gitignore deleted file mode 100644 index b336cc7..0000000 --- a/libcfs/libcfs/ulinux/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/Makefile -/Makefile.in diff --git a/libcfs/libcfs/ulinux/Makefile.am b/libcfs/libcfs/ulinux/Makefile.am deleted file mode 100644 index 0541235..0000000 --- a/libcfs/libcfs/ulinux/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -EXTRA_DIST := ulinux-kernelcomm.c - diff --git a/libcfs/libcfs/ulinux/ulinux-kernelcomm.c b/libcfs/libcfs/ulinux/ulinux-kernelcomm.c deleted file mode 100644 index 4d7e09d..0000000 --- a/libcfs/libcfs/ulinux/ulinux-kernelcomm.c +++ /dev/null @@ -1,183 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * Kernel - userspace communication routines. We'll use a shorthand term - * "lnl" (Lustre NetLink) for the interface names for all arches (even though - * implemtation may not use NetLink). - * For Linux, we use Netlink sockets. - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -/* This is the userspace side. - * See libcfs/linux/linux-kernelcomm.c for the kernel side. - */ - -#ifdef HAVE_NETLINK - -#include -#include -#include - -/** Start the userspace side of a LNL pipe. - * @param link Private descriptor for pipe/socket. 
- * @param groups LNL broadcast group to listen to - * (can be null for unicast to this pid) - */ -int libcfs_ulnl_start(lustre_netlink *link, int groups) -{ - struct sockaddr_nl src_addr; - int sock; - int rc = 0; - - sock = socket(PF_NETLINK, SOCK_RAW, LNL_SOCKET); - if (sock < 0) - return -errno; - - memset(&src_addr, 0, sizeof(src_addr)); - src_addr.nl_family = AF_NETLINK; - src_addr.nl_pid = getpid(); /* self pid */ - src_addr.nl_groups = groups; - rc = bind(sock, (struct sockaddr*)&src_addr, sizeof(src_addr)); - if (rc < 0) { - close(sock); - return -errno; - } - *link = sock; - return 0; -} - -int libcfs_ulnl_stop(lustre_netlink *link) -{ - return close(*link); -} - -/** Read a message from the netlink layer. - * Allocates memory, returns handle - * - * @param link Private descriptor for pipe/socket. - * @param maxsize Maximum message size allowed - * @param transport Only listen to messages on this transport - * (and the generic transport) - * @param lnlhh Handle to the new LNL message - */ -int libcfs_ulnl_msg_get(lustre_netlink *link, int maxsize, int transport, - struct lnl_hdr **lnlhh) -{ - struct iovec iov; - struct sockaddr_nl dest_addr; - struct msghdr msg; - struct nlmsghdr *nlh = NULL; - struct lnl_hdr *lnlh; - int rc = 0; - - nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(maxsize)); - if (!nlh) - return -ENOMEM; - - memset(nlh, 0, NLMSG_SPACE(maxsize)); - iov.iov_base = (void *)nlh; - iov.iov_len = NLMSG_SPACE(maxsize); - - memset(&dest_addr, 0, sizeof(dest_addr)); - msg.msg_name = (void *)&dest_addr; - msg.msg_namelen = sizeof(dest_addr); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - CDEBUG(0, "Waiting for message from kernel on pid %d\n", getpid()); - - while (1) { - /* Read message from kernel */ - rc = recvmsg(*link, &msg, 0); - if (rc <= 0) { - rc = -errno; - break; - } - lnlh = (struct lnl_hdr *)NLMSG_DATA(nlh); - CDEBUG(0, " Received message mg=%x t=%d m=%d l=%d\n", - lnlh->lnl_magic, lnlh->lnl_transport, lnlh->lnl_msgtype, - 
lnlh->lnl_msglen); - if (lnlh->lnl_magic != LNL_MAGIC) { - CERROR("bad message magic %x != %x\n", - lnlh->lnl_magic, LNL_MAGIC); - rc = -EPROTO; - break; - } - if (lnlh->lnl_transport == transport || - lnlh->lnl_transport == LNL_TRANSPORT_GENERIC) { - *lnlhh = lnlh; - return 0; - } - /* Ignore messages on other transports */ - } - free(nlh); - return rc; -} - -/* Free a message returned by the above fn */ -int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh) -{ - /* compute nlmsdghdr offset */ - char *p = (char *)NLMSG_DATA(0); - - free((void *)((char *)*lnlhh - p)); - *lnlhh = NULL; - return 0; -} - -#else /* HAVE_NETLINK */ - -#include - -typedef int lustre_netlink; -int libcfs_ulnl_start(lustre_netlink *link, int groups) { - return -ENOSYS; -} -int libcfs_ulnl_stop(lustre_netlink *link) { - return 0; -} -struct lnl_hdr; -int libcfs_ulnl_msg_get(lustre_netlink *link, int maxsize, int transport, - struct lnl_hdr **lnlhh) { - return -ENOSYS; -} -int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh) { - return -ENOSYS; -} -#endif /* HAVE_NETLINK */ - diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index bc91d39..b18deb8 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -302,7 +302,6 @@ int cfs_curproc_is_in_groups(gid_t gid); #define daemonize(l) do {} while (0) #define sigfillset(l) do {} while (0) #define recalc_sigpending(l) do {} while (0) -#define cfs_kernel_thread(l,m,n) LBUG() #define USERMODEHELPER(path, argv, envp) (0) #define SIGNAL_MASK_ASSERT() diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index b47b54e..a272031 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -216,8 +216,8 @@ extern int llapi_changelog_clear(const char *mdtname, const char *idstr, /* HSM copytool interface. 
priv is private state, managed internally by these functions */ -extern int llapi_copytool_start(void **priv, int flags, int archive_num_count, - int *archive_nums); +extern int llapi_copytool_start(void **priv, char *fsname, int flags, + int archive_count, int *archives); extern int llapi_copytool_fini(void **priv); extern int llapi_copytool_recv(void *priv, struct hsm_action_list **hal, int *msgsize); diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 51d5e09..f331ec5 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -2229,12 +2229,6 @@ struct changelog_setinfo { __u32 cs_id; } __attribute__((packed)); -struct changelog_show { - __u64 cs_startrec; - __u32 cs_pid; - __u32 cs_flags; -} __attribute__((packed)); - /** changelog record */ struct llog_changelog_rec { struct llog_rec_hdr cr_hdr; @@ -2612,7 +2606,7 @@ struct getinfo_fid2path { void lustre_swab_fid2path (struct getinfo_fid2path *gf); -extern void lustre_swab_lnlh(struct lnl_hdr *); +extern void lustre_swab_kuch(struct kuc_hdr *); #endif diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 07d2fc4..f56f4b4 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -136,6 +136,8 @@ struct obd_statfs { #define LL_IOC_PATH2FID _IOR ('f', 173, long) #define LL_IOC_GET_MDTIDX _IOR ('f', 174, int) +#define LL_IOC_HSM_CT_START _IOW ('f', 178, struct lustre_kernelcomm *) + #define LL_STATFS_MDC 1 #define LL_STATFS_LOV 2 @@ -500,10 +502,11 @@ struct changelog_rec { char cr_name[0]; /**< last element */ } __attribute__((packed)); -struct ioc_changelog_clear { +struct ioc_changelog { + __u64 icc_recno; __u32 icc_mdtindex; __u32 icc_id; - __u64 icc_recno; + __u32 icc_flags; }; enum changelog_message_type { diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 21cb37d..623932c 100644 --- a/lustre/include/lustre_lib.h +++ 
b/lustre/include/lustre_lib.h @@ -485,6 +485,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) #define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) #define OBD_IOC_CHANGELOG_REG _IOW ('f', 151, OBD_IOC_DATA_TYPE) diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index d1d9d59..6b21ff0 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -106,12 +106,17 @@ int llog_init_handle(struct llog_handle *handle, int flags, extern void llog_free_handle(struct llog_handle *handle); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data, void *catdata); +int llog_process_flags(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata, int flags); int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, void *data, void *catdata); extern int llog_cancel_rec(struct llog_handle *loghandle, int index); extern int llog_close(struct llog_handle *cathandle); extern int llog_get_size(struct llog_handle *loghandle); +/* llog_process flags */ +#define LLOG_FLAG_NODEAMON 0x0001 + /* llog_cat.c - catalog api */ struct llog_process_data { /** @@ -129,6 +134,7 @@ struct llog_process_data { */ int lpd_startcat; int lpd_startidx; + int lpd_flags; /** llog_process flags */ }; struct llog_process_cat_data { @@ -164,6 +170,8 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, int startcat, int startidx); +int llog_cat_process_flags(struct llog_handle *cat_llh, llog_cb_t cb, void *data, + int flags, int startcat, int startidx); int llog_cat_process_thread(void *data); int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); int 
llog_cat_set_first_idx(struct llog_handle *cathandle, int index); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b89b699..95b1d82 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1692,7 +1692,7 @@ static int ldlm_handle_setinfo(struct ptlrpc_request *req) int rc = -ENOSYS; ENTRY; - DEBUG_REQ(D_ERROR, req, "%s: handle setinfo\n", obd->obd_name); + DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name); req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO); diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 52732ce..4871640 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -734,11 +734,29 @@ int ll_get_mdt_idx(struct inode *inode) return mdtidx; } +static int copy_and_ioctl(int cmd, struct obd_export *exp, void *data, int len) +{ + void *ptr; /* kernel copy of the user buffer @data */ + int rc; + + OBD_ALLOC(ptr, len); + if (ptr == NULL) + return -ENOMEM; + if (cfs_copy_from_user(ptr, data, len)) { + OBD_FREE(ptr, len); + return -EFAULT; + } + rc = obd_iocontrol(cmd, exp, len, ptr, NULL); /* kernel copy, not user ptr */ + OBD_FREE(ptr, len); + return rc; +} + static int ll_dir_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct ll_sb_info *sbi = ll_i2sbi(inode); struct obd_ioctl_data *data; + int rc = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n", @@ -776,7 +794,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, } case IOC_MDC_LOOKUP: { struct ptlrpc_request *request = NULL; - int namelen, rc, len = 0; + int namelen, len = 0; char *buf = NULL; char *filename; struct md_op_data *op_data; @@ -818,7 +836,6 @@ out_free: struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg; struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg; - int rc = 0; int set_default = 0; LASSERT(sizeof(lumv3) == sizeof(*lumv3p)); @@ -852,7 +869,7 @@ out_free: struct lov_mds_md *lmm = NULL; struct mdt_body *body; char *filename = NULL; - int rc, lmmsize; + int lmmsize; if (cmd == IOC_MDC_GETFILEINFO || cmd ==
IOC_MDC_GETFILESTRIPE) { @@ -930,7 +947,6 @@ out_free: struct lov_mds_md *lmm; int lmmsize; lstat_t st; - int rc; lumd = (struct lov_user_mds_data *)arg; lum = &lumd->lmd_lmm; @@ -992,7 +1008,6 @@ out_free: char *buf = NULL; char *str; int len = 0; - int rc; rc = obd_ioctl_getdata(&buf, &len, (void *)arg); if (rc) @@ -1047,7 +1062,7 @@ out_free: } case OBD_IOC_QUOTACHECK: { struct obd_quotactl *oqctl; - int rc, error = 0; + int error = 0; if (!cfs_capable(CFS_CAP_SYS_ADMIN) || sbi->ll_flags & LL_SBI_RMT_CLIENT) @@ -1072,7 +1087,6 @@ out_free: } case OBD_IOC_POLL_QUOTACHECK: { struct if_quotacheck *check; - int rc; if (!cfs_capable(CFS_CAP_SYS_ADMIN) || sbi->ll_flags & LL_SBI_RMT_CLIENT) @@ -1107,7 +1121,7 @@ out_free: } case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl; - int cmd, type, id, valid, rc = 0; + int cmd, type, id, valid; OBD_ALLOC_PTR(qctl); if (!qctl) @@ -1232,7 +1246,6 @@ out_free: if (sbi->ll_flags & LL_SBI_RMT_CLIENT && inode == inode->i_sb->s_root->d_inode) { struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - int rc; LASSERT(fd != NULL); rc = rct_add(&sbi->ll_rct, cfs_curproc_pid(), arg); @@ -1269,24 +1282,17 @@ out_free: sizeof(struct lu_fid))) RETURN(-EFAULT); RETURN(0); - case OBD_IOC_CHANGELOG_CLEAR: { - struct ioc_changelog_clear *icc; - int rc; - - OBD_ALLOC_PTR(icc); - if (icc == NULL) - RETURN(-ENOMEM); - if (cfs_copy_from_user(icc, (void *)arg, sizeof(*icc))) - GOTO(icc_free, rc = -EFAULT); - - rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(*icc), icc,NULL); - -icc_free: - OBD_FREE_PTR(icc); + case OBD_IOC_CHANGELOG_SEND: + case OBD_IOC_CHANGELOG_CLEAR: + rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg, + sizeof(struct ioc_changelog)); RETURN(rc); - } case OBD_IOC_FID2PATH: RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg)); + case LL_IOC_HSM_CT_START: + rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg, + sizeof(struct lustre_kernelcomm)); + RETURN(rc); default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); diff 
--git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index b4ba417..def20ca 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -806,8 +806,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, OBD_FREE_PTR(oqctl); break; } + case OBD_IOC_CHANGELOG_SEND: case OBD_IOC_CHANGELOG_CLEAR: { - struct ioc_changelog_clear *icc = karg; + struct ioc_changelog *icc = karg; if (icc->icc_mdtindex >= count) RETURN(-ENODEV); diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index e78332d..b7d0dae 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -77,151 +77,34 @@ static int mdc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, return count; } -static struct lnl_hdr *changelog_lnl_alloc(int len, int flags) -{ - struct lnl_hdr *lh; - - OBD_ALLOC(lh, len); - if (lh == NULL) - RETURN(NULL); - - lh->lnl_magic = LNL_MAGIC; - lh->lnl_transport = LNL_TRANSPORT_CHANGELOG; - lh->lnl_flags = flags; - lh->lnl_msgtype = CL_RECORD; - lh->lnl_msglen = len; - return lh; -} - -#define D_CHANGELOG 0 - -static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, - void *data) -{ - struct changelog_show *cs = data; - struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; - struct lnl_hdr *lh; - int len, rc; - ENTRY; - - if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) || - (rec->cr.cr_type >= CL_LAST)) { - CERROR("Not a changelog rec %d/%d\n", rec->cr_hdr.lrh_type, - rec->cr.cr_type); - RETURN(-EINVAL); - } - - if (rec->cr.cr_index < cs->cs_startrec) { - /* Skip entries earlier than what we are interested in */ - CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64"\n", - rec->cr.cr_index, cs->cs_startrec); - RETURN(0); - } - - CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID - " %.*s\n", rec->cr.cr_index, rec->cr.cr_type, - changelog_type2str(rec->cr.cr_type), rec->cr.cr_time, - rec->cr.cr_flags & CLF_FLAGMASK, - PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid), - rec->cr.cr_namelen, 
rec->cr.cr_name); - - len = sizeof(*lh) + sizeof(rec->cr) + rec->cr.cr_namelen; - - /* Set up the netlink message */ - lh = changelog_lnl_alloc(len, cs->cs_flags); - if (lh == NULL) - RETURN(-ENOMEM); - memcpy(lh + 1, &rec->cr, len - sizeof(*lh)); - - rc = libcfs_klnl_msg_put(cs->cs_pid, 0, lh); - CDEBUG(D_CHANGELOG, "nlmsg pid %d len %d rc %d\n", cs->cs_pid, len, rc); - - OBD_FREE(lh, len); - - RETURN(rc); -} - -static int lproc_mdc_wr_changelog(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - struct llog_ctxt *ctxt; - struct llog_handle *llh; - struct lnl_hdr *lnlh; - struct changelog_show cs = {}; - int rc; - - if (count != sizeof(cs)) - return -EINVAL; - - if (cfs_copy_from_user(&cs, buffer, sizeof(cs))) - return -EFAULT; - - CDEBUG(D_CHANGELOG, "changelog to pid=%d start "LPU64"\n", - cs.cs_pid, cs.cs_startrec); - - /* Set up the remote catalog handle */ - ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT); - if (ctxt == NULL) - RETURN(-ENOENT); - rc = llog_create(ctxt, &llh, NULL, CHANGELOG_CATALOG); - if (rc) { - CERROR("llog_create() failed %d\n", rc); - GOTO(out, rc); - } - rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL); - if (rc) { - CERROR("llog_init_handle failed %d\n", rc); - GOTO(out, rc); - } - - rc = llog_cat_process(llh, changelog_show_cb, &cs, 0, 0); - - /* Send EOF */ - if ((lnlh = changelog_lnl_alloc(sizeof(*lnlh), cs.cs_flags))) { - lnlh->lnl_msgtype = CL_EOF; - libcfs_klnl_msg_put(cs.cs_pid, 0, lnlh); - OBD_FREE(lnlh, sizeof(*lnlh)); - } - -out: - if (llh) - llog_cat_put(llh); - if (ctxt) - llog_ctxt_put(ctxt); - if (rc < 0) - return rc; - return count; -} - /* temporary for testing */ -static int mdc_wr_netlink(struct file *file, const char *buffer, - unsigned long count, void *data) +static int mdc_wr_kuc(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = data; - struct lnl_hdr *lh; + struct kuc_hdr *lh; struct 
hsm_action_list *hal; struct hsm_action_item *hai; int len; - int pid, rc; + int fd, rc; - rc = lprocfs_write_helper(buffer, count, &pid); + rc = lprocfs_write_helper(buffer, count, &fd); if (rc) return rc; - if (pid < 0) + if (fd < 0) return -ERANGE; - CWARN("message to pid %d\n", pid); + CWARN("message to fd %d\n", fd); len = sizeof(*lh) + sizeof(*hal) + MTI_NAME_MAXLEN + /* for mockup below */ 2 * cfs_size_round(sizeof(*hai)); OBD_ALLOC(lh, len); - lh->lnl_magic = LNL_MAGIC; - lh->lnl_transport = LNL_TRANSPORT_HSM; - lh->lnl_msgtype = HMT_ACTION_LIST; - lh->lnl_msglen = len; + lh->kuc_magic = KUC_MAGIC; + lh->kuc_transport = KUC_TRANSPORT_HSM; + lh->kuc_msgtype = HMT_ACTION_LIST; + lh->kuc_msglen = len; hal = (struct hsm_action_list *)(lh + 1); hal->hal_version = HAL_VERSION; @@ -239,9 +122,14 @@ static int mdc_wr_netlink(struct file *file, const char *buffer, hai->hai_fid.f_oid = 10; hai->hai_len = sizeof(*hai); - /* This works for either broadcast or unicast to a single pid */ - rc = libcfs_klnl_msg_put(pid, pid == 0 ? 
LNL_GRP_HSM : 0, lh); - + /* This works for either broadcast or unicast to a single fd */ + if (fd == 0) { + rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh); + } else { + cfs_file_t *fp = cfs_get_fd(fd); + rc = libcfs_kkuc_msg_put(fp, lh); + cfs_put_file(fp); + } OBD_FREE(lh, len); if (rc < 0) return rc; @@ -266,8 +154,7 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, { "state", lprocfs_rd_state, 0, 0 }, - { "changelog_trigger",0,lproc_mdc_wr_changelog, 0 }, - { "hsm_nl", 0, mdc_wr_netlink, 0, 0, 0222 }, + { "hsm_nl", 0, mdc_wr_kuc, 0, 0, 0222 }, { 0 } }; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 74afc27..aaf785b 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1165,6 +1165,159 @@ out: return rc; } +static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags) +{ + struct kuc_hdr *lh = (struct kuc_hdr *)buf; + + LASSERT(len <= CR_MAXSIZE); + + lh->kuc_magic = KUC_MAGIC; + lh->kuc_transport = KUC_TRANSPORT_CHANGELOG; + lh->kuc_flags = flags; + lh->kuc_msgtype = CL_RECORD; + lh->kuc_msglen = len; + return lh; +} + +#define D_CHANGELOG 0 + +struct changelog_show { + __u64 cs_startrec; + __u32 cs_flags; + cfs_file_t *cs_fp; + char *cs_buf; + struct obd_device *cs_obd; +}; + +static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, + void *data) +{ + struct changelog_show *cs = data; + struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; + struct kuc_hdr *lh; + int len, rc; + ENTRY; + + if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) || + (rec->cr.cr_type >= CL_LAST)) { + CERROR("Not a changelog rec %d/%d\n", rec->cr_hdr.lrh_type, + rec->cr.cr_type); + RETURN(-EINVAL); + } + + if (rec->cr.cr_index < cs->cs_startrec) { + /* Skip entries earlier than what we are interested in */ + CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64"\n", + rec->cr.cr_index, cs->cs_startrec); + RETURN(0); + } + + 
CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID + " %.*s\n", rec->cr.cr_index, rec->cr.cr_type, + changelog_type2str(rec->cr.cr_type), rec->cr.cr_time, + rec->cr.cr_flags & CLF_FLAGMASK, + PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid), + rec->cr.cr_namelen, rec->cr.cr_name); + + len = sizeof(*lh) + sizeof(rec->cr) + rec->cr.cr_namelen; + + /* Set up the message */ + lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags); + memcpy(lh + 1, &rec->cr, len - sizeof(*lh)); + + rc = libcfs_kkuc_msg_put(cs->cs_fp, lh); + CDEBUG(D_CHANGELOG, "kucmsg fp %p len %d rc %d\n", cs->cs_fp, len,rc); + + RETURN(rc); +} + +static int mdc_changelog_send_thread(void *csdata) +{ + struct changelog_show *cs = csdata; + struct llog_ctxt *ctxt = NULL; + struct llog_handle *llh = NULL; + struct kuc_hdr *kuch; + int rc; + + CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n", + cs->cs_fp, cs->cs_startrec); + + OBD_ALLOC(cs->cs_buf, CR_MAXSIZE); + if (cs->cs_buf == NULL) + GOTO(out, rc = -ENOMEM); + + /* Set up the remote catalog handle */ + ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT); + if (ctxt == NULL) + GOTO(out, rc = -ENOENT); + rc = llog_create(ctxt, &llh, NULL, CHANGELOG_CATALOG); + if (rc) { + CERROR("llog_create() failed %d\n", rc); + GOTO(out, rc); + } + rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL); + if (rc) { + CERROR("llog_init_handle failed %d\n", rc); + GOTO(out, rc); + } + + /* We need the pipe fd open, so llog_process can't daemonize */ + rc = llog_cat_process_flags(llh, changelog_show_cb, cs, + LLOG_FLAG_NODEAMON, 0, 0); + + /* Send EOF no matter what our result */ + if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), + cs->cs_flags))) { + kuch->kuc_msgtype = CL_EOF; + libcfs_kkuc_msg_put(cs->cs_fp, kuch); + } + +out: + cfs_put_file(cs->cs_fp); + if (llh) + llog_cat_put(llh); + if (ctxt) + llog_ctxt_put(ctxt); + if (cs->cs_buf) + OBD_FREE(cs->cs_buf, CR_MAXSIZE); + OBD_FREE_PTR(cs); + return rc; +} + +static int 
mdc_ioc_changelog_send(struct obd_device *obd, + struct ioc_changelog *icc) +{ + struct changelog_show *cs; + int rc; + + /* Freed in mdc_changelog_send_thread */ + OBD_ALLOC_PTR(cs); + if (!cs) + return -ENOMEM; + + cs->cs_obd = obd; + cs->cs_startrec = icc->icc_recno; + /* matching cfs_put_file in mdc_changelog_send_thread */ + cs->cs_fp = cfs_get_fd(icc->icc_id); + cs->cs_flags = icc->icc_flags; + + /* New thread because we should return to user app before + writing into our pipe */ + rc = cfs_kernel_thread(mdc_changelog_send_thread, cs, + CLONE_VM | CLONE_FILES); + if (rc >= 0) { + CDEBUG(D_CHANGELOG, "start changelog thread: %d\n", rc); + return 0; + } + + CERROR("Failed to start changelog thread: %d\n", rc); + OBD_FREE_PTR(cs); + return rc; +} + +static int mdc_ioc_hsm_ct_start(struct obd_export *exp, + struct lustre_kernelcomm *lk); + static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -1180,10 +1333,16 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, return -EINVAL; } switch (cmd) { + case LL_IOC_HSM_CT_START: + rc = mdc_ioc_hsm_ct_start(exp, karg); + GOTO(out, rc); + case OBD_IOC_CHANGELOG_SEND: + rc = mdc_ioc_changelog_send(obd, karg); + GOTO(out, rc); case OBD_IOC_CHANGELOG_CLEAR: { - struct ioc_changelog_clear *icc = karg; + struct ioc_changelog *icc = karg; struct changelog_setinfo cs = - {icc->icc_recno, icc->icc_id}; + {.cs_recno = icc->icc_recno, .cs_id = icc->icc_id}; rc = obd_set_info_async(exp, strlen(KEY_CHANGELOG_CLEAR), KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, NULL); @@ -1334,14 +1493,43 @@ static void lustre_swab_hal(struct hsm_action_list *h) } } +static int mdc_ioc_hsm_ct_start(struct obd_export *exp, + struct lustre_kernelcomm *lk) +{ + int rc = 0; + + if (lk->lk_group != KUC_GRP_HSM) { + CERROR("Bad copytool group %d\n", lk->lk_group); + return -EINVAL; + } + + CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd, + lk->lk_uid, lk->lk_group, 
lk->lk_flags); + + if (lk->lk_flags & LK_FLG_STOP) + rc = libcfs_kkuc_group_rem(lk->lk_uid,lk->lk_group); + else { + cfs_file_t *fp = cfs_get_fd(lk->lk_wfd); + rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group); + if (rc && fp) + cfs_put_file(fp); + } + + /* lk_data is archive number mask */ + /* TODO: register archive num with mdt so coordinator can choose + correct agent. */ + + return rc; +} + /** - * Send a message to any listening copytools, nonblocking - * @param val LNL message (lnl_hdr + hsm_action_list) + * Send a message to any listening copytools + * @param val KUC message (kuc_hdr + hsm_action_list) * @param len total length of message */ static int mdc_hsm_copytool_send(int len, void *val) { - struct lnl_hdr *lh = (struct lnl_hdr *)val; + struct kuc_hdr *lh = (struct kuc_hdr *)val; struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1); int rc; ENTRY; @@ -1351,20 +1539,20 @@ static int mdc_hsm_copytool_send(int len, void *val) (int) (sizeof(*lh) + sizeof(*hal))); RETURN(-EPROTO); } - if (lh->lnl_magic == __swab16(LNL_MAGIC)) { - lustre_swab_lnlh(lh); + if (lh->kuc_magic == __swab16(KUC_MAGIC)) { + lustre_swab_kuch(lh); lustre_swab_hal(hal); - } else if (lh->lnl_magic != LNL_MAGIC) { - CERROR("Bad magic %x!=%x\n", lh->lnl_magic, LNL_MAGIC); + } else if (lh->kuc_magic != KUC_MAGIC) { + CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC); RETURN(-EPROTO); } - CDEBUG(D_IOCTL, " Received message mg=%x t=%d m=%d l=%d actions=%d\n", - lh->lnl_magic, lh->lnl_transport, lh->lnl_msgtype, - lh->lnl_msglen, hal->hal_count); + CDEBUG(D_HSM, " Received message mg=%x t=%d m=%d l=%d actions=%d\n", + lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype, + lh->kuc_msglen, hal->hal_count); /* Broadcast to HSM listeners */ - rc = libcfs_klnl_msg_put(0, LNL_GRP_HSM, lh); + rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh); RETURN(rc); } @@ -1762,10 +1950,6 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) CERROR("failed to setup llogging 
subsystems\n"); } - /* ignore errors */ - libcfs_klnl_start(LNL_TRANSPORT_HSM); - libcfs_klnl_start(LNL_TRANSPORT_CHANGELOG); - RETURN(rc); err_close_lock: @@ -1808,7 +1992,12 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) switch (stage) { case OBD_CLEANUP_EARLY: + break; case OBD_CLEANUP_EXPORTS: + /* Failsafe, ok if racy */ + if (obd->obd_type->typ_refcnt <= 1) + libcfs_kkuc_group_rem(0, KUC_GRP_HSM); + /* If we set up but never connected, the client import will not have been cleaned. */ if (obd->u.cli.cl_import) { @@ -1833,9 +2022,6 @@ static int mdc_cleanup(struct obd_device *obd) { struct client_obd *cli = &obd->u.cli; - libcfs_klnl_stop(LNL_TRANSPORT_HSM, LNL_GRP_HSM); - libcfs_klnl_stop(LNL_TRANSPORT_CHANGELOG, 0); - OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock)); OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index f8a6f9b..ae0c669 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5570,7 +5570,7 @@ int mdt_obd_postrecov(struct obd_device *obd) */ int mdt_hsm_copytool_send(struct obd_export *exp) { - struct lnl_hdr *lh; + struct kuc_hdr *lh; struct hsm_action_list *hal; struct hsm_action_item *hai; int rc, len; @@ -5585,10 +5585,10 @@ int mdt_hsm_copytool_send(struct obd_export *exp) if (lh == NULL) RETURN(-ENOMEM); - lh->lnl_magic = LNL_MAGIC; - lh->lnl_transport = LNL_TRANSPORT_HSM; - lh->lnl_msgtype = HMT_ACTION_LIST; - lh->lnl_msglen = len; + lh->kuc_magic = KUC_MAGIC; + lh->kuc_transport = KUC_TRANSPORT_HSM; + lh->kuc_msgtype = HMT_ACTION_LIST; + lh->kuc_msglen = len; hal = (struct hsm_action_list *)(lh + 1); hal->hal_version = HAL_VERSION; diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 14ed25d..202610d 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -242,7 +242,8 @@ static int llog_process_thread(void *arg) 
return 0; } - cfs_daemonize_ctxt("llog_process_thread"); + if (!(lpi->lpi_flags & LLOG_FLAG_NODEAMON)) + cfs_daemonize_ctxt("llog_process_thread"); if (cd != NULL) { last_called_index = cd->lpcd_first_idx; @@ -354,8 +355,8 @@ static int llog_process_thread(void *arg) return 0; } -int llog_process(struct llog_handle *loghandle, llog_cb_t cb, - void *data, void *catdata) +int llog_process_flags(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata, int flags) { struct llog_process_info *lpi; int rc; @@ -370,6 +371,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, lpi->lpi_cb = cb; lpi->lpi_cbdata = data; lpi->lpi_catdata = catdata; + lpi->lpi_flags = flags; #ifdef __KERNEL__ cfs_init_completion(&lpi->lpi_completion); @@ -387,6 +389,13 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, OBD_FREE_PTR(lpi); RETURN(rc); } +EXPORT_SYMBOL(llog_process_flags); + +int llog_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata) +{ + return llog_process_flags(loghandle, cb, data, catdata, 0); +} EXPORT_SYMBOL(llog_process); inline int llog_get_size(struct llog_handle *loghandle) diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index de05bb6..8d32edf 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -394,18 +394,20 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, cd.lpcd_first_idx = d->lpd_startidx; cd.lpcd_last_idx = 0; - rc = llog_process(llh, d->lpd_cb, d->lpd_data, &cd); + rc = llog_process_flags(llh, d->lpd_cb, d->lpd_data, &cd, + d->lpd_flags); /* Continue processing the next log from idx 0 */ d->lpd_startidx = 0; } else { - rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL); + rc = llog_process_flags(llh, d->lpd_cb, d->lpd_data, NULL, + d->lpd_flags); } RETURN(rc); } -int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, - int startcat, int startidx) +int llog_cat_process_flags(struct 
llog_handle *cat_llh, llog_cb_t cb, + void *data, int flags, int startcat, int startidx) { struct llog_process_data d; struct llog_log_hdr *llh = cat_llh->lgh_hdr; @@ -417,6 +419,7 @@ int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, d.lpd_cb = cb; d.lpd_startcat = startcat; d.lpd_startidx = startidx; + d.lpd_flags = flags; if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { struct llog_process_cat_data cd; @@ -426,19 +429,29 @@ int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, cd.lpcd_first_idx = llh->llh_cat_idx; cd.lpcd_last_idx = 0; - rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd); + rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, &cd, + flags); if (rc != 0) RETURN(rc); cd.lpcd_first_idx = 0; cd.lpcd_last_idx = cat_llh->lgh_last_idx; - rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd); + rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, &cd, + flags); } else { - rc = llog_process(cat_llh, llog_cat_process_cb, &d, NULL); + rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, NULL, + flags); } RETURN(rc); } +EXPORT_SYMBOL(llog_cat_process_flags); + +int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data, + int startcat, int startidx) +{ + return llog_cat_process_flags(cat_llh, cb, data, 0, startcat, startidx); +} EXPORT_SYMBOL(llog_cat_process); #ifdef __KERNEL__ diff --git a/lustre/obdclass/llog_internal.h b/lustre/obdclass/llog_internal.h index 82433cb..a0d4205 100644 --- a/lustre/obdclass/llog_internal.h +++ b/lustre/obdclass/llog_internal.h @@ -45,6 +45,7 @@ struct llog_process_info { void *lpi_cbdata; void *lpi_catdata; int lpi_rc; + int lpi_flags; cfs_completion_t lpi_completion; }; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 14b2d31..678246a 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2196,12 +2196,12 @@ void lustre_swab_lustre_capa_key(struct lustre_capa_key *k) __swab32s 
(&k->lk_padding); } -void lustre_swab_lnlh(struct lnl_hdr *l) +void lustre_swab_kuch(struct kuc_hdr *l) { - __swab16s(&l->lnl_magic); - /* __u8 l->lnl_transport */ - __swab16s(&l->lnl_msgtype); - __swab16s(&l->lnl_msglen); + __swab16s(&l->kuc_magic); + /* __u8 l->kuc_transport */ + __swab16s(&l->kuc_msgtype); + __swab16s(&l->kuc_msglen); } -EXPORT_SYMBOL(lustre_swab_lnlh); +EXPORT_SYMBOL(lustre_swab_kuch); diff --git a/lustre/tests/copytool.c b/lustre/tests/copytool.c index a6d72d3..8bacd55 100644 --- a/lustre/tests/copytool.c +++ b/lustre/tests/copytool.c @@ -48,18 +48,30 @@ #include #include +#include #include #include #include +void *ctdata; + +void handler(int signal ) { + psignal(signal, "exiting"); + /* If we don't clean up upon interrupt, umount thinks there's a ref + * and doesn't remove us from mtab (EINPROGRESS). The lustre client + * does successfully unmount and the mount is actually gone, but the + * mtab entry remains. So this just makes mtab happier. */ + llapi_copytool_fini(&ctdata); + exit(1); +} + int main(int argc, char **argv) { int c, test = 0; struct option long_opts[] = { {"test", no_argument, 0, 't'}, {0, 0, 0, 0} }; - void *ctdata; - int archive_nums[] = {1}; /* which archive numbers we care about */ + int archives[] = {1}; /* which archives we care about */ int rc; optind = 0; @@ -75,8 +87,13 @@ int main(int argc, char **argv) { } } - rc = llapi_copytool_start(&ctdata, 0, ARRAY_SIZE(archive_nums), - archive_nums); + if (optind != argc - 1) { /* need exactly one arg: fsname */ + fprintf(stderr, "Usage: %s <fsname>\n", argv[0]); + return -EINVAL; + } + + rc = llapi_copytool_start(&ctdata, argv[optind], 0, + ARRAY_SIZE(archives), archives); if (rc < 0) { fprintf(stderr, "Can't start copytool interface: %s\n", strerror(-rc)); @@ -88,6 +105,8 @@ int main(int argc, char **argv) { printf("Waiting for message from kernel (pid=%d)\n", getpid()); + signal(SIGINT, handler); + while(1) { struct hsm_action_list *hal; struct hsm_action_item *hai; diff --git a/lustre/tests/lustre-rsync-test.sh
b/lustre/tests/lustre-rsync-test.sh index 0e95931..953b396 100644 --- a/lustre/tests/lustre-rsync-test.sh +++ b/lustre/tests/lustre-rsync-test.sh @@ -10,8 +10,9 @@ SRCDIR=`dirname $0` export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin ONLY=${ONLY:-"$*"} -ALWAYS_EXCEPT="$LRSYNC_EXCEPT 5a 5b" -# bug number for skipped test: - 20878 +[ -n "$ONLY" ] && SLOW=yes +ALWAYS_EXCEPT="$LRSYNC_EXCEPT" +# bug number for skipped test: # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && \ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 4e08db8..7490e2c 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6622,15 +6622,16 @@ run_test 162 "path lookup sanity" test_163() { remote_mds_nodsh && skip "remote MDS with nodsh" && return - copytool --test || { skip "copytool not runnable: $?" && return; } - copytool & + copytool --test $FSNAME || { skip "copytool not runnable: $?" && return; } + copytool $FSNAME & sleep 1 local uuid=$($LCTL get_param -n mdc.${FSNAME}-MDT0000-mdc-*.uuid) # this proc file is temporary and linux-only - do_facet mds lctl set_param mdt.${FSNAME}-MDT0000.mdccomm=$uuid || error "lnl send failed" - kill $! + do_facet mds lctl set_param mdt.${FSNAME}-MDT0000.mdccomm=$uuid ||\ + error "kernel->userspace send failed" + kill -INT $! 
} -run_test 163 "LustreNetLink kernelcomms" +run_test 163 "kernel <-> userspace comms" test_169() { # do directio so as not to populate the page cache diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index e178f28..4dc9cc4 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -74,7 +74,7 @@ llverfs_LDADD := $(EXT2FSLIB) $(E2PLIB) llverdev_LDADD := $(EXT2FSLIB) $(BLKIDLIB) L_IOCTL := $(top_builddir)/libcfs/libcfs/util/l_ioctl.c -L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/ulinux/ulinux-kernelcomm.c +L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/kernel_user_comm.c liblustreapi_a_SOURCES = liblustreapi.c $(L_IOCTL) $(L_KERNELCOMM) libiam_a_SOURCES = libiam.c diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 96d31e6..7750e7e 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -603,6 +603,7 @@ int llapi_search_mounts(const char *pathname, int index, char *mntdir, return get_root_path(want, fsname, NULL, mntdir, idx); } +/* Given a path, find the corresponding Lustre fsname */ int llapi_search_fsname(const char *pathname, char *fsname) { return get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL, @@ -2708,6 +2709,8 @@ int llapi_ls(int argc, char *argv[]) /* Print mdtname 'name' into 'buf' using 'format'. Add -MDT0000 if needed. * format must have %s%s, buf must be > 16 + * Eg: if name = "lustre-MDT0000", "lustre", or "lustre-MDT0000_UUID" + * then buf = "lustre-MDT0000" */ static int get_mdtname(char *name, char *format, char *buf) { @@ -2733,12 +2736,72 @@ static int get_mdtname(char *name, char *format, char *buf) return sprintf(buf, format, name, suffix); } +/** ioctl on filesystem root, with mdtindex sent as data + * \param mdtname path, fsname, or mdtname (lustre-MDT0004) + * \param mdtidxp pointer to integer within data to be filled in with the + * mdt index (0 if no mdt is specified). NULL won't be filled.
+ */ +static int root_ioctl(const char *mdtname, int opc, void *data, int *mdtidxp, + int want_error) +{ + char fsname[20]; + char *ptr; + int fd, index, rc; + + /* Take path, fsname, or MDTname. Assume MDT0000 in the former cases. + Open root and parse mdt index. */ + if (mdtname[0] == '/') { + index = 0; + rc = get_root_path(WANT_FD | want_error, NULL, &fd, + (char *)mdtname, -1); + } else { + if (get_mdtname((char *)mdtname, "%s%s", fsname) < 0) + return -EINVAL; + ptr = fsname + strlen(fsname) - 8; + *ptr = '\0'; + index = strtol(ptr + 4, NULL, 10); + rc = get_root_path(WANT_FD | want_error, fsname, &fd, NULL, -1); + } + if (rc < 0) { + if (want_error) + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "Can't open %s: %d\n", mdtname, rc); + return rc; + } + + if (mdtidxp) + *mdtidxp = index; + + rc = ioctl(fd, opc, data); + if (rc && want_error) + llapi_err(LLAPI_MSG_ERROR, "ioctl %d err %d", opc, rc); + + close(fd); + return rc; +} + /****** Changelog API ********/ + +static int changelog_ioctl(const char *mdtname, int opc, int id, + long long recno, int flags) +{ + struct ioc_changelog data; + int *idx; + + data.icc_id = id; + data.icc_recno = recno; + data.icc_flags = flags; + idx = (int *)(&data.icc_mdtindex); + + return root_ioctl(mdtname, opc, &data, idx, WANT_ERROR); +} + #define CHANGELOG_PRIV_MAGIC 0xCA8E1080 struct changelog_private { int magic; int flags; - lustre_netlink lnl; + lustre_kernelcomm kuc; + char *buf; }; /** Start reading from a changelog @@ -2752,75 +2815,47 @@ int llapi_changelog_start(void **priv, int flags, const char *device, long long startrec) { struct changelog_private *cp; - struct changelog_show cs = {}; - char mdtname[20]; - char pattern[PATH_MAX]; - char trigger[PATH_MAX]; - int fd, rc, pid; - - /* Find mdtname from path, fsname, mdtname, or mdtname_UUID */ - if (device[0] == '/') { - if ((rc = llapi_search_fsname(device, mdtname))) - return rc; - if ((rc = get_mdtname(mdtname, "%s%s", mdtname)) < 0) - return rc; - } else { 
- if ((rc = get_mdtname((char *)device, "%s%s", mdtname)) < 0) - return rc; - } - - /* Find corresponding mdc trigger */ - snprintf(pattern, PATH_MAX, - "/proc/fs/lustre/mdc/%s-*/changelog_trigger", mdtname); - rc = first_match(pattern, trigger); - if (rc) - return rc; - - /* Make sure we can write the trigger */ - fd = open(trigger, O_WRONLY); - if (fd < 0) - return -errno; + int rc; /* Set up the receiver control struct */ cp = malloc(sizeof(*cp)); - if (cp == NULL) { - close(fd); + if (cp == NULL) return -ENOMEM; + + cp->buf = malloc(CR_MAXSIZE); + if (cp->buf == NULL) { + rc = -ENOMEM; + goto out_free; } cp->magic = CHANGELOG_PRIV_MAGIC; cp->flags = flags; - /* Start the receiver */ - rc = libcfs_ulnl_start(&cp->lnl, 0 /* unicast */); + + /* Set up the receiver */ + rc = libcfs_ukuc_start(&cp->kuc, 0 /* no group registration */); if (rc < 0) goto out_free; - /* We need to trigger Lustre to start sending messages now. - We could send a lnl message to a kernel listener, - or write into proc. Proc has the advantage of running in this - context, avoiding the need for a kernel thread. */ - cs.cs_pid = getpid(); - cs.cs_startrec = startrec; - cs.cs_flags = flags & CHANGELOG_FLAG_BLOCK ? LNL_FL_BLOCK : 0; - if ((pid = fork()) < 0) { - goto out_free; - } else if (!pid) { - /* Write triggers Lustre to start sending, but it - won't return until it is complete, meaning everything - got shipped through lnl (or error). So we trigger it - from a child process here, allowing the llapi call to - return and wait for the lnl messages. 
*/ - rc = write(fd, &cs, sizeof(cs)); - exit(rc); + *priv = cp; + + /* Tell the kernel to start sending */ + rc = changelog_ioctl(device, OBD_IOC_CHANGELOG_SEND, cp->kuc.lk_wfd, + startrec, flags); + /* Only the kernel reference keeps the write side open */ + close(cp->kuc.lk_wfd); + cp->kuc.lk_wfd = 0; + if (rc < 0) { + /* frees and clears priv */ + llapi_changelog_fini(priv); + return rc; } - close(fd); - *priv = cp; return 0; out_free: + if (cp->buf) + free(cp->buf); free(cp); - close(fd); return rc; } @@ -2832,7 +2867,8 @@ int llapi_changelog_fini(void **priv) if (!cp || (cp->magic != CHANGELOG_PRIV_MAGIC)) return -EINVAL; - libcfs_ulnl_stop(&cp->lnl); + libcfs_ukuc_stop(&cp->kuc); + free(cp->buf); free(cp); *priv = NULL; return 0; @@ -2848,7 +2884,7 @@ int llapi_changelog_fini(void **priv) int llapi_changelog_recv(void *priv, struct changelog_rec **rech) { struct changelog_private *cp = (struct changelog_private *)priv; - struct lnl_hdr *lnlh; + struct kuc_hdr *kuch; int rc = 0; if (!cp || (cp->magic != CHANGELOG_PRIV_MAGIC)) @@ -2857,22 +2893,23 @@ int llapi_changelog_recv(void *priv, struct changelog_rec **rech) return -EINVAL; repeat: - rc = libcfs_ulnl_msg_get(&cp->lnl, CR_MAXSIZE, LNL_TRANSPORT_CHANGELOG, - &lnlh); + rc = libcfs_ukuc_msg_get(&cp->kuc, cp->buf, CR_MAXSIZE, + KUC_TRANSPORT_CHANGELOG); if (rc < 0) return rc; - if ((lnlh->lnl_transport != LNL_TRANSPORT_CHANGELOG) || - ((lnlh->lnl_msgtype != CL_RECORD) && - (lnlh->lnl_msgtype != CL_EOF))) { + kuch = (struct kuc_hdr *)cp->buf; + if ((kuch->kuc_transport != KUC_TRANSPORT_CHANGELOG) || + ((kuch->kuc_msgtype != CL_RECORD) && + (kuch->kuc_msgtype != CL_EOF))) { llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, "Unknown changelog message type %d:%d\n", - lnlh->lnl_transport, lnlh->lnl_msgtype); + kuch->kuc_transport, kuch->kuc_msgtype); rc = -EPROTO; goto out_free; } - if (lnlh->lnl_msgtype == CL_EOF) { + if (kuch->kuc_msgtype == CL_EOF) { if (cp->flags & CHANGELOG_FLAG_FOLLOW) { /* Ignore EOFs */ 
goto repeat; @@ -2883,12 +2920,11 @@ repeat: } /* Our message is a changelog_rec */ - *rech = (struct changelog_rec *)(lnlh + 1); + *rech = (struct changelog_rec *)(kuch + 1); return 0; out_free: - libcfs_ulnl_msg_free(&lnlh); *rech = NULL; return rc; } @@ -2896,10 +2932,6 @@ out_free: /** Release the changelog record when done with it. */ int llapi_changelog_free(struct changelog_rec **rech) { - if (*rech) { - struct lnl_hdr *lnlh = (struct lnl_hdr *)*rech - 1; - libcfs_ulnl_msg_free(&lnlh); - } *rech = NULL; return 0; } @@ -2907,10 +2939,7 @@ int llapi_changelog_free(struct changelog_rec **rech) int llapi_changelog_clear(const char *mdtname, const char *idstr, long long endrec) { - struct ioc_changelog_clear data; - char fsname[17]; - char *ptr; - int id, fd, index, rc; + int id; if (endrec < 0) { llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, @@ -2927,34 +2956,7 @@ int llapi_changelog_clear(const char *mdtname, const char *idstr, return -EINVAL; } - /* Take path, fsname, or MDTNAME. Assume MDT0000 in the former cases */ - if (mdtname[0] == '/') { - index = 0; - fd = open(mdtname, O_RDONLY | O_DIRECTORY | O_NONBLOCK); - rc = fd < 0 ? 
-errno : 0; - } else { - if (get_mdtname((char *)mdtname, "%s%s", fsname) < 0) - return -EINVAL; - ptr = fsname + strlen(fsname) - 8; - *ptr = '\0'; - index = strtol(ptr + 4, NULL, 10); - rc = get_root_path(WANT_FD | WANT_ERROR, fsname, &fd, NULL, -1); - } - if (rc < 0) { - llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, - "Can't open %s: %d\n", mdtname, rc); - return rc; - } - - data.icc_mdtindex = index; - data.icc_id = id; - data.icc_recno = endrec; - rc = ioctl(fd, OBD_IOC_CHANGELOG_CLEAR, &data); - if (rc) - llapi_err(LLAPI_MSG_ERROR, "ioctl err %d", rc); - - close(fd); - return rc; + return changelog_ioctl(mdtname, OBD_IOC_CHANGELOG_CLEAR, id, endrec, 0); } int llapi_fid2path(const char *device, const char *fidstr, char *buf, @@ -3047,43 +3049,70 @@ int llapi_path2fid(const char *path, lustre_fid *fid) #define CT_PRIV_MAGIC 0xC0BE2001 struct copytool_private { int magic; - lustre_netlink lnl; - int archive_num_count; - int archive_nums[0]; + char *buf; + char *fsname; + lustre_kernelcomm kuc; + __u32 archives; }; #include /** Register a copytool - * @param priv Opaque private control structure + * @param[out] priv Opaque private control structure + * @param fsname Lustre filesystem * @param flags Open flags, currently unused (e.g. 
O_NONBLOCK) - * @param archive_num_count - * @param archive_nums Which archive numbers this copytool is responsible for + * @param archive_count + * @param archives Which archive numbers this copytool is responsible for */ -int llapi_copytool_start(void **priv, int flags, int archive_num_count, - int *archive_nums) +int llapi_copytool_start(void **priv, char *fsname, int flags, + int archive_count, int *archives) { struct copytool_private *ct; int rc; - if (archive_num_count > 0 && archive_nums == NULL) { + if (archive_count > 0 && archives == NULL) { llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, "NULL archive numbers"); return -EINVAL; } - ct = malloc(sizeof(*ct) + - archive_num_count * sizeof(ct->archive_nums[0])); + ct = malloc(sizeof(*ct)); if (ct == NULL) return -ENOMEM; + ct->buf = malloc(HAL_MAXSIZE); + ct->fsname = malloc(strlen(fsname) + 1); + if (ct->buf == NULL || ct->fsname == NULL) { + rc = -ENOMEM; + goto out_err; + } + strcpy(ct->fsname, fsname); ct->magic = CT_PRIV_MAGIC; - ct->archive_num_count = archive_num_count; - if (ct->archive_num_count > 0) - memcpy(ct->archive_nums, archive_nums, archive_num_count * - sizeof(ct->archive_nums[0])); + ct->archives = 0; + for (rc = 0; rc < archive_count; rc++) { + if (archives[rc] > sizeof(ct->archives)) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "Maximum of %d archives supported", + sizeof(ct->archives)); + goto out_err; + } + ct->archives |= 1 << archives[rc]; + } + /* special case: if no archives specified, default to archive #0. 
*/ + if (ct->archives == 0) + ct->archives = 1; + + rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM); + if (rc < 0) + goto out_err; - rc = libcfs_ulnl_start(&ct->lnl, LNL_GRP_HSM); + /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */ + ct->kuc.lk_data = ct->archives; + rc = root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL, + WANT_ERROR); + /* Only the kernel reference keeps the write side open */ + close(ct->kuc.lk_wfd); + ct->kuc.lk_wfd = 0; if (rc < 0) goto out_err; @@ -3091,6 +3120,10 @@ int llapi_copytool_start(void **priv, int flags, int archive_num_count, return 0; out_err: + if (ct->buf) + free(ct->buf); + if (ct->fsname) + free(ct->fsname); free(ct); return rc; } @@ -3103,7 +3136,15 @@ int llapi_copytool_fini(void **priv) if (!ct || (ct->magic != CT_PRIV_MAGIC)) return -EINVAL; - libcfs_ulnl_stop(&ct->lnl); + /* Tell the kernel to stop sending us messages */ + ct->kuc.lk_flags = LK_FLG_STOP; + root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL, 0); + + /* Shut down the kernelcomms */ + libcfs_ukuc_stop(&ct->kuc); + + free(ct->buf); + free(ct->fsname); free(ct); *priv = NULL; return 0; @@ -3119,7 +3160,7 @@ int llapi_copytool_fini(void **priv) int llapi_copytool_recv(void *priv, struct hsm_action_list **halh, int *msgsize) { struct copytool_private *ct = (struct copytool_private *)priv; - struct lnl_hdr *lnlh; + struct kuc_hdr *kuch; struct hsm_action_list *hal; int rc = 0; @@ -3128,49 +3169,46 @@ int llapi_copytool_recv(void *priv, struct hsm_action_list **halh, int *msgsize) if (halh == NULL || msgsize == NULL) return -EINVAL; - rc = libcfs_ulnl_msg_get(&ct->lnl, HAL_MAXSIZE, - LNL_TRANSPORT_HSM, &lnlh); + rc = libcfs_ukuc_msg_get(&ct->kuc, ct->buf, HAL_MAXSIZE, + KUC_TRANSPORT_HSM); if (rc < 0) return rc; /* Handle generic messages */ - if (lnlh->lnl_transport == LNL_TRANSPORT_GENERIC && - lnlh->lnl_msgtype == LNL_MSG_SHUTDOWN) { + kuch = (struct kuc_hdr *)ct->buf; + if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC && + 
kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) { rc = -ESHUTDOWN; goto out_free; } - if (lnlh->lnl_transport != LNL_TRANSPORT_HSM || - lnlh->lnl_msgtype != HMT_ACTION_LIST) { + if (kuch->kuc_transport != KUC_TRANSPORT_HSM || + kuch->kuc_msgtype != HMT_ACTION_LIST) { llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, "Unknown HSM message type %d:%d\n", - lnlh->lnl_transport, lnlh->lnl_msgtype); + kuch->kuc_transport, kuch->kuc_msgtype); rc = -EPROTO; goto out_free; } /* Our message is an hsm_action_list */ - hal = (struct hsm_action_list *)(lnlh + 1); + hal = (struct hsm_action_list *)(kuch + 1); /* Check that we have registered for this archive # */ - for (rc = 0; rc < ct->archive_num_count; rc++) { - if (hal->hal_archive_num == ct->archive_nums[rc]) - break; - } - if (rc >= ct->archive_num_count) { - CDEBUG(D_INFO, "This copytool does not service archive #%d, " - "ignoring this request.\n", hal->hal_archive_num); + if (((1 << hal->hal_archive_num) & ct->archives) == 0) { + llapi_err(LLAPI_MSG_INFO | LLAPI_MSG_NO_ERRNO, + "Ignoring request for archive #%d (bitmask %#x)\n", + hal->hal_archive_num, ct->archives); rc = 0; goto out_free; } *halh = hal; - *msgsize = lnlh->lnl_msglen - sizeof(*lnlh); + *msgsize = kuch->kuc_msglen - sizeof(*kuch); return 0; out_free: - libcfs_ulnl_msg_free(&lnlh); *halh = NULL; *msgsize = 0; return rc; @@ -3179,10 +3217,6 @@ out_free: /** Release the action list when done with it. */ int llapi_copytool_free(struct hsm_action_list **hal) { - if (*hal) { - struct lnl_hdr *lnlh = (struct lnl_hdr *)*hal - 1; - libcfs_ulnl_msg_free(&lnlh); - } *hal = NULL; return 0; } -- 1.8.3.1