Description: Add LustreNetLink, a kernel-userspace communcation path. Add
ulinux dir for Linux userspace tools.
+Severity : normal
+Bugzilla : 20878
+Description: Replace LustreNetLink with kernel_user_comm based on pipes
+
-------------------------------------------------------------------------------
2008-07-15 Sun Microsystems, Inc.
libcfs/libcfs/Makefile
libcfs/libcfs/autoMakefile
libcfs/libcfs/linux/Makefile
-libcfs/libcfs/ulinux/Makefile
libcfs/libcfs/posix/Makefile
libcfs/libcfs/util/Makefile
libcfs/include/libcfs/darwin/Makefile
#define D_SEC 0x08000000
/* keep these in sync with lnet/{utils,libcfs}/debug.c */
+#define D_HSM D_TRACE
+
#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
#ifndef DEBUG_SUBSYSTEM
*
* libcfs/include/libcfs/libcfs_kernelcomm.h
*
- * Kernel <-> userspace communication routines. We'll use a shorthand term
- * "lnl" (Lustre NetLink) for this interface name for all arches, even though
- * an implemtation may not use NetLink.
+ * Kernel <-> userspace communication routines.
* The definitions below are used in the kernel and userspace.
*
*/
#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
#endif
-/* LNL message header.
- * All current and future LNL messages should use this header.
+
+/* KUC message header.
+ * All current and future KUC messages should use this header.
* To avoid having to include Lustre headers from libcfs, define this here.
*/
-struct lnl_hdr {
- __u16 lnl_magic;
- __u8 lnl_transport; /* Each new Lustre feature should use a different
+struct kuc_hdr {
+ __u16 kuc_magic;
+ __u8 kuc_transport; /* Each new Lustre feature should use a different
transport */
- __u8 lnl_flags;
- __u16 lnl_msgtype; /* Message type or opcode, transport-specific */
- __u16 lnl_msglen;
+ __u8 kuc_flags;
+ __u16 kuc_msgtype; /* Message type or opcode, transport-specific */
+ __u16 kuc_msglen; /* Including header */
} __attribute__((aligned(sizeof(__u64))));
-#define LNL_MAGIC 0x191C /*Lustre9etLinC */
-#define LNL_FL_BLOCK 0x01 /* Wait for send */
+#define KUC_MAGIC 0x191C /*Lustre9etLinC */
+#define KUC_FL_BLOCK 0x01 /* Wait for send */
-/* lnl_msgtype values are defined in each transport */
-enum lnl_transport_type {
- LNL_TRANSPORT_GENERIC = 1,
- LNL_TRANSPORT_HSM = 2,
- LNL_TRANSPORT_CHANGELOG = 3,
+/* kuc_msgtype values are defined in each transport */
+enum kuc_transport_type {
+ KUC_TRANSPORT_GENERIC = 1,
+ KUC_TRANSPORT_HSM = 2,
+ KUC_TRANSPORT_CHANGELOG = 3,
};
-enum lnl_generic_message_type {
- LNL_MSG_SHUTDOWN = 1,
+enum kuc_generic_message_type {
+ KUC_MSG_SHUTDOWN = 1,
};
-/* LNL Broadcast Groups. This determines which userspace process hears which
+/* KUC Broadcast Groups. This determines which userspace process hears which
* messages. Mutliple transports may be used within a group, or multiple
* groups may use the same transport. Broadcast
- * groups need not be used if e.g. a PID is specified instead;
+ * groups need not be used if e.g. a UID is specified instead;
* use group 0 to signify unicast.
*/
-#define LNL_GRP_HSM 0x02
-#define LNL_GRP_CNT 2
+#define KUC_GRP_HSM 0x02
+#define KUC_GRP_MAX KUC_GRP_HSM
+/* Kernel methods */
+extern int libcfs_kkuc_msg_put(cfs_file_t *fp, void *payload);
+extern int libcfs_kkuc_group_put(int group, void *payload);
+extern int libcfs_kkuc_group_add(cfs_file_t *fp, int uid, int group);
+extern int libcfs_kkuc_group_rem(int uid, int group);
-#if defined(HAVE_NETLINK) && defined (__KERNEL__)
-extern int libcfs_klnl_start(int transport);
-extern int libcfs_klnl_stop(int transport, int group);
-extern int libcfs_klnl_msg_put(int pid, int group, void *payload);
-#else
-static inline int libcfs_klnl_start(int transport) {
- return -ENOSYS;
-}
-static inline int libcfs_klnl_stop(int transport, int group) {
- return 0;
-}
-static inline int libcfs_klnl_msg_put(int pid, int group, void *payload) {
- return -ENOSYS;
-}
-#endif
+#define LK_FLG_STOP 0x01
-/*
- * NetLink socket number, see include/linux/netlink.h
- * All LNL users share a single netlink socket. This actually is NetLink
- * specific, but is not to be used outside of the Linux implementation
- * (linux-kernelcomm.c and posix-kernelcomm.c).
- */
-#define LNL_SOCKET 26
+/* kernelcomm control structure, passed from userspace to kernel.
+ * libcfs_ukuc_start() fills in the pipe descriptors, the group and the uid;
+ * the structure is packed, so the field order and sizes are part of the
+ * userspace/kernel interface. */
+typedef struct lustre_kernelcomm {
+ __u32 lk_wfd; /* write end of the pipe created by libcfs_ukuc_start() */
+ __u32 lk_rfd; /* read end of the pipe; libcfs_ukuc_msg_get() reads it */
+ __u32 lk_uid; /* receiver identifier; libcfs_ukuc_start() stores getpid() */
+ __u32 lk_group; /* KUC broadcast group requested by the receiver */
+ __u32 lk_data; /* NOTE(review): not used by libcfs itself - presumably
+                 * transport-specific data; confirm against callers */
+ __u32 lk_flags; /* NOTE(review): presumably LK_FLG_* bits; confirm */
+} __attribute__((packed)) lustre_kernelcomm;
+/* Userspace methods */
+extern int libcfs_ukuc_start(lustre_kernelcomm *l, int groups);
+extern int libcfs_ukuc_stop(lustre_kernelcomm *l);
+extern int libcfs_ukuc_msg_get(lustre_kernelcomm *l, char *buf, int maxsize,
+ int transport);
#endif /* __LIBCFS_KERNELCOMM_H__ */
#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1)
#define cfs_get_file(f) get_file(f)
+#define cfs_get_fd(x) fget(x)
#define cfs_put_file(f) fput(f)
#define cfs_file_count(f) file_count(f)
-EXTRA_DIST := libcfs.h posix-wordsize.h posix-types.h posix-kernelcomm.h
+EXTRA_DIST := libcfs.h posix-wordsize.h posix-types.h
#include <libcfs/user-tcpip.h>
#include <libcfs/posix/posix-wordsize.h>
#include <libcfs/user-bitops.h>
-#include <libcfs/posix/posix-kernelcomm.h>
# define cfs_gettimeofday(tv) gettimeofday(tv, NULL);
typedef unsigned long long cfs_cycles_t;
typedef struct dirent64 cfs_dirent_t;
#endif
+#define cfs_get_fd(x) NULL
+#define cfs_put_file(f) do {} while (0)
+
#ifdef __linux__
/* Userpace byte flipping */
# include <endian.h>
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * libcfs/include/libcfs/posix-kernelcomm.h
- *
- * kernel - userspace communications.
- */
-
-#ifndef __LIBCFS_POSIX_KERNELCOMM_H__
-#define __LIBCFS_POSIX_KERNELCOMM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
-#endif
-
-typedef int lustre_netlink;
-int libcfs_ulnl_start(lustre_netlink *l, int groups);
-int libcfs_ulnl_stop(lustre_netlink *l);
-struct lnl_hdr;
-int libcfs_ulnl_msg_get(lustre_netlink *l, int maxsize, int transport,
- struct lnl_hdr **lnlhh);
-int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh);
-
-#endif
-
void cfs_up_read(cfs_rw_semaphore_t *s);
void cfs_up_write(cfs_rw_semaphore_t *s);
void cfs_fini_rwsem(cfs_rw_semaphore_t *s);
+#define CFS_DECLARE_RWSEM(name) cfs_rw_semaphore_t name = { }
/*
* read-write lock : Need to be investigated more!!
#define cfs_lock_kernel() do {} while (0)
#define cfs_sigfillset(l) do {} while (0)
#define cfs_recalc_sigpending(l) do {} while (0)
-#define cfs_kernel_thread(l,m,n) LBUG()
+/* Userspace cannot start kernel threads, so reaching this is a bug: LBUG().
+ * The comma expression only gives the macro an int value so that callers
+ * which use the result compile without warnings.  The argument is
+ * parenthesised so the cast applies to the whole expression passed in. */
+#define cfs_kernel_thread(l,m,n) (LBUG(), (int)(l))
#define cfs_kthread_run(fn,d,fmt,...) LBUG()
#ifdef HAVE_LIBPTHREAD
libcfs-linux-objs += linux-prim.o linux-mem.o
libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o
libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o
-libcfs-linux-objs += linux-utils.o linux-module.o linux-kernelcomm.o
+libcfs-linux-objs += linux-utils.o linux-module.o
ifeq ($(PATCHLEVEL),6)
libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
endif
libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o \
- libcfs_string.o hash.o
+ libcfs_string.o hash.o kernel_user_comm.o
libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
# Lustre is a trademark of Sun Microsystems, Inc.
#
-SUBDIRS := linux util posix ulinux
+SUBDIRS := linux util posix
if DARWIN
SUBDIRS += darwin
endif
if LIBLUSTRE
noinst_LIBRARIES= libcfs.a
libcfs_a_SOURCES= posix/posix-debug.c user-prim.c user-lock.c user-tcpip.c \
- user-bitops.c user-mem.c hash.c ulinux/ulinux-kernelcomm.c
+ user-bitops.c user-mem.c hash.c kernel_user_comm.c
libcfs_a_CPPFLAGS = $(LLCPPFLAGS)
libcfs_a_CFLAGS = $(LLCFLAGS)
endif
darwin/darwin-debug.c darwin/darwin-proc.c \
darwin/darwin-tracefile.c darwin/darwin-module.c \
posix/posix-debug.c module.c tracefile.c nidstrings.c watchdog.c \
- ulinux/ulinux-kernelcomm.c hash.c
+ kernel_user_comm.c hash.c
libcfs_CFLAGS := $(EXTRA_KCFLAGS)
libcfs_LDFLAGS := $(EXTRA_KLDFLAGS)
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs
DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \
- user-lock.c user-tcpip.c user-bitops.c\
- user-mem.c linux/linux-tracefile.h
+ user-lock.c user-tcpip.c user-bitops.c\
+ user-mem.c kernel_user_comm.c linux/linux-tracefile.h
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * Author: Nathan Rutman <nathan.rutman@sun.com>
+ *
+ * Kernel <-> userspace communication routines.
+ * Using pipes for all arches.
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#define D_KUC 0
+
+#include <libcfs/libcfs.h>
+
+#ifdef LUSTRE_UTILS
+/* This is the userspace side. */
+
+/** Start the userspace side of a KUC pipe.
+ * Creates the pipe and records both ends, the group and the caller's pid
+ * in \a link.
+ *
+ * @param link Private descriptor for the pipe; filled in on success.
+ * @param group KUC broadcast group to listen to
+ *              (0 signifies unicast to this pid)
+ * @return 0 on success, -errno if the pipe cannot be created
+ */
+int libcfs_ukuc_start(lustre_kernelcomm *link, int group)
+{
+        int fds[2];
+        int rc;
+
+        rc = pipe(fds);
+        if (rc < 0)
+                return -errno;
+
+        /* fds[0] is the read end, fds[1] is the write end */
+        link->lk_uid = getpid();
+        link->lk_group = group;
+        link->lk_wfd = fds[1];
+        link->lk_rfd = fds[0];
+        return 0;
+}
+
+/** Stop the userspace side of a KUC pipe.
+ * Closes the write end if one is still recorded (non-zero), then the
+ * read end.
+ *
+ * @param link Private descriptor filled in by libcfs_ukuc_start()
+ * @return result of close() on the read end
+ */
+int libcfs_ukuc_stop(lustre_kernelcomm *link)
+{
+        int rc;
+
+        /* lk_wfd is unsigned, so "set" simply means non-zero */
+        if (link->lk_wfd != 0)
+                close(link->lk_wfd);
+
+        rc = close(link->lk_rfd);
+        return rc;
+}
+
+#define lhsz sizeof(*kuch)
+
+/** Read a message from the link.
+ * Blocks in read() until a message for \a transport (or the generic
+ * transport) has been received; messages for other transports are read
+ * and dropped.  No memory is allocated: the message, header first, is
+ * left in the caller's buffer.
+ *
+ * @param link Private descriptor for pipe/socket.
+ * @param buf Buffer to read into, at least \a maxsize bytes
+ * @param maxsize Maximum message size allowed, header included
+ * @param transport Only listen to messages on this transport
+ *                  (and the generic transport)
+ * @return 0 on success;
+ *         -EINVAL if \a buf is NULL or cannot even hold a header;
+ *         -EPIPE if the pipe reached end-of-file;
+ *         -EPROTO on bad magic or a short read;
+ *         -EMSGSIZE if the advertised length is out of range;
+ *         -errno if read() fails
+ */
+int libcfs_ukuc_msg_get(lustre_kernelcomm *link, char *buf, int maxsize,
+                        int transport)
+{
+        struct kuc_hdr *kuch;
+        int paylen;
+        int rc = 0;
+
+        /* The header is read unconditionally, so it has to fit */
+        if (buf == NULL || maxsize < (int)lhsz)
+                return -EINVAL;
+
+        memset(buf, 0, maxsize);
+
+        CDEBUG(D_KUC, "Waiting for message from kernel on fd %d\n",
+               link->lk_rfd);
+
+        while (1) {
+                /* Read header first to get message size */
+                rc = read(link->lk_rfd, buf, lhsz);
+                if (rc < 0) {
+                        rc = -errno;
+                        break;
+                }
+                if (rc == 0) {
+                        /* End-of-file: read() does not set errno here, so
+                         * -errno could be 0 and look like success. */
+                        rc = -EPIPE;
+                        break;
+                }
+                if (rc < (int)lhsz) {
+                        CERROR("short header read: got %d of %d bytes\n",
+                               rc, (int)lhsz);
+                        rc = -EPROTO;
+                        break;
+                }
+                kuch = (struct kuc_hdr *)buf;
+
+                CDEBUG(D_KUC, " Received message mg=%x t=%d m=%d l=%d\n",
+                       kuch->kuc_magic, kuch->kuc_transport, kuch->kuc_msgtype,
+                       kuch->kuc_msglen);
+
+                if (kuch->kuc_magic != KUC_MAGIC) {
+                        CERROR("bad message magic %x != %x\n",
+                               kuch->kuc_magic, KUC_MAGIC);
+                        rc = -EPROTO;
+                        break;
+                }
+
+                /* kuc_msglen includes the header.  A smaller value would
+                 * make the payload length wrap around and overrun buf. */
+                if (kuch->kuc_msglen < lhsz || kuch->kuc_msglen > maxsize) {
+                        rc = -EMSGSIZE;
+                        break;
+                }
+                paylen = kuch->kuc_msglen - lhsz;
+
+                /* Read payload */
+                rc = read(link->lk_rfd, buf + lhsz, paylen);
+                if (rc < 0) {
+                        rc = -errno;
+                        break;
+                }
+                if (rc < paylen) {
+                        CERROR("short read: got %d of %d bytes\n",
+                               rc, paylen);
+                        rc = -EPROTO;
+                        break;
+                }
+
+                if (kuch->kuc_transport == transport ||
+                    kuch->kuc_transport == KUC_TRANSPORT_GENERIC) {
+                        return 0;
+                }
+                /* Drop messages for other transports */
+        }
+        return rc;
+}
+
+#else /* LUSTRE_UTILS */
+/* This is the kernel side (liblustre as well). */
+
+/**
+ * libcfs_kkuc_msg_put - send a message from kernel to userspace
+ * @param filp file to send the message to
+ * @param payload Payload data. First field of payload is always
+ *   struct kuc_hdr
+ * @return -EBADF for a NULL or error-encoded \a filp, -ENOSYS for a bad
+ *   magic or when built without __KERNEL__, otherwise the result of
+ *   cfs_user_write()
+ */
+int libcfs_kkuc_msg_put(cfs_file_t *filp, void *payload)
+{
+        struct kuc_hdr *hdr = (struct kuc_hdr *)payload;
+        int rc;
+
+        if (filp == NULL || IS_ERR(filp))
+                return -EBADF;
+
+        if (hdr->kuc_magic != KUC_MAGIC) {
+                CERROR("KernelComm: bad magic %x\n", hdr->kuc_magic);
+                return -ENOSYS;
+        }
+
+#ifdef __KERNEL__
+        rc = cfs_user_write(filp, (char *)payload, hdr->kuc_msglen, 0);
+#else
+        /* Nothing to write with outside the kernel */
+        rc = -ENOSYS;
+#endif
+
+        if (rc >= 0)
+                CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp);
+        else
+                CWARN("message send failed (%d)\n", rc);
+
+        return rc;
+}
+CFS_EXPORT_SYMBOL(libcfs_kkuc_msg_put);
+
+/* Broadcast groups are global across all mounted filesystems;
+ * i.e. registering for a group on 1 fs will get messages for that
+ * group from any fs */
+/** A single group registration has a uid and a file pointer */
+struct kkuc_reg {
+ cfs_list_t kr_chain; /* linkage on the kkuc_groups[] list of its group */
+ int kr_uid; /* receiver identifier given to libcfs_kkuc_group_add() */
+ cfs_file_t *kr_fp; /* file messages are written to; released with
+                     * cfs_put_file() in libcfs_kkuc_group_rem() */
+};
+/* One list head per group, indexed by group number.  The heads start out
+ * zeroed; libcfs_kkuc_group_add() initialises a head the first time its
+ * group is used, so a NULL ->next means "never used". */
+static cfs_list_t kkuc_groups[KUC_GRP_MAX+1] = {};
+/* Protect message sending against remove and adds */
+static CFS_DECLARE_RWSEM(kg_sem);
+
+/** Add a receiver to a broadcast group
+ * @param filp pipe to write into
+ * @param uid identifier for this receiver
+ * @param group group number, 0 .. KUC_GRP_MAX
+ * @return 0 on success, -EINVAL for a group out of range, -EBADF for a
+ *         NULL \a filp, -ENOMEM if the registration cannot be allocated
+ */
+int libcfs_kkuc_group_add(cfs_file_t *filp, int uid, int group)
+{
+        struct kkuc_reg *reg;
+
+        /* group indexes kkuc_groups[], so reject both ends of the range */
+        if (group < 0 || group > KUC_GRP_MAX) {
+                CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+                return -EINVAL;
+        }
+
+        /* fput in group_rem */
+        if (filp == NULL)
+                return -EBADF;
+
+        /* freed in group_rem */
+        reg = cfs_alloc(sizeof(*reg), 0);
+        if (reg == NULL)
+                return -ENOMEM;
+
+        reg->kr_fp = filp;
+        reg->kr_uid = uid;
+
+        cfs_down_write(&kg_sem);
+        /* List heads are zero-initialised; set one up on first use */
+        if (kkuc_groups[group].next == NULL)
+                CFS_INIT_LIST_HEAD(&kkuc_groups[group]);
+        cfs_list_add(&reg->kr_chain, &kkuc_groups[group]);
+        cfs_up_write(&kg_sem);
+
+        CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group);
+
+        return 0;
+}
+CFS_EXPORT_SYMBOL(libcfs_kkuc_group_add);
+
+/** Remove receivers from a broadcast group
+ * With \a uid == 0 a KUC_MSG_SHUTDOWN message is first broadcast to the
+ * group and then every receiver is removed; otherwise only the receivers
+ * registered with \a uid are removed.  Each removed receiver's file is
+ * released with cfs_put_file() and its registration freed.
+ *
+ * @param uid receiver to remove, or 0 for all receivers
+ * @param group group number, 0 .. KUC_GRP_MAX
+ * @return 0 on success (also when the group was never used),
+ *         -EINVAL for a group out of range
+ */
+int libcfs_kkuc_group_rem(int uid, int group)
+{
+        struct kkuc_reg *reg, *next;
+        ENTRY;
+
+        /* group indexes kkuc_groups[] */
+        if (group < 0 || group > KUC_GRP_MAX) {
+                CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+                RETURN(-EINVAL);
+        }
+
+        /* Head never initialised: nothing was ever added to this group */
+        if (kkuc_groups[group].next == NULL)
+                RETURN(0);
+
+        if (uid == 0) {
+                /* Broadcast a shutdown message */
+                struct kuc_hdr lh;
+
+                lh.kuc_magic = KUC_MAGIC;
+                lh.kuc_transport = KUC_TRANSPORT_GENERIC;
+                /* every header byte is written to userspace, so no field
+                 * may be left as uninitialised stack contents */
+                lh.kuc_flags = 0;
+                lh.kuc_msgtype = KUC_MSG_SHUTDOWN;
+                lh.kuc_msglen = sizeof(lh);
+                libcfs_kkuc_group_put(group, &lh);
+        }
+
+        cfs_down_write(&kg_sem);
+        cfs_list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) {
+                if ((uid == 0) || (uid == reg->kr_uid)) {
+                        cfs_list_del(&reg->kr_chain);
+                        CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n",
+                               reg->kr_uid, reg->kr_fp, group);
+                        cfs_put_file(reg->kr_fp);
+                        cfs_free(reg);
+                }
+        }
+        cfs_up_write(&kg_sem);
+
+        RETURN(0);
+}
+CFS_EXPORT_SYMBOL(libcfs_kkuc_group_rem);
+
+/** Send a message to every receiver registered in a broadcast group
+ * @param group group number, 0 .. KUC_GRP_MAX
+ * @param payload Payload data. First field of payload is always
+ *   struct kuc_hdr
+ * @return -EINVAL for a group out of range; 0 if the group has no
+ *         receivers; otherwise the result of the last
+ *         libcfs_kkuc_msg_put() call
+ */
+int libcfs_kkuc_group_put(int group, void *payload)
+{
+        struct kkuc_reg *reg;
+        int rc = 0;
+        ENTRY;
+
+        /* group indexes kkuc_groups[] */
+        if (group < 0 || group > KUC_GRP_MAX) {
+                CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group);
+                RETURN(-EINVAL);
+        }
+
+        cfs_down_read(&kg_sem);
+        /* A head is only initialised by the first group_add; walking a
+         * zeroed head would dereference NULL, so treat it as empty. */
+        if (kkuc_groups[group].next != NULL) {
+                cfs_list_for_each_entry(reg, &kkuc_groups[group], kr_chain) {
+                        rc = libcfs_kkuc_msg_put(reg->kr_fp, payload);
+                }
+        }
+        cfs_up_read(&kg_sem);
+
+        RETURN(rc);
+}
+CFS_EXPORT_SYMBOL(libcfs_kkuc_group_put);
+
+#endif /* LUSTRE_UTILS */
+
EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \
linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \
- linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c \
- linux-kernelcomm.c
+ linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c
+
fs = get_fs();
set_fs(KERNEL_DS);
- while (count > 0) {
+ while ((ssize_t)count > 0) {
size = filp->f_op->write(filp, (char *)buf, count, offset);
if (size < 0)
break;
count -= size;
+ buf += size;
size = 0;
}
set_fs(fs);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel <-> userspace communication routines. We'll use a shorthand term
- * "lnl" (Lustre NetLink) for the interface names for all arches (even though
- * implemtation may not use NetLink).
- * For Linux, we use Netlink sockets.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-
-/* This is the kernel side.
- * See libcfs/ulinux/ulinux-kernelcomm.c for the user side.
- */
-
-#if defined(HAVE_NETLINK) && defined(__KERNEL__)
-
-#include <linux/module.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <net/netlink.h>
-
-#include <libcfs/libcfs.h>
-
-/* Single Netlink Message type to send all Lustre messages */
-#define LNL_MSG 26
-
-static struct sock *lnl_socket = NULL;
-static atomic_t lnl_start_count = ATOMIC_INIT(0);
-static spinlock_t lnl_lock = SPIN_LOCK_UNLOCKED;
-
-/** Start the netlink socket for this transport
- * @param transport lnl_transport
- */
-int libcfs_klnl_start(int transport)
-{
- int rc = 0;
- ENTRY;
-
- /* If anyone needs it, we can add per-transport incoming message
- callbacks. Add the callback as a param here. Store the transport
- and callback in a table. Include a generalized incoming msg
- callback here to dispatch messages to the appropriate
- per-transport callback. */
-
- spin_lock(&lnl_lock);
- if (atomic_inc_return(&lnl_start_count) > 1)
- GOTO(out, rc = 0);
-
- lnl_socket = netlink_kernel_create(
-#ifdef HAVE_NETLINK_NS
- DEFAULT_NET,
-#endif
- LNL_SOCKET, LNL_GRP_CNT,
- NULL /* incoming cb */,
-#ifdef HAVE_NETLINK_CBMUTEX
- NULL,
-#endif
- THIS_MODULE);
- if (lnl_socket == NULL) {
- CERROR("Cannot open socket %d\n", LNL_SOCKET);
- atomic_dec(&lnl_start_count);
- GOTO(out, rc = -ENODEV);
- }
-
-out:
- spin_unlock(&lnl_lock);
- RETURN(rc);
-}
-EXPORT_SYMBOL(libcfs_klnl_start);
-
-static void send_shutdown_msg(int transport, int group) {
- struct lnl_hdr lh;
-
- lh.lnl_magic = LNL_MAGIC;
- lh.lnl_transport = LNL_TRANSPORT_GENERIC;
- lh.lnl_msgtype = LNL_MSG_SHUTDOWN;
- lh.lnl_msglen = sizeof(lh);
-
- libcfs_klnl_msg_put(0, group, &lh);
-}
-
-/* This should be called once per (started) transport
- * @param transport lnl_transport
- * @param group Broadcast group for shutdown message */
-int libcfs_klnl_stop(int transport, int group)
-{
- if (group)
- send_shutdown_msg(transport, group);
-
- spin_lock(&lnl_lock);
-
- if (atomic_dec_and_test(&lnl_start_count)) {
- sock_release(lnl_socket->sk_socket);
- lnl_socket = NULL;
- }
-
- spin_unlock(&lnl_lock);
- return 0;
-}
-EXPORT_SYMBOL(libcfs_klnl_stop);
-
-static struct sk_buff *netlink_make_msg(int pid, int seq, void *payload,
- int size)
-{
- struct sk_buff *skb;
- struct nlmsghdr *nlh;
- int len = NLMSG_SPACE(size);
- void *data;
-
-#ifdef HAVE_NETLINK_NL2
- skb = nlmsg_new(len, GFP_KERNEL);
-#else /* old */
- skb = nlmsg_new(len);
-#endif
-
- if (!skb)
- return NULL;
-
- nlh = nlmsg_put(skb, pid, seq, LNL_MSG, size, 0);
- if (!nlh) {
- nlmsg_free(skb);
- return NULL;
- }
-
- data = nlmsg_data(nlh);
- memcpy(data, payload, size);
- return skb;
-}
-
-/**
- * libcfs_klnl_msg_put - send an message from kernel to userspace
- * @param pid Process id to send message to for unicast messages; must be 0 for
- * broadcast
- * @param group Broadcast group; 0 for unicast messages
- * @param payload Payload data. First field of payload is always struct lnl_hdr
- *
- * Allocates an skb, builds the netlink message, and sends it to the pid.
- */
-int libcfs_klnl_msg_put(int pid, int group, void *payload)
-{
- struct lnl_hdr *lnlh = (struct lnl_hdr *)payload;
- struct sk_buff *skb;
- int rc;
-
- if (lnl_socket == NULL) {
- CERROR("LustreNetLink: not running\n");
- return -ENOSYS;
- }
-
- if (lnlh->lnl_magic != LNL_MAGIC) {
- CERROR("LustreNetLink: bad magic %x\n", lnlh->lnl_magic);
- return -ENOSYS;
- }
-
- if ((pid != 0) && (group != 0)) {
- CERROR("LustreNetLink: pid=%d or group=%d must be 0\n",
- pid, group);
- return -EINVAL;
- }
-
- skb = netlink_make_msg(pid, 0, payload, lnlh->lnl_msglen);
- if (!skb)
- return -ENOMEM;
-
- if (pid) {
- rc = netlink_unicast(lnl_socket, skb, pid,
- lnlh->lnl_flags & LNL_FL_BLOCK ? 0 : MSG_DONTWAIT);
- if (rc > 0)
- rc = 0;
- } else {
-#ifdef HAVE_NLMSG_MULTICAST_5ARGS
- rc = nlmsg_multicast(lnl_socket, skb, 0, group, GFP_KERNEL);
-#else
- rc = nlmsg_multicast(lnl_socket, skb, 0, group);
-#endif
- }
-
- CDEBUG(0, "Sent message pid=%d, group=%d, rc=%d\n", pid, group, rc);
-
- if (rc < 0)
- CWARN("message send failed (%d) [pid=%d,group=%d]\n", rc,
- pid, group);
-
- return rc;
-}
-EXPORT_SYMBOL(libcfs_klnl_msg_put);
-
-
-#endif
-
+++ /dev/null
-Makefile
-Makefile.in
+++ /dev/null
-/Makefile
-/Makefile.in
+++ /dev/null
-EXTRA_DIST := ulinux-kernelcomm.c
-
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Author: Nathan Rutman <nathan.rutman@sun.com>
- *
- * Kernel - userspace communication routines. We'll use a shorthand term
- * "lnl" (Lustre NetLink) for the interface names for all arches (even though
- * implemtation may not use NetLink).
- * For Linux, we use Netlink sockets.
- */
-
-#define DEBUG_SUBSYSTEM S_CLASS
-
-/* This is the userspace side.
- * See libcfs/linux/linux-kernelcomm.c for the kernel side.
- */
-
-#ifdef HAVE_NETLINK
-
-#include <libcfs/libcfs.h>
-#include <sys/socket.h>
-#include <linux/netlink.h>
-
-/** Start the userspace side of a LNL pipe.
- * @param link Private descriptor for pipe/socket.
- * @param groups LNL broadcast group to listen to
- * (can be null for unicast to this pid)
- */
-int libcfs_ulnl_start(lustre_netlink *link, int groups)
-{
- struct sockaddr_nl src_addr;
- int sock;
- int rc = 0;
-
- sock = socket(PF_NETLINK, SOCK_RAW, LNL_SOCKET);
- if (sock < 0)
- return -errno;
-
- memset(&src_addr, 0, sizeof(src_addr));
- src_addr.nl_family = AF_NETLINK;
- src_addr.nl_pid = getpid(); /* self pid */
- src_addr.nl_groups = groups;
- rc = bind(sock, (struct sockaddr*)&src_addr, sizeof(src_addr));
- if (rc < 0) {
- close(sock);
- return -errno;
- }
- *link = sock;
- return 0;
-}
-
-int libcfs_ulnl_stop(lustre_netlink *link)
-{
- return close(*link);
-}
-
-/** Read a message from the netlink layer.
- * Allocates memory, returns handle
- *
- * @param link Private descriptor for pipe/socket.
- * @param maxsize Maximum message size allowed
- * @param transport Only listen to messages on this transport
- * (and the generic transport)
- * @param lnlhh Handle to the new LNL message
- */
-int libcfs_ulnl_msg_get(lustre_netlink *link, int maxsize, int transport,
- struct lnl_hdr **lnlhh)
-{
- struct iovec iov;
- struct sockaddr_nl dest_addr;
- struct msghdr msg;
- struct nlmsghdr *nlh = NULL;
- struct lnl_hdr *lnlh;
- int rc = 0;
-
- nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(maxsize));
- if (!nlh)
- return -ENOMEM;
-
- memset(nlh, 0, NLMSG_SPACE(maxsize));
- iov.iov_base = (void *)nlh;
- iov.iov_len = NLMSG_SPACE(maxsize);
-
- memset(&dest_addr, 0, sizeof(dest_addr));
- msg.msg_name = (void *)&dest_addr;
- msg.msg_namelen = sizeof(dest_addr);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
-
- CDEBUG(0, "Waiting for message from kernel on pid %d\n", getpid());
-
- while (1) {
- /* Read message from kernel */
- rc = recvmsg(*link, &msg, 0);
- if (rc <= 0) {
- rc = -errno;
- break;
- }
- lnlh = (struct lnl_hdr *)NLMSG_DATA(nlh);
- CDEBUG(0, " Received message mg=%x t=%d m=%d l=%d\n",
- lnlh->lnl_magic, lnlh->lnl_transport, lnlh->lnl_msgtype,
- lnlh->lnl_msglen);
- if (lnlh->lnl_magic != LNL_MAGIC) {
- CERROR("bad message magic %x != %x\n",
- lnlh->lnl_magic, LNL_MAGIC);
- rc = -EPROTO;
- break;
- }
- if (lnlh->lnl_transport == transport ||
- lnlh->lnl_transport == LNL_TRANSPORT_GENERIC) {
- *lnlhh = lnlh;
- return 0;
- }
- /* Ignore messages on other transports */
- }
- free(nlh);
- return rc;
-}
-
-/* Free a message returned by the above fn */
-int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh)
-{
- /* compute nlmsdghdr offset */
- char *p = (char *)NLMSG_DATA(0);
-
- free((void *)((char *)*lnlhh - p));
- *lnlhh = NULL;
- return 0;
-}
-
-#else /* HAVE_NETLINK */
-
-#include <errno.h>
-
-typedef int lustre_netlink;
-int libcfs_ulnl_start(lustre_netlink *link, int groups) {
- return -ENOSYS;
-}
-int libcfs_ulnl_stop(lustre_netlink *link) {
- return 0;
-}
-struct lnl_hdr;
-int libcfs_ulnl_msg_get(lustre_netlink *link, int maxsize, int transport,
- struct lnl_hdr **lnlhh) {
- return -ENOSYS;
-}
-int libcfs_ulnl_msg_free(struct lnl_hdr **lnlhh) {
- return -ENOSYS;
-}
-#endif /* HAVE_NETLINK */
-
#define daemonize(l) do {} while (0)
#define sigfillset(l) do {} while (0)
#define recalc_sigpending(l) do {} while (0)
-#define cfs_kernel_thread(l,m,n) LBUG()
#define USERMODEHELPER(path, argv, envp) (0)
#define SIGNAL_MASK_ASSERT()
/* HSM copytool interface. priv is private state, managed internally
by these functions */
-extern int llapi_copytool_start(void **priv, int flags, int archive_num_count,
- int *archive_nums);
+extern int llapi_copytool_start(void **priv, char *fsname, int flags,
+ int archive_count, int *archives);
extern int llapi_copytool_fini(void **priv);
extern int llapi_copytool_recv(void *priv, struct hsm_action_list **hal,
int *msgsize);
__u32 cs_id;
} __attribute__((packed));
-struct changelog_show {
- __u64 cs_startrec;
- __u32 cs_pid;
- __u32 cs_flags;
-} __attribute__((packed));
-
/** changelog record */
struct llog_changelog_rec {
struct llog_rec_hdr cr_hdr;
void lustre_swab_fid2path (struct getinfo_fid2path *gf);
-extern void lustre_swab_lnlh(struct lnl_hdr *);
+extern void lustre_swab_kuch(struct kuc_hdr *);
#endif
#define LL_IOC_PATH2FID _IOR ('f', 173, long)
#define LL_IOC_GET_MDTIDX _IOR ('f', 174, int)
+#define LL_IOC_HSM_CT_START _IOW ('f', 178, struct lustre_kernelcomm *)
+
#define LL_STATFS_MDC 1
#define LL_STATFS_LOV 2
char cr_name[0]; /**< last element */
} __attribute__((packed));
-struct ioc_changelog_clear {
+struct ioc_changelog {
+ __u64 icc_recno;
__u32 icc_mdtindex;
__u32 icc_id;
- __u64 icc_recno;
+ __u32 icc_flags;
};
enum changelog_message_type {
#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE)
#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
#define OBD_IOC_CHANGELOG_REG _IOW ('f', 151, OBD_IOC_DATA_TYPE)
extern void llog_free_handle(struct llog_handle *handle);
int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
void *data, void *catdata);
+int llog_process_flags(struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata, int flags);
int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
void *data, void *catdata);
extern int llog_cancel_rec(struct llog_handle *loghandle, int index);
extern int llog_close(struct llog_handle *cathandle);
extern int llog_get_size(struct llog_handle *loghandle);
+/* llog_process flags */
+#define LLOG_FLAG_NODEAMON 0x0001
+
/* llog_cat.c - catalog api */
struct llog_process_data {
/**
*/
int lpd_startcat;
int lpd_startidx;
+ int lpd_flags; /** llog_process flags */
};
struct llog_process_cat_data {
struct llog_cookie *cookies);
int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data,
int startcat, int startidx);
+int llog_cat_process_flags(struct llog_handle *cat_llh, llog_cb_t cb, void *data,
+ int flags, int startcat, int startidx);
int llog_cat_process_thread(void *data);
int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data);
int llog_cat_set_first_idx(struct llog_handle *cathandle, int index);
int rc = -ENOSYS;
ENTRY;
- DEBUG_REQ(D_ERROR, req, "%s: handle setinfo\n", obd->obd_name);
+ DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name);
req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO);
return mdtidx;
}
+/**
+ * Copy an ioctl argument in from userspace and pass the kernel copy on
+ * to obd_iocontrol().
+ *
+ * @param cmd ioctl command forwarded to obd_iocontrol()
+ * @param exp export forwarded to obd_iocontrol()
+ * @param data userspace address of the argument
+ * @param len size of the argument in bytes
+ * @return -ENOMEM if the temporary buffer cannot be allocated, -EFAULT if
+ *         the copy from userspace fails, otherwise the obd_iocontrol()
+ *         result
+ */
+static int copy_and_ioctl(int cmd, struct obd_export *exp, void *data, int len)
+{
+        void *ptr;
+        int rc;
+
+        OBD_ALLOC(ptr, len);
+        if (ptr == NULL)
+                return -ENOMEM;
+        if (cfs_copy_from_user(ptr, data, len)) {
+                OBD_FREE(ptr, len);
+                return -EFAULT;
+        }
+        /* Hand over the kernel copy, not the raw userspace pointer */
+        rc = obd_iocontrol(cmd, exp, len, ptr, NULL);
+        OBD_FREE(ptr, len);
+        return rc;
+}
+
static int ll_dir_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct obd_ioctl_data *data;
+ int rc = 0;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
}
case IOC_MDC_LOOKUP: {
struct ptlrpc_request *request = NULL;
- int namelen, rc, len = 0;
+ int namelen, len = 0;
char *buf = NULL;
char *filename;
struct md_op_data *op_data;
struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
- int rc = 0;
int set_default = 0;
LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
struct lov_mds_md *lmm = NULL;
struct mdt_body *body;
char *filename = NULL;
- int rc, lmmsize;
+ int lmmsize;
if (cmd == IOC_MDC_GETFILEINFO ||
cmd == IOC_MDC_GETFILESTRIPE) {
struct lov_mds_md *lmm;
int lmmsize;
lstat_t st;
- int rc;
lumd = (struct lov_user_mds_data *)arg;
lum = &lumd->lmd_lmm;
char *buf = NULL;
char *str;
int len = 0;
- int rc;
rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
if (rc)
}
case OBD_IOC_QUOTACHECK: {
struct obd_quotactl *oqctl;
- int rc, error = 0;
+ int error = 0;
if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
sbi->ll_flags & LL_SBI_RMT_CLIENT)
}
case OBD_IOC_POLL_QUOTACHECK: {
struct if_quotacheck *check;
- int rc;
if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
sbi->ll_flags & LL_SBI_RMT_CLIENT)
}
case OBD_IOC_QUOTACTL: {
struct if_quotactl *qctl;
- int cmd, type, id, valid, rc = 0;
+ int cmd, type, id, valid;
OBD_ALLOC_PTR(qctl);
if (!qctl)
if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
inode == inode->i_sb->s_root->d_inode) {
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int rc;
LASSERT(fd != NULL);
rc = rct_add(&sbi->ll_rct, cfs_curproc_pid(), arg);
sizeof(struct lu_fid)))
RETURN(-EFAULT);
RETURN(0);
- case OBD_IOC_CHANGELOG_CLEAR: {
- struct ioc_changelog_clear *icc;
- int rc;
-
- OBD_ALLOC_PTR(icc);
- if (icc == NULL)
- RETURN(-ENOMEM);
- if (cfs_copy_from_user(icc, (void *)arg, sizeof(*icc)))
- GOTO(icc_free, rc = -EFAULT);
-
- rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(*icc), icc,NULL);
-
-icc_free:
- OBD_FREE_PTR(icc);
+ case OBD_IOC_CHANGELOG_SEND:
+ case OBD_IOC_CHANGELOG_CLEAR:
+ rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
+ sizeof(struct ioc_changelog));
RETURN(rc);
- }
case OBD_IOC_FID2PATH:
RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
+ case LL_IOC_HSM_CT_START:
+ rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void *)arg,
+ sizeof(struct lustre_kernelcomm));
+ RETURN(rc);
default:
RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg));
OBD_FREE_PTR(oqctl);
break;
}
+ case OBD_IOC_CHANGELOG_SEND:
case OBD_IOC_CHANGELOG_CLEAR: {
- struct ioc_changelog_clear *icc = karg;
+ struct ioc_changelog *icc = karg;
if (icc->icc_mdtindex >= count)
RETURN(-ENODEV);
return count;
}
-static struct lnl_hdr *changelog_lnl_alloc(int len, int flags)
-{
- struct lnl_hdr *lh;
-
- OBD_ALLOC(lh, len);
- if (lh == NULL)
- RETURN(NULL);
-
- lh->lnl_magic = LNL_MAGIC;
- lh->lnl_transport = LNL_TRANSPORT_CHANGELOG;
- lh->lnl_flags = flags;
- lh->lnl_msgtype = CL_RECORD;
- lh->lnl_msglen = len;
- return lh;
-}
-
-#define D_CHANGELOG 0
-
-static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr,
- void *data)
-{
- struct changelog_show *cs = data;
- struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
- struct lnl_hdr *lh;
- int len, rc;
- ENTRY;
-
- if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) ||
- (rec->cr.cr_type >= CL_LAST)) {
- CERROR("Not a changelog rec %d/%d\n", rec->cr_hdr.lrh_type,
- rec->cr.cr_type);
- RETURN(-EINVAL);
- }
-
- if (rec->cr.cr_index < cs->cs_startrec) {
- /* Skip entries earlier than what we are interested in */
- CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64"\n",
- rec->cr.cr_index, cs->cs_startrec);
- RETURN(0);
- }
-
- CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID
- " %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
- changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
- rec->cr.cr_flags & CLF_FLAGMASK,
- PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid),
- rec->cr.cr_namelen, rec->cr.cr_name);
-
- len = sizeof(*lh) + sizeof(rec->cr) + rec->cr.cr_namelen;
-
- /* Set up the netlink message */
- lh = changelog_lnl_alloc(len, cs->cs_flags);
- if (lh == NULL)
- RETURN(-ENOMEM);
- memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
-
- rc = libcfs_klnl_msg_put(cs->cs_pid, 0, lh);
- CDEBUG(D_CHANGELOG, "nlmsg pid %d len %d rc %d\n", cs->cs_pid, len, rc);
-
- OBD_FREE(lh, len);
-
- RETURN(rc);
-}
-
-static int lproc_mdc_wr_changelog(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- struct obd_device *obd = data;
- struct llog_ctxt *ctxt;
- struct llog_handle *llh;
- struct lnl_hdr *lnlh;
- struct changelog_show cs = {};
- int rc;
-
- if (count != sizeof(cs))
- return -EINVAL;
-
- if (cfs_copy_from_user(&cs, buffer, sizeof(cs)))
- return -EFAULT;
-
- CDEBUG(D_CHANGELOG, "changelog to pid=%d start "LPU64"\n",
- cs.cs_pid, cs.cs_startrec);
-
- /* Set up the remote catalog handle */
- ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT);
- if (ctxt == NULL)
- RETURN(-ENOENT);
- rc = llog_create(ctxt, &llh, NULL, CHANGELOG_CATALOG);
- if (rc) {
- CERROR("llog_create() failed %d\n", rc);
- GOTO(out, rc);
- }
- rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
- if (rc) {
- CERROR("llog_init_handle failed %d\n", rc);
- GOTO(out, rc);
- }
-
- rc = llog_cat_process(llh, changelog_show_cb, &cs, 0, 0);
-
- /* Send EOF */
- if ((lnlh = changelog_lnl_alloc(sizeof(*lnlh), cs.cs_flags))) {
- lnlh->lnl_msgtype = CL_EOF;
- libcfs_klnl_msg_put(cs.cs_pid, 0, lnlh);
- OBD_FREE(lnlh, sizeof(*lnlh));
- }
-
-out:
- if (llh)
- llog_cat_put(llh);
- if (ctxt)
- llog_ctxt_put(ctxt);
- if (rc < 0)
- return rc;
- return count;
-}
-
/* temporary for testing */
-static int mdc_wr_netlink(struct file *file, const char *buffer,
- unsigned long count, void *data)
+static int mdc_wr_kuc(struct file *file, const char *buffer,
+ unsigned long count, void *data)
{
struct obd_device *obd = data;
- struct lnl_hdr *lh;
+ struct kuc_hdr *lh;
struct hsm_action_list *hal;
struct hsm_action_item *hai;
int len;
- int pid, rc;
+ int fd, rc;
- rc = lprocfs_write_helper(buffer, count, &pid);
+ rc = lprocfs_write_helper(buffer, count, &fd);
if (rc)
return rc;
- if (pid < 0)
+ if (fd < 0)
return -ERANGE;
- CWARN("message to pid %d\n", pid);
+ CWARN("message to fd %d\n", fd);
len = sizeof(*lh) + sizeof(*hal) + MTI_NAME_MAXLEN +
/* for mockup below */ 2 * cfs_size_round(sizeof(*hai));
OBD_ALLOC(lh, len);
- lh->lnl_magic = LNL_MAGIC;
- lh->lnl_transport = LNL_TRANSPORT_HSM;
- lh->lnl_msgtype = HMT_ACTION_LIST;
- lh->lnl_msglen = len;
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = KUC_TRANSPORT_HSM;
+ lh->kuc_msgtype = HMT_ACTION_LIST;
+ lh->kuc_msglen = len;
hal = (struct hsm_action_list *)(lh + 1);
hal->hal_version = HAL_VERSION;
hai->hai_fid.f_oid = 10;
hai->hai_len = sizeof(*hai);
- /* This works for either broadcast or unicast to a single pid */
- rc = libcfs_klnl_msg_put(pid, pid == 0 ? LNL_GRP_HSM : 0, lh);
-
+ /* This works for either broadcast or unicast to a single fd */
+ if (fd == 0) {
+ rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh);
+ } else {
+ cfs_file_t *fp = cfs_get_fd(fd);
+ rc = libcfs_kkuc_msg_put(fp, lh);
+ cfs_put_file(fp);
+ }
OBD_FREE(lh, len);
if (rc < 0)
return rc;
{ "timeouts", lprocfs_rd_timeouts, 0, 0 },
{ "import", lprocfs_rd_import, 0, 0 },
{ "state", lprocfs_rd_state, 0, 0 },
- { "changelog_trigger",0,lproc_mdc_wr_changelog, 0 },
- { "hsm_nl", 0, mdc_wr_netlink, 0, 0, 0222 },
+ { "hsm_nl", 0, mdc_wr_kuc, 0, 0, 0222 },
{ 0 }
};
return rc;
}
+/**
+ * Write a changelog KUC message header at the start of \a buf.
+ *
+ * The message type is preset to CL_RECORD; callers that send something
+ * else overwrite kuc_msgtype afterwards.
+ *
+ * \param buf    message buffer the header is written into
+ * \param len    total message length (asserted to be <= CR_MAXSIZE)
+ * \param flags  value stored in kuc_flags
+ *
+ * \retval \a buf viewed as a struct kuc_hdr
+ */
+static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
+{
+        struct kuc_hdr *hdr;
+
+        LASSERT(len <= CR_MAXSIZE);
+
+        hdr = (struct kuc_hdr *)buf;
+        hdr->kuc_msglen = len;
+        hdr->kuc_msgtype = CL_RECORD;
+        hdr->kuc_flags = flags;
+        hdr->kuc_transport = KUC_TRANSPORT_CHANGELOG;
+        hdr->kuc_magic = KUC_MAGIC;
+
+        return hdr;
+}
+
+#define D_CHANGELOG 0
+
+/* Per-request state handed to mdc_changelog_send_thread(), which frees it. */
+struct changelog_show {
+ __u64 cs_startrec; /* records with a lower cr_index are skipped */
+ __u32 cs_flags; /* copied into kuc_flags of every message */
+ cfs_file_t *cs_fp; /* destination file for libcfs_kkuc_msg_put() */
+ char *cs_buf; /* CR_MAXSIZE message buffer, allocated by the thread */
+ struct obd_device *cs_obd; /* device whose changelog llog context is read */
+};
+
+/**
+ * llog callback: forward one changelog record to the userspace reader.
+ *
+ * Records that are not CHANGELOG_REC, or whose cr_type is >= CL_LAST, are
+ * rejected with -EINVAL. Records with an index below cs_startrec are
+ * skipped. Anything else is copied (record plus name) behind a kuc_hdr in
+ * cs_buf and written to cs_fp with libcfs_kkuc_msg_put().
+ *
+ * \param llh   llog handle being processed (not used here)
+ * \param hdr   record header, reinterpreted as a llog_changelog_rec
+ * \param data  struct changelog_show describing the destination
+ *
+ * \retval 0 for skipped records
+ * \retval -EINVAL for records that are not valid changelog records
+ * \retval otherwise the libcfs_kkuc_msg_put() result
+ */
+static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr,
+ void *data)
+{
+ struct changelog_show *cs = data;
+ struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
+ struct kuc_hdr *lh;
+ int len, rc;
+ ENTRY;
+
+ if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) ||
+ (rec->cr.cr_type >= CL_LAST)) {
+ CERROR("Not a changelog rec %d/%d\n", rec->cr_hdr.lrh_type,
+ rec->cr.cr_type);
+ RETURN(-EINVAL);
+ }
+
+ if (rec->cr.cr_index < cs->cs_startrec) {
+ /* Skip entries earlier than what we are interested in */
+ CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64"\n",
+ rec->cr.cr_index, cs->cs_startrec);
+ RETURN(0);
+ }
+
+ CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID
+ " %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
+ changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
+ rec->cr.cr_flags & CLF_FLAGMASK,
+ PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid),
+ rec->cr.cr_namelen, rec->cr.cr_name);
+
+ /* Message = KUC header + fixed-size record + name bytes;
+ * changelog_kuc_hdr() asserts that this fits in CR_MAXSIZE. */
+ len = sizeof(*lh) + sizeof(rec->cr) + rec->cr.cr_namelen;
+
+ /* Set up the message */
+ lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags);
+ memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
+
+ rc = libcfs_kkuc_msg_put(cs->cs_fp, lh);
+ CDEBUG(D_CHANGELOG, "kucmsg fp %p len %d rc %d\n", cs->cs_fp, len,rc);
+
+ RETURN(rc);
+}
+
+/**
+ * Thread body: stream the changelog catalog to the file in cs->cs_fp.
+ *
+ * Allocates the CR_MAXSIZE message buffer, opens CHANGELOG_CATALOG through
+ * the LLOG_CHANGELOG_REPL_CTXT context of cs->cs_obd, runs
+ * changelog_show_cb() over it and finishes with a CL_EOF message.
+ *
+ * On every path the thread puts the file reference in cs_fp, releases the
+ * llog handle and context if they were obtained, and frees cs_buf and
+ * \a csdata itself.
+ *
+ * \param csdata  struct changelog_show; ownership passes to this thread
+ *
+ * \retval -ENOMEM or -ENOENT for setup failures, or the result of
+ *         llog_create(), llog_init_handle() or llog_cat_process_flags()
+ */
+static int mdc_changelog_send_thread(void *csdata)
+{
+ struct changelog_show *cs = csdata;
+ struct llog_ctxt *ctxt = NULL;
+ struct llog_handle *llh = NULL;
+ struct kuc_hdr *kuch;
+ int rc;
+
+ CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n",
+ cs->cs_fp, cs->cs_startrec);
+
+ OBD_ALLOC(cs->cs_buf, CR_MAXSIZE);
+ if (cs->cs_buf == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ /* Set up the remote catalog handle */
+ ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT);
+ if (ctxt == NULL)
+ GOTO(out, rc = -ENOENT);
+ rc = llog_create(ctxt, &llh, NULL, CHANGELOG_CATALOG);
+ if (rc) {
+ CERROR("llog_create() failed %d\n", rc);
+ GOTO(out, rc);
+ }
+ rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
+ if (rc) {
+ CERROR("llog_init_handle failed %d\n", rc);
+ GOTO(out, rc);
+ }
+
+ /* We need the pipe fd open, so llog_process can't daemonize */
+ rc = llog_cat_process_flags(llh, changelog_show_cb, cs,
+ LLOG_FLAG_NODEAMON, 0, 0);
+
+ /* Send EOF no matter what the processing result was. Note that the
+ * setup failures above jump straight to "out" and send no EOF. */
+ if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch),
+ cs->cs_flags))) {
+ kuch->kuc_msgtype = CL_EOF;
+ libcfs_kkuc_msg_put(cs->cs_fp, kuch);
+ }
+
+out:
+ /* matches the cfs_get_fd() in mdc_ioc_changelog_send() */
+ cfs_put_file(cs->cs_fp);
+ if (llh)
+ llog_cat_put(llh);
+ if (ctxt)
+ llog_ctxt_put(ctxt);
+ if (cs->cs_buf)
+ OBD_FREE(cs->cs_buf, CR_MAXSIZE);
+ OBD_FREE_PTR(cs);
+ return rc;
+}
+
+/**
+ * Handle OBD_IOC_CHANGELOG_SEND: start a kernel thread that writes changelog
+ * records into the file descriptor supplied by userspace.
+ *
+ * \param obd  device whose changelog is to be sent
+ * \param icc  request: icc_id is the descriptor to write to, icc_recno the
+ *             first record wanted, icc_flags the message flags
+ *
+ * \retval 0 if the sender thread was started
+ * \retval -ENOMEM if the request state cannot be allocated
+ * \retval -EBADF if icc_id does not resolve to an open file
+ * \retval negative cfs_kernel_thread() result if the thread cannot start
+ */
+static int mdc_ioc_changelog_send(struct obd_device *obd,
+                                  struct ioc_changelog *icc)
+{
+        struct changelog_show *cs;
+        int rc;
+
+        /* Freed in mdc_changelog_send_thread */
+        OBD_ALLOC_PTR(cs);
+        if (!cs)
+                return -ENOMEM;
+
+        cs->cs_obd = obd;
+        cs->cs_startrec = icc->icc_recno;
+        cs->cs_flags = icc->icc_flags;
+        /* matching cfs_put_file in mdc_changelog_send_thread */
+        cs->cs_fp = cfs_get_fd(icc->icc_id);
+        if (cs->cs_fp == NULL) {
+                /* Do not start a thread that would write to a NULL file */
+                CERROR("Invalid changelog pipe descriptor\n");
+                OBD_FREE_PTR(cs);
+                return -EBADF;
+        }
+
+        /* New thread because we should return to user app before
+           writing into our pipe */
+        rc = cfs_kernel_thread(mdc_changelog_send_thread, cs,
+                               CLONE_VM | CLONE_FILES);
+        if (rc >= 0) {
+                CDEBUG(D_CHANGELOG, "start changelog thread: %d\n", rc);
+                return 0;
+        }
+
+        CERROR("Failed to start changelog thread: %d\n", rc);
+        /* The thread never ran, so drop the file reference it would
+         * otherwise have released. */
+        cfs_put_file(cs->cs_fp);
+        OBD_FREE_PTR(cs);
+        return rc;
+}
+
+static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
+ struct lustre_kernelcomm *lk);
+
static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg)
{
return -EINVAL;
}
switch (cmd) {
+ case LL_IOC_HSM_CT_START:
+ rc = mdc_ioc_hsm_ct_start(exp, karg);
+ GOTO(out, rc);
+ case OBD_IOC_CHANGELOG_SEND:
+ rc = mdc_ioc_changelog_send(obd, karg);
+ GOTO(out, rc);
case OBD_IOC_CHANGELOG_CLEAR: {
- struct ioc_changelog_clear *icc = karg;
+ struct ioc_changelog *icc = karg;
struct changelog_setinfo cs =
- {icc->icc_recno, icc->icc_id};
+ {.cs_recno = icc->icc_recno, .cs_id = icc->icc_id};
rc = obd_set_info_async(exp, strlen(KEY_CHANGELOG_CLEAR),
KEY_CHANGELOG_CLEAR, sizeof(cs), &cs,
NULL);
}
}
+/**
+ * Handle LL_IOC_HSM_CT_START: register or unregister a copytool with the
+ * HSM kernel-user communication group.
+ *
+ * With LK_FLG_STOP set in lk_flags the (lk_uid, lk_group) entry is removed.
+ * Otherwise the file behind lk_wfd is added to the group.
+ *
+ * \param exp  export the ioctl arrived on (not used here)
+ * \param lk   request from userspace; lk_group must be KUC_GRP_HSM
+ *
+ * \retval -EINVAL if lk_group is not KUC_GRP_HSM
+ * \retval otherwise the libcfs_kkuc_group_rem()/libcfs_kkuc_group_add()
+ *         result
+ */
+static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
+ struct lustre_kernelcomm *lk)
+{
+ int rc = 0;
+
+ if (lk->lk_group != KUC_GRP_HSM) {
+ CERROR("Bad copytool group %d\n", lk->lk_group);
+ return -EINVAL;
+ }
+
+ CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd,
+ lk->lk_uid, lk->lk_group, lk->lk_flags);
+
+ if (lk->lk_flags & LK_FLG_STOP)
+ rc = libcfs_kkuc_group_rem(lk->lk_uid,lk->lk_group);
+ else {
+ cfs_file_t *fp = cfs_get_fd(lk->lk_wfd);
+ rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group);
+ /* On failure the group did not take the file, so drop our
+ * reference here. */
+ if (rc && fp)
+ cfs_put_file(fp);
+ }
+
+ /* lk_data is archive number mask */
+ /* TODO: register archive num with mdt so coordinator can choose
+ correct agent. */
+
+ return rc;
+}
+
/**
- * Send a message to any listening copytools, nonblocking
- * @param val LNL message (lnl_hdr + hsm_action_list)
+ * Send a message to any listening copytools
+ * @param val KUC message (kuc_hdr + hsm_action_list)
* @param len total length of message
*/
static int mdc_hsm_copytool_send(int len, void *val)
{
- struct lnl_hdr *lh = (struct lnl_hdr *)val;
+ struct kuc_hdr *lh = (struct kuc_hdr *)val;
struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1);
int rc;
ENTRY;
(int) (sizeof(*lh) + sizeof(*hal)));
RETURN(-EPROTO);
}
- if (lh->lnl_magic == __swab16(LNL_MAGIC)) {
- lustre_swab_lnlh(lh);
+ if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
+ lustre_swab_kuch(lh);
lustre_swab_hal(hal);
- } else if (lh->lnl_magic != LNL_MAGIC) {
- CERROR("Bad magic %x!=%x\n", lh->lnl_magic, LNL_MAGIC);
+ } else if (lh->kuc_magic != KUC_MAGIC) {
+ CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC);
RETURN(-EPROTO);
}
- CDEBUG(D_IOCTL, " Received message mg=%x t=%d m=%d l=%d actions=%d\n",
- lh->lnl_magic, lh->lnl_transport, lh->lnl_msgtype,
- lh->lnl_msglen, hal->hal_count);
+ CDEBUG(D_HSM, " Received message mg=%x t=%d m=%d l=%d actions=%d\n",
+ lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype,
+ lh->kuc_msglen, hal->hal_count);
/* Broadcast to HSM listeners */
- rc = libcfs_klnl_msg_put(0, LNL_GRP_HSM, lh);
+ rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh);
RETURN(rc);
}
CERROR("failed to setup llogging subsystems\n");
}
- /* ignore errors */
- libcfs_klnl_start(LNL_TRANSPORT_HSM);
- libcfs_klnl_start(LNL_TRANSPORT_CHANGELOG);
-
RETURN(rc);
err_close_lock:
switch (stage) {
case OBD_CLEANUP_EARLY:
+ break;
case OBD_CLEANUP_EXPORTS:
+ /* Failsafe, ok if racy */
+ if (obd->obd_type->typ_refcnt <= 1)
+ libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
+
/* If we set up but never connected, the
client import will not have been cleaned. */
if (obd->u.cli.cl_import) {
{
struct client_obd *cli = &obd->u.cli;
- libcfs_klnl_stop(LNL_TRANSPORT_HSM, LNL_GRP_HSM);
- libcfs_klnl_stop(LNL_TRANSPORT_CHANGELOG, 0);
-
OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
*/
int mdt_hsm_copytool_send(struct obd_export *exp)
{
- struct lnl_hdr *lh;
+ struct kuc_hdr *lh;
struct hsm_action_list *hal;
struct hsm_action_item *hai;
int rc, len;
if (lh == NULL)
RETURN(-ENOMEM);
- lh->lnl_magic = LNL_MAGIC;
- lh->lnl_transport = LNL_TRANSPORT_HSM;
- lh->lnl_msgtype = HMT_ACTION_LIST;
- lh->lnl_msglen = len;
+ lh->kuc_magic = KUC_MAGIC;
+ lh->kuc_transport = KUC_TRANSPORT_HSM;
+ lh->kuc_msgtype = HMT_ACTION_LIST;
+ lh->kuc_msglen = len;
hal = (struct hsm_action_list *)(lh + 1);
hal->hal_version = HAL_VERSION;
return 0;
}
- cfs_daemonize_ctxt("llog_process_thread");
+ if (!(lpi->lpi_flags & LLOG_FLAG_NODEAMON))
+ cfs_daemonize_ctxt("llog_process_thread");
if (cd != NULL) {
last_called_index = cd->lpcd_first_idx;
return 0;
}
-int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
- void *data, void *catdata)
+int llog_process_flags(struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata, int flags)
{
struct llog_process_info *lpi;
int rc;
lpi->lpi_cb = cb;
lpi->lpi_cbdata = data;
lpi->lpi_catdata = catdata;
+ lpi->lpi_flags = flags;
#ifdef __KERNEL__
cfs_init_completion(&lpi->lpi_completion);
OBD_FREE_PTR(lpi);
RETURN(rc);
}
+EXPORT_SYMBOL(llog_process_flags);
+
+/* Compatibility wrapper: llog_process_flags() with no flags set. */
+int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata)
+{
+ return llog_process_flags(loghandle, cb, data, catdata, 0);
+}
EXPORT_SYMBOL(llog_process);
inline int llog_get_size(struct llog_handle *loghandle)
cd.lpcd_first_idx = d->lpd_startidx;
cd.lpcd_last_idx = 0;
- rc = llog_process(llh, d->lpd_cb, d->lpd_data, &cd);
+ rc = llog_process_flags(llh, d->lpd_cb, d->lpd_data, &cd,
+ d->lpd_flags);
/* Continue processing the next log from idx 0 */
d->lpd_startidx = 0;
} else {
- rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL);
+ rc = llog_process_flags(llh, d->lpd_cb, d->lpd_data, NULL,
+ d->lpd_flags);
}
RETURN(rc);
}
-int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data,
- int startcat, int startidx)
+int llog_cat_process_flags(struct llog_handle *cat_llh, llog_cb_t cb,
+ void *data, int flags, int startcat, int startidx)
{
struct llog_process_data d;
struct llog_log_hdr *llh = cat_llh->lgh_hdr;
d.lpd_cb = cb;
d.lpd_startcat = startcat;
d.lpd_startidx = startidx;
+ d.lpd_flags = flags;
if (llh->llh_cat_idx > cat_llh->lgh_last_idx) {
struct llog_process_cat_data cd;
cd.lpcd_first_idx = llh->llh_cat_idx;
cd.lpcd_last_idx = 0;
- rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+ rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, &cd,
+ flags);
if (rc != 0)
RETURN(rc);
cd.lpcd_first_idx = 0;
cd.lpcd_last_idx = cat_llh->lgh_last_idx;
- rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+ rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, &cd,
+ flags);
} else {
- rc = llog_process(cat_llh, llog_cat_process_cb, &d, NULL);
+ rc = llog_process_flags(cat_llh, llog_cat_process_cb, &d, NULL,
+ flags);
}
RETURN(rc);
}
+EXPORT_SYMBOL(llog_cat_process_flags);
+
+/* Compatibility wrapper: llog_cat_process_flags() with no flags set. */
+int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data,
+ int startcat, int startidx)
+{
+ return llog_cat_process_flags(cat_llh, cb, data, 0, startcat, startidx);
+}
EXPORT_SYMBOL(llog_cat_process);
#ifdef __KERNEL__
void *lpi_cbdata;
void *lpi_catdata;
int lpi_rc;
+ int lpi_flags;
cfs_completion_t lpi_completion;
};
__swab32s (&k->lk_padding);
}
-void lustre_swab_lnlh(struct lnl_hdr *l)
+void lustre_swab_kuch(struct kuc_hdr *l)
{
- __swab16s(&l->lnl_magic);
- /* __u8 l->lnl_transport */
- __swab16s(&l->lnl_msgtype);
- __swab16s(&l->lnl_msglen);
+ __swab16s(&l->kuc_magic);
+ /* __u8 l->kuc_transport */
+ __swab16s(&l->kuc_msgtype);
+ __swab16s(&l->kuc_msglen);
}
-EXPORT_SYMBOL(lustre_swab_lnlh);
+EXPORT_SYMBOL(lustre_swab_kuch);
#include <stdio.h>
#include <getopt.h>
+#include <signal.h>
#include <libcfs/libcfs.h>
#include <lustre/lustre_user.h>
#include <lustre/liblustreapi.h>
+void *ctdata;
+
+/* Signal handler (installed for SIGINT in main): report the signal, shut
+ * the copytool interface down via the global ctdata, and exit with status 1.
+ * NOTE(review): psignal() and exit() are presumably not async-signal-safe;
+ * confirm this is acceptable for a test tool. */
+void handler(int signal ) {
+ psignal(signal, "exiting");
+ /* If we don't clean up upon interrupt, umount thinks there's a ref
+ * and doesn't remove us from mtab (EINPROGRESS). The lustre client
+ * does successfully unmount and the mount is actually gone, but the
+ * mtab entry remains. So this just makes mtab happier. */
+ llapi_copytool_fini(&ctdata);
+ exit(1);
+}
+
int main(int argc, char **argv) {
int c, test = 0;
struct option long_opts[] = {
{"test", no_argument, 0, 't'},
{0, 0, 0, 0}
};
- void *ctdata;
- int archive_nums[] = {1}; /* which archive numbers we care about */
+ int archives[] = {1}; /* which archives we care about */
int rc;
optind = 0;
}
}
- rc = llapi_copytool_start(&ctdata, 0, ARRAY_SIZE(archive_nums),
- archive_nums);
+ if (optind != argc - 1) {
+ fprintf(stderr, "Usage: %s <fsname>\n", argv[0]);
+ return -EINVAL;
+ }
+
+ rc = llapi_copytool_start(&ctdata, argv[optind], 0,
+ ARRAY_SIZE(archives), archives);
if (rc < 0) {
fprintf(stderr, "Can't start copytool interface: %s\n",
strerror(-rc));
printf("Waiting for message from kernel (pid=%d)\n", getpid());
+ signal(SIGINT, handler);
+
while(1) {
struct hsm_action_list *hal;
struct hsm_action_item *hai;
export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT="$LRSYNC_EXCEPT 5a 5b"
-# bug number for skipped test: - 20878
+[ -n "$ONLY" ] && SLOW=yes
+ALWAYS_EXCEPT="$LRSYNC_EXCEPT"
+# bug number for skipped test:
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$ALWAYS_EXCEPT$EXCEPT" ] && \
test_163() {
remote_mds_nodsh && skip "remote MDS with nodsh" && return
- copytool --test || { skip "copytool not runnable: $?" && return; }
- copytool &
+ copytool --test $FSNAME || { skip "copytool not runnable: $?" && return; }
+ copytool $FSNAME &
sleep 1
local uuid=$($LCTL get_param -n mdc.${FSNAME}-MDT0000-mdc-*.uuid)
# this proc file is temporary and linux-only
- do_facet mds lctl set_param mdt.${FSNAME}-MDT0000.mdccomm=$uuid || error "lnl send failed"
- kill $!
+ do_facet mds lctl set_param mdt.${FSNAME}-MDT0000.mdccomm=$uuid ||\
+ error "kernel->userspace send failed"
+ kill -INT $!
}
-run_test 163 "LustreNetLink kernelcomms"
+run_test 163 "kernel <-> userspace comms"
test_169() {
# do directio so as not to populate the page cache
llverdev_LDADD := $(EXT2FSLIB) $(BLKIDLIB)
L_IOCTL := $(top_builddir)/libcfs/libcfs/util/l_ioctl.c
-L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/ulinux/ulinux-kernelcomm.c
+L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/kernel_user_comm.c
liblustreapi_a_SOURCES = liblustreapi.c $(L_IOCTL) $(L_KERNELCOMM)
libiam_a_SOURCES = libiam.c
return get_root_path(want, fsname, NULL, mntdir, idx);
}
+/* Given a path, find the corresponding Lustre fsname */
int llapi_search_fsname(const char *pathname, char *fsname)
{
return get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL,
/* Print mdtname 'name' into 'buf' using 'format'. Add -MDT0000 if needed.
* format must have %s%s, buf must be > 16
+ * Eg: if name = "lustre-MDT0000", "lustre", or "lustre-MDT0000_UUID"
+ * then buf = "lustre-MDT0000"
*/
static int get_mdtname(char *name, char *format, char *buf)
{
return sprintf(buf, format, name, suffix);
}
+/**
+ * Issue an ioctl on the filesystem root, optionally recording the MDT index
+ * inside the ioctl data first.
+ *
+ * \param mdtname    path (leading '/'), fsname, or mdtname (lustre-MDT0004)
+ * \param opc        ioctl request code
+ * \param data       ioctl argument, passed to ioctl() unchanged
+ * \param mdtidxp    pointer to an integer within \a data to be filled in with
+ *                   the MDT index (0 when a path is given or no MDT is
+ *                   specified); NULL means nothing is filled in
+ * \param want_error WANT_ERROR to report failures through llapi_err(),
+ *                   0 to stay quiet
+ *
+ * \retval -EINVAL if \a mdtname cannot be turned into an MDT name
+ * \retval negative get_root_path() result if the root cannot be opened
+ * \retval -errno if the ioctl fails, otherwise the ioctl() return value
+ */
+static int root_ioctl(const char *mdtname, int opc, void *data, int *mdtidxp,
+                      int want_error)
+{
+        char fsname[20];
+        char *ptr;
+        int fd, index, rc;
+
+        /* Take path, fsname, or MDTname.  Assume MDT0000 in the former cases.
+           Open root and parse mdt index. */
+        if (mdtname[0] == '/') {
+                index = 0;
+                rc = get_root_path(WANT_FD | want_error, NULL, &fd,
+                                   (char *)mdtname, -1);
+        } else {
+                if (get_mdtname((char *)mdtname, "%s%s", fsname) < 0)
+                        return -EINVAL;
+                /* fsname now ends in "-MDTxxxx": cut the suffix off and
+                 * parse the index digits that follow "-MDT" */
+                ptr = fsname + strlen(fsname) - 8;
+                *ptr = '\0';
+                index = strtol(ptr + 4, NULL, 10);
+                rc = get_root_path(WANT_FD | want_error, fsname, &fd, NULL, -1);
+        }
+        if (rc < 0) {
+                if (want_error)
+                        llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
+                                  "Can't open %s: %d\n", mdtname, rc);
+                return rc;
+        }
+
+        if (mdtidxp)
+                *mdtidxp = index;
+
+        rc = ioctl(fd, opc, data);
+        /* ioctl() reports failure as -1 with the reason in errno; turn that
+         * into a negative errno, as every other error return here does, and
+         * capture it before close() can overwrite errno. */
+        if (rc == -1)
+                rc = -errno;
+        if (rc && want_error)
+                llapi_err(LLAPI_MSG_ERROR, "ioctl %d err %d", opc, rc);
+
+        close(fd);
+        return rc;
+}
+
/****** Changelog API ********/
+
+/**
+ * Build an ioc_changelog request and send it through root_ioctl().
+ *
+ * \param mdtname  path, fsname, or mdtname identifying the target
+ * \param opc      changelog ioctl request code
+ * \param id       stored in icc_id
+ * \param recno    stored in icc_recno
+ * \param flags    stored in icc_flags
+ *
+ * \retval the root_ioctl() result
+ */
+static int changelog_ioctl(const char *mdtname, int opc, int id,
+                           long long recno, int flags)
+{
+        struct ioc_changelog req;
+
+        req.icc_flags = flags;
+        req.icc_recno = recno;
+        req.icc_id = id;
+
+        /* root_ioctl() stores the parsed MDT index through this pointer
+         * before it issues the ioctl */
+        return root_ioctl(mdtname, opc, &req, (int *)(&req.icc_mdtindex),
+                          WANT_ERROR);
+}
+
#define CHANGELOG_PRIV_MAGIC 0xCA8E1080
struct changelog_private {
int magic;
int flags;
- lustre_netlink lnl;
+ lustre_kernelcomm kuc;
+ char *buf;
};
/** Start reading from a changelog
long long startrec)
{
struct changelog_private *cp;
- struct changelog_show cs = {};
- char mdtname[20];
- char pattern[PATH_MAX];
- char trigger[PATH_MAX];
- int fd, rc, pid;
-
- /* Find mdtname from path, fsname, mdtname, or mdtname_UUID */
- if (device[0] == '/') {
- if ((rc = llapi_search_fsname(device, mdtname)))
- return rc;
- if ((rc = get_mdtname(mdtname, "%s%s", mdtname)) < 0)
- return rc;
- } else {
- if ((rc = get_mdtname((char *)device, "%s%s", mdtname)) < 0)
- return rc;
- }
-
- /* Find corresponding mdc trigger */
- snprintf(pattern, PATH_MAX,
- "/proc/fs/lustre/mdc/%s-*/changelog_trigger", mdtname);
- rc = first_match(pattern, trigger);
- if (rc)
- return rc;
-
- /* Make sure we can write the trigger */
- fd = open(trigger, O_WRONLY);
- if (fd < 0)
- return -errno;
+ int rc;
/* Set up the receiver control struct */
cp = malloc(sizeof(*cp));
- if (cp == NULL) {
- close(fd);
+ if (cp == NULL)
return -ENOMEM;
+
+ cp->buf = malloc(CR_MAXSIZE);
+ if (cp->buf == NULL) {
+ rc = -ENOMEM;
+ goto out_free;
}
cp->magic = CHANGELOG_PRIV_MAGIC;
cp->flags = flags;
- /* Start the receiver */
- rc = libcfs_ulnl_start(&cp->lnl, 0 /* unicast */);
+
+ /* Set up the receiver */
+ rc = libcfs_ukuc_start(&cp->kuc, 0 /* no group registration */);
if (rc < 0)
goto out_free;
- /* We need to trigger Lustre to start sending messages now.
- We could send a lnl message to a kernel listener,
- or write into proc. Proc has the advantage of running in this
- context, avoiding the need for a kernel thread. */
- cs.cs_pid = getpid();
- cs.cs_startrec = startrec;
- cs.cs_flags = flags & CHANGELOG_FLAG_BLOCK ? LNL_FL_BLOCK : 0;
- if ((pid = fork()) < 0) {
- goto out_free;
- } else if (!pid) {
- /* Write triggers Lustre to start sending, but it
- won't return until it is complete, meaning everything
- got shipped through lnl (or error). So we trigger it
- from a child process here, allowing the llapi call to
- return and wait for the lnl messages. */
- rc = write(fd, &cs, sizeof(cs));
- exit(rc);
+ *priv = cp;
+
+ /* Tell the kernel to start sending */
+ rc = changelog_ioctl(device, OBD_IOC_CHANGELOG_SEND, cp->kuc.lk_wfd,
+ startrec, flags);
+ /* Only the kernel reference keeps the write side open */
+ close(cp->kuc.lk_wfd);
+ cp->kuc.lk_wfd = 0;
+ if (rc < 0) {
+ /* frees and clears priv */
+ llapi_changelog_fini(priv);
+ return rc;
}
- close(fd);
- *priv = cp;
return 0;
out_free:
+ if (cp->buf)
+ free(cp->buf);
free(cp);
- close(fd);
return rc;
}
if (!cp || (cp->magic != CHANGELOG_PRIV_MAGIC))
return -EINVAL;
- libcfs_ulnl_stop(&cp->lnl);
+ libcfs_ukuc_stop(&cp->kuc);
+ free(cp->buf);
free(cp);
*priv = NULL;
return 0;
int llapi_changelog_recv(void *priv, struct changelog_rec **rech)
{
struct changelog_private *cp = (struct changelog_private *)priv;
- struct lnl_hdr *lnlh;
+ struct kuc_hdr *kuch;
int rc = 0;
if (!cp || (cp->magic != CHANGELOG_PRIV_MAGIC))
return -EINVAL;
repeat:
- rc = libcfs_ulnl_msg_get(&cp->lnl, CR_MAXSIZE, LNL_TRANSPORT_CHANGELOG,
- &lnlh);
+ rc = libcfs_ukuc_msg_get(&cp->kuc, cp->buf, CR_MAXSIZE,
+ KUC_TRANSPORT_CHANGELOG);
if (rc < 0)
return rc;
- if ((lnlh->lnl_transport != LNL_TRANSPORT_CHANGELOG) ||
- ((lnlh->lnl_msgtype != CL_RECORD) &&
- (lnlh->lnl_msgtype != CL_EOF))) {
+ kuch = (struct kuc_hdr *)cp->buf;
+ if ((kuch->kuc_transport != KUC_TRANSPORT_CHANGELOG) ||
+ ((kuch->kuc_msgtype != CL_RECORD) &&
+ (kuch->kuc_msgtype != CL_EOF))) {
llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
"Unknown changelog message type %d:%d\n",
- lnlh->lnl_transport, lnlh->lnl_msgtype);
+ kuch->kuc_transport, kuch->kuc_msgtype);
rc = -EPROTO;
goto out_free;
}
- if (lnlh->lnl_msgtype == CL_EOF) {
+ if (kuch->kuc_msgtype == CL_EOF) {
if (cp->flags & CHANGELOG_FLAG_FOLLOW) {
/* Ignore EOFs */
goto repeat;
}
/* Our message is a changelog_rec */
- *rech = (struct changelog_rec *)(lnlh + 1);
+ *rech = (struct changelog_rec *)(kuch + 1);
return 0;
out_free:
- libcfs_ulnl_msg_free(&lnlh);
*rech = NULL;
return rc;
}
/** Release the changelog record when done with it. */
int llapi_changelog_free(struct changelog_rec **rech)
{
- if (*rech) {
- struct lnl_hdr *lnlh = (struct lnl_hdr *)*rech - 1;
- libcfs_ulnl_msg_free(&lnlh);
- }
*rech = NULL;
return 0;
}
int llapi_changelog_clear(const char *mdtname, const char *idstr,
long long endrec)
{
- struct ioc_changelog_clear data;
- char fsname[17];
- char *ptr;
- int id, fd, index, rc;
+ int id;
if (endrec < 0) {
llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
return -EINVAL;
}
- /* Take path, fsname, or MDTNAME. Assume MDT0000 in the former cases */
- if (mdtname[0] == '/') {
- index = 0;
- fd = open(mdtname, O_RDONLY | O_DIRECTORY | O_NONBLOCK);
- rc = fd < 0 ? -errno : 0;
- } else {
- if (get_mdtname((char *)mdtname, "%s%s", fsname) < 0)
- return -EINVAL;
- ptr = fsname + strlen(fsname) - 8;
- *ptr = '\0';
- index = strtol(ptr + 4, NULL, 10);
- rc = get_root_path(WANT_FD | WANT_ERROR, fsname, &fd, NULL, -1);
- }
- if (rc < 0) {
- llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
- "Can't open %s: %d\n", mdtname, rc);
- return rc;
- }
-
- data.icc_mdtindex = index;
- data.icc_id = id;
- data.icc_recno = endrec;
- rc = ioctl(fd, OBD_IOC_CHANGELOG_CLEAR, &data);
- if (rc)
- llapi_err(LLAPI_MSG_ERROR, "ioctl err %d", rc);
-
- close(fd);
- return rc;
+ return changelog_ioctl(mdtname, OBD_IOC_CHANGELOG_CLEAR, id, endrec, 0);
}
int llapi_fid2path(const char *device, const char *fidstr, char *buf,
#define CT_PRIV_MAGIC 0xC0BE2001
struct copytool_private {
int magic;
- lustre_netlink lnl;
- int archive_num_count;
- int archive_nums[0];
+ char *buf;
+ char *fsname;
+ lustre_kernelcomm kuc;
+ __u32 archives;
};
#include <libcfs/libcfs.h>
/** Register a copytool
- * @param priv Opaque private control structure
+ * @param[out] priv Opaque private control structure
+ * @param fsname Lustre filesystem
* @param flags Open flags, currently unused (e.g. O_NONBLOCK)
- * @param archive_num_count
- * @param archive_nums Which archive numbers this copytool is responsible for
+ * @param archive_count Number of entries in archives
+ * @param archives Which archive numbers this copytool is responsible for
*/
-int llapi_copytool_start(void **priv, int flags, int archive_num_count,
- int *archive_nums)
+int llapi_copytool_start(void **priv, char *fsname, int flags,
+ int archive_count, int *archives)
{
struct copytool_private *ct;
int rc;
- if (archive_num_count > 0 && archive_nums == NULL) {
+ if (archive_count > 0 && archives == NULL) {
llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
"NULL archive numbers");
return -EINVAL;
}
- ct = malloc(sizeof(*ct) +
- archive_num_count * sizeof(ct->archive_nums[0]));
+ ct = malloc(sizeof(*ct));
if (ct == NULL)
return -ENOMEM;
+ ct->buf = malloc(HAL_MAXSIZE);
+ ct->fsname = malloc(strlen(fsname) + 1);
+ if (ct->buf == NULL || ct->fsname == NULL) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ strcpy(ct->fsname, fsname);
ct->magic = CT_PRIV_MAGIC;
- ct->archive_num_count = archive_num_count;
- if (ct->archive_num_count > 0)
- memcpy(ct->archive_nums, archive_nums, archive_num_count *
- sizeof(ct->archive_nums[0]));
+ ct->archives = 0;
+ for (rc = 0; rc < archive_count; rc++) {
+ /* ct->archives is a bitmask with archive N stored as bit N, so
+ * N must be a valid bit index; sizeof() alone counts bytes. */
+ if (archives[rc] < 0 ||
+ archives[rc] >= (int)(8 * sizeof(ct->archives))) {
+ llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
+ "Maximum of %d archives supported",
+ (int)(8 * sizeof(ct->archives)));
+ /* rc holds the loop index here; return a real error */
+ rc = -EINVAL;
+ goto out_err;
+ }
+ /* unsigned shift so that bit 31 is well defined */
+ ct->archives |= 1U << archives[rc];
+ }
+ /* special case: if no archives specified, default to archive #0. */
+ if (ct->archives == 0)
+ ct->archives = 1;
+
+ rc = libcfs_ukuc_start(&ct->kuc, KUC_GRP_HSM);
+ if (rc < 0)
+ goto out_err;
- rc = libcfs_ulnl_start(&ct->lnl, LNL_GRP_HSM);
+ /* Storing archive(s) in lk_data; see mdc_ioc_hsm_ct_start */
+ ct->kuc.lk_data = ct->archives;
+ rc = root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL,
+ WANT_ERROR);
+ /* Only the kernel reference keeps the write side open */
+ close(ct->kuc.lk_wfd);
+ ct->kuc.lk_wfd = 0;
if (rc < 0)
goto out_err;
return 0;
out_err:
+ if (ct->buf)
+ free(ct->buf);
+ if (ct->fsname)
+ free(ct->fsname);
free(ct);
return rc;
}
if (!ct || (ct->magic != CT_PRIV_MAGIC))
return -EINVAL;
- libcfs_ulnl_stop(&ct->lnl);
+ /* Tell the kernel to stop sending us messages */
+ ct->kuc.lk_flags = LK_FLG_STOP;
+ root_ioctl(ct->fsname, LL_IOC_HSM_CT_START, &(ct->kuc), NULL, 0);
+
+ /* Shut down the kernelcomms */
+ libcfs_ukuc_stop(&ct->kuc);
+
+ free(ct->buf);
+ free(ct->fsname);
free(ct);
*priv = NULL;
return 0;
/** Fetch one kernel message into ct->buf and hand back the HSM action list it carries.
 * Returns -EINVAL if halh or msgsize is NULL, and passes a negative result of
 * the message read straight back; neither of these paths touches *halh or *msgsize.
 * Returns -ESHUTDOWN for a generic shutdown message and -EPROTO for any other
 * message that is not an HSM action list; both set *halh = NULL and *msgsize = 0.
 * Returns 0 with *halh = NULL and *msgsize = 0 when the request's archive bit
 * is not set in ct->archives.
 * Otherwise returns 0 with *halh pointing just past the header and *msgsize
 * set to the message length minus the header size.
 */
int llapi_copytool_recv(void *priv, struct hsm_action_list **halh, int *msgsize)
{
struct copytool_private *ct = (struct copytool_private *)priv;
- struct lnl_hdr *lnlh;
+ struct kuc_hdr *kuch; /* header view of ct->buf, assigned after a successful read */
struct hsm_action_list *hal;
int rc = 0;
if (halh == NULL || msgsize == NULL)
return -EINVAL;
- rc = libcfs_ulnl_msg_get(&ct->lnl, HAL_MAXSIZE,
- LNL_TRANSPORT_HSM, &lnlh);
+ rc = libcfs_ukuc_msg_get(&ct->kuc, ct->buf, HAL_MAXSIZE, /* NOTE(review): ct is dereferenced without a NULL/magic check in this function - confirm callers guarantee a valid priv */
+ KUC_TRANSPORT_HSM);
if (rc < 0)
return rc;
/* Handle generic messages */
- if (lnlh->lnl_transport == LNL_TRANSPORT_GENERIC &&
- lnlh->lnl_msgtype == LNL_MSG_SHUTDOWN) {
+ kuch = (struct kuc_hdr *)ct->buf; /* the message header sits at the start of the buffer */
+ if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC &&
+ kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) { /* shutdown notice is reported to the caller as -ESHUTDOWN */
rc = -ESHUTDOWN;
goto out_free;
}
- if (lnlh->lnl_transport != LNL_TRANSPORT_HSM ||
- lnlh->lnl_msgtype != HMT_ACTION_LIST) {
+ if (kuch->kuc_transport != KUC_TRANSPORT_HSM ||
+ kuch->kuc_msgtype != HMT_ACTION_LIST) { /* only HSM action lists are accepted past this point */
llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO,
"Unknown HSM message type %d:%d\n",
- lnlh->lnl_transport, lnlh->lnl_msgtype);
+ kuch->kuc_transport, kuch->kuc_msgtype);
rc = -EPROTO;
goto out_free;
}
/* Our message is an hsm_action_list */
- hal = (struct hsm_action_list *)(lnlh + 1);
+ hal = (struct hsm_action_list *)(kuch + 1); /* payload follows the header, so hal points into ct->buf; presumably valid only until the next read reuses the buffer - TODO confirm */
/* Check that we have registered for this archive # */
- for (rc = 0; rc < ct->archive_num_count; rc++) {
- if (hal->hal_archive_num == ct->archive_nums[rc])
- break;
- }
- if (rc >= ct->archive_num_count) {
- CDEBUG(D_INFO, "This copytool does not service archive #%d, "
- "ignoring this request.\n", hal->hal_archive_num);
+ if (((1 << hal->hal_archive_num) & ct->archives) == 0) { /* ct->archives is tested as a bitmask indexed by archive number; NOTE(review): the shift is undefined if hal_archive_num is negative or >= the bit width of int - confirm it is range-checked */
+ llapi_err(LLAPI_MSG_INFO | LLAPI_MSG_NO_ERRNO,
+ "Ignoring request for archive #%d (bitmask %#x)\n",
+ hal->hal_archive_num, ct->archives);
rc = 0;
goto out_free;
}
*halh = hal;
- *msgsize = lnlh->lnl_msglen - sizeof(*lnlh);
+ *msgsize = kuch->kuc_msglen - sizeof(*kuch); /* payload size: the header size is subtracted from the message length */
return 0;
out_free:
- libcfs_ulnl_msg_free(&lnlh);
*halh = NULL;
*msgsize = 0;
return rc;
/** Release the action list when done with it.
 * With the lines removed by this change, nothing is deallocated here any
 * more: the caller's pointer is simply reset to NULL and 0 is returned.
 * NOTE(review): hal itself is dereferenced without a NULL check - confirm
 * callers never pass a NULL hal.
 */
int llapi_copytool_free(struct hsm_action_list **hal)
{
- if (*hal) {
- struct lnl_hdr *lnlh = (struct lnl_hdr *)*hal - 1;
- libcfs_ulnl_msg_free(&lnlh);
- }
*hal = NULL;
return 0;
}