Whamcloud - gitweb
LU-13238 ofd: add OFD access logs 52/37552/16
author John L. Hammond <jhammond@whamcloud.com>
Tue, 28 Jan 2020 15:29:52 +0000 (09:29 -0600)
committer Oleg Drokin <green@whamcloud.com>
Thu, 7 May 2020 05:45:55 +0000 (05:45 +0000)
Add access logs to OFD layer. BRW RPC handlers will record accesses to
an in memory circular buffer which may be read in userspace through
character devices (/dev/lustre-access-log/$FSNAME-OSTxxxx). A control
device (/dev/lustre-access-log/control) is added to facilitate device
discovery. A utility (ofd_access_log_reader) to discover and consume
access logs is included.

Signed-off-by: John L. Hammond <jhammond@whamcloud.com>
Change-Id: I76b78cc5075ee01f9b234e96e7a22a1bdcf2f755
Reviewed-on: https://review.whamcloud.com/37552
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
18 files changed:
lustre/include/uapi/linux/lustre/Makefile.am
lustre/include/uapi/linux/lustre/lustre_access_log.h [new file with mode: 0644]
lustre/ofd/Makefile.in
lustre/ofd/lproc_ofd.c
lustre/ofd/ofd_access_log.c [new file with mode: 0644]
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_io.c
lustre/ptlrpc/wirehdr.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity.sh
lustre/utils/.gitignore
lustre/utils/Makefile.am
lustre/utils/lstddef.h [new file with mode: 0644]
lustre/utils/ofd_access_log_reader.c [new file with mode: 0644]
lustre/utils/wirecheck.c
lustre/utils/wirehdr.c
lustre/utils/wiretest.c

index cd941ec..4258024 100644 (file)
@@ -44,6 +44,7 @@ lustreinclude_HEADERS = \
 
 if SERVER
 lustreinclude_HEADERS += \
+       lustre_access_log.h \
        lustre_barrier_user.h \
        lustre_disk.h \
        lustre_lfsck_user.h \
@@ -51,6 +52,7 @@ lustreinclude_HEADERS += \
 endif
 
 EXTRA_DIST = \
+       lustre_access_log.h \
        lustre_barrier_user.h \
        lustre_cfg.h \
        lustre_disk.h \
diff --git a/lustre/include/uapi/linux/lustre/lustre_access_log.h b/lustre/include/uapi/linux/lustre/lustre_access_log.h
new file mode 100644 (file)
index 0000000..0e391cf
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef _LUSTRE_ACCESS_LOG_H
+# define _LUSTRE_ACCESS_LOG_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+/*
+ * The include path below differs between kernel and userspace because we
+ * are built out of tree and because of how the OpenSFS branch handles CFLAGS.
+ */
+#ifdef __KERNEL__
+# include <uapi/linux/lustre/lustre_user.h>
+#else
+# include <linux/lustre/lustre_user.h>
+#endif
+
+enum ofd_access_flags {
+       OFD_ACCESS_READ = 0x1,
+       OFD_ACCESS_WRITE = 0x2,
+};
+
+struct ofd_access_entry_v1 {
+       struct lu_fid   oae_parent_fid; /* 16 */
+       __u64           oae_begin; /* 24 */
+       __u64           oae_end; /* 32 */
+       __u64           oae_time; /* 40 */
+       __u32           oae_size; /* 44 */
+       __u32           oae_segment_count; /* 48 */
+       __u32           oae_flags; /* 52 enum ofd_access_flags */
+       __u32           oae_reserved1; /* 56 */
+       __u32           oae_reserved2; /* 60 */
+       __u32           oae_reserved3; /* 64 */
+};
+
+/* The name of the subdirectory of devtmpfs (/dev) containing the
+ * control and access log char devices. */
+#define LUSTRE_ACCESS_LOG_DIR_NAME "lustre-access-log"
+
+enum {
+       LUSTRE_ACCESS_LOG_VERSION_1 = 0x00010000,
+       LUSTRE_ACCESS_LOG_TYPE_OFD = 0x1,
+       LUSTRE_ACCESS_LOG_NAME_SIZE = 128,
+};
+
+struct lustre_access_log_info_v1 {
+       __u32   lali_version; /* LUSTRE_ACCESS_LOG_VERSION_1 */
+       __u32   lali_type; /* LUSTRE_ACCESS_LOG_TYPE_OFD */
+       char    lali_name[LUSTRE_ACCESS_LOG_NAME_SIZE]; /* obd_name */
+       __u32   lali_log_size;
+       __u32   lali_entry_size;
+       /* Underscore prefixed members are intended for test and debug
+        * purposes only. */
+       __u32   _lali_head;
+       __u32   _lali_tail;
+       __u32   _lali_entry_space;
+       __u32   _lali_entry_count;
+       __u32   _lali_drop_count;
+       __u32   _lali_is_closed;
+};
+
+enum {
+       /* /dev/lustre-access-log/control ioctl: return lustre access log
+        * interface version. */
+       LUSTRE_ACCESS_LOG_IOCTL_VERSION = _IO('O', 0x81),
+
+       /* /dev/lustre-access-log/control ioctl: return device major
+        * used for access log devices. (The major is dynamically
+        * allocated during ofd module initialization.) */
+       LUSTRE_ACCESS_LOG_IOCTL_MAJOR = _IO('O', 0x82),
+
+       /* /dev/lustre-access-log/control ioctl: get global control event
+        * count and store it into file private_data. */
+       LUSTRE_ACCESS_LOG_IOCTL_PRESCAN = _IO('O', 0x83),
+
+       /* /dev/lustre-access-log/OBDNAME ioctl: populate struct
+        * lustre_access_log_info_v1 for the current device. */
+       LUSTRE_ACCESS_LOG_IOCTL_INFO = _IOR('O', 0x84, struct lustre_access_log_info_v1),
+};
+
+#endif /* _LUSTRE_ACCESS_LOG_H */
index f446c1e..186172e 100644 (file)
@@ -1,7 +1,7 @@
 MODULES := ofd
 
 ofd-objs := ofd_dev.o ofd_obd.o ofd_fs.o ofd_trans.o ofd_objects.o ofd_io.o
-ofd-objs += lproc_ofd.o ofd_dlm.o ofd_lvb.o
+ofd-objs += lproc_ofd.o ofd_dlm.o ofd_lvb.o ofd_access_log.o
 
 EXTRA_DIST = $(ofd-objs:%.o=%.c) ofd_internal.h
 
index e7ed6a8..60c99eb 100644 (file)
@@ -45,6 +45,7 @@
 #include <lprocfs_status.h>
 #include <linux/seq_file.h>
 #include <lustre_lfsck.h>
+#include <uapi/linux/lustre/lustre_access_log.h>
 
 #include "ofd_internal.h"
 
@@ -687,6 +688,105 @@ ofd_lfsck_verify_pfid_seq_write(struct file *file, const char __user *buffer,
 
 LPROC_SEQ_FOPS(ofd_lfsck_verify_pfid);
 
+static ssize_t access_log_mask_show(struct kobject *kobj,
+                       struct attribute *attr, char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
+
+       return snprintf(buf, PAGE_SIZE, "%s%s%s\n",
+               (ofd->ofd_access_log_mask == 0) ? "0" : "",
+               (ofd->ofd_access_log_mask & OFD_ACCESS_READ) ? "r" : "",
+               (ofd->ofd_access_log_mask & OFD_ACCESS_WRITE) ? "w" : "");
+}
+
+static ssize_t access_log_mask_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
+       unsigned int mask = 0;
+       size_t i;
+
+       for (i = 0; i < count; i++) {
+               switch (tolower(buffer[i])) {
+               case '0':
+                       break;
+               case 'r':
+                       mask |= OFD_ACCESS_READ;
+                       break;
+               case 'w':
+                       mask |= OFD_ACCESS_WRITE;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       ofd->ofd_access_log_mask = mask;
+
+       return count;
+}
+LUSTRE_RW_ATTR(access_log_mask);
+
+static ssize_t access_log_size_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
+
+       return snprintf(buf, PAGE_SIZE, "%u\n", ofd->ofd_access_log_size);
+}
+
+static ssize_t access_log_size_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
+       struct ofd_access_log *oal;
+       unsigned int size;
+       ssize_t rc;
+
+       rc = kstrtouint(buffer, 0, &size);
+       if (rc < 0)
+               return rc;
+
+       if (!ofd_access_log_size_is_valid(size))
+               return -EINVAL;
+
+       /* The size of the ofd_access_log cannot be changed after it
+        * has been created.
+        */
+       if (ofd->ofd_access_log_size == size)
+               return count;
+
+       oal = ofd_access_log_create(obd->obd_name, size);
+       if (IS_ERR(oal))
+               return PTR_ERR(oal);
+
+       spin_lock(&ofd->ofd_flags_lock);
+       if (ofd->ofd_access_log != NULL) {
+               rc = -EBUSY;
+       } else {
+               ofd->ofd_access_log = oal;
+               ofd->ofd_access_log_size = size;
+               oal = NULL;
+               rc = count;
+       }
+       spin_unlock(&ofd->ofd_flags_lock);
+
+       ofd_access_log_delete(oal);
+
+       return rc;
+}
+LUSTRE_RW_ATTR(access_log_size);
+
 static int ofd_site_stats_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
@@ -981,6 +1081,8 @@ static struct attribute *ofd_attrs[] = {
 #endif
        &lustre_attr_soft_sync_limit.attr,
        &lustre_attr_lfsck_speed_limit.attr,
+       &lustre_attr_access_log_mask.attr,
+       &lustre_attr_access_log_size.attr,
        &lustre_attr_job_cleanup_interval.attr,
        &lustre_attr_checksum_t10pi_enforce.attr,
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 14, 53, 0)
diff --git a/lustre/ofd/ofd_access_log.c b/lustre/ofd/ofd_access_log.c
new file mode 100644 (file)
index 0000000..4447de0
--- /dev/null
@@ -0,0 +1,625 @@
+#include <linux/cdev.h>
+#include <linux/circ_buf.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/lustre/lustre_idl.h>
+#include <uapi/linux/lustre/lustre_access_log.h>
+#include "ofd_internal.h"
+
+/* OFD access logs: OST (OFD) RPC handlers log accesses by FID and
+ * PFID which are read from userspace through character device files
+ * (/dev/lustre-access-log/scratch-OST0000). Accesses are described by
+ * struct ofd_access_entry_v1. The char device implements read()
+ * (blocking and nonblocking) and poll(), along with an ioctl that
+ * returns diagnostic information on an oal device.
+ *
+ * A control device (/dev/lustre-access-log/control) supports ioctl()
+ * and poll() methods for oal discovery. See uses of
+ * oal_control_event_count and oal_control_wait_queue for details.
+ *
+ * oal log size and entry size are restricted to powers of 2 to
+ * support circ_buf methods. See Documentation/core-api/circular-buffers.rst
+ * in the linux tree for more information.
+ *
+ * The associated struct device (*oal_device) owns the oal. The
+ * release() method of oal_device frees the oal and releases its
+ * minor. This may seem slightly more complicated than necessary but
+ * it allows the OST to be unmounted while the oal still has open file
+ * descriptors.
+ */
+
+enum {
+       OAL_DEV_COUNT = 1 << MINORBITS,
+};
+
+struct ofd_access_log {
+       char oal_name[128]; /* lustre-OST0000 */
+       struct device oal_device;
+       struct cdev oal_cdev;
+       struct circ_buf oal_circ;
+       wait_queue_head_t oal_read_wait_queue;
+       spinlock_t oal_read_lock;
+       spinlock_t oal_write_lock;
+       unsigned int oal_drop_count;
+       unsigned int oal_is_closed;
+       unsigned int oal_log_size;
+       unsigned int oal_entry_size;
+};
+
+static atomic_t oal_control_event_count = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oal_control_wait_queue);
+
+static struct class *oal_log_class;
+static unsigned int oal_log_major;
+static DEFINE_IDR(oal_log_minor_idr); /* TODO Use ida instead. */
+static DEFINE_SPINLOCK(oal_log_minor_lock);
+
+bool ofd_access_log_size_is_valid(unsigned int size)
+{
+       const unsigned int size_min = 2 * sizeof(struct ofd_access_entry_v1);
+       const unsigned int size_max = 1U << 30;
+
+       if (size == 0)
+               return true;
+
+       return is_power_of_2(size) && size_min <= size && size <= size_max;
+}
+
+static void oal_control_event_inc(void)
+{
+       atomic_inc(&oal_control_event_count);
+       wake_up(&oal_control_wait_queue);
+}
+
+static int oal_log_minor_alloc(int *pminor)
+{
+       void *OAL_LOG_MINOR_ALLOCED = (void *)-1;
+       int minor;
+
+       idr_preload(GFP_KERNEL);
+       spin_lock(&oal_log_minor_lock);
+       minor = idr_alloc(&oal_log_minor_idr, OAL_LOG_MINOR_ALLOCED, 0,
+                       OAL_DEV_COUNT, GFP_NOWAIT);
+       spin_unlock(&oal_log_minor_lock);
+       idr_preload_end();
+
+       if (minor < 0)
+               return minor;
+
+       *pminor = minor;
+
+       return 0;
+}
+
+static void oal_log_minor_free(int minor)
+{
+       spin_lock(&oal_log_minor_lock);
+       idr_remove(&oal_log_minor_idr, minor);
+       spin_unlock(&oal_log_minor_lock);
+}
+
+static bool oal_is_empty(struct ofd_access_log *oal)
+{
+       return CIRC_CNT(oal->oal_circ.head,
+                       oal->oal_circ.tail,
+                       oal->oal_log_size) < oal->oal_entry_size;
+}
+
+static ssize_t oal_write_entry(struct ofd_access_log *oal,
+                       const void *entry, size_t entry_size)
+{
+       struct circ_buf *circ = &oal->oal_circ;
+       unsigned int head;
+       unsigned int tail;
+       ssize_t rc;
+
+       if (entry_size != oal->oal_entry_size)
+               return -EINVAL;
+
+       spin_lock(&oal->oal_write_lock);
+       head = circ->head;
+       tail = READ_ONCE(circ->tail);
+
+       /* CIRC_SPACE() returns the space available, 0..oal_log_size -
+        * 1. It always leaves one free char, since a completely full
+        * buffer would have head == tail, which is the same as empty. */
+       if (CIRC_SPACE(head, tail, oal->oal_log_size) < oal->oal_entry_size) {
+               oal->oal_drop_count++;
+               rc = -EAGAIN;
+               goto out_write_lock;
+       }
+
+       memcpy(&circ->buf[head], entry, entry_size);
+       rc = entry_size;
+
+       /* Ensure the entry is stored before we update the head. */
+       smp_store_release(&circ->head,
+                       (head + oal->oal_entry_size) & (oal->oal_log_size - 1));
+
+       wake_up(&oal->oal_read_wait_queue);
+out_write_lock:
+       spin_unlock(&oal->oal_write_lock);
+
+       return rc;
+}
+
+/* Read one entry from the log and return its size. Non-blocking.
+ * When the log is empty we return -EAGAIN if the OST is still mounted
+ * and 0 otherwise.
+ */
+static ssize_t oal_read_entry(struct ofd_access_log *oal,
+                       void *entry_buf, size_t entry_buf_size)
+{
+       struct circ_buf *circ = &oal->oal_circ;
+       unsigned int head;
+       unsigned int tail;
+       ssize_t rc;
+
+       /* XXX This method may silently truncate entries when
+        * entry_buf_size is less than oal_entry_size. But that's OK
+        * because you know what you are doing. */
+       spin_lock(&oal->oal_read_lock);
+
+       /* Memory barrier usage follows circular-buffers.txt. */
+       head = smp_load_acquire(&circ->head);
+       tail = circ->tail;
+
+       if (!CIRC_CNT(head, tail, oal->oal_log_size)) {
+               rc = oal->oal_is_closed ? 0 : -EAGAIN;
+               goto out_read_lock;
+       }
+
+       BUG_ON(CIRC_CNT(head, tail, oal->oal_log_size) < oal->oal_entry_size);
+
+       /* Read index before reading contents at that index. */
+       smp_read_barrier_depends();
+
+       /* Extract one entry from the buffer. */
+       rc = min_t(size_t, oal->oal_entry_size, entry_buf_size);
+       memcpy(entry_buf, &circ->buf[tail], rc);
+
+       /* Memory barrier usage follows circular-buffers.txt. */
+       smp_store_release(&circ->tail,
+                       (tail + oal->oal_entry_size) & (oal->oal_log_size - 1));
+
+out_read_lock:
+       spin_unlock(&oal->oal_read_lock);
+
+       return rc;
+}
+
+static int oal_file_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = container_of(inode->i_cdev,
+                                       struct ofd_access_log, oal_cdev);
+
+       return nonseekable_open(inode, filp);
+}
+
+/* User buffer size must be a multiple of ofd access entry size. */
+static ssize_t oal_file_read(struct file *filp, char __user *buf, size_t count,
+                       loff_t *ppos)
+{
+       struct ofd_access_log *oal = filp->private_data;
+       void *entry;
+       size_t size = 0;
+       int rc = 0;
+
+       if (!count)
+               return 0;
+
+       if (count & (oal->oal_entry_size - 1))
+               return -EINVAL;
+
+       entry = kzalloc(oal->oal_entry_size, GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       while (size < count) {
+               rc = oal_read_entry(oal, entry, oal->oal_entry_size);
+               if (rc == -EAGAIN) {
+                       if (filp->f_flags & O_NONBLOCK)
+                               break;
+
+                       rc = wait_event_interruptible(oal->oal_read_wait_queue,
+                               !oal_is_empty(oal) || oal->oal_is_closed);
+                       if (rc)
+                               break;
+               } else if (rc <= 0) {
+                       break; /* closed or error */
+               } else {
+                       if (copy_to_user(buf, entry, oal->oal_entry_size)) {
+                               rc = -EFAULT;
+                               break;
+                       }
+
+                       buf += oal->oal_entry_size;
+                       size += oal->oal_entry_size;
+               }
+       }
+
+       kfree(entry);
+
+       return size ? size : rc;
+}
+
+/* Included for test purposes. User buffer size must be a multiple of
+ * ofd access entry size. */
+static ssize_t oal_file_write(struct file *filp, const char __user *buf,
+                       size_t count, loff_t *ppos)
+{
+       struct ofd_access_log *oal = filp->private_data;
+       void *entry;
+       size_t size = 0;
+       ssize_t rc = 0;
+
+       if (!count)
+               return 0;
+
+       if (count & (oal->oal_entry_size - 1))
+               return -EINVAL;
+
+       entry = kzalloc(oal->oal_entry_size, GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       while (size < count) {
+               if (copy_from_user(entry, buf, oal->oal_entry_size)) {
+                       rc = -EFAULT;
+                       break;
+               }
+
+               rc = oal_write_entry(oal, entry, oal->oal_entry_size);
+               if (rc <= 0)
+                       break;
+
+               buf += oal->oal_entry_size;
+               size += oal->oal_entry_size;
+       }
+
+       kfree(entry);
+
+       return size > 0 ? size : rc;
+}
+
+unsigned int oal_file_poll(struct file *filp, struct poll_table_struct *wait)
+{
+       struct ofd_access_log *oal = filp->private_data;
+       unsigned int mask = 0;
+
+       poll_wait(filp, &oal->oal_read_wait_queue, wait);
+
+       spin_lock(&oal->oal_read_lock);
+
+       if (!oal_is_empty(oal) || oal->oal_is_closed)
+               mask |= POLLIN;
+
+       spin_unlock(&oal->oal_read_lock);
+
+       return mask;
+}
+
+static long oal_ioctl_info(struct ofd_access_log *oal, unsigned long arg)
+{
+       struct lustre_access_log_info_v1 __user *lali;
+       u32 entry_count = CIRC_CNT(oal->oal_circ.head,
+                               oal->oal_circ.tail,
+                               oal->oal_log_size) / oal->oal_entry_size;
+       u32 entry_space = CIRC_SPACE(oal->oal_circ.head,
+                               oal->oal_circ.tail,
+                               oal->oal_log_size) / oal->oal_entry_size;
+
+       lali = (struct lustre_access_log_info_v1 __user *)arg;
+       BUILD_BUG_ON(sizeof(lali->lali_name) != sizeof(oal->oal_name));
+
+       if (put_user(LUSTRE_ACCESS_LOG_VERSION_1, &lali->lali_version))
+               return -EFAULT;
+
+       if (put_user(LUSTRE_ACCESS_LOG_TYPE_OFD, &lali->lali_type))
+               return -EFAULT;
+
+       if (copy_to_user(lali->lali_name, oal->oal_name, sizeof(oal->oal_name)))
+               return -EFAULT;
+
+       if (put_user(oal->oal_log_size, &lali->lali_log_size))
+               return -EFAULT;
+
+       if (put_user(oal->oal_entry_size, &lali->lali_entry_size))
+               return -EFAULT;
+
+       if (put_user(oal->oal_circ.head, &lali->_lali_head))
+               return -EFAULT;
+
+       if (put_user(oal->oal_circ.tail, &lali->_lali_tail))
+               return -EFAULT;
+
+       if (put_user(entry_space, &lali->_lali_entry_space))
+               return -EFAULT;
+
+       if (put_user(entry_count, &lali->_lali_entry_count))
+               return -EFAULT;
+
+       if (put_user(oal->oal_drop_count, &lali->_lali_drop_count))
+               return -EFAULT;
+
+       if (put_user(oal->oal_is_closed, &lali->_lali_is_closed))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long oal_file_ioctl(struct file *filp, unsigned int cmd,
+                       unsigned long arg)
+{
+       struct ofd_access_log *oal = filp->private_data;
+
+       switch (cmd) {
+       case LUSTRE_ACCESS_LOG_IOCTL_VERSION:
+               return LUSTRE_ACCESS_LOG_VERSION_1;
+       case LUSTRE_ACCESS_LOG_IOCTL_INFO:
+               return oal_ioctl_info(oal, arg);
+       default:
+               return -ENOTTY;
+       }
+}
+
+static const struct file_operations oal_fops = {
+       .owner = THIS_MODULE,
+       .open = &oal_file_open,
+       .unlocked_ioctl = &oal_file_ioctl,
+       .read = &oal_file_read,
+       .write = &oal_file_write,
+       .poll = &oal_file_poll,
+       .llseek = &no_llseek,
+};
+
+static void oal_device_release(struct device *dev)
+{
+       struct ofd_access_log *oal = dev_get_drvdata(dev);
+
+       oal_log_minor_free(MINOR(oal->oal_device.devt));
+       vfree(oal->oal_circ.buf);
+       kfree(oal);
+}
+
+struct ofd_access_log *ofd_access_log_create(const char *ofd_name, size_t size)
+{
+       const size_t entry_size = sizeof(struct ofd_access_entry_v1);
+       struct ofd_access_log *oal;
+       int minor;
+       int rc;
+
+       BUILD_BUG_ON(sizeof(oal->oal_name) != MAX_OBD_NAME);
+       BUILD_BUG_ON(!is_power_of_2(entry_size));
+
+       if (!size)
+               return NULL;
+
+       if (!is_power_of_2(size) || (size & (entry_size - 1)) ||
+           (unsigned int)size != size)
+               return ERR_PTR(-EINVAL);
+
+       oal = kzalloc(sizeof(*oal), GFP_KERNEL);
+       if (!oal)
+               return ERR_PTR(-ENOMEM);
+
+       strlcpy(oal->oal_name, ofd_name, sizeof(oal->oal_name));
+       oal->oal_log_size = size;
+       oal->oal_entry_size = entry_size;
+       spin_lock_init(&oal->oal_write_lock);
+       spin_lock_init(&oal->oal_read_lock);
+       init_waitqueue_head(&oal->oal_read_wait_queue);
+
+       oal->oal_circ.buf = vmalloc(oal->oal_log_size);
+       if (!oal->oal_circ.buf) {
+               rc = -ENOMEM;
+               goto out_free;
+       }
+
+       rc = oal_log_minor_alloc(&minor);
+       if (rc < 0)
+               goto out_free;
+
+       device_initialize(&oal->oal_device);
+       oal->oal_device.devt = MKDEV(oal_log_major, minor);
+       oal->oal_device.class = oal_log_class;
+       oal->oal_device.release = &oal_device_release;
+       dev_set_drvdata(&oal->oal_device, oal);
+       rc = dev_set_name(&oal->oal_device,
+                       "%s!%s", LUSTRE_ACCESS_LOG_DIR_NAME, oal->oal_name);
+       if (rc < 0)
+               goto out_minor;
+
+       cdev_init(&oal->oal_cdev, &oal_fops);
+       oal->oal_cdev.owner = THIS_MODULE;
+       rc = cdev_device_add(&oal->oal_cdev, &oal->oal_device);
+       if (rc < 0)
+               goto out_device_name;
+
+       oal_control_event_inc();
+
+       return oal;
+
+out_device_name:
+       kfree_const(oal->oal_device.kobj.name);
+out_minor:
+       oal_log_minor_free(minor);
+out_free:
+       vfree(oal->oal_circ.buf);
+       kfree(oal);
+
+       return ERR_PTR(rc);
+}
+
+void ofd_access(struct ofd_device *m,
+               const struct lu_fid *parent_fid,
+               __u64 begin, __u64 end,
+               unsigned int size,
+               unsigned int segment_count,
+               int rw)
+{
+       unsigned int flags = (rw == READ) ? OFD_ACCESS_READ : OFD_ACCESS_WRITE;
+
+       if (m->ofd_access_log && (flags & m->ofd_access_log_mask)) {
+               struct ofd_access_entry_v1 oae = {
+                       .oae_parent_fid = *parent_fid,
+                       .oae_begin = begin,
+                       .oae_end = end,
+                       .oae_time = ktime_get_real_seconds(),
+                       .oae_size = size,
+                       .oae_segment_count = segment_count,
+                       .oae_flags = flags,
+               };
+
+               oal_write_entry(m->ofd_access_log, &oae, sizeof(oae));
+       }
+}
+
+/* Called on OST umount to:
+ * - Close the write end of the oal. This wakes any tasks sleeping in
+ *   read or poll and makes all reads return zero once the log
+ *   becomes empty.
+ * - Delete the associated struct device and cdev, preventing new
+ *   opens. Existing opens retain a reference on the oal through
+ *   their reference on oal_device.
+ * The oal will be freed when the last open file handle is closed. */
+void ofd_access_log_delete(struct ofd_access_log *oal)
+{
+       if (!oal)
+               return;
+
+       oal->oal_is_closed = 1;
+       wake_up_all(&oal->oal_read_wait_queue);
+       cdev_device_del(&oal->oal_cdev, &oal->oal_device);
+}
+
+/* private_data for control device file. */
+struct oal_control_file {
+       int ccf_event_count;
+};
+
+/* Control file usage:
+ * Open /dev/lustre-access-log/control.
+ * while (1)
+ *   Poll for readable on control FD.
+ *   Call ioctl(FD, LUSTRE_ACCESS_LOG_IOCTL_PRESCAN) to fetch event count.
+ *   Scan /dev/ or /sys/class/... for new devices.
+ */
+static int oal_control_file_open(struct inode *inode, struct file *filp)
+{
+       struct oal_control_file *ccf;
+       int rc;
+
+       rc = nonseekable_open(inode, filp);
+       if (rc)
+               return rc;
+
+       /* ccf->ccf_event_count = 0 on open */
+       ccf = kzalloc(sizeof(*ccf), GFP_KERNEL);
+       if (!ccf)
+               return -ENOMEM;
+
+       filp->private_data = ccf;
+
+       return 0;
+}
+
+static int oal_control_file_release(struct inode *inode, struct file *filp)
+{
+       kfree(filp->private_data);
+       return 0;
+}
+
+static unsigned int oal_control_file_poll(struct file *filp, poll_table *wait)
+{
+       struct oal_control_file *ccf = filp->private_data;
+       unsigned int mask = 0;
+
+       poll_wait(filp, &oal_control_wait_queue, wait);
+
+       if (atomic_read(&oal_control_event_count) != ccf->ccf_event_count)
+               mask |= POLLIN;
+
+       return mask;
+}
+
+static long oal_control_file_ioctl(struct file *filp, unsigned int cmd,
+                               unsigned long arg)
+{
+       struct oal_control_file *ccf = filp->private_data;
+
+       switch (cmd) {
+       case LUSTRE_ACCESS_LOG_IOCTL_VERSION:
+               return LUSTRE_ACCESS_LOG_VERSION_1;
+       case LUSTRE_ACCESS_LOG_IOCTL_MAJOR:
+               return oal_log_major;
+       case LUSTRE_ACCESS_LOG_IOCTL_PRESCAN:
+               ccf->ccf_event_count = atomic_read(&oal_control_event_count);
+               return 0;
+       default:
+               return -ENOTTY;
+       }
+}
+
+static const struct file_operations oal_control_fops = {
+       .owner = THIS_MODULE,
+       .open = &oal_control_file_open,
+       .release = &oal_control_file_release,
+       .poll = &oal_control_file_poll,
+       .unlocked_ioctl = &oal_control_file_ioctl,
+       .llseek = &noop_llseek,
+};
+
+static struct miscdevice oal_control_misc = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = LUSTRE_ACCESS_LOG_DIR_NAME"!control",
+       .fops = &oal_control_fops,
+};
+
+int ofd_access_log_module_init(void)
+{
+       dev_t dev;
+       int rc;
+
+       BUILD_BUG_ON(!is_power_of_2(sizeof(struct ofd_access_entry_v1)));
+
+       rc = misc_register(&oal_control_misc);
+       if (rc)
+               return rc;
+
+       rc = alloc_chrdev_region(&dev, 0, OAL_DEV_COUNT,
+                               LUSTRE_ACCESS_LOG_DIR_NAME);
+       if (rc)
+               goto out_oal_control_misc;
+
+       oal_log_major = MAJOR(dev);
+
+       oal_log_class = class_create(THIS_MODULE, LUSTRE_ACCESS_LOG_DIR_NAME);
+       if (IS_ERR(oal_log_class)) {
+               rc = PTR_ERR(oal_log_class);
+               goto out_dev;
+       }
+
+       return 0;
+out_dev:
+       unregister_chrdev_region(dev, OAL_DEV_COUNT);
+out_oal_control_misc:
+       misc_deregister(&oal_control_misc);
+
+       return rc;
+}
+
+void ofd_access_log_module_exit(void)
+{
+       class_destroy(oal_log_class);
+       unregister_chrdev_region(MKDEV(oal_log_major, 0), OAL_DEV_COUNT);
+       idr_destroy(&oal_log_minor_idr);
+       misc_deregister(&oal_control_misc);
+}
index 3bceb33..883d7ef 100644 (file)
@@ -2845,6 +2845,8 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        INIT_LIST_HEAD(&m->ofd_inconsistency_list);
        spin_lock_init(&m->ofd_inconsistency_lock);
 
+       m->ofd_access_log_mask = -1; /* Log all accesses if enabled. */
+
        spin_lock_init(&m->ofd_batch_lock);
        init_rwsem(&m->ofd_lastid_rwsem);
 
@@ -3015,6 +3017,9 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
                d->ld_obd->obd_namespace = m->ofd_namespace = NULL;
        }
 
+       ofd_access_log_delete(m->ofd_access_log);
+       m->ofd_access_log = NULL;
+
        ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev);
 
        LASSERT(atomic_read(&d->ld_ref) == 0);
@@ -3130,13 +3135,28 @@ static struct lu_device_type ofd_device_type = {
  */
 static int __init ofd_init(void)
 {
-       int                             rc;
+       int rc;
 
        rc = lu_kmem_init(ofd_caches);
        if (rc)
                return rc;
+
+       rc = ofd_access_log_module_init();
+       if (rc)
+               goto out_caches;
+
        rc = class_register_type(&ofd_obd_ops, NULL, true, NULL,
                                 LUSTRE_OST_NAME, &ofd_device_type);
+       if (rc)
+               goto out_ofd_access_log;
+
+       return 0;
+
+out_ofd_access_log:
+       ofd_access_log_module_exit();
+out_caches:
+       lu_kmem_fini(ofd_caches);
+
        return rc;
 }
 
@@ -3148,8 +3168,9 @@ static int __init ofd_init(void)
  */
 static void __exit ofd_exit(void)
 {
-       lu_kmem_fini(ofd_caches);
        class_unregister_type(LUSTRE_OST_NAME);
+       ofd_access_log_module_exit();
+       lu_kmem_fini(ofd_caches);
 }
 
 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
index 9765ca0..da30314 100644 (file)
@@ -120,6 +120,10 @@ struct ofd_device {
        __u64                    ofd_inconsistency_self_detected;
        __u64                    ofd_inconsistency_self_repaired;
 
+       struct ofd_access_log   *ofd_access_log;
+       unsigned int             ofd_access_log_size;
+       unsigned int             ofd_access_log_mask;
+
        struct list_head        ofd_seq_list;
        rwlock_t                ofd_seq_list_lock;
        int                     ofd_seq_count;
@@ -289,6 +293,18 @@ struct ofd_thread_info {
 extern void target_recovery_fini(struct obd_device *obd);
 extern void target_recovery_init(struct lu_target *lut, svc_handler_t handler);
 
+/* ofd_access_log.c */
+bool ofd_access_log_size_is_valid(unsigned int size);
+int ofd_access_log_module_init(void);
+void ofd_access_log_module_exit(void);
+
+struct ofd_access_log;
+struct ofd_access_log *ofd_access_log_create(const char *ofd_name, size_t size);
+void ofd_access_log_delete(struct ofd_access_log *oal);
+void ofd_access(struct ofd_device *m,
+               const struct lu_fid *parent_fid, __u64 begin, __u64 end,
+               unsigned int size, unsigned int segment_count, int rw);
+
 /* ofd_dev.c */
 extern struct lu_context_key ofd_thread_key;
 int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd);
index 1eb1668..9305d8e 100644 (file)
@@ -571,6 +571,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        int i, j, rc, tot_bytes = 0;
        enum dt_bufs_type dbt = DT_BUFS_TYPE_READ;
        int maxlnb = *nr_local;
+       __u64 begin, end;
 
        ENTRY;
        LASSERT(env != NULL);
@@ -598,7 +599,12 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        if (ptlrpc_connection_is_local(exp->exp_connection))
                dbt |= DT_BUFS_TYPE_LOCAL;
 
+       begin = -1;
+       end = 0;
+
        for (*nr_local = 0, i = 0, j = 0; i < niocount; i++) {
+               begin = min_t(__u64, begin, rnb[i].rnb_offset);
+               end = max_t(__u64, end, rnb[i].rnb_offset + rnb[i].rnb_len);
 
                if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF))
                        rnb[i].rnb_len = 100 * 1024 * 1024;
@@ -621,6 +627,17 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        if (unlikely(rc))
                GOTO(buf_put, rc);
 
+       ofd_access(ofd,
+               &(struct lu_fid) {
+                       .f_seq = oa->o_parent_seq,
+                       .f_oid = oa->o_parent_oid,
+                       .f_ver = oa->o_stripe_idx,
+               },
+               begin, end,
+               tot_bytes,
+               niocount,
+               READ);
+
        ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, tot_bytes);
        RETURN(0);
 
@@ -666,6 +683,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        int i, j, k, rc = 0, tot_bytes = 0;
        enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE;
        int maxlnb = *nr_local;
+       __u64 begin, end;
 
        ENTRY;
        LASSERT(env != NULL);
@@ -773,8 +791,14 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        if (ptlrpc_connection_is_local(exp->exp_connection))
                dbt |= DT_BUFS_TYPE_LOCAL;
 
+       begin = -1;
+       end = 0;
+
        /* parse remote buffers to local buffers and prepare the latter */
        for (*nr_local = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
+               begin = min_t(__u64, begin, rnb[i].rnb_offset);
+               end = max_t(__u64, end, rnb[i].rnb_offset + rnb[i].rnb_len);
+
                if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF))
                        rnb[i].rnb_len += PAGE_SIZE;
                rc = dt_bufs_get(env, ofd_object_child(fo),
@@ -802,6 +826,18 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                GOTO(err, rc);
 
        ofd_read_unlock(env, fo);
+
+       ofd_access(ofd,
+               &(struct lu_fid) {
+                       .f_seq = oa->o_parent_seq,
+                       .f_oid = oa->o_parent_oid,
+                       .f_ver = oa->o_stripe_idx,
+               },
+               begin, end,
+               tot_bytes,
+               obj->ioo_bufcnt,
+               WRITE);
+
        ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes);
        RETURN(0);
 err:
index e24d835..3254496 100644 (file)
@@ -41,6 +41,7 @@
 #include <obd_class.h>
 #include <lustre_net.h>
 #include <lustre_disk.h>
+#include <uapi/linux/lustre/lustre_access_log.h>
 #include <uapi/linux/lustre/lustre_lfsck_user.h>
 #include <uapi/linux/lustre/lustre_cfg.h>
 
index b37e059..3b5431b 100644 (file)
@@ -41,6 +41,7 @@
 #include <obd_class.h>
 #include <lustre_net.h>
 #include <lustre_disk.h>
+#include <uapi/linux/lustre/lustre_access_log.h>
 #include <uapi/linux/lustre/lustre_lfsck_user.h>
 #include <uapi/linux/lustre/lustre_cfg.h>
 
@@ -5370,6 +5371,82 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(union nodemap_rec) == 32, "found %lld\n",
                 (long long)(int)sizeof(union nodemap_rec));
 
+       LASSERTF(OFD_ACCESS_READ == 0x00000001UL, "found 0x%.8xUL\n",
+               (unsigned)OFD_ACCESS_READ);
+       LASSERTF(OFD_ACCESS_WRITE == 0x00000002UL, "found 0x%.8xUL\n",
+               (unsigned)OFD_ACCESS_WRITE);
+       /* Checks for struct ofd_access_entry_v1 */
+       LASSERTF((int)sizeof(struct ofd_access_entry_v1) == 64, "found %lld\n",
+                (long long)(int)sizeof(struct ofd_access_entry_v1));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_parent_fid) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_parent_fid));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_parent_fid) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_parent_fid));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_begin) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_begin));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_begin) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_begin));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_end) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_end));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_end) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_end));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_time) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_time));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_time) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_time));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_size) == 40, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_size));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_size));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_segment_count) == 44, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_segment_count));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_segment_count) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_segment_count));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_flags) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_flags));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_flags) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_flags));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved1) == 52, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved1));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved1));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved2) == 56, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved2));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved2) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved2));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved3) == 60, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved3));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved3) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved3));
+
+       LASSERTF(LUSTRE_ACCESS_LOG_VERSION_1 == 0x00010000UL, "found 0x%.8xUL\n",
+               (unsigned)LUSTRE_ACCESS_LOG_VERSION_1);
+       LASSERTF(LUSTRE_ACCESS_LOG_TYPE_OFD == 0x00000001UL, "found 0x%.8xUL\n",
+               (unsigned)LUSTRE_ACCESS_LOG_TYPE_OFD);
+       /* Checks for struct lustre_access_log_info_v1 */
+       LASSERTF((int)sizeof(struct lustre_access_log_info_v1) == 168, "found %lld\n",
+                (long long)(int)sizeof(struct lustre_access_log_info_v1));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_version) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_version));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_version) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_version));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_type) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_type));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_type) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_type));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_name) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_name));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_name) == 128, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_name));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_log_size) == 136, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_log_size));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_log_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_log_size));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_entry_size) == 140, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_entry_size));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_entry_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_entry_size));
+
        /* Checks for struct lfsck_request */
        LASSERTF((int)sizeof(struct lfsck_request) == 96, "found %lld\n",
                 (long long)(int)sizeof(struct lfsck_request));
index 6833c31..62a7f89 100755 (executable)
@@ -14975,6 +14975,233 @@ test_162c() {
 }
 run_test 162c "fid2path works with paths 100 or more directories deep"
 
+oalr_event_count() {
+       local event="${1}"
+       local trace="${2}"
+       # Print the number of "TRACE <event> ${FSNAME}-OST0000 ..." lines in <trace>.
+       awk -v name="${FSNAME}-OST0000" \
+           -v event="${event}" \
+           '$1 == "TRACE" && $2 == event && $3 == name' \
+           "${trace}" |
+       wc -l
+}
+
+oalr_expect_event_count() {
+       local event="${1}" trace="${2}" expect="${3}"
+       local count
+
+       count=$(oalr_event_count "${event}" "${trace}")
+
+       # On a mismatch report both counts, dump the trace to stderr and
+       # abort; otherwise succeed quietly.
+       if ((count != expect)); then
+               error_noexit "${event} event count was '${count}', expected ${expect}"
+               cat "${trace}" >&2
+               exit 1
+       fi
+       return 0
+}
+
+cleanup_165() {
+       do_facet ost1 killall --quiet -KILL ofd_access_log_reader || true # best effort: a killall failure is ignored
+       stop ost1 # stop, then start ost1 again on the same device
+       start ost1 "$(ostdevname 1)" $OST_MOUNT_OPTS
+}
+
+setup_165() {
+       sync # Flush previous IOs so we can count log entries.
+       do_facet ost1 $LCTL set_param "obdfilter.${FSNAME}-OST0000.access_log_size=4096" # set the OST0000 access log size to 4096
+       stack_trap cleanup_165 EXIT # register cleanup_165 with stack_trap for EXIT
+}
+
+test_165a() {
+       local trace="/tmp/${tfile}.trace"
+       local rc
+       local pid
+
+       do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" &
+       pid=$!
+       setup_165
+       sleep 5
+       do_facet ost1 ofd_access_log_reader --list
+       stop ost1
+
+       do_facet ost1 killall -TERM ofd_access_log_reader
+       wait "${pid}" # a bare 'wait' always returns 0 and would hide the reader's status
+       rc=$?
+
+       if ((rc != 0)); then
+               error "ofd_access_log_reader exited with rc = '${rc}'"
+       fi
+
+       # Parse trace file for discovery events:
+       oalr_expect_event_count alr_log_add "${trace}" 1
+       oalr_expect_event_count alr_log_eof "${trace}" 1
+       oalr_expect_event_count alr_log_free "${trace}" 1
+}
+run_test 165a "ofd access log discovery"
+
+test_165b() {
+       local trace="/tmp/${tfile}.trace"
+       local file="${DIR}/${tfile}"
+       local pfid1
+       local pfid2
+       local -a entry
+       local rc
+       local pid
+       local size
+       local flags
+
+       setup_165
+
+       $LFS setstripe -c 1 -i 0 "${file}"
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'"
+       do_facet ost1 ofd_access_log_reader --list
+       # Consume the log; the 1 MiB write above must have produced exactly one entry.
+       do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" &
+       pid=$!
+       sleep 5
+       do_facet ost1 killall -TERM ofd_access_log_reader
+       wait "${pid}" # a bare 'wait' always returns 0 and would hide the reader's status
+       rc=$?
+       if ((rc != 0)); then
+               error "ofd_access_log_reader exited with rc = '${rc}'"
+       fi
+
+       oalr_expect_event_count alr_log_entry "${trace}" 1
+
+       pfid1=$($LFS path2fid "${file}")
+
+       # 1     2             3   4    5     6   7    8    9     10
+       # TRACE alr_log_entry OST PFID BEGIN END TIME SIZE COUNT FLAGS
+       entry=( - $(awk '$1 == "TRACE" && $2 == "alr_log_entry"' "${trace}" ) )
+
+       echo "entry = '${entry[*]}'" >&2
+
+       pfid2=${entry[4]}
+       if [[ "${pfid1}" != "${pfid2}" ]]; then
+               error "entry '${entry[*]}' has invalid PFID '${pfid2}', expected ${pfid1}"
+       fi
+
+       size=${entry[8]}
+       if ((size != 1048576)); then
+               error "entry '${entry[*]}' has invalid io size '${size}', expected 1048576"
+       fi
+
+       flags=${entry[10]}
+       if [[ "${flags}" != "w" ]]; then
+               error "entry '${entry[*]}' has invalid io flags '${flags}', expected 'w'"
+       fi
+       # Read 512 KiB back; the fresh trace must hold exactly one entry, for that read.
+       do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" &
+       pid=$!
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r524288c || error "cannot read '${file}'"
+       sleep 5
+       do_facet ost1 killall -TERM ofd_access_log_reader
+       wait "${pid}" # see above: wait on the reader itself to get its exit status
+       rc=$?
+       if ((rc != 0)); then
+               error "ofd_access_log_reader exited with rc = '${rc}'"
+       fi
+
+       oalr_expect_event_count alr_log_entry "${trace}" 1
+
+       entry=( - $(awk '$1 == "TRACE" && $2 == "alr_log_entry"' "${trace}" ) )
+       echo "entry = '${entry[*]}'" >&2
+
+       pfid2=${entry[4]}
+       if [[ "${pfid1}" != "${pfid2}" ]]; then
+               error "entry '${entry[*]}' has invalid PFID '${pfid2}', expected ${pfid1}"
+       fi
+
+       size=${entry[8]}
+       if ((size != 524288)); then
+               error "entry '${entry[*]}' has invalid io size '${size}', expected 524288"
+       fi
+
+       flags=${entry[10]}
+       if [[ "${flags}" != "r" ]]; then
+               error "entry '${entry[*]}' has invalid io flags '${flags}', expected 'r'"
+       fi
+}
+run_test 165b "ofd access log entries are produced and consumed"
+
+test_165c() {
+       local file="${DIR}/${tdir}/${tfile}"
+       local i
+
+       test_mkdir "${DIR}/${tdir}"
+       setup_165
+       $LFS setstripe -c 1 -i 0 "${DIR}/${tdir}"
+
+       # A 4096 byte log holds 4096 / 64 = 64 entries; do twice as many IOs.
+       for ((i = 0; i < 128; i++)); do
+               $MULTIOP "${file}-${i}" oO_CREAT:O_WRONLY:w512c || error "cannot create file"
+       done
+
+       sync
+       do_facet ost1 ofd_access_log_reader --list
+       unlinkmany "${file}-%d" 128
+}
+run_test 165c "full ofd access logs do not block IOs"
+
+oal_peek_entry_count() {
+       do_facet ost1 ofd_access_log_reader --list | awk '$1 == "_entry_count:" { print $2; }' # print the value following "_entry_count:"
+}
+
+oal_expect_entry_count() {
+       local expect="$1"
+       local entry_count=$(oal_peek_entry_count)
+       # On a mismatch report it, dump the reader's --list output and abort.
+       if ((entry_count != expect)); then
+               error_noexit "bad entry count, got ${entry_count}, expected ${expect}"
+               do_facet ost1 ofd_access_log_reader --list >&2
+               exit 1
+       fi
+
+       return 0
+}
+
+test_165d() {
+       # Check that access_log_mask selects which IOs (r, w, rw or none) get logged.
+       local file="${DIR}/${tdir}/${tfile}"
+       local param="obdfilter.${FSNAME}-OST0000.access_log_mask"
+
+       test_mkdir "${DIR}/${tdir}"
+
+       setup_165
+       $LFS setstripe -c 1 -i 0 "${file}"
+
+       do_facet ost1 $LCTL set_param "${param}=rw" # both reads and writes add an entry
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'"
+       oal_expect_entry_count 1
+
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'"
+       oal_expect_entry_count 2
+
+       do_facet ost1 $LCTL set_param "${param}=r" # only reads add an entry
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'"
+       oal_expect_entry_count 2
+
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'"
+       oal_expect_entry_count 3
+
+       do_facet ost1 $LCTL set_param "${param}=w" # only writes add an entry
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'"
+       oal_expect_entry_count 4
+
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'"
+       oal_expect_entry_count 4
+
+       do_facet ost1 $LCTL set_param "${param}=0" # nothing adds an entry
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'"
+       oal_expect_entry_count 4
+
+       $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'"
+       oal_expect_entry_count 4
+}
+run_test 165d "ofd_access_log mask works"
+
 test_169() {
        # do directio so as not to populate the page cache
        log "creating a 10 Mb file"
index c6cbe5e..8c57e3b 100644 (file)
@@ -28,3 +28,4 @@
 /lhsmtool_posix
 /l_tunedisk
 /l_getsepol
+/ofd_access_log_reader
index 5d664dd..23b8a23 100644 (file)
@@ -36,7 +36,8 @@ endif # TESTS
 
 if SERVER
 sbin_PROGRAMS += mkfs.lustre tunefs.lustre llverdev lr_reader lshowmount \
-                ll_decode_filter_fid llog_reader l_tunedisk
+                ll_decode_filter_fid llog_reader l_tunedisk \
+                ofd_access_log_reader
 endif
 if LIBPTHREAD
 sbin_PROGRAMS += lhsmtool_posix
@@ -134,6 +135,10 @@ llog_reader_DEPENDENCIES := liblustreapi.la
 
 lr_reader_SOURCES = lr_reader.c
 
+ofd_access_log_reader_SOURCES = \
+       lstddef.h \
+       ofd_access_log_reader.c
+
 if UTILS
 
 PLUGIN_LIB =
diff --git a/lustre/utils/lstddef.h b/lustre/utils/lstddef.h
new file mode 100644 (file)
index 0000000..c10ab0b
--- /dev/null
@@ -0,0 +1,295 @@
+#ifndef _LSTDDEF_H
+#define _LSTDDEF_H
+
+#include <stddef.h>
+
+#define __ALIGN_LSTDDEF_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#define __ALIGN_LSTDDEF(x, a) __ALIGN_LSTDDEF_MASK(x, (typeof(x))(a) - 1)
+#define __LSTDDEF_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+#define ALIGN(x, a)            __ALIGN_LSTDDEF((x), (a))
+#define ALIGN_DOWN(x, a)       __ALIGN_LSTDDEF((x) - ((a) - 1), (a))
+#define __ALIGN_MASK(x, mask)  __ALIGN_LSTDDEF_MASK((x), (mask))
+#define PTR_ALIGN(p, a)                ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x, a)               (((x) & ((typeof(x))(a) - 1)) == 0)
+
+#ifndef __must_be_array
+# define __must_be_array(arr) 0
+#endif
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define FIELD_SIZEOF(t, f) (sizeof(((t *)0)->f))
+#define DIV_ROUND_UP __LSTDDEF_DIV_ROUND_UP /* the (n, d) rounding-up helper defined near the top of this header */
+
+#define DIV_ROUND_DOWN_ULL(ll, d) \
+       ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
+
+#define DIV_ROUND_UP_ULL(ll, d)        DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d))
+
+#if BITS_PER_LONG == 32
+# define DIV_ROUND_UP_SECTOR_T(ll, d) DIV_ROUND_UP_ULL(ll, d)
+#else
+# define DIV_ROUND_UP_SECTOR_T(ll, d) DIV_ROUND_UP(ll, d)
+#endif
+
+/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+#define roundup(x, y) ({                               \
+       const typeof(y) __y = y;                        \
+       (((x) + (__y - 1)) / __y) * __y;                \
+})
+
+#define rounddown(x, y) ({                             \
+       typeof(x) __x = (x);                            \
+       __x - (__x % (y));                              \
+})
+
+/*
+ * Divide positive or negative dividend by positive divisor and round
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
+ */
+#define DIV_ROUND_CLOSEST(x, divisor) ({               \
+       typeof(x) __x = x;                              \
+       typeof(divisor) __d = divisor;                  \
+       (((typeof(x))-1) > 0 ||                         \
+        ((typeof(divisor))-1) > 0 || (__x) > 0) ?      \
+               (((__x) + ((__d) / 2)) / (__d)) :       \
+               (((__x) - ((__d) / 2)) / (__d));        \
+})
+
+/*
+ * Same as above but for u64 dividends. divisor must be a 32-bit
+ * number.
+ */
+#define DIV_ROUND_CLOSEST_ULL(x, divisor) ({           \
+       typeof(divisor) __d = divisor;                  \
+       unsigned long long _tmp = (x) + (__d) / 2;      \
+       do_div(_tmp, __d);                              \
+       _tmp;                                           \
+})
+
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom) ({                  \
+       typeof(x) quot = (x) / (denom);                 \
+       typeof(x) rem  = (x) % (denom);                 \
+       (quot * (numer)) + ((rem * (numer)) / (denom)); \
+})
+
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((__u32)(n))
+
+/**
+ * abs - return absolute value of an argument
+ * @x: the value.  If it is unsigned type, it is converted to signed type first
+ *   (s64, long or int depending on its size).
+ *
+ * Return: an absolute value of x.  If x is 64-bit, macro's return type is s64,
+ *   otherwise it is signed long.
+ */
+#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(__s64), ({    \
+               __s64 __x = (x);                                        \
+               (__x < 0) ? -__x : __x;                                 \
+       }), ({                                                          \
+               long ret;                                               \
+               if (sizeof(x) == sizeof(long)) {                        \
+                       long __x = (x);                                 \
+                       ret = (__x < 0) ? -__x : __x;                   \
+               } else {                                                \
+                       int __x = (x);                                  \
+                       ret = (__x < 0) ? -__x : __x;                   \
+               }                                                       \
+               ret;                                                    \
+       }))
+
+/**
+ * reciprocal_scale - "scale" a value into range [0, ep_ro)
+ * @val: value
+ * @ep_ro: right open interval endpoint
+ *
+ * Perform a "reciprocal multiplication" in order to "scale" a value into
+ * range [0, ep_ro), where the upper interval endpoint is right-open.
+ * This is useful, e.g. for accessing an index of an array containing
+ * ep_ro elements. Think of it as sort of modulus, only that
+ * the result isn't that of modulo. ;) Note that if initial input is a
+ * small value, then result will return 0.
+ *
+ * Return: a result based on val in interval [0, ep_ro).
+ */
+static inline __u32 reciprocal_scale(__u32 val, __u32 ep_ro)
+{
+       return (__u32)(((__u64) val * ep_ro) >> 32);
+}
+
+/*
+ * min()/max()/clamp() macros that also do
+ * strict type-checking.. See the
+ * "unnecessary" pointer comparison.
+ */
+#define min(x, y) ({                           \
+       typeof(x) _min1 = (x);                  \
+       typeof(y) _min2 = (y);                  \
+       (void) (&_min1 == &_min2);              \
+       _min1 < _min2 ? _min1 : _min2;          \
+})
+
+#define max(x, y) ({                           \
+       typeof(x) _max1 = (x);                  \
+       typeof(y) _max2 = (y);                  \
+       (void) (&_max1 == &_max2);              \
+       _max1 > _max2 ? _max1 : _max2;          \
+})
+
+#define min3(x, y, z) ({                       \
+       typeof(x) _min1 = (x);                  \
+       typeof(y) _min2 = (y);                  \
+       typeof(z) _min3 = (z);                  \
+       (void) (&_min1 == &_min2);              \
+       (void) (&_min1 == &_min3);              \
+       _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
+               (_min2 < _min3 ? _min2 : _min3); \
+})
+
+#define max3(x, y, z) ({                       \
+       typeof(x) _max1 = (x);                  \
+       typeof(y) _max2 = (y);                  \
+       typeof(z) _max3 = (z);                  \
+       (void) (&_max1 == &_max2);              \
+       (void) (&_max1 == &_max3);              \
+       _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
+               (_max2 > _max3 ? _max2 : _max3); \
+})
+
+/**
+ * min_not_zero - return the minimum that is _not_ zero, unless both are zero
+ * @x: value1
+ * @y: value2
+ */
+#define min_not_zero(x, y) ({                  \
+       typeof(x) __x = (x);                    \
+       typeof(y) __y = (y);                    \
+       __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); \
+})
+
+/**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does strict typechecking of min/max to make sure they are of the
+ * same type as val.  See the unnecessary pointer comparisons.
+ */
+#define clamp(val, min, max) ({                        \
+       typeof(val) __val = (val);              \
+       typeof(min) __min = (min);              \
+       typeof(max) __max = (max);              \
+       (void) (&__val == &__min);              \
+       (void) (&__val == &__max);              \
+       __val = __val < __min ? __min : __val;  \
+       __val > __max ? __max : __val;          \
+})
+
+/*
+ * ..and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max/clamp at all, of course.
+ */
+#define min_t(type, x, y) ({                   \
+       type __min1 = (x);                      \
+       type __min2 = (y);                      \
+       __min1 < __min2 ? __min1 : __min2;      \
+})
+
+#define max_t(type, x, y) ({                   \
+       type __max1 = (x);                      \
+       type __max2 = (y);                      \
+       __max1 > __max2 ? __max1 : __max2;      \
+})
+
+/**
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * 'type' to make all the comparisons.
+ */
+#define clamp_t(type, val, min, max) ({                \
+       type __val = (val);                     \
+       type __min = (min);                     \
+       type __max = (max);                     \
+       __val = __val < __min ? __min : __val;  \
+       __val > __max ? __max : __val;          \
+})
+
+/**
+ * clamp_val - return a value clamped to a given range using val's type
+ * @val: current value
+ * @min: minimum allowable value
+ * @max: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of whatever
+ * type the input argument 'val' is.  This is useful when val is an unsigned
+ * type and min and max are literals that will otherwise be assigned a signed
+ * integer type.
+ */
+#define clamp_val(val, min, max) ({            \
+       typeof(val) __val = (val);              \
+       typeof(val) __min = (min);              \
+       typeof(val) __max = (max);              \
+       __val = __val < __min ? __min : __val;  \
+       __val > __max ? __max : __val;          \
+})
+
+/*
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) do {                                \
+       typeof(a) __tmp = (a);                  \
+       (a) = (b);                              \
+       (b) = __tmp;                            \
+} while (0)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:       the pointer to the member.
+ * @type:      the type of the container struct this is embedded in.
+ * @member:    the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({                     \
+       const typeof(((type *)0)->member) *__mptr = (ptr);      \
+       (type *)((char *)__mptr - offsetof(type, member));      \
+})
+
+#endif /* !_LSTDDEF_H */
diff --git a/lustre/utils/ofd_access_log_reader.c b/lustre/utils/ofd_access_log_reader.c
new file mode 100644 (file)
index 0000000..6625241
--- /dev/null
@@ -0,0 +1,723 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ *
+ * Copyright 2020, DataDirect Networks Storage.
+ *
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ * Author: John L. Hammond <jhammond@whamcloud.com>
+ *
+ * lustre/utils/ofd_access_log_reader.c
+ *
+ * Sample utility to discover and read Lustre (ofd) access logs.
+ *
+ * This demonstrates the discovery and reading of Lustre access logs
+ * (see linux/lustre/lustre_access_log.h and
+ * lustre/ofd/ofd_access_log.c). By default it opens the control
+ * device, discovers and opens all access log devices, and consumes
+ * all access log entries. If invoked with the --list option then it
+ * prints information about all available devices to stdout and exits.
+ *
+ * Structured trace points (when --trace is used) are added to permit
+ * testing of the access log functionality (see test_165* in
+ * lustre/tests/sanity.sh).
+ */
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <malloc.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <linux/types.h>
+#include <linux/lustre/lustre_user.h>
+#include <linux/lustre/lustre_access_log.h>
+#include "lstddef.h"
+
+/* TODO fsname filter */
+
+/* Optional output streams for DEBUG() and TRACE(). Both start out NULL
+ * (output disabled) and are set from the --debug and --trace options in
+ * main(). */
+static FILE *debug_file;
+static FILE *trace_file;
+
+/* Print a printf-style message, prefixed with "DEBUG <function>:<line>: ",
+ * to debug_file. Does nothing while debug_file is NULL. */
+#define DEBUG(fmt, args...)                                            \
+       do {                                                            \
+               if (debug_file != NULL)                                 \
+                       fprintf(debug_file, "DEBUG %s:%d: "fmt, __func__, __LINE__, ##args); \
+       } while (0)
+
+/* Print a printf-style message, prefixed with "TRACE ", to trace_file.
+ * Does nothing while trace_file is NULL. */
+#define TRACE(fmt, args...)                                            \
+       do {                                                            \
+               if (trace_file != NULL)                                 \
+                       fprintf(trace_file, "TRACE "fmt, ##args);       \
+       } while (0)
+
+/* Shorthands that DEBUG() an expression as "<expression text> = <value>".
+ * The suffix selects the conversion: int, pointer, string, unsigned int,
+ * uint32_t and uint64_t respectively. */
+#define DEBUG_D(x) DEBUG("%s = %d\n", #x, x)
+#define DEBUG_P(x) DEBUG("%s = %p\n", #x, x)
+#define DEBUG_S(x) DEBUG("%s = '%s'\n", #x, x)
+#define DEBUG_U(x) DEBUG("%s = %u\n", #x, x)
+#define DEBUG_U32(x) DEBUG("%s = %"PRIu32"\n", #x, x)
+#define DEBUG_U64(x) DEBUG("%s = %"PRIu64"\n", #x, x)
+
+/* Print a printf-style message to stderr, prefixed with the short program
+ * name. */
+#define ERROR(fmt, args...) \
+       fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##args)
+
+/* Report an error with a "FATAL: " tag and terminate the process with
+ * EXIT_FAILURE. */
+#define FATAL(fmt, args...)                    \
+       do {                                    \
+               ERROR("FATAL: "fmt, ##args);    \
+               exit(EXIT_FAILURE);             \
+       } while (0)
+
+/* Return codes of the alr_io callbacks, as interpreted by the epoll loop
+ * in main(). */
+enum {
+       /* Leave the event loop and make main() return EXIT_SUCCESS or
+        * EXIT_FAILURE. Offset from INT_MIN, which keeps them distinct from
+        * the small values below. */
+       ALR_EXIT_SUCCESS = INT_MIN + EXIT_SUCCESS,
+       ALR_EXIT_FAILURE = INT_MIN + EXIT_FAILURE,
+       /* I/O on the device failed; main() frees the device. */
+       ALR_ERROR = -1,
+       /* read() returned 0; main() frees the device. */
+       ALR_EOF = 0,
+       /* Event handled; keep the device in the epoll set. */
+       ALR_OK = 1,
+};
+
+/* A file descriptor registered in the epoll set together with the
+ * callbacks used to service and release it. */
+struct alr_dev {
+       /* Heap-allocated name used in messages; freed by alr_dev_free(). */
+       char *alr_name;
+       /* Called from the main() epoll loop when an event is reported for
+        * alr_fd; returns one of the ALR_* codes. */
+       int (*alr_io)(int /* epoll_fd */, struct alr_dev * /* this */, unsigned int /* mask */);
+       /* Optional type-specific cleanup run by alr_dev_free(); may be NULL. */
+       void (*alr_destroy)(struct alr_dev *);
+       /* Descriptor being polled; alr_dev_free() closes it unless negative. */
+       int alr_fd;
+};
+
+/* State for one opened access log device. */
+struct alr_log {
+       /* Generic device part; container_of() on it recovers the alr_log. */
+       struct alr_dev alr_dev;
+       /* Buffer that alr_log_io() reads entries into. */
+       char *alr_buf;
+       /* Size of alr_buf in bytes (a multiple of alr_entry_size). */
+       size_t alr_buf_size;
+       /* Size in bytes of one log entry, as reported by the INFO ioctl. */
+       size_t alr_entry_size;
+       /* Device number of the log; its minor indexes alr_log[]. */
+       dev_t alr_rdev;
+};
+
+/* Table of known access logs, indexed by device minor number. A NULL slot
+ * means no log with that minor is currently tracked. */
+static struct alr_log *alr_log[1 << 20]; /* 20 == MINORBITS */
+/* Interface version returned by LUSTRE_ACCESS_LOG_IOCTL_VERSION. */
+static int oal_version; /* FIXME ... major version, minor version */
+/* Device major shared by all access log devices
+ * (from LUSTRE_ACCESS_LOG_IOCTL_MAJOR). */
+static unsigned int oal_log_major;
+/* Largest minor stored in alr_log[] so far; bounds the cleanup loop in
+ * main(). */
+static unsigned int oal_log_minor_max;
+
+/* printf format and matching argument list for a struct alr_dev:
+ * name and file descriptor. */
+#define D_ALR_DEV "%s %d"
+#define P_ALR_DEV(ad) \
+       (ad)->alr_name, (ad)->alr_fd
+
+/* Same for a struct alr_log, with the device major:minor appended. */
+#define D_ALR_LOG D_ALR_DEV" %u:%u"
+#define P_ALR_LOG(al) \
+       P_ALR_DEV(&(al)->alr_dev), major((al)->alr_rdev), minor((al)->alr_rdev)
+
+/* Tear down a device: remove its descriptor from the epoll set, run the
+ * type-specific destroy hook (if any), close the descriptor and release the
+ * name and the structure. A negative alr_fd is neither removed nor closed. */
+static void alr_dev_free(int epoll_fd, struct alr_dev *ad)
+{
+       TRACE("alr_dev_free %s\n", ad->alr_name);
+
+       if (ad->alr_fd >= 0)
+               epoll_ctl(epoll_fd, EPOLL_CTL_DEL, ad->alr_fd, NULL);
+
+       if (ad->alr_destroy != NULL)
+               ad->alr_destroy(ad);
+
+       if (ad->alr_fd >= 0)
+               close(ad->alr_fd);
+
+       free(ad->alr_name);
+       free(ad);
+}
+
+/* Return the alr_log[] slot for device number rdev, or NULL when its minor
+ * lies outside the table. The major of rdev must equal oal_log_major. */
+static struct alr_log **alr_log_lookup(dev_t rdev)
+{
+       unsigned int slot;
+
+       assert(major(rdev) == oal_log_major);
+
+       slot = minor(rdev);
+       if (slot >= ARRAY_SIZE(alr_log))
+               return NULL;
+
+       return &alr_log[slot];
+}
+
+/* Describe the read/write bits of an entry's flags as "r", "w" or "rw";
+ * "0" is returned when neither bit is set. Other bits are ignored. */
+static const char *alr_flags_to_str(unsigned int flags)
+{
+       unsigned int rw = flags & (OFD_ACCESS_READ | OFD_ACCESS_WRITE);
+
+       if (rw == (OFD_ACCESS_READ | OFD_ACCESS_WRITE))
+               return "rw";
+
+       if (rw == OFD_ACCESS_READ)
+               return "r";
+
+       if (rw == OFD_ACCESS_WRITE)
+               return "w";
+
+       return "0";
+}
+
+/* Poll callback for an access log device (for example
+ * /dev/lustre-access-log/scratch-OST0000): read one batch of entries from
+ * the log and emit a trace line for each of them.
+ *
+ * Returns ALR_OK after a successful read, ALR_EOF when read() returns 0 and
+ * ALR_ERROR when read() fails or returns a partial entry. */
+static int alr_log_io(int epoll_fd, struct alr_dev *ad, unsigned int mask)
+{
+       struct alr_log *al = container_of(ad, struct alr_log, alr_dev);
+       ssize_t nread;
+       ssize_t off;
+
+       TRACE("alr_log_io %s\n", ad->alr_name);
+       DEBUG_U(mask);
+
+       assert(al->alr_entry_size != 0);
+       assert(al->alr_buf_size != 0);
+       assert(al->alr_buf != NULL);
+
+       nread = read(ad->alr_fd, al->alr_buf, al->alr_buf_size);
+       if (nread == 0) {
+               TRACE("alr_log_eof %s\n", ad->alr_name);
+               return ALR_EOF;
+       }
+
+       if (nread < 0) {
+               ERROR("cannot read events from '%s': %s\n", ad->alr_name, strerror(errno));
+               return ALR_ERROR;
+       }
+
+       /* Only whole entries are expected from the device. */
+       if (nread % al->alr_entry_size != 0) {
+               ERROR("invalid read from "D_ALR_LOG": entry_size = %zu, count = %zd\n",
+                       P_ALR_LOG(al), al->alr_entry_size, nread);
+               return ALR_ERROR;
+       }
+
+       DEBUG("read "D_ALR_LOG", count = %zd\n", P_ALR_LOG(al), nread);
+
+       off = 0;
+       while (off < nread) {
+               struct ofd_access_entry_v1 *oae =
+                       (struct ofd_access_entry_v1 *)(al->alr_buf + off);
+
+               TRACE("alr_log_entry %s "DFID" %lu %lu %lu %u %u %s\n",
+                       ad->alr_name,
+                       PFID(&oae->oae_parent_fid),
+                       (unsigned long)oae->oae_begin,
+                       (unsigned long)oae->oae_end,
+                       (unsigned long)oae->oae_time,
+                       (unsigned int)oae->oae_size,
+                       (unsigned int)oae->oae_segment_count,
+                       alr_flags_to_str(oae->oae_flags));
+
+               off += al->alr_entry_size;
+       }
+
+       return ALR_OK;
+}
+
+/* Destroy hook for access log devices: clear the log's alr_log[] slot if it
+ * still refers to this log and release the read buffer. The alr_log
+ * structure itself is freed by alr_dev_free(). */
+static void alr_log_destroy(struct alr_dev *ad)
+{
+       struct alr_log *al = container_of(ad, struct alr_log, alr_dev);
+       struct alr_log **slot;
+
+       TRACE("alr_log_free %s\n", ad->alr_name);
+       assert(major(al->alr_rdev) == oal_log_major);
+
+       slot = alr_log_lookup(al->alr_rdev);
+       if (slot != NULL) {
+               if (*slot == al)
+                       *slot = NULL;
+       }
+
+       free(al->alr_buf);
+       al->alr_buf_size = 0;
+       al->alr_buf = NULL;
+}
+
+/* Add an access log (identified by path) to the epoll set.
+ *
+ * Returns 0 when the device was added and also when it was skipped: it
+ * vanished before open(), has a different major, has no usable slot, is
+ * already tracked, or is not an OFD access log. Returns a negative value
+ * when open() (other than ENOENT), fstat(), the INFO ioctl or epoll_ctl()
+ * fails, or when the device reports a zero entry size. Allocation failures
+ * terminate the process through FATAL(). */
+static int alr_log_add(int epoll_fd, const char *path)
+{
+       struct alr_log **pal, *al = NULL;
+       struct stat st;
+       int fd = -1;
+       int rc;
+
+       fd = open(path, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
+       if (fd < 0) {
+               /* Save errno first: ERROR() calls fprintf(), which may
+                * overwrite it before the ENOENT test below. */
+               int open_errno = errno;
+
+               ERROR("cannot open device '%s': %s\n", path, strerror(open_errno));
+               rc = (open_errno == ENOENT ? 0 : -1); /* Possible race. */
+               goto out;
+       }
+
+       /* Revalidate rdev in case of race. */
+       rc = fstat(fd, &st);
+       if (rc < 0) {
+               ERROR("cannot stat '%s': %s\n", path, strerror(errno));
+               goto out;
+       }
+
+       if (major(st.st_rdev) != oal_log_major)
+               goto out;
+
+       pal = alr_log_lookup(st.st_rdev);
+       if (pal == NULL) {
+               ERROR("no device slot available for '%s' with minor %u\n",
+                       path, minor(st.st_rdev));
+               goto out;
+       }
+
+       if (*pal != NULL)
+               goto out; /* We already have this device. */
+
+       struct lustre_access_log_info_v1 lali;
+
+       memset(&lali, 0, sizeof(lali));
+
+       rc = ioctl(fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali);
+       if (rc < 0) {
+               ERROR("cannot get info for device '%s': %s\n",
+                       path, strerror(errno));
+               goto out;
+       }
+
+       if (lali.lali_type != LUSTRE_ACCESS_LOG_TYPE_OFD) {
+               rc = 0;
+               goto out;
+       }
+
+       al = calloc(1, sizeof(*al));
+       if (al == NULL)
+               FATAL("cannot allocate struct alr_dev of size %zu: %s\n",
+                       sizeof(*al), strerror(errno));
+
+       /* From here on the descriptor is owned by al and is closed by
+        * alr_dev_free() on the error paths. */
+       al->alr_dev.alr_io = &alr_log_io;
+       al->alr_dev.alr_destroy = &alr_log_destroy;
+       al->alr_dev.alr_fd = fd;
+       fd = -1;
+
+       al->alr_rdev = st.st_rdev;
+
+       al->alr_dev.alr_name = strdup(lali.lali_name);
+       if (al->alr_dev.alr_name == NULL)
+               FATAL("cannot copy name of size %zu: %s\n",
+                       strlen(lali.lali_name), strerror(errno));
+
+       al->alr_buf_size = lali.lali_log_size;
+       al->alr_entry_size = lali.lali_entry_size;
+
+       if (al->alr_entry_size == 0) {
+               ERROR("device '%s' has zero entry size\n", path);
+               rc = -1;
+               goto out;
+       }
+
+       if (al->alr_buf_size == 0)
+               al->alr_buf_size = 1048576;
+
+       /* The buffer holds a whole number of entries. */
+       al->alr_buf_size = roundup(al->alr_buf_size, al->alr_entry_size);
+
+       al->alr_buf = malloc(al->alr_buf_size);
+       if (al->alr_buf == NULL)
+               FATAL("cannot allocate log buffer for '%s' of size %zu: %s\n",
+                       path, al->alr_buf_size, strerror(errno));
+
+       struct epoll_event ev = {
+               .events = EPOLLIN | EPOLLHUP,
+               .data.ptr = &al->alr_dev,
+       };
+
+       rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, al->alr_dev.alr_fd, &ev);
+       if (rc < 0) {
+               ERROR("cannot add device '%s' to epoll set: %s\n",
+                       path, strerror(errno));
+               goto out;
+       }
+
+       TRACE("alr_log_add %s\n", al->alr_dev.alr_name);
+
+       if (oal_log_minor_max < minor(al->alr_rdev))
+               oal_log_minor_max = minor(al->alr_rdev);
+
+       /* Publish the log in its slot; ownership moves to alr_log[]. */
+       assert(*pal == NULL);
+       *pal = al;
+       al = NULL;
+       rc = 0;
+out:
+       if (al != NULL)
+               alr_dev_free(epoll_fd, &al->alr_dev);
+
+       if (!(fd < 0))
+               close(fd);
+
+       return rc;
+}
+
+/* Scan /dev/lustre-access-log/ for new access log devices and add to
+ * epoll set.
+ *
+ * Returns ALR_EXIT_FAILURE when the directory cannot be opened and ALR_OK
+ * otherwise. Failures on individual entries are reported and skipped. */
+static int alr_scan(int epoll_fd)
+{
+       const char dir_path[] = "/dev/"LUSTRE_ACCESS_LOG_DIR_NAME;
+       DIR *dir;
+       int dir_fd;
+       struct dirent *d;
+       int rc;
+
+       dir = opendir(dir_path);
+       if (dir == NULL) {
+               ERROR("cannot open '%s' for scanning: %s\n", dir_path, strerror(errno));
+               return ALR_EXIT_FAILURE;
+       }
+
+       dir_fd = dirfd(dir);
+
+       /* Scan /dev for devices with major equal to oal_log_major and add
+        * any new devices. */
+       while ((d = readdir(dir)) != NULL) {
+               char path[6 + PATH_MAX];
+               struct alr_log **pal;
+               struct stat st;
+
+               /* d_type is only a hint: a filesystem may report DT_UNKNOWN,
+                * in which case the fstatat() below decides. */
+               if (d->d_type != DT_CHR && d->d_type != DT_UNKNOWN)
+                       continue;
+
+               rc = fstatat(dir_fd, d->d_name, &st, 0);
+               if (rc < 0) {
+                       ERROR("cannot stat '%s/%s' while scanning: %s\n",
+                               dir_path, d->d_name, strerror(errno));
+                       continue;
+               }
+
+               if (!S_ISCHR(st.st_mode))
+                       continue;
+
+               if (major(st.st_rdev) != oal_log_major)
+                       continue;
+
+               pal = alr_log_lookup(st.st_rdev);
+               if (pal == NULL) {
+                       ERROR("no device slot available for '%s/%s' with minor %u\n",
+                               dir_path, d->d_name, minor(st.st_rdev));
+                       continue;
+               }
+
+               if (*pal != NULL)
+                       continue; /* We already have this device. */
+
+               snprintf(path, sizeof(path), "%s/%s", dir_path, d->d_name);
+
+               /* alr_log_add() reports its own errors; one bad device does
+                * not stop the scan. */
+               alr_log_add(epoll_fd, path);
+       }
+
+       closedir(dir);
+
+       return ALR_OK;
+}
+
+/* Poll callback for the /dev/lustre-access-log/control device.
+ *
+ * EPOLLERR ends the program with failure and EPOLLHUP ends it with success.
+ * Otherwise the prescan ioctl is issued and /dev/lustre-access-log/ is
+ * scanned for new access log devices. */
+static int alr_ctl_io(int epoll_fd, struct alr_dev *cd, unsigned int mask)
+{
+       TRACE("%s\n", __func__);
+       DEBUG_U(mask);
+
+       if ((mask & EPOLLERR) != 0)
+               return ALR_EXIT_FAILURE;
+
+       if ((mask & EPOLLHUP) != 0)
+               return ALR_EXIT_SUCCESS;
+
+       if (ioctl(cd->alr_fd, LUSTRE_ACCESS_LOG_IOCTL_PRESCAN) < 0) {
+               ERROR("cannot start scanning: %s\n", strerror(errno));
+               return ALR_EXIT_FAILURE;
+       }
+
+       return alr_scan(epoll_fd);
+}
+
+/* Poll callback for the signalfd. SIGINT and SIGTERM break out of the epoll
+ * loop so that the program exits normally; any other signal, and a read
+ * that returns nothing, are ignored. */
+static int alr_signal_io(int epoll_fd, struct alr_dev *sd, unsigned int mask)
+{
+       struct signalfd_siginfo ssi;
+       ssize_t nread;
+
+       TRACE("%s\n", __func__);
+       DEBUG_U(mask);
+
+       nread = read(sd->alr_fd, &ssi, sizeof(ssi));
+       if (nread <= 0)
+               return ALR_OK;
+
+       DEBUG_U32(ssi.ssi_signo);
+
+       if (ssi.ssi_signo == SIGINT || ssi.ssi_signo == SIGTERM)
+               return ALR_EXIT_SUCCESS;
+
+       return ALR_OK;
+}
+
+/* Query an access log with LUSTRE_ACCESS_LOG_IOCTL_INFO and print the
+ * result to stdout as one YAML list item.
+ *
+ * Returns 0 on success and -1 when the ioctl fails. */
+static int alr_log_info(struct alr_log *al)
+{
+       struct lustre_access_log_info_v1 lali;
+
+       if (ioctl(al->alr_dev.alr_fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali) < 0) {
+               ERROR("cannot get info for device '%s': %s\n",
+                       al->alr_dev.alr_name, strerror(errno));
+               return -1;
+       }
+
+       printf("- name: %s\n"
+              "  version: %#x\n"
+              "  type: %#x\n"
+              "  log_size: %u\n"
+              "  entry_size: %u\n"
+              "  _head: %u\n"
+              "  _tail: %u\n"
+              "  _entry_space: %u\n"
+              "  _entry_count: %u\n"
+              "  _drop_count: %u\n"
+              "  _is_closed: %u\n",
+              lali.lali_name, lali.lali_version, lali.lali_type,
+              lali.lali_log_size, lali.lali_entry_size,
+              lali._lali_head, lali._lali_tail,
+              lali._lali_entry_space, lali._lali_entry_count,
+              lali._lali_drop_count, lali._lali_is_closed);
+
+       return 0;
+}
+
+/* Allocate a generic device for fd and register it in the epoll set for
+ * EPOLLIN and EPOLLHUP.
+ *
+ * name is copied. io is the poll callback; destroy is an optional cleanup
+ * hook and may be NULL. On success the returned device owns fd and is
+ * released with alr_dev_free(). On failure NULL is returned with errno set
+ * by the failing call, nothing is leaked, and fd remains owned by the
+ * caller. */
+static struct alr_dev *alr_dev_create(int epoll_fd, int fd, const char *name,
+                       int (*io)(int, struct alr_dev *, unsigned int),
+                       void (*destroy)(struct alr_dev *))
+{
+       struct alr_dev *alr;
+       int rc;
+
+       alr = calloc(1, sizeof(*alr));
+       if (alr == NULL)
+               return NULL;
+
+       alr->alr_name = strdup(name);
+       if (alr->alr_name == NULL) {
+               free(alr);
+               return NULL;
+       }
+       alr->alr_io = io;
+       alr->alr_destroy = destroy;
+       alr->alr_fd = fd;
+
+       struct epoll_event event = {
+               .events = EPOLLIN | EPOLLHUP,
+               .data.ptr = alr,
+       };
+
+       rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, alr->alr_fd, &event);
+       if (rc < 0) {
+               /* Callers report strerror(errno); keep it intact across
+                * the cleanup. */
+               int saved_errno = errno;
+
+               free(alr->alr_name);
+               free(alr);
+               errno = saved_errno;
+               return NULL;
+       }
+
+       return alr;
+}
+
+/* Entry point: parse options, register the signalfd and the control device
+ * in an epoll set, then dispatch events to the per-device callbacks.
+ *
+ * Options:
+ *   -d, --debug[=FILE]  write debug messages to stderr, to stdout if FILE
+ *                       is "-", or append them to FILE
+ *   -h, --help          exit successfully
+ *   -l, --list          run one non-blocking pass of the event loop, print
+ *                       info for every discovered log to stdout and exit
+ *   -t, --trace[=FILE]  same as --debug, for trace messages
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE. */
+int main(int argc, char *argv[])
+{
+       const char ctl_path[] = "/dev/"LUSTRE_ACCESS_LOG_DIR_NAME"/control";
+       struct alr_dev *alr_signal = NULL;
+       struct alr_dev *alr_ctl = NULL;
+       unsigned int m;
+       int list_info = 0;
+       int epoll_fd = -1;
+       int signal_fd = -1;
+       int ctl_fd = -1;
+       int exit_status;
+       int rc;
+       int c;
+
+       static struct option options[] = {
+               { .name = "debug", .has_arg = optional_argument, .val = 'd', },
+               { .name = "help", .has_arg = no_argument, .val = 'h', },
+               { .name = "list", .has_arg = no_argument, .val = 'l', },
+               { .name = "trace", .has_arg = optional_argument, .val = 't', },
+               { .name = NULL, },
+       };
+
+       while ((c = getopt_long(argc, argv, "d::hlt::", options, NULL)) != -1) {
+               switch (c) {
+               case 'd':
+                       if (optarg == NULL) {
+                               debug_file = stderr;
+                       } else if (strcmp(optarg, "-") == 0) {
+                               debug_file = stdout;
+                       } else {
+                               debug_file = fopen(optarg, "a");
+                               if (debug_file == NULL)
+                                       FATAL("cannot open debug file '%s': %s\n",
+                                               optarg, strerror(errno));
+                       }
+
+                       break;
+               case 'h':
+                       /* ... */
+                       exit(EXIT_SUCCESS);
+               case 'l':
+                       list_info = 1;
+                       break;
+               case 't':
+                       if (optarg == NULL) {
+                               trace_file = stderr;
+                       } else if (strcmp(optarg, "-") == 0) {
+                               trace_file = stdout;
+                       } else {
+                               trace_file = fopen(optarg, "a");
+                               /* Check the stream that was just opened,
+                                * not debug_file. */
+                               if (trace_file == NULL)
+                                       FATAL("cannot open trace file '%s': %s\n",
+                                               optarg, strerror(errno));
+                       }
+
+                       break;
+               case '?':
+                       /* Try ... for more ... */
+                       exit(EXIT_FAILURE);
+               }
+       }
+
+       epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+       if (epoll_fd < 0)
+               FATAL("cannot create epoll set: %s\n", strerror(errno));
+
+       /* Setup signal FD and add to epoll set. */
+       sigset_t signal_mask;
+       sigemptyset(&signal_mask);
+       sigaddset(&signal_mask, SIGINT);
+       sigaddset(&signal_mask, SIGTERM);
+       rc = sigprocmask(SIG_BLOCK, &signal_mask, NULL);
+       if (rc < 0)
+               FATAL("cannot set process signal mask: %s\n", strerror(errno));
+
+       signal_fd = signalfd(-1, &signal_mask, SFD_NONBLOCK|SFD_CLOEXEC);
+       if (signal_fd < 0)
+               FATAL("cannot create signalfd: %s\n", strerror(errno));
+
+       alr_signal = alr_dev_create(epoll_fd, signal_fd, "signal", &alr_signal_io, NULL);
+       if (alr_signal == NULL)
+               FATAL("cannot register signalfd: %s\n", strerror(errno));
+
+       signal_fd = -1;
+
+       /* Open control device. */
+       ctl_fd = open(ctl_path, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
+       if (ctl_fd < 0)
+               FATAL("cannot open '%s': %s\n", ctl_path, strerror(errno));
+
+       /* Get and print interface version. */
+       oal_version = ioctl(ctl_fd, LUSTRE_ACCESS_LOG_IOCTL_VERSION);
+       if (oal_version < 0)
+               FATAL("cannot get ofd access log interface version: %s\n", strerror(errno));
+
+       DEBUG_D(oal_version);
+
+       /* Get and print device major used for access log devices. The
+        * result is checked in a signed variable first: oal_log_major is
+        * unsigned and could never compare below zero. */
+       rc = ioctl(ctl_fd, LUSTRE_ACCESS_LOG_IOCTL_MAJOR);
+       if (rc < 0)
+               FATAL("cannot get ofd access log major: %s\n", strerror(errno));
+
+       oal_log_major = rc;
+       DEBUG_U(oal_log_major);
+
+       /* Add control device to epoll set. */
+       alr_ctl = alr_dev_create(epoll_fd, ctl_fd, "control", &alr_ctl_io, NULL);
+       if (alr_ctl == NULL)
+               FATAL("cannot register control device: %s\n", strerror(errno));
+
+       ctl_fd = -1;
+
+       /* Event loop. With --list a single non-blocking pass is made. */
+       do {
+               struct epoll_event ev[32];
+               int timeout = (list_info ? 0 : -1);
+               int i, ev_count;
+
+               ev_count = epoll_wait(epoll_fd, ev, ARRAY_SIZE(ev), timeout);
+               if (ev_count < 0) {
+                       if (errno == EINTR) /* Signal or timeout. */
+                               continue;
+
+                       ERROR("cannot wait on epoll set: %s\n", strerror(errno));
+                       exit_status = EXIT_FAILURE;
+                       goto out;
+               }
+
+               DEBUG_D(ev_count);
+
+               for (i = 0; i < ev_count; i++) {
+                       struct alr_dev *ad = ev[i].data.ptr;
+                       unsigned int mask = ev[i].events;
+
+                       rc = (*ad->alr_io)(epoll_fd, ad, mask);
+                       switch (rc) {
+                       case ALR_EXIT_FAILURE:
+                               exit_status = EXIT_FAILURE;
+                               goto out;
+                       case ALR_EXIT_SUCCESS:
+                               exit_status = EXIT_SUCCESS;
+                               goto out;
+                       case ALR_ERROR:
+                       case ALR_EOF:
+                               alr_dev_free(epoll_fd, ad);
+                               break;
+                       case ALR_OK:
+                       default:
+                               break;
+                       }
+               }
+       } while (!list_info);
+
+       exit_status = EXIT_SUCCESS;
+out:
+       assert(oal_log_minor_max < ARRAY_SIZE(alr_log));
+
+       /* Print (with --list) and release every log that is still tracked. */
+       for (m = 0; m <= oal_log_minor_max; m++) {
+               if (alr_log[m] == NULL)
+                       continue;
+
+               if (list_info) {
+                       rc = alr_log_info(alr_log[m]);
+                       if (rc < 0)
+                               exit_status = EXIT_FAILURE;
+               }
+
+               alr_dev_free(epoll_fd, &alr_log[m]->alr_dev);
+       }
+
+       alr_dev_free(epoll_fd, alr_ctl);
+       alr_dev_free(epoll_fd, alr_signal);
+       close(epoll_fd);
+
+       DEBUG_D(exit_status);
+
+       return exit_status;
+}
index 250212d..54a1082 100644 (file)
@@ -39,6 +39,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <linux/lustre/lustre_idl.h>
+#include <linux/lustre/lustre_access_log.h>
 #include <linux/lustre/lustre_lfsck_user.h>
 #include <linux/lustre/lustre_disk.h>
 #include <linux/lustre/lustre_cfg.h>
@@ -2501,6 +2502,37 @@ static void check_nodemap_rec(void)
        CHECK_UNION(nodemap_rec);
 }
 
+/* Submit the OFD_ACCESS_* flag values, struct ofd_access_entry_v1 and each
+ * of its members to the CHECK_* macros (defined elsewhere in this file). */
+static void check_ofd_access_entry_v1(void)
+{
+       BLANK_LINE();
+       CHECK_VALUE_X(OFD_ACCESS_READ);
+       CHECK_VALUE_X(OFD_ACCESS_WRITE);
+       CHECK_STRUCT(ofd_access_entry_v1);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_parent_fid);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_begin);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_end);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_time);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_size);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_segment_count);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_flags);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_reserved1);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_reserved2);
+       CHECK_MEMBER(ofd_access_entry_v1, oae_reserved3);
+}
+
+/* Submit the access log version and type constants, struct
+ * lustre_access_log_info_v1 and its lali_* members to the CHECK_* macros
+ * (defined elsewhere in this file).
+ * NOTE(review): the _lali_* members printed by ofd_access_log_reader are not
+ * listed here - presumably on purpose; confirm. */
+static void check_lustre_access_log_info_v1(void)
+{
+       BLANK_LINE();
+       CHECK_VALUE_X(LUSTRE_ACCESS_LOG_VERSION_1);
+       CHECK_VALUE_X(LUSTRE_ACCESS_LOG_TYPE_OFD);
+       CHECK_STRUCT(lustre_access_log_info_v1);
+       CHECK_MEMBER(lustre_access_log_info_v1, lali_version);
+       CHECK_MEMBER(lustre_access_log_info_v1, lali_type);
+       CHECK_MEMBER(lustre_access_log_info_v1, lali_name);
+       CHECK_MEMBER(lustre_access_log_info_v1, lali_log_size);
+       CHECK_MEMBER(lustre_access_log_info_v1, lali_entry_size);
+}
+
 static void check_lfsck_request(void)
 {
        BLANK_LINE();
@@ -3035,6 +3067,9 @@ main(int argc, char **argv)
        check_nodemap_global_rec();
        check_nodemap_rec();
 
+       check_ofd_access_entry_v1();
+       check_lustre_access_log_info_v1();
+
        check_lfsck_request();
        check_lfsck_reply();
 
index a5a6261..49c732b 100644 (file)
@@ -36,6 +36,7 @@
 #include <string.h>
 
 #include <linux/lustre/lustre_idl.h>
+#include <linux/lustre/lustre_access_log.h>
 #ifdef HAVE_SERVER_SUPPORT
 #include <linux/lustre/lustre_lfsck_user.h>
 #include <linux/lustre/lustre_disk.h>
index 1755067..cadc201 100644 (file)
 #include <string.h>
 
 #include <linux/lustre/lustre_idl.h>
+#include <linux/lustre/lustre_access_log.h>
 #ifdef HAVE_SERVER_SUPPORT
 #include <linux/lustre/lustre_lfsck_user.h>
 #include <linux/lustre/lustre_disk.h>
 #ifdef CONFIG_FS_POSIX_ACL
-#include <linux/posix_acl_xattr.h>
 #ifdef HAVE_STRUCT_POSIX_ACL_XATTR
+#include <linux/posix_acl_xattr.h>
 # define posix_acl_xattr_header struct posix_acl_xattr_header
 # define posix_acl_xattr_entry  struct posix_acl_xattr_entry
 #endif /* HAVE_STRUCT_POSIX_ACL_XATTR */
 #include <linux/lustre/lustre_cfg.h>
 
 #define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; }
-#define LASSERTF(cond, fmt, ...) if (!(cond)) { printf("failed '" #cond "'" fmt, ## __VA_ARGS__); ret = 1; }
+#define LASSERTF(cond, fmt, ...) if (!(cond)) { printf("failed '" #cond "'" fmt, ## __VA_ARGS__);ret = 1;}
 /*
- * BUILD_BUG_ON() is Compile-time LASSERT, which verifies correctness at
- * compile-time rather than runtime. If "cond" is true, then there are two
- * identical cases ("0" and "0"), which is an error that causes the compiler to
- * complain. If "cond" is false, then there are two different cases
- * ("(non-zero)" and "0").
- *
+ * BUILD_BUG_ON() is a compile-time check: it verifies correctness at
+ * compile time rather than at runtime.
  */
-#ifndef BUILD_BUG_ON
-#define BUILD_BUG_ON(cond) do {switch (0) {case (cond): case 1: break; } } while (0)
-#endif
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
 int ret;
 
@@ -5402,6 +5397,82 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(union nodemap_rec) == 32, "found %lld\n",
                 (long long)(int)sizeof(union nodemap_rec));
 
+       LASSERTF(OFD_ACCESS_READ == 0x00000001UL, "found 0x%.8xUL\n",
+               (unsigned)OFD_ACCESS_READ);
+       LASSERTF(OFD_ACCESS_WRITE == 0x00000002UL, "found 0x%.8xUL\n",
+               (unsigned)OFD_ACCESS_WRITE);
+       /* Checks for struct ofd_access_entry_v1 */
+       LASSERTF((int)sizeof(struct ofd_access_entry_v1) == 64, "found %lld\n",
+                (long long)(int)sizeof(struct ofd_access_entry_v1));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_parent_fid) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_parent_fid));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_parent_fid) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_parent_fid));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_begin) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_begin));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_begin) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_begin));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_end) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_end));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_end) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_end));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_time) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_time));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_time) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_time));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_size) == 40, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_size));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_size));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_segment_count) == 44, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_segment_count));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_segment_count) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_segment_count));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_flags) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_flags));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_flags) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_flags));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved1) == 52, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved1));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved1));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved2) == 56, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved2));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved2) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved2));
+       LASSERTF((int)offsetof(struct ofd_access_entry_v1, oae_reserved3) == 60, "found %lld\n",
+                (long long)(int)offsetof(struct ofd_access_entry_v1, oae_reserved3));
+       LASSERTF((int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved3) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ofd_access_entry_v1 *)0)->oae_reserved3));
+
+       LASSERTF(LUSTRE_ACCESS_LOG_VERSION_1 == 0x00010000UL, "found 0x%.8xUL\n",
+               (unsigned)LUSTRE_ACCESS_LOG_VERSION_1);
+       LASSERTF(LUSTRE_ACCESS_LOG_TYPE_OFD == 0x00000001UL, "found 0x%.8xUL\n",
+               (unsigned)LUSTRE_ACCESS_LOG_TYPE_OFD);
+       /* Checks for struct lustre_access_log_info_v1 */
+       LASSERTF((int)sizeof(struct lustre_access_log_info_v1) == 168, "found %lld\n",
+                (long long)(int)sizeof(struct lustre_access_log_info_v1));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_version) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_version));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_version) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_version));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_type) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_type));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_type) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_type));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_name) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_name));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_name) == 128, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_name));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_log_size) == 136, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_log_size));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_log_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_log_size));
+       LASSERTF((int)offsetof(struct lustre_access_log_info_v1, lali_entry_size) == 140, "found %lld\n",
+                (long long)(int)offsetof(struct lustre_access_log_info_v1, lali_entry_size));
+       LASSERTF((int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_entry_size) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lustre_access_log_info_v1 *)0)->lali_entry_size));
+
        /* Checks for struct lfsck_request */
        LASSERTF((int)sizeof(struct lfsck_request) == 96, "found %lld\n",
                 (long long)(int)sizeof(struct lfsck_request));