1 #include <linux/cdev.h>
2 #include <linux/circ_buf.h>
3 #include <linux/device.h>
5 #include <linux/kernel.h>
6 #include <linux/miscdevice.h>
7 #include <linux/module.h>
8 #include <linux/poll.h>
9 #include <linux/slab.h>
10 #include <linux/types.h>
11 #include <linux/uaccess.h>
12 #include <uapi/linux/lustre/lustre_idl.h>
13 #include <uapi/linux/lustre/lustre_access_log.h>
14 #include "ofd_internal.h"
16 /* OFD access logs: OST (OFD) RPC handlers log accesses by FID and
17 * PFID which are read from userspace through character device files
18 * (/dev/lustre-access-log/scratch-OST0000). Accesses are described by
19 * struct ofd_access_entry_v1. The char device implements read()
20 * (blocking and nonblocking) and poll(), along with an ioctl that
21 * returns diagnostic information on an oal device.
23 * A control device (/dev/lustre-access-log/control) supports an ioctl()
24  * plus a poll() method for oal discovery. See uses of
25 * oal_control_event_count and oal_control_wait_queue for details.
27 * oal log size and entry size are restricted to powers of 2 to
28 * support circ_buf methods. See Documentation/core-api/circular-buffers.rst
29 * in the linux tree for more information.
31 * The associated struct device (*oal_device) owns the oal. The
32 * release() method of oal_device frees the oal and releases its
33 * minor. This may seem slightly more complicated than necessary but
34 * it allows the OST to be unmounted while the oal still has open file
39 OAL_DEV_COUNT = 1 << MINORBITS,
/* One access log instance per OST. Ownership: the embedded oal_device
 * owns this structure; it is freed from the device release() method
 * once the last open file handle drops its reference.
 * NOTE(review): the cdev member (oal_cdev, referenced by oal_file_open()
 * and ofd_access_log_create()) and the closing brace fall outside the
 * visible span of this extract. */
42 struct ofd_access_log {
43 	char oal_name[128]; /* lustre-OST0000 */
44 	struct device oal_device;
46 	struct circ_buf oal_circ; /* head/tail indices plus entry storage */
47 	wait_queue_head_t oal_read_wait_queue; /* readers sleep here until data arrives or the log closes */
48 	spinlock_t oal_read_lock; /* serializes consumers (tail updates) */
49 	spinlock_t oal_write_lock; /* serializes producers (head updates) */
50 	unsigned int oal_drop_count; /* entries discarded because the log was full */
51 	unsigned int oal_is_closed; /* set at OST umount; subsequent reads drain then return 0 */
52 	unsigned int oal_log_size; /* bytes; power of 2 (enables index masking) */
53 	unsigned int oal_entry_size; /* bytes; power of 2 */
/* Bumped whenever a new oal device is created; pollers of the control
 * device compare it against their per-open snapshot to detect new logs. */
56 static atomic_t oal_control_event_count = ATOMIC_INIT(0);
57 static DECLARE_WAIT_QUEUE_HEAD(oal_control_wait_queue); /* control-device pollers wait here */
/* Char device bookkeeping: device class, dynamically allocated major,
 * and an IDR used purely as a minor-number allocator (hence the TODO). */
59 static struct class *oal_log_class;
60 static unsigned int oal_log_major;
61 static DEFINE_IDR(oal_log_minor_idr); /* TODO Use ida instead. */
62 static DEFINE_SPINLOCK(oal_log_minor_lock);
/* A log size is valid iff it is a power of 2 (required by the circ_buf
 * index masking in oal_read_entry()/oal_write_entry()), holds at least
 * two entries, and does not exceed 1 GiB. */
64 bool ofd_access_log_size_is_valid(unsigned int size)
66 	const unsigned int size_min = 2 * sizeof(struct ofd_access_entry_v1);
67 	const unsigned int size_max = 1U << 30;
72 	return is_power_of_2(size) && size_min <= size && size <= size_max;
/* Record a discovery event and wake any tasks polling the control device. */
75 static void oal_control_event_inc(void)
77 	atomic_inc(&oal_control_event_count);
78 	wake_up(&oal_control_wait_queue);
/* Allocate a free minor number from the IDR. Only the ID itself matters,
 * so a sentinel pointer is stored as the IDR payload. idr_preload() with
 * GFP_KERNEL is done outside the spinlock so that the GFP_NOWAIT
 * idr_alloc() under the lock can succeed without sleeping.
 * NOTE(review): the 'minor' declaration, idr_preload_end(), the error
 * check, and the store to *pminor are outside this visible extract. */
81 static int oal_log_minor_alloc(int *pminor)
83 	void *OAL_LOG_MINOR_ALLOCED = (void *)-1;
86 	idr_preload(GFP_KERNEL);
87 	spin_lock(&oal_log_minor_lock);
88 	minor = idr_alloc(&oal_log_minor_idr, OAL_LOG_MINOR_ALLOCED, 0,
89 			OAL_DEV_COUNT, GFP_NOWAIT);
90 	spin_unlock(&oal_log_minor_lock);
/* Return a minor number to the allocator. Called from the device
 * release() method and from the create error path. */
102 static void oal_log_minor_free(int minor)
103 	spin_lock(&oal_log_minor_lock);
104 	idr_remove(&oal_log_minor_idr, minor);
105 	spin_unlock(&oal_log_minor_lock);
/* The log is "empty" when fewer bytes than one whole entry are
 * available: partial entries are never written, so this is equivalent
 * to "no entry to read".
 * NOTE(review): the CIRC_CNT() tail argument line is missing from this
 * extract; presumably it reads oal->oal_circ.tail. */
108 static bool oal_is_empty(struct ofd_access_log *oal)
110 	return CIRC_CNT(oal->oal_circ.head,
112 			oal->oal_log_size) < oal->oal_entry_size;
/* Producer side: append one entry to the log. Called from OFD RPC
 * handlers (see ofd_access()), so it must not sleep; when the log is
 * full the entry is dropped and counted rather than blocking.
 * Memory ordering follows Documentation/core-api/circular-buffers.rst:
 * store the payload, then publish the new head with release semantics.
 * NOTE(review): the head/tail declarations, the head load, the EINVAL
 * return, and the unlock-on-drop path are outside this extract. */
115 static ssize_t oal_write_entry(struct ofd_access_log *oal,
116 			const void *entry, size_t entry_size)
118 	struct circ_buf *circ = &oal->oal_circ;
123 	if (entry_size != oal->oal_entry_size)
126 	spin_lock(&oal->oal_write_lock);
128 	tail = READ_ONCE(circ->tail);
130 	/* CIRC_SPACE() returns the space available, 0..oal_log_size -
131 	 * 1. It always leaves one free char, since a completely full
132 	 * buffer would have head == tail, which is the same as empty. */
133 	if (CIRC_SPACE(head, tail, oal->oal_log_size) < oal->oal_entry_size) {
134 		oal->oal_drop_count++;
139 	memcpy(&circ->buf[head], entry, entry_size);
142 	/* Ensure the entry is stored before we update the head. */
143 	smp_store_release(&circ->head,
144 		(head + oal->oal_entry_size) & (oal->oal_log_size - 1));
146 	wake_up(&oal->oal_read_wait_queue);
148 	spin_unlock(&oal->oal_write_lock);
153 /* Read one entry from the log and return its size. Non-blocking.
154  * When the log is empty we return -EAGAIN if the OST is still mounted
/* (review: the rest of this header comment — presumably "and 0 once it
 * is closed" — is outside this extract.) */
157 static ssize_t oal_read_entry(struct ofd_access_log *oal,
158 			void *entry_buf, size_t entry_buf_size)
160 	struct circ_buf *circ = &oal->oal_circ;
165 	/* XXX This method may silently truncate entries when
166 	 * entry_buf_size is less than oal_entry_size. But that's OK
167 	 * because the only in-tree caller always passes oal_entry_size. */
168 	spin_lock(&oal->oal_read_lock);
170 	/* Memory barrier usage follows circular-buffers.txt. */
171 	head = smp_load_acquire(&circ->head);
174 	if (!CIRC_CNT(head, tail, oal->oal_log_size)) {
175 		rc = oal->oal_is_closed ? 0 : -EAGAIN;
/* Writers only ever publish whole entries, so a nonzero count implies at
 * least one full entry is available. */
179 	BUG_ON(CIRC_CNT(head, tail, oal->oal_log_size) < oal->oal_entry_size);
181 	/* Read index before reading contents at that index. */
182 	smp_read_barrier_depends();
184 	/* Extract one entry from the buffer. */
185 	rc = min_t(size_t, oal->oal_entry_size, entry_buf_size);
186 	memcpy(entry_buf, &circ->buf[tail], rc);
188 	/* Memory barrier usage follows circular-buffers.txt. */
189 	smp_store_release(&circ->tail,
190 		(tail + oal->oal_entry_size) & (oal->oal_log_size - 1));
193 	spin_unlock(&oal->oal_read_lock);
/* Recover the oal from the embedded cdev and stash it for the other
 * file methods; seeking makes no sense on a log stream. */
198 static int oal_file_open(struct inode *inode, struct file *filp)
200 	filp->private_data = container_of(inode->i_cdev,
201 				struct ofd_access_log, oal_cdev);
203 	return nonseekable_open(inode, filp);
206 /* User buffer size must be a multiple of ofd access entry size.
 * Copies whole entries to userspace until the buffer is full, the log
 * drains (nonblocking), an error occurs, or the log is closed. With a
 * blocking fd, an empty log sleeps interruptibly on the read wait queue.
 * Returns the byte count copied so far, or the first error if nothing
 * was copied. (review: several declarations, the EINVAL/ENOMEM returns,
 * the EFAULT path, and kfree(entry) are outside this extract.) */
207 static ssize_t oal_file_read(struct file *filp, char __user *buf, size_t count,
210 	struct ofd_access_log *oal = filp->private_data;
218 	if (count & (oal->oal_entry_size - 1))
221 	entry = kzalloc(oal->oal_entry_size, GFP_KERNEL);
225 	while (size < count) {
226 		rc = oal_read_entry(oal, entry, oal->oal_entry_size);
228 			if (filp->f_flags & O_NONBLOCK)
231 			rc = wait_event_interruptible(oal->oal_read_wait_queue,
232 				!oal_is_empty(oal) || oal->oal_is_closed);
235 		} else if (rc <= 0) {
236 			break; /* closed or error */
238 		if (copy_to_user(buf, entry, oal->oal_entry_size)) {
243 		buf += oal->oal_entry_size;
244 		size += oal->oal_entry_size;
250 	return size ? size : rc;
253 /* Included for test purposes. User buffer size must be a multiple of
254  * ofd access entry size.
 * Mirrors oal_file_read(): injects whole entries from userspace into the
 * log via oal_write_entry(). Returns the byte count written, or the
 * first error if nothing was written. (review: declarations, the
 * EINVAL/ENOMEM returns, the EFAULT path, and kfree(entry) are outside
 * this extract.) */
255 static ssize_t oal_file_write(struct file *filp, const char __user *buf,
256 			size_t count, loff_t *ppos)
258 	struct ofd_access_log *oal = filp->private_data;
266 	if (count & (oal->oal_entry_size - 1))
269 	entry = kzalloc(oal->oal_entry_size, GFP_KERNEL);
273 	while (size < count) {
274 		if (copy_from_user(entry, buf, oal->oal_entry_size)) {
279 		rc = oal_write_entry(oal, entry, oal->oal_entry_size);
283 		buf += oal->oal_entry_size;
284 		size += oal->oal_entry_size;
289 	return size > 0 ? size : rc;
/* An oal fd is readable when at least one entry is available, or when
 * the log has been closed (so read() can return 0 for EOF). The read
 * lock keeps the emptiness check consistent with concurrent readers. */
292 unsigned int oal_file_poll(struct file *filp, struct poll_table_struct *wait)
294 	struct ofd_access_log *oal = filp->private_data;
295 	unsigned int mask = 0;
297 	poll_wait(filp, &oal->oal_read_wait_queue, wait);
299 	spin_lock(&oal->oal_read_lock);
301 	if (!oal_is_empty(oal) || oal->oal_is_closed)
304 	spin_unlock(&oal->oal_read_lock);
/* LUSTRE_ACCESS_LOG_IOCTL_INFO: fill a userspace
 * struct lustre_access_log_info_v1 with identification and diagnostic
 * state for this oal. The head/tail/count/space snapshot is taken
 * without the locks, so it is advisory only.
 * (review: the CIRC_CNT()/CIRC_SPACE() tail-argument lines, the -EFAULT
 * returns after each put_user(), and the final return are outside this
 * extract.) */
309 static long oal_ioctl_info(struct ofd_access_log *oal, unsigned long arg)
311 	struct lustre_access_log_info_v1 __user *lali;
312 	u32 entry_count = CIRC_CNT(oal->oal_circ.head,
314 			oal->oal_log_size) / oal->oal_entry_size;
315 	u32 entry_space = CIRC_SPACE(oal->oal_circ.head,
317 			oal->oal_log_size) / oal->oal_entry_size;
319 	lali = (struct lustre_access_log_info_v1 __user *)arg;
320 	BUILD_BUG_ON(sizeof(lali->lali_name) != sizeof(oal->oal_name));
322 	if (put_user(LUSTRE_ACCESS_LOG_VERSION_1, &lali->lali_version))
325 	if (put_user(LUSTRE_ACCESS_LOG_TYPE_OFD, &lali->lali_type))
328 	if (copy_to_user(lali->lali_name, oal->oal_name, sizeof(oal->oal_name)))
331 	if (put_user(oal->oal_log_size, &lali->lali_log_size))
334 	if (put_user(oal->oal_entry_size, &lali->lali_entry_size))
337 	if (put_user(oal->oal_circ.head, &lali->_lali_head))
340 	if (put_user(oal->oal_circ.tail, &lali->_lali_tail))
343 	if (put_user(entry_space, &lali->_lali_entry_space))
346 	if (put_user(entry_count, &lali->_lali_entry_count))
349 	if (put_user(oal->oal_drop_count, &lali->_lali_drop_count))
352 	if (put_user(oal->oal_is_closed, &lali->_lali_is_closed))
/* ioctl dispatch for an oal device fd. VERSION lets userspace negotiate
 * the entry format; INFO returns diagnostics (see oal_ioctl_info()).
 * (review: the switch statement, 'case' indentation context, and the
 * default/-ENOTTY-style fallthrough are outside this extract.) */
358 static long oal_file_ioctl(struct file *filp, unsigned int cmd,
361 	struct ofd_access_log *oal = filp->private_data;
364 	case LUSTRE_ACCESS_LOG_IOCTL_VERSION:
365 		return LUSTRE_ACCESS_LOG_VERSION_1;
366 	case LUSTRE_ACCESS_LOG_IOCTL_INFO:
367 		return oal_ioctl_info(oal, arg);
/* File operations for a per-OST oal device (/dev/lustre-access-log/<name>). */
373 static const struct file_operations oal_fops = {
374 	.owner = THIS_MODULE,
375 	.open = &oal_file_open,
376 	.unlocked_ioctl = &oal_file_ioctl,
377 	.read = &oal_file_read,
378 	.write = &oal_file_write, /* test-only injection path */
379 	.poll = &oal_file_poll,
380 	.llseek = &no_llseek, /* matches nonseekable_open() in oal_file_open() */
/* Device release(): runs when the last reference on oal_device drops
 * (possibly long after OST umount, if files are still open). Returns
 * the minor and frees the ring buffer. (review: the kfree(oal) of the
 * structure itself is presumably in lines outside this extract.) */
383 static void oal_device_release(struct device *dev)
385 	struct ofd_access_log *oal = dev_get_drvdata(dev);
387 	oal_log_minor_free(MINOR(oal->oal_device.devt));
388 	vfree(oal->oal_circ.buf);
/* Create an access log named after the OFD and register its char
 * device. Returns the new oal or an ERR_PTR. On success ownership
 * passes to the embedded device: teardown happens through
 * ofd_access_log_delete() and then oal_device_release().
 * (review: declarations of rc/minor, several error checks, the success
 * return, and parts of the unwind ladder are outside this extract.) */
392 struct ofd_access_log *ofd_access_log_create(const char *ofd_name, size_t size)
394 	const size_t entry_size = sizeof(struct ofd_access_entry_v1);
395 	struct ofd_access_log *oal;
399 	BUILD_BUG_ON(sizeof(oal->oal_name) != MAX_OBD_NAME);
400 	BUILD_BUG_ON(!is_power_of_2(entry_size));
/* size must be a power of 2, a whole number of entries, and fit the
 * unsigned int oal_log_size field. */
405 	if (!is_power_of_2(size) || (size & (entry_size - 1)) ||
406 	    (unsigned int)size != size)
407 		return ERR_PTR(-EINVAL);
409 	oal = kzalloc(sizeof(*oal), GFP_KERNEL);
411 		return ERR_PTR(-ENOMEM);
413 	strlcpy(oal->oal_name, ofd_name, sizeof(oal->oal_name));
414 	oal->oal_log_size = size;
415 	oal->oal_entry_size = entry_size;
416 	spin_lock_init(&oal->oal_write_lock);
417 	spin_lock_init(&oal->oal_read_lock);
418 	init_waitqueue_head(&oal->oal_read_wait_queue);
/* The ring can be large (up to 1 GiB), so use vmalloc. */
420 	oal->oal_circ.buf = vmalloc(oal->oal_log_size);
421 	if (!oal->oal_circ.buf) {
426 	rc = oal_log_minor_alloc(&minor);
430 	device_initialize(&oal->oal_device);
431 	oal->oal_device.devt = MKDEV(oal_log_major, minor);
432 	oal->oal_device.class = oal_log_class;
433 	oal->oal_device.release = &oal_device_release;
434 	dev_set_drvdata(&oal->oal_device, oal);
/* '!' becomes '/' in devtmpfs: /dev/lustre-access-log/<name>. */
435 	rc = dev_set_name(&oal->oal_device,
436 			"%s!%s", LUSTRE_ACCESS_LOG_DIR_NAME, oal->oal_name);
440 	cdev_init(&oal->oal_cdev, &oal_fops);
441 	oal->oal_cdev.owner = THIS_MODULE;
442 	rc = cdev_device_add(&oal->oal_cdev, &oal->oal_device);
444 		goto out_device_name;
/* Tell control-device pollers a new log appeared. */
446 	oal_control_event_inc();
/* Error unwind: release resources in reverse order of acquisition. */
451 	kfree_const(oal->oal_device.kobj.name);
453 	oal_log_minor_free(minor);
455 	vfree(oal->oal_circ.buf);
/* Log one OST object access. Builds an ofd_access_entry_v1 and appends
 * it to the OFD's access log, but only if logging is enabled and the
 * access type (read or write) is selected in ofd_access_log_mask.
 * (review: some parameters and entry fields — e.g. the object FID and
 * begin/end offsets — are on lines outside this extract.) */
461 void ofd_access(struct ofd_device *m,
462 		const struct lu_fid *parent_fid,
463 		__u64 begin, __u64 end,
465 		unsigned int segment_count,
468 	unsigned int flags = (rw == READ) ? OFD_ACCESS_READ : OFD_ACCESS_WRITE;
470 	if (m->ofd_access_log && (flags & m->ofd_access_log_mask)) {
471 		struct ofd_access_entry_v1 oae = {
472 			.oae_parent_fid = *parent_fid,
475 			.oae_time = ktime_get_real_seconds(),
477 			.oae_segment_count = segment_count,
/* Best effort: oal_write_entry() drops the entry if the log is full. */
481 		oal_write_entry(m->ofd_access_log, &oae, sizeof(oae));
485 /* Called on OST umount to:
486  * - Close the write end of the oal. This wakes any tasks sleeping in
487  *   read or poll and makes all reads return zero once the log
489  * - Delete the associated struct device and cdev, preventing new
490  *   opens. Existing opens retain a reference on the oal through
491  *   their reference on oal_device.
492  * The oal will be freed when the last open file handle is closed. */
/* See the block comment above: marks the log closed, wakes readers so
 * they observe EOF, and removes the cdev/device pair. The final free
 * happens in oal_device_release() when the last fd closes. */
493 void ofd_access_log_delete(struct ofd_access_log *oal)
498 	oal->oal_is_closed = 1;
499 	wake_up_all(&oal->oal_read_wait_queue);
500 	cdev_device_del(&oal->oal_cdev, &oal->oal_device);
503 /* private_data for control device file. */
/* (review: the member list — at least ccf_event_count, the per-open
 * snapshot of oal_control_event_count — and the closing brace are
 * outside this extract.) */
504 struct oal_control_file {
508 /* Control file usage:
509 * Open /dev/lustre-access-log/control.
511 * Poll for readable on control FD.
512 * Call ioctl(FD, LUSTRE_ACCESS_LOG_IOCTL_PRESCAN) to fetch event count.
513 * Scan /dev/ or /sys/class/... for new devices.
/* Open the control device: allocate a per-open event-count snapshot.
 * kzalloc gives ccf_event_count == 0, so a fresh open polls readable as
 * soon as any oal has ever been created. (review: the ENOMEM check and
 * the return 0 are outside this extract.) */
516 static int oal_control_file_open(struct inode *inode, struct file *filp)
517 	struct oal_control_file *ccf;
520 	rc = nonseekable_open(inode, filp);
524 	/* ccf->ccf_event_count = 0 on open */
525 	ccf = kzalloc(sizeof(*ccf), GFP_KERNEL);
529 	filp->private_data = ccf;
/* Free the per-open snapshot allocated in oal_control_file_open(). */
534 static int oal_control_file_release(struct inode *inode, struct file *filp)
536 	kfree(filp->private_data);
/* The control fd is readable when the global event count has advanced
 * past this open's snapshot, i.e. an oal was created since the last
 * PRESCAN ioctl on this fd. */
540 static unsigned int oal_control_file_poll(struct file *filp, poll_table *wait)
542 	struct oal_control_file *ccf = filp->private_data;
543 	unsigned int mask = 0;
545 	poll_wait(filp, &oal_control_wait_queue, wait);
547 	if (atomic_read(&oal_control_event_count) != ccf->ccf_event_count)
/* Control-device ioctls: VERSION for format negotiation, MAJOR so
 * userspace can identify oal devices by dev_t, and PRESCAN to latch the
 * current event count before scanning /dev for new logs (re-arming the
 * poll). (review: the switch statement and default case are outside
 * this extract.) */
553 static long oal_control_file_ioctl(struct file *filp, unsigned int cmd,
556 	struct oal_control_file *ccf = filp->private_data;
559 	case LUSTRE_ACCESS_LOG_IOCTL_VERSION:
560 		return LUSTRE_ACCESS_LOG_VERSION_1;
561 	case LUSTRE_ACCESS_LOG_IOCTL_MAJOR:
562 		return oal_log_major;
563 	case LUSTRE_ACCESS_LOG_IOCTL_PRESCAN:
564 		ccf->ccf_event_count = atomic_read(&oal_control_event_count);
/* File operations for the discovery/control device. */
571 static const struct file_operations oal_control_fops = {
572 	.owner = THIS_MODULE,
573 	.open = &oal_control_file_open,
574 	.release = &oal_control_file_release,
575 	.poll = &oal_control_file_poll,
576 	.unlocked_ioctl = &oal_control_file_ioctl,
577 	.llseek = &noop_llseek,
/* Misc device for /dev/lustre-access-log/control ('!' maps to '/'). */
580 static struct miscdevice oal_control_misc = {
581 	.minor = MISC_DYNAMIC_MINOR,
582 	.name = LUSTRE_ACCESS_LOG_DIR_NAME"!control",
583 	.fops = &oal_control_fops,
/* Module init: register the control misc device, reserve a char-device
 * major for oal devices, and create the device class. Unwinds in
 * reverse order on failure. (review: declarations of rc/dev, some
 * error checks, the success return, and parts of the unwind ladder are
 * outside this extract.) */
586 int ofd_access_log_module_init(void)
/* Readers mmap/parse fixed-size records; a non-power-of-2 entry would
 * break the index masking in oal_read_entry()/oal_write_entry(). */
591 	BUILD_BUG_ON(!is_power_of_2(sizeof(struct ofd_access_entry_v1)));
593 	rc = misc_register(&oal_control_misc);
597 	rc = alloc_chrdev_region(&dev, 0, OAL_DEV_COUNT,
598 			LUSTRE_ACCESS_LOG_DIR_NAME);
600 		goto out_oal_control_misc;
602 	oal_log_major = MAJOR(dev);
604 	oal_log_class = class_create(THIS_MODULE, LUSTRE_ACCESS_LOG_DIR_NAME);
605 	if (IS_ERR(oal_log_class)) {
606 		rc = PTR_ERR(oal_log_class);
612 	unregister_chrdev_region(dev, OAL_DEV_COUNT);
613 out_oal_control_misc:
614 	misc_deregister(&oal_control_misc);
/* Module exit: tear down in reverse order of ofd_access_log_module_init().
 * By this point every oal has been deleted and released, so the minor
 * IDR is empty and the region/class can be dropped safely. */
619 void ofd_access_log_module_exit(void)
621 	class_destroy(oal_log_class);
622 	unregister_chrdev_region(MKDEV(oal_log_major, 0), OAL_DEV_COUNT);
623 	idr_destroy(&oal_log_minor_idr);
624 	misc_deregister(&oal_control_misc);