4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
22 * Copyright 2020, DataDirect Networks Storage.
24 * This file is part of Lustre, http://www.lustre.org/
26 * Author: John L. Hammond <jhammond@whamcloud.com>
28 * lustre/utils/ofd_access_log_reader.c
30 * Sample utility to discover and read Lustre (ofd) access logs.
32 * This demonstrates the discovery and reading of Lustre access logs
33 * (see linux/lustre/lustre_access_log.h and
34 * lustre/ofd/ofd_access_log.c.). By default it opens the control
35 * device, discovers and opens all access log devices, and consumes
36 * all access log entries. If invoked with the --list option then it
37 * prints information about all available devices to stdout and exits.
39 * Structured trace points (when --trace is used) are added to permit
40 * testing of the access log functionality (see test_165* in
41 * lustre/tests/sanity.sh).
57 #include <sys/epoll.h>
58 #include <sys/ioctl.h>
59 #include <sys/signalfd.h>
61 #include <sys/sysmacros.h>
62 #include <sys/timerfd.h>
63 #include <sys/types.h>
64 #include <linux/types.h>
65 #include <linux/lustre/lustre_user.h>
66 #include <linux/lustre/lustre_access_log.h>
67 #include "ofd_access_batch.h"
70 /* TODO fsname filter */
72 static FILE *debug_file;
73 static FILE *trace_file;
75 #define DEBUG(fmt, args...) \
77 if (debug_file != NULL) \
78 fprintf(debug_file, "DEBUG %s:%d: "fmt, __func__, __LINE__, ##args); \
81 #define TRACE(fmt, args...) \
83 if (trace_file != NULL) \
84 fprintf(trace_file, "TRACE "fmt, ##args); \
87 #define DEBUG_D(x) DEBUG("%s = %"PRIdMAX"\n", #x, (intmax_t)x)
88 #define DEBUG_P(x) DEBUG("%s = %p\n", #x, x)
89 #define DEBUG_S(x) DEBUG("%s = '%s'\n", #x, x)
90 #define DEBUG_U(x) DEBUG("%s = %"PRIuMAX"\n", #x, (uintmax_t)x)
92 #define ERROR(fmt, args...) \
93 fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##args)
95 #define FATAL(fmt, args...) \
97 ERROR("FATAL: "fmt, ##args); \
102 ALR_EXIT_SUCCESS = INT_MIN + EXIT_SUCCESS,
103 ALR_EXIT_FAILURE = INT_MIN + EXIT_FAILURE,
111 int (*alr_io)(int /* epoll_fd */, struct alr_dev * /* this */, unsigned int /* mask */);
112 void (*alr_destroy)(struct alr_dev *);
117 struct alr_dev alr_dev;
120 size_t alr_entry_size;
124 static struct alr_log *alr_log[1 << 20]; /* 20 == MINORBITS */
125 static int oal_version; /* FIXME ... major version, minor version */
126 static unsigned int oal_log_major;
127 static unsigned int oal_log_minor_max;
128 static struct alr_batch *alr_batch;
129 static FILE *alr_batch_file;
130 static const char *alr_batch_file_path;
131 static int alr_print_fraction = 100;
133 #define D_ALR_DEV "%s %d"
134 #define P_ALR_DEV(ad) \
135 (ad)->alr_name, (ad)->alr_fd
137 #define D_ALR_LOG D_ALR_DEV" %u:%u"
138 #define P_ALR_LOG(al) \
139 P_ALR_DEV(&(al)->alr_dev), major((al)->alr_rdev), minor((al)->alr_rdev)
141 static void alr_dev_free(int epoll_fd, struct alr_dev *ad)
143 TRACE("alr_dev_free %s\n", ad->alr_name);
145 if (!(ad->alr_fd < 0))
146 epoll_ctl(epoll_fd, EPOLL_CTL_DEL, ad->alr_fd, NULL);
148 if (ad->alr_destroy != NULL)
149 (*ad->alr_destroy)(ad);
151 if (!(ad->alr_fd < 0))
158 static struct alr_log **alr_log_lookup(dev_t rdev)
160 assert(major(rdev) == oal_log_major);
162 if (!(minor(rdev) < ARRAY_SIZE(alr_log)))
165 return &alr_log[minor(rdev)];
168 static const char *alr_flags_to_str(unsigned int flags)
170 switch (flags & (OFD_ACCESS_READ | OFD_ACCESS_WRITE)) {
173 case OFD_ACCESS_READ:
175 case OFD_ACCESS_WRITE:
177 case OFD_ACCESS_READ | OFD_ACCESS_WRITE:
182 /* /dev/lustre-access-log/scratch-OST0000 device poll callback: read entries
183 * from log and print. */
184 static int alr_log_io(int epoll_fd, struct alr_dev *ad, unsigned int mask)
186 struct alr_log *al = container_of(ad, struct alr_log, alr_dev);
189 TRACE("alr_log_io %s\n", ad->alr_name);
192 assert(al->alr_entry_size != 0);
193 assert(al->alr_buf_size != 0);
194 assert(al->alr_buf != NULL);
196 count = read(ad->alr_fd, al->alr_buf, al->alr_buf_size);
198 ERROR("cannot read events from '%s': %s\n", ad->alr_name, strerror(errno));
203 TRACE("alr_log_eof %s\n", ad->alr_name);
207 if (count % al->alr_entry_size != 0) {
208 ERROR("invalid read from "D_ALR_LOG": entry_size = %zu, count = %zd\n",
209 P_ALR_LOG(al), al->alr_entry_size, count);
213 DEBUG("read "D_ALR_LOG", count = %zd\n", P_ALR_LOG(al), count);
215 for (i = 0; i < count; i += al->alr_entry_size) {
216 struct ofd_access_entry_v1 *oae =
217 (struct ofd_access_entry_v1 *)&al->alr_buf[i];
219 TRACE("alr_log_entry %s "DFID" %lu %lu %lu %u %u %s\n",
221 PFID(&oae->oae_parent_fid),
222 (unsigned long)oae->oae_begin,
223 (unsigned long)oae->oae_end,
224 (unsigned long)oae->oae_time,
225 (unsigned int)oae->oae_size,
226 (unsigned int)oae->oae_segment_count,
227 alr_flags_to_str(oae->oae_flags));
229 alr_batch_add(alr_batch, ad->alr_name, &oae->oae_parent_fid,
230 oae->oae_time, oae->oae_begin, oae->oae_end,
231 oae->oae_size, oae->oae_segment_count, oae->oae_flags);
237 static void alr_log_destroy(struct alr_dev *ad)
239 struct alr_log *al = container_of(ad, struct alr_log, alr_dev);
240 struct alr_log **pal;
242 TRACE("alr_log_free %s\n", ad->alr_name);
243 assert(major(al->alr_rdev) == oal_log_major);
245 pal = alr_log_lookup(al->alr_rdev);
246 if (pal != NULL && *pal == al)
251 al->alr_buf_size = 0;
254 /* Add an access log (identified by path) to the epoll set. */
255 static int alr_log_add(int epoll_fd, const char *path)
257 struct alr_log **pal, *al = NULL;
264 fd = open(path, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
266 ERROR("cannot open device '%s': %s\n", path, strerror(errno));
267 rc = (errno == ENOENT ? 0 : -1); /* Possible race. */
271 /* Revalidate rdev in case of race. */
274 ERROR("cannot stat '%s': %s\n", path, strerror(errno));
278 if (major(st.st_rdev) != oal_log_major)
281 pal = alr_log_lookup(st.st_rdev);
283 ERROR("no device slot available for '%s' with minor %u\n",
284 path, minor(st.st_rdev));
289 goto out; /* We already have this device. */
291 struct lustre_access_log_info_v1 lali;
293 memset(&lali, 0, sizeof(lali));
295 rc = ioctl(fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali);
297 ERROR("cannot get info for device '%s': %s\n",
298 path, strerror(errno));
302 if (lali.lali_type != LUSTRE_ACCESS_LOG_TYPE_OFD) {
307 al = calloc(1, sizeof(*al));
309 FATAL("cannot allocate struct alr_dev of size %zu: %s\n",
310 sizeof(*al), strerror(errno));
312 al->alr_dev.alr_io = &alr_log_io;
313 al->alr_dev.alr_destroy = &alr_log_destroy;
314 al->alr_dev.alr_fd = fd;
317 al->alr_rdev = st.st_rdev;
319 al->alr_dev.alr_name = strdup(lali.lali_name);
320 if (al->alr_dev.alr_name == NULL)
321 FATAL("cannot copy name of size %zu: %s\n",
322 strlen(lali.lali_name), strerror(errno));
324 al->alr_buf_size = lali.lali_log_size;
325 al->alr_entry_size = lali.lali_entry_size;
327 if (al->alr_entry_size == 0) {
328 ERROR("device '%s' has zero entry size\n", path);
333 if (al->alr_buf_size == 0)
334 al->alr_buf_size = 1048576;
336 al->alr_buf_size = roundup(al->alr_buf_size, al->alr_entry_size);
338 al->alr_buf = malloc(al->alr_buf_size);
339 if (al->alr_buf == NULL)
340 FATAL("cannot allocate log buffer for '%s' of size %zu: %s\n",
341 path, al->alr_buf_size, strerror(errno));
343 struct epoll_event ev = {
344 .events = EPOLLIN | EPOLLHUP,
345 .data.ptr = &al->alr_dev,
348 rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, al->alr_dev.alr_fd, &ev);
350 ERROR("cannot add device '%s' to epoll set: %s\n",
351 path, strerror(errno));
355 TRACE("alr_log_add %s\n", al->alr_dev.alr_name);
357 if (oal_log_minor_max < minor(al->alr_rdev))
358 oal_log_minor_max = minor(al->alr_rdev);
360 assert(*pal == NULL);
366 alr_dev_free(epoll_fd, &al->alr_dev);
374 /* Scan /dev/lustre-access-log/ for new access log devices and add to
376 static int alr_scan(int epoll_fd)
378 const char dir_path[] = "/dev/"LUSTRE_ACCESS_LOG_DIR_NAME;
384 dir = opendir(dir_path);
386 ERROR("cannot open '%s' for scanning: %s\n", dir_path, strerror(errno));
387 return ALR_EXIT_FAILURE;
392 /* Scan /dev for devices with major equal to oal_log_major and add
393 * any new devices. */
394 while ((d = readdir(dir)) != NULL) {
395 char path[6 + PATH_MAX];
396 struct alr_log **pal;
399 if (d->d_type != DT_CHR)
402 rc = fstatat(dir_fd, d->d_name, &st, 0);
404 ERROR("cannot stat '%s/%s' while scanning: %s\n",
405 dir_path, d->d_name, strerror(errno));
409 if (!S_ISCHR(st.st_mode))
412 if (major(st.st_rdev) != oal_log_major)
415 pal = alr_log_lookup(st.st_rdev);
417 ERROR("no device slot available for '%s/%s' with minor %u\n",
418 dir_path, d->d_name, minor(st.st_rdev));
423 continue; /* We already have this device. */
425 snprintf(path, sizeof(path), "%s/%s", dir_path, d->d_name);
427 alr_log_add(epoll_fd, path);
435 /* /dev/lustre-access-log/control device poll callback: call prescan
436 * ioctl and scan /dev/lustre-access-log/ for new access log
438 static int alr_ctl_io(int epoll_fd, struct alr_dev *cd, unsigned int mask)
442 TRACE("%s\n", __func__);
446 return ALR_EXIT_FAILURE;
449 return ALR_EXIT_SUCCESS;
451 rc = ioctl(cd->alr_fd, LUSTRE_ACCESS_LOG_IOCTL_PRESCAN);
453 ERROR("cannot start scanning: %s\n", strerror(errno));
454 return ALR_EXIT_FAILURE;
457 return alr_scan(epoll_fd);
460 /* signalfd epoll callback. Handle SIGINT and SIGTERM by breaking from
461 * the epoll loop and exiting normally.*/
462 static int alr_signal_io(int epoll_fd, struct alr_dev *sd, unsigned int mask)
464 struct signalfd_siginfo ssi;
467 TRACE("%s\n", __func__);
470 rc = read(sd->alr_fd, &ssi, sizeof(ssi));
474 DEBUG_U(ssi.ssi_signo);
475 switch (ssi.ssi_signo) {
478 return ALR_EXIT_SUCCESS;
484 /* batching timerfd epoll callback. Print batched access entries to
486 static int alr_batch_timer_io(int epoll_fd, struct alr_dev *td, unsigned int mask)
488 time_t now = time(NULL);
489 uint64_t expire_count;
492 TRACE("%s\n", __func__);
496 rc = read(td->alr_fd, &expire_count, sizeof(expire_count));
500 DEBUG_U(expire_count);
502 rc = alr_batch_print(alr_batch, alr_batch_file, alr_print_fraction);
504 ERROR("cannot write to '%s': %s\n",
505 alr_batch_file_path, strerror(errno));
509 /* FIXME: blocking write to batch file. */
510 rc = fflush(alr_batch_file);
512 ERROR("cannot write to '%s': %s\n",
513 alr_batch_file_path, strerror(errno));
517 /* Failed writes will leave alr_batch_file (pipe) in a
518 * weird state so make that fatal. */
519 return (rc < 0) ? ALR_EXIT_FAILURE : ALR_OK;
522 /* Call LUSTRE_ACCESS_LOG_IOCTL_INFO to get access log info and print
523 * YAML formatted info to stdout. */
524 static int alr_log_info(struct alr_log *al)
526 struct lustre_access_log_info_v1 lali;
529 rc = ioctl(al->alr_dev.alr_fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali);
531 ERROR("cannot get info for device '%s': %s\n",
532 al->alr_dev.alr_name, strerror(errno));
536 printf("- name: %s\n"
543 " _entry_space: %u\n"
544 " _entry_count: %u\n"
551 lali.lali_entry_size,
554 lali._lali_entry_space,
555 lali._lali_entry_count,
556 lali._lali_drop_count,
557 lali._lali_is_closed);
562 static struct alr_dev *alr_dev_create(int epoll_fd, int fd, const char *name,
563 int (*io)(int, struct alr_dev *, unsigned int),
564 void (*destroy)(struct alr_dev *))
569 alr = calloc(1, sizeof(*alr));
573 alr->alr_name = strdup(name);
574 if (alr->alr_name == NULL) {
579 alr->alr_destroy = destroy;
582 struct epoll_event event = {
583 .events = EPOLLIN | EPOLLHUP,
587 rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, alr->alr_fd, &event);
598 printf("Usage: %s: [OPTION]...\n"
599 "Discover, read, batch, and write Lustre access logs\n"
601 "Mandatory arguments to long options are mandatory for short options too.\n"
602 " -f, --batch-file=FILE print batch to file (default stdout)\n"
603 " -i, --batch-interval=INTERVAL print batch every INTERVAL seconds\n"
604 " -o, --batch-offset=OFFSET print batch at OFFSET seconds\n"
605 " -d, --debug[=FILE] print debug messages to FILE (stderr)\n"
606 " -h, --help display this help and exit\n"
607 " -l, --list print YAML list of available access logs\n"
608 " -t, --trace[=FILE] print trace messages to FILE (stderr)\n",
609 program_invocation_short_name);
612 int main(int argc, char *argv[])
614 const char ctl_path[] = "/dev/"LUSTRE_ACCESS_LOG_DIR_NAME"/control";
615 struct alr_dev *alr_signal = NULL;
616 struct alr_dev *alr_batch_timer = NULL;
617 struct alr_dev *alr_ctl = NULL;
618 time_t batch_interval = 0;
619 time_t batch_offset = 0;
627 static struct option options[] = {
628 { .name = "batch-file", .has_arg = required_argument, .val = 'f', },
629 { .name = "batch-interval", .has_arg = required_argument, .val = 'i', },
630 { .name = "batch-offset", .has_arg = required_argument, .val = 'o', },
631 { .name = "debug", .has_arg = optional_argument, .val = 'd', },
632 { .name = "help", .has_arg = no_argument, .val = 'h', },
633 { .name = "fraction", .has_arg = required_argument, .val = 'F', },
634 { .name = "list", .has_arg = no_argument, .val = 'l', },
635 { .name = "trace", .has_arg = optional_argument, .val = 't', },
639 while ((c = getopt_long(argc, argv, "d::f:F:hi:lt::", options, NULL)) != -1) {
642 alr_batch_file_path = optarg;
646 batch_interval = strtoll(optarg, NULL, 0);
647 if (batch_interval < 0 || batch_interval >= 1048576 ||
649 FATAL("invalid batch interval '%s'\n", optarg);
653 batch_offset = strtoll(optarg, NULL, 0);
654 if (batch_offset < 0 || batch_offset >= 1048576 ||
656 FATAL("invalid batch offset '%s'\n", optarg);
659 if (optarg == NULL) {
661 } else if (strcmp(optarg, "-") == 0) {
664 debug_file = fopen(optarg, "a");
665 if (debug_file == NULL)
666 FATAL("cannot open debug file '%s': %s\n",
667 optarg, strerror(errno));
675 alr_print_fraction = strtoll(optarg, NULL, 0);
676 if (alr_print_fraction < 1 || alr_print_fraction > 100)
677 FATAL("invalid batch offset '%s'\n", optarg);
683 if (optarg == NULL) {
685 } else if (strcmp(optarg, "-") == 0) {
688 trace_file = fopen(optarg, "a");
689 if (debug_file == NULL)
690 FATAL("cannot open debug file '%s': %s\n",
691 optarg, strerror(errno));
696 fprintf(stderr, "Try '%s --help' for more information.\n",
697 program_invocation_short_name);
702 if (batch_interval > 0) {
703 alr_batch = alr_batch_create(-1);
704 if (alr_batch == NULL)
705 FATAL("cannot create batch struct: %s\n",
709 if (alr_batch_file_path != NULL) {
710 alr_batch_file = fopen(alr_batch_file_path, "w");
711 if (alr_batch_file == NULL)
712 FATAL("cannot open batch file '%s': %s\n",
713 alr_batch_file_path, strerror(errno));
715 alr_batch_file_path = "stdout";
716 alr_batch_file = stdout;
719 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
721 FATAL("cannot create epoll set: %s\n", strerror(errno));
723 /* Setup signal FD and add to epoll set. */
724 sigset_t signal_mask;
725 sigemptyset(&signal_mask);
726 sigaddset(&signal_mask, SIGINT);
727 sigaddset(&signal_mask, SIGTERM);
728 rc = sigprocmask(SIG_BLOCK, &signal_mask, NULL);
730 FATAL("cannot set process signal mask: %s\n", strerror(errno));
732 int signal_fd = signalfd(-1, &signal_mask, SFD_NONBLOCK|SFD_CLOEXEC);
734 FATAL("cannot create signalfd: %s\n", strerror(errno));
736 alr_signal = alr_dev_create(epoll_fd, signal_fd, "signal", &alr_signal_io, NULL);
737 if (alr_signal == NULL)
738 FATAL("cannot register signalfd: %s\n", strerror(errno));
742 /* Setup batch timer FD and add to epoll set. */
744 rc = clock_gettime(CLOCK_REALTIME, &now);
746 FATAL("cannot read realtime clock: %s\n", strerror(errno));
748 int timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC);
750 FATAL("cannot create batch timerfd: %s\n", strerror(errno));
752 struct itimerspec it = {
753 .it_value.tv_sec = (batch_interval > 0) ?
754 roundup(now.tv_sec, batch_interval) +
755 (batch_offset % batch_interval) :
757 .it_interval.tv_sec = batch_interval,
760 DEBUG_D(it.it_value.tv_sec);
762 rc = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &it, NULL);
764 FATAL("cannot arm timerfd: %s\n", strerror(errno));
766 alr_batch_timer = alr_dev_create(epoll_fd, timer_fd, "batch_timer",
767 &alr_batch_timer_io, NULL);
768 if (alr_batch_timer == NULL)
769 FATAL("cannot register batch timerfd: %s\n", strerror(errno));
773 /* Open control device. */
774 int ctl_fd = open(ctl_path, O_RDONLY|O_NONBLOCK|O_CLOEXEC);
776 FATAL("cannot open '%s': %s\n", ctl_path, strerror(errno));
778 /* Get and print interface version. */
779 oal_version = ioctl(ctl_fd, LUSTRE_ACCESS_LOG_IOCTL_VERSION);
781 FATAL("cannot get ofd access log interface version: %s\n", strerror(errno));
783 DEBUG_D(oal_version);
785 /* Get and print device major used for access log devices. */
786 oal_log_major = ioctl(ctl_fd, LUSTRE_ACCESS_LOG_IOCTL_MAJOR);
787 if (oal_log_major < 0)
788 FATAL("cannot get ofd access log major: %s\n", strerror(errno));
790 DEBUG_D(oal_log_major);
792 /* Add control device to epoll set. */
793 alr_ctl = alr_dev_create(epoll_fd, ctl_fd, "control", &alr_ctl_io, NULL);
795 FATAL("cannot register control device: %s\n", strerror(errno));
800 struct epoll_event ev[32];
801 int timeout = (list_info ? 0 : -1);
804 ev_count = epoll_wait(epoll_fd, ev, ARRAY_SIZE(ev), timeout);
806 if (errno == EINTR) /* Signal or timeout. */
809 ERROR("cannot wait on epoll set: %s\n", strerror(errno));
810 exit_status = EXIT_FAILURE;
816 for (i = 0; i < ev_count; i++) {
817 struct alr_dev *ad = ev[i].data.ptr;
818 unsigned int mask = ev[i].events;
820 rc = (*ad->alr_io)(epoll_fd, ad, mask);
822 case ALR_EXIT_FAILURE:
823 exit_status = EXIT_FAILURE;
825 case ALR_EXIT_SUCCESS:
826 exit_status = EXIT_SUCCESS;
830 alr_dev_free(epoll_fd, ad);
837 } while (!list_info);
839 exit_status = EXIT_SUCCESS;
841 assert(oal_log_minor_max < ARRAY_SIZE(alr_log));
843 for (m = 0; m <= oal_log_minor_max; m++) {
844 if (alr_log[m] == NULL)
848 rc = alr_log_info(alr_log[m]);
850 exit_status = EXIT_FAILURE;
853 alr_dev_free(epoll_fd, &alr_log[m]->alr_dev);
856 alr_dev_free(epoll_fd, alr_ctl);
857 alr_dev_free(epoll_fd, alr_signal);
858 alr_dev_free(epoll_fd, alr_batch_timer);
861 alr_batch_destroy(alr_batch);
863 DEBUG_D(exit_status);