X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Fofd_access_log_reader.c;h=c62717851f3e438a7d0c386b38ac032a72962f8b;hb=2db059c05781cd332ef337eef0de3e5656802d8e;hp=66252412eb32c6facd05a01228540ca628372b49;hpb=66172e3274ca3187edd519dd87a81a478a6d7259;p=fs%2Flustre-release.git diff --git a/lustre/utils/ofd_access_log_reader.c b/lustre/utils/ofd_access_log_reader.c index 6625241..c627178 100644 --- a/lustre/utils/ofd_access_log_reader.c +++ b/lustre/utils/ofd_access_log_reader.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -59,10 +60,12 @@ #include #include #include +#include #include #include #include #include +#include "ofd_access_batch.h" #include "lstddef.h" /* TODO fsname filter */ @@ -82,12 +85,10 @@ static FILE *trace_file; fprintf(trace_file, "TRACE "fmt, ##args); \ } while (0) -#define DEBUG_D(x) DEBUG("%s = %d\n", #x, x) +#define DEBUG_D(x) DEBUG("%s = %"PRIdMAX"\n", #x, (intmax_t)x) #define DEBUG_P(x) DEBUG("%s = %p\n", #x, x) #define DEBUG_S(x) DEBUG("%s = '%s'\n", #x, x) -#define DEBUG_U(x) DEBUG("%s = %u\n", #x, x) -#define DEBUG_U32(x) DEBUG("%s = %"PRIu32"\n", #x, x) -#define DEBUG_U64(x) DEBUG("%s = %"PRIu64"\n", #x, x) +#define DEBUG_U(x) DEBUG("%s = %"PRIuMAX"\n", #x, (uintmax_t)x) #define ERROR(fmt, args...) \ fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##args) @@ -118,13 +119,21 @@ struct alr_log { char *alr_buf; size_t alr_buf_size; size_t alr_entry_size; + size_t alr_read_count; dev_t alr_rdev; }; static struct alr_log *alr_log[1 << 20]; /* 20 == MINORBITS */ static int oal_version; /* FIXME ... major version, minor version */ +static __u32 alr_filter = 0xffffffff; /* no filter by default */ static unsigned int oal_log_major; static unsigned int oal_log_minor_max; +static struct alr_batch *alr_batch; +static FILE *alr_batch_file; +static pthread_mutex_t alr_batch_file_mutex = PTHREAD_MUTEX_INITIALIZER; +static const char *alr_batch_file_path; +static const char *alr_stats_file_path; +static int alr_print_fraction = 100; #define D_ALR_DEV "%s %d" #define P_ALR_DEV(ad) \ @@ -208,6 +217,8 @@ static int alr_log_io(int epoll_fd, struct alr_dev *ad, unsigned int mask) DEBUG("read "D_ALR_LOG", count = %zd\n", P_ALR_LOG(al), count); + al->alr_read_count += count / al->alr_entry_size; + for (i = 0; i < count; i += al->alr_entry_size) { struct ofd_access_entry_v1 *oae = (struct ofd_access_entry_v1 *)&al->alr_buf[i]; @@ -221,6 +232,10 @@ static int alr_log_io(int epoll_fd, struct alr_dev *ad, unsigned int mask) (unsigned int)oae->oae_size, (unsigned int)oae->oae_segment_count, alr_flags_to_str(oae->oae_flags)); + + alr_batch_add(alr_batch, ad->alr_name, &oae->oae_parent_fid, + oae->oae_time, oae->oae_begin, oae->oae_end, + oae->oae_size, oae->oae_segment_count, oae->oae_flags); } return ALR_OK; @@ -251,6 +266,8 @@ static int alr_log_add(int epoll_fd, const char *path) int fd = -1; int rc; + DEBUG_S(path); + fd = open(path, O_RDONLY|O_NONBLOCK|O_CLOEXEC); if (fd < 0) { ERROR("cannot open device '%s': %s\n", path, strerror(errno)); @@ -293,6 +310,12 @@ static int alr_log_add(int epoll_fd, const char *path) rc = 0; goto out; } + rc = ioctl(fd, LUSTRE_ACCESS_LOG_IOCTL_FILTER, alr_filter); + if (rc < 0) { + ERROR("cannot set filter '%s': %s\n", + path, strerror(errno)); + goto out; + } al = calloc(1, sizeof(*al)); if (al == NULL) @@ -361,6 +384,94 @@ out: return rc; } +/* Call LUSTRE_ACCESS_LOG_IOCTL_INFO to get access log info and print + * YAML formatted info to stdout. */ +static int alr_log_info(struct alr_log *al) +{ + struct lustre_access_log_info_v1 lali; + int rc; + + rc = ioctl(al->alr_dev.alr_fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali); + if (rc < 0) { + ERROR("cannot get info for device '%s': %s\n", + al->alr_dev.alr_name, strerror(errno)); + return -1; + } + + printf("- name: %s\n" + " version: %#x\n" + " type: %#x\n" + " log_size: %u\n" + " entry_size: %u\n", + lali.lali_name, + lali.lali_version, + lali.lali_type, + lali.lali_log_size, + lali.lali_entry_size); + + return 0; +} + +static int alr_log_stats(FILE *file, struct alr_log *al) +{ + struct lustre_access_log_info_v1 lali; + int rc; + + rc = ioctl(al->alr_dev.alr_fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali); + if (rc < 0) { + ERROR("cannot get info for device '%s': %s\n", + al->alr_dev.alr_name, strerror(errno)); + return -1; + } + +#define X(m) \ + fprintf(file, "STATS %s %s %u\n", lali.lali_name, #m, lali.m) + + X(_lali_head); + X(_lali_tail); + X(_lali_entry_space); + X(_lali_entry_count); + X(_lali_drop_count); + X(_lali_is_closed); +#undef X + + fprintf(file, "STATS %s %s %zu\n", + lali.lali_name, "alr_read_count", al->alr_read_count); + + return 0; +} + +static void alr_log_stats_all(void) +{ + FILE *stats_file; + int m; + + if (alr_stats_file_path == NULL) { + stats_file = stderr; + } else if (strcmp(alr_stats_file_path, "-") == 0) { + stats_file = stdout; + } else { + stats_file = fopen(alr_stats_file_path, "a"); + if (stats_file == NULL) { + ERROR("cannot open '%s': %s\n", + alr_stats_file_path, strerror(errno)); + return; + } + } + + for (m = 0; m <= oal_log_minor_max; m++) { + if (alr_log[m] == NULL) + continue; + + alr_log_stats(stats_file, alr_log[m]); + } + + if (stats_file == stdout || stats_file == stderr) + fflush(stats_file); + else + fclose(stats_file); +} + /* Scan /dev/lustre-access-log/ for new access log devices and add to * epoll set. */ static int alr_scan(int epoll_fd) @@ -461,57 +572,76 @@ static int alr_signal_io(int epoll_fd, struct alr_dev *sd, unsigned int mask) if (rc <= 0) return ALR_OK; - DEBUG_U32(ssi.ssi_signo); + DEBUG_U(ssi.ssi_signo); switch (ssi.ssi_signo) { case SIGINT: case SIGTERM: return ALR_EXIT_SUCCESS; + case SIGUSR1: + alr_log_stats_all(); + + return ALR_OK; + case SIGUSR2: + if (debug_file == NULL) + debug_file = stderr; + + if (trace_file == NULL) + trace_file = stderr; + + return ALR_OK; default: return ALR_OK; } } -/* Call LUSTRE_ACCESS_LOG_IOCTL_INFO to get access log info and print - * YAML formatted info to stdout. */ -static int alr_log_info(struct alr_log *al) +/* batching timerfd epoll callback. Print batched access entries to + * alr_batch_file. */ +static int alr_batch_timer_io(int epoll_fd, struct alr_dev *td, unsigned int mask) { - struct lustre_access_log_info_v1 lali; - int rc; + time_t now = time(NULL); + uint64_t expire_count; + ssize_t rc; - rc = ioctl(al->alr_dev.alr_fd, LUSTRE_ACCESS_LOG_IOCTL_INFO, &lali); + TRACE("%s\n", __func__); + DEBUG_D(now); + DEBUG_U(mask); + + rc = read(td->alr_fd, &expire_count, sizeof(expire_count)); + if (rc <= 0) + return ALR_OK; + + DEBUG_U(expire_count); + + rc = alr_batch_print(alr_batch, alr_batch_file, &alr_batch_file_mutex, + alr_print_fraction); if (rc < 0) { - ERROR("cannot get info for device '%s': %s\n", - al->alr_dev.alr_name, strerror(errno)); - return -1; + ERROR("cannot write to '%s': %s\n", + alr_batch_file_path, strerror(errno)); + goto out; } +out: + /* Failed writes will leave alr_batch_file (pipe) in a + * weird state so make that fatal. */ + return (rc < 0) ? ALR_EXIT_FAILURE : ALR_OK; +} - printf("- name: %s\n" - " version: %#x\n" - " type: %#x\n" - " log_size: %u\n" - " entry_size: %u\n" - " _head: %u\n" - " _tail: %u\n" - " _entry_space: %u\n" - " _entry_count: %u\n" - " _drop_count: %u\n" - " _is_closed: %u\n", - lali.lali_name, - lali.lali_version, - lali.lali_type, - lali.lali_log_size, - lali.lali_entry_size, - lali._lali_head, - lali._lali_tail, - lali._lali_entry_space, - lali._lali_entry_count, - lali._lali_drop_count, - lali._lali_is_closed); +/* batch file (stdout) poll callback: detect remote pipe close and exit. */ +static int alr_batch_file_io(int epoll_fd, struct alr_dev *ad, unsigned int mask) +{ + TRACE("%s\n", __func__); + DEBUG_U(mask); - return 0; + if (mask & EPOLLHUP) + return ALR_EXIT_SUCCESS; + + if (mask & EPOLLERR) + return ALR_EXIT_FAILURE; + + return ALR_OK; } static struct alr_dev *alr_dev_create(int epoll_fd, int fd, const char *name, + uint32_t events, int (*io)(int, struct alr_dev *, unsigned int), void (*destroy)(struct alr_dev *)) { @@ -532,7 +662,7 @@ static struct alr_dev *alr_dev_create(int epoll_fd, int fd, const char *name, alr->alr_fd = fd; struct epoll_event event = { - .events = EPOLLIN | EPOLLHUP, + .events = events, .data.ptr = alr, }; @@ -545,30 +675,74 @@ static struct alr_dev *alr_dev_create(int epoll_fd, int fd, const char *name, return alr; } +void usage(void) +{ + printf("Usage: %s: [OPTION]...\n" +"Discover, read, batch, and write Lustre access logs\n" +"\n" +"Mandatory arguments to long options are mandatory for short options too.\n" +" -f, --batch-file=FILE print batch to file (default stdout)\n" +" -F, --batch-fraction=P set batch printing fraction to P/100\n" +" -i, --batch-interval=INTERVAL print batch every INTERVAL seconds\n" +" -o, --batch-offset=OFFSET print batch at OFFSET seconds\n" +" -I, --mdt-index-filter=INDEX set log MDT index filter to INDEX\n" +" -h, --help display this help and exit\n" +" -l, --list print YAML list of available access logs\n" +" -d, --debug[=FILE] print debug messages to FILE (stderr)\n" +" -s, --stats=FILE print stats messages to FILE (stderr)\n" +" -t, --trace[=FILE] print trace messages to FILE (stderr)\n", + program_invocation_short_name); +} + int main(int argc, char *argv[]) { const char ctl_path[] = "/dev/"LUSTRE_ACCESS_LOG_DIR_NAME"/control"; struct alr_dev *alr_signal = NULL; + struct alr_dev *alr_batch_timer = NULL; + struct alr_dev *alr_batch_file_hup = NULL; struct alr_dev *alr_ctl = NULL; + time_t batch_interval = 0; + time_t batch_offset = 0; unsigned int m; int list_info = 0; int epoll_fd = -1; - int signal_fd = -1; - int ctl_fd = -1; int exit_status; int rc; int c; static struct option options[] = { + { .name = "batch-file", .has_arg = required_argument, .val = 'f', }, + { .name = "batch-fraction", .has_arg = required_argument, .val = 'F', }, + { .name = "batch-interval", .has_arg = required_argument, .val = 'i', }, + { .name = "batch-offset", .has_arg = required_argument, .val = 'o', }, + { .name = "mdt-index-filter", .has_arg = required_argument, .val = 'I' }, { .name = "debug", .has_arg = optional_argument, .val = 'd', }, { .name = "help", .has_arg = no_argument, .val = 'h', }, { .name = "list", .has_arg = no_argument, .val = 'l', }, + { .name = "stats", .has_arg = required_argument, .val = 's', }, { .name = "trace", .has_arg = optional_argument, .val = 't', }, { .name = NULL, }, }; - while ((c = getopt_long(argc, argv, "d::hlt::", options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "d::f:F:hi:I:ls:t::", options, NULL)) != -1) { switch (c) { + case 'f': + alr_batch_file_path = optarg; + break; + case 'i': + errno = 0; + batch_interval = strtoll(optarg, NULL, 0); + if (batch_interval < 0 || batch_interval >= 1048576 || + errno != 0) + FATAL("invalid batch interval '%s'\n", optarg); + break; + case 'o': + errno = 0; + batch_offset = strtoll(optarg, NULL, 0); + if (batch_offset < 0 || batch_offset >= 1048576 || + errno != 0) + FATAL("invalid batch offset '%s'\n", optarg); + break; case 'd': if (optarg == NULL) { debug_file = stderr; @@ -583,11 +757,22 @@ int main(int argc, char *argv[]) break; case 'h': - /* ... */ + usage(); exit(EXIT_SUCCESS); + case 'F': + alr_print_fraction = strtoll(optarg, NULL, 0); + if (alr_print_fraction < 1 || alr_print_fraction > 100) + FATAL("invalid batch offset '%s'\n", optarg); + break; + case 'I': + alr_filter = strtoll(optarg, NULL, 0); + break; case 'l': list_info = 1; break; + case 's': + alr_stats_file_path = optarg; + break; case 't': if (optarg == NULL) { trace_file = stderr; @@ -602,11 +787,29 @@ int main(int argc, char *argv[]) break; case '?': - /* Try ... for more ... */ + fprintf(stderr, "Try '%s --help' for more information.\n", + program_invocation_short_name); exit(EXIT_FAILURE); } } + if (batch_interval > 0) { + alr_batch = alr_batch_create(-1); + if (alr_batch == NULL) + FATAL("cannot create batch struct: %s\n", + strerror(errno)); + } + + if (alr_batch_file_path != NULL) { + alr_batch_file = fopen(alr_batch_file_path, "w"); + if (alr_batch_file == NULL) + FATAL("cannot open batch file '%s': %s\n", + alr_batch_file_path, strerror(errno)); + } else { + alr_batch_file_path = "stdout"; + alr_batch_file = stdout; + } + epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (epoll_fd < 0) FATAL("cannot create epoll set: %s\n", strerror(errno)); @@ -616,22 +819,69 @@ int main(int argc, char *argv[]) sigemptyset(&signal_mask); sigaddset(&signal_mask, SIGINT); sigaddset(&signal_mask, SIGTERM); + sigaddset(&signal_mask, SIGUSR1); + sigaddset(&signal_mask, SIGUSR2); rc = sigprocmask(SIG_BLOCK, &signal_mask, NULL); if (rc < 0) FATAL("cannot set process signal mask: %s\n", strerror(errno)); - signal_fd = signalfd(-1, &signal_mask, SFD_NONBLOCK|SFD_CLOEXEC); + int signal_fd = signalfd(-1, &signal_mask, SFD_NONBLOCK|SFD_CLOEXEC); if (signal_fd < 0) FATAL("cannot create signalfd: %s\n", strerror(errno)); - alr_signal = alr_dev_create(epoll_fd, signal_fd, "signal", &alr_signal_io, NULL); + alr_signal = alr_dev_create(epoll_fd, signal_fd, "signal", EPOLLIN, + &alr_signal_io, NULL); if (alr_signal == NULL) FATAL("cannot register signalfd: %s\n", strerror(errno)); signal_fd = -1; + /* Setup batch timer FD and add to epoll set. */ + struct timespec now; + rc = clock_gettime(CLOCK_REALTIME, &now); + if (rc < 0) + FATAL("cannot read realtime clock: %s\n", strerror(errno)); + + int timer_fd = timerfd_create(CLOCK_REALTIME, TFD_NONBLOCK|TFD_CLOEXEC); + if (timer_fd < 0) + FATAL("cannot create batch timerfd: %s\n", strerror(errno)); + + struct itimerspec it = { + .it_value.tv_sec = (batch_interval > 0) ? + roundup(now.tv_sec, batch_interval) + + (batch_offset % batch_interval) : + 0, + .it_interval.tv_sec = batch_interval, + }; + + DEBUG_D(it.it_value.tv_sec); + + rc = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &it, NULL); + if (rc < 0) + FATAL("cannot arm timerfd: %s\n", strerror(errno)); + + alr_batch_timer = alr_dev_create(epoll_fd, timer_fd, "batch_timer", + EPOLLIN, &alr_batch_timer_io, NULL); + if (alr_batch_timer == NULL) + FATAL("cannot register batch timerfd: %s\n", strerror(errno)); + + timer_fd = -1; + + int batch_fd = dup(fileno(alr_batch_file)); + if (batch_fd < 0) + FATAL("cannot duplicate batch file descriptor: %s\n", + strerror(errno)); + + /* We pass events = 0 since we only care about EPOLLHUP. */ + alr_batch_file_hup = alr_dev_create(epoll_fd, batch_fd, "batch_file", 0, + &alr_batch_file_io, NULL); + if (alr_batch_file_hup == NULL) + FATAL("cannot register batch file HUP: %s\n", strerror(errno)); + + batch_fd = -1; + /* Open control device. */ - ctl_fd = open(ctl_path, O_RDONLY|O_NONBLOCK|O_CLOEXEC); + int ctl_fd = open(ctl_path, O_RDONLY|O_NONBLOCK|O_CLOEXEC); if (ctl_fd < 0) FATAL("cannot open '%s': %s\n", ctl_path, strerror(errno)); @@ -650,7 +900,8 @@ int main(int argc, char *argv[]) DEBUG_D(oal_log_major); /* Add control device to epoll set. */ - alr_ctl = alr_dev_create(epoll_fd, ctl_fd, "control", &alr_ctl_io, NULL); + alr_ctl = alr_dev_create(epoll_fd, ctl_fd, "control", EPOLLIN, + &alr_ctl_io, NULL); if (alr_ctl == NULL) FATAL("cannot register control device: %s\n", strerror(errno)); @@ -715,8 +966,12 @@ out: alr_dev_free(epoll_fd, alr_ctl); alr_dev_free(epoll_fd, alr_signal); + alr_dev_free(epoll_fd, alr_batch_timer); + alr_dev_free(epoll_fd, alr_batch_file_hup); close(epoll_fd); + alr_batch_destroy(alr_batch); + DEBUG_D(exit_status); return exit_status;