From: John L. Hammond Date: Mon, 14 Sep 2020 17:58:31 +0000 (-0500) Subject: LU-14049 utils: manage thread resources in alr_batch_print() X-Git-Tag: 2.13.57~103 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F09%2F40309%2F5;p=fs%2Flustre-release.git LU-14049 utils: manage thread resources in alr_batch_print() In alr_batch_print(), create the sort and print thread with the detached attribute. Protect against concurrent write the the batch output file. Ensure that memory is freed in error cases. Signed-off-by: John L. Hammond Change-Id: Ibf1b299bd15f5d189a2302ce476bf2ef986a85b1 Reviewed-on: https://review.whamcloud.com/40309 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/utils/ofd_access_batch.c b/lustre/utils/ofd_access_batch.c index 1fa5db7..d5d567b 100644 --- a/lustre/utils/ofd_access_batch.c +++ b/lustre/utils/ofd_access_batch.c @@ -350,6 +350,7 @@ struct alr_thread_arg { struct list_head list; int fraction; FILE *file; + pthread_mutex_t *file_mutex; }; void *alr_sort_and_print_thread(void *arg) @@ -357,7 +358,8 @@ void *alr_sort_and_print_thread(void *arg) struct alr_entry *alre, *next; struct alr_thread_arg *aa = arg; struct list_head *tmp = &aa->list; - int *sa, d, i, nr = 0; + int *sa = NULL; + int rc, d, i, nr = 0; unsigned long cut; list_for_each_entry(alre, tmp, alre_fid_hash_node.fhn_node) { @@ -366,72 +368,98 @@ void *alr_sort_and_print_thread(void *arg) if (alre->alre_count[1] > 0) nr++; } - if (nr) { - sa = calloc(sizeof(int), nr); - if (!sa) { - fprintf(stderr, "cannot allocate memory for sorting\n"); - exit(1); - } - i = 0; - list_for_each_entry(alre, tmp, alre_fid_hash_node.fhn_node) { - if (alre->alre_count[0] > 0) - sa[i++] = alre->alre_count[0]; - if (alre->alre_count[1] > 0) - sa[i++] = alre->alre_count[1]; - } - qsort(sa, nr, sizeof(int), sort_compare); - i = nr * aa->fraction / 100; - if (i > 0) + + if (nr == 0) + goto out; + + sa = calloc(nr, sizeof(*sa)); + if (!sa) { + fprintf(stderr, "cannot allocate memory for sorting\n"); + exit(1); + } + + i = 0; + list_for_each_entry(alre, tmp, alre_fid_hash_node.fhn_node) { + if (alre->alre_count[0] > 0) + sa[i++] = alre->alre_count[0]; + if (alre->alre_count[1] > 0) + sa[i++] = alre->alre_count[1]; + } + + qsort(sa, nr, sizeof(*sa), sort_compare); + i = nr * aa->fraction / 100; + if (i > 0) + i--; + cut = sa[i]; + if (cut < 1) + cut = 1; + free(sa); + + /* Prevent jumbled output from multiple concurrent sort and + * print threads. */ + rc = pthread_mutex_lock(aa->file_mutex); + if (rc != 0) { + fprintf(stderr, "cannot lock batch file: %s\n", + strerror(rc)); + exit(1); + } + + /* there might be lots of items at @cut, but we want to limit total + * output. so the first loop dumps all items > @cut and the second + * loop dumps items=@cut so that total number (@i) is not exceeeded. + * XXX: possible optimization - move items=@cut to another list, so + * that 2nd pass takes < O(n) */ + list_for_each_entry(alre, tmp, alre_fid_hash_node.fhn_node) { + for (d = 0; d < 2; d++) { + if (alre->alre_count[d] <= cut) + continue; + alre_printf(aa->file, alre, d); i--; - cut = sa[i]; - if (cut < 1) - cut = 1; - free(sa); - - /* there might be lots of items at @cut, but we want to limit total - * output. so the first loop dumps all items > @cut and the second - * loop dumps items=@cut so that total number (@i) is not exceeeded. - * XXX: possible optimization - move items=@cut to another list, so - * that 2nd pass takes < O(n) */ - list_for_each_entry_safe(alre, next, tmp, alre_fid_hash_node.fhn_node) { - for (d = 0; d < 2; d++) { - if (alre->alre_count[d] <= cut) - continue; - alre_printf(aa->file, alre, d); - i--; - } } - list_for_each_entry_safe(alre, next, tmp, alre_fid_hash_node.fhn_node) { - for (d = 0; d < 2 && i > 0; d++) { - if (alre->alre_count[d] != cut) - continue; - alre_printf(aa->file, alre, d); - i--; - } - alre_del_init(alre); - free(alre); + } + + list_for_each_entry(alre, tmp, alre_fid_hash_node.fhn_node) { + for (d = 0; d < 2 && i > 0; d++) { + if (alre->alre_count[d] != cut) + continue; + alre_printf(aa->file, alre, d); + i--; } } + rc = pthread_mutex_unlock(aa->file_mutex); + if (rc != 0) { + fprintf(stderr, "cannot unlock batch file: %s\n", + strerror(rc)); + exit(1); + } + +out: + list_for_each_entry_safe(alre, next, tmp, alre_fid_hash_node.fhn_node) { + alre_del_init(alre); + free(alre); + } + free(aa); - pthread_exit((void *)0); - return 0; + return NULL; } /* Print, clear, and resize the batch. */ -int alr_batch_print(struct alr_batch *alrb, FILE *file, int fraction) +int alr_batch_print(struct alr_batch *alrb, FILE *file, + pthread_mutex_t *file_mutex, int fraction) { unsigned int new_hash_shift; - struct alr_thread_arg *aa; + pthread_attr_t attr, *pattr = NULL; + struct alr_thread_arg *aa = NULL; pthread_t pid; int i, rc; if (alrb == NULL) return 0; - aa = malloc(sizeof(*aa)); - if (!aa) + aa = calloc(1, sizeof(*aa)); + if (aa == NULL) return -ENOMEM; /* move all collected items to the temp list */ @@ -443,12 +471,27 @@ int alr_batch_print(struct alr_batch *alrb, FILE *file, int fraction) INIT_LIST_HEAD(&alrb->alrb_hash[i]); } aa->file = file; + aa->file_mutex = file_mutex; aa->fraction = fraction; + rc = pthread_attr_init(&attr); + if (rc != 0) + goto out; + + pattr = &attr; + + rc = pthread_attr_setdetachstate(pattr, PTHREAD_CREATE_DETACHED); + if (rc != 0) + goto out; + /* as sorting may take time and we don't want to lose access * records we better do sorting and printing in a different thread */ - rc = pthread_create(&pid, NULL, alr_sort_and_print_thread, aa); + rc = pthread_create(&pid, pattr, &alr_sort_and_print_thread, aa); + if (rc != 0) + goto out; + aa = NULL; /* Sort and print thread owns it now. */ +out: /* Resize hash based on previous count. */ new_hash_shift = alrb->alrb_hash_shift; @@ -461,6 +504,24 @@ int alr_batch_print(struct alr_batch *alrb, FILE *file, int fraction) alrb->alrb_count = 0; + if (pattr != NULL) + pthread_attr_destroy(pattr); + + if (aa != NULL) { + struct alr_entry *alre, *next; + + list_for_each_entry_safe(alre, next, &aa->list, + alre_fid_hash_node.fhn_node) { + alre_del_init(alre); + free(alre); + } + } + + free(aa); + + if (rc > 0) + rc = -rc; /* Fixup pthread return conventions. */ + return rc; } diff --git a/lustre/utils/ofd_access_batch.h b/lustre/utils/ofd_access_batch.h index 7b874ee0..8c5c8e9 100644 --- a/lustre/utils/ofd_access_batch.h +++ b/lustre/utils/ofd_access_batch.h @@ -1,5 +1,6 @@ #ifndef _OFD_ACCESS_BATCH_H_ #define _OFD_ACCESS_BATCH_H_ +#include #include #include @@ -11,6 +12,7 @@ void alr_batch_destroy(struct alr_batch *alrb); int alr_batch_add(struct alr_batch *alrb, const char *obd_name, const struct lu_fid *pfid, time_t time, __u64 begin, __u64 end, __u32 size, __u32 segment_count, __u32 flags); -int alr_batch_print(struct alr_batch *alrb, FILE *file, int fraction); +int alr_batch_print(struct alr_batch *alrb, FILE *file, + pthread_mutex_t *file_mutex, int fraction); #endif /* _OFD_ACCESS_BATCH_H_ */ diff --git a/lustre/utils/ofd_access_log_reader.c b/lustre/utils/ofd_access_log_reader.c index 75d5a59..5bbc20a 100644 --- a/lustre/utils/ofd_access_log_reader.c +++ b/lustre/utils/ofd_access_log_reader.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static unsigned int oal_log_major; static unsigned int oal_log_minor_max; static struct alr_batch *alr_batch; static FILE *alr_batch_file; +static pthread_mutex_t alr_batch_file_mutex = PTHREAD_MUTEX_INITIALIZER; static const char *alr_batch_file_path; static int alr_print_fraction = 100; @@ -499,7 +501,8 @@ static int alr_batch_timer_io(int epoll_fd, struct alr_dev *td, unsigned int mas DEBUG_U(expire_count); - rc = alr_batch_print(alr_batch, alr_batch_file, alr_print_fraction); + rc = alr_batch_print(alr_batch, alr_batch_file, &alr_batch_file_mutex, + alr_print_fraction); if (rc < 0) { ERROR("cannot write to '%s': %s\n", alr_batch_file_path, strerror(errno));