From 063889913fa3715d31605114fcfb47585f4fb43a Mon Sep 17 00:00:00 2001 From: Vitaliy Kuznetsov Date: Fri, 29 Mar 2024 15:29:49 +0100 Subject: [PATCH] EX-9459 lipe: Fix behavior when getting attributes This improvement is important and is intended for the --collect-fsize-stats output policy in lipe_scan3. This patch prevents the scanning process from terminating early if any LOV attribute is not received correctly. Instead of halting the scan, the patch adds additional error counters, and all types of reports will now include new error statistics. Also add counters for objects that have no size/allocated size. An example of the new error block, taken from a report with the .out extension, which contains the following fields: Error counters: Allocated blocks is empty: 11101 Size is empty: 0 Without size (all size value empty): 59 Failed to get LOV attr: 0 Failed to get mirror count: 0 Failed to get stripe count: 0 Failed to get stripe size: 0 Test-Parameters: trivial testlist=sanity-lipe-scan3,sanity-lipe-find3 Signed-off-by: Vitaliy Kuznetsov Change-Id: I1817ea189f3d554894822ad8d12a8514546b13b0 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54583 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexandre Ioffe Reviewed-by: Andreas Dilger --- lipe/src/lipe_scan3/ls3_stats.c | 165 ++++++++++++++++++++++++++++++++++------ lipe/src/lipe_scan3/ls3_stats.h | 28 ++++++- 2 files changed, 165 insertions(+), 28 deletions(-) diff --git a/lipe/src/lipe_scan3/ls3_stats.c b/lipe/src/lipe_scan3/ls3_stats.c index eba5a2c..c64afc7 100644 --- a/lipe/src/lipe_scan3/ls3_stats.c +++ b/lipe/src/lipe_scan3/ls3_stats.c @@ -19,6 +19,42 @@ #include "ls3_stats.h" #include "ls3_dir_stats.h" +struct fstats_report *reports_with_stats; + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define LS3_GET_ERROR_COUNT(stats, error) \ + (atomic_load(&(stats)->fr_errors[(error)])) + + #define LS3_INC_ERROR_COUNT(stats, error) \ + 
atomic_fetch_add_explicit(&(stats)->fr_errors[(error)], \ + 1, memory_order_relaxed); + + #define LS3_INIT_ERRORS_COUNT(stats) \ + for (int _f = 0; _f < LS3_STATS_ERROR_END_MARKER; _f++) { \ + atomic_init(&(stats)->fr_errors[_f], 0); \ + } + + #define LS3_DESTROY_ERRORS_COUNT(stats) + +#else + #define LS3_GET_ERROR_COUNT(stats, error) \ + ((stats)->fr_errors[(error)]) + + #define LS3_INC_ERROR_COUNT(stats, error) \ + do { \ + pthread_mutex_lock(&(stats)->fr_error_mutex); \ + (stats)->fr_errors[(error)]++; \ + pthread_mutex_unlock(&(stats)->fr_error_mutex); \ + } while(0) + + #define LS3_INIT_ERRORS_COUNT(stats) \ + pthread_mutex_init(&(stats)->fr_error_mutex, NULL); + + #define LS3_DESTROY_ERRORS_COUNT(stats) \ + pthread_mutex_destroy(&(stats)->fr_error_mutex); + +#endif + /* Function to calculate the range index of a number. * The range index is based on the power of 2 that is less than @@ -89,6 +125,24 @@ static const char* ls3_stats_get_groupname_from_gid(gid_t gid) return grp ? grp->gr_name : "NULL"; } +static const ls3_stats_err_mapping ls3_err_mappings[] = { + {"Allocated blocks is empty", "AllocatedBlocksIsEmpty", + LS3_STATS_ERROR_EMPTY_BLOCK_SIZE}, + {"Size is empty", "SizeIsEmpty", + LS3_STATS_ERROR_EMPTY_SIZE}, + {"Without size (all size value empty)", "WithoutSize", + LS3_STATS_ERROR_WITHOUT_SIZE}, + {"Failed to get LOV attr", "FailedToGetLOVattr", + LS3_STATS_ERROR_GET_LOV_FAILED}, + {"Failed to get mirror count", "FailedToGetMirrorCount", + LS3_STATS_ERROR_GET_MIRROR_CNT}, + {"Failed to get stripe count", "FailedToGetStripeCount", + LS3_STATS_ERROR_GET_STRIPE_CNT}, + {"Failed to get stripe size", "FailedToGetStripeSize", + LS3_STATS_ERROR_GET_STRIPE_SIZE}, + {NULL, NULL, LS3_STATS_ERROR_END_MARKER} /* End marker */ +}; + static const ls3_stats_extension_mapping ext_mappings[] = { {"out", LS3_STATS_FILE_EXTENSION_OUT}, {"json", LS3_STATS_FILE_EXTENSION_JSON}, @@ -793,6 +847,8 @@ static void ls3_stats_print_to_json(const char *f_time, double e_time) 
struct json_object *jobj_main = json_object_new_object(); struct json_object *jobj_reports = json_object_new_array(); struct json_object *jobj_users_reports = json_object_new_array(); + struct json_object *jobj_errors = json_object_new_object(); + const ls3_stats_err_mapping *err; const char *json_str; char *title_text; char *value_ts; @@ -833,6 +889,15 @@ static void ls3_stats_print_to_json(const char *f_time, double e_time) reports_with_stats->client_mount_path)); } + for (err = ls3_err_mappings; + err->counter_num != LS3_STATS_ERROR_END_MARKER; ++err) { + json_object_object_add(jobj_errors, err->json_err_header, + json_object_new_int( + LS3_GET_ERROR_COUNT(reports_with_stats, + err->counter_num))); + } + json_object_object_add(jobj_main, "ErrorCounters", jobj_errors); + for (i = 0; i < LS3_STATS_TYPE_TOTAL_COUNT_REPORT; i++) { struct json_object *jobj_report; struct json_object *jobj_general; @@ -1050,6 +1115,7 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time) { struct report_template *report_ptr; struct range_report_template *range_ptr; + const ls3_stats_err_mapping *err; char *second_value_ts; char *value_ts; FILE *yaml_fd; @@ -1066,8 +1132,8 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time) "DevicePath: %s\n" "DeviceName: %s\n" "ClientMountPath: %s\n" - "Inodes count on device: %u\n" - "Free inodes count on device: %u\n" + "InodesCountOnDevice: %u\n" + "FreeInodesCountOnDevice: %u\n" "DeviceType: %s\n", LS3_STATS_VERSION, f_time, reports_with_stats->total_count_files, e_time, @@ -1078,6 +1144,15 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time) reports_with_stats->free_inodes_count, reports_with_stats->device_is_mdt ? 
"MDT" : "OST"); + fprintf(yaml_fd, "\nError counters:\n"); + for (err = ls3_err_mappings; + err->counter_num != LS3_STATS_ERROR_END_MARKER; ++err) { + fprintf(yaml_fd, " %s: %u\n", err->json_err_header, + LS3_GET_ERROR_COUNT(reports_with_stats, + err->counter_num)); + } + fprintf(yaml_fd, "\n"); + for (i = 0; i < LS3_STATS_TYPE_TOTAL_COUNT_REPORT; i++) { char *title_text; @@ -1213,6 +1288,7 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time) { struct report_template *report_ptr; struct range_report_template *range_ptr; + const ls3_stats_err_mapping *err; char *second_value_ts; char *value_ts; FILE *out_fd; @@ -1239,6 +1315,15 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time) reports_with_stats->free_inodes_count, reports_with_stats->client_mount_path); + fprintf(out_fd, "Error counters:\n"); + for (err = ls3_err_mappings; + err->counter_num != LS3_STATS_ERROR_END_MARKER; ++err) { + fprintf(out_fd, "%s: %u\n", err->str_err_header, + LS3_GET_ERROR_COUNT(reports_with_stats, + err->counter_num)); + } + fprintf(out_fd, "\n"); + for (i = 0; i < LS3_STATS_TYPE_TOTAL_COUNT_REPORT; i++) { char *header_text; @@ -1374,6 +1459,7 @@ static void ls3_stats_print_to_csv(const char *f_time, double e_time) { struct report_template *report_ptr; struct range_report_template *range_ptr; + const ls3_stats_err_mapping *err; FILE *csv_fd; int i, j, k; @@ -1398,6 +1484,15 @@ static void ls3_stats_print_to_csv(const char *f_time, double e_time) reports_with_stats->free_inodes_count, reports_with_stats->client_mount_path); + fprintf(csv_fd, "Error counters:\n"); + for (err = ls3_err_mappings; + err->counter_num != LS3_STATS_ERROR_END_MARKER; ++err) { + fprintf(csv_fd, "%s [%u]\n", err->str_err_header, + LS3_GET_ERROR_COUNT(reports_with_stats, + err->counter_num)); + } + fprintf(csv_fd, "\n"); + for (i = 0; i < LS3_STATS_TYPE_TOTAL_COUNT_REPORT; i++) { char *value_ts; char *title_text; @@ -2096,8 +2191,15 @@ int ls3_stats_update_info(struct 
ls3_object_attrs *loa_all) * For this reason, we do not align the size. */ if (allocate_file_size == 0 && loa_all->loa_size == 0) { - reports_with_stats->error_counter++; - return 0; + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_WITHOUT_SIZE); + return 0; /* go to next ... */ + } else if (allocate_file_size == 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_EMPTY_BLOCK_SIZE); + } else if (loa_all->loa_size == 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_EMPTY_SIZE); } /* In most cases loa_all->loa_blocks is not available on MDT. */ @@ -2139,7 +2241,6 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all) /* Reports only for regular file's */ if (!(loa_all->loa_mode & S_IFREG)) - /* TODO: Directory processing will be added here */ return 0; if (allocate_file_size != 0) { @@ -2200,39 +2301,49 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all) return 0; rc = llapi_layout_get_last_init_comp(loa_all->loa_layout); - if (rc < 0) - return -1; /* TODO add text message in report file */ + if (rc < 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_GET_LOV_FAILED); + return 0; + } rc = llapi_layout_mirror_count_get(loa_all->loa_layout, &mirror_count); - if (rc < 0) - return -1; + if (rc < 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_GET_MIRROR_CNT); + } else { + ls3_stats_update_range_with_id(LS3_STATS_TYPE_MIRROR_COUNT, + mirror_count, + loa_all->loa_size); + } rc = llapi_layout_stripe_count_get(loa_all->loa_layout, &stripe_count); - if (rc < 0) - return -1; + if (rc < 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_GET_STRIPE_CNT); + } else { + ls3_stats_update_range(LS3_STATS_TYPE_STRIPE_COUNT, + stripe_count, + LS3_STATS_EMPTY_VALUE); + } rc = llapi_layout_stripe_size_get(loa_all->loa_layout, &stripe_size); - if (rc < 0) - return -1; + if (rc < 0) { + LS3_INC_ERROR_COUNT(reports_with_stats, + LS3_STATS_ERROR_GET_STRIPE_SIZE); + } else { + 
ls3_stats_update_range_with_id(LS3_STATS_TYPE_STRIPE_SIZE, + stripe_size, + loa_all->loa_size); + } rc = llapi_layout_compress_get(loa_all->loa_layout, &compr_type, &compr_lvl); - if (rc < 0) - return -1; - - ls3_stats_update_range(LS3_STATS_TYPE_STRIPE_COUNT, stripe_count, - LS3_STATS_EMPTY_VALUE); - ls3_stats_update_range_with_id(LS3_STATS_TYPE_MIRROR_COUNT, - mirror_count, loa_all->loa_size); - - ls3_stats_update_range_with_id(LS3_STATS_TYPE_STRIPE_SIZE, - stripe_size, loa_all->loa_size); - - if (compr_type == LL_COMPR_TYPE_NONE) + if (rc < 0 || compr_type == LL_COMPR_TYPE_NONE) return 0; /* skip compression statistics */ /* TODO: Сompression statistics from MDT by type & lvl */ @@ -2293,6 +2404,9 @@ void ls3_stats_init(void) reports_with_stats->last_user_idx_in_array = 0; reports_with_stats->start_time = time(NULL); pthread_mutex_init(&reports_with_stats->user_rt_mutex, NULL); + + /* Allocate error counters */ + LS3_INIT_ERRORS_COUNT(reports_with_stats); } void ls3_stats_destroy(void) @@ -2355,6 +2469,7 @@ void ls3_stats_destroy(void) free(reports_with_stats->client_mount_path); pthread_mutex_destroy(&reports_with_stats->user_rt_mutex); + LS3_DESTROY_ERRORS_COUNT(reports_with_stats); free(reports_with_stats->users_reports); free(reports_with_stats); } diff --git a/lipe/src/lipe_scan3/ls3_stats.h b/lipe/src/lipe_scan3/ls3_stats.h index 15fe96b..4e12352 100644 --- a/lipe/src/lipe_scan3/ls3_stats.h +++ b/lipe/src/lipe_scan3/ls3_stats.h @@ -17,6 +17,11 @@ #include #include #include +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) +#include /* No supporting on el7.9 */ +#else +#define atomic_uint uint32_t +#endif #include "ls3_debug.h" #include "ls3_object_attrs.h" @@ -50,6 +55,22 @@ typedef struct { } ls3_stats_extension_mapping; typedef enum { + LS3_STATS_ERROR_EMPTY_BLOCK_SIZE = 0, /* Obj allocate size empty */ + LS3_STATS_ERROR_EMPTY_SIZE, /* Obj size empty */ + LS3_STATS_ERROR_WITHOUT_SIZE, /* All size value in obj empty */ + 
LS3_STATS_ERROR_GET_LOV_FAILED, + LS3_STATS_ERROR_GET_MIRROR_CNT, + LS3_STATS_ERROR_GET_STRIPE_CNT, + LS3_STATS_ERROR_GET_STRIPE_SIZE, + LS3_STATS_ERROR_END_MARKER +} ls3_stats_errors; + +typedef struct { + const char *str_err_header; + const char *json_err_header; + ls3_stats_errors counter_num; +} ls3_stats_err_mapping; +typedef enum { LS3_STATS_TYPE_FILES_SIZE = 0, LS3_STATS_TYPE_CAPACITY_USED, LS3_STATS_TYPE_EQUAL_OVERHEAD, @@ -91,7 +112,7 @@ typedef enum { } ls3_stats_val_type; struct range_report_template { - uint64_t rrt_id; /* UID/GID/PRJID/StripeSize */ + int64_t rrt_id; /* UID/GID/PRJID/StripeSize */ uint64_t count_in_range; /* Count object in range */ uint64_t total_in_range; /* Total value in range (size) */ /* last_time_access - only for users time report. @@ -134,6 +155,7 @@ struct ls3_stats_user_report_template { struct fstats_report { pthread_mutex_t user_rt_mutex; + pthread_mutex_t fr_error_mutex; /* only needed on ver. el7.9 */ time_t start_time; uint64_t total_count_files; uint64_t max_count_users_in_report; /* current max array size */ @@ -141,8 +163,8 @@ struct fstats_report { uint32_t inodes_count; uint32_t free_inodes_count; unsigned int block_size; /* from ext2_filsys */ + atomic_uint fr_errors[LS3_STATS_ERROR_END_MARKER]; int report_extension; /* bitmap for extension */ - int error_counter; /* When can't get object size */ char *report_file_name; char *format_report_file_name; char *device_path; @@ -153,7 +175,7 @@ struct fstats_report { struct ls3_stats_user_report_template **users_reports; }; -struct fstats_report *reports_with_stats; +extern struct fstats_report *reports_with_stats; void ls3_stats_init(void); void ls3_stats_destroy(void); int ls3_stats_get_range_index(uint64_t num); -- 1.8.3.1