Whamcloud - gitweb
EX-7717 lipe: Add simple compression ratio statistics
author Vitaliy Kuznetsov <vkuznetsov@ddn.com>
Tue, 12 Dec 2023 14:49:27 +0000 (15:49 +0100)
committer Andreas Dilger <adilger@whamcloud.com>
Thu, 8 Feb 2024 08:53:21 +0000 (08:53 +0000)
This patch adds a new table that displays the data
compression ratio in the overall statistics.

The new table to display compression ratio (for regular files)
will have the following column values:
0. Compression ratio range;
1. Count of files in range;
2. Number of files in range as a percent of total
   number of files;
3. Number of files in this range or smaller as
   a % of total # of files;
4. Total compression size of files in range;
5. Total compression size of files in range as a % of
   total compression size of files;
6. Total compression size of files in this range or
   smaller as a % of total compression size of files;
7. Minimum value in range (ratio);
8. Maximum value in range (ratio).

The columns in the table are numbered from 0 to 8 for a better
understanding of the table without the need to name the
columns with long text.

This patch also changes some variables to the "double" type
so that values are calculated correctly and variables with the
same meaning are not duplicated.

The formatting of reports with the .out extension has also
been improved.

Test-Parameters: trivial testlist=sanity-lipe-scan3
Signed-off-by: Vitaliy Kuznetsov <vkuznetsov@ddn.com>
Change-Id: I242ddb9c4132a7fce81508dadacf8e2b01e3cead
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52372
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Colin Faber <cfaber@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lipe/src/lipe_scan3/ls3_stats.c
lipe/src/lipe_scan3/ls3_stats.h
lustre/tests/sanity-lipe-scan3.sh

index 975785c..c60c93e 100644 (file)
@@ -38,6 +38,24 @@ int ls3_stats_get_range_index(uint64_t num)
        return (int)floor(log2(num));
 }
 
+/* ls3_stats_get_range_index_double - map a value to a 0.2-wide range index.
+ * The index is floor((num + 0.001) / 0.2), so each index spans 0.2 and every
+ * boundary sits 0.001 below a multiple of 0.2 (e.g. 0.1995 gives index 1).
+ *
+ * @param num: The value for which the range index is calculated.
+ *             If num is negative, the function returns 0.
+ *
+ * @return The zero-based range index for num.
+ */
+int ls3_stats_get_range_index_double(double num)
+{
+       if (num < 0.0)
+               return 0;
+
+       /* 0.001 bias: presumably keeps boundary values in the upper range */
+       return (int)floor((num + 0.001) / 0.2);
+}
+
 /* ls3_stats_get_day_diff - Calculates the time difference in
  * days between a given timestamp and the current real-time.
  *
@@ -231,7 +249,6 @@ static ls3_stats_val_type ls3_stats_get_value_type(
        case LS3_STATS_TYPE_STORAGE_SET_SIZE_BY_USER:
        case LS3_STATS_TYPE_STORAGE_SET_SIZE_BY_GROUP:
        case LS3_STATS_TYPE_STORAGE_SET_SIZE_BY_PROJID:
-       case LS3_STATS_TYPE_COMPRESSION_RATIO:
        case LS3_STATS_TYPE_STRIPE_SIZE:
        case LS3_STATS_TYPE_POSITIVE_OVERHEAD:
        case LS3_STATS_TYPE_NEGATIVE_OVERHEAD:
@@ -246,18 +263,18 @@ static ls3_stats_val_type ls3_stats_get_value_type(
                return LS3_STATS_VALUE_TYPE_CHARS;
        case LS3_STATS_TYPE_DIRECTORY_SIZE_ENTRIES:
                return LS3_STATS_VALUE_TYPE_ENTRIES;
-       case LS3_STATS_TYPE_FILES_EMPLOYING_DOM:
-       case LS3_STATS_TYPE_TOTAL_COUNT_REPORT:
-               return LS3_STATS_VALUE_TYPE_ERROR;
-/*     case LS3_STATS_TYPE_POSITIVE_OVERHEAD:
-       case LS3_STATS_TYPE_NEGATIVE_OVERHEAD:
-               return LS3_STATS_VALUE_TYPE_BYTES; */
        case LS3_STATS_TYPE_LINK_COUNT:
                return LS3_STATS_VALUE_TYPE_LINKS;
        case LS3_STATS_TYPE_STRIPE_COUNT:
                return LS3_STATS_VALUE_TYPE_STRIPE;
        case LS3_STATS_TYPE_MIRROR_COUNT:
                return LS3_STATS_VALUE_TYPE_MIRROR;
+       case LS3_STATS_TYPE_COMPRESSION_RATIO:
+               return LS3_STATS_VALUE_TYPE_COMP_RATIO;
+       case LS3_STATS_TYPE_FILES_EMPLOYING_DOM:
+       case LS3_STATS_TYPE_COMPRESSION_FLAG:
+       case LS3_STATS_TYPE_TOTAL_COUNT_REPORT:
+               return LS3_STATS_VALUE_TYPE_ERROR;
        }
 
        return LS3_STATS_VALUE_TYPE_ERROR;
@@ -287,6 +304,8 @@ static char *ls3_stats_get_str_value_type(ls3_stats_val_type num_type)
                return "Stripe";
        case LS3_STATS_VALUE_TYPE_MIRROR:
                return "Mirror";
+       case LS3_STATS_VALUE_TYPE_COMP_RATIO:
+               return "%";
        }
 
        return "";
@@ -341,9 +360,10 @@ static char *ls3_stats_get_title_text(ls3_stats_report_type report_type)
        case LS3_STATS_TYPE_MIRROR_COUNT:
                return "Mirror count (of regular files)";
        case LS3_STATS_TYPE_COMPRESSION_RATIO:
-               return "Compress ratio (of regular files)";
+               return "Compression ratio (of regular files)";
        case LS3_STATS_TYPE_SYMLINK_TARGET_LENGTH:
                return "Symlink target length";
+       case LS3_STATS_TYPE_COMPRESSION_FLAG:
        case LS3_STATS_TYPE_TOTAL_COUNT_REPORT:
                return "";
        }
@@ -642,9 +662,24 @@ static char *ls3_stats_get_header_text(ls3_stats_report_type report_type)
                            "total mirror;\n"
                       "7 - Minimum value in range;\n"
                       "8 - Maximum value in range.\n";
+       case LS3_STATS_TYPE_COMPRESSION_RATIO:
+               return "\n\nCompression ratio (of regular files):\n"
+                      "0 - Compression ratio range;\n"
+                      "1 - Count of files in range;\n"
+                      "2 - Number of files in range as a percent of total "
+                           "number of files;\n"
+                      "3 - Number of files in this range or smaller as a % "
+                           "of total # of files;\n"
+                      "4 - Total compression size of files in range;\n"
+                      "5 - Total compression size of files in range as a % of "
+                           "total compression size of files;\n"
+                      "6 - Total compression size of files in this range or "
+                           "smaller as a % of total compression size of files;\n"
+                      "7 - Minimum value in range;\n"
+                      "8 - Maximum value in range.\n";
        case LS3_STATS_TYPE_FILES_EMPLOYING_DOM:
        case LS3_STATS_TYPE_SYMLINK_TARGET_LENGTH:
-       case LS3_STATS_TYPE_COMPRESSION_RATIO:
+       case LS3_STATS_TYPE_COMPRESSION_FLAG:
        case LS3_STATS_TYPE_TOTAL_COUNT_REPORT:
                return "";
        }
@@ -687,39 +722,29 @@ static void ls3_stats_get_range_str(struct range_report_template *range_ptr,
                                 "       Never changed       ");
                else
                        snprintf(str, strlen,
-                                "%8lu - %8lu %7s",
+                                "%8.0f - %-8.0f %7s",
                                 range_ptr->range_start,
                                 range_ptr->range_end, value_ts);
                break;
+       case LS3_STATS_TYPE_COMPRESSION_RATIO:
+               snprintf(str, strlen,
+                        "%11.2f - %-11.2f %%", range_ptr->range_start,
+                        range_ptr->range_end);
+               break;
+       case LS3_STATS_TYPE_FILES_SIZE:
+       case LS3_STATS_TYPE_CAPACITY_USED:
+               snprintf(str, strlen,
+                        "%10.2f - %-10.2f %2s", range_ptr->range_start,
+                        range_ptr->range_end, value_ts);
+               break;
        default:
                snprintf(str, strlen,
-                        "%8lu - %8lu %7s", range_ptr->range_start,
+                        "%8.0f - %-8.0f %7s", range_ptr->range_start,
                         range_ptr->range_end, value_ts);
                break;
        }
 }
 
-/*
- * json_add_field_with_value is used to add a new field to a given JSON object.
- * It takes a field name and a value (along with its type) or percentage, and
- * formats them into a string.
- * This string is then added as a value for the new field in the JSON object.
- */
-static void json_add_field_with_value(json_object* jobj,
-       const char* field_name, uint64_t val1, const char* val_type,
-       double percent)
-{
-       char buffer[64] = {0};  /* Always not more 64 symbols */
-
-       if (val_type && val_type[0] != '\0')
-               sprintf(buffer, "%lu %s", val1, val_type);
-       else
-               sprintf(buffer, "%.2f %%", percent);
-
-       json_object_object_add(jobj, field_name,
-                               json_object_new_string(buffer));
-}
-
 /* ls3_stats_get_time_str - convert a given number of seconds (since the epoch)
  * to a human-readable date-time string format.
  *
@@ -832,19 +857,24 @@ static void ls3_stats_print_to_json(const char *f_time, double e_time)
                json_object_object_add(jobj_general, "Count",
                        json_object_new_int64(report_ptr->files_count));
 
-               json_add_field_with_value(jobj_general, "Min",
-                       report_ptr->rt_min, value_ts, LS3_STATS_EMPTY_VALUE);
+               json_object_object_add(jobj_general, "Min",
+                       json_object_new_double(report_ptr->rt_min));
 
-               json_add_field_with_value(jobj_general, "Max",
-                       report_ptr->rt_max, value_ts, LS3_STATS_EMPTY_VALUE);
+               json_object_object_add(jobj_general, "Max",
+                       json_object_new_double(report_ptr->rt_max));
 
-               json_add_field_with_value(jobj_general, "Avg",
-                       report_ptr->rt_avg, value_ts, LS3_STATS_EMPTY_VALUE);
+               json_object_object_add(jobj_general, "Avg",
+                       json_object_new_double(report_ptr->rt_avg));
 
-               json_add_field_with_value(jobj_general, "Total",
-                       report_ptr->total_value,
-                       (second_value_ts[0] != '\0') ?
-                        second_value_ts : value_ts, LS3_STATS_EMPTY_VALUE);
+               json_object_object_add(jobj_general, "MinMaxAvgValueType",
+                       json_object_new_string(value_ts));
+
+               json_object_object_add(jobj_general, "TotalValue",
+                       json_object_new_int64(report_ptr->total_value));
+
+               json_object_object_add(jobj_general, "TotalValueType",
+                       json_object_new_string((second_value_ts[0] != '\0') ?
+                                              second_value_ts : value_ts));
 
                for (j = 0; j < report_ptr->count_ranges; j++) {
                        struct json_object *jobj_range =
@@ -859,50 +889,50 @@ static void ls3_stats_print_to_json(const char *f_time, double e_time)
                                continue;       /* Nothing for print */
 
                        ls3_stats_get_range_str(range_ptr, range_t,
-                               sizeof(range_t), j, i,value_ts);
+                               sizeof(range_t), j, i, value_ts);
 
-                       json_object_object_add(jobj_range, "RangeID",
-                                              json_object_new_int(j));
-
-                       json_object_object_add(jobj_range, "RangeStr",
+                       json_object_object_add(jobj_range, "RangeInStr",
                                               json_object_new_string(range_t));
 
+                       json_object_object_add(jobj_range, "RangeStart",
+                               json_object_new_double(range_ptr->range_start));
+
+                       json_object_object_add(jobj_range, "RangeEnd",
+                               json_object_new_double(range_ptr->range_end));
+
+                       json_object_object_add(jobj_range, "RangeID",
+                               json_object_new_int64(range_ptr->rrt_id));
+
                        json_object_object_add(jobj_range, "CountInRange",
                                json_object_new_int64(range_ptr->count_in_range));
 
-                       json_add_field_with_value(jobj_range,
-                                                 "PercentageInRange",
-                                                 LS3_STATS_EMPTY_VALUE, "",
-                                                 range_ptr->percentage);
+                       json_object_object_add(jobj_range, "PercentageInRange",
+                               json_object_new_double(range_ptr->percentage));
+
+                       json_object_object_add(jobj_range, "CumulativePercentageInRange",
+                               json_object_new_double(range_ptr->cumulative_percentage));
 
-                       json_add_field_with_value(jobj_range,
-                                                 "CumulativePercentageInRange",
-                                                 LS3_STATS_EMPTY_VALUE , "",
-                                                 range_ptr->cumulative_percentage);
+                       json_object_object_add(jobj_range, "TotalInRange",
+                               json_object_new_int64(range_ptr->total_in_range));
 
-                       json_add_field_with_value(jobj_range, "TotalInRange",
-                                                 range_ptr->total_in_range,
-                                                 (second_value_ts[0] != '\0') ?
-                                                 second_value_ts : value_ts,
-                                                 LS3_STATS_EMPTY_VALUE);
+                       json_object_object_add(jobj_general, "TotalInRangeType",
+                               json_object_new_string((second_value_ts[0] != '\0') ?
+                                                      second_value_ts : value_ts));
 
-                       json_add_field_with_value(jobj_range,
-                                                 "PercentTotalInRange",
-                                                 LS3_STATS_EMPTY_VALUE, "",
-                                                 range_ptr->percent_in_range);
+                       json_object_object_add(jobj_range, "PercentTotalInRange",
+                               json_object_new_double(range_ptr->percent_in_range));
 
-                       json_add_field_with_value(jobj_range,
-                                                 "PercentCumulativeTotalInRange",
-                                                 LS3_STATS_EMPTY_VALUE, "",
-                                                 range_ptr->percent_cumulative_in_range);
+                       json_object_object_add(jobj_range, "PercentCumulativeTotalInRange",
+                               json_object_new_double(range_ptr->percent_cumulative_in_range));
 
-                       json_add_field_with_value(jobj_range, "MinValueInRange",
-                                                 range_ptr->rrt_min, value_ts,
-                                                 LS3_STATS_EMPTY_VALUE);
+                       json_object_object_add(jobj_range, "MinValueInRange",
+                               json_object_new_double(range_ptr->rrt_min));
 
-                       json_add_field_with_value(jobj_range, "MaxValueInRange",
-                                                 range_ptr->rrt_max, value_ts,
-                                                 LS3_STATS_EMPTY_VALUE);
+                       json_object_object_add(jobj_range, "MaxValueInRange",
+                               json_object_new_double(range_ptr->rrt_max));
+
+                       json_object_object_add(jobj_general, "MinMaxType",
+                               json_object_new_string(value_ts));
 
                        json_object_array_add(jobj_ranges, jobj_range);
                }
@@ -974,11 +1004,11 @@ static void ls3_stats_print_to_json(const char *f_time, double e_time)
                                                        range_ptr->count_in_range));
                                json_object_object_add(jobj_range,
                                                       "MinSizeKB",
-                                                      json_object_new_int64(
+                                                      json_object_new_double(
                                                        range_ptr->rrt_min));
                                json_object_object_add(jobj_range,
                                                       "MaxSizeKB",
-                                                      json_object_new_int64(
+                                                      json_object_new_double(
                                                        range_ptr->rrt_max));
                                json_object_object_add(jobj_range,
                                                       "TotalSizeInRangeKB",
@@ -1063,9 +1093,9 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time)
                        "\n%s:\n"
                        "  General:\n"
                        "    Count: %lu\n"
-                       "    Min: %lu %s\n"
-                       "    Max: %lu %s\n"
-                       "    Avg: %lu %s\n"
+                       "    Min: %.2f %s\n"
+                       "    Max: %.2f %s\n"
+                       "    Avg: %.2f %s\n"
                        "    Total: %lu %s\n"
                        "  Ranges:\n",
                        title_text, report_ptr->files_count,
@@ -1100,8 +1130,8 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time)
                                "      TotalCapacityInRange: %lu %s\n"
                                "      PercentCapacityInRange: %.2f\n"
                                "      PercentCumulativeCapacityInRange: %.2f\n"
-                               "      MinValueInRange: %lu %s\n"
-                               "      MaxValueInRange: %lu %s\n",
+                               "      MinValueInRange: %.2f %s\n"
+                               "      MaxValueInRange: %.2f %s\n",
                                j, range_t,
                                range_ptr->count_in_range, range_ptr->percentage,
                                range_ptr->cumulative_percentage,
@@ -1152,11 +1182,11 @@ static void ls3_stats_print_to_yaml(const char *f_time, double e_time)
                                ls3_stats_get_time_str(buffer, sizeof(buffer),
                                                range_ptr->last_time_access);
                                fprintf(yaml_fd,
-                                       "        RangeStart: %lu %s\n"
-                                       "        RangeEnd: %lu %s\n"
+                                       "        RangeStart: %.2f %s\n"
+                                       "        RangeEnd: %.2f %s\n"
                                        "        CountFilesInRange: %lu\n"
-                                       "        MinSize: %lu %s\n"
-                                       "        MaxSize: %lu %s\n"
+                                       "        Min: %.2f %s\n"
+                                       "        Max: %.2f %s\n"
                                        "        TotalSizeInRange: %lu %s\n"
                                        "        LastTimeAccess: %s\n",
                                        range_ptr->range_start, value_ts,
@@ -1221,8 +1251,9 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time)
                header_text = ls3_stats_get_header_text(i);
 
                fprintf(out_fd, "%s\nGeneral: Count: %lu | Min value in ranges:"
-                       " %lu %s | Max value in ranges: %lu %s| Average value in ranges:"
-                       " %lu %s | Total value: %lu %s\n\n",
+                       " %.2f %s | Max value in ranges: %.2f %s |"
+                       " Average value in ranges: %.2f %s |"
+                       " Total value: %lu %s\n\n",
                        header_text, report_ptr->files_count, report_ptr->rt_min,
                        value_ts, report_ptr->rt_max, value_ts,
                        report_ptr->rt_avg, value_ts, report_ptr->total_value,
@@ -1250,7 +1281,7 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time)
 
                        fprintf(out_fd, "[ %s] [ %11lu] [%6.2f%%]"
                                " [%6.2f%% cumulative] [%11lu %7s] [%6.2f%%]"
-                               " [%6.2f%% cumulative] [%11lu %7s] [%11lu %7s]\n",
+                               " [%6.2f%% cumulative] [%11.2f %7s] [%11.2f %7s]\n",
                                range_t, range_ptr->count_in_range,
                                range_ptr->percentage,
                                range_ptr->cumulative_percentage,
@@ -1291,11 +1322,7 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time)
                                ls3_stats_get_str_value_type(report_ptr->value_type);
                        second_value_ts =
                                ls3_stats_get_str_value_type(report_ptr->second_value_type);
-                       fprintf(out_fd, "[-----------------------------"
-                               "--------------------------------------"
-                               "--------------------------------------"
-                               "----------------------------]\n");
-                       fprintf(out_fd, "[ %50s  %79s ]\n", title, " " );
+                       fprintf(out_fd, "[ %-50s  %79s ]\n", title, " " );
                        fprintf(out_fd, "[-----------------------------"
                                "--------------------------------------"
                                "--------------------------------------"
@@ -1315,8 +1342,8 @@ static void ls3_stats_print_to_out(const char *f_time, double e_time)
                                ls3_stats_get_time_str(buffer, sizeof(buffer),
                                                range_ptr->last_time_access);
 
-                               fprintf(out_fd, "[ %8lu - %8lu %7s] [ %11lu ]"
-                                       " [%11lu %7s] [%11lu %7s]"
+                               fprintf(out_fd, "[ %8.0f - %8.0f %7s] [ %11lu ]"
+                                       " [%11.0f %7s] [%11.0f %7s]"
                                        " [%11lu %7s] [%20s]\n",
                                        range_ptr->range_start,
                                        range_ptr->range_end, value_ts,
@@ -1383,8 +1410,8 @@ static void ls3_stats_print_to_csv(const char *f_time, double e_time)
                title_text = ls3_stats_get_title_text(i);
 
                fprintf(csv_fd, "Histogram,%s\n", title_text);
-               fprintf(csv_fd, "General,\nCount,%lu\nMinValueInRanges,%lu,%s\n"
-                       "MaxValueInRanges,%lu,%s\nAverageValueInRanges,%lu,%s\n"
+               fprintf(csv_fd, "General,\nCount,%lu\nMinValueInRanges,%.2f,%s\n"
+                       "MaxValueInRanges,%.2f,%s\nAverageValueInRanges,%.2f,%s\n"
                        "TotalValue,%lu,%s\n\n",
                        report_ptr->files_count, report_ptr->rt_min,
                        value_ts, report_ptr->rt_max, value_ts,
@@ -1412,7 +1439,7 @@ static void ls3_stats_print_to_csv(const char *f_time, double e_time)
                                                sizeof(range_t),
                                                j, i, value_ts);
                        fprintf(csv_fd, "%s,%lu,%.2f,%.2f,%lu %s,%.2f,%.2f,"
-                               "%lu %s,%lu %s\n",
+                               "%.2f %s,%.2f %s\n",
                                range_t, range_ptr->count_in_range,
                                range_ptr->percentage,
                                range_ptr->cumulative_percentage,
@@ -1463,7 +1490,7 @@ static void ls3_stats_print_to_csv(const char *f_time, double e_time)
                                ls3_stats_get_time_str(buffer, sizeof(buffer),
                                                range_ptr->last_time_access);
 
-                               fprintf(csv_fd, "%lu,%lu,%lu,%lu,%lu,%lu,%s\n",
+                               fprintf(csv_fd, "%.2f,%.2f,%lu,%.2f,%.2f,%lu,%s\n",
                                        range_ptr->range_start,
                                        range_ptr->range_end,
                                        range_ptr->count_in_range,
@@ -1527,6 +1554,7 @@ void ls3_stats_calculate_values(void)
        for (i = 0; i < LS3_STATS_TYPE_TOTAL_COUNT_REPORT; i++) {
                double cumulative_percentage = 0;
                double percent_cumulative_in_range = 0;
+               double total_weighted_sum = 0;
 
                report_ptr = reports_with_stats->reports[i];
                if (report_ptr == NULL)
@@ -1539,9 +1567,6 @@ void ls3_stats_calculate_values(void)
                        reports_with_stats->total_count_files =
                                report_ptr->files_count;
 
-               report_ptr->rt_avg =
-                       report_ptr->total_value / report_ptr->files_count;
-
                for (j = 0; j < report_ptr->count_ranges; j++) {
                        range_ptr = report_ptr->fs_ranges[j];
                        if (range_ptr == NULL)
@@ -1569,7 +1594,26 @@ void ls3_stats_calculate_values(void)
 
                        percent_cumulative_in_range +=
                                range_ptr->percent_in_range;
+
+                       if (i == LS3_STATS_TYPE_COMPRESSION_RATIO) {
+                               /* total_weighted_sum - for calculating general
+                                * average value when using the ratio range */
+                               double range_avg = (range_ptr->range_start +
+                                                  range_ptr->range_end) / 2.0;
+
+                               total_weighted_sum += range_avg *
+                                                     range_ptr->count_in_range;
+                       }
                }
+
+               if (i == LS3_STATS_TYPE_COMPRESSION_RATIO)
+                       report_ptr->rt_avg = total_weighted_sum /
+                                            report_ptr->files_count;
+               else
+                       /* By default */
+                       report_ptr->rt_avg = report_ptr->total_value /
+                                            report_ptr->files_count;
+
        }
 }
 
@@ -1590,6 +1634,19 @@ static struct range_report_template *ls3_stats_get_new_range_ptr(uint64_t res)
        return range_ptr;
 }
 
+static struct range_report_template *ls3_stats_get_new_range_double_ptr(double res)
+{
+       struct range_report_template *range_ptr;
+       /* Build the range record for res; rrt_min starts at a huge sentinel */
+       range_ptr = xcalloc(1, sizeof(struct range_report_template));
+       range_ptr->rrt_min = UINT64_MAX;
+       /* Snap res down to a multiple of 0.2 (range width); the 0.001 bias
+        * matches the one used in ls3_stats_get_range_index_double() */
+       range_ptr->range_start = floor((res + 0.001) / 0.2) * 0.2;
+       range_ptr->range_end = range_ptr->range_start + 0.2;
+       return range_ptr;
+}
+
 static struct report_template *ls3_stats_get_new_report_ptr(void)
 {
        struct report_template *report_ptr =
@@ -1598,7 +1655,6 @@ static struct report_template *ls3_stats_get_new_report_ptr(void)
        report_ptr->count_ranges = LS3_STATS_COUNT_RANGE_BY_DEFAULT;
        report_ptr->rt_min = UINT64_MAX;
        report_ptr->total_value = 0;
-       report_ptr->current_max_range = pow(2, report_ptr->count_ranges);
        report_ptr->fs_ranges = (struct range_report_template**)xcalloc(
                                 report_ptr->count_ranges,
                                 sizeof(struct range_report_template*));
@@ -1625,6 +1681,52 @@ static void ls3_stats_expand_num_of_ranges(struct report_template *report_ptr,
                num_bytes_to_clear);
 }
 
+static void ls3_stats_update_range_double(ls3_stats_report_type report_type,
+                                         double range_value, int64_t file_size)
+{
+       int index_range;
+       struct report_template *report_ptr;
+       struct range_report_template *range_ptr;
+       /* Account one file: range_value picks the range, file_size is added */
+       report_ptr = reports_with_stats->reports[report_type];
+       if (!report_ptr)
+               LS3_FATAL("Unable to access the report data: %d\n", report_type);
+       /* NOTE(review): the two type fields are written before the lock */
+       report_ptr->second_value_type = LS3_STATS_VALUE_TYPE_KB;
+       report_ptr->value_type = ls3_stats_get_value_type(report_type);
+       index_range = ls3_stats_get_range_index_double(range_value);
+       /* All min/max/counter updates below run under the report mutex */
+       pthread_mutex_lock(&report_ptr->report_template_mutex);
+       if (range_value < report_ptr->rt_min)
+               report_ptr->rt_min = range_value;
+
+       if (range_value > report_ptr->rt_max) {
+               report_ptr->rt_max = range_value;
+               /* Array growth is only checked when a new maximum is seen */
+               if (index_range > report_ptr->count_ranges - 1)
+                       ls3_stats_expand_num_of_ranges(report_ptr, index_range);
+       }
+       /* The range record is created on first use of its index */
+       range_ptr = report_ptr->fs_ranges[index_range];
+       if (!range_ptr) {
+               range_ptr = ls3_stats_get_new_range_double_ptr(range_value);
+               report_ptr->fs_ranges[index_range] = range_ptr;
+       }
+       /* >> 10 divides by 1024: totals are kept in KB (file_size in bytes?) */
+       range_ptr->total_in_range += file_size >> 10;
+       report_ptr->total_value += file_size >> 10;
+       range_ptr->count_in_range++;
+       report_ptr->files_count++;
+
+       if (range_value < range_ptr->rrt_min)
+               range_ptr->rrt_min = range_value;
+
+       if (range_value > range_ptr->rrt_max)
+               range_ptr->rrt_max = range_value;
+
+       pthread_mutex_unlock(&report_ptr->report_template_mutex);
+}
+
 static void ls3_stats_update_range(ls3_stats_report_type report_type,
        int64_t range_value, int64_t second_value)
 {
@@ -1652,8 +1754,7 @@ static void ls3_stats_update_range(ls3_stats_report_type report_type,
                break;
        default:
                result_range_value = range_value;
-               report_ptr->second_value_type =
-                       LS3_STATS_VALUE_TYPE_EMPTY;
+               report_ptr->second_value_type = LS3_STATS_VALUE_TYPE_EMPTY;
                result_second_value = 0;
                break;
        }
@@ -1667,7 +1768,6 @@ static void ls3_stats_update_range(ls3_stats_report_type report_type,
 
        if (result_range_value > report_ptr->rt_max) {
                report_ptr->rt_max = result_range_value;
-               report_ptr->current_max_range = pow(2, report_ptr->rt_max);
 
                if (index_range > report_ptr->count_ranges - 1)
                        ls3_stats_expand_num_of_ranges(report_ptr, index_range);
@@ -1758,10 +1858,8 @@ static void ls3_stats_update_range_with_id(ls3_stats_report_type report_type,
        if (result_second_value < report_ptr->rt_min)
                report_ptr->rt_min = result_second_value;
 
-       if (result_second_value > report_ptr->rt_max) {
+       if (result_second_value > report_ptr->rt_max)
                report_ptr->rt_max = result_second_value;
-               report_ptr->current_max_range = local_id;
-       }
 
        range_ptr = ls3_stats_find_id_in_ranges(local_id, report_ptr);
        if (!range_ptr) {
@@ -1942,7 +2040,6 @@ static void ls3_stats_update_user_report_range(ls3_stats_users_reports report_ty
 
        if (result_range_value > report_ptr->rt_max) {
                report_ptr->rt_max = result_range_value;
-               report_ptr->current_max_range = pow(2, report_ptr->rt_max);
 
                if (index_range > report_ptr->count_ranges - 1)
                        ls3_stats_expand_num_of_ranges(report_ptr, index_range);
@@ -1975,24 +2072,30 @@ static void ls3_stats_update_user_report_range(ls3_stats_users_reports report_ty
 int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
 {
        struct lipe_path_entry *lpe;
-       uint64_t allocate_file_size = 0;
        const char *file_name;
+       uint64_t allocate_file_size = 0;
+       int64_t cmps_fsize = 0;
+       double cmps_ratio = 0;
 
        /* loa_all->loa_blocks is taken from the inode and reflects the
         * number of 512B blocks. Since in the blocks_from_inode()
         * function we get the value from inode->i_blocks which is equal
         * to the number of blocks with a size of 512B */
-       allocate_file_size = loa_all->loa_blocks * 512;
+       if (loa_all->loa_blocks != 0)
+               allocate_file_size = loa_all->loa_blocks * 512;
+       else if (loa_all->loa_i_blocks != 0)
+               allocate_file_size = loa_all->loa_i_blocks * 512;
        /* It is likely that loa_all->loa_size is also aligned to a
         * block of 512 bytes somewhere in EXT2_I_SIZE(inode)...
         * For this reason, we do not align the size. */
 
-       if (allocate_file_size == 0 && loa_all->loa_size == 0)
-               /* Skip this, TODO: reports_with_stats->error_counter++ */
+       if (allocate_file_size == 0 && loa_all->loa_size == 0) {
+               reports_with_stats->error_counter++;
                return 0;
+       }
 
        /* In most cases loa_all->loa_blocks is not available on MDT. */
-       if (!reports_with_stats->device_is_mdt) {
+       if (!reports_with_stats->device_is_mdt && allocate_file_size != 0) {
                /* Get equal overhead (files whose size is equal
                 * to the capacity used). */
                if (allocate_file_size == loa_all->loa_size)
@@ -2027,12 +2130,21 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
 
        /* Reports only for regular files */
        if (!(loa_all->loa_mode & S_IFREG))
+               /* TODO: Directory processing will be added here */
                return 0;
 
+       if (allocate_file_size != 0) {
+               ls3_stats_update_range(LS3_STATS_TYPE_CAPACITY_USED,
+                                      allocate_file_size,
+                                      LS3_STATS_EMPTY_VALUE);
+               /* For compression statistics */
+               cmps_ratio = (double)loa_all->loa_size /
+                            (double)allocate_file_size;
+               cmps_fsize = loa_all->loa_size - allocate_file_size;
+       }
+
        ls3_stats_update_range(LS3_STATS_TYPE_FILES_SIZE, loa_all->loa_size,
                               LS3_STATS_EMPTY_VALUE);
-       ls3_stats_update_range(LS3_STATS_TYPE_CAPACITY_USED, allocate_file_size,
-                              LS3_STATS_EMPTY_VALUE);
        ls3_stats_update_range(LS3_STATS_TYPE_TIME_SINCE_LAST_MOD_RF,
                               loa_all->loa_mtime, loa_all->loa_size);
        ls3_stats_update_range(LS3_STATS_TYPE_TIME_SINCE_LAST_MD_MOD_RF,
@@ -2071,6 +2183,8 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
                uint16_t mirror_count = 0;
                uint64_t stripe_count = 0;
                uint64_t stripe_size = 0;
+               uint8_t compr_type;
+               uint8_t compr_lvl;
                int rc;
 
                if (!loa_all->loa_layout)
@@ -2095,6 +2209,11 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
                if (rc < 0)
                        return -1;
 
+               rc = llapi_layout_compress_get(loa_all->loa_layout,
+                                              &compr_type, &compr_lvl);
+               if (rc < 0)
+                       return -1;
+
                ls3_stats_update_range(LS3_STATS_TYPE_STRIPE_COUNT, stripe_count,
                                       LS3_STATS_EMPTY_VALUE);
 
@@ -2104,13 +2223,18 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
                ls3_stats_update_range_with_id(LS3_STATS_TYPE_STRIPE_SIZE,
                                               stripe_size, loa_all->loa_size);
 
-               /* TODO: Get info about compression (lvl/type/chunk_log_bits)
-                * from loa_all->loa_layout via struct llapi_layout_comp */
+               if (compr_type == LL_COMPR_TYPE_NONE)
+                       return 0;       /* skip compression statistics */
+
+               /* TODO: Compression statistics from MDT by type & lvl */
+
        } else {
                struct filter_fid ptr_filter_fid;
                struct ost_layout ptr_ost_layout;
+               struct ost_layout_compr layout_compr;
 
                ptr_filter_fid = loa_all->loa_filter_fid;
+               layout_compr = ptr_filter_fid.ff_layout_compr;
                ptr_ost_layout = ptr_filter_fid.ff_layout;
                ls3_stats_update_range(LS3_STATS_TYPE_STRIPE_COUNT,
                                       ptr_ost_layout.ol_stripe_count,
@@ -2119,8 +2243,22 @@ int ls3_stats_update_info(struct ls3_object_attrs *loa_all)
                ls3_stats_update_range_with_id(LS3_STATS_TYPE_STRIPE_SIZE,
                                               ptr_ost_layout.ol_stripe_size,
                                               loa_all->loa_size);
+
+               if (layout_compr.ol_compr_type == LL_COMPR_TYPE_NONE)
+                       return 0;       /* skip compression statistics */
+
+               /* TODO: Compression statistics from OST by type & lvl
+                *       using struct ost_layout_compr */
        }
 
+       if (allocate_file_size == 0 ||
+           loa_all->loa_size == 0 || allocate_file_size > loa_all->loa_size)
+               return 0;       /* skip compression statistics */
+
+       /* Compression statistics by ratio (MDT/OST) */
+       ls3_stats_update_range_double(LS3_STATS_TYPE_COMPRESSION_RATIO,
+                                     cmps_ratio, cmps_fsize);
+
        return 0;
 }
 
index c2ced90..15fe96b 100644 (file)
@@ -20,7 +20,7 @@
 #include "ls3_debug.h"
 #include "ls3_object_attrs.h"
 
-#define LS3_STATS_VERSION 1.0
+#define LS3_STATS_VERSION 1.1
 #define LS3_STATS_EMPTY_VALUE 0
 #define LS3_STATS_COUNT_RANGE_BY_DEFAULT 10
 #define LS3_STATS_ARRAY_SIZE_BY_DEFAULT 100
@@ -72,6 +72,7 @@ typedef enum {
        LS3_STATS_TYPE_STRIPE_COUNT,
        LS3_STATS_TYPE_STRIPE_SIZE,
        LS3_STATS_TYPE_MIRROR_COUNT,
+       LS3_STATS_TYPE_COMPRESSION_FLAG,
        LS3_STATS_TYPE_TOTAL_COUNT_REPORT       /* Should always be the last */
 } ls3_stats_report_type;
 
@@ -85,20 +86,21 @@ typedef enum {
        LS3_STATS_VALUE_TYPE_BYTES,
        LS3_STATS_VALUE_TYPE_STRIPE,
        LS3_STATS_VALUE_TYPE_MIRROR,
+       LS3_STATS_VALUE_TYPE_COMP_RATIO,
        LS3_STATS_VALUE_TYPE_EMPTY
 } ls3_stats_val_type;
 
 struct range_report_template {
        uint64_t rrt_id;                        /* UID/GID/PRJID/StripeSize */
-       uint64_t range_start;
-       uint64_t range_end;
        uint64_t count_in_range;                /* Count object in range */
        uint64_t total_in_range;                /* Total value in range (size) */
-       uint64_t rrt_min;
-       uint64_t rrt_max;
        /* last_time_access - only for users time report.
         * Displays the last modified date of the files in the range */
        uint64_t last_time_access;
+       double range_start;
+       double range_end;
+       double rrt_min;
+       double rrt_max;
        double percentage;
        double cumulative_percentage;
        double percent_in_range;
@@ -109,11 +111,10 @@ struct report_template {
        pthread_mutex_t report_template_mutex;
        uint64_t files_count;                   /* total files count in report */
        uint64_t total_value;                   /* size/chars/... in all range */
-       uint64_t rt_min;
-       uint64_t rt_max;
-       uint64_t rt_avg;
        uint64_t last_idx_in_fs_ranges;         /* current last idx in fs_ranges */
-       unsigned int current_max_range;
+       double rt_min;
+       double rt_max;
+       double rt_avg;
        unsigned int count_ranges;
        ls3_stats_val_type value_type;          /* KB/entries/chars/links/days */
        ls3_stats_val_type second_value_type;   /* KB/entries/chars/links/days */
index e8284df..a9ee7d0 100644 (file)
@@ -1075,9 +1075,9 @@ create_files() {
        local num_files=$1
        local file=$2
 
-       # Create one large sparse file on 1T == 1099511627776 byte
-       truncate "$file" 1099511627776
-       [[ $? -ne 0 ]] && error "truncate ended with an error"
+       # Create one 123 MiB file (125952 KiB)
+       fallocate -l 125952K "$file"
+       [[ $? -ne 0 ]] && error "fallocate N0 ended with an error"
        sync
 
        # Create many small files of different sizes
@@ -1108,7 +1108,7 @@ test_306() {
        count_files_ls=$(ls -1 "$MOUNT" | grep -vE '^total' | wc -l)
        total_size_ls=$(ls -l "$MOUNT" | grep '^total' | awk '{print $2}')
 
-       (( count_files_ls > 2 )) || error "fallocate ended with an error"
+       (( count_files_ls > 2 )) || error "fallocate N1 ended with an error"
 
        out=$(lipe_scan3_facet "$facet" --collect-fsize-stats="$report_path")
 
@@ -1124,7 +1124,7 @@ test_306() {
        total_size=$(cat "$report_path" |
                     jq '.Reports[] | select(.GeneralInfo.Title ==
                     "Capacity used (Regular files space used on disk)") |
-                    .GeneralInfo.Total' | grep -oE '[0-9]+')
+                    .GeneralInfo.TotalValue' | grep -oE '[0-9]+')
 
        id_from_report=$(cat "$report_path" |
                         jq '.UserTimeReports[] |