Whamcloud - gitweb
EX-9280 lipe: extend periodic stats in lpurge
author: Alexandre Ioffe <aioffe@ddn.com>
Thu, 30 May 2024 02:45:31 +0000 (19:45 -0700)
committer: Andreas Dilger <adilger@whamcloud.com>
Thu, 6 Jun 2024 08:17:48 +0000 (08:17 +0000)
Add the following periodic stats to lpurge:
- Size and number of files which are not purged because they are
   - stale
   - not mirrored
- Total and used number of inodes

These stats are refreshed with each purge cycle.  For example:

  testfs-OST0000: INFO: used_kb: 179564 (3%) total_kb: 5496292
                        used_inodes: 301 (0%) total_inodes: 375360
  testfs-OST0000: INFO: purged: 1 (20480KB 0%) failed_del: 0 (0KB 0%)
                        stale: 0 (0KB 0%) nomirror: 2 (178176KB 3%)

Test-Parameters: trivial testlist=hot-pools
Signed-off-by: Alexandre Ioffe <aioffe@ddn.com>
Change-Id: Ib404afe2b9d636bf1deaf8948411616971443932
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55248
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lipe/src/lpurge.c

index 2b72e1e..c0105cf 100644 (file)
@@ -111,16 +111,18 @@ static struct stats {
        unsigned long s_scans;
        unsigned long s_fast_scans;
        unsigned long s_slow_scans;
-       unsigned long s_queued;         /* files queued for purge */
-       unsigned long s_started;        /* files dequeued for purge by worker */
-       unsigned long s_purged;         /* files successfully purged */
-       unsigned long s_purged1;        /* # files successfully purged in one period */
-       unsigned long s_purged1_kb;     /* purged KB in one period*/
-       unsigned long s_failed;         /* files failed to purge */
-       unsigned long s_failed1;        /* # files failed to purge in one period */
+       unsigned long s_queued;         /* files queued for purge */
+       unsigned long s_started;        /* files dequeued for purge by worker */
+       unsigned long s_purged;         /* files successfully purged */
+       unsigned long s_purged1;        /* files purged in period */
+       unsigned long s_purged1_kb;     /* purged KB in period */
+       unsigned long s_failed;         /* files failed to purge */
+       unsigned long s_failed1;        /* files failed to purge in period */
        unsigned long s_failed1_kb;     /* total not purged KB */
-       unsigned long s_stale_mirror1;  /* # stale mirror not purged in one period */
-       unsigned long s_stale_mirror1_kb; /* # stale mirror KB not purged in one period */
+       unsigned long s_stale_mirror1;  /* stale files not purged in period */
+       unsigned long s_stale_mirror1_kb; /* stale KB's not purged in period */
+       unsigned long s_nomirror1;      /* no mirror skipped in period */
+       unsigned long s_nomirror_kb1;   /* no mirror skipped KB's in period */
 } stats;
 
 #define DEF_MIN_USED 10 /* 100 - DEF_FREEHI */
@@ -389,7 +391,10 @@ static int lpurge_get_ost_mntpt(void)
        return -1;
 }
 
-static int lpurge_get_used_and_total_kb(unsigned long long *p_used_kb, unsigned long long *p_total_kb)
+static int lpurge_get_used_and_total_kb(unsigned long long *p_used_kb,
+                                       unsigned long long *p_total_kb,
+                                       unsigned long long *p_used_inodes,
+                                       unsigned long long *p_total_inodes)
 {
        int rc;
        struct statfs statfs_buf;
@@ -416,17 +421,23 @@ static int lpurge_get_used_and_total_kb(unsigned long long *p_used_kb, unsigned
        if (p_total_kb != NULL)
                *p_total_kb = total_kb;
 
+       if (p_used_inodes != NULL)
+               *p_used_inodes = statfs_buf.f_files - statfs_buf.f_ffree;
+
+       if (p_total_inodes != NULL)
+               *p_total_inodes = statfs_buf.f_files;
+
        return 0;
 }
 
 static int lpurge_get_used_kb(unsigned long long *p_used_kb)
 {
-       return lpurge_get_used_and_total_kb(p_used_kb, NULL);
+       return lpurge_get_used_and_total_kb(p_used_kb, NULL, NULL, NULL);
 }
 
 static int lpurge_get_total_kb(unsigned long long *p_total_kb)
 {
-       return lpurge_get_used_and_total_kb(NULL, p_total_kb);
+       return lpurge_get_used_and_total_kb(NULL, p_total_kb, NULL, NULL);
 }
 
 static unsigned long long lpurge_total_kb;
@@ -1274,18 +1285,29 @@ again:
 static void lpurge_purge(void)
 {
        unsigned long long used_kb;
+       unsigned long long total_kb = 0;
+       unsigned long long used_inodes;
+       unsigned long long total_inodes = 0;
        int i, rc;
 
-       rc = lpurge_get_used_kb(&used_kb);
+       rc = lpurge_get_used_and_total_kb(&used_kb, &total_kb,
+                                         &used_inodes, &total_inodes);
        if (rc || used_kb <= lpurge_min_used_kb)
                return;
 
-       LX_INFO("used_kb %llu, try to release some\n", used_kb);
+       LX_INFO("used_kb: %llu (%lu%%), " \
+               "used_inodes: %llu (%u%%) try to release some\n",
+               used_kb, total_kb ? used_kb*100/total_kb : 0,
+               used_inodes,
+               total_inodes ? used_inodes*100/total_inodes : 0);
 
        /* start from the oldest group */
        for (i = LPURGE_HIST_MAX - 1; i >= 0; i--) {
                struct lpurge_slot *ls = lpurge_hist + i;
 
+               stats.s_nomirror1 += ls->ls_nomirror_objs;
+               stats.s_nomirror_kb1 += ls->ls_nomirror_used_kb;
+
                if (ls->ls_found == 0/* || ls->ls_used_kb == 0*/)
                        continue;
 
@@ -1299,20 +1321,36 @@ static void lpurge_purge(void)
                lpurge_purge_slot(ls, used_kb - lpurge_min_used_kb);
        }
 
-       LX_INFO("purged: %lu (%lu KB) failed: %lu (%lu KB) stale: %lu (%lu KB)\n",
+       rc = lpurge_get_used_and_total_kb(&used_kb, &total_kb,
+                                         &used_inodes, &total_inodes);
+       if (!rc)
+               LX_INFO("used_kb: %llu (%u%%) total_kb: %llu " \
+                       "used_inodes: %llu (%u%%) total_inodes: %llu\n",
+                       used_kb, total_kb ? used_kb*100/total_kb : 0, total_kb,
+                       used_inodes,
+                       total_inodes ? used_inodes*100/total_inodes : 0,
+                       total_inodes);
+
+       LX_INFO("purged: %lu (%luKB %u%%) " \
+               "failed_del: %lu (%luKB %u%%) " \
+               "stale: %lu (%luKB %u%%) " \
+               "nomirror: %lu (%luKB %u%%)\n",
                stats.s_purged1, stats.s_purged1_kb,
+               total_kb ? stats.s_purged1_kb*100/total_kb : 0,
                stats.s_failed1, stats.s_failed1_kb,
-               stats.s_stale_mirror1, stats.s_stale_mirror1_kb);
+               total_kb ? stats.s_failed1_kb*100/total_kb : 0,
+               stats.s_stale_mirror1, stats.s_stale_mirror1_kb,
+               total_kb ? stats.s_stale_mirror1_kb*100/total_kb : 0,
+               stats.s_nomirror1, stats.s_nomirror_kb1,
+               total_kb ? stats.s_nomirror_kb1*100/total_kb : 0);
        stats.s_purged1 = 0;
        stats.s_purged1_kb = 0;
        stats.s_failed1 = 0;
        stats.s_failed1_kb = 0;
        stats.s_stale_mirror1 = 0;
        stats.s_stale_mirror1_kb = 0;
-
-       rc = lpurge_get_used_kb(&used_kb);
-       if (!rc)
-               LX_INFO("used_kb = %llu\n", used_kb);
+       stats.s_nomirror1 = 0;
+       stats.s_nomirror_kb1 = 0;
 }
 
 static void lpurge_scan(void)
@@ -1796,7 +1834,7 @@ static void lpurge_usr1_handle(int sig)
 
        used_kb = 0;
        total_kb = 0;
-       lpurge_get_used_and_total_kb(&used_kb, &total_kb);
+       lpurge_get_used_and_total_kb(&used_kb, &total_kb, NULL, NULL);
        fprintf(f, "space:\n"
                "    kbfree: %llu\n"
                "    low: %llu\n"