From afd1d6259e9ae569dc8bb8afa6366dad2d3efa25 Mon Sep 17 00:00:00 2001 From: Lei Feng Date: Fri, 6 Aug 2021 10:40:21 +0800 Subject: [PATCH] EX-3554 lipe: improve log messages for lpcc_purge lpcc service handle log_level option for lpcc_purge. Add more log messages for lpcc_purge. Change-Id: I1fd41d32cc6add00acea60c38985774cd5b7071a Signed-off-by: Lei Feng Test-Parameters: trivial Reviewed-on: https://review.whamcloud.com/44519 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lipe/lpcc | 4 ++ lipe/man/lpcc.conf.5 | 4 ++ lipe/src/lpcc_purge.c | 106 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 78 insertions(+), 36 deletions(-) diff --git a/lipe/lpcc b/lipe/lpcc index d573a0b..5c8bc71 100755 --- a/lipe/lpcc +++ b/lipe/lpcc @@ -45,6 +45,7 @@ class LpccService: lpcc_purge_low_usage = 75 lpcc_purge_interval = 5 lpcc_purge_scan_threads = 1 + lpcc_purge_log_level = None def __init__(self, lpcc_config): self.lpcc_config = lpcc_config @@ -59,6 +60,7 @@ class LpccService: self.lpcc_purge_low_usage = lpcc_purge_obj.get('low_usage', 75) self.lpcc_purge_interval = lpcc_purge_obj.get('interval', 30) self.lpcc_purge_scan_threads = lpcc_purge_obj.get('scan_threads', 1) + self.lpcc_purge_log_level = lpcc_purge_obj.get('log_level'); @staticmethod def _check_process_by_pidfile(procname, pidfile): @@ -127,6 +129,8 @@ class LpccService: '--interval', str(self.lpcc_purge_interval), \ '--scan-threads', str(self.lpcc_purge_scan_threads), \ '--pidfile', pidfile] + if self.lpcc_purge_log_level is not None: + cmdline.extend(['--log-level', self.lpcc_purge_log_level]) eprint(cmdline) subprocess.Popen(cmdline) diff --git a/lipe/man/lpcc.conf.5 b/lipe/man/lpcc.conf.5 index 8df803f..60976dd 100644 --- a/lipe/man/lpcc.conf.5 +++ b/lipe/man/lpcc.conf.5 @@ -45,6 +45,10 @@ seconds by default. .BR purge.scan_threads How many threads are used to scan cache device in parallel. It is 1 thread by default. +.TP +.BR purge.log_level +The log level of lpcc_purge. It may be 'fatal', 'error', 'warn', 'normal', +'info', 'debug'. The defaule log level is 'info'. .SH AUTOCACHE CONDITION When a file in lustre file system is opened, the autocache condition will be diff --git a/lipe/src/lpcc_purge.c b/lipe/src/lpcc_purge.c index 8efc49e..f8dde31 100644 --- a/lipe/src/lpcc_purge.c +++ b/lipe/src/lpcc_purge.c @@ -44,6 +44,9 @@ #define OPT_MAX_SCAN_SECS 5 #define OPT_PIDFILE 6 +#define LPCC_PURGE_LOG(level, fmt, ...) \ + llapi_error((level) | LLAPI_MSG_NO_ERRNO, 0, fmt, ##__VA_ARGS__) + struct lpcc_purge_options { char *o_cache; char *o_mount; @@ -74,7 +77,6 @@ static struct lpcc_purge_options opt = { .o_clear_hashdir = false, }; -bool exit_flag = false; struct lpcc_purge_stats { double s_start_usage; uint64_t s_scan_times; @@ -201,10 +203,31 @@ static void lpcc_purge_null_handler(int signal) static void lpcc_purge_sigint_handler(int signal) { + LPCC_PURGE_LOG(LLAPI_MSG_FATAL, "Exit due to signal: %s\n", + strsignal(signal)); psignal(signal, "exiting"); _exit(0); } +static const char *log_level_strs[] = { + [LLAPI_MSG_OFF] = "off", + [LLAPI_MSG_FATAL] = "fatal", + [LLAPI_MSG_ERROR] = "error", + [LLAPI_MSG_WARN] = "warn", + [LLAPI_MSG_NORMAL] = "normal", + [LLAPI_MSG_INFO] = "info", + [LLAPI_MSG_DEBUG] = "debug", + [LLAPI_MSG_MAX] = "all", +}; + +static const char *log_level_to_str(enum llapi_message_level level) +{ + if (level >= 0 && level < ARRAY_SIZE(log_level_strs)) + return log_level_strs[level]; + else + return ""; +} + /** * Dump: * - config @@ -240,6 +263,7 @@ static void lpcc_purge_dump_config_stats(FILE *f) json_object_object_add(j_config, "candidate_num", json_object_new_int64(opt.o_candidate_num)); json_object_object_add(j_config, "clear_hashdir", json_object_new_boolean(opt.o_clear_hashdir)); json_object_object_add(j_config, "max_scan_secs", json_object_new_int64(opt.o_max_scan_secs)); + json_object_object_add(j_config, "log_level", json_object_new_string(log_level_to_str(llapi_msg_get_level()))); json_object_object_add(j_all, "config", j_config); json_object *j_stats = json_object_new_object(); @@ -276,8 +300,8 @@ static void lpcc_purge_usr1_handler(int sig) f = fopen(opt.o_dumpfile, "w"); if (!f) { - llapi_printf(LLAPI_MSG_DEBUG, "cannot open dumpfile '%s'\n", - opt.o_dumpfile); + llapi_error(LLAPI_MSG_WARN, errno, "cannot open dumpfile '%s'\n", + opt.o_dumpfile); return; }; @@ -416,8 +440,8 @@ static void load_config(const char *path) else { optarg = NULL; } - llapi_printf(LLAPI_MSG_DEBUG, "conf: %s %s\n", t, - optarg ? optarg : ""); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "conf: %s %s\n", + t, optarg ? optarg : ""); lpcc_purge_process_opt(opt->val, optarg); } @@ -425,15 +449,6 @@ static void load_config(const char *path) fclose(f); } -static const char *log_level_strs[] = { - [LLAPI_MSG_OFF] = "off", - [LLAPI_MSG_FATAL] = "fatal", - [LLAPI_MSG_ERROR] = "error", - [LLAPI_MSG_WARN] = "warn", - [LLAPI_MSG_NORMAL] = "normal", - [LLAPI_MSG_INFO] = "info", - [LLAPI_MSG_DEBUG] = "debug", -}; static enum llapi_message_level parse_log_level(const char *level_str) { @@ -508,7 +523,7 @@ static void lpcc_purge_process_opt(int c, char *optarg) break; case OPT_LOG_LEVEL: log_level = parse_log_level(optarg); - llapi_printf(LLAPI_MSG_INFO, "set log level: %s\n", + LPCC_PURGE_LOG(LLAPI_MSG_INFO, "set log level: %s\n", log_level_strs[log_level]); llapi_msg_set_level(log_level); @@ -764,7 +779,7 @@ static void lpcc_purge_wait_for_scan(void) { while (1) { double usage = lpcc_purge_get_fs_usage(opt.o_cache); - llapi_printf(LLAPI_MSG_DEBUG, "usage: %.1f\n", usage); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "usage: %.1f\n", usage); if (usage >= opt.o_high_usage || opt.o_dry_run) { stats.s_start_usage = usage; break; @@ -832,16 +847,16 @@ static int lpcc_purge_detach_by_fid(const char *mntpath, } if (detach.pccd_flags & PCC_DETACH_FL_CACHE_REMOVED) { - llapi_printf(LLAPI_MSG_DEBUG, - "Detach and remove the PCC cached fid: "DFID"\n", + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, + "PCC cached fid "DFID" was detached and removed\n", PFID(fid)); } else if (detach.pccd_flags & PCC_DETACH_FL_ATTACHING) { - llapi_printf(LLAPI_MSG_DEBUG, - "fid "DFID" is being attached, skip it", PFID(fid)); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, + "fid "DFID" is being attached, skip it\n", PFID(fid)); } else { - llapi_printf(LLAPI_MSG_DEBUG, + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "Remove non-cached file: %s flags: %X\n", cache_file, detach.pccd_flags); rc = unlink(cache_file); @@ -850,7 +865,6 @@ static int lpcc_purge_detach_by_fid(const char *mntpath, llapi_error(LLAPI_MSG_ERROR, rc, "Unlink %s failed", cache_file); } - } return rc; @@ -862,14 +876,30 @@ static int lpcc_purge_detach_candidate(const char *mnt, int rc; struct stat statbuf; uint64_t atime_ms; + time_t atime; + char atimestr[64], fidstr[64], path[PATH_MAX]; - llapi_printf(LLAPI_MSG_DEBUG, "detach fid: "DFID"\n", PFID(&candidate->c_fid)); + sprintf(fidstr, DFID_NOBRACE, PFID(&candidate->c_fid)); + rc = llapi_fid2path(opt.o_mount, fidstr, path, PATH_MAX, NULL, NULL); + if (rc != 0) { + llapi_error(LLAPI_MSG_ERROR, rc, "cannot get path of fid %s", + fidstr); + return rc; + } + atime = candidate->c_atime_ms / 1000; + ctime_r(&atime, atimestr); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, + "detach fid: "DFID", path: '%s/%s', cache file: '%s', " + "atime: %s", + PFID(&candidate->c_fid), opt.o_mount, path, + candidate->c_path,atimestr); /* double confirm the atime. If it's changed, discard this entry */ rc = stat(candidate->c_path, &statbuf); if (rc) { rc = -errno; - llapi_error(LLAPI_MSG_WARN, -rc, "cannot stat file '%s'", candidate->c_path); + llapi_error(LLAPI_MSG_WARN, -rc, "cannot stat file '%s'", + candidate->c_path); return rc; } atime_ms = statbuf.st_atim.tv_sec * 1000 + statbuf.st_atim.tv_nsec / 1000000; @@ -888,7 +918,7 @@ static int lpcc_purge_detach_candidate(const char *mnt, rc = lpcc_purge_detach_by_fid(mnt, &candidate->c_fid, candidate->c_path); if (rc) { llapi_error(LLAPI_MSG_WARN, -rc, "cannot detach fid: "DFID, - PFID(&candidate->c_fid)); + PFID(&candidate->c_fid)); pthread_mutex_lock(&stats.s_lock); stats.s_total_failed_objs++; pthread_mutex_unlock(&stats.s_lock); @@ -942,7 +972,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, stats.s_scanned_objs++; pthread_mutex_unlock(&stats.s_lock); - llapi_printf(LLAPI_MSG_DEBUG, "append fid: "DFID"\n", PFID(&fid)); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "append fid: "DFID"\n", PFID(&fid)); pthread_mutex_lock(&candidate_set.cs_lock); lpcc_purge_candidate_set_append(candidate); candidate = NULL; @@ -960,7 +990,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, } /* pool is full or scan for too long time */ - llapi_printf(LLAPI_MSG_DEBUG, "start purging, candidate number: %u, " + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "start purging, candidate number: %u, " "scan secs: %lu\n", candidate_set.cs_count, scan_secs); lpcc_purge_candidate_set_sort(); @@ -980,13 +1010,13 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, } } - llapi_printf(LLAPI_MSG_DEBUG, + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "copy out %d elements from the head\n", n_detach); for (i = 0; i < n_detach; i++) { to_detach[i] = candidate_set.cs_arr[i]; } - llapi_printf(LLAPI_MSG_DEBUG, + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "shift pool and remove %d elements from the tail\n", n_discard); memmove(&candidate_set.cs_arr[0], &candidate_set.cs_arr[n_detach], (candidate_set.cs_count - n_detach - n_discard) * sizeof(candidate_set.cs_arr[0])); @@ -1000,7 +1030,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, pthread_mutex_unlock(&candidate_set.cs_lock); /* Detach files from candidates */ - llapi_printf(LLAPI_MSG_DEBUG, "detach files...\n"); + LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "detach files...\n"); for (i = 0; i < n_detach; i++) { rc = lpcc_purge_detach_candidate(opt.o_mount, to_detach[i]); lpcc_purge_candidate_destroy(to_detach[i]); @@ -1022,8 +1052,6 @@ static void lpcc_purge_scan(void) struct scan_result result; bool ldd_err; - llapi_printf(LLAPI_MSG_INFO, "do scanning...\n"); - stats.s_scan_times++; stats.s_start_usage = lpcc_purge_get_fs_usage(opt.o_cache); stats.s_scanned_objs = 0; @@ -1034,7 +1062,7 @@ static void lpcc_purge_scan(void) policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR; memset(&result, 0x00, sizeof(result)); - llapi_printf(LLAPI_MSG_DEBUG, "start scanning...\n"); + LPCC_PURGE_LOG(LLAPI_MSG_INFO, "start scanning...\n"); rc = lipe_scan(instance, &policy, @@ -1050,14 +1078,14 @@ static void lpcc_purge_scan(void) "failed to run lipe_scan"); } - llapi_printf(LLAPI_MSG_INFO, "scanning end\n"); + LPCC_PURGE_LOG(LLAPI_MSG_INFO, "scanning end\n"); } static void lpcc_purge_free_space(void) { int i; - llapi_printf(LLAPI_MSG_INFO, "free space...\n"); + LPCC_PURGE_LOG(LLAPI_MSG_INFO, "free space...\n"); lpcc_purge_candidate_set_sort(); @@ -1069,7 +1097,6 @@ static void lpcc_purge_free_space(void) break; } - fflush(stdout); lpcc_purge_candidate_set_clear(); } @@ -1093,6 +1120,8 @@ int main(int argc, char *argv[]) instance = lipe_instance_alloc(0, opt.o_cache); instance->li_callback = lpcc_purge_scan_callback; + LPCC_PURGE_LOG(LLAPI_MSG_NORMAL, "lpcc_purge started\n"); + while(1) { lpcc_purge_wait_for_scan(); @@ -1105,6 +1134,9 @@ int main(int argc, char *argv[]) stats.s_end_time = time(NULL); + LPCC_PURGE_LOG(LLAPI_MSG_INFO, + "scanned %ld object, purged %ld\n", + stats.s_scanned_objs, stats.s_purged_objs); if (opt.o_dry_run) { break; } @@ -1113,5 +1145,7 @@ int main(int argc, char *argv[]) lipe_instance_free(instance); lpcc_purge_candidate_set_destroy(); + LPCC_PURGE_LOG(LLAPI_MSG_NORMAL, + "lpcc_purge ended with exit code: %d\n", rc); return rc == 0 ? 0 : EXIT_FAILURE; } -- 1.8.3.1