From 521d34204d137dd8df0b2393c1a8717763152e8e Mon Sep 17 00:00:00 2001 From: Alexandre Ioffe Date: Tue, 5 Oct 2021 19:37:20 -0700 Subject: [PATCH] EX-3889 lipe: lamigo/lpurge error reporting Replace LAMIGO_{FATAL,ERROR,WARN,INFO,DEBUG}() with the more generally named macros LX_{FATAL,ERROR,WARN,INFO,DEBUG}() and use them for both lamigo and lpurge. From now on lipe will not use llapi_printf(), only LX_{FATAL,ERROR,WARN,INFO,DEBUG}() and llapi_error(). Test-Parameters: trivial testlist=hot-pools Signed-off-by: Alexandre Ioffe Change-Id: I4516bb737ec8a308b6e39be2767fd5e03e8b3c61 Reviewed-on: https://review.whamcloud.com/45131 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: John L. Hammond Reviewed-on: https://review.whamcloud.com/46096 --- lipe/src/Makefile.am | 4 +- lipe/src/lamigo.c | 360 ++++++++++++++++++++++++----------------------- lipe/src/lamigo.h | 75 ---------- lipe/src/lamigo_alr.c | 33 +++-- lipe/src/lamigo_hash.c | 1 + lipe/src/lpurge.c | 373 +++++++++++++++++++------------------------------ lipe/src/lx_log.h | 93 ++++++++++++ 7 files changed, 433 insertions(+), 506 deletions(-) create mode 100644 lipe/src/lx_log.h diff --git a/lipe/src/Makefile.am b/lipe/src/Makefile.am index 0229c8b..7ca1d46 100644 --- a/lipe/src/Makefile.am +++ b/lipe/src/Makefile.am @@ -87,7 +87,7 @@ laudit_report_SOURCES = laudit-report.c lipe_ssh_SOURCES = lipe_ssh.c lipe_ssh.h -lamigo_SOURCES = lamigo.c lamigo.h lamigo_alr.c lamigo_hash.c lamigo_hash.h \ +lamigo_SOURCES = lx_log.h lamigo.c lamigo.h lamigo_alr.c lamigo_hash.c lamigo_hash.h \ $(LIPE_SOURCES) $(lipe_ssh_SOURCES) lamigo_CFLAGS = $(LIPE_CFLAGS) lamigo_LDFLAGS = $(LIPE_LDFLAGS) -lssh @@ -96,7 +96,7 @@ if HAVE_SSH_THREADS lamigo_LDFLAGS += -lssh_threads endif -lpurge_SOURCES = lpurge.c $(LIPE_SOURCES) +lpurge_SOURCES = lx_log.h lpurge.c $(LIPE_SOURCES) lpurge_CFLAGS = $(LIPE_CFLAGS) lpurge_LDFLAGS = $(LIPE_LDFLAGS) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index e111e18..c2151e4 100644 
--- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -50,6 +50,7 @@ #include #include #include +#include "lx_log.h" #include "policy.h" #include "list.h" #include "debug.h" @@ -90,8 +91,8 @@ #define LAMIGO_HEAT_FILE "/var/run/lamigo-%s.heat" #define LAMIGO_PIDFILE "/var/run/lamigo-%s.pid" -int lamigo_log_level = LLAPI_MSG_INFO; -const char *lamigo_mdt_name = "NONE"; +enum llapi_message_level lx_log_level = LLAPI_MSG_INFO; +char *lx_log_prefix = "NONE"; static LIPE_LIST_HEAD(lamigo_rule_list); __u64 lamigo_rule_attrs; /* attributes needed to evalute the rules */ @@ -437,7 +438,7 @@ static void lamigo_dump_stats_file(void) FILE *f; int i; - LAMIGO_DEBUG("dumping stats to '%s'\n", opt.o_dump_file); + LX_DEBUG("dumping stats to '%s'\n", opt.o_dump_file); if (opt.o_dump_file == NULL) return; f = fopen(opt.o_dump_file, "w"); @@ -568,12 +569,12 @@ static void lamigo_dump_heat_file(void) { FILE *f; - LAMIGO_DEBUG("dumping heat to '%s'\n", opt.o_heat_file); + LX_DEBUG("dumping heat to '%s'\n", opt.o_heat_file); if (opt.o_heat_file == NULL) return; f = fopen(opt.o_heat_file, "w"); if (!f) { - LAMIGO_ERROR("cannot open heat file '%s': %s\n", opt.o_heat_file, strerror(errno)); + LX_ERROR("cannot open heat file '%s': %s\n", opt.o_heat_file, strerror(errno)); return; } lamigo_alr_dump_heat_table(f); @@ -641,8 +642,8 @@ void *lamigo_replicate_one(void *args) "'%s/.lustre/fid/"DFID"' > /dev/null 2>&1", rj->rj_pool, agent->rag_mountpoint, PFID(&rj->rj_fid)); - LAMIGO_DEBUG("set prefer on "DFID"\n", - PFID(&rj->rj_fid)); + LX_DEBUG("set prefer on "DFID"\n", + PFID(&rj->rj_fid)); } else if (resync == AMIGO_RESYNC_EXTEND) { int i; @@ -664,7 +665,7 @@ void *lamigo_replicate_one(void *args) agent->rag_mountpoint, PFID(&rj->rj_fid)); } else { - LAMIGO_ERROR("unknown resync: %d\n", resync); + LX_ERROR("unknown resync: %d\n", resync); rc = -EINVAL; goto out; } @@ -672,15 +673,15 @@ void *lamigo_replicate_one(void *args) /* rc < 0 means an ssh error. 
Otherwise command exit status is * in status. Mask common exit statuses. */ rc = lamigo_exec_cmd(agent, cmd, &status); - LAMIGO_DEBUG("exec command '%s' on '%s': rc = %d, status = %d\n", - cmd, agent->rag_hostname, rc, status); + LX_DEBUG("exec command '%s' on '%s': rc = %d, status = %d\n", + cmd, agent->rag_hostname, rc, status); if (rc < 0 || /* 1 from setprefer (see EX-3591) */ (rj->rj_setprefer && status != 0 && status != 1) || /* EBUSY from mirror extend/resync */ (!rj->rj_setprefer && status != 0 && status != EBUSY)) - LAMIGO_ERROR("command '%s' on '%s' failed: rc = %d, status = %d\n", - cmd, agent->rag_hostname, rc, status); + LX_ERROR("command '%s' on '%s' failed: rc = %d, status = %d\n", + cmd, agent->rag_hostname, rc, status); out: /* notify the main thread about completion */ write(lamigo_sigpipe[1], &rc, 1); @@ -715,15 +716,15 @@ static int lamigo_spawn_replication(struct resync_job *rj) } } if (!a) { - LAMIGO_DEBUG("no good agent\n"); + LX_DEBUG("no good agent\n"); return -EBUSY; } rj->rj_agent = a; rj->rj_start = time(NULL); - LAMIGO_DEBUG("new job %s for "DFID" spawned on %s\n", - resync == AMIGO_RESYNC_EXTEND ? "extend" : "resync", - PFID(&rj->rj_fid), rj->rj_agent->rag_hostname); + LX_DEBUG("new job %s for "DFID" spawned on %s\n", + resync == AMIGO_RESYNC_EXTEND ? 
"extend" : "resync", + PFID(&rj->rj_fid), rj->rj_agent->rag_hostname); rc = pthread_create(&pid, NULL, lamigo_replicate_one, rj); if (rc == 0) { @@ -786,7 +787,7 @@ static int lamigo_get_objects(struct lov_user_md_v3 *v3, } else { *objects = NULL; *stripes = 0; - LAMIGO_ERROR("unsupported LOV magic %x\n", v3->lmm_magic); + LX_ERROR("unsupported LOV magic %x\n", v3->lmm_magic); return -EINVAL; } return 0; @@ -929,7 +930,7 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, } if (opt.o_src_dom && v3->lmm_pattern == LOV_PATTERN_MDT) { - LAMIGO_DEBUG("DoM component"); + LX_DEBUG("DoM component"); onsrc++; continue; } @@ -1022,7 +1023,7 @@ lamigo_check_user_rules(struct lipe_object_attrs *attrs, rc = lipe_rule_evaluate(rule, attrs, sysattrs, &result); if (rc) { - LAMIGO_ERROR("cannot evaluate rule: %s\n", strerror(-rc)); + LX_ERROR("cannot evaluate rule: %s\n", strerror(-rc)); return AMIGO_RESYNC_NONE; } if (!result) @@ -1126,7 +1127,7 @@ static int lamigo_get_attrs(const struct lu_fid *fid, snprintf(attrs->loa_fid_str, sizeof(attrs->loa_fid_str), DFID_NOBRACE, PFID(&attrs->loa_fid)); attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_LMAEA; - LAMIGO_DEBUG("got LMA: %d\n", rc); + LX_DEBUG("got LMA: %d\n", rc); } } @@ -1202,8 +1203,8 @@ static int lamigo_is_in_sync(struct lu_fid *fid, */ resync = lamigo_check_user_rules(&attrs, &sysattrs); if (resync == AMIGO_RESYNC_NONE) { - LAMIGO_DEBUG("skip "DFID" due to rules\n", - PFID(fid)); + LX_DEBUG("skip "DFID" due to rules\n", + PFID(fid)); stats.s_skip_by_rule++; goto out; } @@ -1213,8 +1214,8 @@ static int lamigo_is_in_sync(struct lu_fid *fid, out: lamigo_hist_add(fid, resync); - LAMIGO_DEBUG("check "DFID" stripes=%d: resync=%d\n", - PFID(fid), mo->mo_stripes, resync); + LX_DEBUG("check "DFID" stripes=%d: resync=%d\n", + PFID(fid), mo->mo_stripes, resync); return resync; } @@ -1327,14 +1328,14 @@ static int lamigo_update_one(struct fid_rec *f) /* cold pool is close to full, skip replication */ /* do this check before 
expensive layout fetching, rules, etc */ stats.s_skip_tgt_closed++; - LAMIGO_DEBUG("pool %s closed for "DFID"\n", - tgt_pools->pl_pool, PFID(&f->fr_fh.fh_fid)); + LX_DEBUG("pool %s closed for "DFID"\n", + tgt_pools->pl_pool, PFID(&f->fr_fh.fh_fid)); return 0; } if (are_agents_busy()) { /* all the agents are busy */ - LAMIGO_DEBUG("no agents avilable (max: %d)\n", lamigo_max_jobs); + LX_DEBUG("no agents avilable (max: %d)\n", lamigo_max_jobs); return 1; } @@ -1346,10 +1347,8 @@ static int lamigo_update_one(struct fid_rec *f) return 0; } if (ah.ah_hot && alr_period - ah.ah_hot <= 1) { - LAMIGO_DEBUG( - "skip hot "DFID" in %u, now %lu\n", - PFID(&f->fr_fh.fh_fid), ah.ah_hot, - alr_period); + LX_DEBUG("skip hot "DFID" in %u, now %lu\n", + PFID(&f->fr_fh.fh_fid), ah.ah_hot, alr_period); stats.s_skip_hot++; return 0; } @@ -1417,7 +1416,7 @@ static int lamigo_check_sync(void) struct resync_job, rj_list); lipe_list_del(&rj->rj_list); rc = lamigo_submit_job(rj); - LAMIGO_DEBUG("tried to resubmit failed job %p: rc=%d\n", rj, rc); + LX_DEBUG("tried to resubmit failed job %p: rc=%d\n", rj, rc); if (rc != 0) return rc; } @@ -1582,8 +1581,8 @@ static void lamigo_check_and_clear_changelog(void) index - lamigo_last_cleared_index < opt.o_chlg_clear_frequency) return; - LAMIGO_DEBUG("CLEAR upto %llu in %s (%llu last)\n", - index, opt.o_chlg_user, lamigo_last_processed_idx); + LX_DEBUG("CLEAR upto %llu in %s (%llu last)\n", + index, opt.o_chlg_user, lamigo_last_processed_idx); lamigo_last_cleared_index = index; rc = llapi_changelog_clear(opt.o_mdtname, opt.o_chlg_user, index); if (rc < 0) { @@ -1596,9 +1595,9 @@ static void lamigo_check_and_clear_changelog(void) static void lamigo_job_fini(struct resync_job *rj, intptr_t retval) { - LAMIGO_DEBUG("job %lu on "DFID" done in %lu: %"PRIdPTR" (%d)\n", - rj->rj_pid, PFID(&rj->rj_fid), time(NULL) - rj->rj_start, - retval, rj->rj_agent->rag_bad); + LX_DEBUG("job %lu on "DFID" done in %lu: %"PRIdPTR" (%d)\n", + rj->rj_pid, PFID(&rj->rj_fid), 
time(NULL) - rj->rj_start, + retval, rj->rj_agent->rag_bad); rj->rj_done_timestamp = time(NULL); @@ -1607,16 +1606,16 @@ static void lamigo_job_fini(struct resync_job *rj, intptr_t retval) if (retval == 0) { /* the agent is back */ if (rj->rj_agent->rag_bad) { - LAMIGO_DEBUG("agent %s is back\n", - rj->rj_agent->rag_hostname); + LX_DEBUG("agent %s is back\n", + rj->rj_agent->rag_hostname); rj->rj_agent->rag_bad = false; lamigo_max_jobs += rj->rj_agent->rag_maxjobs; } } else { /* the agent is still bad */ if (rj->rj_agent->rag_bad == false) { - LAMIGO_DEBUG("agent %s is bad\n", - rj->rj_agent->rag_hostname); + LX_DEBUG("agent %s is bad\n", + rj->rj_agent->rag_hostname); assert(lamigo_max_jobs >= rj->rj_agent->rag_maxjobs); lamigo_max_jobs -= rj->rj_agent->rag_maxjobs; @@ -1700,14 +1699,14 @@ static void lamigo_add_agent(const char *host, const char *mnt, char *jobs) a->rag_maxjobs = strtol(jobs, &endptr, 10); if (*endptr != '\0') - LAMIGO_FATAL("invalid jobs: '%s' (1-2048 expected)\n", jobs); + LX_FATAL("invalid jobs: '%s' (1-2048 expected)\n", jobs); } else { a->rag_maxjobs = DEF_AGENT_JOBS; } if (a->rag_maxjobs < 1 || a->rag_maxjobs > 2048) - LAMIGO_FATAL("invalid jobs per agent: %d (1-2048 expected)\n", - a->rag_maxjobs); + LX_FATAL("invalid jobs per agent: %d (1-2048 expected)\n", + a->rag_maxjobs); lipe_list_add(&a->rag_list, &lamigo_agent_list); @@ -1725,13 +1724,13 @@ static void lamigo_add_agent(const char *host, const char *mnt, char *jobs) rc = lipe_ssh_context_init(&rss->rss_ctx, a->rag_hostname); if (rc < 0) - LAMIGO_FATAL("cannot create SSH context for '%s': rc = %d\n", - a->rag_hostname, rc); + LX_FATAL("cannot create SSH context for '%s': rc = %d\n", + a->rag_hostname, rc); lipe_list_add(&rss->rss_list, &a->rag_ssh_list); } - LAMIGO_DEBUG("AGENT: %s %s %d\n", a->rag_hostname, - a->rag_mountpoint, a->rag_maxjobs); + LX_DEBUG("AGENT: %s %s %d\n", a->rag_hostname, + a->rag_mountpoint, a->rag_maxjobs); lamigo_agent_count++; } @@ -1881,8 +1880,8 @@ void 
lamigo_refresh_osts_from_pool(struct pool_list *pl) rc = cfs_get_param_paths(&paths, "lod/%s-*/numobd", fsname); if (rc != 0) - LAMIGO_FATAL("cannot read OBD count from 'lod/%s-*/numobd': %s\n", - fsname, strerror(errno)); + LX_FATAL("cannot read OBD count from 'lod/%s-*/numobd': %s\n", + fsname, strerror(errno)); for (i = 0; i < paths.gl_pathc; i++) { rc = lamigo_read_file(paths.gl_pathv[i], data, sizeof(data)); @@ -1891,7 +1890,7 @@ void lamigo_refresh_osts_from_pool(struct pool_list *pl) obdcount = strtol(data, &endptr, 10); if (*endptr != '\0') - LAMIGO_FATAL("invalid OBD count '%s'\n", data); + LX_FATAL("invalid OBD count '%s'\n", data); break; } @@ -1899,7 +1898,7 @@ void lamigo_refresh_osts_from_pool(struct pool_list *pl) globfree(&paths); if (obdcount < 0) - LAMIGO_FATAL("cannot find filesystem '%s'\n", fsname); + LX_FATAL("cannot find filesystem '%s'\n", fsname); bufsize = sizeof(struct obd_uuid) * obdcount; buffer = xmalloc(bufsize + sizeof(*list) * obdcount); @@ -1993,19 +1992,19 @@ void lamigo_process_opt(int c, char *optarg) case LAMIGO_OPT_OFD_INTERVAL: opt.o_alr_ofd_interval = atoi(optarg); if (opt.o_alr_ofd_interval < 1) - LAMIGO_FATAL("invalid ofd interval '%s'\n", optarg); + LX_FATAL("invalid ofd interval '%s'\n", optarg); break; case LAMIGO_OPT_HOT_FRACTION: opt.o_alr_hot_fraction = atoi(optarg); if (opt.o_alr_hot_fraction < 1 || opt.o_alr_hot_fraction > 100) - LAMIGO_FATAL("invalid hot fraction '%s'\n", optarg); + LX_FATAL("invalid hot fraction '%s'\n", optarg); break; case LAMIGO_OPT_HOT_AFTER_IDLE: opt.o_alr_hot_after_idle = atoi(optarg); if (opt.o_alr_hot_after_idle < 1 || opt.o_alr_hot_after_idle >= opt.o_alr_periods) - LAMIGO_FATAL("invalid hot-after-idle '%s'\n", optarg); + LX_FATAL("invalid hot-after-idle '%s'\n", optarg); break; case LAMIGO_OPT_MIRROR_CMD: opt.o_mirror_cmd = strdup(optarg); @@ -2013,12 +2012,12 @@ void lamigo_process_opt(int c, char *optarg) case LAMIGO_OPT_POOL_REFRESH: opt.o_pool_refresh = strtol(optarg, &endptr, 10); 
if (*endptr != '\0' || opt.o_pool_refresh < 1) - LAMIGO_FATAL("invalid pool refresh interval '%s'\n", optarg); + LX_FATAL("invalid pool refresh interval '%s'\n", optarg); break; case LAMIGO_OPT_PROGRESS_INTV: opt.o_progress_interval = strtol(optarg, &endptr, 10); if (*endptr != '\0' || opt.o_progress_interval < 1) - LAMIGO_FATAL("invalid progress interval '%s'\n", optarg); + LX_FATAL("invalid progress interval '%s'\n", optarg); break; case LAMIGO_OPT_ALR_EXTRA_ARGS: opt.o_alr_extra_args = optarg; @@ -2026,7 +2025,7 @@ void lamigo_process_opt(int c, char *optarg) case LAMIGO_OPT_SRC_FREE: opt.o_src_free = atoi(optarg); if (opt.o_src_free < 1 || opt.o_src_free > 99) - LAMIGO_FATAL("invalid source free space '%s'\n", optarg); + LX_FATAL("invalid source free space '%s'\n", optarg); break; case LAMIGO_OPT_SRC_DOM: opt.o_src_dom = 1; @@ -2037,7 +2036,7 @@ void lamigo_process_opt(int c, char *optarg) case LAMIGO_OPT_TGT_FREE: opt.o_tgt_free = atoi(optarg); if (opt.o_tgt_free < 1 || opt.o_tgt_free > 99) - LAMIGO_FATAL("invalid target free space '%s'\n", optarg); + LX_FATAL("invalid target free space '%s'\n", optarg); break; case LAMIGO_OPT_VERSION: lipe_version(); @@ -2045,10 +2044,10 @@ void lamigo_process_opt(int c, char *optarg) case 'a': opt.o_min_age = strtol(optarg, &endptr, 10); if (*endptr != '\0' || opt.o_min_age < 5) - LAMIGO_FATAL("invalid value for -a '%s'\n", optarg); + LX_FATAL("invalid value for -a '%s'\n", optarg); break; case 'b': - lamigo_log_level = LLAPI_MSG_MAX; + lx_log_level = LLAPI_MSG_MAX; llapi_msg_set_level(LLAPI_MSG_MAX); break; case 'c': { @@ -2057,7 +2056,7 @@ void lamigo_process_opt(int c, char *optarg) rc = strsize2int(&cache_size, optarg); if (rc < 0 || cache_size <= 0 || (cache_size >= 100 && cache_size < 1<<20)) - LAMIGO_FATAL("invalid cache size '%s'\n", optarg); + LX_FATAL("invalid cache size '%s'\n", optarg); /* For value < 100, it is taken as the percentage of * total memory instead. 
@@ -2066,7 +2065,7 @@ void lamigo_process_opt(int c, char *optarg) opt.o_cache_size = get_fid_cache_size(cache_size); else opt.o_cache_size = cache_size; - LAMIGO_INFO("cache size: %lu\n", opt.o_cache_size); + LX_INFO("cache size: %lu\n", opt.o_cache_size); break; } case 'f': @@ -2077,7 +2076,7 @@ void lamigo_process_opt(int c, char *optarg) mnt = strsep(&optarg, ":"); jobs = strsep(&optarg, ":"); if (!host || !mnt) - LAMIGO_FATAL("invalid agent definition\n"); + LX_FATAL("invalid agent definition\n"); lamigo_add_agent(host, mnt, jobs); break; @@ -2090,14 +2089,14 @@ void lamigo_process_opt(int c, char *optarg) } else { opt.o_alr_heat_fn = atoi(optarg); if (opt.o_alr_heat_fn < 0 || opt.o_alr_heat_fn > 1) - LAMIGO_FATAL("invalid heat function '%s'\n", optarg); + LX_FATAL("invalid heat function '%s'\n", optarg); } break; case 'I': opt.o_alr_hot_after_idle = atoi(optarg); break; case 'm': - lamigo_mdt_name = xstrdup(optarg); + lx_log_prefix = xstrdup(optarg); opt.o_mdtname = xstrdup(optarg); break; case 'M': @@ -2106,7 +2105,7 @@ void lamigo_process_opt(int c, char *optarg) case 'n': opt.o_num_threads = strtoul(optarg, NULL, 0); if (opt.o_num_threads < 1) - LAMIGO_FATAL("invalid thread number: %d\n", opt.o_num_threads); + LX_FATAL("invalid thread number: %d\n", opt.o_num_threads); break; case 'o': lamigo_add_alr_agent(optarg); @@ -2124,7 +2123,7 @@ void lamigo_process_opt(int c, char *optarg) opt.o_chlg_user = strdup(optarg); break; case 'v': - lamigo_log_level++; + lx_log_level++; break; case 'w': opt.o_dump_file = strdup(optarg); @@ -2165,7 +2164,7 @@ static void count_bracket_recursion(const char *str, int *counter) else if (*p == '}') (*counter)--; if (*counter < 0) - LAMIGO_FATAL("invalid rule '%s'\n", str); + LX_FATAL("invalid rule '%s'\n", str); p++; } } @@ -2237,7 +2236,7 @@ static void load_config(char *name) f = fopen(name, "r"); if (!f) - LAMIGO_FATAL("cannot open config file '%s': %s\n", name, strerror(errno)); + LX_FATAL("cannot open config file '%s': 
%s\n", name, strerror(errno)); while (!feof(f)) { struct option *opt; @@ -2283,11 +2282,11 @@ static void load_config(char *name) optarg = strsep(&s, "\n "); if (!optarg && opt->has_arg == required_argument) - LAMIGO_FATAL("option '%s' requires an argument\n", t); + LX_FATAL("option '%s' requires an argument\n", t); } else { optarg = NULL; } - LAMIGO_DEBUG("conf: %s %s\n", t, optarg); + LX_DEBUG("conf: %s %s\n", t, optarg); lamigo_process_opt(opt->val, optarg); } @@ -2317,48 +2316,48 @@ void lamigo_parse_opts(int argc, char **argv) } if (!opt.o_mntpt) - LAMIGO_FATAL("no mount point specified\n"); + LX_FATAL("no mount point specified\n"); rc = llapi_search_fsname(opt.o_mntpt, fsname); if (rc < 0) - LAMIGO_FATAL("cannot find a Lustre file system mounted at '%s'\n", - opt.o_mntpt); + LX_FATAL("cannot find a Lustre file system mounted at '%s'\n", + opt.o_mntpt); if (!opt.o_mdtname) - LAMIGO_FATAL("no MDT specified\n"); + LX_FATAL("no MDT specified\n"); rc = cfs_get_param_paths(&paths, "mdt/%s/uuid", opt.o_mdtname); if (rc != 0) - LAMIGO_FATAL("cannot find MDT uuid from 'mdt/%s/uuid': %s\n", - opt.o_mdtname, strerror(errno)); + LX_FATAL("cannot find MDT uuid from 'mdt/%s/uuid': %s\n", + opt.o_mdtname, strerror(errno)); globfree(&paths); snprintf(buf, sizeof(buf), "%s/.lustre/fid", opt.o_mntpt); open_by_fid_fd = open(buf, O_RDONLY); if (open_by_fid_fd < 0) - LAMIGO_FATAL("cannot open '%s': %s\n", buf, strerror(errno)); + LX_FATAL("cannot open '%s': %s\n", buf, strerror(errno)); if (src_pools == NULL) { lamigo_parse_pool(DEF_SOURCE_POOL); - LAMIGO_WARN("source pools aren't defined, using '%s'\n", DEF_SOURCE_POOL); + LX_WARN("source pools aren't defined, using '%s'\n", DEF_SOURCE_POOL); } if (opt.o_tgt_pool == NULL) { opt.o_tgt_pool = DEF_TARGET_POOL; - LAMIGO_WARN("target pool is not defined, using %s\n", opt.o_tgt_pool); + LX_WARN("target pool is not defined, using %s\n", opt.o_tgt_pool); } opt.o_tgt_pool_len = strlen(opt.o_tgt_pool); if 
(lamigo_lookup_pool(opt.o_tgt_pool)) - LAMIGO_FATAL("target pool '%s' cannot also be source pool\n", - opt.o_tgt_pool); + LX_FATAL("target pool '%s' cannot also be source pool\n", + opt.o_tgt_pool); if (lipe_list_empty(&lamigo_agent_list)) - LAMIGO_FATAL("no agents configured\n"); + LX_FATAL("no agents configured\n"); - LAMIGO_DEBUG("target pool: %s/%d\n", opt.o_tgt_pool, - opt.o_tgt_pool_len); + LX_DEBUG("target pool: %s/%d\n", opt.o_tgt_pool, + opt.o_tgt_pool_len); tgt_pools = lamigo_alloc_pool(opt.o_tgt_pool); if (!opt.o_dump_file) { @@ -2376,7 +2375,7 @@ void lamigo_parse_opts(int argc, char **argv) rc = pipe2(lamigo_sigpipe, O_NONBLOCK); if (rc < 0) - LAMIGO_FATAL("cannot create sigpipe: %s\n", strerror(errno)); + LX_FATAL("cannot create sigpipe: %s\n", strerror(errno)); } static void lamigo_wait_for_job_completion(int timeout) @@ -2645,9 +2644,9 @@ int lamigo_rescan(void) diff_timevals(&result.sr_time_start, &result.sr_time_end, &result.sr_time_diff); - LAMIGO_DEBUG("finished scanning in %d.%06u seconds\n", - (int)result.sr_time_diff.tv_sec, - (unsigned int)result.sr_time_diff.tv_usec); + LX_DEBUG("finished scanning in %d.%06u seconds\n", + (int)result.sr_time_diff.tv_sec, + (unsigned int)result.sr_time_diff.tv_usec); return rc; } @@ -2661,7 +2660,7 @@ static void lamigo_changelog_check_and_set_mask(void) rc = cfs_get_param_paths(&paths, "mdd/%s/changelog_mask", opt.o_mdtname); if (rc != 0 || paths.gl_pathc != 1) - LAMIGO_FATAL("cannot find changelog mask: %s\n", strerror(errno)); + LX_FATAL("cannot find changelog mask: %s\n", strerror(errno)); rc = lamigo_read_file(paths.gl_pathv[0], buf, sizeof(buf)); globfree(&paths); @@ -2683,7 +2682,7 @@ static void lamigo_changelog_check_and_set_mask(void) opt.o_mdtname); rc = system(buf); if (rc < 0) - LAMIGO_FATAL("cannot enable CLOSE/UNLNK in changelog: rc = %d\n", rc); + LX_FATAL("cannot enable CLOSE/UNLNK in changelog: rc = %d\n", rc); llapi_err_noerrno(LLAPI_MSG_INFO, "enable CLOSE/UNLNK in changelog"); } @@ 
-2702,11 +2701,11 @@ static int lamigo_check_changelog_user(const char *user) rc = cfs_get_param_paths(&paths, "mdd/%s/changelog_users", opt.o_mdtname); if (rc != 0 || paths.gl_pathc != 1) - LAMIGO_FATAL("can't find changelog users\n"); + LX_FATAL("can't find changelog users\n"); rc = lamigo_read_file(paths.gl_pathv[0], buf, sizeof(buf)); if (rc < 0) - LAMIGO_FATAL("can't get changelog users\n"); + LX_FATAL("can't get changelog users\n"); rc = -1; s = buf; @@ -2749,22 +2748,22 @@ again: if (!rc) { /* found, use it */ opt.o_chlg_user = strdup(user); - LAMIGO_DEBUG("found Changelog user '%s' in '%s'\n", - user, buf); + LX_DEBUG("found Changelog user '%s' in '%s'\n", + user, buf); return; } } if (registered) - LAMIGO_FATAL("cannot find just registered Changelog user '%s'\n", user); + LX_FATAL("cannot find just registered Changelog user '%s'\n", user); /* try one from the config file */ if (opt.o_chlg_user) { rc = lamigo_check_changelog_user(opt.o_chlg_user); if (!rc) { /* found, use it */ - LAMIGO_DEBUG("found Changelog user '%s' from config\n", - opt.o_chlg_user); + LX_DEBUG("found Changelog user '%s' from config\n", + opt.o_chlg_user); return; } } @@ -2776,8 +2775,8 @@ again: opt.o_mdtname, opt.o_mdtname); rc = system(buf); if (rc < 0) - LAMIGO_FATAL("changelog user '%s' is not registered\n", - opt.o_chlg_user); + LX_FATAL("changelog user '%s' is not registered\n", + opt.o_chlg_user); registered = true; /* if a new changelog user was just registered, either this is the @@ -2804,12 +2803,12 @@ void lamigo_show_progress(void) return; progress_last_processed = stats.s_processed; - LAMIGO_INFO("%lu processed, %lu replicated, %lu busy, %lu in queue, " - "%lu hot skipped, %lu ro2hot, %lu rw2hot, %lu rw2cold\n", - stats.s_processed, stats.s_replicated, stats.s_busy, - stats.s_skip_hot, stats.s_replicate_ro2hot, - stats.s_replicate_rw2hot, stats.s_replicate_rw2cold, - head.lh_cached_count); + LX_INFO("%lu processed, %lu replicated, %lu busy, %lu in queue, " + "%lu hot 
skipped, %lu ro2hot, %lu rw2hot, %lu rw2cold\n", + stats.s_processed, stats.s_replicated, stats.s_busy, + stats.s_skip_hot, stats.s_replicate_ro2hot, + stats.s_replicate_rw2hot, stats.s_replicate_rw2cold, + head.lh_cached_count); } static void lamigo_lock_pidfile(void) @@ -2820,7 +2819,7 @@ static void lamigo_lock_pidfile(void) snprintf(buf, sizeof(buf), LAMIGO_PIDFILE, opt.o_mdtname); fd = open(buf, O_RDWR | O_CREAT, 0600); if (fd < 0) - LAMIGO_FATAL("cannot create pidfile '%s': %s\n", buf, strerror(errno)); + LX_FATAL("cannot create pidfile '%s': %s\n", buf, strerror(errno)); rc = flock(fd, LOCK_EX | LOCK_NB); if (rc < 0) { @@ -2830,18 +2829,18 @@ static void lamigo_lock_pidfile(void) sz = 0; if (sz > 0) buf[sz] = 0; - LAMIGO_FATAL("another lamigo is running, locked by %s\n", - sz > 0 ? buf : "[unknown]"); + LX_FATAL("another lamigo is running, locked by %s\n", + sz > 0 ? buf : "[unknown]"); } rc = ftruncate(fd, 0); if (rc < 0) - LAMIGO_FATAL("cannot truncate pidfile: %s\n", strerror(errno)); + LX_FATAL("cannot truncate pidfile: %s\n", strerror(errno)); sz = snprintf(buf, sizeof(buf), "%d\n", getpid()); rc = write(fd, buf, sz); if (rc < 0 || rc != sz) - LAMIGO_ERROR("cannot write pidfile: %s\n", rc < 0 ? strerror(errno) : "short write"); + LX_ERROR("cannot write pidfile: %s\n", rc < 0 ? 
strerror(errno) : "short write"); } static void lamigo_process_changelog(void) @@ -2889,10 +2888,10 @@ again: if (rc < 0) { int i; - LAMIGO_ERROR("cannot to process changelog record: %s\n", strerror(-rc)); + LX_ERROR("cannot to process changelog record: %s\n", strerror(-rc)); rc = llapi_changelog_fini(&chglog_hdlr); if (rc) - LAMIGO_FATAL("cannot fini changelog: %s\n", strerror(-rc)); + LX_FATAL("cannot fini changelog: %s\n", strerror(-rc)); i = 0; do { @@ -2909,9 +2908,9 @@ again: } while (i++ < 5 && rc != 0); if (rc) - LAMIGO_FATAL("cannot reopen changelog: %s\n", strerror(-rc)); + LX_FATAL("cannot reopen changelog: %s\n", strerror(-rc)); - LAMIGO_DEBUG("Reopened changelog\n"); + LX_DEBUG("Reopened changelog\n"); goto again; } } @@ -2926,23 +2925,23 @@ void lamigo_parse_rules(const char *rule_str, const char *filename) tok = json_tokener_new(); if (!tok) - LAMIGO_OOM(-1); + LX_OOM(-1); obj_top = json_tokener_parse_ex(tok, rule_str, strlen(rule_str)); if (obj_top == NULL) { enum json_tokener_error jerr; jerr = json_tokener_get_error(tok); - LAMIGO_FATAL("cannot parse rule '%s' in '%s': %s\n", - rule_str, filename, json_tokener_error_desc(jerr)); + LX_FATAL("cannot parse rule '%s' in '%s': %s\n", + rule_str, filename, json_tokener_error_desc(jerr)); } rc = json_object_object_get_ex(obj_top, LIPE_CONFIG_RULES, &obj_rules); if (!rc) - LAMIGO_FATAL("no rules in '%s'\n", filename); + LX_FATAL("no rules in '%s'\n", filename); if (json_object_get_type(obj_rules) != json_type_array) - LAMIGO_FATAL("rules in '%s' are not an array\n", filename); + LX_FATAL("rules in '%s' are not an array\n", filename); for (i = 0; i < json_object_array_length(obj_rules); i++) { struct json_object *obj_action, *obj_expr; @@ -2953,29 +2952,29 @@ void lamigo_parse_rules(const char *rule_str, const char *filename) obj_rule = json_object_array_get_idx(obj_rules, i); if (!obj_rule) - LAMIGO_FATAL("failed to get rule #%d\n", i); + LX_FATAL("failed to get rule #%d\n", i); rc = 
json_object_object_get_ex(obj_rule, LIPE_CONFIG_ACTION, &obj_action); if (!rc) - LAMIGO_FATAL("no action in rule %s\n", - json_object_to_json_string(obj_rule)); + LX_FATAL("no action in rule %s\n", + json_object_to_json_string(obj_rule)); action = json_object_get_string(obj_action); if (!action) - LAMIGO_FATAL("invalid action in rule %s\n", - json_object_to_json_string(obj_rule)); + LX_FATAL("invalid action in rule %s\n", + json_object_to_json_string(obj_rule)); rc = json_object_object_get_ex(obj_rule, LIPE_CONFIG_EXPRESSION, &obj_expr); if (!rc) - LAMIGO_FATAL("no expression in rule %s\n", - json_object_to_json_string(obj_rule)); + LX_FATAL("no expression in rule %s\n", + json_object_to_json_string(obj_rule)); expr = json_object_get_string(obj_expr); if (!expr) - LAMIGO_FATAL("invalid expression in rule %s\n", - json_object_to_json_string(obj_rule)); + LX_FATAL("invalid expression in rule %s\n", + json_object_to_json_string(obj_rule)); lr = xcalloc(1, sizeof(*lr)); @@ -2984,14 +2983,14 @@ void lamigo_parse_rules(const char *rule_str, const char *filename) else if (!strcmp(action, "mirror")) lr->lr_action.la_action = LAT_SHELL_CMD_FID; else - LAMIGO_FATAL("unknown action '%s' in rule '%s'\n", - action, json_object_to_json_string(obj_rule)); + LX_FATAL("unknown action '%s' in rule '%s'\n", + action, json_object_to_json_string(obj_rule)); LIPE_INIT_LIST_HEAD(&lr->lr_values); rc = lipe_policy_value_init(&lr->lr_values, &lr->lr_expression, &valid, expr); if (rc) - LAMIGO_FATAL("cannot parse expression in rule %s\n", strerror(-rc)); + LX_FATAL("cannot parse expression in rule %s\n", strerror(-rc)); lipe_list_add_tail(&lr->lr_linkage, &lamigo_rule_list); lamigo_rule_attrs |= lr->lr_attr_bits; @@ -3019,11 +3018,11 @@ static void *lamigo_signal_thread_start(void *arg) * EINVAL set contains an invalid signal number. 
*/ if (rc != 0) { - LAMIGO_ERROR("signal wait failed: %s\n", strerror(rc)); + LX_ERROR("signal wait failed: %s\n", strerror(rc)); continue; } - LAMIGO_DEBUG("received signal %d\n", sig); + LX_DEBUG("received signal %d\n", sig); switch (sig) { case SIGUSR1: @@ -3033,7 +3032,7 @@ static void *lamigo_signal_thread_start(void *arg) lamigo_dump_heat_file(); break; default: - LAMIGO_INFO("received signal %d, exiting\n", sig); + LX_INFO("received signal %d, exiting\n", sig); exit(EXIT_SUCCESS); } } @@ -3054,11 +3053,11 @@ int main(int argc, char **argv) rc = pthread_sigmask(SIG_BLOCK, &sigset, NULL); if (rc != 0) - LAMIGO_FATAL("cannot set signal mask: %s\n", strerror(rc)); + LX_FATAL("cannot set signal mask: %s\n", strerror(rc)); rc = pthread_create(&lamigo_signal_thread_id, NULL, &lamigo_signal_thread_start, &sigset); if (rc != 0) - LAMIGO_FATAL("cannot start signal thread: %s\n", strerror(rc)); + LX_FATAL("cannot start signal thread: %s\n", strerror(rc)); lipe_version_init(); ssh_threads_set_callbacks(ssh_threads_get_pthread()); @@ -3066,7 +3065,7 @@ int main(int argc, char **argv) setlinebuf(stdout); setlinebuf(stderr); - llapi_msg_set_level(lamigo_log_level); + llapi_msg_set_level(lx_log_level); lamigo_parse_opts(argc, argv); @@ -3074,12 +3073,12 @@ int main(int argc, char **argv) * followed by the MDT name ("lamigo lustre-MDT0000"). 
*/ llapi_set_command_name(opt.o_mdtname); - LAMIGO_INFO("version %s-%s, revision %s\n", - PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION); + LX_INFO("version %s-%s, revision %s\n", + PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION); rc = lamigo_init_cache(); if (rc < 0) - LAMIGO_FATAL("cannot init cache\n"); + LX_FATAL("cannot init cache\n"); /* create and lock pidfile to protect against another instance */ lamigo_lock_pidfile(); @@ -3087,8 +3086,8 @@ int main(int argc, char **argv) /* wait till the target pool got one OST at least */ lamigo_refresh_osts_from_pool(tgt_pools); while (tgt_pools->pl_ostnr == 0) { - LAMIGO_ERROR("target pool '%s' is empty, waiting %d seconds\n", - tgt_pools->pl_pool, opt.o_pool_refresh); + LX_ERROR("target pool '%s' is empty, waiting %d seconds\n", + tgt_pools->pl_pool, opt.o_pool_refresh); sleep(opt.o_pool_refresh); lamigo_refresh_osts_from_pool(tgt_pools); } @@ -3103,16 +3102,16 @@ int main(int argc, char **argv) rc = pthread_create(&lamigo_refresh_statfs_thread_id, NULL, lamigo_refresh_statfs_thread, NULL); if (rc != 0) - LAMIGO_FATAL("cannot start statfs thread: %s\n", strerror(rc)); + LX_FATAL("cannot start statfs thread: %s\n", strerror(rc)); if (opt.o_rescan) { /* scan the whole MDT and replicate matched files */ rc = lamigo_rescan(); if (rc < 0) - LAMIGO_FATAL("cannot scan device: %s\n", strerror(-rc)); + LX_FATAL("cannot scan device: %s\n", strerror(-rc)); } - LAMIGO_DEBUG("Start receiving records\n"); + LX_DEBUG("Start receiving records\n"); rc = llapi_changelog_start(&chglog_hdlr, CHANGELOG_FLAG_FOLLOW | CHANGELOG_FLAG_BLOCK | @@ -3120,9 +3119,9 @@ int main(int argc, char **argv) CHANGELOG_FLAG_EXTRA_FLAGS, opt.o_mdtname, 0); if (rc < 0) - LAMIGO_FATAL("cannot open changelog: %s\n", strerror(-rc)); + LX_FATAL("cannot open changelog: %s\n", strerror(-rc)); - LAMIGO_INFO("started\n"); + LX_INFO("started\n"); while (1) { if (head.lh_cached_count < opt.o_cached_fid_hiwm) @@ -3136,7 +3135,7 @@ int main(int argc, char **argv) if 
(!are_agents_busy()) { rc = lamigo_check_sync(); if (rc < 0) { - LAMIGO_ERROR("check sync failed: rc = %d\n", rc); + LX_ERROR("check sync failed: rc = %d\n", rc); sleep(1); } } @@ -3184,15 +3183,15 @@ static int lamigo_check_hot_one(struct alr_heat *ht) struct mirror_opts mo = { 0 }; int sync; - LAMIGO_DEBUG("check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, " - "L %d, I %d %s\n", PFID(&ht->ah_fid), - ht->ah_heat[0], ht->ah_heat[1], ht->ah_pools[0], - ht->ah_pools[1], ht->ah_livetime, ht->ah_idle, - ht->ah_mark ? "M" : ""); + LX_DEBUG("check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, " + "L %d, I %d %s\n", PFID(&ht->ah_fid), + ht->ah_heat[0], ht->ah_heat[1], ht->ah_pools[0], + ht->ah_pools[1], ht->ah_livetime, ht->ah_idle, + ht->ah_mark ? "M" : ""); if (ht->ah_mark & ALR_TAG_PROCESSED) { /* already tried to replicate */ - LAMIGO_DEBUG(DFID" tried to replicate already\n", - PFID(&ht->ah_fid)); + LX_DEBUG(DFID" tried to replicate already\n", + PFID(&ht->ah_fid)); return 0; } @@ -3204,8 +3203,8 @@ static int lamigo_check_hot_one(struct alr_heat *ht) if (ht->ah_heat[0] && ht->ah_heat[1] == 0 && ht->ah_pools[0] == 0 && ht->ah_pools[1]) { sync = lamigo_is_in_sync(&ht->ah_fid, tgt_pools, src_pools, &mo); - LAMIGO_DEBUG("try to replicate RO "DFID": %d\n", - PFID(&ht->ah_fid), sync); + LX_DEBUG("try to replicate RO "DFID": %d\n", + PFID(&ht->ah_fid), sync); if (sync != AMIGO_RESYNC_NONE) { lamigo_new_job_for_hot(&ht->ah_fid, sync, src_pools, mo.mo_stripes); @@ -3223,8 +3222,8 @@ static int lamigo_check_hot_one(struct alr_heat *ht) if (ht->ah_idle > 0 && ht->ah_heat[1] && ht->ah_pools[0] == 0 && ht->ah_pools[1]) { sync = lamigo_is_in_sync(&ht->ah_fid, tgt_pools, src_pools, &mo); - LAMIGO_DEBUG("try to replicate RW "DFID": %d\n", - PFID(&ht->ah_fid), sync); + LX_DEBUG("try to replicate RW "DFID": %d\n", + PFID(&ht->ah_fid), sync); if (sync != AMIGO_RESYNC_NONE) { lamigo_new_job_for_hot(&ht->ah_fid, sync, src_pools, mo.mo_stripes); @@ -3249,9 +3248,8 @@ static void 
lamigo_check_hot_on_cold(struct alr_heat *ht) ht->ah_pools[1] == 0 && ht->ah_pools[0]) { sync = lamigo_is_in_sync(&ht->ah_fid, src_pools, tgt_pools, &mo); - LAMIGO_DEBUG( - "replicate idling hot to CP "DFID": %d\n", - PFID(&ht->ah_fid), sync); + LX_DEBUG("replicate idling hot to CP "DFID": %d\n", + PFID(&ht->ah_fid), sync); if (sync != AMIGO_RESYNC_NONE) { lamigo_new_job_for_hot(&ht->ah_fid, sync, tgt_pools, mo.mo_stripes); @@ -3306,8 +3304,8 @@ static void lamigo_check_hot(void) if (src_pools->pl_open) { /* get most recent hot files */ ht = lamigo_get_hot(alr_hot_period, &nr); - LAMIGO_DEBUG("check hot in period %lu - %d\n", - alr_hot_period, nr); + LX_DEBUG("check hot in period %lu - %d\n", + alr_hot_period, nr); if (ht) { for (i = 0; i < nr; i++) lamigo_check_hot_one(ht + i); @@ -3321,16 +3319,16 @@ static void lamigo_check_hot(void) /* now check hot idling files - the files we found hot and * skipped replication. now it's time to try again */ ht = lamigo_get_hot(alr_hot_period - opt.o_alr_hot_after_idle, &nr); - LAMIGO_DEBUG("check idle in period %lu - %d\n", - alr_hot_period - 3, nr); + LX_DEBUG("check idle in period %lu - %d\n", + alr_hot_period - 3, nr); if (!ht) goto out; for (i = 0; i < nr; i++) { struct alr_heat *ah = ht + i; - LAMIGO_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n", - PFID(&ah->ah_fid), ah->ah_pools[0], ah->ah_pools[1], - ah->ah_livetime, ah->ah_idle); + LX_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n", + PFID(&ah->ah_fid), ah->ah_pools[0], ah->ah_pools[1], + ah->ah_livetime, ah->ah_idle); if (src_pools->pl_open) lamigo_check_hot_one(ht + i); if (tgt_pools->pl_open) @@ -3358,7 +3356,7 @@ static __u64 lamigo_read_osp_param(const int ostidx, const char *param) fd = open(path, O_RDONLY); if (fd < 0) { - LAMIGO_ERROR("cannot open '%s': %s\n", path, strerror(errno)); + LX_ERROR("cannot open '%s': %s\n", path, strerror(errno)); /* 0 means non-available OST */ return 0; } @@ -3366,7 +3364,7 @@ static __u64 lamigo_read_osp_param(const 
int ostidx, const char *param) if (rc > 0) retval = strtoul(buf, NULL, 10); if (rc < 0) - LAMIGO_ERROR("cannot read '%s': %s\n", path, strerror(errno)); + LX_ERROR("cannot read '%s': %s\n", path, strerror(errno)); close(fd); /* report zero if something went wrong @@ -3391,9 +3389,9 @@ static void lamigo_refresh_pool_statfs(struct pool_list *pl, int threshold) /* check OSP is active */ active = lamigo_read_osp_param(ostidx, "active"); status = lamigo_read_osp_param(ostidx, "prealloc_status"); - LAMIGO_DEBUG("statfs for %d%s/%d: %llu from %llu\n", - ostidx, active ? "(active)" : "(inactive)", - (int)status, kbavail, kbtotal); + LX_DEBUG("statfs for %d%s/%d: %llu from %llu\n", + ostidx, active ? "(active)" : "(inactive)", + (int)status, kbavail, kbtotal); if (!active || status) continue; tavail += kbavail; @@ -3408,9 +3406,9 @@ static void lamigo_refresh_pool_statfs(struct pool_list *pl, int threshold) pl->pl_open = true; /* whether pool is good for replicas */ - LAMIGO_DEBUG("statfs for %s %s pool: %llu from %llu, thresh %llu\n", - pl->pl_open ? "open" : "closed", pl->pl_pool, tavail, - ttotal, ttotal * threshold / 100); + LX_DEBUG("statfs for %s %s pool: %llu from %llu, thresh %llu\n", + pl->pl_open ? 
"open" : "closed", pl->pl_pool, tavail, + ttotal, ttotal * threshold / 100); } static void lamigo_refresh_statfs(void) @@ -3443,13 +3441,13 @@ static void *lamigo_refresh_statfs_thread(void *arg) str = strstr(opt.o_mdtname, "-MDT"); if (!str) - LAMIGO_FATAL("cannot get MDT index from '%s'\n", opt.o_mdtname); + LX_FATAL("cannot get MDT index from '%s'\n", opt.o_mdtname); lamigo_mdtidx = strtoul(str + 4, NULL, 16); rc = cfs_get_param_paths(&paths, "osp"); if (rc != 0) - LAMIGO_FATAL("cannot find OSP root: %s\n", strerror(errno)); + LX_FATAL("cannot find OSP root: %s\n", strerror(errno)); osproot = strdup(paths.gl_pathv[0]); globfree(&paths); diff --git a/lipe/src/lamigo.h b/lipe/src/lamigo.h index 343c641..c5c8813 100644 --- a/lipe/src/lamigo.h +++ b/lipe/src/lamigo.h @@ -119,79 +119,4 @@ void lamigo_alr_dump_heat_table(FILE *file); void lamigo_parse_rules(const char *rule_str, const char *filename); -extern int lamigo_log_level; /* enum llapi_message_level */ -extern const char *lamigo_mdt_name; - -/* lamigo runs as a systemd service. So whatever it prints to stderr - * (or stdout) will be collected by journald and added to the - * logs. journald will add a lamigo[$PID] prefix. So when we print a - * debug message we don't need an lamigo prefix but we do want to - * include the MDT name. In this was we get - * - * Sep 22 12:51:10 $HOSTNAME lamigo[24074]: lustre-MDT0001: blah balh -*/ -#define LAMIGO_PRINT(level, fmt, args...) \ - do { \ - if (level <= lamigo_log_level) \ - fprintf(stderr, "%s: " fmt, lamigo_mdt_name, ##args); \ - } while (0) - -#define LAMIGO_DEBUG(fmt, args...) \ - LAMIGO_PRINT(LLAPI_MSG_DEBUG, "DEBUG: " fmt, ##args) - -#define LAMIGO_INFO(fmt, args...) \ - LAMIGO_PRINT(LLAPI_MSG_INFO, "INFO: " fmt, ##args) - -#define LAMIGO_WARN(fmt, args...) \ - LAMIGO_PRINT(LLAPI_MSG_WARN, "WARN: " fmt, ##args) - -#define LAMIGO_ERROR(fmt, args...) \ - LAMIGO_PRINT(LLAPI_MSG_ERROR, "ERROR: " fmt, ##args) - -#define LAMIGO_FATAL(fmt, args...) 
\ - do { \ - LAMIGO_PRINT(LLAPI_MSG_FATAL, "FATAL: " fmt, ##args); \ - exit(EXIT_FAILURE); \ - } while (0) - -#define LAMIGO_OOM_AT(file, line, func, size) \ - LAMIGO_FATAL("out of memory at (%s:%d:%s), size = %zd\n", (file), (line), (func), (ssize_t)(size)) - -#define LAMIGO_OOM(size) \ - LAMIGO_OOM_AT(__FILE__, __LINE__, __func__, (size)) - -static inline void *xmalloc1(const char *file, int line, const char *func, size_t size) -{ - void *ptr = malloc(size); - - if (ptr == NULL && size != 0) - LAMIGO_OOM_AT(file, line, func, size); - - return ptr; -} - -static inline void *xcalloc1(const char *file, int line, const char *func, size_t nmemb, size_t size) -{ - void *ptr = calloc(nmemb, size); - - if (ptr == NULL && (nmemb * size) != 0) - LAMIGO_OOM_AT(file, line, func, (nmemb * size)); - - return ptr; -} - -static inline void *xstrdup1(const char *file, int line, const char *func, const char *s) -{ - void *ptr = strdup(s); - - if (ptr == NULL) - LAMIGO_OOM_AT(file, line, func, strlen(s) + 1); - - return ptr; -} - -#define xmalloc(size) (xmalloc1(__FILE__, __LINE__, __func__, (size))) -#define xcalloc(nmemb, size) (xcalloc1(__FILE__, __LINE__, __func__, (nmemb), (size))) -#define xstrdup(s) (xstrdup1(__FILE__, __LINE__, __func__, (s))) - #endif diff --git a/lipe/src/lamigo_alr.c b/lipe/src/lamigo_alr.c index 1e675a9..47e78eb 100644 --- a/lipe/src/lamigo_alr.c +++ b/lipe/src/lamigo_alr.c @@ -30,6 +30,7 @@ #include "list.h" #include "debug.h" #include "lipe_ssh.h" +#include "lx_log.h" #include "lamigo.h" #include "lamigo_hash.h" @@ -520,10 +521,9 @@ static void lamigo_alr_update_heat_all(void) #ifdef LAMIGO_DEBUG_ALR /* just to check/print sorting result */ - llapi_printf(LLAPI_MSG_DEBUG, - "SORT %d in %d: hottest %llu, hot >= %llu\n", nr, - alr_head.alh_lidx, max, - max * (100 - opt.o_alr_hot_fraction) / 100); + LX_DEBUG("SORT %d in %d: hottest %llu, hot >= %llu\n", nr, + alr_head.alh_lidx, max, + max * (100 - opt.o_alr_hot_fraction) / 100); i = 0; asum[0] = (__u64)1 
<< 63; lipe_list_for_each_entry(f, &p->alp_list, ar_link) { @@ -532,14 +532,13 @@ static void lamigo_alr_update_heat_all(void) asum[0] = t; if (i++ >= 8) continue; - llapi_printf(LLAPI_MSG_DEBUG, - DFID" H: %d/%d O: %llu IO: %llu/%llu P: %llu/%llu i: %d\n", - PFID(&f->ar_fh.fh_fid), (int)f->ar_heat[0], (int)f->ar_heat[1], - f->ar_maxoff[0] > f->ar_maxoff[1] ? f->ar_maxoff[0] : f->ar_maxoff[1], - f->ar_total_ops[0] > 0 ? f->ar_iosize[0] / f->ar_total_ops[0] : 0, - f->ar_total_ops[1] > 0 ? f->ar_iosize[1] / f->ar_total_ops[1] : 0, - f->ar_pools[ALR_HOT], f->ar_pools[ALR_COLD], - (int)alr_head.alh_lidx - f->ar_idx); + LX_DEBUG(DFID" H: %d/%d O: %llu IO: %llu/%llu P: %llu/%llu i: %d\n", + PFID(&f->ar_fh.fh_fid), (int)f->ar_heat[0], (int)f->ar_heat[1], + f->ar_maxoff[0] > f->ar_maxoff[1] ? f->ar_maxoff[0] : f->ar_maxoff[1], + f->ar_total_ops[0] > 0 ? f->ar_iosize[0] / f->ar_total_ops[0] : 0, + f->ar_total_ops[1] > 0 ? f->ar_iosize[1] / f->ar_total_ops[1] : 0, + f->ar_pools[ALR_HOT], f->ar_pools[ALR_COLD], + (int)alr_head.alh_lidx - f->ar_idx); } #endif @@ -767,7 +766,7 @@ int lamigo_alr_get_hot_files(int period, struct alr_heat *ht, int nr, } } if (!p) { - llapi_printf(LLAPI_MSG_DEBUG, "period %d not found\n", period); + LX_DEBUG("period %d not found\n", period); return 0; } pthread_mutex_lock(&p->alp_mutex); @@ -828,7 +827,7 @@ void lamigo_alr_init(void) str = strstr(opt.o_mdtname, "-MDT"); if (!str) - LAMIGO_FATAL("cannot get MDT index from '%s'\n", opt.o_mdtname); + LX_FATAL("cannot get MDT index from '%s'\n", opt.o_mdtname); mdtidx = strtoul(str + 4, NULL, 16); if (opt.o_alr_ofd_interval == 0) { @@ -840,7 +839,7 @@ void lamigo_alr_init(void) rc = fid_hash_init(&alr_head.alh_hash); if (rc) - LAMIGO_OOM(-1); + LX_OOM(-1); alr_head.alh_period = xcalloc(sizeof(*alr_head.alh_period), opt.o_alr_periods); @@ -858,12 +857,12 @@ void lamigo_alr_init(void) rc = pthread_create(&ala->ala_pid, NULL, lamigo_alr_data_collection_thread, ala); if (rc) - LAMIGO_FATAL("cannot start 
access log reader: %s\n", strerror(rc)); + LX_FATAL("cannot start access log reader: %s\n", strerror(rc)); } rc = pthread_create(&pid, NULL, lamigo_alr_heat_thread, NULL); if (rc) - LAMIGO_FATAL("cannot start heat-maint thread: %s\n", strerror(rc)); + LX_FATAL("cannot start heat-maint thread: %s\n", strerror(rc)); } void lamigo_add_alr_agent(const char *host) diff --git a/lipe/src/lamigo_hash.c b/lipe/src/lamigo_hash.c index 88fbef5..a2c77f4 100644 --- a/lipe/src/lamigo_hash.c +++ b/lipe/src/lamigo_hash.c @@ -1,4 +1,5 @@ #include +#include "lx_log.h" #include "lamigo.h" #include "lamigo_hash.h" diff --git a/lipe/src/lpurge.c b/lipe/src/lpurge.c index 4423b38..fc2bb2d 100644 --- a/lipe/src/lpurge.c +++ b/lipe/src/lpurge.c @@ -59,6 +59,7 @@ #include #include #include +#include "lx_log.h" #include "lipe_object_attrs.h" #include "lipe_version.h" #include "list.h" @@ -150,6 +151,9 @@ struct options { .o_scan_rate = DEF_SCAN_RATE, }; +enum llapi_message_level lx_log_level = LLAPI_MSG_INFO; +char *lx_log_prefix; /* To print device name in log messages */ + struct lipe_instance instance; unsigned long long freelo; /* low free space when to scan quickly and remove */ unsigned long long freehi; /* high free space when stop to scan */ @@ -289,11 +293,9 @@ static void lpurge_find_device(char *devname) int rc, i; rc = cfs_get_param_paths(&paths, "osd-*/%s/fstype", devname); - if (rc != 0) { - llapi_printf(LLAPI_MSG_FATAL, "can't find device '%s': rc=%d\n", - devname, errno); - exit(1); - } + if (rc != 0) + LX_FATAL("can't find device '%s': %s\n", devname, strerror(errno)); + for (i = 0; i < paths.gl_pathc; i++) { struct stat st; @@ -301,14 +303,11 @@ static void lpurge_find_device(char *devname) continue; if (!S_ISREG(st.st_mode)) continue; - if (ostname) { - llapi_printf(LLAPI_MSG_FATAL, - "Multiple OST devices found for '%s'\n", - devname); - exit(1); - } - ostname = strdup(devname); - ostprefix = strdup(paths.gl_pathv[i]); + if (ostname) + LX_FATAL("Multiple OST devices 
found for '%s'\n", devname); + + ostname = xstrdup(devname); + ostprefix = xstrdup(paths.gl_pathv[i]); } i = strlen(ostprefix); while (i && ostprefix[i] != '/') @@ -328,8 +327,7 @@ static int lpurge_read_param(const char *param, char *val, const int vallen) snprintf(buf, sizeof(buf), "%s/%s", ostprefix, param); fd = open(buf, O_RDONLY); if (fd < 0) { - llapi_printf(LLAPI_MSG_ERROR, - "can't open %s: %d\n", buf, errno); + LX_ERROR("can't open '%s': %s\n", buf, strerror(errno)); return -errno; } rc = read(fd, val, vallen); @@ -340,7 +338,7 @@ static int lpurge_read_param(const char *param, char *val, const int vallen) rc = 0; } if (rc < 0) { - llapi_printf(LLAPI_MSG_ERROR, "can't read: %d\n", errno); + LX_ERROR("can't read: %s\n", strerror(errno)); return -errno; } close(fd); @@ -354,14 +352,13 @@ static int lpurge_get_ost_mntpt(void) f = setmntent("/proc/mounts", "r"); if (!f) { - llapi_printf(LLAPI_MSG_ERROR, - "failed to read mtab file: %s\n", strerror(errno)); + LX_ERROR("failed to read mtab file: %s\n", strerror(errno)); return -errno; } while ((m = getmntent(f))) { if (!strcmp(ost_mntdev, m->mnt_fsname)) { - ost_mntpt = strdup(m->mnt_dir); + ost_mntpt = xstrdup(m->mnt_dir); return 0; } } @@ -380,7 +377,7 @@ static int lpurge_kbfree(unsigned long long *kbfreesize) rc = statfs(ost_mntpt, &statfs_buf); if (rc) { - llapi_printf(LLAPI_MSG_ERROR, "can't statfs: rc=%d\n", rc); + LX_ERROR("can't statfs: %s\n", strerror(errno)); return rc; } @@ -412,8 +409,7 @@ void lpurge_configure_thresholds(void) rc = lpurge_kbtotal(&fresh); if (rc) { - llapi_printf(LLAPI_MSG_ERROR, "failed to get total space, %d\n", - rc); + LX_ERROR("failed to get total space: %s\n", strerror(errno)); return; } @@ -423,14 +419,11 @@ void lpurge_configure_thresholds(void) lpurge_last_kbtotal = fresh; freelo = opt.o_freelo * lpurge_last_kbtotal / 100; freehi = opt.o_freehi * lpurge_last_kbtotal / 100; - if (freehi <= freelo) { - llapi_printf(LLAPI_MSG_FATAL, - "freehi (%llu) <= freelo (%llu)\n", - 
freehi, freelo); - exit(1); - } - llapi_printf(LLAPI_MSG_DEBUG, "total: %llu, free lo: %llu hi %llu\n", - lpurge_last_kbtotal, freelo, freehi); + if (freehi <= freelo) + LX_FATAL("freehi (%llu) <= freelo (%llu)\n", freehi, freelo); + + LX_DEBUG("total: %llu, free lo: %llu hi: %llu\n", + lpurge_last_kbtotal, freelo, freehi); } static void lpurge_wait_for_scan(void) @@ -443,13 +436,12 @@ static void lpurge_wait_for_scan(void) if (rc) goto wait; - llapi_printf(LLAPI_MSG_DEBUG, "free %llu low %llu high %llu\n", - kbfree, freelo, freehi); + LX_DEBUG("free %llu low %llu high %llu\n", + kbfree, freelo, freehi); /* no free space, don't wait any longer */ if (kbfree <= freelo && time(NULL) - scan_finished_time >= lpurge_suspend_time) { - llapi_printf(LLAPI_MSG_INFO, - "low watermark hit, scan at full rate\n"); + LX_INFO("low watermark hit, scan at full rate\n"); stats.s_low_hits++; break; } @@ -457,8 +449,7 @@ static void lpurge_wait_for_scan(void) /* low free space and scanned objects are too old, start a scan */ if (kbfree <= freehi && time(NULL) - scan_finished_time >= lpurge_expire_time) { - llapi_printf(LLAPI_MSG_INFO, - "hi watermark hit, scan slowly\n"); + LX_INFO("hi watermark hit, scan slowly\n"); stats.s_slow_scans++; break; } @@ -719,11 +710,10 @@ int lpurge_lipe_callback(struct lipe_instance *instance, if (!rc) goto out_ls_mutex; - llapi_printf(LLAPI_MSG_DEBUG, - "found under "DFID": size %ld block %ld age %ld slot %d\n", - PFID(&attrs->loa_filter_fid.ff_parent), - (unsigned long)attrs->loa_size, - (unsigned long)attrs->loa_blocks >> 10, age, index); + LX_DEBUG("found under "DFID": size %ld block %ld age %ld slot %d\n", + PFID(&attrs->loa_filter_fid.ff_parent), + (unsigned long)attrs->loa_size, + (unsigned long)attrs->loa_blocks >> 10, age, index); ls->ls_found++; ls->ls_space += attrs->loa_blocks >> 10; @@ -745,7 +735,7 @@ int lpurge_lipe_callback(struct lipe_instance *instance, pthread_mutex_unlock(&ls->ls_mutex); index--; - llapi_printf(LLAPI_MSG_DEBUG, 
"reclaiming one slot\n"); + LX_DEBUG("reclaiming one slot\n"); lpurge_reclaim_slot(index); ls = lpurge_hist + index; @@ -755,10 +745,7 @@ int lpurge_lipe_callback(struct lipe_instance *instance, } } - lo = calloc(1, sizeof(*lo)); - if (lo == NULL) - goto out_ls_mutex; - + lo = xcalloc(1, sizeof(*lo)); lo->lo_fid = attrs->loa_filter_fid.ff_parent; lo->lo_blocks = attrs->loa_blocks >> 10; lo->lo_last_utime = last_used; @@ -772,8 +759,8 @@ int lpurge_lipe_callback(struct lipe_instance *instance, lipe_list_add(&lo->lo_list, &ls->ls_obj_list); ls->ls_stored++; - llapi_printf(LLAPI_MSG_DEBUG, "add "DFID" to %p/%lu\n", - PFID(&attrs->loa_filter_fid.ff_parent), ls, ls->ls_stored); + LX_DEBUG("add "DFID" to %p/%lu\n", + PFID(&attrs->loa_filter_fid.ff_parent), ls, ls->ls_stored); if (ls->ls_max_utime < last_used) ls->ls_max_utime = last_used; @@ -800,8 +787,7 @@ out: /* check for low space */ if (kbfree < freelo && lpurge_scan_rate > 0) { /* full speed mode */ - llapi_printf(LLAPI_MSG_INFO, - "low watermark hit, scan at full rate\n"); + LX_INFO("low watermark hit, scan at full rate\n"); lpurge_scan_rate = 0; } } @@ -889,8 +875,8 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id) rc = ioctl(open_by_fid_fd, IOC_MDC_GETFILESTRIPE, lov_xattr_buf); if (rc < 0) { rc = -errno; - llapi_printf(LLAPI_MSG_DEBUG, "cannot IOC_MDC_GETFILESTRIPE "DFID", rc = %d\n", - PFID(fid), rc); + LX_DEBUG("cannot IOC_MDC_GETFILESTRIPE "DFID", rc = %d\n", + PFID(fid), rc); goto out; } @@ -917,9 +903,8 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id) fd = openat(open_by_fid_fd, fid_buf, O_RDWR); if (fd < 0) { rc = -errno; - llapi_printf(LLAPI_MSG_DEBUG, - "cannot open "DFID" for split: %s\n", - PFID(fid), strerror(errno)); + LX_DEBUG("cannot open "DFID" for split: %s\n", + PFID(fid), strerror(errno)); goto out; } @@ -952,12 +937,7 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id) goto out; } - lil = calloc(1, offsetof(typeof(*lil), 
lil_ids[2])); - if (lil == NULL) { - rc = -ENOMEM; - goto out; - } - + lil = xcalloc(1, offsetof(typeof(*lil), lil_ids[2])); lil->lil_mode = LL_LEASE_UNLCK; lil->lil_flags = LL_LEASE_LAYOUT_SPLIT; lil->lil_count = 2; @@ -975,13 +955,11 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id) } out: if (rc < 0) - llapi_printf(LLAPI_MSG_DEBUG, - "cannot delete mirror %u of "DFID": rc = %d\n", - mirror_id, PFID(fid), rc); + LX_DEBUG("cannot delete mirror %u of "DFID": rc = %d\n", + mirror_id, PFID(fid), rc); else - llapi_printf(LLAPI_MSG_DEBUG, - "deleted mirror %u of "DFID"\n", - mirror_id, PFID(fid)); + LX_DEBUG("deleted mirror %u of "DFID"\n", + mirror_id, PFID(fid)); llapi_layout_free(layout); @@ -1057,12 +1035,9 @@ static void lpurge_work_threads_stop(void) pthread_mutex_unlock(&lpurge_work_lock); for (i = 0; i < lpurge_work_thread_count; i++) { - rc = pthread_join(lpurge_work_threads[i], NULL); - if (rc != 0) { - llapi_printf(LLAPI_MSG_FATAL, - "cannot join work thread: %s\n", - strerror(rc)); - } + rc = pthread_join(lpurge_work_threads[i], NULL); /* errno returned */ + if (rc != 0) + LX_ERROR("cannot join work thread: %s\n", strerror(rc)); } lpurge_work_thread_count = 0; @@ -1081,21 +1056,15 @@ static int lpurge_work_threads_start(size_t thread_count) lpurge_work_should_run = true; - lpurge_work_threads = calloc(thread_count, sizeof(lpurge_work_threads[0])); - if (lpurge_work_threads == NULL) { - rc = -ENOMEM; - goto out; - } + lpurge_work_threads = xcalloc(thread_count, sizeof(lpurge_work_threads[0])); while (lpurge_work_thread_count < thread_count) { rc = pthread_create(&lpurge_work_threads[lpurge_work_thread_count], NULL /* attr */, &lpurge_work_func, NULL /* data */); - if (rc != 0) { - llapi_printf(LLAPI_MSG_FATAL, - "cannot create work thread: %s\n", - strerror(rc)); + if (rc != 0) { /* errno returned */ + LX_ERROR("cannot create work thread: %s\n", strerror(rc)); rc = -rc; goto out; } @@ -1128,8 +1097,8 @@ void lpurge_purge_slot(struct 
lpurge_slot *ls, long long target) /* try to remove some replicas */ again: - llapi_printf(LLAPI_MSG_DEBUG, "release upto %llu (expect %lu in %lu)\n", - target, ls->ls_space, ls->ls_found); + LX_DEBUG("release upto %llu (expect %lu in %lu)\n", + target, ls->ls_space, ls->ls_found); total = 0; assert(!lipe_list_empty(&ls->ls_obj_list)); @@ -1167,7 +1136,7 @@ again: if (rc) return; - llapi_printf(LLAPI_MSG_DEBUG, "spawn, expect %llu back\n", total); + LX_DEBUG("spawn, expect %llu back\n", total); /* Wait for purge threads to complete all submitted work. */ pthread_mutex_lock(&lpurge_work_lock); @@ -1188,14 +1157,14 @@ again: if (kbfree > was && kbfree - was >= total) break; } - llapi_printf(LLAPI_MSG_DEBUG, "got %llu back (now %llu, was %llu)\n", - kbfree - was, kbfree, was); + LX_DEBUG("got %llu back (now %llu, was %llu)\n", + kbfree - was, kbfree, was); if (kbfree > was) target -= kbfree - was; if (target <= 0 || kbfree >= freehi) { /* got enough space back, relax */ - llapi_printf(LLAPI_MSG_DEBUG, "relax\n"); + LX_DEBUG("relax\n"); return; } @@ -1209,7 +1178,7 @@ again: return; } - llapi_printf(LLAPI_MSG_DEBUG, "scan for more objects\n"); + LX_DEBUG("scan for more objects\n"); /* if not enough space is released, but slot's counters suppose * more space, then repeat scanning for this age only @@ -1251,9 +1220,7 @@ void lpurge_free_space(void) if (rc || kbfree >= freehi) return; - llapi_printf(LLAPI_MSG_INFO, - "%lluM space free, try to release some\n", - kbfree >> 10); + LX_INFO("%lluM space free, try to release some\n", kbfree >> 10); /* start from the oldest group */ for (i = LPURGE_HIST_MAX - 1; i >= 0; i--) { @@ -1268,14 +1235,13 @@ void lpurge_free_space(void) break; } - llapi_printf(LLAPI_MSG_DEBUG, "try to release slot %d\n", i); + LX_DEBUG("try to release slot %d\n", i); lpurge_purge_slot(ls, freehi - kbfree); } rc = lpurge_kbfree(&kbfree); if (!rc) - llapi_printf(LLAPI_MSG_INFO, - "%lluM space free\n", kbfree >> 10); + LX_INFO("%lluM space free\n", kbfree 
>> 10); } void lpurge_scan(void) @@ -1284,7 +1250,7 @@ void lpurge_scan(void) time_t scan_time; int i; - llapi_printf(LLAPI_MSG_DEBUG, "now look for space\n"); + LX_DEBUG("now look for space\n"); if (now - scan_finished_time < lpurge_expire_time) { /* if data aren't too old and there is non-empty slot, @@ -1292,10 +1258,9 @@ void lpurge_scan(void) */ for (i = 0; i < LPURGE_HIST_MAX; i++) { if (!lipe_list_empty(&lpurge_hist[i].ls_obj_list)) { - llapi_printf(LLAPI_MSG_DEBUG, - "slot %d isnt empty: %lu in %lu\n", - i, lpurge_hist[i].ls_space, - lpurge_hist[i].ls_found); + LX_DEBUG("slot %d isnt empty: %lu in %lu\n", + i, lpurge_hist[i].ls_space, + lpurge_hist[i].ls_found); return; } } @@ -1331,7 +1296,7 @@ void lpurge_scan(void) scan_started_time = now; lipe_scan_llite(&instance, opt.o_scan_threads); scan_finished_time = time(NULL); - llapi_printf(LLAPI_MSG_DEBUG, "SCANNED: oldest %lu\n", oldest); + LX_DEBUG("SCANNED: oldest %lu\n", oldest); scan_time = scan_finished_time - scan_started_time; lpurge_suspend_time = scan_time >> 1; @@ -1344,20 +1309,18 @@ void lpurge_scan(void) /* XXX: update expire time to 8X last scan time if need */ if (lpurge_expire_time < scan_time * 8) { lpurge_expire_time = scan_time * 8; - llapi_printf(LLAPI_MSG_DEBUG, - "increasing lpurge_expire to %lu seconds\n", - lpurge_expire_time); + LX_DEBUG("increasing lpurge_expire to %lu seconds\n", + lpurge_expire_time); } for (i = 0; i < LPURGE_HIST_MAX; i++) { if (lpurge_hist[i].ls_found == 0) continue; - llapi_printf(LLAPI_MSG_DEBUG, - "%d (< %lu): %lu in %lu objects\n", - i, lpurge_hist[i].ls_age, - lpurge_hist[i].ls_space, - lpurge_hist[i].ls_found); + LX_DEBUG("%d (< %lu): %lu in %lu objects\n", + i, lpurge_hist[i].ls_age, + lpurge_hist[i].ls_space, + lpurge_hist[i].ls_found); } } @@ -1367,20 +1330,15 @@ void parse_mountpoint(const char *name) int rc, idx; lustre_fd = open(name, O_RDONLY); - if (lustre_fd < 0) { - llapi_printf(LLAPI_MSG_FATAL, - "can't open %s: %d\n", name, errno); - exit(1); - 
} + if (lustre_fd < 0) + LX_FATAL("can't open '%s': %s\n", name, strerror(errno)); + fid.f_seq = FID_SEQ_NORMAL; fid.f_oid = 0; fid.f_ver = 0; rc = llapi_get_mdt_index_by_fid(lustre_fd, &fid, &idx); - if (rc != 0 && rc != -ENOENT) { - llapi_printf(LLAPI_MSG_FATAL, - "%s isn't Lustre mountpoint: %d\n", name, rc); - exit(1); - } + if (rc != 0 && rc != -ENOENT) /* can be positive and not errno */ + LX_FATAL("'%s' isn't Lustre mountpoint: %d\n", name, rc); open_by_fid_fd = openat(lustre_fd, ".lustre/fid", O_RDONLY); if (open_by_fid_fd < 0) { @@ -1433,16 +1391,17 @@ void lpurge_process_opt(int c, char *optarg) switch (c) { case LPURGE_INTERNAL_DUMP_FIDS: - opt.o_fids_dumpfile = strdup(optarg); + opt.o_fids_dumpfile = xstrdup(optarg); break; case LPURGE_OPT_VERSION: lipe_version(); exit(0); case 'b': + lx_log_level = LLAPI_MSG_MAX; llapi_msg_set_level(LLAPI_MSG_MAX); break; case 'D': - opt.o_device = strdup(optarg); + opt.o_device = xstrdup(optarg); lpurge_find_device(optarg); break; case 'f': @@ -1495,15 +1454,14 @@ void lpurge_process_opt(int c, char *optarg) /* parse_mds(optarg); */ break; case 'M': - opt.o_mountpoint = strdup(optarg); + opt.o_mountpoint = xstrdup(optarg); parse_mountpoint(optarg); break; case 'n': - llapi_printf(LLAPI_MSG_WARN, - "'-n' and '--dryrun' are deprecated, they are noop now\n"); + LX_WARN("'-n' and '--dryrun' are deprecated, they are noop now\n"); break; case 'p': - opt.o_pool = strdup(optarg); + opt.o_pool = xstrdup(optarg); break; case 'R': value = strtol(optarg, &endptr, 10); @@ -1536,15 +1494,12 @@ void lpurge_process_opt(int c, char *optarg) opt.o_scan_threads = value; break; case 'w': - opt.o_dumpfile = strdup(optarg); + opt.o_dumpfile = xstrdup(optarg); break; default: - llapi_printf(LLAPI_MSG_FATAL, - "unknown option '-%c'\n", optopt); - fprintf(stderr, "Try '%s --help' for more information.\n", - program_invocation_short_name); - exit(1); - break; + LX_FATAL("unknown option '-%c'\n" + "Try '%s --help' for more information.\n", + 
optopt, program_invocation_short_name); } } @@ -1554,11 +1509,9 @@ void load_config(char *name) FILE *f; f = fopen(name, "r"); - if (!f) { - llapi_printf(LLAPI_MSG_FATAL, - "can't open config file %s\n", name); - exit(1); - } + if (!f) + LX_FATAL("can't open config file '%s': %s\n", + name, strerror(errno)); while (!feof(f)) { struct option *opt; @@ -1579,8 +1532,7 @@ void load_config(char *name) continue; opt = lpurge_keyword_lookup(t); if (!opt) { - llapi_printf(LLAPI_MSG_ERROR, - "unknown tunable: %s\n", t); + LX_ERROR("unknown tunable: '%s'\n", t); continue; } if (opt->val == 'f') { @@ -1592,16 +1544,13 @@ void load_config(char *name) opt->has_arg == optional_argument) { optarg = strsep(&s, "\n "); if (!optarg && - opt->has_arg == required_argument) { - llapi_printf(LLAPI_MSG_FATAL, - "no argument for %s\n", t); - exit(1); - } + opt->has_arg == required_argument) + LX_FATAL("no argument for '%s'\n", t); } else { optarg = NULL; } - llapi_printf(LLAPI_MSG_DEBUG, "conf: %s %s\n", t, - optarg ? optarg : ""); + LX_DEBUG("conf: '%s' '%s'\n", t, + optarg ? 
optarg : ""); lpurge_process_opt(opt->val, optarg); } fclose(f); @@ -1613,55 +1562,37 @@ void lpurge_verify_opts(void) if (!opt.o_pool) { opt.o_pool = DEF_POOL; - llapi_printf(LLAPI_MSG_INFO, - "source pool isn't defined, use %s\n", opt.o_pool); + LX_INFO("source pool isn't defined, use '%s'\n", opt.o_pool); } - if (opt.o_freelo == 0) { opt.o_freelo = DEF_FREELO; - llapi_printf(LLAPI_MSG_INFO, - "low watermark is not defined, use %u\n", - opt.o_freelo); - } - if (opt.o_freelo < 1 || opt.o_freelo > 99) { - llapi_printf(LLAPI_MSG_ERROR, - "Invalid free low threshold: %u\n", opt.o_freelo); - exit(1); + LX_INFO("low watermark is not defined, use %u\n", opt.o_freelo); } + if (opt.o_freelo < 1 || opt.o_freelo > 99) + LX_FATAL("Invalid free low threshold: %u\n", opt.o_freelo); if (opt.o_freehi == 0) { opt.o_freehi = DEF_FREEHI; - llapi_printf(LLAPI_MSG_INFO, - "high watermark is not defined, use %u\n", - opt.o_freehi); - } - if (opt.o_freehi < 1 || opt.o_freehi > 99) { - llapi_printf(LLAPI_MSG_ERROR, - "Invalid free high threshold: %u\n", opt.o_freehi); - exit(1); + LX_INFO("high watermark is not defined, use %u\n", opt.o_freehi); } + if (opt.o_freehi < 1 || opt.o_freehi > 99) + LX_FATAL("Invalid free high threshold: %u\n", opt.o_freehi); - if (!ostprefix) { - llapi_printf(LLAPI_MSG_ERROR, - "OST device is not defined\n"); - exit(1); - } + if (!ostprefix) + LX_FATAL("OST device is not defined\n"); if (!opt.o_dumpfile) { snprintf(buf, sizeof(buf), LPURGE_DUMPFILE, ostname); - opt.o_dumpfile = strdup(buf); + opt.o_dumpfile = xstrdup(buf); } if (!opt.o_fids_dumpfile) { snprintf(buf, sizeof(buf), LPURGE_FIDS_DUMPFILE, ostname); - opt.o_fids_dumpfile = strdup(buf); + opt.o_fids_dumpfile = xstrdup(buf); } - if (lustre_fd < 0) { - llapi_printf(LLAPI_MSG_ERROR, - "client mountpoint is not defined\n"); - exit(1); - } + if (lustre_fd < 0) + LX_FATAL("client mountpoint is not defined\n"); lpurge_read_param("mntdev", ost_mntdev, sizeof(buf)); strcpy(instance.li_device, ost_mntdev); 
@@ -1674,31 +1605,23 @@ void lpurge_verify_opts(void) else if (!strcmp(buf, "zfs")) { instance.li_expected_fstype = LBT_LUSTRE_ON_ZFS; } else { - llapi_printf(LLAPI_MSG_FATAL, "Unknown fstype %s\n", buf); - exit(1); + LX_FATAL("unknown fstype '%s'\n", buf); } - if (opt.o_scan_threads < 1) { - llapi_printf(LLAPI_MSG_FATAL, "invalid scan_threads: %d\n", - opt.o_scan_threads); - exit(1); - } + if (opt.o_scan_threads < 1) + LX_FATAL("invalid scan_threads: %d\n", + opt.o_scan_threads); if (opt.o_scan_threads > sysconf(_SC_NPROCESSORS_ONLN)) { opt.o_scan_threads = sysconf(_SC_NPROCESSORS_ONLN); - llapi_printf(LLAPI_MSG_DEBUG, - "reset it to online cpu count: %lu\n", - sysconf(_SC_NPROCESSORS_ONLN)); - } - - if (opt.o_max_jobs < 1 || opt.o_max_jobs > 1024) { - llapi_printf(LLAPI_MSG_FATAL, - "invalid max_jobs: %d\n", opt.o_max_jobs); - exit(1); + LX_DEBUG("reset it to online cpu count: %lu\n", + sysconf(_SC_NPROCESSORS_ONLN)); } + if (opt.o_max_jobs < 1 || opt.o_max_jobs > 1024) + LX_FATAL("invalid max_jobs: %d\n", opt.o_max_jobs); } void lpurge_parse_opts(int argc, char **argv) @@ -1738,10 +1661,11 @@ void lpurge_usr1_handle(int sig) f = fopen(opt.o_dumpfile, "w"); if (!f) { - llapi_printf(LLAPI_MSG_DEBUG, "can't open dump file\n"); + LX_DEBUG("can't open dump file '%s'\n", + opt.o_dumpfile); return; } - llapi_printf(LLAPI_MSG_DEBUG, "dump to %s\n", opt.o_dumpfile); + LX_DEBUG("dump to %s\n", opt.o_dumpfile); fprintf(f, "version: %s-%s\n" "revision: %s\n" @@ -1819,9 +1743,7 @@ void lpurge_usr2_handle(int sig) dflist = flist_alloc(NULL, LPURGE_FLIST_SIZE, opt.o_fids_dumpfile, LDT_FILE); if (dflist == NULL) { - llapi_printf(LLAPI_MSG_ERROR, - "failed to alloc fid list, %s\n", - strerror(errno)); + LX_ERROR("failed to alloc fid list, %s\n", strerror(errno)); return; } @@ -1866,10 +1788,9 @@ void lpurge_usr2_handle(int sig) json_object_put(obj_summary); if (rc < 0) { - llapi_printf(LLAPI_MSG_ERROR, - "failed to dump [%s] to file: %s, %s\n", - output, opt.o_fids_dumpfile, - 
strerror(errno)); + LX_ERROR("failed to dump [%s] to file: %s, %s\n", + output, opt.o_fids_dumpfile, + strerror(errno)); return; } flist_write(dflist, true); @@ -1910,10 +1831,9 @@ void lpurge_usr2_handle(int sig) json_object_put(slot_stats); json_object_put(obj_slot); if (rc < 0) { - llapi_printf(LLAPI_MSG_ERROR, - "failed to dump [%s] to file: %s, %s\n", - output, opt.o_fids_dumpfile, - strerror(errno)); + LX_ERROR("failed to dump [%s] to file: %s, %s\n", + output, opt.o_fids_dumpfile, + strerror(errno)); return; } } @@ -1956,10 +1876,9 @@ void lpurge_usr2_handle(int sig) rc = flist_add_one(dflist, output); json_object_put(obj_stat); if (rc < 0) { - llapi_printf(LLAPI_MSG_DEBUG, - "failed to dump [%s] to file: %s, %s\n", - output, opt.o_fids_dumpfile, - strerror(errno)); + LX_DEBUG("failed to dump [%s] to file: '%s', '%s'\n", + output, opt.o_fids_dumpfile, + strerror(errno)); continue; } dumped++; @@ -1977,12 +1896,10 @@ static void lpurge_lock_pidfile(void) snprintf(buf, sizeof(buf), LPURGE_PIDFILE, ostname); fd = open(buf, O_RDWR | O_CREAT, 0600); - if (fd < 0) { - llapi_printf(LLAPI_MSG_FATAL, - "can't create pidfile: %s\n", - strerror(errno)); - exit(1); - } + if (fd < 0) + LX_FATAL("can't create pidfile '%s': %s\n", + buf, strerror(errno)); + rc = flock(fd, LOCK_EX | LOCK_NB); if (rc < 0) { sz = read(fd, buf, sizeof(buf)); @@ -1991,26 +1908,20 @@ static void lpurge_lock_pidfile(void) sz = 0; if (sz > 0) buf[sz] = 0; - llapi_printf(LLAPI_MSG_FATAL, - "another lpurge is running, locked by %s\n", - sz > 0 ? buf : "[unknown]"); - exit(1); + LX_FATAL("another lpurge is running, locked by '%s'\n", + sz > 0 ? 
buf : "[unknown]"); } rc = ftruncate(fd, 0); - if (rc < 0) { - llapi_printf(LLAPI_MSG_FATAL, - "cannot truncate pidfile: %s\n", strerror(errno)); - exit(1); - } + if (rc < 0) + LX_FATAL("cannot truncate pidfile: %s\n", strerror(errno)); sz = snprintf(buf, sizeof(buf), "%d\n", getpid()); rc = write(fd, buf, sz); - if (rc < 0 || rc != sz) { - llapi_printf(LLAPI_MSG_FATAL, - "can't write pidfile: rc=%d\n", rc); - exit(1); - } + if (rc < 0) + LX_FATAL("can't write pidfile: %s\n", strerror(errno)); + if (rc != sz) + LX_FATAL("can't write pidfile: %d != %d\n", rc, sz); } static void lpurge_register_signal_handlers(void) @@ -2076,7 +1987,7 @@ int main(int argc, char **argv) * to release some amount at least */ - /* what for low thresholds */ + /* wait for low thresholds */ lpurge_wait_for_scan(); /* start slowly, if the scanner meets free low threshold, diff --git a/lipe/src/lx_log.h b/lipe/src/lx_log.h new file mode 100644 index 0000000..02b8160 --- /dev/null +++ b/lipe/src/lx_log.h @@ -0,0 +1,93 @@ +#ifndef _LX_LOG_H_ +#define _LX_LOG_H_ + +#include +#include +#include +#include + +extern enum llapi_message_level lx_log_level; +extern char *lx_log_prefix; /* Device name the message is related to */ + +/* lamigo/lpurge runs as a systemd service. So whatever it prints to stderr + * (or stdout) will be collected by journald and added to the + * logs. journald will add a lamigo[$PID] prefix. So when we print a + * debug message we don't need an lamigo prefix but we do want to + * include the MDT name. In this was we get + * + * Sep 22 12:51:10 $HOSTNAME lamigo[24074]: lustre-MDT0001: blah blah + */ +#define LX_PRINT(level, fmt, args...) \ + do { \ + if (level <= lx_log_level) \ + fprintf(stderr, "%s%s" fmt, \ + lx_log_prefix ? lx_log_prefix : "", \ + lx_log_prefix ? ": " : "", \ + ##args); \ + } while (0) + +#define LX_DEBUG(fmt, args...) \ + LX_PRINT(LLAPI_MSG_DEBUG, "DEBUG: " fmt, ##args) + +#define LX_INFO(fmt, args...) 
\ + LX_PRINT(LLAPI_MSG_INFO, "INFO: " fmt, ##args) + +#define LX_WARN(fmt, args...) \ + LX_PRINT(LLAPI_MSG_WARN, "WARN: " fmt, ##args) + +#define LX_ERROR(fmt, args...) \ + LX_PRINT(LLAPI_MSG_ERROR, "ERROR: " fmt, ##args) + +#define LX_FATAL(fmt, args...) \ + do { \ + LX_PRINT(LLAPI_MSG_FATAL, "FATAL: " fmt, ##args); \ + exit(EXIT_FAILURE); \ + } while (0) + +#define LX_OOM_AT(file, line, func, size) \ + LX_FATAL("out of memory at (%s:%d:%s), size = %zd\n", \ + (file), (line), (func), (ssize_t)(size)) + +#define LX_OOM(size) \ + LX_OOM_AT(__FILE__, __LINE__, __func__, (size)) + +static inline void *xmalloc1(const char *file, int line, const char *func, size_t size) +{ + void *ptr = malloc(size); + + if (ptr == NULL && size != 0) + LX_OOM_AT(file, line, func, size); + + return ptr; +} + +static inline void *xcalloc1(const char *file, int line, const char *func, size_t nmemb, size_t size) +{ + void *ptr = calloc(nmemb, size); + + if (ptr == NULL && (nmemb * size) != 0) + LX_OOM_AT(file, line, func, (nmemb * size)); + + return ptr; +} + +static inline void *xstrdup1(const char *file, int line, const char *func, const char *s) +{ + void *ptr; + + if (s == NULL) + LX_FATAL("NULL pointer at (%s:%d:%s)\n", file, line, func); + + ptr = strdup(s); + + if (ptr == NULL) + LX_OOM_AT(file, line, func, strlen(s) + 1); + + return ptr; +} + +#define xmalloc(size) (xmalloc1(__FILE__, __LINE__, __func__, (size))) +#define xcalloc(nmemb, size) (xcalloc1(__FILE__, __LINE__, __func__, (nmemb), (size))) +#define xstrdup(s) (xstrdup1(__FILE__, __LINE__, __func__, (s))) + +#endif -- 1.8.3.1