#define LAMIGO_HEAT_FILE "/var/run/lamigo-%s.heat"
#define LAMIGO_PIDFILE "/var/run/lamigo-%s.pid"
+int lamigo_log_level = LLAPI_MSG_INFO;
+const char *lamigo_mdt_name = "NONE";
+
static LIPE_LIST_HEAD(lamigo_rule_list);
__u64 lamigo_rule_attrs; /* attributes needed to evalute the rules */
DEF_HOT_AFTER_IDLE,
DEF_SRC_FREE,
DEF_TGT_FREE);
- exit(0);
+ exit(EXIT_SUCCESS);
}
#define container_of(ptr, type, member) ({ \
};
struct options opt = {
- .o_verbose = LLAPI_MSG_INFO,
.o_min_age = DEF_MIN_AGE,
.o_cache_size = DEF_CACHE_SIZE,
.o_chlg_clear_frequency = 4096,
}
}
-static void lamigo_sigterm_handler(int sig)
-{
- psignal(sig, "exiting");
-
- _exit(0);
-}
-
-static void lamigo_sigusr1_handler(int sig)
+static void lamigo_dump_stats_file(void)
{
struct resync_agent *a;
struct pool_list *pl;
FILE *f;
int i;
- llapi_printf(LLAPI_MSG_DEBUG, "dump to %s\n", opt.o_dump_file);
+ LAMIGO_DEBUG("dumping stats to '%s'\n", opt.o_dump_file);
if (opt.o_dump_file == NULL)
return;
f = fopen(opt.o_dump_file, "w");
fclose(f);
}
-static void lamigo_sigusr2_handler(int sig)
+static void lamigo_dump_heat_file(void)
{
FILE *f;
- llapi_printf(LLAPI_MSG_DEBUG, "heat to %s\n", opt.o_heat_file);
+ LAMIGO_DEBUG("dumping heat to '%s'\n", opt.o_heat_file);
if (opt.o_heat_file == NULL)
return;
f = fopen(opt.o_heat_file, "w");
if (!f) {
- llapi_printf(LLAPI_MSG_DEBUG, "can't open heat file\n");
+ LAMIGO_ERROR("cannot open heat file '%s': %s\n", opt.o_heat_file, strerror(errno));
return;
}
lamigo_alr_dump_heat_table(f);
return 0;
}
-static void lamigo_cleanup(void)
-{
- struct resync_agent *agent;
-
- fid_hash_free(&head.lh_hash);
- lipe_list_for_each_entry(agent, &lamigo_agent_list, rag_list) {
- struct resync_ssh_session *rss, *tmp;
-
- lipe_list_for_each_entry_safe(rss, tmp,
- &agent->rag_ssh_list,
- rss_list) {
- lipe_ssh_context_destroy(&rss->rss_ctx);
- lipe_list_del(&rss->rss_list);
- free(rss);
- }
- }
-}
-
static int lamigo_exec_cmd(struct resync_agent *a, const char *cmd, int *pstatus)
{
struct resync_ssh_session *rss;
int resync = rj->rj_resync;
char cmd[PATH_MAX * 2];
int status = INT_MAX;
- enum llapi_message_level msg_level;
int rc;
if (rj->rj_setprefer) {
"'%s/.lustre/fid/"DFID"' > /dev/null 2>&1", rj->rj_pool,
agent->rag_mountpoint,
PFID(&rj->rj_fid));
- llapi_printf(LLAPI_MSG_DEBUG, "set prefer on "DFID"\n",
+ LAMIGO_DEBUG("set prefer on "DFID"\n",
PFID(&rj->rj_fid));
} else if (resync == AMIGO_RESYNC_EXTEND) {
int i;
agent->rag_mountpoint,
PFID(&rj->rj_fid));
} else {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "unknown resync: %d", resync);
+ LAMIGO_ERROR("unknown resync: %d\n", resync);
rc = -EINVAL;
goto out;
}
/* rc < 0 means an ssh error. Otherwise command exit status is
* in status. Mask common exit statuses. */
rc = lamigo_exec_cmd(agent, cmd, &status);
+ LAMIGO_DEBUG("exec command '%s' on '%s': rc = %d, status = %d\n",
+ cmd, agent->rag_hostname, rc, status);
if (rc < 0 ||
/* 1 from setprefer (see EX-3591) */
(rj->rj_setprefer && status != 0 && status != 1) ||
/* EBUSY from mirror extend/resync */
(!rj->rj_setprefer && status != 0 && status != EBUSY))
- msg_level = LLAPI_MSG_ERROR;
- else
- msg_level = LLAPI_MSG_DEBUG;
-
- llapi_error(msg_level|LLAPI_MSG_NO_ERRNO, 0,
- "error executing command '%s' on '%s': rc = %d, status = %d",
- cmd, agent->rag_hostname, rc, status);
+ LAMIGO_ERROR("command '%s' on '%s' failed: rc = %d, status = %d\n",
+ cmd, agent->rag_hostname, rc, status);
out:
/* notify the main thread about completion */
write(lamigo_sigpipe[1], &rc, 1);
}
}
if (!a) {
- llapi_printf(LLAPI_MSG_DEBUG, "no good agent\n");
+ LAMIGO_DEBUG("no good agent\n");
return -EBUSY;
}
rj->rj_agent = a;
rj->rj_start = time(NULL);
- llapi_printf(LLAPI_MSG_DEBUG, "new job %s for "DFID" spawned on %s\n",
+ LAMIGO_DEBUG("new job %s for "DFID" spawned on %s\n",
resync == AMIGO_RESYNC_EXTEND ? "extend" : "resync",
PFID(&rj->rj_fid), rj->rj_agent->rag_hostname);
} else {
*objects = NULL;
*stripes = 0;
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "unsupported LOV magic %x", v3->lmm_magic);
+ LAMIGO_ERROR("unsupported LOV magic %x\n", v3->lmm_magic);
return -EINVAL;
}
return 0;
}
if (opt.o_src_dom && v3->lmm_pattern == LOV_PATTERN_MDT) {
- llapi_printf(LLAPI_MSG_DEBUG,
- "DoM component");
+ LAMIGO_DEBUG("DoM component");
onsrc++;
continue;
}
rc = lipe_rule_evaluate(rule, attrs, sysattrs, &result);
if (rc) {
- llapi_error(LLAPI_MSG_ERROR, rc, "rule failed");
+ LAMIGO_ERROR("cannot evaluate rule: %s\n", strerror(-rc));
return AMIGO_RESYNC_NONE;
}
if (!result)
snprintf(attrs->loa_fid_str, sizeof(attrs->loa_fid_str),
DFID_NOBRACE, PFID(&attrs->loa_fid));
attrs->loa_attr_bits |= LIPE_OBJECT_ATTR_LMAEA;
- llapi_printf(LLAPI_MSG_DEBUG, "got LMA: %d\n", rc);
+ LAMIGO_DEBUG("got LMA: %d\n", rc);
}
}
*/
resync = lamigo_check_user_rules(&attrs, &sysattrs);
if (resync == AMIGO_RESYNC_NONE) {
- llapi_printf(LLAPI_MSG_DEBUG,
- "skip "DFID" due to rules\n",
- PFID(fid));
+ LAMIGO_DEBUG("skip "DFID" due to rules\n",
+ PFID(fid));
stats.s_skip_by_rule++;
goto out;
}
out:
lamigo_hist_add(fid, resync);
- llapi_printf(LLAPI_MSG_DEBUG, "check "DFID" stripes=%d: resync=%d\n",
+ LAMIGO_DEBUG("check "DFID" stripes=%d: resync=%d\n",
PFID(fid), mo->mo_stripes, resync);
return resync;
(void *)a);
if (rc)
return;
- rj = calloc(1, sizeof(struct resync_job));
- if (rj == NULL) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "can't allocate for a test job");
- return;
- }
+ rj = xcalloc(1, sizeof(*rj));
rj->rj_check_job = 1;
rj->rj_pid = pid;
rj->rj_agent = a;
if (rc)
return;
- srj = calloc(1, sizeof(*srj));
- if (srj == NULL) {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "can't allocate for a job");
- return;
- }
-
+ srj = xcalloc(1, sizeof(*srj));
srj->rj_fid = rj->rj_fid;
srj->rj_setprefer = 1;
/* XXX: few src pools? */
/* cold pool is close to full, skip replication */
/* do this check before expensive layout fetching, rules, etc */
stats.s_skip_tgt_closed++;
- llapi_printf(LLAPI_MSG_DEBUG, "pool %s closed for "DFID"\n",
+ LAMIGO_DEBUG("pool %s closed for "DFID"\n",
tgt_pools->pl_pool, PFID(&f->fr_fh.fh_fid));
return 0;
}
if (are_agents_busy()) {
/* all the agents are busy */
- llapi_printf(LLAPI_MSG_DEBUG, "no agents avilable (max: %d)\n", lamigo_max_jobs);
+ LAMIGO_DEBUG("no agents avilable (max: %d)\n", lamigo_max_jobs);
return 1;
}
return 0;
}
if (ah.ah_hot && alr_period - ah.ah_hot <= 1) {
- llapi_printf(LLAPI_MSG_DEBUG,
+ LAMIGO_DEBUG(
"skip hot "DFID" in %u, now %lu\n",
PFID(&f->fr_fh.fh_fid), ah.ah_hot,
alr_period);
return 0;
}
- rj = calloc(1, sizeof(struct resync_job));
- if (rj == NULL) {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "can't allocate for a job");
- return 1;
- }
+ rj = xcalloc(1, sizeof(*rj));
rj->rj_fid = f->fr_fh.fh_fid;
rj->rj_stripes = mo.mo_stripes;
rj->rj_index = f->fr_index;
struct resync_job, rj_list);
lipe_list_del(&rj->rj_list);
rc = lamigo_submit_job(rj);
- llapi_printf(LLAPI_MSG_DEBUG,
- "tried to resubmit failed job %p: rc=%d\n", rj, rc);
+ LAMIGO_DEBUG("tried to resubmit failed job %p: rc=%d\n", rj, rc);
if (rc != 0)
return rc;
}
fh = fid_hash_find(&head.lh_hash, &rec->cr_tfid);
if (fh == NULL) {
- f = calloc(sizeof(struct fid_rec), 1);
- if (f == NULL) {
- rc = -ENOMEM;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "failed to alloc memory for fid_rec");
- return rc;
- }
-
+ f = xcalloc(1, sizeof(*f));
f->fr_fh.fh_fid = rec->cr_tfid;
f->fr_index = index;
f->fr_time = rec->cr_time;
index - lamigo_last_cleared_index < opt.o_chlg_clear_frequency)
return;
- llapi_printf(LLAPI_MSG_DEBUG, "CLEAR upto %llu in %s (%llu last)\n",
+ LAMIGO_DEBUG("CLEAR upto %llu in %s (%llu last)\n",
index, opt.o_chlg_user, lamigo_last_processed_idx);
lamigo_last_cleared_index = index;
rc = llapi_changelog_clear(opt.o_mdtname, opt.o_chlg_user, index);
static void lamigo_job_fini(struct resync_job *rj, intptr_t retval)
{
- llapi_printf(LLAPI_MSG_DEBUG,
- "job %lu on "DFID" done in %lu: %"PRIdPTR" (%d)\n",
- rj->rj_pid, PFID(&rj->rj_fid), time(NULL) - rj->rj_start,
- retval, rj->rj_agent->rag_bad);
+ LAMIGO_DEBUG("job %lu on "DFID" done in %lu: %"PRIdPTR" (%d)\n",
+ rj->rj_pid, PFID(&rj->rj_fid), time(NULL) - rj->rj_start,
+ retval, rj->rj_agent->rag_bad);
rj->rj_done_timestamp = time(NULL);
if (retval == 0) {
/* the agent is back */
if (rj->rj_agent->rag_bad) {
- llapi_printf(LLAPI_MSG_DEBUG, "agent %s is back\n",
- rj->rj_agent->rag_hostname);
+ LAMIGO_DEBUG("agent %s is back\n",
+ rj->rj_agent->rag_hostname);
rj->rj_agent->rag_bad = false;
lamigo_max_jobs += rj->rj_agent->rag_maxjobs;
}
} else {
/* the agent is still bad */
if (rj->rj_agent->rag_bad == false) {
- llapi_printf(LLAPI_MSG_DEBUG, "agent %s is bad\n",
- rj->rj_agent->rag_hostname);
+ LAMIGO_DEBUG("agent %s is bad\n",
+ rj->rj_agent->rag_hostname);
assert(lamigo_max_jobs >= rj->rj_agent->rag_maxjobs);
lamigo_max_jobs -= rj->rj_agent->rag_maxjobs;
struct resync_agent *a;
int i;
- a = calloc(1, sizeof(*a));
- if (!a) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "can't allocate memory for agent");
- exit(1);
- }
-
+ a = xcalloc(1, sizeof(*a));
a->rag_index = lamigo_agent_count;
a->rag_hostname = strdup(host);
a->rag_mountpoint = strdup(mnt);
char *endptr;
a->rag_maxjobs = strtol(jobs, &endptr, 10);
- if (*endptr != '\0') {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "invalid jobs: '%s' (1-2048 expected)",
- jobs);
- exit(1);
- }
+ if (*endptr != '\0')
+ LAMIGO_FATAL("invalid jobs: '%s' (1-2048 expected)\n", jobs);
} else {
a->rag_maxjobs = DEF_AGENT_JOBS;
}
- if (a->rag_maxjobs < 1 || a->rag_maxjobs > 2048) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "invalid jobs per agent: %d (1-2048 expected)",
- a->rag_maxjobs);
- exit(1);
- }
+ if (a->rag_maxjobs < 1 || a->rag_maxjobs > 2048)
+ LAMIGO_FATAL("invalid jobs per agent: %d (1-2048 expected)\n",
+ a->rag_maxjobs);
+
lipe_list_add(&a->rag_list, &lamigo_agent_list);
a->rag_jobs = 0;
/* ssh context per job, and one more for agent heartbeat */
for (i = 0; i < a->rag_maxjobs + 1; i++) {
- struct resync_ssh_session *rss = calloc(1, sizeof(*rss));
+ struct resync_ssh_session *rss = xcalloc(1, sizeof(*rss));
int rc;
- if (!rss) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "can't allocate memory for agent ssh session\n");
- exit(1);
- }
-
rc = lipe_ssh_context_init(&rss->rss_ctx, a->rag_hostname);
- if (rc < 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "cannot create SSH context for '%s'\n",
- a->rag_hostname);
- exit(1);
- }
-
+ if (rc < 0)
+ LAMIGO_FATAL("cannot create SSH context for '%s': rc = %d\n",
+ a->rag_hostname, rc);
lipe_list_add(&rss->rss_list, &a->rag_ssh_list);
}
- llapi_printf(LLAPI_MSG_DEBUG, "AGENT: %s %s %d\n", a->rag_hostname,
+ LAMIGO_DEBUG("AGENT: %s %s %d\n", a->rag_hostname,
a->rag_mountpoint, a->rag_maxjobs);
lamigo_agent_count++;
{
struct pool_list *pl;
- pl = calloc(sizeof(*pl), 1);
- if (pl == NULL) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't allocate pool");
- exit(1);
- }
- pl->pl_pool = strdup(pool);
- if (pl->pl_pool == NULL) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't allocate pool name");
- exit(1);
- }
+ pl = xcalloc(sizeof(*pl), 1);
+ pl->pl_pool = xstrdup(pool);
pl->pl_ostnr = 0;
pl->pl_osts = NULL;
pthread_rwlock_init(&pl->pl_lock, NULL);
int oldlevel;
rc = cfs_get_param_paths(&paths, "lod/%s-*/numobd", fsname);
- if (rc != 0) {
- llapi_error(LLAPI_MSG_FATAL, errno,
- "can't find numobd fs '%s'", fsname);
- exit(1);
- }
+ if (rc != 0)
+ LAMIGO_FATAL("cannot read OBD count from 'lod/%s-*/numobd': %s\n",
+ fsname, strerror(errno));
+
for (i = 0; i < paths.gl_pathc; i++) {
rc = lamigo_read_file(paths.gl_pathv[i], data, sizeof(data));
if (rc >= 0) {
char *endptr;
obdcount = strtol(data, &endptr, 10);
- if (*endptr != '\0') {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "invalid numobd: '%s'", data);
- exit(1);
- }
+ if (*endptr != '\0')
+ LAMIGO_FATAL("invalid OBD count '%s'\n", data);
+
break;
}
}
globfree(&paths);
- if (obdcount < 0) {
- llapi_error(LLAPI_MSG_FATAL, errno, "can't find fs '%s'", fsname);
- exit(1);
- }
+ if (obdcount < 0)
+ LAMIGO_FATAL("cannot find filesystem '%s'\n", fsname);
bufsize = sizeof(struct obd_uuid) * obdcount;
- buffer = malloc(bufsize + sizeof(*list) * obdcount);
- if (buffer == NULL) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't get mem for pool members");
- exit(1);
- }
+ buffer = xmalloc(bufsize + sizeof(*list) * obdcount);
list = (char **) (buffer + bufsize);
snprintf(poolname, sizeof(poolname), "%s.%s", fsname, pl->pl_pool);
oldlevel = llapi_msg_get_level();
goto out;
}
if (pl->pl_osts == NULL)
- pl->pl_osts = malloc(sizeof(int) * nb);
- if (pl->pl_osts == NULL) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't allocate mem for OST ind");
- exit(1);
- }
+ pl->pl_osts = xmalloc(sizeof(int) * nb);
fslen = strlen(fsname);
for (i = 0; i < nb; i++) {
break;
case LAMIGO_OPT_OFD_INTERVAL:
opt.o_alr_ofd_interval = atoi(optarg);
- if (opt.o_alr_ofd_interval < 1) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid ofd interval '%s'", optarg);
- exit(1);
- }
+ if (opt.o_alr_ofd_interval < 1)
+ LAMIGO_FATAL("invalid ofd interval '%s'\n", optarg);
break;
case LAMIGO_OPT_HOT_FRACTION:
opt.o_alr_hot_fraction = atoi(optarg);
if (opt.o_alr_hot_fraction < 1 ||
- opt.o_alr_hot_fraction > 100) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid hot fraction '%s'", optarg);
- exit(1);
- }
+ opt.o_alr_hot_fraction > 100)
+ LAMIGO_FATAL("invalid hot fraction '%s'\n", optarg);
break;
case LAMIGO_OPT_HOT_AFTER_IDLE:
opt.o_alr_hot_after_idle = atoi(optarg);
if (opt.o_alr_hot_after_idle < 1 ||
- opt.o_alr_hot_after_idle >= opt.o_alr_periods) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid hot-after-idle '%s'", optarg);
- exit(1);
- }
+ opt.o_alr_hot_after_idle >= opt.o_alr_periods)
+ LAMIGO_FATAL("invalid hot-after-idle '%s'\n", optarg);
break;
case LAMIGO_OPT_MIRROR_CMD:
opt.o_mirror_cmd = strdup(optarg);
break;
case LAMIGO_OPT_POOL_REFRESH:
opt.o_pool_refresh = strtol(optarg, &endptr, 10);
- if (*endptr != '\0' || opt.o_pool_refresh < 1) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad pool refresh interval '%s'", optarg);
- exit(1);
- }
+ if (*endptr != '\0' || opt.o_pool_refresh < 1)
+ LAMIGO_FATAL("invalid pool refresh interval '%s'\n", optarg);
break;
case LAMIGO_OPT_PROGRESS_INTV:
opt.o_progress_interval = strtol(optarg, &endptr, 10);
- if (*endptr != '\0' || opt.o_progress_interval < 1) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad progress interval '%s'", optarg);
- exit(1);
- }
+ if (*endptr != '\0' || opt.o_progress_interval < 1)
+ LAMIGO_FATAL("invalid progress interval '%s'\n", optarg);
break;
case LAMIGO_OPT_ALR_EXTRA_ARGS:
opt.o_alr_extra_args = optarg;
break;
case LAMIGO_OPT_SRC_FREE:
opt.o_src_free = atoi(optarg);
- if (opt.o_src_free < 1 || opt.o_src_free > 99) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad source free space '%s'", optarg);
- exit(1);
- }
+ if (opt.o_src_free < 1 || opt.o_src_free > 99)
+ LAMIGO_FATAL("invalid source free space '%s'\n", optarg);
break;
case LAMIGO_OPT_SRC_DOM:
opt.o_src_dom = 1;
break;
case LAMIGO_OPT_TGT_FREE:
opt.o_tgt_free = atoi(optarg);
- if (opt.o_tgt_free < 1 || opt.o_tgt_free > 99) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad target free space '%s'", optarg);
- exit(1);
- }
+ if (opt.o_tgt_free < 1 || opt.o_tgt_free > 99)
+ LAMIGO_FATAL("invalid target free space '%s'\n", optarg);
break;
case LAMIGO_OPT_VERSION:
lipe_version();
- exit(0);
+ exit(EXIT_SUCCESS);
case 'a':
opt.o_min_age = strtol(optarg, &endptr, 10);
- if (*endptr != '\0' || opt.o_min_age < 5) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad value for -a %s", optarg);
- exit(1);
- }
+ if (*endptr != '\0' || opt.o_min_age < 5)
+ LAMIGO_FATAL("invalid value for -a '%s'\n", optarg);
break;
case 'b':
+ lamigo_log_level = LLAPI_MSG_MAX;
llapi_msg_set_level(LLAPI_MSG_MAX);
break;
case 'c': {
rc = strsize2int(&cache_size, optarg);
if (rc < 0 || cache_size <= 0 ||
- (cache_size >= 100 && cache_size < 1<<20)) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "bad value for -c '%s'", optarg);
- exit(1);
- }
+ (cache_size >= 100 && cache_size < 1<<20))
+ LAMIGO_FATAL("invalid cache size '%s'\n", optarg);
/* For value < 100, it is taken as the percentage of
* total memory instead.
opt.o_cache_size = get_fid_cache_size(cache_size);
else
opt.o_cache_size = cache_size;
- llapi_printf(LLAPI_MSG_INFO, "Cache size: %lu\n", opt.o_cache_size);
+ LAMIGO_INFO("cache size: %lu\n", opt.o_cache_size);
break;
}
case 'f':
host = strsep(&optarg, ":");
mnt = strsep(&optarg, ":");
jobs = strsep(&optarg, ":");
- if (!host || !mnt) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "invalid agent definition");
- exit(1);
- }
+ if (!host || !mnt)
+ LAMIGO_FATAL("invalid agent definition\n");
+
lamigo_add_agent(host, mnt, jobs);
break;
case 'h':
enable_heat = 0;
} else {
opt.o_alr_heat_fn = atoi(optarg);
- if (opt.o_alr_heat_fn < 0 || opt.o_alr_heat_fn > 1) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "invalid heat function '%s'",
- optarg);
- exit(1);
- }
+ if (opt.o_alr_heat_fn < 0 || opt.o_alr_heat_fn > 1)
+ LAMIGO_FATAL("invalid heat function '%s'\n", optarg);
}
break;
case 'I':
opt.o_alr_hot_after_idle = atoi(optarg);
break;
case 'm':
- opt.o_mdtname = strdup(optarg);
+ lamigo_mdt_name = xstrdup(optarg);
+ opt.o_mdtname = xstrdup(optarg);
break;
case 'M':
opt.o_mntpt = strdup(optarg);
break;
case 'n':
opt.o_num_threads = strtoul(optarg, NULL, 0);
- if (opt.o_num_threads < 1) {
- rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "invalid thread number: %d",
- opt.o_num_threads);
- exit(1);
- }
+ if (opt.o_num_threads < 1)
+ LAMIGO_FATAL("invalid thread number: %d\n", opt.o_num_threads);
break;
case 'o':
lamigo_add_alr_agent(optarg);
opt.o_chlg_user = strdup(optarg);
break;
case 'v':
- opt.o_verbose++;
+ lamigo_log_level++;
break;
case 'w':
opt.o_dump_file = strdup(optarg);
break;
default:
rc = -EINVAL;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "%s: unknown option '-%c'\n",
- program_invocation_short_name,
- optopt);
- fprintf(stderr, "Try '%s --help' for more information.\n",
- program_invocation_short_name);
- exit(1);
+ fprintf(stderr,
+ "%s: unrecognized option '-%c'\n"
+ "Try '%s --help' for more information.\n",
+ program_invocation_short_name, optopt, program_invocation_short_name);
+ exit(EXIT_FAILURE + 1);
break;
}
}
(*counter)++;
else if (*p == '}')
(*counter)--;
- if (*counter < 0) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid rule string");
- exit(1);
- }
+ if (*counter < 0)
+ LAMIGO_FATAL("invalid rule '%s'\n", str);
p++;
}
}
len += strlen(src);
if (dst)
len += strlen(dst);
- n = malloc(len + 1);
+ n = xmalloc(len + 1);
if (src)
strcpy(n, src);
if (dst)
FILE *f;
f = fopen(name, "r");
- if (!f) {
- llapi_error(LLAPI_MSG_FATAL, errno,
- "can't open config file %s", name);
- exit(1);
- }
+ if (!f)
+ LAMIGO_FATAL("cannot open config file '%s': %s\n", name, strerror(errno));
+
while (!feof(f)) {
struct option *opt;
char *s, *t;
opt->has_arg == optional_argument) {
optarg = strsep(&s, "\n ");
if (!optarg &&
- opt->has_arg == required_argument) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "no argument for %s", t);
- exit(1);
- }
+ opt->has_arg == required_argument)
+ LAMIGO_FATAL("option '%s' requires an argument\n", t);
} else {
optarg = NULL;
}
- llapi_printf(LLAPI_MSG_DEBUG, "conf: %s %s\n", t, optarg);
+ LAMIGO_DEBUG("conf: %s %s\n", t, optarg);
lamigo_process_opt(opt->val, optarg);
}
fprintf(stderr,
"Try '%s --help' for more information.\n",
program_invocation_short_name);
- exit(1);
+ exit(EXIT_FAILURE + 1);
}
if (strcmp(options[opt_index].name, "mountpoint") == 0)
llapi_err_noerrno(LLAPI_MSG_WARN,
lamigo_process_opt(c, optarg);
}
- if (!opt.o_mntpt) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "%s: no mount point specified\n", argv[0]);
- exit(1);
- }
+ if (!opt.o_mntpt)
+ LAMIGO_FATAL("no mount point specified\n");
rc = llapi_search_fsname(opt.o_mntpt, fsname);
- if (rc < 0) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "cannot find a Lustre file system mounted at '%s'",
- opt.o_mntpt);
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("cannot find a Lustre file system mounted at '%s'\n",
+ opt.o_mntpt);
+
+ if (!opt.o_mdtname)
+ LAMIGO_FATAL("no MDT specified\n");
- if (!opt.o_mdtname) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "no MDT specified");
- exit(1);
- }
rc = cfs_get_param_paths(&paths, "mdt/%s/uuid", opt.o_mdtname);
- if (rc != 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't find MDT %s", opt.o_mdtname);
- exit(1);
- }
+ if (rc != 0)
+ LAMIGO_FATAL("cannot find MDT uuid from 'mdt/%s/uuid': %s\n",
+ opt.o_mdtname, strerror(errno));
+
globfree(&paths);
snprintf(buf, sizeof(buf), "%s/.lustre/fid", opt.o_mntpt);
open_by_fid_fd = open(buf, O_RDONLY);
- if (open_by_fid_fd < 0) {
- llapi_error(LLAPI_MSG_FATAL, errno, "can't open '%s'", buf);
- exit(1);
- }
+ if (open_by_fid_fd < 0)
+ LAMIGO_FATAL("cannot open '%s': %s\n", buf, strerror(errno));
if (src_pools == NULL) {
lamigo_parse_pool(DEF_SOURCE_POOL);
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "source pools aren't defined, use %s",
- DEF_SOURCE_POOL);
+ LAMIGO_WARN("source pools aren't defined, using '%s'\n", DEF_SOURCE_POOL);
}
if (opt.o_tgt_pool == NULL) {
opt.o_tgt_pool = DEF_TARGET_POOL;
- llapi_err_noerrno(LLAPI_MSG_INFO,
- "target pool not defined, use %s",
- opt.o_tgt_pool);
+ LAMIGO_WARN("target pool is not defined, using %s\n", opt.o_tgt_pool);
}
opt.o_tgt_pool_len = strlen(opt.o_tgt_pool);
- if (lamigo_lookup_pool(opt.o_tgt_pool)) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "target pool '%s' cannot also be source pool",
+ if (lamigo_lookup_pool(opt.o_tgt_pool))
+ LAMIGO_FATAL("target pool '%s' cannot also be source pool\n",
opt.o_tgt_pool);
- exit(1);
- }
- if (lipe_list_empty(&lamigo_agent_list)) {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "no agents configured?");
- exit(1);
- }
+ if (lipe_list_empty(&lamigo_agent_list))
+ LAMIGO_FATAL("no agents configured\n");
- llapi_printf(LLAPI_MSG_DEBUG, "target pool: %s/%d\n", opt.o_tgt_pool,
+ LAMIGO_DEBUG("target pool: %s/%d\n", opt.o_tgt_pool,
opt.o_tgt_pool_len);
tgt_pools = lamigo_alloc_pool(opt.o_tgt_pool);
opt.o_batch_sync_cnt = opt.o_cached_fid_hiwm / 2;
rc = pipe2(lamigo_sigpipe, O_NONBLOCK);
- if (rc < 0) {
- llapi_error(LLAPI_MSG_FATAL, errno,
- "cannot create sigpipe");
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("cannot create sigpipe: %s\n", strerror(errno));
}
static void lamigo_wait_for_job_completion(int timeout)
{
struct resync_job *rj;
- rj = calloc(1, sizeof(struct resync_job));
- if (rj == NULL) {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "can't allocate for a job");
- return 1;
- }
+ rj = xcalloc(1, sizeof(*rj));
rj->rj_fid = *fid;
rj->rj_stripes = mo->mo_stripes;
rj->rj_resync = resync;
diff_timevals(&result.sr_time_start, &result.sr_time_end,
&result.sr_time_diff);
- llapi_printf(LLAPI_MSG_DEBUG, "finished scanning in %d.%06u seconds\n",
+ LAMIGO_DEBUG("finished scanning in %d.%06u seconds\n",
(int)result.sr_time_diff.tv_sec,
(unsigned int)result.sr_time_diff.tv_usec);
rc = cfs_get_param_paths(&paths, "mdd/%s/changelog_mask",
opt.o_mdtname);
- if (rc != 0 || paths.gl_pathc != 1) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't find changelog mask");
- exit(1);
- }
+ if (rc != 0 || paths.gl_pathc != 1)
+ LAMIGO_FATAL("cannot find changelog mask: %s\n", strerror(errno));
+
rc = lamigo_read_file(paths.gl_pathv[0], buf, sizeof(buf));
globfree(&paths);
"lctl set_param -n mdd.%s.changelog_mask=+\"CLOSE UNLNK\"",
opt.o_mdtname);
rc = system(buf);
- if (rc < 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "can't enable CLOSE/UNLNK in changelog: rc=%d",
- rc);
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("cannot enable CLOSE/UNLNK in changelog: rc = %d\n", rc);
+
llapi_err_noerrno(LLAPI_MSG_INFO, "enable CLOSE/UNLNK in changelog");
}
rc = cfs_get_param_paths(&paths, "mdd/%s/changelog_users",
opt.o_mdtname);
- if (rc != 0 || paths.gl_pathc != 1) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't find changelog users");
- exit(1);
- }
+ if (rc != 0 || paths.gl_pathc != 1)
+ LAMIGO_FATAL("can't find changelog users\n");
+
rc = lamigo_read_file(paths.gl_pathv[0], buf, sizeof(buf));
- if (rc < 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't get changelog users");
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("can't get changelog users\n");
+
rc = -1;
s = buf;
/* skip current index line */
if (!rc) {
/* found, use it */
opt.o_chlg_user = strdup(user);
- llapi_printf(LLAPI_MSG_DEBUG,
- "found Changelog user '%s' in '%s'\n",
+ LAMIGO_DEBUG("found Changelog user '%s' in '%s'\n",
user, buf);
return;
}
}
- if (registered) {
- /* can't find just registered changelog user */
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "can't find registered Changelog user '%s'",
- user);
- exit(1);
- }
+ if (registered)
+ LAMIGO_FATAL("cannot find just registered Changelog user '%s'\n", user);
/* try one from the config file */
if (opt.o_chlg_user) {
rc = lamigo_check_changelog_user(opt.o_chlg_user);
if (!rc) {
/* found, use it */
- llapi_printf(LLAPI_MSG_DEBUG,
- "found Changelog user '%s' from config\n",
+ LAMIGO_DEBUG("found Changelog user '%s' from config\n",
opt.o_chlg_user);
return;
}
"lctl --device %s changelog_register -n >"LAMIGO_USERFILE,
opt.o_mdtname, opt.o_mdtname);
rc = system(buf);
- if (rc < 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "Changelog user '%s' is not registered",
+ if (rc < 0)
+ LAMIGO_FATAL("changelog user '%s' is not registered\n",
opt.o_chlg_user);
- exit(1);
- }
+
registered = true;
/* if a new changelog user was just registered, either this is the
* first time lamigo was run on the filesystem, or it has been some
return;
progress_last_processed = stats.s_processed;
- llapi_printf(LLAPI_MSG_INFO,
- "%lu processed, %lu replicated, %lu busy, %lu in queue, "
- "%lu hot skipped, %lu ro2hot, %lu rw2hot, %lu rw2cold\n",
- stats.s_processed, stats.s_replicated, stats.s_busy,
- stats.s_skip_hot, stats.s_replicate_ro2hot,
- stats.s_replicate_rw2hot, stats.s_replicate_rw2cold,
- head.lh_cached_count);
+ LAMIGO_INFO("%lu processed, %lu replicated, %lu busy, %lu in queue, "
+ "%lu hot skipped, %lu ro2hot, %lu rw2hot, %lu rw2cold\n",
+ stats.s_processed, stats.s_replicated, stats.s_busy,
+ stats.s_skip_hot, stats.s_replicate_ro2hot,
+ stats.s_replicate_rw2hot, stats.s_replicate_rw2cold,
+ head.lh_cached_count);
}
static void lamigo_lock_pidfile(void)
snprintf(buf, sizeof(buf), LAMIGO_PIDFILE, opt.o_mdtname);
fd = open(buf, O_RDWR | O_CREAT, 0600);
- if (fd < 0) {
- llapi_error(LLAPI_MSG_FATAL, errno, "can't create pidfile");
- exit(1);
- }
+ if (fd < 0)
+ LAMIGO_FATAL("cannot create pidfile '%s': %s\n", buf, strerror(errno));
+
rc = flock(fd, LOCK_EX | LOCK_NB);
if (rc < 0) {
sz = read(fd, buf, sizeof(buf));
sz = 0;
if (sz > 0)
buf[sz] = 0;
- llapi_err_noerrno(LLAPI_MSG_FATAL,
- "another lamigo is running, locked by %s",
+ LAMIGO_FATAL("another lamigo is running, locked by %s\n",
sz > 0 ? buf : "[unknown]");
- exit(1);
}
rc = ftruncate(fd, 0);
- if (rc < 0) {
- llapi_error(LLAPI_MSG_FATAL, errno, "cannot truncate pidfile");
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("cannot truncate pidfile: %s\n", strerror(errno));
sz = snprintf(buf, sizeof(buf), "%d\n", getpid());
rc = write(fd, buf, sz);
- if (rc < 0 || rc != sz) {
- llapi_error(LLAPI_MSG_FATAL, rc, "can't write pidfile");
- exit(1);
- }
+ if (rc < 0 || rc != sz)
+ LAMIGO_ERROR("cannot write pidfile: %s\n", rc < 0 ? strerror(errno) : "short write");
}
static void lamigo_process_changelog(void)
if (rc < 0) {
int i;
- llapi_error(LLAPI_MSG_ERROR, rc,
- "failed to process record");
+ LAMIGO_ERROR("cannot to process changelog record: %s\n", strerror(-rc));
rc = llapi_changelog_fini(&chglog_hdlr);
- if (rc) {
- llapi_error(LLAPI_MSG_FATAL, rc,
- "cannot fini changelog");
- exit(1);
- }
+ if (rc)
+ LAMIGO_FATAL("cannot fini changelog: %s\n", strerror(-rc));
+
i = 0;
do {
/* do not reopen too frequently */
CHANGELOG_FLAG_EXTRA_FLAGS,
opt.o_mdtname, 0);
} while (i++ < 5 && rc != 0);
- if (rc) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "unable to reopen changelog of MDT [%s]",
- opt.o_mdtname);
- exit(1);
- }
- llapi_printf(LLAPI_MSG_DEBUG, "Reopened changelog\n");
+
+ if (rc)
+ LAMIGO_FATAL("cannot reopen changelog: %s\n", strerror(-rc));
+
+ LAMIGO_DEBUG("Reopened changelog\n");
goto again;
}
}
int i, rc;
tok = json_tokener_new();
- if (!tok) {
- llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO , -1,
- "cannot allocate json token");
- exit(1);
- }
+ if (!tok)
+ LAMIGO_OOM(-1);
obj_top = json_tokener_parse_ex(tok, rule_str, strlen(rule_str));
if (obj_top == NULL) {
enum json_tokener_error jerr;
jerr = json_tokener_get_error(tok);
- llapi_error(LLAPI_MSG_FATAL|LLAPI_MSG_NO_ERRNO, -1,
- "cannot parse rules in %s: %s - %s",
- filename, rule_str, json_tokener_error_desc(jerr));
- exit(1);
+
+ LAMIGO_FATAL("cannot parse rule '%s' in '%s': %s\n",
+ rule_str, filename, json_tokener_error_desc(jerr));
}
rc = json_object_object_get_ex(obj_top, LIPE_CONFIG_RULES, &obj_rules);
- if (!rc) {
- llapi_error(LLAPI_MSG_ERROR, rc, "no rules in %s", filename);
- exit(1);
- }
- if (json_object_get_type(obj_rules) != json_type_array) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "rules are not an array in %s", filename);
- exit(1);
- }
+ if (!rc)
+ LAMIGO_FATAL("no rules in '%s'\n", filename);
+
+ if (json_object_get_type(obj_rules) != json_type_array)
+ LAMIGO_FATAL("rules in '%s' are not an array\n", filename);
for (i = 0; i < json_object_array_length(obj_rules); i++) {
struct json_object *obj_action, *obj_expr;
obj_rule = json_object_array_get_idx(obj_rules, i);
- if (!obj_rule) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "failed to get rule #%d", i);
- exit(1);
- }
+ if (!obj_rule)
+ LAMIGO_FATAL("failed to get rule #%d\n", i);
rc = json_object_object_get_ex(obj_rule, LIPE_CONFIG_ACTION,
&obj_action);
- if (!rc) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "no action in rule %s",
+ if (!rc)
+ LAMIGO_FATAL("no action in rule %s\n",
json_object_to_json_string(obj_rule));
- exit(1);
- }
+
action = json_object_get_string(obj_action);
- if (!action) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid action in rule %s",
+ if (!action)
+ LAMIGO_FATAL("invalid action in rule %s\n",
json_object_to_json_string(obj_rule));
- exit(1);
- }
+
rc = json_object_object_get_ex(obj_rule, LIPE_CONFIG_EXPRESSION,
&obj_expr);
- if (!rc) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "no expression in rule %s",
- json_object_to_json_string(obj_rule));
- exit(1);
- }
+ if (!rc)
+ LAMIGO_FATAL("no expression in rule %s\n",
+ json_object_to_json_string(obj_rule));
+
expr = json_object_get_string(obj_expr);
- if (!expr) {
- llapi_error(LLAPI_MSG_ERROR, -EINVAL,
- "invalid expression in rule %s",
- json_object_to_json_string(obj_rule));
- exit(1);
- }
+ if (!expr)
+ LAMIGO_FATAL("invalid expression in rule %s\n",
+ json_object_to_json_string(obj_rule));
- LIPE_ALLOC_PTR(lr);
- if (!lr) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "cannot allocate rule %s",
- json_object_to_json_string(obj_rule));
- exit(1);
- }
+ lr = xcalloc(1, sizeof(*lr));
if (!strcmp(action, "skip"))
lr->lr_action.la_action = LAT_COUNTER_INC;
else if (!strcmp(action, "mirror"))
lr->lr_action.la_action = LAT_SHELL_CMD_FID;
- else {
- llapi_error(LLAPI_MSG_ERROR, 0,
- "unknown action '%s' in rule %s", action,
- json_object_to_json_string(obj_rule));
- exit(1);
- }
+ else
+ LAMIGO_FATAL("unknown action '%s' in rule '%s'\n",
+ action, json_object_to_json_string(obj_rule));
LIPE_INIT_LIST_HEAD(&lr->lr_values);
rc = lipe_policy_value_init(&lr->lr_values, &lr->lr_expression,
&valid, expr);
- if (rc) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "cannot parse expression in rule %s",
- json_object_to_json_string(obj_rule));
- exit(1);
- }
+ if (rc)
+ LAMIGO_FATAL("cannot parse expression in rule %s\n", strerror(-rc));
+
lipe_list_add_tail(&lr->lr_linkage, &lamigo_rule_list);
lamigo_rule_attrs |= lr->lr_attr_bits;
}
json_object_put(obj_top);
}
-static void lamigo_register_signal_handlers(void)
+/*
+ * Entry point of the dedicated signal-handling thread.
+ *
+ * \param arg pointer to the sigset_t of signals to wait for
+ *
+ * SIGUSR1 dumps the stats file, SIGUSR2 dumps the heat file, and any
+ * other signal in the set terminates the process with EXIT_SUCCESS.
+ * The loop never returns normally.
+ */
+static void *lamigo_signal_thread_start(void *arg)
{
- struct sigaction sigterm_action = {
- .sa_handler = &lamigo_sigterm_handler,
- };
- struct sigaction sigusr1_action = {
- .sa_handler = &lamigo_sigusr1_handler,
- .sa_flags = SA_RESTART,
- };
- struct sigaction sigusr2_action = {
- .sa_handler = &lamigo_sigusr2_handler,
- .sa_flags = SA_RESTART,
- };
+ sigset_t *set = arg;
+ int sig;
+ int rc;
+
+ while (1) {
+ rc = sigwait(set, &sig);
+ /*
+ * sigwait() returns 0 on success and a positive error
+ * number on failure. EINVAL (an invalid signal number in
+ * the set) is permanent, so retrying would spin forever
+ * while logging; only retry on EINTR and give up on any
+ * other error. NOTE(review): LAMIGO_FATAL is assumed to
+ * terminate the process, as its other uses in this file
+ * suggest.
+ */
+ if (rc == EINTR)
+ continue;
+
+ if (rc != 0)
+ LAMIGO_FATAL("signal wait failed: %s\n", strerror(rc));
- sigemptyset(&sigterm_action.sa_mask);
- sigemptyset(&sigusr1_action.sa_mask);
- sigemptyset(&sigusr2_action.sa_mask);
+ LAMIGO_DEBUG("received signal %d\n", sig);
- sigaction(SIGTERM, &sigterm_action, NULL);
- sigaction(SIGUSR1, &sigusr1_action, NULL);
- sigaction(SIGUSR2, &sigusr2_action, NULL);
+ switch (sig) {
+ case SIGUSR1:
+ lamigo_dump_stats_file();
+ break;
+ case SIGUSR2:
+ lamigo_dump_heat_file();
+ break;
+ default:
+ LAMIGO_INFO("received signal %d, exiting\n", sig);
+ exit(EXIT_SUCCESS);
+ }
+ }
}
int main(int argc, char **argv)
{
- int rc;
- bool stop = 0;
- int ret = 0;
- pthread_t pid;
+ pthread_t lamigo_refresh_statfs_thread_id;
+ pthread_t lamigo_signal_thread_id;
+ sigset_t sigset;
+ int rc;
- /* Ignore SIGUSR1 and SIGUSR2 until we are setup. */
- signal(SIGUSR1, SIG_IGN);
- signal(SIGUSR2, SIG_IGN);
+ /* We will handle signals in a dedicated thread. */
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGTERM);
+ sigaddset(&sigset, SIGUSR1);
+ sigaddset(&sigset, SIGUSR2);
+
+ rc = pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+ if (rc != 0)
+ LAMIGO_FATAL("cannot set signal mask: %s\n", strerror(rc));
+
+ rc = pthread_create(&lamigo_signal_thread_id, NULL, &lamigo_signal_thread_start, &sigset);
+ if (rc != 0)
+ LAMIGO_FATAL("cannot start signal thread: %s\n", strerror(rc));
lipe_version_init();
ssh_threads_set_callbacks(ssh_threads_get_pthread());
setlinebuf(stdout);
setlinebuf(stderr);
- llapi_msg_set_level(opt.o_verbose);
+ llapi_msg_set_level(lamigo_log_level);
lamigo_parse_opts(argc, argv);
* followed by the MDT name ("lamigo lustre-MDT0000"). */
llapi_set_command_name(opt.o_mdtname);
- llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0,
- "version %s-%s, revision %s",
+ LAMIGO_INFO("version %s-%s, revision %s\n",
PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION);
rc = lamigo_init_cache();
- if (rc < 0) {
- llapi_err_noerrno(LLAPI_MSG_FATAL, "can't init cache\n");
- exit(1);
- }
+ if (rc < 0)
+ LAMIGO_FATAL("cannot init cache\n");
/* create and lock pidfile to protect against another instance */
lamigo_lock_pidfile();
/* wait till the target pool got one OST at least */
lamigo_refresh_osts_from_pool(tgt_pools);
while (tgt_pools->pl_ostnr == 0) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "Target pool %s is empty, waiting...",
- tgt_pools->pl_pool);
+ LAMIGO_ERROR("target pool '%s' is empty, waiting %d seconds\n",
+ tgt_pools->pl_pool, opt.o_pool_refresh);
sleep(opt.o_pool_refresh);
lamigo_refresh_osts_from_pool(tgt_pools);
}
/* start heat collection and maintaining */
lamigo_alr_init();
- rc = pthread_create(&pid, NULL, lamigo_refresh_statfs_thread, NULL);
- if (rc) {
- llapi_error(LLAPI_MSG_FATAL, rc,
- "unable to start statfs thread");
- exit(1);
- }
+ rc = pthread_create(&lamigo_refresh_statfs_thread_id, NULL, lamigo_refresh_statfs_thread, NULL);
+ if (rc != 0)
+ LAMIGO_FATAL("cannot start statfs thread: %s\n", strerror(rc));
if (opt.o_rescan) {
/* scan the whole MDT and replicate matched files */
- ret = lamigo_rescan();
+ rc = lamigo_rescan();
+ if (rc < 0)
+ LAMIGO_FATAL("cannot scan device: %s\n", strerror(-rc));
}
- llapi_printf(LLAPI_MSG_DEBUG, "Start receiving records\n");
+ LAMIGO_DEBUG("Start receiving records\n");
rc = llapi_changelog_start(&chglog_hdlr,
CHANGELOG_FLAG_FOLLOW |
CHANGELOG_FLAG_BLOCK |
CHANGELOG_FLAG_JOBID |
CHANGELOG_FLAG_EXTRA_FLAGS,
opt.o_mdtname, 0);
- if (rc) {
- /* XXX: probably keep trying in some cases? */
- llapi_error(LLAPI_MSG_ERROR, rc,
- "unable to open changelog of MDT [%s]",
- opt.o_mdtname);
- ret = rc;
- goto out;
- }
-
- llapi_printf(LLAPI_MSG_INFO, "started\n");
-
- lamigo_register_signal_handlers();
+ if (rc < 0)
+ LAMIGO_FATAL("cannot open changelog: %s\n", strerror(-rc));
- while (!stop) {
+ LAMIGO_INFO("started\n");
+ while (1) {
if (head.lh_cached_count < opt.o_cached_fid_hiwm)
lamigo_process_changelog();
else
if (!are_agents_busy()) {
rc = lamigo_check_sync();
if (rc < 0) {
- stop = true;
- ret = rc;
+ LAMIGO_ERROR("check sync failed: rc = %d\n", rc);
+ sleep(1);
}
}
lamigo_check_bad_agents();
lamigo_show_progress();
}
-
- /* wait for all jobs to complete */
- while (lamigo_jobs_running) {
- lamigo_wait_for_job_completion(10);
- lamigo_check_jobs();
- lamigo_check_and_clear_changelog();
- }
-
- rc = llapi_changelog_fini(&chglog_hdlr);
- if (rc) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "unable to close changelog of MDT [%s]",
- opt.o_mdtname);
- ret = rc;
- }
-
-out:
- lamigo_cleanup();
- llapi_error(LLAPI_MSG_INFO|LLAPI_MSG_NO_ERRNO, 0, "exited\n");
- lipe_version_fini();
-
- return ret;
}
void lamigo_alr_mirror_cb(struct resync_job *rj, void *cbdata, int rc)
struct resync_job *rj;
int rc;
- rj = calloc(1, sizeof(struct resync_job));
- if (rj == NULL) {
- llapi_err_noerrno(LLAPI_MSG_ERROR, "can't allocate for a job");
- return;
- }
+ rj = xcalloc(1, sizeof(*rj));
rj->rj_fid = *fid;
rj->rj_stripes = stripes;
rj->rj_index = 0;
struct mirror_opts mo = { 0 };
int sync;
- llapi_printf(LLAPI_MSG_DEBUG,
- "check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, "
+ LAMIGO_DEBUG("check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, "
"L %d, I %d %s\n", PFID(&ht->ah_fid),
ht->ah_heat[0], ht->ah_heat[1], ht->ah_pools[0],
ht->ah_pools[1], ht->ah_livetime, ht->ah_idle,
ht->ah_mark ? "M" : "");
if (ht->ah_mark & ALR_TAG_PROCESSED) {
/* already tried to replicate */
- llapi_printf(LLAPI_MSG_DEBUG,
- DFID" tried to replicate already\n",
+ LAMIGO_DEBUG(DFID" tried to replicate already\n",
PFID(&ht->ah_fid));
return 0;
}
if (ht->ah_heat[0] && ht->ah_heat[1] == 0 &&
ht->ah_pools[0] == 0 && ht->ah_pools[1]) {
sync = lamigo_is_in_sync(&ht->ah_fid, tgt_pools, src_pools, &mo);
- llapi_printf(LLAPI_MSG_DEBUG, "try to replicate RO "DFID": %d\n",
+ LAMIGO_DEBUG("try to replicate RO "DFID": %d\n",
PFID(&ht->ah_fid), sync);
if (sync != AMIGO_RESYNC_NONE) {
lamigo_new_job_for_hot(&ht->ah_fid, sync, src_pools,
if (ht->ah_idle > 0 && ht->ah_heat[1] &&
ht->ah_pools[0] == 0 && ht->ah_pools[1]) {
sync = lamigo_is_in_sync(&ht->ah_fid, tgt_pools, src_pools, &mo);
- llapi_printf(LLAPI_MSG_DEBUG, "try to replicate RW "DFID": %d\n",
+ LAMIGO_DEBUG("try to replicate RW "DFID": %d\n",
PFID(&ht->ah_fid), sync);
if (sync != AMIGO_RESYNC_NONE) {
lamigo_new_job_for_hot(&ht->ah_fid, sync, src_pools,
ht->ah_pools[1] == 0 && ht->ah_pools[0]) {
sync = lamigo_is_in_sync(&ht->ah_fid, src_pools,
tgt_pools, &mo);
- llapi_printf(LLAPI_MSG_DEBUG,
+ LAMIGO_DEBUG(
"replicate idling hot to CP "DFID": %d\n",
PFID(&ht->ah_fid), sync);
if (sync != AMIGO_RESYNC_NONE) {
return NULL;
/* XXX: limit number of hot files to check? */
- ht = calloc(*nr + 1, sizeof(*ht));
- if (!ht) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "allocation for ht failed");
- return NULL;
- }
-
+ ht = xcalloc(*nr + 1, sizeof(*ht));
i = lamigo_alr_get_hot_files(period, ht, *nr,
ALR_TAG_NO_ACCT | ALR_TAG_REPLICATED);
if (i == 0) {
if (src_pools->pl_open) {
/* get most recent hot files */
ht = lamigo_get_hot(alr_hot_period, &nr);
- llapi_printf(LLAPI_MSG_DEBUG, "check hot in period %lu - %d\n",
+ LAMIGO_DEBUG("check hot in period %lu - %d\n",
alr_hot_period, nr);
if (ht) {
for (i = 0; i < nr; i++)
/* now check hot idling files - the files we found hot and
* skipped replication. now it's time to try again */
ht = lamigo_get_hot(alr_hot_period - opt.o_alr_hot_after_idle, &nr);
- llapi_printf(LLAPI_MSG_DEBUG, "check idle in period %lu - %d\n",
+ LAMIGO_DEBUG("check idle in period %lu - %d\n",
alr_hot_period - 3, nr);
if (!ht)
goto out;
for (i = 0; i < nr; i++) {
struct alr_heat *ah = ht + i;
- llapi_printf(LLAPI_MSG_DEBUG,
- "idle "DFID": P: %Lu/%Lu, live %d, idle %d\n",
+ LAMIGO_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n",
PFID(&ah->ah_fid), ah->ah_pools[0], ah->ah_pools[1],
ah->ah_livetime, ah->ah_idle);
if (src_pools->pl_open)
fd = open(path, O_RDONLY);
if (fd < 0) {
- llapi_error(LLAPI_MSG_ERROR, errno, "cannot open '%s'", path);
+ LAMIGO_ERROR("cannot open '%s': %s\n", path, strerror(errno));
/* 0 means non-available OST */
return 0;
}
if (rc > 0)
retval = strtoul(buf, NULL, 10);
if (rc < 0)
- llapi_error(LLAPI_MSG_ERROR, errno, "cannot read '%s'", path);
+ LAMIGO_ERROR("cannot read '%s': %s\n", path, strerror(errno));
close(fd);
/* report zero if something went wrong
/* check OSP is active */
active = lamigo_read_osp_param(ostidx, "active");
status = lamigo_read_osp_param(ostidx, "prealloc_status");
- llapi_printf(LLAPI_MSG_DEBUG,
- "statfs for %d%s/%d: %llu from %llu\n",
+ LAMIGO_DEBUG("statfs for %d%s/%d: %llu from %llu\n",
ostidx, active ? "(active)" : "(inactive)",
(int)status, kbavail, kbtotal);
if (!active || status)
pl->pl_open = true;
/* whether pool is good for replicas */
- llapi_printf(LLAPI_MSG_DEBUG,
- "statfs for %s %s pool: %llu from %llu, thresh %llu\n",
+ LAMIGO_DEBUG("statfs for %s %s pool: %llu from %llu, thresh %llu\n",
pl->pl_open ? "open" : "closed", pl->pl_pool, tavail,
ttotal, ttotal * threshold / 100);
}
int rc;
str = strstr(opt.o_mdtname, "-MDT");
- if (!str) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "failed to get MDT index from %s\n",
- opt.o_mdtname);
- exit(1);
- }
+ if (!str)
+ LAMIGO_FATAL("cannot get MDT index from '%s'\n", opt.o_mdtname);
+
lamigo_mdtidx = strtoul(str + 4, NULL, 16);
rc = cfs_get_param_paths(&paths, "osp");
- if (rc != 0) {
- llapi_error(LLAPI_MSG_FATAL, rc, "can't find OSP root");
- exit(1);
- }
+ if (rc != 0)
+ LAMIGO_FATAL("cannot find OSP root: %s\n", strerror(errno));
+
osproot = strdup(paths.gl_pathv[0]);
globfree(&paths);