#define DEF_MAX_SCAN_SECS 30
#define MIN_MAX_SCAN_SECS 5
#define MAX_MAX_SCAN_SECS 300
-
-#define MAX_ATIME_FID_LEN 128
+#define DEF_FORCE_SCAN_INTERVAL (30 * 60)
+#define MIN_FORCE_SCAN_INTERVAL (60)
+#define MAX_FORCE_SCAN_INTERVAL (24 * 60 * 60)
#define OPT_DRY_RUN 1
#define OPT_CANDIDATE_NUM 2
#define OPT_MAX_SCAN_SECS 5
#define OPT_PIDFILE 6
#define OPT_VERSION 7
+#define OPT_FORCE_SCAN_INTERVAL 8
#define LPCC_PURGE_LOG(level, fmt, ...) \
llapi_error((level) | LLAPI_MSG_NO_ERRNO, 0, fmt, ##__VA_ARGS__)
char *o_pidfile;
bool o_dry_run;
bool o_clear_hashdir;
+ int o_force_scan_interval;
};
static struct lpcc_purge_options opt = {
.o_rwid = -1,
.o_max_scan_secs = DEF_MAX_SCAN_SECS,
.o_dry_run = false,
.o_clear_hashdir = false,
+ .o_force_scan_interval = DEF_FORCE_SCAN_INTERVAL,
};
struct lpcc_purge_stats {
.s_lock = PTHREAD_MUTEX_INITIALIZER,
};
+struct lpcc_purge_extra_stats {
+ pthread_mutex_t xs_lock;
+ int64_t xs_last_scan_time;
+ uint64_t xs_files;
+ uint64_t xs_bytes;
+ uint64_t xs_min_size;
+ uint64_t xs_max_size;
+ int64_t xs_min_atime;
+ int64_t xs_max_atime;
+ int64_t xs_age0_time_ms;
+ int64_t xs_total_ages_ms;
+};
+
+struct lpcc_purge_extra_stats xstats[2] = {
+ {
+ .xs_lock = PTHREAD_MUTEX_INITIALIZER,
+ },
+ {
+ .xs_lock = PTHREAD_MUTEX_INITIALIZER,
+ }
+};
+
static struct lipe_instance *instance = NULL;
struct lpcc_purge_candidate {
- uint64_t c_atime_ms;
struct lu_fid c_fid;
+ uint64_t c_atime_ms;
+ uint64_t c_size;
char c_path[0];
};
static double lpcc_purge_get_fs_usage(const char *fs);
struct lpcc_purge_candidate *lpcc_purge_candidate_new(
- uint64_t atime_ms, struct lu_fid fid, const char *path)
+ struct lu_fid fid, struct lipe_object_attrs * attr, const char *path)
{
struct lpcc_purge_candidate *candidate =
(struct lpcc_purge_candidate *)malloc(
exit(1);
}
- candidate->c_atime_ms = atime_ms;
candidate->c_fid = fid;
+ candidate->c_size = attr->loa_size;
+ candidate->c_atime_ms = attr->loa_atime_ms;
strcpy(candidate->c_path, path);
return candidate;
char buff[64];
time_t curr;
double usage;
+ int64_t avg_age_ms;
json_object *j_all = json_object_new_object();
/* time, fsname, and cache usage */
curr = time(NULL);
- ctime_r(&curr, buff);
- buff[strlen(buff) - 1] = '\0';
- json_object *j_curr_time = json_object_new_string(buff);
- json_object_object_add(j_all, "curr_time", j_curr_time);
+ json_object *j_curr_time = json_object_new_int64(curr);
+ json_object_object_add(j_all, "snapshot_time_secs", j_curr_time);
rc = llapi_get_fsname(opt.o_mount, buff, sizeof(buff));
if (rc) {
llapi_error(LLAPI_MSG_FATAL, rc, "Cannot get fsname.");
}
json_object_object_add(j_all, "fsname", json_object_new_string(buff));
usage = lpcc_purge_get_fs_usage(opt.o_cache);
- json_object_object_add(j_all, "cache_usage", json_object_new_double(usage));
+ json_object_object_add(j_all, "cache_usage_pct", json_object_new_double(usage));
/* configuration */
json_object *j_config = json_object_new_object();
json_object_object_add(j_config, "clear_hashdir", json_object_new_boolean(opt.o_clear_hashdir));
json_object_object_add(j_config, "max_scan_secs", json_object_new_int64(opt.o_max_scan_secs));
json_object_object_add(j_config, "log_level", json_object_new_string(log_level_to_str(llapi_msg_get_level())));
+ json_object_object_add(j_config, "force_scan_interval",
+ json_object_new_int64(opt.o_force_scan_interval));
json_object_object_add(j_all, "config", j_config);
/* stats */
json_object_object_add(j_stats, "total_purged_objs", json_object_new_int64(stats.s_total_purged_objs));
json_object_object_add(j_stats, "total_failed_objs", json_object_new_int64(stats.s_total_failed_objs));
if (stats.s_start_time != 0) {
+ json_object_object_add(j_stats, "start_time_secs", json_object_new_int64(stats.s_start_time));
ctime_r(&stats.s_start_time, buff);
+ buff[strlen(buff) - 1] = '\0'; /* remove ending \n */
json_object_object_add(j_stats, "start_time", json_object_new_string(buff));
- json_object_object_add(j_stats, "start_usage", json_object_new_double(stats.s_start_usage));
+
+ json_object_object_add(j_stats, "start_usage_pct", json_object_new_double(stats.s_start_usage));
}
if (stats.s_end_time != 0) {
+ json_object_object_add(j_stats, "end_time_secs", json_object_new_int64(stats.s_end_time));
ctime_r(&stats.s_end_time, buff);
+ buff[strlen(buff) - 1] = '\0'; /* remove ending \n */
json_object_object_add(j_stats, "end_time", json_object_new_string(buff));
}
json_object_object_add(j_stats, "scanned_objs", json_object_new_int64(stats.s_scanned_objs));
json_object_object_add(j_stats, "purged_objs", json_object_new_int64(stats.s_purged_objs));
json_object_object_add(j_all, "stats", j_stats);
+ /* xstats */
+ json_object *j_xstats = json_object_new_object();
+ json_object_object_add(j_xstats, "last_scan_time_secs",
+ json_object_new_int64(xstats[0].xs_last_scan_time));
+ ctime_r(&xstats[0].xs_last_scan_time, buff);
+ buff[strlen(buff) - 1] = '\0'; /* remove ending \n */
+ json_object_object_add(j_xstats, "last_scan_time",
+ json_object_new_string(buff));
+ json_object_object_add(j_xstats, "cached_files",
+ json_object_new_int64(xstats[0].xs_files));
+ json_object_object_add(j_xstats, "cached_bytes",
+ json_object_new_int64(xstats[0].xs_bytes));
+ json_object_object_add(j_xstats, "min_cached_file_size",
+ json_object_new_int64(xstats[0].xs_min_size));
+ json_object_object_add(j_xstats, "max_cached_file_size",
+ json_object_new_int64(xstats[0].xs_max_size));
+
+ json_object_object_add(j_xstats, "min_cached_file_atime_secs",
+ json_object_new_int64(xstats[0].xs_min_atime));
+ ctime_r(&xstats[0].xs_min_atime, buff);
+ buff[strlen(buff) - 1] = '\0'; /* remove ending \n */
+ json_object_object_add(j_xstats, "min_cached_file_atime",
+ json_object_new_string(buff));
+
+ json_object_object_add(j_xstats, "max_cached_file_atime_secs",
+ json_object_new_int64(xstats[0].xs_max_atime));
+ ctime_r(&xstats[0].xs_max_atime, buff);
+ buff[strlen(buff) - 1] = '\0'; /* remove ending \n */
+ json_object_object_add(j_xstats, "max_cached_file_atime",
+ json_object_new_string(buff));
+
+ if (xstats[0].xs_files == 0)
+ avg_age_ms = 0;
+ else
+ avg_age_ms = xstats[0].xs_total_ages_ms / xstats[0].xs_files +
+ curr * 1000 - xstats[0].xs_age0_time_ms;
+ json_object_object_add(j_xstats, "average_age_secs",
+ json_object_new_int64(avg_age_ms / 1000));
+
+ json_object_object_add(j_all, "cache_stats", j_xstats);
/* convert the JSON object to string and print to file */
const char *str = json_object_to_json_string_ext(j_all,
JSON_C_TO_STRING_PRETTY |
"\t-b, --debug, enable debugging output\n"
"\t --log-level={debug|info|normal|warn|error|fatal|off}, set log level (default: info)\n"
"\t-f, --config-file=FILE\n"
- "\t-C, --cache=DIR root directory of PCC\n"
- "\t-M, --mount=DIR local Lustre client's mountpoint\n"
+ "\t-C, --cache=DIR, root directory of PCC\n"
+ "\t-M, --mount=DIR, local Lustre client's mountpoint\n"
"\t-A, --rwid, --roid=NUM the roid/rwid of PCC\n"
- "\t-H, --high-usage=NUM %% of space or inode to start purging (default: %u)\n"
- "\t-L, --low-usage=NUM %% of space or inode to stop purging (default: %u)\n"
+ "\t-H, --high-usage=NUM, %% of space or inode to start purging (default: %u)\n"
+ "\t-L, --low-usage=NUM, %% of space or inode to stop purging (default: %u)\n"
"\t-i, --interval=NUM, seconds to next check (default: %u)\n"
- "\t-t, --scan-threads=NUM scanning threads (default: %u)\n"
+ "\t-t, --scan-threads=NUM, scanning threads (default: %u)\n"
"\t --candidate-num=NUM, candidate number of approximate LRU (default: %d, min: %d, max: %d)\n"
- "\t --max-scan-secs, max seconds to scan continously before purging (default: %d, min: %d, max: %d)\n"
+ "\t --max-scan-secs=NUM, max seconds to scan continously before purging (default: %d, min: %d, max: %d)\n"
+ "\t --force-scan-interval=NUM, seconds to next forceful scanning (default: %d, min: %d, max: %d)\n"
"\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-RWID.stats)\n"
"\t --pidfile=FILE, the pidfile name (default: /var/run/lpcc_purge-RWID.pid)\n"
"\t --clear-hashdir, clear empty hash dir after detaching file\n"
DEF_INTERVAL,
DEF_SCAN_THREADS,
DEF_CANDIDATE_NUM, MIN_CANDIDATE_NUM, MAX_CANDIDATE_NUM,
- DEF_MAX_SCAN_SECS, MIN_MAX_SCAN_SECS, MAX_MAX_SCAN_SECS
+ DEF_MAX_SCAN_SECS, MIN_MAX_SCAN_SECS, MAX_MAX_SCAN_SECS,
+ DEF_FORCE_SCAN_INTERVAL, MIN_FORCE_SCAN_INTERVAL,
+ MAX_FORCE_SCAN_INTERVAL
);
}
{ "pidfile", required_argument, NULL, OPT_PIDFILE},
{ "clear-hashdir", no_argument, NULL, OPT_CLEAR_HASHDIR},
{ "max-scan-secs", required_argument, NULL, OPT_MAX_SCAN_SECS},
+ { "force-scan-interval", required_argument, NULL,
+ OPT_FORCE_SCAN_INTERVAL},
{ "version", no_argument, NULL, OPT_VERSION},
{ "help", no_argument, NULL, 'h' },
{ NULL }
}
opt.o_max_scan_secs = value;
break;
+ case OPT_FORCE_SCAN_INTERVAL:
+ value = strtol(optarg, &endptr, 10);
+ if (*endptr != '\0' || value < MIN_FORCE_SCAN_INTERVAL ||
+ value > MAX_FORCE_SCAN_INTERVAL) {
+ llapi_error(LLAPI_MSG_FATAL, -EINVAL,
+ "invalid force-scan-interval: '%s'\n",
+ optarg);
+ exit(1);
+ }
+ opt.o_force_scan_interval = value;
+ break;
case OPT_VERSION:
lipe_version();
exit(0);
static void lpcc_purge_wait_for_scan(void)
{
while (1) {
+ time_t now = time(NULL);
double usage = lpcc_purge_get_fs_usage(opt.o_cache);
LPCC_PURGE_LOG(LLAPI_MSG_DEBUG, "usage: %.1f\n", usage);
- if (usage >= opt.o_high_usage || opt.o_dry_run) {
+ if (usage >= opt.o_high_usage || opt.o_dry_run ||
+ (now - xstats[1].xs_last_scan_time) >
+ opt.o_force_scan_interval) {
stats.s_start_usage = usage;
+ xstats[1].xs_last_scan_time = now;
break;
} else {
sleep(opt.o_interval);
return 0;
}
+static void do_xstats(struct lpcc_purge_candidate *candidate)
+{
+ pthread_mutex_lock(&xstats[1].xs_lock);
+
+ xstats[1].xs_files++;
+ xstats[1].xs_bytes += candidate->c_size;
+ if (xstats[1].xs_min_size == 0 ||
+ candidate->c_size < xstats[1].xs_min_size)
+ xstats[1].xs_min_size = candidate->c_size;
+ if (candidate->c_size > xstats[1].xs_max_size)
+ xstats[1].xs_max_size = candidate->c_size;
+ if (xstats[1].xs_min_atime == 0 ||
+ candidate->c_atime_ms < xstats[1].xs_min_atime * 1000)
+ xstats[1].xs_min_atime = candidate->c_atime_ms / 1000;
+ if (candidate->c_atime_ms > xstats[1].xs_max_atime * 1000)
+ xstats[1].xs_max_atime = candidate->c_atime_ms / 1000;
+ xstats[1].xs_total_ages_ms += xstats[1].xs_age0_time_ms -
+ candidate->c_atime_ms;
+
+ pthread_mutex_unlock(&xstats[1].xs_lock);
+}
+
static int lpcc_purge_scan_callback(struct lipe_instance *instance,
struct lipe_object *object,
struct lipe_object_attrs *attrs)
goto out;
}
- candidate = lpcc_purge_candidate_new(attrs->loa_atime_ms, fid, object->u.lo_posix.lop_path);
+ candidate = lpcc_purge_candidate_new(fid, attrs, object->u.lo_posix.lop_path);
if (candidate == NULL) {
rc = -ENOMEM;
goto out;
(candidate_set.cs_count - n_detach - n_discard) * sizeof(candidate_set.cs_arr[0]));
for (j = candidate_set.cs_count - n_discard; j < candidate_set.cs_count; j++) {
+ do_xstats(candidate_set.cs_arr[j]);
lpcc_purge_candidate_destroy(candidate_set.cs_arr[j]);
}
candidate_set.cs_count = candidate_set.cs_count - n_detach - n_discard;
for (i = 0; i < candidate_set.cs_count; i++) {
double usage = lpcc_purge_get_fs_usage(opt.o_cache);
- if (usage >= opt.o_low_usage || opt.o_dry_run)
- lpcc_purge_detach_candidate(opt.o_mount, candidate_set.cs_arr[i]);
+ struct lpcc_purge_candidate *candidate = candidate_set.cs_arr[i];
+
+ if (stats.s_start_usage > opt.o_high_usage &&
+ (usage >= opt.o_low_usage || opt.o_dry_run))
+ lpcc_purge_detach_candidate(opt.o_mount, candidate);
else
- break;
+ do_xstats(candidate);
}
lpcc_purge_candidate_set_clear();
stats.s_end_time = 0;
stats.s_start_time = time(NULL);
+ xstats[1].xs_files = 0;
+ xstats[1].xs_bytes = 0;
+ xstats[1].xs_min_size = 0;
+ xstats[1].xs_max_size = 0;
+ xstats[1].xs_min_atime = 0;
+ xstats[1].xs_max_atime = 0;
+ xstats[1].xs_age0_time_ms = stats.s_start_time * 1000;
+ xstats[1].xs_total_ages_ms = 0;
+
lpcc_purge_scan();
lpcc_purge_free_space();
stats.s_end_time = time(NULL);
+ xstats[0] = xstats[1];
LPCC_PURGE_LOG(LLAPI_MSG_INFO,
"scanned %ld object, purged %ld\n",