X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Flprocfs_jobstats.c;h=ca5ae3b2e95e360d440c4415adc782dc18066d5a;hp=0072feb32f62ec87cb1ea9c150ca6b04b691eb8d;hb=HEAD;hpb=60b5c3e464d6b4b333506e6db6b0635bb5a06577 diff --git a/lustre/obdclass/lprocfs_jobstats.c b/lustre/obdclass/lprocfs_jobstats.c index 0072feb..c2d27e2 100644 --- a/lustre/obdclass/lprocfs_jobstats.c +++ b/lustre/obdclass/lprocfs_jobstats.c @@ -14,12 +14,12 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* - * Copyright (c) 2011 Whamcloud, Inc. + * Copyright (c) 2012, 2016, Intel Corporation. * Use is subject to license terms. * * Author: Niu Yawei @@ -28,20 +28,12 @@ * lustre/obdclass/lprocfs_jobstats.c */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_CLASS -#ifndef __KERNEL__ -# include -#endif - #include #include -#include -#if defined(LPROCFS) +#ifdef CONFIG_PROC_FS /* * JobID formats & JobID environment variable names for supported @@ -70,80 +62,96 @@ */ struct job_stat { - cfs_hlist_node_t js_hash; - cfs_list_t js_list; - cfs_atomic_t js_refcount; - char js_jobid[JOBSTATS_JOBID_SIZE]; - time_t js_timestamp; /* seconds */ - struct lprocfs_stats *js_stats; - struct obd_job_stats *js_jobstats; + struct hlist_node js_hash; /* hash struct for this jobid */ + struct list_head js_list; /* on ojs_list, with ojs_lock */ + struct kref js_refcount; /* num users of this struct */ + char js_jobid[LUSTRE_JOBID_SIZE]; /* job name + NUL*/ + ktime_t js_time_init; /* time of initial stat*/ + ktime_t js_time_latest; /* time of most recent stat*/ + struct lprocfs_stats *js_stats; /* per-job statistics */ + struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */ + struct rcu_head js_rcu; /* RCU head for job_reclaim_rcu*/ }; -static unsigned job_stat_hash(cfs_hash_t *hs, const void *key, unsigned mask) +static unsigned int +job_stat_hash(struct cfs_hash *hs, const void *key, const unsigned int bits) { - return cfs_hash_djb2_hash(key, strlen(key), mask); + return cfs_hash_djb2_hash(key, strlen(key), bits); } -static void *job_stat_key(cfs_hlist_node_t *hnode) +static void *job_stat_key(struct hlist_node *hnode) { struct job_stat *job; - job = cfs_hlist_entry(hnode, struct job_stat, js_hash); + job = hlist_entry(hnode, struct job_stat, js_hash); return job->js_jobid; } -static int job_stat_keycmp(const void *key, cfs_hlist_node_t *hnode) +static int job_stat_keycmp(const void *key, struct hlist_node *hnode) { struct job_stat *job; - job = cfs_hlist_entry(hnode, struct job_stat, js_hash); + job = hlist_entry(hnode, struct job_stat, js_hash); return (strlen(job->js_jobid) == strlen(key)) && !strncmp(job->js_jobid, key, strlen(key)); } -static void *job_stat_object(cfs_hlist_node_t *hnode) +static void *job_stat_object(struct hlist_node *hnode) { - return cfs_hlist_entry(hnode, struct job_stat, js_hash); + return hlist_entry(hnode, struct job_stat, js_hash); } -static void job_stat_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +static bool job_getref_try(struct job_stat *job) { - struct job_stat *job; - job = cfs_hlist_entry(hnode, struct job_stat, js_hash); - cfs_atomic_inc(&job->js_refcount); + return kref_get_unless_zero(&job->js_refcount); } -static void job_free(struct job_stat *job) +static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode) { - LASSERT(atomic_read(&job->js_refcount) == 0); - LASSERT(job->js_jobstats); + struct job_stat *job; + job = hlist_entry(hnode, struct job_stat, js_hash); + kref_get(&job->js_refcount); +} - cfs_write_lock(&job->js_jobstats->ojs_lock); - cfs_list_del_init(&job->js_list); - cfs_write_unlock(&job->js_jobstats->ojs_lock); +static void job_reclaim_rcu(struct rcu_head *head) +{ + struct job_stat *job = container_of(head, typeof(*job), js_rcu); - lprocfs_free_stats(&job->js_stats); + lprocfs_stats_free(&job->js_stats); OBD_FREE_PTR(job); } +static void job_free(struct kref *kref) +{ + struct job_stat *job = container_of(kref, struct job_stat, + js_refcount); + + LASSERT(job->js_jobstats != NULL); + spin_lock(&job->js_jobstats->ojs_lock); + list_del_rcu(&job->js_list); + spin_unlock(&job->js_jobstats->ojs_lock); + + call_rcu(&job->js_rcu, job_reclaim_rcu); +} + static void job_putref(struct job_stat *job) { - LASSERT(atomic_read(&job->js_refcount) > 0); - if (atomic_dec_and_test(&job->js_refcount)) - job_free(job); + LASSERT(kref_read(&job->js_refcount) > 0); + kref_put(&job->js_refcount, job_free); } -static void job_stat_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +static void job_stat_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) { struct job_stat *job; - job = cfs_hlist_entry(hnode, struct job_stat, js_hash); + + job = hlist_entry(hnode, struct job_stat, js_hash); job_putref(job); } -static void job_stat_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +static void job_stat_exit(struct cfs_hash *hs, struct hlist_node *hnode) { - CERROR("Should not have any items!"); + CERROR("should not have any items\n"); } -static cfs_hash_ops_t job_stats_hash_ops = { +static struct cfs_hash_ops job_stats_hash_ops = { .hs_hash = job_stat_hash, .hs_key = job_stat_key, .hs_keycmp = job_stat_keycmp, @@ -153,30 +161,132 @@ static cfs_hash_ops_t job_stats_hash_ops = { .hs_exit = job_stat_exit, }; -static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) +/** + * Jobstats expiry iterator to clean up old jobids + * + * Called for each job_stat structure on this device, it should delete stats + * older than the specified \a oldest_time in seconds. If \a oldest_time is + * in the future then this will delete all statistics (e.g. during shutdown). + * + * \param[in] hs hash of all jobids on this device + * \param[in] bd hash bucket containing this jobid + * \param[in] hnode hash structure for this jobid + * \param[in] data pointer to stats expiry time in seconds + */ +static int job_cleanup_iter_callback(struct cfs_hash *hs, + struct cfs_hash_bd *bd, + struct hlist_node *hnode, void *data) { + ktime_t oldest_time = *((ktime_t *)data); struct job_stat *job; - LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn); + job = hlist_entry(hnode, struct job_stat, js_hash); + if (ktime_before(job->js_time_latest, oldest_time)) + cfs_hash_bd_del_locked(hs, bd, hnode); + + return 0; +} + +/** + * Clean up jobstats that were updated more than \a before seconds ago. + * + * Since this function may be called frequently, do not scan all of the + * jobstats on each call, only twice per cleanup interval. That means stats + * may be on average around cleanup_interval / 4 older than the cleanup + * interval, but that is not considered harmful. + * + * The value stored in ojs_cleanup_interval is how often to perform a cleanup + * scan, and 1/2 of the maximum age of the individual statistics. This is + * done rather than dividing the interval by two each time, because it is + * much easier to do the division when the value is initially set (in seconds) + * rather than after it has been converted to ktime_t, and maybe a bit faster. + * + * If \a clear is true then this will force clean up all jobstats + * (e.g. at shutdown). + * + * If there is already another thread doing jobstats cleanup, don't try to + * do this again in the current thread unless this is a force cleanup. + * + * \param[in] stats stucture tracking all job stats for this device + * \param[in] clear clear all job stats if true + */ +static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool clear) +{ + ktime_t cleanup_interval = stats->ojs_cleanup_interval; + ktime_t now = ktime_get_real(); + ktime_t oldest; + + if (likely(!clear)) { + /* ojs_cleanup_interval of zero means never clean up stats */ + if (ktime_to_ns(cleanup_interval) == 0) + return; + + if (ktime_before(now, ktime_add(stats->ojs_cleanup_last, + cleanup_interval))) + return; + + if (stats->ojs_cleaning) + return; + } + + spin_lock(&stats->ojs_lock); + if (!clear && stats->ojs_cleaning) { + spin_unlock(&stats->ojs_lock); + return; + } + + stats->ojs_cleaning = true; + spin_unlock(&stats->ojs_lock); + + /* Can't hold ojs_lock over hash iteration, since it is grabbed by + * job_cleanup_iter_callback() + * ->cfs_hash_bd_del_locked() + * ->job_putref() + * ->job_free() + * + * Holding ojs_lock isn't necessary for safety of the hash iteration, + * since locking of the hash is handled internally, but there isn't + * any benefit to having multiple threads doing cleanup at one time. + * + * Subtract or add twice the cleanup_interval, since it is 1/2 the + * maximum age. When clearing all stats, push oldest into the future. + */ + cleanup_interval = ktime_add(cleanup_interval, cleanup_interval); + if (likely(!clear)) + oldest = ktime_sub(now, cleanup_interval); + else + oldest = ktime_add(now, cleanup_interval); + cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback, + &oldest); + + spin_lock(&stats->ojs_lock); + stats->ojs_cleaning = false; + stats->ojs_cleanup_last = ktime_get_real(); + spin_unlock(&stats->ojs_lock); +} + +static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) +{ + struct job_stat *job; OBD_ALLOC_PTR(job); if (job == NULL) return NULL; - job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0); + job->js_stats = lprocfs_stats_alloc(jobs->ojs_cntr_num, 0); if (job->js_stats == NULL) { OBD_FREE_PTR(job); return NULL; } - jobs->ojs_cntr_init_fn(job->js_stats); + jobs->ojs_cntr_init_fn(job->js_stats, 0, 0); - memcpy(job->js_jobid, jobid, JOBSTATS_JOBID_SIZE); - job->js_timestamp = cfs_time_current_sec(); + memcpy(job->js_jobid, jobid, sizeof(job->js_jobid)); + job->js_time_latest = job->js_stats->ls_init; job->js_jobstats = jobs; - CFS_INIT_HLIST_NODE(&job->js_hash); - CFS_INIT_LIST_HEAD(&job->js_list); - cfs_atomic_set(&job->js_refcount, 1); + INIT_HLIST_NODE(&job->js_hash); + INIT_LIST_HEAD(&job->js_list); + kref_init(&job->js_refcount); return job; } @@ -184,18 +294,23 @@ static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, int event, long amount) { - struct obd_job_stats *stats = &obd->u.obt.obt_jobstats; + struct obd_job_stats *stats = &obd2obt(obd)->obt_jobstats; struct job_stat *job, *job2; ENTRY; - LASSERT(stats && stats->ojs_hash); + LASSERT(stats != NULL); + LASSERT(stats->ojs_hash != NULL); - if (!jobid || !strlen(jobid)) + if (event >= stats->ojs_cntr_num) RETURN(-EINVAL); - if (strlen(jobid) >= JOBSTATS_JOBID_SIZE) { - CERROR("Invalid jobid size (%lu), expect(%d)\n", - (unsigned long)strlen(jobid) + 1, JOBSTATS_JOBID_SIZE); + if (jobid == NULL || strlen(jobid) == 0) + RETURN(0); + + /* unterminated jobid should be handled in lustre_msg_get_jobid() */ + if (strlen(jobid) >= LUSTRE_JOBID_SIZE) { + CERROR("%s: invalid jobid size %lu, expect %d\n", obd->obd_name, + (unsigned long)strlen(jobid) + 1, LUSTRE_JOBID_SIZE); RETURN(-EINVAL); } @@ -203,6 +318,8 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, if (job) goto found; + lprocfs_job_cleanup(stats, false); + job = job_alloc(jobid, stats); if (job == NULL) RETURN(-ENOMEM); @@ -212,64 +329,67 @@ int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, if (job2 != job) { job_putref(job); job = job2; - LASSERT(!cfs_list_empty(&job->js_list)); + /* We cannot LASSERT(!list_empty(&job->js_list)) here, + * since we just lost the race for inserting "job" into the + * ojs_list, and some other thread is doing it _right_now_. + * Instead, be content the other thread is doing this, since + * "job2" was initialized in job_alloc() already. LU-2163 */ } else { - LASSERT(cfs_list_empty(&job->js_list)); - cfs_write_lock(&stats->ojs_lock); - cfs_list_add_tail(&job->js_list, &stats->ojs_list); - cfs_write_unlock(&stats->ojs_lock); + LASSERT(list_empty(&job->js_list)); + spin_lock(&stats->ojs_lock); + list_add_tail_rcu(&job->js_list, &stats->ojs_list); + spin_unlock(&stats->ojs_lock); } found: LASSERT(stats == job->js_jobstats); - LASSERT(stats->ojs_cntr_num > event); - job->js_timestamp = cfs_time_current_sec(); + job->js_time_latest = ktime_get_real(); lprocfs_counter_add(job->js_stats, event, amount); job_putref(job); + RETURN(0); } EXPORT_SYMBOL(lprocfs_job_stats_log); -static int job_iter_callback(cfs_hash_t *hs, cfs_hash_bd_t *bd, - cfs_hlist_node_t *hnode, void *data) -{ - time_t oldest = *((time_t *)data); - struct job_stat *job; - - job = cfs_hlist_entry(hnode, struct job_stat, js_hash); - if (!oldest || job->js_timestamp < oldest) - cfs_hash_bd_del_locked(hs, bd, hnode); - - return 0; -} - void lprocfs_job_stats_fini(struct obd_device *obd) { - struct obd_job_stats *stats = &obd->u.obt.obt_jobstats; - time_t oldest = 0; + struct obd_job_stats *stats = &obd2obt(obd)->obt_jobstats; if (stats->ojs_hash == NULL) return; - cfs_timer_disarm(&stats->ojs_cleanup_timer); - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest); + + lprocfs_job_cleanup(stats, true); cfs_hash_putref(stats->ojs_hash); stats->ojs_hash = NULL; - LASSERT(cfs_list_empty(&stats->ojs_list)); + LASSERT(list_empty(&stats->ojs_list)); } EXPORT_SYMBOL(lprocfs_job_stats_fini); + +struct lprocfs_jobstats_data { + struct obd_job_stats *pjd_stats; + loff_t pjd_last_pos; + struct job_stat *pjd_last_job; +}; + static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos) { - struct obd_job_stats *stats = p->private; + struct lprocfs_jobstats_data *data = p->private; + struct obd_job_stats *stats = data->pjd_stats; loff_t off = *pos; struct job_stat *job; - cfs_read_lock(&stats->ojs_lock); + rcu_read_lock(); if (off == 0) return SEQ_START_TOKEN; + + /* if pos matches the offset of last saved job, start from saved job */ + if (data->pjd_last_job && data->pjd_last_pos == off) + return data->pjd_last_job; + off--; - cfs_list_for_each_entry(job, &stats->ojs_list, js_list) { + list_for_each_entry_rcu(job, &stats->ojs_list, js_list) { if (!off--) return job; } @@ -278,44 +398,66 @@ static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos) static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v) { - struct obd_job_stats *stats = p->private; + struct lprocfs_jobstats_data *data = p->private; + struct job_stat *job = NULL; + + /* try to get a ref on current job (not deleted) */ + if (v && v != SEQ_START_TOKEN && job_getref_try(v)) + job = v; + + rcu_read_unlock(); + + /* drop the ref on the old saved job */ + if (data->pjd_last_job) { + job_putref(data->pjd_last_job); + data->pjd_last_job = NULL; + } - cfs_read_unlock(&stats->ojs_lock); + /* save the current job for the next read */ + if (job) + data->pjd_last_job = job; } static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) { - struct obd_job_stats *stats = p->private; + struct lprocfs_jobstats_data *data = p->private; + struct obd_job_stats *stats = data->pjd_stats; struct job_stat *job; - cfs_list_t *next; + struct list_head *cur; ++*pos; + data->pjd_last_pos = *pos; if (v == SEQ_START_TOKEN) { - next = stats->ojs_list.next; + cur = &stats->ojs_list; } else { job = (struct job_stat *)v; - next = job->js_list.next; + cur = &job->js_list; } - return next == &stats->ojs_list ? NULL : - cfs_list_entry(next, struct job_stat, js_list); + job = list_entry_rcu(cur->next, struct job_stat, js_list); + if (&job->js_list == &stats->ojs_list) + return NULL; + + return job; } /* * Example of output on MDT: * * job_stats: - * - job_id: test_id.222.25844 - * snapshot_time: 1322494486 - * open: { samples: 3, unit: reqs } - * close: { samples: 3, unit: reqs } + * - job_id: dd.4854 + * snapshot_time: 1322494486.123456789 + * start_time: 1322494476.012345678 + * elapsed_time: 10.111111111 + * open: { samples: 1, unit: reqs } + * close: { samples: 1, unit: reqs } * mknod: { samples: 0, unit: reqs } * link: { samples: 0, unit: reqs } * unlink: { samples: 0, unit: reqs } * mkdir: { samples: 0, unit: reqs } * rmdir: { samples: 0, unit: reqs } - * rename: { samples: 1, unit: reqs } - * getattr: { samples: 7, unit: reqs } + * rename: { samples: 0, unit: reqs } + * getattr: { samples: 1, unit: reqs } * setattr: { samples: 0, unit: reqs } * getxattr: { samples: 0, unit: reqs } * setxattr: { samples: 0, unit: reqs } @@ -325,13 +467,15 @@ static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) * Example of output on OST: * * job_stats: - * - job_id 4854 - * snapshot_time: 1322494602 - * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } - * write: { samples: 1, unit: bytes, min: 10, max: 10, sum: 10 } - * setattr: { samples: 0, unit: reqs } - * punch: { samples: 0, unit: reqs } - * sync: { samples: 0, unit: reqs } + * - job_id dd.4854 + * snapshot_time: 1322494602.123456789 + * start_time: 1322494592.987654321 + * elapsed_time: 9.135802468 + * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } + * write: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 } + * setattr: { samples: 0, unit: reqs } + * punch: { samples: 0, unit: reqs } + * sync: { samples: 0, unit: reqs } */ static const char spaces[] = " "; @@ -345,103 +489,190 @@ static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) { struct job_stat *job = v; struct lprocfs_stats *s; - struct lprocfs_counter ret, *cntr; - int i; + struct lprocfs_counter ret; + struct lprocfs_counter_header *cntr_header; + char escaped[LUSTRE_JOBID_SIZE * 4] = ""; + char *quote = "", *c, *end; + int i, joblen = 0; if (v == SEQ_START_TOKEN) { - seq_printf(p, "job_stats:\n"); + seq_puts(p, "job_stats:\n"); return 0; } - seq_printf(p, "- %-16s %s\n", "job_id:", job->js_jobid); - seq_printf(p, " %-16s %ld\n", "snapshot_time:", job->js_timestamp); + /* Quote and escape jobid characters to escape hex codes "\xHH" if + * it contains any non-standard characters (space, newline, etc), + * so it will be confined to single line and not break parsing. + */ + for (c = job->js_jobid, end = job->js_jobid + sizeof(job->js_jobid); + c < end && *c != '\0'; + c++, joblen++) { + if (!isalnum(*c) && strchr(".@-_:/", *c) == NULL) { + quote = "\""; + snprintf(escaped + joblen, sizeof(escaped), "\\x%02X", + (unsigned char)*c); + joblen += 3; + } else { + escaped[joblen] = *c; + /* if jobid has ':', it should be quoted too */ + if (*c == ':') + quote = "\""; + } + } + /* '@' is reserved in YAML, so it cannot start a bare string. */ + if (escaped[0] == '@') + quote = "\""; + + seq_printf(p, "- %-16s %s%*s%s\n", + "job_id:", quote, joblen, escaped, quote); + lprocfs_stats_header(p, job->js_time_latest, job->js_stats->ls_init, + 16, ":", true, " "); s = job->js_stats; for (i = 0; i < s->ls_num; i++) { - cntr = &(s->ls_percpu[0]->lp_cntr[i]); + struct obd_histogram *hist; + + cntr_header = &s->ls_cnt_header[i]; lprocfs_stats_collect(s, i, &ret); - seq_printf(p, " %s:%.*s { samples: %11"LPF64"u", - cntr->lc_name, width(cntr->lc_name, 15), spaces, + seq_printf(p, " %s:%.*s { samples: %11llu", + cntr_header->lc_name, + width(cntr_header->lc_name, 15), spaces, ret.lc_count); - if (cntr->lc_units[0] != '\0') - seq_printf(p, ", unit: %5s", cntr->lc_units); + if (cntr_header->lc_units[0] != '\0') + seq_printf(p, ", unit: %5s", cntr_header->lc_units); - if (cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { - seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u," - " sum:%16"LPF64"u", + if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) { + seq_printf(p, ", min: %8llu, max: %8llu, sum: %16llu", ret.lc_count ? ret.lc_min : 0, ret.lc_count ? ret.lc_max : 0, ret.lc_count ? ret.lc_sum : 0); } - if (cntr->lc_config & LPROCFS_CNTR_STDDEV) { - seq_printf(p, ", sumsq: %18"LPF64"u", + if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) { + seq_printf(p, ", sumsq: %18llu", ret.lc_count ? ret.lc_sumsquare : 0); } - seq_printf(p, " }\n"); - + /* show obd_histogram */ + hist = s->ls_cnt_header[i].lc_hist; + if (hist != NULL) { + bool first = true; + int j; + + seq_puts(p, ", hist: { "); + for (j = 0; j < ARRAY_SIZE(hist->oh_buckets); j++) { + unsigned long val = hist->oh_buckets[j]; + + if (val == 0) + continue; + if (first) + first = false; + else + seq_puts(p, ", "); + + if (j < 10) + seq_printf(p, "%lu: %lu", BIT(j), val); + else if (j < 20) + seq_printf(p, "%luK: %lu", BIT(j - 10), + val); + else if (j < 30) + seq_printf(p, "%luM: %lu", BIT(j - 20), + val); + else + seq_printf(p, "%luG: %lu", BIT(j - 30), + val); + } + seq_puts(p, " }"); + } + seq_puts(p, " }\n"); } + return 0; } -struct seq_operations lprocfs_jobstats_seq_sops = { - start: lprocfs_jobstats_seq_start, - stop: lprocfs_jobstats_seq_stop, - next: lprocfs_jobstats_seq_next, - show: lprocfs_jobstats_seq_show, +static const struct seq_operations lprocfs_jobstats_seq_sops = { + .start = lprocfs_jobstats_seq_start, + .stop = lprocfs_jobstats_seq_stop, + .next = lprocfs_jobstats_seq_next, + .show = lprocfs_jobstats_seq_show, }; static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file) { - struct proc_dir_entry *dp = PDE(inode); + struct lprocfs_jobstats_data *data = NULL; struct seq_file *seq; int rc; - if (LPROCFS_ENTRY_AND_CHECK(dp)) - return -ENOENT; - rc = seq_open(file, &lprocfs_jobstats_seq_sops); - if (rc) { - LPROCFS_EXIT(); + if (rc) return rc; - } + + OBD_ALLOC_PTR(data); + if (!data) + return -ENOMEM; + + data->pjd_stats = pde_data(inode); + data->pjd_last_job = NULL; + data->pjd_last_pos = 0; seq = file->private_data; - seq->private = dp->data; + seq->private = data; return 0; } -static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf, +static ssize_t lprocfs_jobstats_seq_write(struct file *file, + const char __user *buf, size_t len, loff_t *off) { struct seq_file *seq = file->private_data; - struct obd_job_stats *stats = seq->private; - char jobid[JOBSTATS_JOBID_SIZE]; - int all = 0; + struct lprocfs_jobstats_data *data = seq->private; + struct obd_job_stats *stats = data->pjd_stats; + char jobid[4 * LUSTRE_JOBID_SIZE]; /* all escaped chars, plus ""\n\0 */ + char *p1, *p2, *last; + unsigned int c; struct job_stat *job; - if (!memcmp(buf, "clear", strlen("clear"))) { - all = 1; - } else if (len < JOBSTATS_JOBID_SIZE) { - memset(jobid, 0, JOBSTATS_JOBID_SIZE); - /* Trim '\n' if any */ - if (buf[len - 1] == '\n') - memcpy(jobid, buf, len - 1); - else - memcpy(jobid, buf, len); - } else { + if (len == 0 || len >= 4 * LUSTRE_JOBID_SIZE) return -EINVAL; + + if (stats->ojs_hash == NULL) + return -ENODEV; + + if (copy_from_user(jobid, buf, len)) + return -EFAULT; + jobid[len] = 0; + last = jobid + len - 1; + + /* Trim '\n' if any */ + if (*last == '\n') + *(last--) = 0; + + /* decode escaped chars if jobid is a quoted string */ + if (jobid[0] == '"' && *last == '"') { + last--; + + for (p1 = jobid, p2 = jobid + 1; p2 <= last; p1++, p2++) { + if (*p2 != '\\') { + *p1 = *p2; + } else if (p2 + 3 <= last && *(p2 + 1) == 'x' && + sscanf(p2 + 2, "%02X", &c) == 1) { + *p1 = c; + p2 += 3; + } else { + return -EINVAL; + } + } + *p1 = 0; + } + jobid[LUSTRE_JOBID_SIZE - 1] = 0; + + if (strcmp(jobid, "clear") == 0) { + lprocfs_job_cleanup(stats, true); - LASSERT(stats->ojs_hash); - if (all) { - time_t oldest = 0; - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, - &oldest); return len; } - if (!strlen(jobid)) + if (strlen(jobid) == 0) return -EINVAL; job = cfs_hash_lookup(stats->ojs_hash, jobid); @@ -454,29 +685,45 @@ static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf, return len; } -struct file_operations lprocfs_jobstats_seq_fops = { - .owner = THIS_MODULE, - .open = lprocfs_jobstats_seq_open, - .read = seq_read, - .write = lprocfs_jobstats_seq_write, - .llseek = seq_lseek, - .release = lprocfs_seq_release, -}; - -static void job_cleanup_callback(unsigned long data) +/** + * Clean up the seq file state when the /proc file is closed. + * + * This also expires old job stats from the cache after they have been + * printed in case the system is idle and not generating new jobstats. + * + * \param[in] inode struct inode for seq file being closed + * \param[in] file struct file for seq file being closed + * + * \retval 0 on success + * \retval negative errno on failure + */ +static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file) { - struct obd_job_stats *stats = (struct obd_job_stats *)data; - time_t oldest; - - if (stats->ojs_cleanup_interval) { - oldest = cfs_time_current_sec() - stats->ojs_cleanup_interval; - cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, - &oldest); - cfs_timer_arm(&stats->ojs_cleanup_timer, - cfs_time_shift(stats->ojs_cleanup_interval)); + struct seq_file *seq = file->private_data; + struct lprocfs_jobstats_data *data = seq->private; + + /* drop the ref of last saved job */ + if (data->pjd_last_job) { + job_putref(data->pjd_last_job); + data->pjd_last_pos = 0; + data->pjd_last_job = NULL; } + + lprocfs_job_cleanup(data->pjd_stats, false); + OBD_FREE_PTR(data); + + return lprocfs_seq_release(inode, file); } +static const struct proc_ops lprocfs_jobstats_seq_fops = { + PROC_OWNER(THIS_MODULE) + .proc_open = lprocfs_jobstats_seq_open, + .proc_read = seq_read, + .proc_write = lprocfs_jobstats_seq_write, + .proc_lseek = seq_lseek, + .proc_release = lprocfs_jobstats_seq_release, +}; + int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, cntr_init_callback init_fn) { @@ -487,12 +734,20 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_type->typ_name); - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) { - CERROR("Invalid obd device type.\n"); + if (cntr_num <= 0) + RETURN(-EINVAL); + + if (init_fn == NULL) + RETURN(-EINVAL); + + /* Currently needs to be a target due to the use of obt_jobstats. */ + if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0 && + strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) != 0) { + CERROR("%s: invalid device type %s for job stats: rc = %d\n", + obd->obd_name, obd->obd_type->typ_name, -EINVAL); RETURN(-EINVAL); } - stats = &obd->u.obt.obt_jobstats; + stats = &obd2obt(obd)->obt_jobstats; LASSERT(stats->ojs_hash == NULL); stats->ojs_hash = cfs_hash_create("JOB_STATS", @@ -506,66 +761,61 @@ int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, if (stats->ojs_hash == NULL) RETURN(-ENOMEM); - CFS_INIT_LIST_HEAD(&stats->ojs_list); - cfs_rwlock_init(&stats->ojs_lock); + INIT_LIST_HEAD(&stats->ojs_list); + spin_lock_init(&stats->ojs_lock); stats->ojs_cntr_num = cntr_num; stats->ojs_cntr_init_fn = init_fn; - cfs_timer_init(&stats->ojs_cleanup_timer, job_cleanup_callback, stats); - stats->ojs_cleanup_interval = 600; /* 10 mins by default */ - cfs_timer_arm(&stats->ojs_cleanup_timer, - cfs_time_shift(stats->ojs_cleanup_interval)); - - LPROCFS_WRITE_ENTRY(); - entry = create_proc_entry("job_stats", 0644, obd->obd_proc_entry); - LPROCFS_WRITE_EXIT(); - if (entry) { - entry->proc_fops = &lprocfs_jobstats_seq_fops; - entry->data = stats; - RETURN(0); - } else { + /* Store 1/2 the actual interval, since we use that the most, and + * it is easier to work with. + */ + stats->ojs_cleanup_interval = ktime_set(600 / 2, 0); /* default 10 min*/ + stats->ojs_cleanup_last = ktime_get_real(); + + entry = lprocfs_add_simple(obd->obd_proc_entry, "job_stats", stats, + &lprocfs_jobstats_seq_fops); + if (IS_ERR(entry)) { lprocfs_job_stats_fini(obd); RETURN(-ENOMEM); } + RETURN(0); } EXPORT_SYMBOL(lprocfs_job_stats_init); +#endif /* CONFIG_PROC_FS*/ -int lprocfs_rd_job_interval(char *page, char **start, off_t off, - int count, int *eof, void *data) +ssize_t job_cleanup_interval_show(struct kobject *kobj, struct attribute *attr, + char *buf) { - struct obd_device *obd = (struct obd_device *)data; + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); struct obd_job_stats *stats; + struct timespec64 ts; + + stats = &obd2obt(obd)->obt_jobstats; + ts = ktime_to_timespec64(stats->ojs_cleanup_interval); - LASSERT(obd != NULL); - stats = &obd->u.obt.obt_jobstats; - *eof = 1; - return snprintf(page, count, "%d\n", stats->ojs_cleanup_interval); + return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)ts.tv_sec * 2); } -EXPORT_SYMBOL(lprocfs_rd_job_interval); +EXPORT_SYMBOL(job_cleanup_interval_show); -int lprocfs_wr_job_interval(struct file *file, const char *buffer, - unsigned long count, void *data) +ssize_t job_cleanup_interval_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) { - struct obd_device *obd = (struct obd_device *)data; + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); struct obd_job_stats *stats; - int val, rc; + unsigned int val; + int rc; - LASSERT(obd != NULL); - stats = &obd->u.obt.obt_jobstats; + stats = &obd2obt(obd)->obt_jobstats; - rc = lprocfs_write_helper(buffer, count, &val); + rc = kstrtouint(buffer, 0, &val); if (rc) return rc; - stats->ojs_cleanup_interval = val; - if (!stats->ojs_cleanup_interval) - cfs_timer_disarm(&stats->ojs_cleanup_timer); - else - cfs_timer_arm(&stats->ojs_cleanup_timer, - cfs_time_shift(stats->ojs_cleanup_interval)); + stats->ojs_cleanup_interval = ktime_set(val / 2, 0); + lprocfs_job_cleanup(stats, false); return count; - } -EXPORT_SYMBOL(lprocfs_wr_job_interval); - -#endif /* LPROCFS*/ +EXPORT_SYMBOL(job_cleanup_interval_store);