3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see
17 * http://www.gnu.org/licenses/gpl-2.0.html
22 * Copyright (c) 2012, 2016, Intel Corporation.
23 * Use is subject to license terms.
25 * Author: Niu Yawei <niu@whamcloud.com>
28 * lustre/obdclass/lprocfs_jobstats.c
31 #define DEBUG_SUBSYSTEM S_CLASS
33 #include <obd_class.h>
34 #include <lprocfs_status.h>
39 * JobID formats & JobID environment variable names for supported
43 * JobID format: 32 bit integer.
44 * JobID env var: SLURM_JOB_ID.
46 * JobID format: Decimal integer range to 99999.
47 * JobID env var: JOB_ID.
49 * JobID format: 6 digit integer by default (up to 999999), can be
50 * increased to 10 digit (up to 2147483646).
51 * JobID env var: LSB_JOBID.
53 * JobID format: String of machine_name.cluster_id.process_id, for
54 * example: fr2n02.32.0
55 * JobID env var: LOADL_STEP_ID.
57 * JobID format: String of sequence_number[.server_name][@server].
58 * JobID env var: PBS_JOBID.
60 * JobID format: Same as PBS.
61 * JobID env var: Same as PBS.
/*
 * Fields of the per-jobid statistics record (struct job_stat, the type named
 * in the hlist_entry() and list_entry() calls of this file). The opening and
 * closing lines of the struct definition are not visible in this listing.
 */
65 struct hlist_node js_hash; /* hash struct for this jobid */
66 struct list_head js_list; /* on ojs_list, with ojs_lock */
67 atomic_t js_refcount; /* num users of this struct */
68 char js_jobid[LUSTRE_JOBID_SIZE]; /* job name + NUL*/
69 ktime_t js_time_init; /* time of initial stat*/
70 ktime_t js_time_latest; /* time of most recent stat*/
71 struct lprocfs_stats *js_stats; /* per-job statistics */
72 struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */
/*
 * Hash callback (installed as .hs_hash): treats key as a NUL-terminated
 * string and hashes strlen(key) bytes of it with cfs_hash_djb2_hash(),
 * passing mask through to that helper. The hs argument is unused here.
 */
76 job_stat_hash(struct cfs_hash *hs, const void *key, unsigned mask)
78 return cfs_hash_djb2_hash(key, strlen(key), mask);
/*
 * Key callback (installed as .hs_key): recovers the job_stat that embeds
 * hnode through its js_hash member.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably it returns job->js_jobid, the string that job_stat_keycmp()
 * compares against - TODO confirm.
 */
81 static void *job_stat_key(struct hlist_node *hnode)
84 job = hlist_entry(hnode, struct job_stat, js_hash);
/*
 * Key-compare callback (installed as .hs_keycmp): returns non-zero when the
 * string key and the js_jobid of the entry embedding hnode have the same
 * length and identical bytes, zero otherwise.
 * NOTE(review): equal lengths plus strncmp() over that length is equivalent
 * to strcmp() == 0, and strlen(key) is evaluated twice; a single strcmp()
 * would express the same test.
 */
88 static int job_stat_keycmp(const void *key, struct hlist_node *hnode)
91 job = hlist_entry(hnode, struct job_stat, js_hash);
92 return (strlen(job->js_jobid) == strlen(key)) &&
93 !strncmp(job->js_jobid, key, strlen(key));
/*
 * Object callback (installed as .hs_object): returns the job_stat that
 * embeds hnode through its js_hash member.
 */
96 static void *job_stat_object(struct hlist_node *hnode)
98 return hlist_entry(hnode, struct job_stat, js_hash);
/*
 * Get callback (installed as .hs_get): takes one reference on the job_stat
 * embedding hnode by atomically incrementing js_refcount. The hs argument
 * is unused here.
 */
101 static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode)
103 struct job_stat *job;
104 job = hlist_entry(hnode, struct job_stat, js_hash);
105 atomic_inc(&job->js_refcount);
/*
 * Tear down a job_stat whose reference count has reached zero.
 *
 * Asserts that js_refcount is 0 and that js_jobstats is set, unlinks the
 * entry from the device list (js_list) while holding ojs_lock for writing,
 * then releases the per-job counters with lprocfs_free_stats().
 * NOTE(review): the release of the job_stat structure itself is not visible
 * in this listing (lines after lprocfs_free_stats() are absent) - confirm
 * against the full source.
 */
108 static void job_free(struct job_stat *job)
110 LASSERT(atomic_read(&job->js_refcount) == 0);
111 LASSERT(job->js_jobstats != NULL);
113 write_lock(&job->js_jobstats->ojs_lock);
114 list_del_init(&job->js_list);
115 write_unlock(&job->js_jobstats->ojs_lock);
117 lprocfs_free_stats(&job->js_stats);
/*
 * Drop one reference on job. Asserts that at least one reference is held,
 * then atomically decrements js_refcount and tests for zero.
 * NOTE(review): the statement executed when the count reaches zero is not
 * visible in this listing; presumably it is job_free(job) - TODO confirm.
 */
121 static void job_putref(struct job_stat *job)
123 LASSERT(atomic_read(&job->js_refcount) > 0);
124 if (atomic_dec_and_test(&job->js_refcount))
/*
 * Put callback (installed as .hs_put_locked): recovers the job_stat that
 * embeds hnode through its js_hash member.
 * NOTE(review): the call that actually drops the reference is not visible
 * in this listing; presumably job_putref(job) - TODO confirm.
 */
128 static void job_stat_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
130 struct job_stat *job;
131 job = hlist_entry(hnode, struct job_stat, js_hash);
/*
 * Exit callback (installed as .hs_exit): only logs an error through
 * CERROR(), because no entries are expected to remain when it is invoked.
 * Neither hs nor hnode is used.
 */
135 static void job_stat_exit(struct cfs_hash *hs, struct hlist_node *hnode)
137 CERROR("should not have any items\n");
/*
 * Callback table wiring the job_stat_* helpers above into a cfs_hash:
 * hashing and comparing by jobid string, mapping hash nodes to job_stat
 * objects, and reference get/put.
 */
140 static struct cfs_hash_ops job_stats_hash_ops = {
141 .hs_hash = job_stat_hash,
142 .hs_key = job_stat_key,
143 .hs_keycmp = job_stat_keycmp,
144 .hs_object = job_stat_object,
145 .hs_get = job_stat_get,
146 .hs_put_locked = job_stat_put_locked,
147 .hs_exit = job_stat_exit,
151 * Jobstats expiry iterator to clean up old jobids
153 * Called for each job_stat structure on this device, it should delete stats
154 * older than the ktime_t \a oldest_time that \a data points to. If \a oldest_time is
155 * in the future then this will delete all statistics (e.g. during shutdown).
157 * \param[in] hs hash of all jobids on this device
158 * \param[in] bd hash bucket containing this jobid
159 * \param[in] hnode hash structure for this jobid
160 * \param[in] data pointer to ktime_t stats expiry time (oldest_time)
/*
 * Implementation notes: data is dereferenced as a ktime_t holding the expiry
 * threshold. An entry whose js_time_latest lies strictly before that
 * threshold is removed from its bucket with cfs_hash_bd_del_locked();
 * entries at or after the threshold are left alone.
 * NOTE(review): the return statement is not visible in this listing.
 */
162 static int job_cleanup_iter_callback(struct cfs_hash *hs,
163 struct cfs_hash_bd *bd,
164 struct hlist_node *hnode, void *data)
166 ktime_t oldest_time = *((ktime_t *)data);
167 struct job_stat *job;
169 job = hlist_entry(hnode, struct job_stat, js_hash);
170 if (ktime_before(job->js_time_latest, oldest_time))
171 cfs_hash_bd_del_locked(hs, bd, hnode);
177 * Clean up jobstats that were last updated more than twice ojs_cleanup_interval ago.
179 * Since this function may be called frequently, do not scan all of the
180 * jobstats on each call, only twice per cleanup interval. That means stats
181 * may be on average around cleanup_interval / 4 older than the cleanup
182 * interval, but that is not considered harmful.
184 * The value stored in ojs_cleanup_interval is how often to perform a cleanup
185 * scan, and 1/2 of the maximum age of the individual statistics. This is
186 * done rather than dividing the interval by two each time, because it is
187 * much easier to do the division when the value is initially set (in seconds)
188 * rather than after it has been converted to ktime_t, and maybe a bit faster.
190 * If \a clear is true then this will force clean up all jobstats
191 * (e.g. at shutdown).
193 * If there is already another thread doing jobstats cleanup, don't try to
194 * do this again in the current thread unless this is a force cleanup.
196 * \param[in] stats structure tracking all job stats for this device
197 * \param[in] clear clear all job stats if true
/*
 * Implementation notes.
 *
 * Non-forced calls (clear == false) are filtered before any lock is taken:
 * a zero ojs_cleanup_interval, a scan that happened too recently (compared
 * against ojs_cleanup_last) and an already running scan (ojs_cleaning) are
 * each tested; the statements taken on those tests are not visible in this
 * listing but are presumably early returns. ojs_cleaning is then re-tested
 * under the ojs_lock write lock, because the first test is done unlocked.
 *
 * The scan itself runs with ojs_cleaning set and ojs_lock released, and
 * passes the expiry threshold to job_cleanup_iter_callback() through
 * cfs_hash_for_each_safe(). Afterwards ojs_cleaning is reset and
 * ojs_cleanup_last is stamped, both under the write lock.
 *
 * NOTE(review): the threshold is computed as now minus twice
 * cleanup_interval with no dependence on clear, so in the visible code a
 * forced cleanup removes only entries that are already expired, not all
 * entries. That contradicts the documented contract (clear forces removal of
 * all jobstats) and the LASSERT(list_empty(&stats->ojs_list)) that follows
 * the forced cleanup in lprocfs_job_stats_fini(). A threshold in the future
 * (for example now plus twice the interval, or the maximum ktime_t value)
 * when clear is true would match the contract - confirm against the full
 * source, since several lines of this function are absent from the listing.
 */
199 static void lprocfs_job_cleanup(struct obd_job_stats *stats, bool clear)
201 ktime_t cleanup_interval = stats->ojs_cleanup_interval;
202 ktime_t now = ktime_get();
205 if (likely(!clear)) {
206 /* ojs_cleanup_interval of zero means never clean up stats */
207 if (ktime_to_ns(cleanup_interval) == 0)
210 if (ktime_before(now, ktime_add(stats->ojs_cleanup_last,
214 if (stats->ojs_cleaning)
218 write_lock(&stats->ojs_lock);
219 if (!clear && stats->ojs_cleaning) {
220 write_unlock(&stats->ojs_lock);
224 stats->ojs_cleaning = true;
225 write_unlock(&stats->ojs_lock);
227 /* Can't hold ojs_lock over hash iteration, since it is grabbed by
228 * job_cleanup_iter_callback()
229 * ->cfs_hash_bd_del_locked()
233 * Holding ojs_lock isn't necessary for safety of the hash iteration,
234 * since locking of the hash is handled internally, but there isn't
235 * any benefit to having multiple threads doing cleanup at one time.
237 * Subtract twice the cleanup_interval, since it is 1/2 the maximum age.
239 oldest = ktime_sub(now, ktime_add(cleanup_interval, cleanup_interval));
240 cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback,
243 write_lock(&stats->ojs_lock);
244 stats->ojs_cleaning = false;
245 stats->ojs_cleanup_last = ktime_get();
246 write_unlock(&stats->ojs_lock);
/*
 * Build a new job_stat for jobid on the device statistics jobs.
 *
 * Allocates the per-job counters with lprocfs_alloc_stats() using
 * jobs->ojs_cntr_num, initialises them through the jobs->ojs_cntr_init_fn
 * callback, copies the jobid, stamps both js_time_init and js_time_latest
 * with the current ktime, records the owning jobs pointer, initialises the
 * hash and list linkage, and sets the reference count to 1.
 *
 * The allocation of the job_stat itself, the failure handling and the return
 * statement are not visible in this listing.
 *
 * NOTE(review): memcpy() copies sizeof(job->js_jobid) bytes, that is a full
 * LUSTRE_JOBID_SIZE, regardless of the length of the string in jobid. This
 * reads past the terminator of a shorter string and is only safe if every
 * caller passes a buffer of at least LUSTRE_JOBID_SIZE bytes; the visible
 * caller only checks strlen(jobid). A bounded string copy would remove that
 * dependency - confirm caller buffer sizes.
 */
249 static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
251 struct job_stat *job;
257 job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0);
258 if (job->js_stats == NULL) {
263 jobs->ojs_cntr_init_fn(job->js_stats, 0);
265 memcpy(job->js_jobid, jobid, sizeof(job->js_jobid));
266 job->js_time_init = ktime_get();
267 job->js_time_latest = job->js_time_init;
268 job->js_jobstats = jobs;
269 INIT_HLIST_NODE(&job->js_hash);
270 INIT_LIST_HEAD(&job->js_list);
271 atomic_set(&job->js_refcount, 1);
/*
 * Account amount against counter event for the job named jobid on obd.
 *
 * Visible flow:
 *  - the statistics live in obd->u.obt.obt_jobstats and must already have a
 *    hash (asserted);
 *  - event is checked against ojs_cntr_num, jobid against NULL and the empty
 *    string, and its length against LUSTRE_JOBID_SIZE (an over-long jobid is
 *    reported through CERROR); the values returned on those checks are not
 *    visible in this listing;
 *  - the jobid is looked up in the hash; lprocfs_job_cleanup(stats, false)
 *    and job_alloc() follow, presumably only when the lookup misses;
 *  - the new entry is inserted with cfs_hash_findadd_unique(); the winner of
 *    an insertion race links its entry onto ojs_list under the ojs_lock
 *    write lock;
 *  - finally js_time_latest is refreshed and the counter is updated with
 *    lprocfs_counter_add().
 *
 * The reference release and the return value are not visible in this
 * listing.
 *
 * \param[in] obd    device whose job statistics are updated
 * \param[in] jobid  NUL-terminated job identifier
 * \param[in] event  counter index, must be below ojs_cntr_num
 * \param[in] amount value added to the counter
 */
276 int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
277 int event, long amount)
279 struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
280 struct job_stat *job, *job2;
283 LASSERT(stats != NULL);
284 LASSERT(stats->ojs_hash != NULL);
286 if (event >= stats->ojs_cntr_num)
289 if (jobid == NULL || strlen(jobid) == 0)
292 if (strlen(jobid) >= LUSTRE_JOBID_SIZE) {
293 CERROR("Invalid jobid size (%lu), expect(%d)\n",
294 (unsigned long)strlen(jobid) + 1, LUSTRE_JOBID_SIZE);
298 job = cfs_hash_lookup(stats->ojs_hash, jobid);
302 lprocfs_job_cleanup(stats, false);
304 job = job_alloc(jobid, stats);
308 job2 = cfs_hash_findadd_unique(stats->ojs_hash, job->js_jobid,
313 /* We cannot LASSERT(!list_empty(&job->js_list)) here,
314 * since we just lost the race for inserting "job" into the
315 * ojs_list, and some other thread is doing it _right_now_.
316 * Instead, be content the other thread is doing this, since
317 * "job2" was initialized in job_alloc() already. LU-2163 */
319 LASSERT(list_empty(&job->js_list));
320 write_lock(&stats->ojs_lock);
321 list_add_tail(&job->js_list, &stats->ojs_list);
322 write_unlock(&stats->ojs_lock);
326 LASSERT(stats == job->js_jobstats);
327 job->js_time_latest = ktime_get();
328 lprocfs_counter_add(job->js_stats, event, amount);
334 EXPORT_SYMBOL(lprocfs_job_stats_log);
/*
 * Release the job statistics of obd.
 *
 * Does nothing further when no hash was created (ojs_hash is NULL; the
 * statement taken on that test is not visible, presumably a return).
 * Otherwise it forces a cleanup of all entries, drops the hash reference,
 * clears ojs_hash and asserts that ojs_list is empty.
 *
 * The final assertion relies on lprocfs_job_cleanup(stats, true), together
 * with the hash teardown, having unlinked every job_stat from ojs_list.
 */
336 void lprocfs_job_stats_fini(struct obd_device *obd)
338 struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
340 if (stats->ojs_hash == NULL)
343 lprocfs_job_cleanup(stats, true);
344 cfs_hash_putref(stats->ojs_hash);
345 stats->ojs_hash = NULL;
346 LASSERT(list_empty(&stats->ojs_list));
348 EXPORT_SYMBOL(lprocfs_job_stats_fini);
/*
 * seq_file start callback: takes ojs_lock for reading (it is released in
 * lprocfs_jobstats_seq_stop()) and positions the iterator.
 *
 * SEQ_START_TOKEN is returned for the header record; the condition guarding
 * that return is not visible in this listing (presumably *pos == 0).
 * Otherwise ojs_list is walked to find the entry for *pos; the loop body and
 * the final return are not visible either.
 */
350 static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
352 struct obd_job_stats *stats = p->private;
354 struct job_stat *job;
356 read_lock(&stats->ojs_lock);
358 return SEQ_START_TOKEN;
360 list_for_each_entry(job, &stats->ojs_list, js_list) {
/*
 * seq_file stop callback: releases the ojs_lock read lock taken in
 * lprocfs_jobstats_seq_start(). The v argument is unused.
 */
367 static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
369 struct obd_job_stats *stats = p->private;
371 read_unlock(&stats->ojs_lock);
/*
 * seq_file next callback: advances from v to the following record.
 *
 * From SEQ_START_TOKEN the next record is the first element of ojs_list;
 * from a job_stat it is the element after its js_list link. Returns NULL
 * once the list head is reached again, otherwise the job_stat containing
 * the next link. Any update of *pos is not visible in this listing.
 */
374 static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
376 struct obd_job_stats *stats = p->private;
377 struct job_stat *job;
378 struct list_head *next;
381 if (v == SEQ_START_TOKEN) {
382 next = stats->ojs_list.next;
384 job = (struct job_stat *)v;
385 next = job->js_list.next;
388 return next == &stats->ojs_list ? NULL :
389 list_entry(next, struct job_stat, js_list);
393 * Example of output on MDT:
397 * snapshot_time: 1322494486.123456789
398 * start_time: 1322494476.012345678
399 * elapsed_time: 10.111111111
400 * open: { samples: 1, unit: reqs }
401 * close: { samples: 1, unit: reqs }
402 * mknod: { samples: 0, unit: reqs }
403 * link: { samples: 0, unit: reqs }
404 * unlink: { samples: 0, unit: reqs }
405 * mkdir: { samples: 0, unit: reqs }
406 * rmdir: { samples: 0, unit: reqs }
407 * rename: { samples: 0, unit: reqs }
408 * getattr: { samples: 1, unit: reqs }
409 * setattr: { samples: 0, unit: reqs }
410 * getxattr: { samples: 0, unit: reqs }
411 * setxattr: { samples: 0, unit: reqs }
412 * statfs: { samples: 0, unit: reqs }
413 * sync: { samples: 0, unit: reqs }
415 * Example of output on OST:
419 * snapshot_time: 1322494602.123456789
420 * start_time: 1322494592.987654321
421 * elapsed_time: 9.135802468
422 * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 }
423 * write: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
424 * setattr: { samples: 0, unit: reqs }
425 * punch: { samples: 0, unit: reqs }
426 * sync: { samples: 0, unit: reqs }
/*
 * Padding source: lprocfs_jobstats_seq_show() prints a prefix of this string
 * through a precision-limited string conversion to align counter names.
 * NOTE(review): the literal shows a single space in this listing, while
 * width() can request up to 15 characters of padding; the original literal
 * presumably holds a longer run of spaces that was collapsed - confirm.
 */
429 static const char spaces[] = " ";
/*
 * Number of padding characters needed after str to fill a column of len
 * characters: len minus the length of str, with the length capped at 15.
 * NOTE(review): the specifier order static int inline is accepted by
 * compilers but the conventional spelling is static inline int.
 */
431 static int inline width(const char *str, int len)
433 return len - min((int)strlen(str), 15);
/*
 * seq_file show callback: emits one record of the job_stats output.
 *
 * For SEQ_START_TOKEN only the job_stats header line is printed. For a
 * job_stat the function prints the job_id line (printable characters of
 * js_jobid are written as they are; the replacement for the others is not
 * visible in this listing), the time header through lprocfs_stats_header(),
 * and then one line per counter of s: name, aligned padding, sample count,
 * the unit when lc_units is non-empty, min/max/sum when the counter has
 * LPROCFS_CNTR_AVGMINMAX set, and sumsq when it also has LPROCFS_CNTR_STDDEV
 * set. Min, max, sum and sumsq are printed as 0 while lc_count is 0.
 *
 * The assignment of s and the declaration of i are not visible in this
 * listing; s is presumably job->js_stats - TODO confirm.
 *
 * NOTE(review): strlen(job->js_jobid) sits in the loop condition and is
 * recomputed on every iteration; it could be hoisted or replaced by a test
 * for the terminating NUL.
 */
436 static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
438 struct job_stat *job = v;
439 struct lprocfs_stats *s;
440 struct lprocfs_counter ret;
441 struct lprocfs_counter_header *cntr_header;
444 if (v == SEQ_START_TOKEN) {
445 seq_printf(p, "job_stats:\n");
449 /* Replace the non-printable character in jobid with '?', so
450 * that the output of jobid will be confined in single line. */
451 seq_printf(p, "- %-16s ", "job_id:");
452 for (i = 0; i < strlen(job->js_jobid); i++) {
453 if (isprint(job->js_jobid[i]) != 0)
454 seq_putc(p, job->js_jobid[i]);
460 lprocfs_stats_header(p, job->js_time_latest, job->js_time_init, 16,
464 for (i = 0; i < s->ls_num; i++) {
465 cntr_header = &s->ls_cnt_header[i];
466 lprocfs_stats_collect(s, i, &ret);
468 seq_printf(p, " %s:%.*s { samples: %11llu",
469 cntr_header->lc_name,
470 width(cntr_header->lc_name, 15), spaces,
472 if (cntr_header->lc_units[0] != '\0')
473 seq_printf(p, ", unit: %5s", cntr_header->lc_units);
475 if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
476 seq_printf(p, ", min: %8llu, max: %8llu, sum: %16llu",
477 ret.lc_count ? ret.lc_min : 0,
478 ret.lc_count ? ret.lc_max : 0,
479 ret.lc_count ? ret.lc_sum : 0);
481 if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
482 seq_printf(p, ", sumsq: %18llu",
483 ret.lc_count ? ret.lc_sumsquare : 0);
486 seq_printf(p, " }\n");
/*
 * seq_file iterator for the job_stats file: start takes the ojs_lock read
 * lock, stop releases it, next walks ojs_list and show prints one record.
 */
492 static const struct seq_operations lprocfs_jobstats_seq_sops = {
493 .start = lprocfs_jobstats_seq_start,
494 .stop = lprocfs_jobstats_seq_stop,
495 .next = lprocfs_jobstats_seq_next,
496 .show = lprocfs_jobstats_seq_show,
/*
 * Open callback: opens the seq_file with lprocfs_jobstats_seq_sops and
 * stores the PDE_DATA() of the inode as the seq_file private pointer. The
 * other callbacks of this file read that pointer as a struct obd_job_stats.
 * The handling of the seq_open() result and the return statement are not
 * visible in this listing.
 */
499 static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
501 struct seq_file *seq;
504 rc = seq_open(file, &lprocfs_jobstats_seq_sops);
507 seq = file->private_data;
508 seq->private = PDE_DATA(inode);
/*
 * Write callback: removes job statistics on request from user space.
 *
 * The written text, after an optional trailing newline is trimmed, is either
 * the word clear, which forces a cleanup through
 * lprocfs_job_cleanup(stats, true), or a jobid, which is looked up and
 * deleted from the hash with cfs_hash_del_key().
 *
 * Writes of zero bytes or of LUSTRE_JOBID_SIZE bytes and more, a missing
 * hash, a failed copy_from_user() and an empty jobid are each tested; the
 * values returned on those tests and on success are not visible in this
 * listing.
 *
 * NOTE(review): the NUL termination of the bytes copied into jobid is not
 * visible in this listing. The strcmp() and strlen() calls below depend on
 * it, as does the statement that trims the newline - confirm against the
 * full source.
 */
512 static ssize_t lprocfs_jobstats_seq_write(struct file *file,
513 const char __user *buf,
514 size_t len, loff_t *off)
516 struct seq_file *seq = file->private_data;
517 struct obd_job_stats *stats = seq->private;
518 char jobid[LUSTRE_JOBID_SIZE];
519 struct job_stat *job;
521 if (len == 0 || len >= LUSTRE_JOBID_SIZE)
524 if (stats->ojs_hash == NULL)
527 if (copy_from_user(jobid, buf, len))
531 /* Trim '\n' if any */
532 if (jobid[len - 1] == '\n')
535 if (strcmp(jobid, "clear") == 0) {
536 lprocfs_job_cleanup(stats, true);
541 if (strlen(jobid) == 0)
544 job = cfs_hash_lookup(stats->ojs_hash, jobid);
548 cfs_hash_del_key(stats->ojs_hash, jobid);
555 * Clean up the seq file state when the /proc file is closed.
557 * This also expires old job stats from the cache after they have been
558 * printed in case the system is idle and not generating new jobstats.
560 * \param[in] inode struct inode for seq file being closed
561 * \param[in] file struct file for seq file being closed
563 * \retval 0 on success
564 * \retval negative errno on failure
/*
 * Implementation notes: runs a non-forced lprocfs_job_cleanup() on the
 * statistics attached to the seq_file, then hands the close over to
 * lprocfs_seq_release() and returns its result.
 */
566 static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file)
568 struct seq_file *seq = file->private_data;
569 struct obd_job_stats *stats = seq->private;
571 lprocfs_job_cleanup(stats, false);
573 return lprocfs_seq_release(inode, file);
/*
 * File operations of the job_stats proc entry: reads and seeks go through
 * the generic seq_file helpers, while open, write and release use the
 * jobstats specific callbacks of this file.
 */
576 static const struct proc_ops lprocfs_jobstats_seq_fops = {
577 PROC_OWNER(THIS_MODULE)
578 .proc_open = lprocfs_jobstats_seq_open,
579 .proc_read = seq_read,
580 .proc_write = lprocfs_jobstats_seq_write,
581 .proc_lseek = seq_lseek,
582 .proc_release = lprocfs_jobstats_seq_release,
/*
 * Set up job statistics for obd and publish them as the job_stats proc file.
 *
 * Only devices whose type name is LUSTRE_MDT_NAME or LUSTRE_OST_NAME are
 * accepted; any other type is reported through CERROR with -EINVAL. The
 * function then creates the JOB_STATS hash, initialises the job list and its
 * lock, records the counter count and the counter init callback, sets the
 * default cleanup interval and registers the proc entry. The visible code
 * calls lprocfs_job_stats_fini() to undo the setup; the condition guarding
 * that call is not visible, presumably a failed registration.
 *
 * The values returned on each path are not visible in this listing.
 *
 * \param[in] obd      target device, must already have obd_proc_entry set
 * \param[in] cntr_num number of counters kept for every job
 * \param[in] init_fn  callback that initialises the counters of a new job
 */
585 int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
586 cntr_init_callback init_fn)
588 struct proc_dir_entry *entry;
589 struct obd_job_stats *stats;
592 LASSERT(obd->obd_proc_entry != NULL);
593 LASSERT(obd->obd_type->typ_name);
601 /* Currently needs to be a target due to the use of obt_jobstats. */
602 if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0 &&
603 strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) != 0) {
604 CERROR("%s: invalid device type %s for job stats: rc = %d\n",
605 obd->obd_name, obd->obd_type->typ_name, -EINVAL);
608 stats = &obd->u.obt.obt_jobstats;
610 LASSERT(stats->ojs_hash == NULL);
611 stats->ojs_hash = cfs_hash_create("JOB_STATS",
612 HASH_JOB_STATS_CUR_BITS,
613 HASH_JOB_STATS_MAX_BITS,
614 HASH_JOB_STATS_BKT_BITS, 0,
619 if (stats->ojs_hash == NULL)
622 INIT_LIST_HEAD(&stats->ojs_list);
623 rwlock_init(&stats->ojs_lock);
624 stats->ojs_cntr_num = cntr_num;
625 stats->ojs_cntr_init_fn = init_fn;
626 /* Store 1/2 the actual interval, since we use that the most, and
627 * it is easier to work with.
629 stats->ojs_cleanup_interval = ktime_set(600 / 2, 0); /* default 10 min*/
630 stats->ojs_cleanup_last = ktime_get();
632 entry = lprocfs_add_simple(obd->obd_proc_entry, "job_stats", stats,
633 &lprocfs_jobstats_seq_fops);
635 lprocfs_job_stats_fini(obd);
640 EXPORT_SYMBOL(lprocfs_job_stats_init);
641 #endif /* CONFIG_PROC_FS*/
/*
 * sysfs show handler for the job cleanup interval.
 *
 * ojs_cleanup_interval holds half of the configured interval, so the whole
 * seconds of the stored value are doubled before being printed into buf,
 * followed by a newline. Returns the byte count reported by scnprintf().
 * An odd interval written through job_cleanup_interval_store() therefore
 * reads back one second smaller, because the store halves with integer
 * division.
 */
643 ssize_t job_cleanup_interval_show(struct kobject *kobj, struct attribute *attr,
646 struct obd_device *obd = container_of(kobj, struct obd_device,
648 struct obd_job_stats *stats;
649 struct timespec64 ts;
651 stats = &obd->u.obt.obt_jobstats;
652 ts = ktime_to_timespec64(stats->ojs_cleanup_interval);
654 return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)ts.tv_sec * 2);
656 EXPORT_SYMBOL(job_cleanup_interval_show);
/*
 * sysfs store handler for the job cleanup interval.
 *
 * Parses buffer as an unsigned integer number of seconds with kstrtouint()
 * (base 0, so the prefix selects the radix), stores half of it in
 * ojs_cleanup_interval and then runs a non-forced lprocfs_job_cleanup().
 * The handling of a parse failure and the return value are not visible in
 * this listing.
 *
 * NOTE(review): val / 2 turns an input of 1 into a stored interval of 0,
 * which lprocfs_job_cleanup() treats as never clean up; rounding up, or
 * rejecting the value 1, would avoid silently disabling the cleanup.
 */
658 ssize_t job_cleanup_interval_store(struct kobject *kobj,
659 struct attribute *attr,
660 const char *buffer, size_t count)
662 struct obd_device *obd = container_of(kobj, struct obd_device,
664 struct obd_job_stats *stats;
668 stats = &obd->u.obt.obt_jobstats;
670 rc = kstrtouint(buffer, 0, &val);
674 stats->ojs_cleanup_interval = ktime_set(val / 2, 0);
675 lprocfs_job_cleanup(stats, false);
679 EXPORT_SYMBOL(job_cleanup_interval_store);