3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see
17 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Copyright (c) 2012, 2015, Intel Corporation.
23 * Use is subject to license terms.
25 * Author: Niu Yawei <niu@whamcloud.com>
28 * lustre/obdclass/lprocfs_jobstats.c
31 #define DEBUG_SUBSYSTEM S_CLASS
34 #include <obd_class.h>
35 #include <lprocfs_status.h>
36 #include <lustre/lustre_idl.h>
41 * JobID formats & JobID environment variable names for supported
45 * JobID format: 32 bit integer.
46 * JobID env var: SLURM_JOB_ID.
48 * JobID format: Decimal integer range to 99999.
49 * JobID env var: JOB_ID.
51 * JobID format: 6 digit integer by default (up to 999999), can be
52 * increased to 10 digit (up to 2147483646).
53 * JobID env var: LSB_JOBID.
55 * JobID format: String of machine_name.cluster_id.process_id, for
56 * example: fr2n02.32.0
57 * JobID env var: LOADL_STEP_ID.
59 * JobID format: String of sequence_number[.server_name][@server].
60 * JobID env var: PBS_JOBID.
62 * JobID format: Same as PBS.
63 * JobID env var: Same as PBS.
/* Per-jobid statistics entry.  Entries are indexed by jobid in the
 * device's ojs_hash table and chained on ojs_list for sequential
 * display, with ojs_lock (reached via js_jobstats) guarding the list. */
67 struct hlist_node js_hash; /* hash struct for this jobid */
68 struct list_head js_list; /* on ojs_list, with ojs_lock */
69 atomic_t js_refcount; /* num users of this struct */
70 char js_jobid[LUSTRE_JOBID_SIZE]; /* job name */
71 time_t js_timestamp; /* seconds of most recent stat*/
72 struct lprocfs_stats *js_stats; /* per-job statistics */
73 struct obd_job_stats *js_jobstats; /* for accessing ojs_lock */
/* cfs_hash callback: djb2 hash of the NUL-terminated jobid key,
 * folded into the table size via @mask. */
77 job_stat_hash(struct cfs_hash *hs, const void *key, unsigned mask)
79 return cfs_hash_djb2_hash(key, strlen(key), mask);
/* cfs_hash callback: map a hash node back to its key.  The node is
 * embedded in a job_stat; presumably the jobid string is returned as
 * the key (return statement not visible in this view -- confirm). */
82 static void *job_stat_key(struct hlist_node *hnode)
85 job = hlist_entry(hnode, struct job_stat, js_hash);
/* cfs_hash callback: non-zero iff @key exactly matches the node's
 * jobid -- same length AND same bytes (strncmp alone would accept a
 * prefix match). */
89 static int job_stat_keycmp(const void *key, struct hlist_node *hnode)
92 job = hlist_entry(hnode, struct job_stat, js_hash);
93 return (strlen(job->js_jobid) == strlen(key)) &&
94 !strncmp(job->js_jobid, key, strlen(key));
/* cfs_hash callback: hash node -> containing job_stat object. */
97 static void *job_stat_object(struct hlist_node *hnode)
99 return hlist_entry(hnode, struct job_stat, js_hash);
/* cfs_hash callback: take a reference on the entry on behalf of a
 * hash-table user. */
102 static void job_stat_get(struct cfs_hash *hs, struct hlist_node *hnode)
104 struct job_stat *job;
105 job = hlist_entry(hnode, struct job_stat, js_hash);
106 atomic_inc(&job->js_refcount);
/* Destroy a job_stat whose refcount has reached zero: unlink it from
 * the per-device ojs_list under the owner's ojs_lock, then release its
 * counter array.  (The final freeing of @job itself is on a line not
 * visible in this view.) */
109 static void job_free(struct job_stat *job)
111 LASSERT(atomic_read(&job->js_refcount) == 0);
112 LASSERT(job->js_jobstats != NULL);
114 write_lock(&job->js_jobstats->ojs_lock);
115 list_del_init(&job->js_list);
116 write_unlock(&job->js_jobstats->ojs_lock);
118 lprocfs_free_stats(&job->js_stats);
/* Drop one reference on @job; when the count hits zero the entry is
 * destroyed (presumably via job_free() -- the call is on a line not
 * visible in this view). */
122 static void job_putref(struct job_stat *job)
124 LASSERT(atomic_read(&job->js_refcount) > 0);
125 if (atomic_dec_and_test(&job->js_refcount))
/* cfs_hash callback: release the hash table's reference on an entry;
 * called with the bucket lock held. */
129 static void job_stat_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
131 struct job_stat *job;
132 job = hlist_entry(hnode, struct job_stat, js_hash);
/* cfs_hash callback at table teardown.  All entries are expired by
 * lprocfs_job_stats_fini() before the hash is destroyed, so reaching
 * this with a live node is a bug -- just complain. */
136 static void job_stat_exit(struct cfs_hash *hs, struct hlist_node *hnode)
138 CERROR("should not have any items\n");
/* cfs_hash operations binding jobid-string keys to job_stat entries. */
141 static struct cfs_hash_ops job_stats_hash_ops = {
142 .hs_hash = job_stat_hash,
143 .hs_key = job_stat_key,
144 .hs_keycmp = job_stat_keycmp,
145 .hs_object = job_stat_object,
146 .hs_get = job_stat_get,
147 .hs_put_locked = job_stat_put_locked,
148 .hs_exit = job_stat_exit,
152 * Jobstats expiry iterator to clean up old jobids
154 * Called for each job_stat structure on this device, it should delete stats
155 * older than the specified \a oldest_time in seconds. If \a oldest_time is
156 * in the future then this will delete all statistics (e.g. during shutdown).
158 * \param[in] hs hash of all jobids on this device
159 * \param[in] bd hash bucket containing this jobid
160 * \param[in] hnode hash structure for this jobid
161 * \param[in] data pointer to stats expiry time in seconds
163 static int job_cleanup_iter_callback(struct cfs_hash *hs,
164 struct cfs_hash_bd *bd,
165 struct hlist_node *hnode, void *data)
167 time_t oldest_time = *((time_t *)data);
168 struct job_stat *job;
170 job = hlist_entry(hnode, struct job_stat, js_hash);
/* stale entry: drop it from the hash; the bucket lock is already held
 * by the cfs_hash_for_each_safe() iteration */
171 if (job->js_timestamp < oldest_time)
172 cfs_hash_bd_del_locked(hs, bd, hnode);
178 * Clean up jobstats that were updated more than \a before seconds ago.
180 * Since this function may be called frequently, do not scan all of the
181 * jobstats on each call, only twice per cleanup interval. That means stats
182 * may be around on average cleanup_interval / 4 longer than necessary,
183 * but that is not considered harmful.
185 * If \a before is negative then this will force clean up all jobstats due
186 * to the expiry time being in the future (e.g. at shutdown).
188 * If there is already another thread doing jobstats cleanup, don't try to
189 * do this again in the current thread unless this is a force cleanup.
191 * \param[in] stats stucture tracking all job stats for this device
192 * \param[in] before expire jobstats updated more than this many seconds ago
194 static void lprocfs_job_cleanup(struct obd_job_stats *stats, int before)
196 time_t now = cfs_time_current_sec();
/* Unlocked fast-path checks for non-forced (before >= 0) cleanups:
 * skip when expiry is disabled, when a cleanup ran recently (at most
 * twice per cleanup interval), or when another thread is cleaning. */
199 if (likely(before >= 0)) {
200 unsigned int cleanup_interval = stats->ojs_cleanup_interval;
202 if (cleanup_interval == 0 || before == 0)
205 if (now < stats->ojs_last_cleanup + cleanup_interval / 2)
208 if (stats->ojs_cleaning)
/* Re-check ojs_cleaning under ojs_lock and claim the cleaner role;
 * a forced cleanup (before < 0) proceeds even if someone else is
 * already cleaning. */
212 write_lock(&stats->ojs_lock);
213 if (before >= 0 && stats->ojs_cleaning) {
214 write_unlock(&stats->ojs_lock);
218 stats->ojs_cleaning = true;
219 write_unlock(&stats->ojs_lock);
221 /* Can't hold ojs_lock over hash iteration, since it is grabbed by
222 * job_cleanup_iter_callback()
223 * ->cfs_hash_bd_del_locked()
227 * Holding ojs_lock isn't necessary for safety of the hash iteration,
228 * since locking of the hash is handled internally, but there isn't
229 * any benefit to having multiple threads doing cleanup at one time.
/* Expire every jobid whose last update predates (now - before); a
 * negative @before puts the cutoff in the future, expiring everything. */
231 oldest = now - before;
232 cfs_hash_for_each_safe(stats->ojs_hash, job_cleanup_iter_callback,
/* Hand back the cleaner role and record when this pass finished. */
235 write_lock(&stats->ojs_lock);
236 stats->ojs_cleaning = false;
237 stats->ojs_last_cleanup = cfs_time_current_sec();
238 write_unlock(&stats->ojs_lock);
/* Allocate and initialize a job_stat for @jobid, with one reference
 * held by the caller and a fresh counter array sized for this device.
 * Returns NULL on allocation failure (the allocation of @job itself
 * and the error/return paths are on lines not visible in this view). */
241 static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs)
243 struct job_stat *job;
249 job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0);
250 if (job->js_stats == NULL) {
/* let the owning device (MDT/OST) label its own counters */
255 jobs->ojs_cntr_init_fn(job->js_stats);
257 memcpy(job->js_jobid, jobid, LUSTRE_JOBID_SIZE);
258 job->js_timestamp = cfs_time_current_sec();
259 job->js_jobstats = jobs;
260 INIT_HLIST_NODE(&job->js_hash);
261 INIT_LIST_HEAD(&job->js_list);
262 atomic_set(&job->js_refcount, 1);
/* Record one event for @jobid on device @obd: look up (or create and
 * insert) the per-job entry, refresh its timestamp, and add @amount to
 * counter index @event.  Out-of-range events and NULL/empty/oversized
 * jobids are rejected (the early-return lines are not visible here). */
267 int lprocfs_job_stats_log(struct obd_device *obd, char *jobid,
268 int event, long amount)
270 struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
271 struct job_stat *job, *job2;
274 LASSERT(stats != NULL);
275 LASSERT(stats->ojs_hash != NULL);
277 if (event >= stats->ojs_cntr_num)
280 if (jobid == NULL || strlen(jobid) == 0)
/* jobid must fit in the fixed-size buffer including its NUL */
283 if (strlen(jobid) >= LUSTRE_JOBID_SIZE) {
284 CERROR("Invalid jobid size (%lu), expect(%d)\n",
285 (unsigned long)strlen(jobid) + 1, LUSTRE_JOBID_SIZE);
289 job = cfs_hash_lookup(stats->ojs_hash, jobid);
/* Cache miss: opportunistically expire stale entries, then allocate a
 * new entry and race other threads to insert it. */
293 lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval);
295 job = job_alloc(jobid, stats);
299 job2 = cfs_hash_findadd_unique(stats->ojs_hash, job->js_jobid,
304 /* We cannot LASSERT(!list_empty(&job->js_list)) here,
305 * since we just lost the race for inserting "job" into the
306 * ojs_list, and some other thread is doing it _right_now_.
307 * Instead, be content the other thread is doing this, since
308 * "job2" was initialized in job_alloc() already. LU-2163 */
/* We won the insertion race: link the new entry onto ojs_list. */
310 LASSERT(list_empty(&job->js_list));
311 write_lock(&stats->ojs_lock);
312 list_add_tail(&job->js_list, &stats->ojs_list);
313 write_unlock(&stats->ojs_lock);
317 LASSERT(stats == job->js_jobstats);
318 job->js_timestamp = cfs_time_current_sec();
319 lprocfs_counter_add(job->js_stats, event, amount);
/* Tear down job stats for @obd: force-expire every entry (the negative
 * "before" puts the expiry cutoff in the future, see
 * lprocfs_job_cleanup()), then drop the hash table reference.  No-op if
 * jobstats were never initialized. */
327 void lprocfs_job_stats_fini(struct obd_device *obd)
329 struct obd_job_stats *stats = &obd->u.obt.obt_jobstats;
331 if (stats->ojs_hash == NULL)
334 lprocfs_job_cleanup(stats, -99);
335 cfs_hash_putref(stats->ojs_hash);
336 stats->ojs_hash = NULL;
337 LASSERT(list_empty(&stats->ojs_list));
/* seq_file ->start: take ojs_lock for reading (released in ->stop) and
 * return the iteration position -- SEQ_START_TOKEN for the header,
 * otherwise the entry found by walking ojs_list.  The *pos check
 * guarding the SEQ_START_TOKEN return and the walk body are on lines
 * not visible in this view. */
341 static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos)
343 struct obd_job_stats *stats = p->private;
345 struct job_stat *job;
347 read_lock(&stats->ojs_lock);
349 return SEQ_START_TOKEN;
351 list_for_each_entry(job, &stats->ojs_list, js_list) {
/* seq_file ->stop: drop the read lock taken in ->start. */
358 static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v)
360 struct obd_job_stats *stats = p->private;
362 read_unlock(&stats->ojs_lock);
/* seq_file ->next: advance from the header token (first list entry) or
 * from the current entry to its successor; NULL ends the iteration
 * when the walk wraps back to the list head. */
365 static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos)
367 struct obd_job_stats *stats = p->private;
368 struct job_stat *job;
369 struct list_head *next;
372 if (v == SEQ_START_TOKEN) {
373 next = stats->ojs_list.next;
375 job = (struct job_stat *)v;
376 next = job->js_list.next;
379 return next == &stats->ojs_list ? NULL :
380 list_entry(next, struct job_stat, js_list);
384 * Example of output on MDT:
388 * snapshot_time: 1322494486
389 * open: { samples: 1, unit: reqs }
390 * close: { samples: 1, unit: reqs }
391 * mknod: { samples: 0, unit: reqs }
392 * link: { samples: 0, unit: reqs }
393 * unlink: { samples: 0, unit: reqs }
394 * mkdir: { samples: 0, unit: reqs }
395 * rmdir: { samples: 0, unit: reqs }
396 * rename: { samples: 0, unit: reqs }
397 * getattr: { samples: 1, unit: reqs }
398 * setattr: { samples: 0, unit: reqs }
399 * getxattr: { samples: 0, unit: reqs }
400 * setxattr: { samples: 0, unit: reqs }
401 * statfs: { samples: 0, unit: reqs }
402 * sync: { samples: 0, unit: reqs }
404 * Example of output on OST:
408 * snapshot_time: 1322494602
409 * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 }
410 * write: { samples: 1, unit: bytes, min: 4096, max: 4096, sum: 4096 }
411 * setattr: { samples: 0, unit: reqs }
412 * punch: { samples: 0, unit: reqs }
413 * sync: { samples: 0, unit: reqs }
416 static const char spaces[] = " ";
/*
 * Number of pad spaces needed to left-align @str within a field of
 * @len columns; used with "%.*s" against the `spaces` string above.
 *
 * The previous version hard-coded 15 instead of using @len (and wrote
 * the nonstandard "static int inline" keyword order), which could go
 * negative and was wrong for any field width other than 15.  Behavior
 * is unchanged for the only call site in this file (len == 15), and
 * the result is now clamped so it is never negative.
 */
static inline int width(const char *str, int len)
{
	int used = (int)strlen(str);

	return used < len ? len - used : 0;
}
/* seq_file ->show: print the "job_stats:" header for SEQ_START_TOKEN,
 * otherwise one record for the job_stat @v -- jobid (non-printable
 * bytes filtered), snapshot timestamp, then one line per counter with
 * units and, depending on counter config, min/max/sum and sumsq. */
423 static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v)
425 struct job_stat *job = v;
426 struct lprocfs_stats *s;
427 struct lprocfs_counter ret;
428 struct lprocfs_counter_header *cntr_header;
431 if (v == SEQ_START_TOKEN) {
432 seq_printf(p, "job_stats:\n");
436 /* Replace the non-printable character in jobid with '?', so
437 * that the output of jobid will be confined in single line. */
438 seq_printf(p, "- %-16s ", "job_id:");
439 for (i = 0; i < strlen(job->js_jobid); i++) {
440 if (isprint(job->js_jobid[i]) != 0)
441 seq_putc(p, job->js_jobid[i]);
447 seq_printf(p, " %-16s %ld\n", "snapshot_time:", job->js_timestamp);
450 for (i = 0; i < s->ls_num; i++) {
451 cntr_header = &s->ls_cnt_header[i];
452 lprocfs_stats_collect(s, i, &ret);
/* counter name, right-padded with spaces to a fixed-width column */
454 seq_printf(p, " %s:%.*s { samples: %11llu",
455 cntr_header->lc_name,
456 width(cntr_header->lc_name, 15), spaces,
458 if (cntr_header->lc_units[0] != '\0')
459 seq_printf(p, ", unit: %5s", cntr_header->lc_units);
/* min/max/sum are only recorded for AVGMINMAX counters; report 0
 * rather than garbage when there are no samples yet */
461 if (cntr_header->lc_config & LPROCFS_CNTR_AVGMINMAX) {
462 seq_printf(p, ", min:%8llu, max:%8llu,"
464 ret.lc_count ? ret.lc_min : 0,
465 ret.lc_count ? ret.lc_max : 0,
466 ret.lc_count ? ret.lc_sum : 0);
468 if (cntr_header->lc_config & LPROCFS_CNTR_STDDEV) {
469 seq_printf(p, ", sumsq: %18llu",
470 ret.lc_count ? ret.lc_sumsquare : 0);
473 seq_printf(p, " }\n");
479 static const struct seq_operations lprocfs_jobstats_seq_sops = {
480 start: lprocfs_jobstats_seq_start,
481 stop: lprocfs_jobstats_seq_stop,
482 next: lprocfs_jobstats_seq_next,
483 show: lprocfs_jobstats_seq_show,
/* Open handler for "job_stats": verify the proc entry is still live,
 * set up the seq_file, and stash the obd_job_stats pointer (the proc
 * entry's data) as the seq private pointer used by start/next/show.
 * Error-return lines for the two rc checks are not visible here. */
486 static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file)
488 struct seq_file *seq;
491 rc = LPROCFS_ENTRY_CHECK(inode);
495 rc = seq_open(file, &lprocfs_jobstats_seq_sops);
498 seq = file->private_data;
499 seq->private = PDE_DATA(inode);
/* Write handler for "job_stats": the string "clear" force-expires all
 * entries; any other string is treated as a jobid whose entry is
 * looked up and deleted from the hash.  Input must fit within
 * LUSTRE_JOBID_SIZE including the terminating NUL.
 * NOTE(review): explicit NUL-termination of @jobid after
 * copy_from_user() appears to be on a line not visible in this view --
 * confirm it exists before relying on strcmp/strlen below. */
503 static ssize_t lprocfs_jobstats_seq_write(struct file *file,
504 const char __user *buf,
505 size_t len, loff_t *off)
507 struct seq_file *seq = file->private_data;
508 struct obd_job_stats *stats = seq->private;
509 char jobid[LUSTRE_JOBID_SIZE];
510 struct job_stat *job;
512 if (len == 0 || len >= LUSTRE_JOBID_SIZE)
515 if (stats->ojs_hash == NULL)
518 if (copy_from_user(jobid, buf, len))
522 /* Trim '\n' if any */
523 if (jobid[len - 1] == '\n')
/* "clear": negative cutoff puts expiry in the future -> drop all */
526 if (strcmp(jobid, "clear") == 0) {
527 lprocfs_job_cleanup(stats, -99);
532 if (strlen(jobid) == 0)
535 job = cfs_hash_lookup(stats->ojs_hash, jobid);
539 cfs_hash_del_key(stats->ojs_hash, jobid);
546 * Clean up the seq file state when the /proc file is closed.
548 * This also expires old job stats from the cache after they have been
549 * printed in case the system is idle and not generating new jobstats.
551 * \param[in] inode struct inode for seq file being closed
552 * \param[in] file struct file for seq file being closed
554 * \retval 0 on success
555 * \retval negative errno on failure
557 static int lprocfs_jobstats_seq_release(struct inode *inode, struct file *file)
559 struct seq_file *seq = file->private_data;
560 struct obd_job_stats *stats = seq->private;
/* opportunistic expiry so old jobids still age out of the cache even
 * when the system is idle and not logging new events */
562 lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval);
564 return lprocfs_seq_release(inode, file);
/* proc file operations for "job_stats"; reads go through the seq_file
 * iterator above, writes clear individual jobids or the whole table. */
567 static const struct file_operations lprocfs_jobstats_seq_fops = {
568 .owner = THIS_MODULE,
569 .open = lprocfs_jobstats_seq_open,
571 .write = lprocfs_jobstats_seq_write,
573 .release = lprocfs_jobstats_seq_release,
/* Initialize per-jobid statistics for target device @obd: create the
 * jobid hash, set up the list/lock, record the counter count and the
 * per-device counter-init callback, default the cleanup interval to
 * 10 minutes, and register the "job_stats" proc file.  Only MDT and
 * OST devices are accepted since the state lives in obd->u.obt. */
576 int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num,
577 cntr_init_callback init_fn)
579 struct proc_dir_entry *entry;
580 struct obd_job_stats *stats;
583 LASSERT(obd->obd_proc_entry != NULL);
584 LASSERT(obd->obd_type->typ_name);
592 /* Currently needs to be a target due to the use of obt_jobstats. */
593 if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) != 0 &&
594 strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME) != 0) {
595 CERROR("%s: invalid device type %s for job stats: rc = %d\n",
596 obd->obd_name, obd->obd_type->typ_name, -EINVAL);
599 stats = &obd->u.obt.obt_jobstats;
601 LASSERT(stats->ojs_hash == NULL);
602 stats->ojs_hash = cfs_hash_create("JOB_STATS",
603 HASH_JOB_STATS_CUR_BITS,
604 HASH_JOB_STATS_MAX_BITS,
605 HASH_JOB_STATS_BKT_BITS, 0,
610 if (stats->ojs_hash == NULL)
613 INIT_LIST_HEAD(&stats->ojs_list);
614 rwlock_init(&stats->ojs_lock);
615 stats->ojs_cntr_num = cntr_num;
616 stats->ojs_cntr_init_fn = init_fn;
617 stats->ojs_cleanup_interval = 600; /* 10 mins by default */
618 stats->ojs_last_cleanup = cfs_time_current_sec();
620 entry = lprocfs_add_simple(obd->obd_proc_entry, "job_stats", stats,
621 &lprocfs_jobstats_seq_fops);
/* proc file creation failed: roll back the hash and list setup */
623 lprocfs_job_stats_fini(obd);
/* Show handler for "job_cleanup_interval": print the jobstats expiry
 * interval in seconds (device validation lines not visible here). */
630 int lprocfs_job_interval_seq_show(struct seq_file *m, void *data)
632 struct obd_device *obd = m->private;
633 struct obd_job_stats *stats;
638 stats = &obd->u.obt.obt_jobstats;
/* NOTE(review): ojs_cleanup_interval is read as unsigned int in
 * lprocfs_job_cleanup() and writes below allow values up to UINT_MAX,
 * so "%d" misprints values above INT_MAX -- consider "%u". */
639 seq_printf(m, "%d\n", stats->ojs_cleanup_interval);
/* Write handler for "job_cleanup_interval": parse a decimal number of
 * seconds (0 disables expiry per lprocfs_job_cleanup()), store it, and
 * immediately run a cleanup pass using the new interval.  Error-return
 * lines for the parse/range checks are not visible in this view. */
645 lprocfs_job_interval_seq_write(struct file *file, const char __user *buffer,
646 size_t count, loff_t *off)
648 struct obd_device *obd;
649 struct obd_job_stats *stats;
653 obd = ((struct seq_file *)file->private_data)->private;
657 stats = &obd->u.obt.obt_jobstats;
659 rc = lprocfs_str_to_s64(buffer, count, &val);
/* reject negatives and values that don't fit the unsigned interval */
662 if (val < 0 || val > UINT_MAX)
665 stats->ojs_cleanup_interval = val;
666 lprocfs_job_cleanup(stats, stats->ojs_cleanup_interval);
670 #endif /* CONFIG_PROC_FS*/