/* GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 only, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is included * in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * * GPL HEADER END */ /* * Copyright (c) 2011 Whamcloud, Inc. * Use is subject to license terms. * * Author: Niu Yawei */ /* * lustre/obdclass/lprocfs_jobstats.c */ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_CLASS #ifndef __KERNEL__ # include #endif #include #include #include #if defined(LPROCFS) /* * JobID formats & JobID environment variable names for supported * job schedulers: * * SLURM: * JobID format: 32 bit integer. * JobID env var: SLURM_JOB_ID. * SGE: * JobID format: Decimal integer range to 99999. * JobID env var: JOB_ID. * LSF: * JobID format: 6 digit integer by default (up to 999999), can be * increased to 10 digit (up to 2147483646). * JobID env var: LSB_JOBID. * Loadleveler: * JobID format: String of machine_name.cluster_id.process_id, for * example: fr2n02.32.0 * JobID env var: LOADL_STEP_ID. * PBS: * JobID format: String of sequence_number[.server_name][@server]. * JobID env var: PBS_JOBID. * Maui/MOAB: * JobID format: Same as PBS. * JobID env var: Same as PBS. */ struct job_stat { cfs_hlist_node_t js_hash; cfs_list_t js_list; cfs_atomic_t js_refcount; char js_jobid[JOBSTATS_JOBID_SIZE]; time_t js_timestamp; /* seconds */ struct lprocfs_stats *js_stats; struct obd_job_stats *js_jobstats; }; static unsigned job_stat_hash(cfs_hash_t *hs, const void *key, unsigned mask) { return cfs_hash_djb2_hash(key, strlen(key), mask); } static void *job_stat_key(cfs_hlist_node_t *hnode) { struct job_stat *job; job = cfs_hlist_entry(hnode, struct job_stat, js_hash); return job->js_jobid; } static int job_stat_keycmp(const void *key, cfs_hlist_node_t *hnode) { struct job_stat *job; job = cfs_hlist_entry(hnode, struct job_stat, js_hash); return (strlen(job->js_jobid) == strlen(key)) && !strncmp(job->js_jobid, key, strlen(key)); } static void *job_stat_object(cfs_hlist_node_t *hnode) { return cfs_hlist_entry(hnode, struct job_stat, js_hash); } static void job_stat_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode) { struct job_stat *job; job = cfs_hlist_entry(hnode, struct job_stat, js_hash); cfs_atomic_inc(&job->js_refcount); } static void job_free(struct job_stat *job) { LASSERT(atomic_read(&job->js_refcount) == 0); LASSERT(job->js_jobstats); cfs_write_lock(&job->js_jobstats->ojs_lock); cfs_list_del_init(&job->js_list); cfs_write_unlock(&job->js_jobstats->ojs_lock); lprocfs_free_stats(&job->js_stats); OBD_FREE_PTR(job); } static void job_putref(struct job_stat *job) { LASSERT(atomic_read(&job->js_refcount) > 0); if (atomic_dec_and_test(&job->js_refcount)) job_free(job); } static void job_stat_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode) { struct job_stat *job; job = cfs_hlist_entry(hnode, struct job_stat, js_hash); job_putref(job); } static void job_stat_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode) { CERROR("Should not have any items!"); } static cfs_hash_ops_t job_stats_hash_ops = { .hs_hash = job_stat_hash, .hs_key = job_stat_key, .hs_keycmp = job_stat_keycmp, .hs_object = job_stat_object, .hs_get = job_stat_get, .hs_put_locked = job_stat_put_locked, .hs_exit = job_stat_exit, }; static struct job_stat *job_alloc(char *jobid, struct obd_job_stats *jobs) { struct job_stat *job; LASSERT(jobs->ojs_cntr_num && jobs->ojs_cntr_init_fn); OBD_ALLOC_PTR(job); if (job == NULL) return NULL; job->js_stats = lprocfs_alloc_stats(jobs->ojs_cntr_num, 0); if (job->js_stats == NULL) { OBD_FREE_PTR(job); return NULL; } jobs->ojs_cntr_init_fn(job->js_stats); memcpy(job->js_jobid, jobid, JOBSTATS_JOBID_SIZE); job->js_timestamp = cfs_time_current_sec(); job->js_jobstats = jobs; CFS_INIT_HLIST_NODE(&job->js_hash); CFS_INIT_LIST_HEAD(&job->js_list); cfs_atomic_set(&job->js_refcount, 1); return job; } int lprocfs_job_stats_log(struct obd_device *obd, char *jobid, int event, long amount) { struct obd_job_stats *stats = &obd->u.obt.obt_jobstats; struct job_stat *job, *job2; ENTRY; LASSERT(stats && stats->ojs_hash); if (!jobid || !strlen(jobid)) RETURN(-EINVAL); if (strlen(jobid) >= JOBSTATS_JOBID_SIZE) { CERROR("Invalid jobid size (%lu), expect(%d)\n", (unsigned long)strlen(jobid) + 1, JOBSTATS_JOBID_SIZE); RETURN(-EINVAL); } job = cfs_hash_lookup(stats->ojs_hash, jobid); if (job) goto found; job = job_alloc(jobid, stats); if (job == NULL) RETURN(-ENOMEM); job2 = cfs_hash_findadd_unique(stats->ojs_hash, job->js_jobid, &job->js_hash); if (job2 != job) { job_putref(job); job = job2; LASSERT(!cfs_list_empty(&job->js_list)); } else { LASSERT(cfs_list_empty(&job->js_list)); cfs_write_lock(&stats->ojs_lock); cfs_list_add_tail(&job->js_list, &stats->ojs_list); cfs_write_unlock(&stats->ojs_lock); } found: LASSERT(stats == job->js_jobstats); LASSERT(stats->ojs_cntr_num > event); job->js_timestamp = cfs_time_current_sec(); lprocfs_counter_add(job->js_stats, event, amount); job_putref(job); RETURN(0); } EXPORT_SYMBOL(lprocfs_job_stats_log); static int job_iter_callback(cfs_hash_t *hs, cfs_hash_bd_t *bd, cfs_hlist_node_t *hnode, void *data) { time_t oldest = *((time_t *)data); struct job_stat *job; job = cfs_hlist_entry(hnode, struct job_stat, js_hash); if (!oldest || job->js_timestamp < oldest) cfs_hash_bd_del_locked(hs, bd, hnode); return 0; } void lprocfs_job_stats_fini(struct obd_device *obd) { struct obd_job_stats *stats = &obd->u.obt.obt_jobstats; time_t oldest = 0; if (stats->ojs_hash == NULL) return; cfs_timer_disarm(&stats->ojs_cleanup_timer); cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest); cfs_hash_putref(stats->ojs_hash); stats->ojs_hash = NULL; LASSERT(cfs_list_empty(&stats->ojs_list)); } EXPORT_SYMBOL(lprocfs_job_stats_fini); static void *lprocfs_jobstats_seq_start(struct seq_file *p, loff_t *pos) { struct obd_job_stats *stats = p->private; loff_t off = *pos; struct job_stat *job; cfs_read_lock(&stats->ojs_lock); if (off == 0) return SEQ_START_TOKEN; off--; cfs_list_for_each_entry(job, &stats->ojs_list, js_list) { if (!off--) return job; } return NULL; } static void lprocfs_jobstats_seq_stop(struct seq_file *p, void *v) { struct obd_job_stats *stats = p->private; cfs_read_unlock(&stats->ojs_lock); } static void *lprocfs_jobstats_seq_next(struct seq_file *p, void *v, loff_t *pos) { struct obd_job_stats *stats = p->private; struct job_stat *job; cfs_list_t *next; ++*pos; if (v == SEQ_START_TOKEN) { next = stats->ojs_list.next; } else { job = (struct job_stat *)v; next = job->js_list.next; } return next == &stats->ojs_list ? NULL : cfs_list_entry(next, struct job_stat, js_list); } /* * Example of output on MDT: * * job_stats: * - job_id: test_id.222.25844 * snapshot_time: 1322494486 * open: { samples: 3, unit: reqs } * close: { samples: 3, unit: reqs } * mknod: { samples: 0, unit: reqs } * link: { samples: 0, unit: reqs } * unlink: { samples: 0, unit: reqs } * mkdir: { samples: 0, unit: reqs } * rmdir: { samples: 0, unit: reqs } * rename: { samples: 1, unit: reqs } * getattr: { samples: 7, unit: reqs } * setattr: { samples: 0, unit: reqs } * getxattr: { samples: 0, unit: reqs } * setxattr: { samples: 0, unit: reqs } * statfs: { samples: 0, unit: reqs } * sync: { samples: 0, unit: reqs } * * Example of output on OST: * * job_stats: * - job_id 4854 * snapshot_time: 1322494602 * read: { samples: 0, unit: bytes, min: 0, max: 0, sum: 0 } * write: { samples: 1, unit: bytes, min: 10, max: 10, sum: 10 } * setattr: { samples: 0, unit: reqs } * punch: { samples: 0, unit: reqs } * sync: { samples: 0, unit: reqs } */ static const char spaces[] = " "; static int inline width(const char *str, int len) { return len - min((int)strlen(str), 15); } static int lprocfs_jobstats_seq_show(struct seq_file *p, void *v) { struct job_stat *job = v; struct lprocfs_stats *s; struct lprocfs_counter ret, *cntr; int i; if (v == SEQ_START_TOKEN) { seq_printf(p, "job_stats:\n"); return 0; } seq_printf(p, "- %-16s %s\n", "job_id:", job->js_jobid); seq_printf(p, " %-16s %ld\n", "snapshot_time:", job->js_timestamp); s = job->js_stats; for (i = 0; i < s->ls_num; i++) { cntr = &(s->ls_percpu[0]->lp_cntr[i]); lprocfs_stats_collect(s, i, &ret); seq_printf(p, " %s:%.*s { samples: %11"LPF64"u", cntr->lc_name, width(cntr->lc_name, 15), spaces, ret.lc_count); if (cntr->lc_units[0] != '\0') seq_printf(p, ", unit: %5s", cntr->lc_units); if (cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { seq_printf(p, ", min:%8"LPF64"u, max:%8"LPF64"u," " sum:%16"LPF64"u", ret.lc_count ? ret.lc_min : 0, ret.lc_count ? ret.lc_max : 0, ret.lc_count ? ret.lc_sum : 0); } if (cntr->lc_config & LPROCFS_CNTR_STDDEV) { seq_printf(p, ", sumsq: %18"LPF64"u", ret.lc_count ? ret.lc_sumsquare : 0); } seq_printf(p, " }\n"); } return 0; } struct seq_operations lprocfs_jobstats_seq_sops = { start: lprocfs_jobstats_seq_start, stop: lprocfs_jobstats_seq_stop, next: lprocfs_jobstats_seq_next, show: lprocfs_jobstats_seq_show, }; static int lprocfs_jobstats_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *dp = PDE(inode); struct seq_file *seq; int rc; if (LPROCFS_ENTRY_AND_CHECK(dp)) return -ENOENT; rc = seq_open(file, &lprocfs_jobstats_seq_sops); if (rc) { LPROCFS_EXIT(); return rc; } seq = file->private_data; seq->private = dp->data; return 0; } static ssize_t lprocfs_jobstats_seq_write(struct file *file, const char *buf, size_t len, loff_t *off) { struct seq_file *seq = file->private_data; struct obd_job_stats *stats = seq->private; char jobid[JOBSTATS_JOBID_SIZE]; int all = 0; struct job_stat *job; if (!memcmp(buf, "clear", strlen("clear"))) { all = 1; } else if (len < JOBSTATS_JOBID_SIZE) { memset(jobid, 0, JOBSTATS_JOBID_SIZE); /* Trim '\n' if any */ if (buf[len - 1] == '\n') memcpy(jobid, buf, len - 1); else memcpy(jobid, buf, len); } else { return -EINVAL; } LASSERT(stats->ojs_hash); if (all) { time_t oldest = 0; cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest); return len; } if (!strlen(jobid)) return -EINVAL; job = cfs_hash_lookup(stats->ojs_hash, jobid); if (!job) return -EINVAL; cfs_hash_del_key(stats->ojs_hash, jobid); job_putref(job); return len; } struct file_operations lprocfs_jobstats_seq_fops = { .owner = THIS_MODULE, .open = lprocfs_jobstats_seq_open, .read = seq_read, .write = lprocfs_jobstats_seq_write, .llseek = seq_lseek, .release = lprocfs_seq_release, }; static void job_cleanup_callback(unsigned long data) { struct obd_job_stats *stats = (struct obd_job_stats *)data; time_t oldest; if (stats->ojs_cleanup_interval) { oldest = cfs_time_current_sec() - stats->ojs_cleanup_interval; cfs_hash_for_each_safe(stats->ojs_hash, job_iter_callback, &oldest); cfs_timer_arm(&stats->ojs_cleanup_timer, cfs_time_shift(stats->ojs_cleanup_interval)); } } int lprocfs_job_stats_init(struct obd_device *obd, int cntr_num, cntr_init_callback init_fn) { struct proc_dir_entry *entry; struct obd_job_stats *stats; ENTRY; LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_type->typ_name); if (strcmp(obd->obd_type->typ_name, LUSTRE_MDT_NAME) && strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) { CERROR("Invalid obd device type.\n"); RETURN(-EINVAL); } stats = &obd->u.obt.obt_jobstats; LASSERT(stats->ojs_hash == NULL); stats->ojs_hash = cfs_hash_create("JOB_STATS", HASH_JOB_STATS_CUR_BITS, HASH_JOB_STATS_MAX_BITS, HASH_JOB_STATS_BKT_BITS, 0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA, &job_stats_hash_ops, CFS_HASH_DEFAULT); if (stats->ojs_hash == NULL) RETURN(-ENOMEM); CFS_INIT_LIST_HEAD(&stats->ojs_list); cfs_rwlock_init(&stats->ojs_lock); stats->ojs_cntr_num = cntr_num; stats->ojs_cntr_init_fn = init_fn; cfs_timer_init(&stats->ojs_cleanup_timer, job_cleanup_callback, stats); stats->ojs_cleanup_interval = 600; /* 10 mins by default */ cfs_timer_arm(&stats->ojs_cleanup_timer, cfs_time_shift(stats->ojs_cleanup_interval)); LPROCFS_WRITE_ENTRY(); entry = create_proc_entry("job_stats", 0644, obd->obd_proc_entry); LPROCFS_WRITE_EXIT(); if (entry) { entry->proc_fops = &lprocfs_jobstats_seq_fops; entry->data = stats; RETURN(0); } else { lprocfs_job_stats_fini(obd); RETURN(-ENOMEM); } } EXPORT_SYMBOL(lprocfs_job_stats_init); int lprocfs_rd_job_interval(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = (struct obd_device *)data; struct obd_job_stats *stats; LASSERT(obd != NULL); stats = &obd->u.obt.obt_jobstats; *eof = 1; return snprintf(page, count, "%d\n", stats->ojs_cleanup_interval); } EXPORT_SYMBOL(lprocfs_rd_job_interval); int lprocfs_wr_job_interval(struct file *file, const char *buffer, unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; struct obd_job_stats *stats; int val, rc; LASSERT(obd != NULL); stats = &obd->u.obt.obt_jobstats; rc = lprocfs_write_helper(buffer, count, &val); if (rc) return rc; stats->ojs_cleanup_interval = val; if (!stats->ojs_cleanup_interval) cfs_timer_disarm(&stats->ojs_cleanup_timer); else cfs_timer_arm(&stats->ojs_cleanup_timer, cfs_time_shift(stats->ojs_cleanup_interval)); return count; } EXPORT_SYMBOL(lprocfs_wr_job_interval); #endif /* LPROCFS*/