X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Flprocfs_status.c;h=5640acb88d5924b4c903ab31cb2fc0506973641d;hp=3e9eed263b1f6a1fd87e057a7b5a4223fc831590;hb=HEAD;hpb=860e20e42ddc45bf5521cbd5f53d35a310b34ded;ds=sidebyside diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 3e9eed2..5437c9c 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lustre/obdclass/lprocfs_status.c * @@ -41,6 +40,9 @@ #ifdef CONFIG_PROC_FS +/* enable start/elapsed_time in stats headers by default */ +unsigned int obd_enable_stats_header = 1; + static int lprocfs_no_percpu_stats = 0; module_param(lprocfs_no_percpu_stats, int, 0644); MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs stats"); @@ -49,78 +51,87 @@ MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs int lprocfs_single_release(struct inode *inode, struct file *file) { - return single_release(inode, file); + return single_release(inode, file); } EXPORT_SYMBOL(lprocfs_single_release); int lprocfs_seq_release(struct inode *inode, struct file *file) { - return seq_release(inode, file); + return seq_release(inode, file); } EXPORT_SYMBOL(lprocfs_seq_release); +static umode_t default_mode(const struct proc_ops *ops) +{ + umode_t mode = 0; + + if (ops->proc_read) + mode = 0444; + if (ops->proc_write) + mode |= 0200; + + return mode; +} + struct proc_dir_entry * lprocfs_add_simple(struct proc_dir_entry *root, char *name, - void *data, const struct file_operations *fops) + void *data, const struct proc_ops *fops) { struct proc_dir_entry *proc; - mode_t mode = 0; + umode_t mode; - if (root == NULL || name == NULL || fops == NULL) - return ERR_PTR(-EINVAL); + if (!root || !name || !fops) + return ERR_PTR(-EINVAL); - if (fops->read) - 
mode = 0444; - if (fops->write) - mode |= 0200; + mode = default_mode(fops); proc = proc_create_data(name, mode, root, fops, data); if (!proc) { CERROR("LprocFS: No memory to create /proc entry %s\n", name); return ERR_PTR(-ENOMEM); } - return proc; + return proc; } EXPORT_SYMBOL(lprocfs_add_simple); struct proc_dir_entry *lprocfs_add_symlink(const char *name, - struct proc_dir_entry *parent, const char *format, ...) + struct proc_dir_entry *parent, + const char *format, ...) { - struct proc_dir_entry *entry; - char *dest; - va_list ap; + struct proc_dir_entry *entry; + char *dest; + va_list ap; - if (parent == NULL || format == NULL) - return NULL; + if (!parent || !format) + return NULL; - OBD_ALLOC_WAIT(dest, MAX_STRING_SIZE + 1); - if (dest == NULL) - return NULL; + OBD_ALLOC_WAIT(dest, MAX_STRING_SIZE + 1); + if (!dest) + return NULL; - va_start(ap, format); - vsnprintf(dest, MAX_STRING_SIZE, format, ap); - va_end(ap); + va_start(ap, format); + vsnprintf(dest, MAX_STRING_SIZE, format, ap); + va_end(ap); - entry = proc_symlink(name, parent, dest); - if (entry == NULL) + entry = proc_symlink(name, parent, dest); + if (!entry) CERROR("LprocFS: Could not create symbolic link from " "%s to %s\n", name, dest); - OBD_FREE(dest, MAX_STRING_SIZE + 1); - return entry; + OBD_FREE(dest, MAX_STRING_SIZE + 1); + return entry; } EXPORT_SYMBOL(lprocfs_add_symlink); -static const struct file_operations lprocfs_generic_fops = { }; +static const struct file_operations ldebugfs_empty_ops = { }; -int ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list, - void *data) +void ldebugfs_add_vars(struct dentry *parent, struct ldebugfs_vars *list, + void *data) { if (IS_ERR_OR_NULL(parent) || IS_ERR_OR_NULL(list)) - return -EINVAL; + return; while (list->name) { - struct dentry *entry; umode_t mode = 0; if (list->proc_mode != 0000) { @@ -131,17 +142,16 @@ int ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list, if (list->fops->write) mode |= 0200; } - entry = 
debugfs_create_file(list->name, mode, parent, - list->data ? : data, - list->fops ? : &lprocfs_generic_fops); - if (IS_ERR_OR_NULL(entry)) - return entry ? PTR_ERR(entry) : -ENOMEM; + debugfs_create_file(list->name, mode, parent, + list->data ? : data, + list->fops ? : &ldebugfs_empty_ops); list++; } - return 0; } EXPORT_SYMBOL_GPL(ldebugfs_add_vars); +static const struct proc_ops lprocfs_empty_ops = { }; + /** * Add /proc entries. * @@ -157,25 +167,21 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, void *data) { - if (root == NULL || list == NULL) + if (!root || !list) return -EINVAL; - while (list->name != NULL) { + while (list->name) { struct proc_dir_entry *proc; - mode_t mode = 0; + umode_t mode = 0; - if (list->proc_mode != 0000) { + if (list->proc_mode) mode = list->proc_mode; - } else if (list->fops) { - if (list->fops->read) - mode = 0444; - if (list->fops->write) - mode |= 0200; - } + else if (list->fops) + mode = default_mode(list->fops); proc = proc_create_data(list->name, mode, root, - list->fops ?: &lprocfs_generic_fops, + list->fops ?: &lprocfs_empty_ops, list->data ?: data); - if (proc == NULL) + if (!proc) return -ENOMEM; list++; } @@ -183,108 +189,6 @@ lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, } EXPORT_SYMBOL(lprocfs_add_vars); -void ldebugfs_remove(struct dentry **entryp) -{ - debugfs_remove_recursive(*entryp); - *entryp = NULL; -} -EXPORT_SYMBOL_GPL(ldebugfs_remove); - -#ifndef HAVE_REMOVE_PROC_SUBTREE -/* for b=10866, global variable */ -DECLARE_RWSEM(_lprocfs_lock); -EXPORT_SYMBOL(_lprocfs_lock); - -static void lprocfs_remove_nolock(struct proc_dir_entry **proot) -{ - struct proc_dir_entry *root = *proot; - struct proc_dir_entry *temp = root; - struct proc_dir_entry *rm_entry; - struct proc_dir_entry *parent; - - *proot = NULL; - if (root == NULL || IS_ERR(root)) - return; - - parent = root->parent; - LASSERT(parent != NULL); - - while (1) { - while (temp->subdir != NULL) - temp = 
temp->subdir; - - rm_entry = temp; - temp = temp->parent; - - /* Memory corruption once caused this to fail, and - without this LASSERT we would loop here forever. */ - LASSERTF(strlen(rm_entry->name) == rm_entry->namelen, - "0x%p %s/%s len %d\n", rm_entry, temp->name, - rm_entry->name, (int)strlen(rm_entry->name)); - - remove_proc_entry(rm_entry->name, temp); - if (temp == parent) - break; - } -} - -int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) -{ - struct proc_dir_entry *t = NULL; - struct proc_dir_entry **p; - int len, busy = 0; - - LASSERT(parent != NULL); - len = strlen(name); - - down_write(&_lprocfs_lock); - /* lookup target name */ - for (p = &parent->subdir; *p; p = &(*p)->next) { - if ((*p)->namelen != len) - continue; - if (memcmp(name, (*p)->name, len)) - continue; - t = *p; - break; - } - - if (t) { - /* verify it's empty: do not count "num_refs" */ - for (p = &t->subdir; *p; p = &(*p)->next) { - if ((*p)->namelen != strlen("num_refs")) { - busy = 1; - break; - } - if (memcmp("num_refs", (*p)->name, - strlen("num_refs"))) { - busy = 1; - break; - } - } - } - - if (busy == 0) - lprocfs_remove_nolock(&t); - - up_write(&_lprocfs_lock); - return 0; -} -#endif /* !HAVE_REMOVE_PROC_SUBTREE */ - -#ifndef HAVE_PROC_REMOVE -void proc_remove(struct proc_dir_entry *de) -{ -#ifndef HAVE_REMOVE_PROC_SUBTREE - down_write(&_lprocfs_lock); /* search vs remove race */ - lprocfs_remove_nolock(&de); - up_write(&_lprocfs_lock); -#else - if (de) - remove_proc_subtree(de->name, de->parent); -#endif -} -#endif - void lprocfs_remove(struct proc_dir_entry **rooth) { proc_remove(*rooth); @@ -299,31 +203,6 @@ void lprocfs_remove_proc_entry(const char *name, struct proc_dir_entry *parent) } EXPORT_SYMBOL(lprocfs_remove_proc_entry); -struct dentry *ldebugfs_register(const char *name, struct dentry *parent, - struct lprocfs_vars *list, void *data) -{ - struct dentry *entry; - - entry = debugfs_create_dir(name, parent); - if (IS_ERR_OR_NULL(entry)) { - 
entry = entry ?: ERR_PTR(-ENOMEM); - goto out; - } - - if (!IS_ERR_OR_NULL(list)) { - int rc; - - rc = ldebugfs_add_vars(entry, list, data); - if (rc) { - debugfs_remove(entry); - entry = ERR_PTR(rc); - } - } -out: - return entry; -} -EXPORT_SYMBOL_GPL(ldebugfs_register); - struct proc_dir_entry * lprocfs_register(const char *name, struct proc_dir_entry *parent, struct lprocfs_vars *list, void *data) @@ -331,10 +210,10 @@ lprocfs_register(const char *name, struct proc_dir_entry *parent, struct proc_dir_entry *newchild; newchild = proc_mkdir(name, parent); - if (newchild == NULL) + if (!newchild) return ERR_PTR(-ENOMEM); - if (list != NULL) { + if (list) { int rc = lprocfs_add_vars(newchild, list, data); if (rc) { lprocfs_remove(&newchild); @@ -366,16 +245,6 @@ static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr, } LUSTRE_RO_ATTR(uuid); -int lprocfs_name_seq_show(struct seq_file *m, void *data) -{ - struct obd_device *dev = data; - - LASSERT(dev != NULL); - seq_printf(m, "%s\n", dev->obd_name); - return 0; -} -EXPORT_SYMBOL(lprocfs_name_seq_show); - static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -409,9 +278,7 @@ static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr, u32 blk_size = osfs.os_bsize >> 10; u64 result = osfs.os_blocks; - while (blk_size >>= 1) - result <<= 1; - + result *= rounddown_pow_of_two(blk_size ?: 1); return sprintf(buf, "%llu\n", result); } @@ -505,44 +372,43 @@ static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr, } LUSTRE_RO_ATTR(filesfree); -int lprocfs_server_uuid_seq_show(struct seq_file *m, void *data) +ssize_t conn_uuid_show(struct kobject *kobj, struct attribute *attr, char *buf) { - struct obd_device *obd = data; + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); struct obd_import *imp; - char *imp_state_name = NULL; - int rc = 0; + struct ptlrpc_connection *conn; + ssize_t count; - LASSERT(obd != 
NULL); - LPROCFS_CLIMP_CHECK(obd); - imp = obd->u.cli.cl_import; - imp_state_name = ptlrpc_import_state_name(imp->imp_state); - seq_printf(m, "%s\t%s%s\n", obd2cli_tgt(obd), imp_state_name, - imp->imp_deactive ? "\tDEACTIVATED" : ""); + with_imp_locked(obd, imp, count) { + conn = imp->imp_connection; + if (conn) + count = sprintf(buf, "%s\n", conn->c_remote_uuid.uuid); + else + count = sprintf(buf, "%s\n", ""); + } - LPROCFS_CLIMP_EXIT(obd); - return rc; + return count; } -EXPORT_SYMBOL(lprocfs_server_uuid_seq_show); +EXPORT_SYMBOL(conn_uuid_show); -int lprocfs_conn_uuid_seq_show(struct seq_file *m, void *data) +int lprocfs_server_uuid_seq_show(struct seq_file *m, void *data) { struct obd_device *obd = data; - struct ptlrpc_connection *conn; + struct obd_import *imp; + const char *imp_state_name = NULL; int rc = 0; LASSERT(obd != NULL); + with_imp_locked(obd, imp, rc) { + imp_state_name = ptlrpc_import_state_name(imp->imp_state); + seq_printf(m, "%s\t%s%s\n", obd2cli_tgt(obd), imp_state_name, + imp->imp_deactive ? "\tDEACTIVATED" : ""); + } - LPROCFS_CLIMP_CHECK(obd); - conn = obd->u.cli.cl_import->imp_connection; - if (conn && obd->u.cli.cl_import) - seq_printf(m, "%s\n", conn->c_remote_uuid.uuid); - else - seq_printf(m, "%s\n", ""); - - LPROCFS_CLIMP_EXIT(obd); return rc; } -EXPORT_SYMBOL(lprocfs_conn_uuid_seq_show); +EXPORT_SYMBOL(lprocfs_server_uuid_seq_show); /** add up per-cpu counters */ @@ -578,10 +444,7 @@ int lprocfs_stats_lock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, *flags); - else - spin_lock(&stats->ls_lock); + spin_lock(&stats->ls_lock); return opc == LPROCFS_GET_NUM_CPU ? 
1 : 0; } @@ -624,10 +487,7 @@ void lprocfs_stats_unlock(struct lprocfs_stats *stats, unsigned long *flags) { if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_unlock_irqrestore(&stats->ls_lock, *flags); - else - spin_unlock(&stats->ls_lock); + spin_unlock(&stats->ls_lock); } else if (opc == LPROCFS_GET_SMP_ID) { put_cpu(); } @@ -637,14 +497,14 @@ void lprocfs_stats_unlock(struct lprocfs_stats *stats, void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, struct lprocfs_counter *cnt) { - unsigned int num_entry; - struct lprocfs_counter *percpu_cntr; - int i; - unsigned long flags = 0; + unsigned int num_entry; + struct lprocfs_counter *percpu_cntr; + int i; + unsigned long flags = 0; memset(cnt, 0, sizeof(*cnt)); - if (stats == NULL) { + if (!stats) { /* set count to 1 to avoid divide-by-zero errs in callers */ cnt->lc_count = 1; return; @@ -655,7 +515,7 @@ void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); for (i = 0; i < num_entry; i++) { - if (stats->ls_percpu[i] == NULL) + if (!stats->ls_percpu[i]) continue; percpu_cntr = lprocfs_stats_counter_get(stats, i, idx); @@ -688,79 +548,111 @@ static void obd_import_flags2str(struct obd_import *imp, struct seq_file *m) flag2str(imp, pingable); flag2str(imp, resend_replay); flag2str(imp, no_pinger_recover); - flag2str(imp, need_mne_swab); flag2str(imp, connect_tried); } -static const char *obd_connect_names[] = { - /* flags names */ - "read_only", - "lov_index", - "connect_from_mds", - "write_grant", - "server_lock", - "version", - "request_portal", - "acl", - "xattr", - "create_on_write", - "truncate_lock", - "initial_transno", - "inode_bit_locks", - "barrier", - "getattr_by_fid", - "no_oh_for_devices", - "remote_client", - "remote_client_by_force", - "max_byte_per_rpc", - "64bit_qdata", - "mds_capability", - "oss_capability", - "early_lock_cancel", - "som", - 
"adaptive_timeouts", - "lru_resize", - "mds_mds_connection", - "real_conn", - "change_qunit_size", - "alt_checksum_algorithm", - "fid_is_enabled", - "version_recovery", - "pools", - "grant_shrink", - "skip_orphan", - "large_ea", - "full20", - "layout_lock", - "64bithash", - "object_max_bytes", - "imp_recov", - "jobstats", - "umask", - "einprogress", - "grant_param", - "flock_owner", - "lvb_type", - "nanoseconds_times", - "lightweight_conn", - "short_io", - "pingless", - "flock_deadlock", - "disp_stripe", - "open_by_fid", - "lfsck", - "unknown", - "unlink_close", - "multi_mod_rpcs", - "dir_stripe", - "subtree", - "lockahead", - "bulk_mbits", - "compact_obdo", - "second_flags", - /* flags2 names */ - "file_secctx", - "lockaheadv2", +static const char *const obd_connect_names[] = { + "read_only", /* 0x01 */ + "lov_index", /* 0x02 */ + "connect_from_mds", /* 0x04 */ + "write_grant", /* 0x08 */ + "server_lock", /* 0x10 */ + "version", /* 0x20 */ + "request_portal", /* 0x40 */ + "acl", /* 0x80 */ + "xattr", /* 0x100 */ + "create_on_write", /* 0x200 */ + "truncate_lock", /* 0x400 */ + "initial_transno", /* 0x800 */ + "inode_bit_locks", /* 0x1000 */ + "barrier", /* 0x2000 */ + "getattr_by_fid", /* 0x4000 */ + "no_oh_for_devices", /* 0x8000 */ + "remote_client", /* 0x10000 */ + "remote_client_by_force", /* 0x20000 */ + "max_byte_per_rpc", /* 0x40000 */ + "64bit_qdata", /* 0x80000 */ + "mds_capability", /* 0x100000 */ + "oss_capability", /* 0x200000 */ + "early_lock_cancel", /* 0x400000 */ + "som", /* 0x800000 */ + "adaptive_timeouts", /* 0x1000000 */ + "lru_resize", /* 0x2000000 */ + "mds_mds_connection", /* 0x4000000 */ + "real_conn", /* 0x8000000 */ + "change_qunit_size", /* 0x10000000 */ + "alt_checksum_algorithm", /* 0x20000000 */ + "fid_is_enabled", /* 0x40000000 */ + "version_recovery", /* 0x80000000 */ + "pools", /* 0x100000000 */ + "grant_shrink", /* 0x200000000 */ + "skip_orphan", /* 0x400000000 */ + "large_ea", /* 0x800000000 */ + "full20", /* 0x1000000000 */ + 
"layout_lock", /* 0x2000000000 */ + "64bithash", /* 0x4000000000 */ + "object_max_bytes", /* 0x8000000000 */ + "imp_recov", /* 0x10000000000 */ + "jobstats", /* 0x20000000000 */ + "umask", /* 0x40000000000 */ + "einprogress", /* 0x80000000000 */ + "grant_param", /* 0x100000000000 */ + "flock_owner", /* 0x200000000000 */ + "lvb_type", /* 0x400000000000 */ + "nanoseconds_times", /* 0x800000000000 */ + "lightweight_conn", /* 0x1000000000000 */ + "short_io", /* 0x2000000000000 */ + "pingless", /* 0x4000000000000 */ + "flock_deadlock", /* 0x8000000000000 */ + "disp_stripe", /* 0x10000000000000 */ + "open_by_fid", /* 0x20000000000000 */ + "lfsck", /* 0x40000000000000 */ + "unknown", /* 0x80000000000000 */ + "unlink_close", /* 0x100000000000000 */ + "multi_mod_rpcs", /* 0x200000000000000 */ + "dir_stripe", /* 0x400000000000000 */ + "subtree", /* 0x800000000000000 */ + "lockahead", /* 0x1000000000000000 */ + "bulk_mbits", /* 0x2000000000000000 */ + "compact_obdo", /* 0x4000000000000000 */ + "second_flags", /* 0x8000000000000000 */ + /* ocd_connect_flags2 names */ + "file_secctx", /* 0x01 */ + "lockaheadv2", /* 0x02 */ + "dir_migrate", /* 0x04 */ + "sum_statfs", /* 0x08 */ + "overstriping", /* 0x10 */ + "flr", /* 0x20 */ + "wbc", /* 0x40 */ + "lock_convert", /* 0x80 */ + "archive_id_array", /* 0x100 */ + "increasing_xid", /* 0x200 */ + "selinux_policy", /* 0x400 */ + "lsom", /* 0x800 */ + "pcc", /* 0x1000 */ + "crush", /* 0x2000 */ + "async_discard", /* 0x4000 */ + "client_encryption", /* 0x8000 */ + "fidmap", /* 0x10000 */ + "getattr_pfid", /* 0x20000 */ + "lseek", /* 0x40000 */ + "dom_lvb", /* 0x80000 */ + "reply_mbits", /* 0x100000 */ + "mode_convert", /* 0x200000 */ + "batch_rpc", /* 0x400000 */ + "pcc_ro", /* 0x800000 */ + "mne_nid_type", /* 0x1000000 */ + "lock_contend", /* 0x2000000 */ + "atomic_open_lock", /* 0x4000000 */ + "name_encryption", /* 0x8000000 */ + "mkdir_replay", /* 0x10000000 */ + "dmv_imp_inherit", /* 0x20000000 */ + "encryption_fid2path", /* 
0x40000000 */ + "replay_create", /* 0x80000000 */ + "large_nid", /* 0x100000000 */ + "compressed_file", /* 0x200000000 */ + "unaligned_dio", /* 0x400000000 */ + "conn_policy", /* 0x800000000 */ NULL }; @@ -894,22 +786,19 @@ obd_connect_data_seqprint(struct seq_file *m, struct obd_connect_data *ocd) ocd->ocd_maxmodrpcs); } -int lprocfs_import_seq_show(struct seq_file *m, void *data) +static void lprocfs_import_seq_show_locked(struct seq_file *m, + struct obd_device *obd, + struct obd_import *imp) { - char nidstr[LNET_NIDSTR_SIZE]; - struct lprocfs_counter ret; - struct lprocfs_counter_header *header; - struct obd_device *obd = (struct obd_device *)data; - struct obd_import *imp; - struct obd_import_conn *conn; - struct obd_connect_data *ocd; - int j; - int k; - int rw = 0; + char nidstr[LNET_NIDSTR_SIZE]; + struct lprocfs_counter ret; + struct lprocfs_counter_header *header; + struct obd_import_conn *conn; + struct obd_connect_data *ocd; + int j; + int k; + int rw = 0; - LASSERT(obd != NULL); - LPROCFS_CLIMP_CHECK(obd); - imp = obd->u.cli.cl_import; ocd = &imp->imp_connect_data; seq_printf(m, "import:\n" @@ -934,38 +823,60 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data) spin_lock(&imp->imp_lock); j = 0; list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { - libcfs_nid2str_r(conn->oic_conn->c_peer.nid, - nidstr, sizeof(nidstr)); - seq_printf(m, "%s%s", j ? 
", " : "", nidstr); + libcfs_nidstr_r(&conn->oic_conn->c_peer.nid, + nidstr, sizeof(nidstr)); + if (j) + seq_puts(m, ", "); + /* Place nidstr in quotes */ + seq_printf(m, "\"%s\"", nidstr); j++; } - if (imp->imp_connection != NULL) - libcfs_nid2str_r(imp->imp_connection->c_peer.nid, - nidstr, sizeof(nidstr)); + if (imp->imp_connection) + libcfs_nidstr_r(&imp->imp_connection->c_peer.nid, + nidstr, sizeof(nidstr)); else strncpy(nidstr, "", sizeof(nidstr)); seq_printf(m, " ]\n" - " current_connection: %s\n" + " nids_stats:"); + list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { + libcfs_nidstr_r(&conn->oic_conn->c_peer.nid, + nidstr, sizeof(nidstr)); + seq_printf(m, "\n \"%s\": { connects: %u, replied: %u," + " uptodate: %s, sec_ago: ", + nidstr, conn->oic_attempts, conn->oic_replied, + conn->oic_uptodate ? "true" : "false"); + if (conn->oic_last_attempt) + seq_printf(m, "%lld }", ktime_get_seconds() - + conn->oic_last_attempt); + else + seq_puts(m, "never }"); + } + if (imp->imp_connection) + libcfs_nidstr_r(&imp->imp_connection->c_peer.nid, + nidstr, sizeof(nidstr)); + else + strncpy(nidstr, "", sizeof(nidstr)); + seq_printf(m, "\n" + " current_connection: \"%s\"\n" " connection_attempts: %u\n" " generation: %u\n" - " in-progress_invalidations: %u\n", + " in-progress_invalidations: %u\n" + " idle: %lld sec\n", nidstr, imp->imp_conn_cnt, imp->imp_generation, - atomic_read(&imp->imp_inval_count)); + atomic_read(&imp->imp_inval_count), + ktime_get_real_seconds() - imp->imp_last_reply_time); spin_unlock(&imp->imp_lock); - if (obd->obd_svc_stats == NULL) - goto out_climp; + if (!obd->obd_svc_stats) + return; header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR]; lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret); - if (ret.lc_count != 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - do_div(sum, ret.lc_count); - ret.lc_sum = sum; - } else + if (ret.lc_count != 0) + ret.lc_sum = div64_s64(ret.lc_sum, 
ret.lc_count); + else ret.lc_sum = 0; seq_printf(m, " rpcs:\n" " inflight: %u\n" @@ -982,13 +893,14 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data) if (imp->imp_at.iat_portal[j] == 0) break; k = max_t(unsigned int, k, - at_get(&imp->imp_at.iat_service_estimate[j])); + obd_at_get(imp->imp_obd, + &imp->imp_at.iat_service_estimate[j])); } seq_printf(m, " service_estimates:\n" " services: %u sec\n" - " network: %u sec\n", + " network: %d sec\n", k, - at_get(&imp->imp_at.iat_net_latency)); + obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency)); seq_printf(m, " transactions:\n" " last_replay: %llu\n" @@ -1004,10 +916,7 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data) PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw, &ret); if (ret.lc_sum > 0 && ret.lc_count > 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - do_div(sum, ret.lc_count); - ret.lc_sum = sum; + ret.lc_sum = div64_s64(ret.lc_sum, ret.lc_count); seq_printf(m, " %s_data_averages:\n" " bytes_per_rpc: %llu\n", rw ? 
"write" : "read", @@ -1018,10 +927,7 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data) header = &obd->obd_svc_stats->ls_cnt_header[j]; lprocfs_stats_collect(obd->obd_svc_stats, j, &ret); if (ret.lc_sum > 0 && ret.lc_count != 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - do_div(sum, ret.lc_count); - ret.lc_sum = sum; + ret.lc_sum = div64_s64(ret.lc_sum, ret.lc_count); seq_printf(m, " %s_per_rpc: %llu\n", header->lc_units, ret.lc_sum); j = (int)ret.lc_sum; @@ -1030,10 +936,18 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data) k / j, (100 * k / j) % 100); } } +} -out_climp: - LPROCFS_CLIMP_EXIT(obd); - return 0; +int lprocfs_import_seq_show(struct seq_file *m, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + struct obd_import *imp; + int rv; + + LASSERT(obd != NULL); + with_imp_locked(obd, imp, rv) + lprocfs_import_seq_show_locked(m, obd, imp); + return rv; } EXPORT_SYMBOL(lprocfs_import_seq_show); @@ -1042,26 +956,25 @@ int lprocfs_state_seq_show(struct seq_file *m, void *data) struct obd_device *obd = (struct obd_device *)data; struct obd_import *imp; int j, k; + int rc; LASSERT(obd != NULL); - LPROCFS_CLIMP_CHECK(obd); - imp = obd->u.cli.cl_import; - - seq_printf(m, "current_state: %s\n", - ptlrpc_import_state_name(imp->imp_state)); - seq_printf(m, "state_history:\n"); - k = imp->imp_state_hist_idx; - for (j = 0; j < IMP_STATE_HIST_LEN; j++) { - struct import_state_hist *ish = - &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN]; - if (ish->ish_state == 0) - continue; - seq_printf(m, " - [ %lld, %s ]\n", (s64)ish->ish_time, - ptlrpc_import_state_name(ish->ish_state)); + with_imp_locked(obd, imp, rc) { + seq_printf(m, "current_state: %s\n", + ptlrpc_import_state_name(imp->imp_state)); + seq_printf(m, "state_history:\n"); + k = imp->imp_state_hist_idx; + for (j = 0; j < IMP_STATE_HIST_LEN; j++) { + struct import_state_hist *ish = + &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN]; + if 
(ish->ish_state == 0) + continue; + seq_printf(m, " - [ %lld, %s ]\n", (s64)ish->ish_time, + ptlrpc_import_state_name(ish->ish_state)); + } } - LPROCFS_CLIMP_EXIT(obd); - return 0; + return rc; } EXPORT_SYMBOL(lprocfs_state_seq_show); @@ -1076,17 +989,15 @@ int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at) EXPORT_SYMBOL(lprocfs_at_hist_helper); /* See also ptlrpc_lprocfs_timeouts_show_seq */ -int lprocfs_timeouts_seq_show(struct seq_file *m, void *data) +static void lprocfs_timeouts_seq_show_locked(struct seq_file *m, + struct obd_device *obd, + struct obd_import *imp) { - struct obd_device *obd = (struct obd_device *)data; - struct obd_import *imp; - unsigned int cur, worst; - time64_t now, worstt; + timeout_t cur_timeout, worst_timeout; + time64_t now, worst_timestamp; int i; LASSERT(obd != NULL); - LPROCFS_CLIMP_CHECK(obd); - imp = obd->u.cli.cl_import; now = ktime_get_real_seconds(); @@ -1095,27 +1006,41 @@ int lprocfs_timeouts_seq_show(struct seq_file *m, void *data) "last reply", (s64)imp->imp_last_reply_time, (s64)(now - imp->imp_last_reply_time)); - cur = at_get(&imp->imp_at.iat_net_latency); - worst = imp->imp_at.iat_net_latency.at_worst_ever; - worstt = imp->imp_at.iat_net_latency.at_worst_time; + cur_timeout = obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency); + worst_timeout = imp->imp_at.iat_net_latency.at_worst_timeout_ever; + worst_timestamp = imp->imp_at.iat_net_latency.at_worst_timestamp; seq_printf(m, "%-10s : cur %3u worst %3u (at %lld, %llds ago) ", - "network", cur, worst, (s64)worstt, (s64)(now - worstt)); + "network", cur_timeout, worst_timeout, worst_timestamp, + now - worst_timestamp); lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency); for(i = 0; i < IMP_AT_MAX_PORTALS; i++) { + struct adaptive_timeout *service_est; + if (imp->imp_at.iat_portal[i] == 0) break; - cur = at_get(&imp->imp_at.iat_service_estimate[i]); - worst = imp->imp_at.iat_service_estimate[i].at_worst_ever; - worstt = 
imp->imp_at.iat_service_estimate[i].at_worst_time; + + service_est = &imp->imp_at.iat_service_estimate[i]; + cur_timeout = obd_at_get(imp->imp_obd, service_est); + worst_timeout = service_est->at_worst_timeout_ever; + worst_timestamp = service_est->at_worst_timestamp; seq_printf(m, "portal %-2d : cur %3u worst %3u (at %lld, %llds ago) ", - imp->imp_at.iat_portal[i], cur, worst, (s64)worstt, - (s64)(now - worstt)); - lprocfs_at_hist_helper(m, &imp->imp_at.iat_service_estimate[i]); + imp->imp_at.iat_portal[i], cur_timeout, + worst_timeout, worst_timestamp, + now - worst_timestamp); + lprocfs_at_hist_helper(m, service_est); } +} - LPROCFS_CLIMP_EXIT(obd); - return 0; +int lprocfs_timeouts_seq_show(struct seq_file *m, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + struct obd_import *imp; + int rc; + + with_imp_locked(obd, imp, rc) + lprocfs_timeouts_seq_show_locked(m, obd, imp); + return rc; } EXPORT_SYMBOL(lprocfs_timeouts_seq_show); @@ -1124,25 +1049,28 @@ int lprocfs_connect_flags_seq_show(struct seq_file *m, void *data) struct obd_device *obd = data; __u64 flags; __u64 flags2; + struct obd_import *imp; + int rc; - LPROCFS_CLIMP_CHECK(obd); - flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags; - flags2 = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags2; - seq_printf(m, "flags=%#llx\n", flags); - seq_printf(m, "flags2=%#llx\n", flags2); - obd_connect_seq_flags2str(m, flags, flags2, "\n"); - seq_printf(m, "\n"); - LPROCFS_CLIMP_EXIT(obd); - return 0; + with_imp_locked(obd, imp, rc) { + flags = imp->imp_connect_data.ocd_connect_flags; + flags2 = imp->imp_connect_data.ocd_connect_flags2; + seq_printf(m, "flags=%#llx\n", flags); + seq_printf(m, "flags2=%#llx\n", flags2); + obd_connect_seq_flags2str(m, flags, flags2, "\n"); + seq_printf(m, "\n"); + } + + return rc; } EXPORT_SYMBOL(lprocfs_connect_flags_seq_show); -static struct attribute *obd_def_uuid_attrs[] = { +static const struct attribute *obd_def_uuid_attrs[] = { 
&lustre_attr_uuid.attr, NULL, }; -static struct attribute *obd_def_attrs[] = { +static const struct attribute *obd_def_attrs[] = { &lustre_attr_blocksize.attr, &lustre_attr_kbytestotal.attr, &lustre_attr_kbytesfree.attr, @@ -1163,6 +1091,7 @@ static void obd_sysfs_release(struct kobject *kobj) int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only) { + struct ldebugfs_vars *debugfs_vars = NULL; int rc; if (!obd || obd->obd_magic != OBD_DEVICE_MAGIC) @@ -1174,8 +1103,6 @@ int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only) obd->obd_ktype.sysfs_ops = &lustre_sysfs_ops; obd->obd_ktype.release = obd_sysfs_release; - if (obd->obd_attrs) - obd->obd_ktype.default_attrs = obd->obd_attrs; obd->obd_kset.kobj.parent = &obd->obd_type->typ_kobj; obd->obd_kset.kobj.ktype = &obd->obd_ktype; @@ -1185,20 +1112,24 @@ int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only) return rc; if (uuid_only) - obd->obd_attrs_group.attrs = obd_def_uuid_attrs; + obd->obd_attrs = obd_def_uuid_attrs; else - obd->obd_attrs_group.attrs = obd_def_attrs; + obd->obd_attrs = obd_def_attrs; - rc = sysfs_create_group(&obd->obd_kset.kobj, &obd->obd_attrs_group); + rc = sysfs_create_files(&obd->obd_kset.kobj, obd->obd_attrs); if (rc) { kset_unregister(&obd->obd_kset); return rc; } - if (obd->obd_proc_entry) - GOTO(already_registered, rc); + if (!obd->obd_type->typ_procroot) + debugfs_vars = obd->obd_debugfs_vars; + obd->obd_debugfs_entry = debugfs_create_dir( + obd->obd_name, obd->obd_type->typ_debugfs_entry); + ldebugfs_add_vars(obd->obd_debugfs_entry, debugfs_vars, obd); - LASSERT(obd->obd_type->typ_procroot != NULL); + if (obd->obd_proc_entry || !obd->obd_type->typ_procroot) + GOTO(already_registered, rc); obd->obd_proc_entry = lprocfs_register(obd->obd_name, obd->obd_type->typ_procroot, @@ -1207,7 +1138,14 @@ int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only) rc = PTR_ERR(obd->obd_proc_entry); CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name); 
obd->obd_proc_entry = NULL; - lprocfs_obd_cleanup(obd); + + debugfs_remove_recursive(obd->obd_debugfs_entry); + obd->obd_debugfs_entry = NULL; + + sysfs_remove_files(&obd->obd_kset.kobj, obd->obd_attrs); + obd->obd_attrs = NULL; + kset_unregister(&obd->obd_kset); + return rc; } already_registered: return rc; @@ -1219,6 +1157,9 @@ int lprocfs_obd_cleanup(struct obd_device *obd) if (!obd) return -EINVAL; + debugfs_remove_recursive(obd->obd_debugfs_gss_dir); + obd->obd_debugfs_gss_dir = NULL; + if (obd->obd_proc_exports_entry) { /* Should be no exports left */ lprocfs_remove(&obd->obd_proc_exports_entry); @@ -1230,41 +1171,43 @@ int lprocfs_obd_cleanup(struct obd_device *obd) obd->obd_proc_entry = NULL; } - sysfs_remove_group(&obd->obd_kset.kobj, &obd->obd_attrs_group); + debugfs_remove_recursive(obd->obd_debugfs_entry); + obd->obd_debugfs_entry = NULL; + + /* obd device never allocated a kset */ + if (!obd->obd_kset.kobj.state_initialized) + return 0; + + if (obd->obd_attrs) { + sysfs_remove_files(&obd->obd_kset.kobj, obd->obd_attrs); + obd->obd_attrs = NULL; + } + kset_unregister(&obd->obd_kset); wait_for_completion(&obd->obd_kobj_unregister); - return 0; } EXPORT_SYMBOL(lprocfs_obd_cleanup); int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) { - struct lprocfs_counter *cntr; - unsigned int percpusize; - int rc = -ENOMEM; - unsigned long flags = 0; - int i; + struct lprocfs_counter *cntr; + unsigned int percpusize; + int rc = -ENOMEM; + int i; LASSERT(stats->ls_percpu[cpuid] == NULL); LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0); percpusize = lprocfs_stats_counter_size(stats); LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[cpuid], percpusize); - if (stats->ls_percpu[cpuid] != NULL) { + if (stats->ls_percpu[cpuid]) { rc = 0; if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, flags); - else - spin_lock(&stats->ls_lock); + 
spin_lock(&stats->ls_lock); if (stats->ls_biggest_alloc_num <= cpuid) stats->ls_biggest_alloc_num = cpuid + 1; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) { - spin_unlock_irqrestore(&stats->ls_lock, flags); - } else { - spin_unlock(&stats->ls_lock); - } + spin_unlock(&stats->ls_lock); } /* initialize the ls_percpu[cpuid] non-zero counter */ for (i = 0; i < stats->ls_num; ++i) { @@ -1275,19 +1218,18 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) return rc; } -struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, +struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num, enum lprocfs_stats_flags flags) { - struct lprocfs_stats *stats; - unsigned int num_entry; - unsigned int percpusize = 0; - int i; + struct lprocfs_stats *stats; + unsigned int num_entry; + unsigned int percpusize = 0; - if (num == 0) - return NULL; + if (num == 0) + return NULL; - if (lprocfs_no_percpu_stats != 0) - flags |= LPROCFS_STATS_FLAG_NOPERCPU; + if (lprocfs_no_percpu_stats != 0) + flags |= LPROCFS_STATS_FLAG_NOPERCPU; if (flags & LPROCFS_STATS_FLAG_NOPERCPU) num_entry = 1; @@ -1296,51 +1238,46 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, /* alloc percpu pointers for all possible cpu slots */ LIBCFS_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_entry])); - if (stats == NULL) + if (!stats) return NULL; stats->ls_num = num; stats->ls_flags = flags; + stats->ls_init = ktime_get_real(); spin_lock_init(&stats->ls_lock); /* alloc num of counter headers */ - LIBCFS_ALLOC(stats->ls_cnt_header, - stats->ls_num * sizeof(struct lprocfs_counter_header)); - if (stats->ls_cnt_header == NULL) + CFS_ALLOC_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num); + if (!stats->ls_cnt_header) goto fail; if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) { /* contains only one set counters */ percpusize = lprocfs_stats_counter_size(stats); LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[0], percpusize); - if (stats->ls_percpu[0] == NULL) + if 
(!stats->ls_percpu[0]) goto fail; stats->ls_biggest_alloc_num = 1; - } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) { - /* alloc all percpu data, currently only obd_memory use this */ - for (i = 0; i < num_entry; ++i) - if (lprocfs_stats_alloc_one(stats, i) < 0) - goto fail; } return stats; fail: - lprocfs_free_stats(&stats); + lprocfs_stats_free(&stats); return NULL; } -EXPORT_SYMBOL(lprocfs_alloc_stats); +EXPORT_SYMBOL(lprocfs_stats_alloc); -void lprocfs_free_stats(struct lprocfs_stats **statsh) +void lprocfs_stats_free(struct lprocfs_stats **statsh) { struct lprocfs_stats *stats = *statsh; unsigned int num_entry; unsigned int percpusize; unsigned int i; - if (stats == NULL || stats->ls_num == 0) - return; - *statsh = NULL; + if (!stats || stats->ls_num == 0) + return; + *statsh = NULL; if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) num_entry = 1; @@ -1349,14 +1286,19 @@ void lprocfs_free_stats(struct lprocfs_stats **statsh) percpusize = lprocfs_stats_counter_size(stats); for (i = 0; i < num_entry; i++) - if (stats->ls_percpu[i] != NULL) + if (stats->ls_percpu[i]) LIBCFS_FREE(stats->ls_percpu[i], percpusize); - if (stats->ls_cnt_header != NULL) - LIBCFS_FREE(stats->ls_cnt_header, stats->ls_num * - sizeof(struct lprocfs_counter_header)); + + if (stats->ls_cnt_header) { + for (i = 0; i < stats->ls_num; i++) + if (stats->ls_cnt_header[i].lc_hist != NULL) + CFS_FREE_PTR(stats->ls_cnt_header[i].lc_hist); + CFS_FREE_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num); + } + LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry])); } -EXPORT_SYMBOL(lprocfs_free_stats); +EXPORT_SYMBOL(lprocfs_stats_free); u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, enum lprocfs_fields_flags field) @@ -1384,18 +1326,25 @@ u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, } EXPORT_SYMBOL(lprocfs_stats_collector); -void lprocfs_clear_stats(struct lprocfs_stats *stats) +void lprocfs_stats_clear(struct lprocfs_stats *stats) { - struct 
lprocfs_counter *percpu_cntr; - int i; - int j; - unsigned int num_entry; - unsigned long flags = 0; + struct lprocfs_counter *percpu_cntr; + unsigned int num_entry; + unsigned long flags = 0; + int i, j; num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); + /* clear histogram if exists */ + for (j = 0; j < stats->ls_num; j++) { + struct obd_histogram *hist = stats->ls_cnt_header[j].lc_hist; + + if (hist != NULL) + lprocfs_oh_clear(hist); + } + for (i = 0; i < num_entry; i++) { - if (stats->ls_percpu[i] == NULL) + if (!stats->ls_percpu[i]) continue; for (j = 0; j < stats->ls_num; j++) { percpu_cntr = lprocfs_stats_counter_get(stats, i, j); @@ -1404,25 +1353,24 @@ void lprocfs_clear_stats(struct lprocfs_stats *stats) percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - percpu_cntr->lc_sum_irq = 0; } } + stats->ls_init = ktime_get_real(); lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); } -EXPORT_SYMBOL(lprocfs_clear_stats); +EXPORT_SYMBOL(lprocfs_stats_clear); static ssize_t lprocfs_stats_seq_write(struct file *file, const char __user *buf, size_t len, loff_t *off) { - struct seq_file *seq = file->private_data; - struct lprocfs_stats *stats = seq->private; + struct seq_file *seq = file->private_data; + struct lprocfs_stats *stats = seq->private; - lprocfs_clear_stats(stats); + lprocfs_stats_clear(stats); - return len; + return len; } static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos) @@ -1443,21 +1391,57 @@ static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) return lprocfs_stats_seq_start(p, pos); } +/** + * print header of stats including snapshot_time, start_time and elapsed_time. 
+ * + * \param seq the file to print content to + * \param now end time to calculate elapsed_time + * \param ts_init start time to calculate elapsed_time + * \param width the width of key to align them well + * \param colon "" or ":" + * \param show_units show units or not + * \param prefix prefix (indent) before printing each line of header + * to align them with other content + */ +void lprocfs_stats_header(struct seq_file *seq, ktime_t now, ktime_t ts_init, + int width, const char *colon, bool show_units, + const char *prefix) +{ + const char *units = show_units ? " secs.nsecs" : ""; + struct timespec64 ts; + const char *field; + + field = (colon && colon[0]) ? "snapshot_time:" : "snapshot_time"; + ts = ktime_to_timespec64(now); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); + + if (!obd_enable_stats_header) + return; + + field = (colon && colon[0]) ? "start_time:" : "start_time"; + ts = ktime_to_timespec64(ts_init); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); + + field = (colon && colon[0]) ? 
"elapsed_time:" : "elapsed_time"; + ts = ktime_to_timespec64(ktime_sub(now, ts_init)); + seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field, + (s64)ts.tv_sec, ts.tv_nsec, units); +} +EXPORT_SYMBOL(lprocfs_stats_header); + /* seq file export of one lprocfs counter */ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) { - struct lprocfs_stats *stats = p->private; - struct lprocfs_counter_header *hdr; - struct lprocfs_counter ctr; - int idx = *(loff_t *)v; - - if (idx == 0) { - struct timespec64 now; + struct lprocfs_stats *stats = p->private; + struct lprocfs_counter_header *hdr; + struct lprocfs_counter ctr; + int idx = *(loff_t *)v; - ktime_get_real_ts64(&now); - seq_printf(p, "%-25s %llu.%09lu secs.nsecs\n", - "snapshot_time", (s64)now.tv_sec, now.tv_nsec); - } + if (idx == 0) + lprocfs_stats_header(p, ktime_get_real(), stats->ls_init, 25, + "", true, ""); hdr = &stats->ls_cnt_header[idx]; lprocfs_stats_collect(stats, idx, &ctr); @@ -1490,65 +1474,82 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) struct seq_file *seq; int rc; - rc = LPROCFS_ENTRY_CHECK(inode); - if (rc < 0) - return rc; - rc = seq_open(file, &lprocfs_stats_seq_sops); if (rc) return rc; seq = file->private_data; - seq->private = inode->i_private ? : PDE_DATA(inode); + seq->private = inode->i_private ? 
inode->i_private : pde_data(inode); return 0; } -static const struct file_operations lprocfs_stats_seq_fops = { - .owner = THIS_MODULE, - .open = lprocfs_stats_seq_open, - .read = seq_read, - .write = lprocfs_stats_seq_write, - .llseek = seq_lseek, - .release = lprocfs_seq_release, +const struct file_operations ldebugfs_stats_seq_fops = { + .owner = THIS_MODULE, + .open = lprocfs_stats_seq_open, + .read = seq_read, + .write = lprocfs_stats_seq_write, + .llseek = seq_lseek, + .release = lprocfs_seq_release, +}; +EXPORT_SYMBOL(ldebugfs_stats_seq_fops); + +static const struct proc_ops lprocfs_stats_seq_fops = { + PROC_OWNER(THIS_MODULE) + .proc_open = lprocfs_stats_seq_open, + .proc_read = seq_read, + .proc_write = lprocfs_stats_seq_write, + .proc_lseek = seq_lseek, + .proc_release = lprocfs_seq_release, }; -int ldebugfs_register_stats(struct dentry *parent, const char *name, - struct lprocfs_stats *stats) -{ - struct dentry *entry; - - LASSERT(!IS_ERR_OR_NULL(parent)); - - entry = debugfs_create_file(name, 0644, parent, stats, - &lprocfs_stats_seq_fops); - if (IS_ERR_OR_NULL(entry)) - return entry ? 
PTR_ERR(entry) : -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(ldebugfs_register_stats); - -int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, - struct lprocfs_stats *stats) +int lprocfs_stats_register(struct proc_dir_entry *root, const char *name, + struct lprocfs_stats *stats) { struct proc_dir_entry *entry; - LASSERT(root != NULL); + LASSERT(root != NULL); entry = proc_create_data(name, 0644, root, &lprocfs_stats_seq_fops, stats); - if (entry == NULL) + if (!entry) return -ENOMEM; + return 0; } -EXPORT_SYMBOL(lprocfs_register_stats); +EXPORT_SYMBOL(lprocfs_stats_register); -void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, const char *units) +static const char *lprocfs_counter_config_units(const char *name, + enum lprocfs_counter_config config) { - struct lprocfs_counter_header *header; - struct lprocfs_counter *percpu_cntr; - unsigned long flags = 0; - unsigned int i; - unsigned int num_cpu; + const char *units; + + switch (config & LPROCFS_TYPE_MASK) { + default: + units = "reqs"; break; + case LPROCFS_TYPE_BYTES: + units = "bytes"; break; + case LPROCFS_TYPE_PAGES: + units = "pages"; break; + case LPROCFS_TYPE_LOCKS: + units = "locks"; break; + case LPROCFS_TYPE_LOCKSPS: + units = "locks/s"; break; + case LPROCFS_TYPE_SECS: + units = "secs"; break; + case LPROCFS_TYPE_USECS: + units = "usecs"; break; + } + + return units; +} + +void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index, + enum lprocfs_counter_config config, + const char *name, const char *units) +{ + struct lprocfs_counter_header *header; + struct lprocfs_counter *percpu_cntr; + unsigned long flags = 0; + unsigned int i; + unsigned int num_cpu; LASSERT(stats != NULL); @@ -1556,13 +1557,21 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index, LASSERTF(header != NULL, "Failed to allocate stats header:[%d]%s/%s\n", index, name, units); - header->lc_config = conf; - header->lc_name = name; - 
header->lc_units = units; + header->lc_config = config; + header->lc_name = name; + header->lc_units = units; + if (config & LPROCFS_CNTR_HISTOGRAM) { + CFS_ALLOC_PTR(stats->ls_cnt_header[index].lc_hist); + if (stats->ls_cnt_header[index].lc_hist == NULL) + CERROR("LprocFS: Failed to allocate histogram:[%d]%s/%s\n", + index, name, units); + else + spin_lock_init(&stats->ls_cnt_header[index].lc_hist->oh_lock); + } num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); for (i = 0; i < num_cpu; ++i) { - if (stats->ls_percpu[i] == NULL) + if (!stats->ls_percpu[i]) continue; percpu_cntr = lprocfs_stats_counter_get(stats, i, index); percpu_cntr->lc_count = 0; @@ -1570,56 +1579,37 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index, percpu_cntr->lc_max = 0; percpu_cntr->lc_sumsquare = 0; percpu_cntr->lc_sum = 0; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq = 0; } lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); } -EXPORT_SYMBOL(lprocfs_counter_init); +EXPORT_SYMBOL(lprocfs_counter_init_units); -/* Note that we only init md counters for ops whose offset is less - * than NUM_MD_STATS. This is explained in a comment in the definition - * of struct md_ops. 
*/ -#define LPROCFS_MD_OP_INIT(base, stats, op) \ - do { \ - unsigned int _idx = base + MD_COUNTER_OFFSET(op); \ - \ - if (MD_COUNTER_OFFSET(op) < NUM_MD_STATS) { \ - LASSERT(_idx < stats->ls_num); \ - lprocfs_counter_init(stats, _idx, 0, #op, "reqs"); \ - } \ - } while (0) - -void lprocfs_init_mps_stats(int num_private_stats, struct lprocfs_stats *stats) -{ - LPROCFS_MD_OP_INIT(num_private_stats, stats, get_root); - LPROCFS_MD_OP_INIT(num_private_stats, stats, null_inode); - LPROCFS_MD_OP_INIT(num_private_stats, stats, close); - LPROCFS_MD_OP_INIT(num_private_stats, stats, create); - LPROCFS_MD_OP_INIT(num_private_stats, stats, enqueue); - LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr); - LPROCFS_MD_OP_INIT(num_private_stats, stats, getattr_name); - LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_lock); - LPROCFS_MD_OP_INIT(num_private_stats, stats, link); - LPROCFS_MD_OP_INIT(num_private_stats, stats, rename); - LPROCFS_MD_OP_INIT(num_private_stats, stats, setattr); - LPROCFS_MD_OP_INIT(num_private_stats, stats, fsync); - LPROCFS_MD_OP_INIT(num_private_stats, stats, read_page); - LPROCFS_MD_OP_INIT(num_private_stats, stats, unlink); - LPROCFS_MD_OP_INIT(num_private_stats, stats, setxattr); - LPROCFS_MD_OP_INIT(num_private_stats, stats, getxattr); - LPROCFS_MD_OP_INIT(num_private_stats, stats, init_ea_size); - LPROCFS_MD_OP_INIT(num_private_stats, stats, get_lustre_md); - LPROCFS_MD_OP_INIT(num_private_stats, stats, free_lustre_md); - LPROCFS_MD_OP_INIT(num_private_stats, stats, merge_attr); - LPROCFS_MD_OP_INIT(num_private_stats, stats, set_open_replay_data); - LPROCFS_MD_OP_INIT(num_private_stats, stats, clear_open_replay_data); - LPROCFS_MD_OP_INIT(num_private_stats, stats, set_lock_data); - LPROCFS_MD_OP_INIT(num_private_stats, stats, lock_match); - LPROCFS_MD_OP_INIT(num_private_stats, stats, cancel_unused); - LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_getattr_async); - LPROCFS_MD_OP_INIT(num_private_stats, stats, revalidate_lock); +void 
lprocfs_counter_init(struct lprocfs_stats *stats, int index, + enum lprocfs_counter_config config, + const char *name) +{ + lprocfs_counter_init_units(stats, index, config, name, + lprocfs_counter_config_units(name, config)); } +EXPORT_SYMBOL(lprocfs_counter_init); + +static const char * const mps_stats[] = { + [LPROC_MD_CLOSE] = "close", + [LPROC_MD_CREATE] = "create", + [LPROC_MD_ENQUEUE] = "enqueue", + [LPROC_MD_GETATTR] = "getattr", + [LPROC_MD_INTENT_LOCK] = "intent_lock", + [LPROC_MD_LINK] = "link", + [LPROC_MD_RENAME] = "rename", + [LPROC_MD_SETATTR] = "setattr", + [LPROC_MD_FSYNC] = "fsync", + [LPROC_MD_READ_PAGE] = "read_page", + [LPROC_MD_UNLINK] = "unlink", + [LPROC_MD_SETXATTR] = "setxattr", + [LPROC_MD_GETXATTR] = "getxattr", + [LPROC_MD_INTENT_GETATTR_ASYNC] = "intent_getattr_async", + [LPROC_MD_REVALIDATE_LOCK] = "revalidate_lock", +}; int lprocfs_alloc_md_stats(struct obd_device *obd, unsigned int num_private_stats) @@ -1628,11 +1618,8 @@ int lprocfs_alloc_md_stats(struct obd_device *obd, unsigned int num_stats; int rc, i; - CLASSERT(offsetof(struct md_ops, MD_STATS_FIRST_OP) == 0); - CLASSERT(_MD_COUNTER_OFFSET(MD_STATS_FIRST_OP) == 0); - CLASSERT(_MD_COUNTER_OFFSET(MD_STATS_LAST_OP) > 0); - - /* TODO Ensure that this function is only used where + /* + * TODO Ensure that this function is only used where * appropriate by adding an assertion to the effect that * obd->obd_type->typ_md_ops is not NULL. 
We can't do this now * because mdt_procfs_init() uses this function to allocate @@ -1642,30 +1629,27 @@ int lprocfs_alloc_md_stats(struct obd_device *obd, */ LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_md_stats == NULL); - LASSERT(obd->obd_md_cntr_base == 0); - num_stats = NUM_MD_STATS + num_private_stats; - stats = lprocfs_alloc_stats(num_stats, 0); - if (stats == NULL) + num_stats = ARRAY_SIZE(mps_stats) + num_private_stats; + stats = lprocfs_stats_alloc(num_stats, 0); + if (!stats) return -ENOMEM; - lprocfs_init_mps_stats(num_private_stats, stats); - - for (i = num_private_stats; i < num_stats; i++) { - if (stats->ls_cnt_header[i].lc_name == NULL) { - CERROR("Missing md_stat initializer md_op " - "operation at offset %d. Aborting.\n", - i - num_private_stats); + for (i = 0; i < ARRAY_SIZE(mps_stats); i++) { + lprocfs_counter_init(stats, i, LPROCFS_TYPE_REQS, + mps_stats[i]); + if (!stats->ls_cnt_header[i].lc_name) { + CERROR("Missing md_stat initializer md_op operation at offset %d. 
Aborting.\n", + i); LBUG(); } } - rc = lprocfs_register_stats(obd->obd_proc_entry, "md_stats", stats); + rc = lprocfs_stats_register(obd->obd_proc_entry, "md_stats", stats); if (rc < 0) { - lprocfs_free_stats(&stats); + lprocfs_stats_free(&stats); } else { obd->obd_md_stats = stats; - obd->obd_md_cntr_base = num_private_stats; } return rc; @@ -1676,34 +1660,27 @@ void lprocfs_free_md_stats(struct obd_device *obd) { struct lprocfs_stats *stats = obd->obd_md_stats; - if (stats != NULL) { + if (stats) { obd->obd_md_stats = NULL; - obd->obd_md_cntr_base = 0; - lprocfs_free_stats(&stats); + lprocfs_stats_free(&stats); } } EXPORT_SYMBOL(lprocfs_free_md_stats); void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats) { - lprocfs_counter_init(ldlm_stats, - LDLM_ENQUEUE - LDLM_FIRST_OPC, - 0, "ldlm_enqueue", "reqs"); - lprocfs_counter_init(ldlm_stats, - LDLM_CONVERT - LDLM_FIRST_OPC, - 0, "ldlm_convert", "reqs"); - lprocfs_counter_init(ldlm_stats, - LDLM_CANCEL - LDLM_FIRST_OPC, - 0, "ldlm_cancel", "reqs"); - lprocfs_counter_init(ldlm_stats, - LDLM_BL_CALLBACK - LDLM_FIRST_OPC, - 0, "ldlm_bl_callback", "reqs"); - lprocfs_counter_init(ldlm_stats, - LDLM_CP_CALLBACK - LDLM_FIRST_OPC, - 0, "ldlm_cp_callback", "reqs"); - lprocfs_counter_init(ldlm_stats, - LDLM_GL_CALLBACK - LDLM_FIRST_OPC, - 0, "ldlm_gl_callback", "reqs"); + lprocfs_counter_init(ldlm_stats, LDLM_ENQUEUE - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_enqueue"); + lprocfs_counter_init(ldlm_stats, LDLM_CONVERT - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_convert"); + lprocfs_counter_init(ldlm_stats, LDLM_CANCEL - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_cancel"); + lprocfs_counter_init(ldlm_stats, LDLM_BL_CALLBACK - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_bl_callback"); + lprocfs_counter_init(ldlm_stats, LDLM_CP_CALLBACK - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_cp_callback"); + lprocfs_counter_init(ldlm_stats, LDLM_GL_CALLBACK - LDLM_FIRST_OPC, + LPROCFS_TYPE_REQS, "ldlm_gl_callback"); } 
EXPORT_SYMBOL(lprocfs_init_ldlm_stats); @@ -1714,7 +1691,7 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, { __s64 ret = 0; - if (lc == NULL || header == NULL) + if (!lc || !header) RETURN(0); switch (field) { @@ -1723,8 +1700,6 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, break; case LPROCFS_FIELDS_FLAGS_SUM: ret = lc->lc_sum; - if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - ret += lc->lc_sum_irq; break; case LPROCFS_FIELDS_FLAGS_MIN: ret = lc->lc_min; @@ -1733,7 +1708,7 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, ret = lc->lc_max; break; case LPROCFS_FIELDS_FLAGS_AVG: - ret = (lc->lc_max - lc->lc_min) / 2; + ret = div64_u64(lc->lc_sum, lc->lc_count); break; case LPROCFS_FIELDS_FLAGS_SUMSQUARE: ret = lc->lc_sumsquare; @@ -1748,373 +1723,194 @@ __s64 lprocfs_read_helper(struct lprocfs_counter *lc, } EXPORT_SYMBOL(lprocfs_read_helper); -int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val, - int mult) -{ - long decimal_val, frac_val; - int prtn; - - if (count < 10) - return -EINVAL; - - decimal_val = val / mult; - prtn = snprintf(buffer, count, "%ld", decimal_val); - frac_val = val % mult; - - if (prtn < (count - 4) && frac_val > 0) { - long temp_frac; - int i, temp_mult = 1, frac_bits = 0; - - temp_frac = frac_val * 10; - buffer[prtn++] = '.'; - while (frac_bits < 2 && (temp_frac / mult) < 1 ) { - /* only reserved 2 bits fraction */ - buffer[prtn++] ='0'; - temp_frac *= 10; - frac_bits++; - } - /* - * Need to think these cases : - * 1. #echo x.00 > /proc/xxx output result : x - * 2. #echo x.0x > /proc/xxx output result : x.0x - * 3. #echo x.x0 > /proc/xxx output result : x.x - * 4. #echo x.xx > /proc/xxx output result : x.xx - * Only reserved 2 bits fraction. 
- */ - for (i = 0; i < (5 - prtn); i++) - temp_mult *= 10; - - frac_bits = min((int)count - prtn, 3 - frac_bits); - prtn += snprintf(buffer + prtn, frac_bits, "%ld", - frac_val * temp_mult / mult); - - prtn--; - while(buffer[prtn] < '1' || buffer[prtn] > '9') { - prtn--; - if (buffer[prtn] == '.') { - prtn--; - break; - } - } - prtn++; - } - buffer[prtn++] ='\n'; - return prtn; -} -EXPORT_SYMBOL(lprocfs_read_frac_helper); - -int lprocfs_seq_read_frac_helper(struct seq_file *m, long val, int mult) -{ - long decimal_val, frac_val; - - decimal_val = val / mult; - seq_printf(m, "%ld", decimal_val); - frac_val = val % mult; - - if (frac_val > 0) { - frac_val *= 100; - frac_val /= mult; - } - if (frac_val > 0) { - /* Three cases: x0, xx, 0x */ - if ((frac_val % 10) != 0) - seq_printf(m, ".%ld", frac_val); - else - seq_printf(m, ".%ld", frac_val / 10); - } - - seq_printf(m, "\n"); - return 0; -} -EXPORT_SYMBOL(lprocfs_seq_read_frac_helper); - -/* Obtains the conversion factor for the unit specified */ -static int get_mult(char unit, __u64 *mult) -{ - __u64 units = 1; - - switch (unit) { - /* peta, tera, giga, mega, and kilo */ - case 'p': - case 'P': - units <<= 10; - case 't': - case 'T': - units <<= 10; - case 'g': - case 'G': - units <<= 10; - case 'm': - case 'M': - units <<= 10; - case 'k': - case 'K': - units <<= 10; - break; - /* some tests expect % to be accepted */ - case '%': - units = 1; - break; - default: - return -EINVAL; - } - - *mult = units; - - return 0; -} - -/* - * Ensures the numeric string is valid. The function provides the final - * multiplier in the case a unit exists at the end of the string. It also - * locates the start of the whole and fractional parts (if any). This - * function modifies the string so kstrtoull can be used to parse both - * the whole and fraction portions. This function also figures out - * the base of the number. 
+/** + * string_to_size - convert ASCII string representing a numerical + * value with optional units to 64-bit binary value + * + * @size: The numerical value extract out of @buffer + * @buffer: passed in string to parse + * @count: length of the @buffer + * + * This function returns a 64-bit binary value if @buffer contains a valid + * numerical string. The string is parsed to 3 significant figures after + * the decimal point. Support the string containing an optional units at + * the end which can be base 2 or base 10 in value. If no units are given + * the string is assumed to just a numerical value. + * + * Returns: @count if the string is successfully parsed, + * -errno on invalid input strings. Error values: + * + * - ``-EINVAL``: @buffer is not a proper numerical string + * - ``-EOVERFLOW``: results does not fit into 64 bits. + * - ``-E2BIG ``: @buffer is too large (not a valid number) */ -static int preprocess_numeric_str(char *buffer, __u64 *mult, __u64 def_mult, - bool allow_units, char **whole, char **frac, - unsigned int *base) +int string_to_size(u64 *size, const char *buffer, size_t count) { - bool hit_decimal = false; - bool hit_unit = false; - int rc = 0; - char *start; - *mult = def_mult; - *whole = NULL; - *frac = NULL; - *base = 10; + /* For string_get_size() it can support values above exabytes, + * (ZiB, YiB) due to breaking the return value into a size and + * bulk size to avoid 64 bit overflow. We don't break the size + * up into block size units so we don't support ZiB or YiB. 
+ */ + static const char *const units_10[] = { + "kB", "MB", "GB", "TB", "PB", "EB", + }; + static const char *const units_2[] = { + "K", "M", "G", "T", "P", "E", + }; + static const char *const *const units_str[] = { + [STRING_UNITS_2] = units_2, + [STRING_UNITS_10] = units_10, + }; + static const unsigned int coeff[] = { + [STRING_UNITS_10] = 1000, + [STRING_UNITS_2] = 1024, + }; + enum string_size_units unit = STRING_UNITS_2; + u64 whole, blk_size = 1; + char kernbuf[22], *end; + size_t len = count; + int rc; + int i; - /* a hex string if it starts with "0x" */ - if (buffer[0] == '0' && tolower(buffer[1]) == 'x') { - *base = 16; - buffer += 2; + if (count >= sizeof(kernbuf)) { + CERROR("count %zd > buffer %zd\n", count, sizeof(kernbuf)); + return -E2BIG; } - start = buffer; - - while (*buffer) { - /* allow for a single new line before the null terminator */ - if (*buffer == '\n') { - *buffer = '\0'; - buffer++; - - if (*buffer) - return -EINVAL; - + *size = 0; + /* The "iB" suffix is optionally allowed for indicating base-2 numbers. + * If suffix is only "B" and not "iB" then we treat it as base-10. + */ + end = strstr(buffer, "B"); + if (end && *(end - 1) != 'i') + unit = STRING_UNITS_10; + + i = unit == STRING_UNITS_2 ? 
ARRAY_SIZE(units_2) - 1 : + ARRAY_SIZE(units_10) - 1; + do { + end = strnstr(buffer, units_str[unit][i], count); + if (end) { + for (; i >= 0; i--) + blk_size *= coeff[unit]; + len = end - buffer; break; } + } while (i--); - /* any chars after our unit indicates a malformed string */ - if (hit_unit) - return -EINVAL; - - /* ensure we only hit one decimal */ - if (*buffer == '.') { - if (hit_decimal) - return -EINVAL; - - /* if past start, there's a whole part */ - if (start != buffer) - *whole = start; - - *buffer = '\0'; - start = buffer + 1; - hit_decimal = true; - } else if (!isdigit(*buffer) && - !(*base == 16 && isxdigit(*buffer))) { - if (allow_units) { - /* if we allow units, attempt to get mult */ - hit_unit = true; - rc = get_mult(*buffer, mult); - if (rc) - return rc; - - /* string stops here, but keep processing */ - *buffer = '\0'; - } else { - /* bad string */ + /* as 'B' is a substring of all units, we need to handle it + * separately. + */ + if (!end) { + /* 'B' is only acceptable letter at this point */ + end = strnchr(buffer, count, 'B'); + if (end) { + len = end - buffer; + + if (count - len > 2 || + (count - len == 2 && strcmp(end, "B\n") != 0)) { + CDEBUG(D_INFO, "unknown suffix '%s'\n", buffer); return -EINVAL; } } - - buffer++; - } - - if (hit_decimal) { - /* hit a decimal, make sure there's a fractional part */ - if (!*start) - return -EINVAL; - - *frac = start; - } else { - /* didn't hit a decimal, but may have a whole part */ - if (start != buffer && *start) - *whole = start; - } - - /* malformed string if we didn't get anything */ - if (!*frac && !*whole) - return -EINVAL; - - return 0; -} - -/* - * Parses a numeric string which can contain a whole and fraction portion - * into a __u64. Accepts a multiplier to apply to the value parsed. Also - * allows the string to have a unit at the end. The function handles - * wrapping of the final unsigned value. 
- */ -static int str_to_u64_parse(char *buffer, unsigned long count, - __u64 *val, __u64 def_mult, bool allow_units) -{ - __u64 whole = 0; - __u64 frac = 0; - unsigned int frac_d = 1; - __u64 wrap_indicator = ULLONG_MAX; - int rc = 0; - __u64 mult; - char *strwhole; - char *strfrac; - unsigned int base = 10; - - rc = preprocess_numeric_str(buffer, &mult, def_mult, allow_units, - &strwhole, &strfrac, &base); - - if (rc) - return rc; - - if (mult == 0) { - *val = 0; - return 0; + /* kstrtoull will error out if it has non digits */ + goto numbers_only; } - /* the multiplier limits how large the value can be */ - wrap_indicator /= mult; + end = strnchr(buffer, count, '.'); + if (end) { + /* need to limit 3 decimal places */ + char rem[4] = "000"; + u64 frac = 0; + size_t off; - if (strwhole) { - rc = kstrtoull(strwhole, base, &whole); - if (rc) - return rc; + len = end - buffer; + end++; - if (whole > wrap_indicator) - return -ERANGE; - - whole *= mult; - } - - if (strfrac) { - if (strlen(strfrac) > 10) - strfrac[10] = '\0'; - - rc = kstrtoull(strfrac, base, &frac); + /* limit to 3 decimal points */ + off = min_t(size_t, 3, strspn(end, "0123456789")); + /* need to limit frac_d to a u32 */ + memcpy(rem, end, off); + rc = kstrtoull(rem, 10, &frac); if (rc) return rc; - /* determine power of fractional portion */ - while (*strfrac) { - frac_d *= base; - strfrac++; - } + if (fls64(frac) + fls64(blk_size) - 1 > 64) + return -EOVERFLOW; - /* fractional portion is too large to perform calculation */ - if (frac > wrap_indicator) - return -ERANGE; - - frac *= mult; - do_div(frac, frac_d); + frac *= blk_size; + do_div(frac, 1000); + *size += frac; } - - /* check that the sum of whole and fraction fits in u64 */ - if (whole > (ULLONG_MAX - frac)) - return -ERANGE; - - *val = whole + frac; - - return 0; -} - -/* - * This function parses numeric/hex strings into __s64. It accepts a multiplier - * which will apply to the value parsed. 
It also can allow the string to - * have a unit as the last character. The function handles overflow/underflow - * of the signed integer. - */ -static int str_to_s64_internal(const char __user *buffer, unsigned long count, - __s64 *val, __u64 def_mult, bool allow_units) -{ - char kernbuf[22]; - __u64 tmp; - unsigned int offset = 0; - int signed sign = 1; - __u64 max = LLONG_MAX; - int rc = 0; - - if (count > (sizeof(kernbuf) - 1)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - - kernbuf[count] = '\0'; - - /* keep track of our sign */ - if (*kernbuf == '-') { - sign = -1; - offset++; - /* equivalent to max = -LLONG_MIN, avoids overflow */ - max++; - } - - rc = str_to_u64_parse(kernbuf + offset, count - offset, - &tmp, def_mult, allow_units); +numbers_only: + snprintf(kernbuf, sizeof(kernbuf), "%.*s", (int)len, buffer); + rc = kstrtoull(kernbuf, 10, &whole); if (rc) return rc; - /* check for overflow/underflow */ - if (max < tmp) - return -ERANGE; + if (whole != 0 && fls64(whole) + fls64(blk_size) - 1 > 64) + return -EOVERFLOW; - *val = (__s64)tmp * sign; + *size += whole * blk_size; - return 0; + return count; } +EXPORT_SYMBOL(string_to_size); /** - * Convert a user string into a signed 64 bit number. This function produces - * an error when the value parsed from the string times multiplier underflows or - * overflows. This function only accepts strings that contains digits, an - * optional decimal, and a char representing a unit at the end. If a unit is - * specified in the string, the multiplier provided by the caller is ignored. - * This function can also accept hexadecimal strings which are prefixed with - * "0x". 
+ * sysfs_memparse - parse a ASCII string to 64-bit binary value, + * with optional units + * + * @buffer: kernel pointer to input string + * @count: number of bytes in the input @buffer + * @val: (output) binary value returned to caller + * @defunit: default unit suffix to use if none is provided * - * \param[in] buffer string consisting of numbers, a decimal, and a unit - * \param[in] count buffer length - * \param[in] val if successful, the value represented by the string - * \param[in] defunit default unit if string doesn't contain one + * Parses a string into a number. The number stored at @buffer is + * potentially suffixed with K, M, G, T, P, E. Besides these other + * valid suffix units are shown in the string_to_size() function. + * If the string lacks a suffix then the defunit is used. The defunit + * should be given as a binary unit (e.g. MiB) as that is the standard + * for tunables in Lustre. If no unit suffix is given (e.g. 'G'), then + * it is assumed to be in binary units. * - * \retval 0 on success - * \retval negative number on error + * Returns: 0 on success or -errno on failure. */ -int lprocfs_str_with_units_to_s64(const char __user *buffer, - unsigned long count, __s64 *val, char defunit) +int sysfs_memparse(const char *buffer, size_t count, u64 *val, + const char *defunit) { - __u64 mult = 1; + const char *param = buffer; + char tmp_buf[23]; int rc; - if (defunit != '1') { - rc = get_mult(defunit, &mult); - if (rc) - return rc; + count = strlen(buffer); + while (count > 0 && isspace(buffer[count - 1])) + count--; + + if (!count) + RETURN(-EINVAL); + + /* If there isn't already a unit on this value, append @defunit. + * Units of 'B' don't affect the value, so don't bother adding. 
+ */ + if (!isalpha(buffer[count - 1]) && defunit[0] != 'B') { + if (count + 3 >= sizeof(tmp_buf)) { + CERROR("count %zd > size %zd\n", count, sizeof(param)); + RETURN(-E2BIG); + } + + scnprintf(tmp_buf, sizeof(tmp_buf), "%.*s%s", (int)count, + buffer, defunit); + param = tmp_buf; + count = strlen(param); } - return str_to_s64_internal(buffer, count, val, mult, true); + rc = string_to_size(val, param, count); + + return rc < 0 ? rc : 0; } -EXPORT_SYMBOL(lprocfs_str_with_units_to_s64); +EXPORT_SYMBOL(sysfs_memparse); -static char *lprocfs_strnstr(const char *s1, const char *s2, size_t len) +char *lprocfs_strnstr(const char *s1, const char *s2, size_t len) { size_t l2; @@ -2129,6 +1925,7 @@ static char *lprocfs_strnstr(const char *s1, const char *s2, size_t len) } return NULL; } +EXPORT_SYMBOL(lprocfs_strnstr); /** * Find the string \a name in the input \a buffer, and return a pointer to the @@ -2143,7 +1940,7 @@ char *lprocfs_find_named_value(const char *buffer, const char *name, /* there is no strnstr() in rhel5 and ubuntu kernels */ val = lprocfs_strnstr(buffer, name, buflen); - if (val == NULL) + if (!val) return (char *)buffer; val += strlen(name); /* skip prefix */ @@ -2160,51 +1957,35 @@ char *lprocfs_find_named_value(const char *buffer, const char *name, } EXPORT_SYMBOL(lprocfs_find_named_value); -int ldebugfs_seq_create(struct dentry *parent, const char *name, umode_t mode, - const struct file_operations *seq_fops, void *data) -{ - struct dentry *entry; - - /* Disallow secretly (un)writable entries. */ - LASSERT((!seq_fops->write) == (!(mode & 0222))); - - entry = debugfs_create_file(name, mode, parent, data, seq_fops); - if (IS_ERR_OR_NULL(entry)) - return entry ? 
PTR_ERR(entry) : -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(ldebugfs_seq_create); - int lprocfs_seq_create(struct proc_dir_entry *parent, const char *name, mode_t mode, - const struct file_operations *seq_fops, + const struct proc_ops *seq_fops, void *data) { struct proc_dir_entry *entry; ENTRY; /* Disallow secretly (un)writable entries. */ - LASSERT((seq_fops->write == NULL) == ((mode & 0222) == 0)); + LASSERT(!seq_fops->proc_write == !(mode & 0222)); entry = proc_create_data(name, mode, parent, seq_fops, data); - if (entry == NULL) + if (!entry) RETURN(-ENOMEM); RETURN(0); } EXPORT_SYMBOL(lprocfs_seq_create); -int lprocfs_obd_seq_create(struct obd_device *dev, +int lprocfs_obd_seq_create(struct obd_device *obd, const char *name, mode_t mode, - const struct file_operations *seq_fops, + const struct proc_ops *seq_fops, void *data) { - return (lprocfs_seq_create(dev->obd_proc_entry, name, - mode, seq_fops, data)); + return lprocfs_seq_create(obd->obd_proc_entry, name, + mode, seq_fops, data); } EXPORT_SYMBOL(lprocfs_obd_seq_create); @@ -2232,12 +2013,12 @@ EXPORT_SYMBOL(lprocfs_oh_tally_log2); unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { - unsigned long ret = 0; - int i; + unsigned long ret = 0; + int i; - for (i = 0; i < OBD_HIST_MAX; i++) - ret += oh->oh_buckets[i]; - return ret; + for (i = 0; i < OBD_HIST_MAX; i++) + ret += oh->oh_buckets[i]; + return ret; } EXPORT_SYMBOL(lprocfs_oh_sum); @@ -2249,6 +2030,95 @@ void lprocfs_oh_clear(struct obd_histogram *oh) } EXPORT_SYMBOL(lprocfs_oh_clear); +void lprocfs_oh_tally_pcpu(struct obd_hist_pcpu *oh, + unsigned int value) +{ + if (value >= OBD_HIST_MAX) + value = OBD_HIST_MAX - 1; + + percpu_counter_inc(&oh->oh_pc_buckets[value]); +} +EXPORT_SYMBOL(lprocfs_oh_tally_pcpu); + +void lprocfs_oh_tally_log2_pcpu(struct obd_hist_pcpu *oh, + unsigned int value) +{ + unsigned int val = 0; + + if (likely(value != 0)) + val = min(fls(value - 1), OBD_HIST_MAX); + + lprocfs_oh_tally_pcpu(oh, val); +} 
+EXPORT_SYMBOL(lprocfs_oh_tally_log2_pcpu); + +unsigned long lprocfs_oh_counter_pcpu(struct obd_hist_pcpu *oh, + unsigned int value) +{ + return percpu_counter_sum(&oh->oh_pc_buckets[value]); +} +EXPORT_SYMBOL(lprocfs_oh_counter_pcpu); + +unsigned long lprocfs_oh_sum_pcpu(struct obd_hist_pcpu *oh) +{ + unsigned long ret = 0; + int i; + + for (i = 0; i < OBD_HIST_MAX; i++) + ret += percpu_counter_sum(&oh->oh_pc_buckets[i]); + + return ret; +} +EXPORT_SYMBOL(lprocfs_oh_sum_pcpu); + +int lprocfs_oh_alloc_pcpu(struct obd_hist_pcpu *oh) +{ + int i, rc; + + if (oh->oh_initialized) + return 0; + + for (i = 0; i < OBD_HIST_MAX; i++) { + rc = percpu_counter_init(&oh->oh_pc_buckets[i], 0, GFP_KERNEL); + if (rc) + goto out; + } + + oh->oh_initialized = true; + + return 0; + +out: + for (i--; i >= 0; i--) + percpu_counter_destroy(&oh->oh_pc_buckets[i]); + + return rc; +} +EXPORT_SYMBOL(lprocfs_oh_alloc_pcpu); + +void lprocfs_oh_clear_pcpu(struct obd_hist_pcpu *oh) +{ + int i; + + for (i = 0; i < OBD_HIST_MAX; i++) + percpu_counter_set(&oh->oh_pc_buckets[i], 0); +} +EXPORT_SYMBOL(lprocfs_oh_clear_pcpu); + +void lprocfs_oh_release_pcpu(struct obd_hist_pcpu *oh) +{ + int i; + + if (!oh->oh_initialized) + return; + + for (i = 0; i < OBD_HIST_MAX; i++) + percpu_counter_destroy(&oh->oh_pc_buckets[i]); + + oh->oh_initialized = false; +} +EXPORT_SYMBOL(lprocfs_oh_release_pcpu); + ssize_t lustre_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -2275,8 +2145,8 @@ EXPORT_SYMBOL_GPL(lustre_sysfs_ops); int lprocfs_obd_max_pages_per_rpc_seq_show(struct seq_file *m, void *data) { - struct obd_device *dev = data; - struct client_obd *cli = &dev->u.cli; + struct obd_device *obd = data; + struct client_obd *cli = &obd->u.cli; spin_lock(&cli->cl_loi_list_lock); seq_printf(m, "%d\n", cli->cl_max_pages_per_rpc); @@ -2289,91 +2159,99 @@ ssize_t lprocfs_obd_max_pages_per_rpc_seq_write(struct file *file, const char __user *buffer, size_t count, loff_t *off) { - struct 
obd_device *dev = - ((struct seq_file *)file->private_data)->private; - struct client_obd *cli = &dev->u.cli; - struct obd_connect_data *ocd = &cli->cl_import->imp_connect_data; + struct seq_file *m = file->private_data; + struct obd_device *obd = m->private; + struct client_obd *cli = &obd->u.cli; + struct obd_import *imp; + struct obd_connect_data *ocd; int chunk_mask, rc; - s64 val; + char kernbuf[22]; + u64 val; + + if (count > sizeof(kernbuf) - 1) + return -EINVAL; - rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1'); + if (copy_from_user(kernbuf, buffer, count)) + return -EFAULT; + + kernbuf[count] = '\0'; + + rc = sysfs_memparse(kernbuf, count, &val, "B"); if (rc) return rc; - if (val < 0) - return -ERANGE; /* if the max_pages is specified in bytes, convert to pages */ if (val >= ONE_MB_BRW_SIZE) val >>= PAGE_SHIFT; - LPROCFS_CLIMP_CHECK(dev); - - chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1); - /* max_pages_per_rpc must be chunk aligned */ - val = (val + ~chunk_mask) & chunk_mask; - if (val == 0 || (ocd->ocd_brw_size != 0 && - val > ocd->ocd_brw_size >> PAGE_SHIFT)) { - LPROCFS_CLIMP_EXIT(dev); - return -ERANGE; + with_imp_locked(obd, imp, rc) { + ocd = &imp->imp_connect_data; + chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1); + /* max_pages_per_rpc must be chunk aligned */ + val = (val + ~chunk_mask) & chunk_mask; + if (val == 0 || (ocd->ocd_brw_size != 0 && + val > ocd->ocd_brw_size >> PAGE_SHIFT)) { + rc = -ERANGE; + } else { + spin_lock(&cli->cl_loi_list_lock); + cli->cl_max_pages_per_rpc = val; + client_adjust_max_dirty(cli); + spin_unlock(&cli->cl_loi_list_lock); + } } - spin_lock(&cli->cl_loi_list_lock); - cli->cl_max_pages_per_rpc = val; - client_adjust_max_dirty(cli); - spin_unlock(&cli->cl_loi_list_lock); - LPROCFS_CLIMP_EXIT(dev); - return count; + return rc ?: count; } EXPORT_SYMBOL(lprocfs_obd_max_pages_per_rpc_seq_write); -int lprocfs_obd_short_io_bytes_seq_show(struct seq_file *m, void *data) +ssize_t 
short_io_bytes_show(struct kobject *kobj, struct attribute *attr, + char *buf) { - struct obd_device *dev = data; - struct client_obd *cli = &dev->u.cli; + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct client_obd *cli = &obd->u.cli; + int rc; spin_lock(&cli->cl_loi_list_lock); - seq_printf(m, "%d\n", cli->cl_short_io_bytes); + rc = sprintf(buf, "%d\n", cli->cl_max_short_io_bytes); spin_unlock(&cli->cl_loi_list_lock); - return 0; + return rc; } -EXPORT_SYMBOL(lprocfs_obd_short_io_bytes_seq_show); +EXPORT_SYMBOL(short_io_bytes_show); /* Used to catch people who think they're specifying pages. */ -#define MIN_SHORT_IO_BYTES 64 +#define MIN_SHORT_IO_BYTES 64U -ssize_t lprocfs_obd_short_io_bytes_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) +ssize_t short_io_bytes_store(struct kobject *kobj, struct attribute *attr, + const char *buffer, size_t count) { - struct obd_device *dev = ((struct seq_file *) - file->private_data)->private; - struct client_obd *cli = &dev->u.cli; - int val; + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct client_obd *cli = &obd->u.cli; + u64 val; int rc; - LPROCFS_CLIMP_CHECK(dev); - - rc = kstrtoint_from_user(buffer, count, 0, &val); - if (rc) - GOTO(out, rc); + if (strcmp(buffer, "-1") == 0) { + val = OBD_DEF_SHORT_IO_BYTES; + } else { + rc = sysfs_memparse(buffer, count, &val, "B"); + if (rc) + GOTO(out, rc); + } - if (val < MIN_SHORT_IO_BYTES || val > OBD_MAX_SHORT_IO_BYTES) + if (val && (val < MIN_SHORT_IO_BYTES || val > LNET_MTU)) GOTO(out, rc = -ERANGE); rc = count; spin_lock(&cli->cl_loi_list_lock); - if (val > (cli->cl_max_pages_per_rpc << PAGE_SHIFT)) - rc = -ERANGE; - else - cli->cl_short_io_bytes = val; + cli->cl_max_short_io_bytes = min_t(u64, val, OST_MAX_SHORT_IO_BYTES); spin_unlock(&cli->cl_loi_list_lock); out: - LPROCFS_CLIMP_EXIT(dev); return rc; } -EXPORT_SYMBOL(lprocfs_obd_short_io_bytes_seq_write); 
+EXPORT_SYMBOL(short_io_bytes_store); int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, struct root_squash_info *squash, char *name) @@ -2395,7 +2273,7 @@ int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, /* look for uid gid separator */ tmp = strchr(kernbuf, ':'); - if (tmp == NULL) { + if (!tmp) { errmsg = "needs uid:gid format"; GOTO(failed, rc = -EINVAL); } @@ -2422,7 +2300,7 @@ int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, RETURN(count); failed: - if (tmp != NULL) { + if (tmp) { tmp--; *tmp = ':'; } @@ -2443,7 +2321,7 @@ int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, int rc; char *kernbuf = NULL; char *errmsg; - struct list_head tmp; + LIST_HEAD(tmp); int len = count; ENTRY; @@ -2453,7 +2331,7 @@ int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, } OBD_ALLOC(kernbuf, count + 1); - if (kernbuf == NULL) { + if (!kernbuf) { errmsg = "no memory"; GOTO(failed, rc = -ENOMEM); } @@ -2469,17 +2347,16 @@ int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, if ((len == 4 && strncmp(kernbuf, "NONE", len) == 0) || (len == 5 && strncmp(kernbuf, "clear", len) == 0)) { /* empty string is special case */ - down_write(&squash->rsi_sem); + spin_lock(&squash->rsi_lock); if (!list_empty(&squash->rsi_nosquash_nids)) cfs_free_nidlist(&squash->rsi_nosquash_nids); - up_write(&squash->rsi_sem); + spin_unlock(&squash->rsi_lock); LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name); OBD_FREE(kernbuf, count + 1); RETURN(count); } - INIT_LIST_HEAD(&tmp); - if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) { + if (cfs_parse_nidlist(kernbuf, &tmp) < 0) { errmsg = "can't parse"; GOTO(failed, rc = -EINVAL); } @@ -2488,11 +2365,11 @@ int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, OBD_FREE(kernbuf, count + 1); kernbuf = NULL; - down_write(&squash->rsi_sem); + spin_lock(&squash->rsi_lock); if 
(!list_empty(&squash->rsi_nosquash_nids)) cfs_free_nidlist(&squash->rsi_nosquash_nids); list_splice(&tmp, &squash->rsi_nosquash_nids); - up_write(&squash->rsi_sem); + spin_unlock(&squash->rsi_lock); RETURN(count);