Whamcloud - gitweb
LU-17217 obd: reserve server-side connection policy bits
[fs/lustre-release.git] / lustre / obdclass / lprocfs_status.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/obdclass/lprocfs_status.c
32  *
33  * Author: Hariharan Thantry <thantry@users.sourceforge.net>
34  */
35
36 #define DEBUG_SUBSYSTEM S_CLASS
37
38 #include <obd_class.h>
39 #include <lprocfs_status.h>
40
41 #ifdef CONFIG_PROC_FS
42
43 /* enable start/elapsed_time in stats headers by default */
44 unsigned int obd_enable_stats_header = 1;
45
46 static int lprocfs_no_percpu_stats = 0;
47 module_param(lprocfs_no_percpu_stats, int, 0644);
48 MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs stats");
49
50 #define MAX_STRING_SIZE 128
51
/* Exported wrapper: hands (inode, file) to single_release(), returns its rc. */
int lprocfs_single_release(struct inode *inode, struct file *file)
{
        int rc = single_release(inode, file);

        return rc;
}
EXPORT_SYMBOL(lprocfs_single_release);
57
/* Exported wrapper: hands (inode, file) to seq_release(), returns its rc. */
int lprocfs_seq_release(struct inode *inode, struct file *file)
{
        int rc = seq_release(inode, file);

        return rc;
}
EXPORT_SYMBOL(lprocfs_seq_release);
63
64 static umode_t default_mode(const struct proc_ops *ops)
65 {
66         umode_t mode = 0;
67
68         if (ops->proc_read)
69                 mode = 0444;
70         if (ops->proc_write)
71                 mode |= 0200;
72
73         return mode;
74 }
75
76 struct proc_dir_entry *
77 lprocfs_add_simple(struct proc_dir_entry *root, char *name,
78                    void *data, const struct proc_ops *fops)
79 {
80         struct proc_dir_entry *proc;
81         umode_t mode;
82
83         if (!root || !name || !fops)
84                 return ERR_PTR(-EINVAL);
85
86         mode = default_mode(fops);
87         proc = proc_create_data(name, mode, root, fops, data);
88         if (!proc) {
89                 CERROR("LprocFS: No memory to create /proc entry %s\n",
90                        name);
91                 return ERR_PTR(-ENOMEM);
92         }
93         return proc;
94 }
95 EXPORT_SYMBOL(lprocfs_add_simple);
96
97 struct proc_dir_entry *lprocfs_add_symlink(const char *name,
98                                            struct proc_dir_entry *parent,
99                                            const char *format, ...)
100 {
101         struct proc_dir_entry *entry;
102         char *dest;
103         va_list ap;
104
105         if (!parent || !format)
106                 return NULL;
107
108         OBD_ALLOC_WAIT(dest, MAX_STRING_SIZE + 1);
109         if (!dest)
110                 return NULL;
111
112         va_start(ap, format);
113         vsnprintf(dest, MAX_STRING_SIZE, format, ap);
114         va_end(ap);
115
116         entry = proc_symlink(name, parent, dest);
117         if (!entry)
118                 CERROR("LprocFS: Could not create symbolic link from "
119                        "%s to %s\n", name, dest);
120
121         OBD_FREE(dest, MAX_STRING_SIZE + 1);
122         return entry;
123 }
124 EXPORT_SYMBOL(lprocfs_add_symlink);
125
126 static const struct file_operations ldebugfs_empty_ops = { };
127
128 void ldebugfs_add_vars(struct dentry *parent, struct ldebugfs_vars *list,
129                        void *data)
130 {
131         if (IS_ERR_OR_NULL(parent) || IS_ERR_OR_NULL(list))
132                 return;
133
134         while (list->name) {
135                 umode_t mode = 0;
136
137                 if (list->proc_mode != 0000) {
138                         mode = list->proc_mode;
139                 } else if (list->fops) {
140                         if (list->fops->read)
141                                 mode = 0444;
142                         if (list->fops->write)
143                                 mode |= 0200;
144                 }
145                 debugfs_create_file(list->name, mode, parent,
146                                     list->data ? : data,
147                                     list->fops ? : &ldebugfs_empty_ops);
148                 list++;
149         }
150 }
151 EXPORT_SYMBOL_GPL(ldebugfs_add_vars);
152
153 static const struct proc_ops lprocfs_empty_ops = { };
154
155 /**
156  * Add /proc entries.
157  *
158  * \param root [in]  The parent proc entry on which new entry will be added.
159  * \param list [in]  Array of proc entries to be added.
160  * \param data [in]  The argument to be passed when entries read/write routines
161  *                   are called through /proc file.
162  *
163  * \retval 0   on success
164  *         < 0 on error
165  */
166 int
167 lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
168                  void *data)
169 {
170         if (!root || !list)
171                 return -EINVAL;
172
173         while (list->name) {
174                 struct proc_dir_entry *proc;
175                 umode_t mode = 0;
176
177                 if (list->proc_mode)
178                         mode = list->proc_mode;
179                 else if (list->fops)
180                         mode = default_mode(list->fops);
181                 proc = proc_create_data(list->name, mode, root,
182                                         list->fops ?: &lprocfs_empty_ops,
183                                         list->data ?: data);
184                 if (!proc)
185                         return -ENOMEM;
186                 list++;
187         }
188         return 0;
189 }
190 EXPORT_SYMBOL(lprocfs_add_vars);
191
192 void lprocfs_remove(struct proc_dir_entry **rooth)
193 {
194         proc_remove(*rooth);
195         *rooth = NULL;
196 }
197 EXPORT_SYMBOL(lprocfs_remove);
198
/*
 * Remove the entry called \a name from \a parent via remove_proc_entry().
 * \a parent must not be NULL (asserted).
 */
void lprocfs_remove_proc_entry(const char *name, struct proc_dir_entry *parent)
{
        LASSERT(parent != NULL);
        remove_proc_entry(name, parent);
}
EXPORT_SYMBOL(lprocfs_remove_proc_entry);
205
206 struct proc_dir_entry *
207 lprocfs_register(const char *name, struct proc_dir_entry *parent,
208                  struct lprocfs_vars *list, void *data)
209 {
210         struct proc_dir_entry *newchild;
211
212         newchild = proc_mkdir(name, parent);
213         if (!newchild)
214                 return ERR_PTR(-ENOMEM);
215
216         if (list) {
217                 int rc = lprocfs_add_vars(newchild, list, data);
218                 if (rc) {
219                         lprocfs_remove(&newchild);
220                         return ERR_PTR(rc);
221                 }
222         }
223         return newchild;
224 }
225 EXPORT_SYMBOL(lprocfs_register);
226
227 /* Generic callbacks */
228 int lprocfs_uuid_seq_show(struct seq_file *m, void *data)
229 {
230         struct obd_device *obd = data;
231
232         LASSERT(obd != NULL);
233         seq_printf(m, "%s\n", obd->obd_uuid.uuid);
234         return 0;
235 }
236 EXPORT_SYMBOL(lprocfs_uuid_seq_show);
237
238 static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
239                          char *buf)
240 {
241         struct obd_device *obd = container_of(kobj, struct obd_device,
242                                               obd_kset.kobj);
243
244         return sprintf(buf, "%s\n", obd->obd_uuid.uuid);
245 }
246 LUSTRE_RO_ATTR(uuid);
247
248 static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr,
249                               char *buf)
250 {
251         struct obd_device *obd = container_of(kobj, struct obd_device,
252                                               obd_kset.kobj);
253         struct obd_statfs osfs;
254         int rc;
255
256         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
257                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
258                         OBD_STATFS_NODELAY);
259         if (!rc)
260                 return sprintf(buf, "%u\n", osfs.os_bsize);
261
262         return rc;
263 }
264 LUSTRE_RO_ATTR(blocksize);
265
266 static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr,
267                                 char *buf)
268 {
269         struct obd_device *obd = container_of(kobj, struct obd_device,
270                                               obd_kset.kobj);
271         struct obd_statfs osfs;
272         int rc;
273
274         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
275                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
276                         OBD_STATFS_NODELAY);
277         if (!rc) {
278                 u32 blk_size = osfs.os_bsize >> 10;
279                 u64 result = osfs.os_blocks;
280
281                 result *= rounddown_pow_of_two(blk_size ?: 1);
282                 return sprintf(buf, "%llu\n", result);
283         }
284
285         return rc;
286 }
287 LUSTRE_RO_ATTR(kbytestotal);
288
289 static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr,
290                                char *buf)
291 {
292         struct obd_device *obd = container_of(kobj, struct obd_device,
293                                               obd_kset.kobj);
294         struct obd_statfs osfs;
295         int rc;
296
297         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
298                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
299                         OBD_STATFS_NODELAY);
300         if (!rc) {
301                 u32 blk_size = osfs.os_bsize >> 10;
302                 u64 result = osfs.os_bfree;
303
304                 while (blk_size >>= 1)
305                         result <<= 1;
306
307                 return sprintf(buf, "%llu\n", result);
308         }
309
310         return rc;
311 }
312 LUSTRE_RO_ATTR(kbytesfree);
313
314 static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr,
315                                 char *buf)
316 {
317         struct obd_device *obd = container_of(kobj, struct obd_device,
318                                               obd_kset.kobj);
319         struct obd_statfs osfs;
320         int rc;
321
322         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
323                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
324                         OBD_STATFS_NODELAY);
325         if (!rc) {
326                 u32 blk_size = osfs.os_bsize >> 10;
327                 u64 result = osfs.os_bavail;
328
329                 while (blk_size >>= 1)
330                         result <<= 1;
331
332                 return sprintf(buf, "%llu\n", result);
333         }
334
335         return rc;
336 }
337 LUSTRE_RO_ATTR(kbytesavail);
338
339 static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr,
340                                char *buf)
341 {
342         struct obd_device *obd = container_of(kobj, struct obd_device,
343                                               obd_kset.kobj);
344         struct obd_statfs osfs;
345         int rc;
346
347         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
348                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
349                         OBD_STATFS_NODELAY);
350         if (!rc)
351                 return sprintf(buf, "%llu\n", osfs.os_files);
352
353         return rc;
354 }
355 LUSTRE_RO_ATTR(filestotal);
356
357 static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr,
358                               char *buf)
359 {
360         struct obd_device *obd = container_of(kobj, struct obd_device,
361                                               obd_kset.kobj);
362         struct obd_statfs osfs;
363         int rc;
364
365         rc = obd_statfs(NULL, obd->obd_self_export, &osfs,
366                         ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
367                         OBD_STATFS_NODELAY);
368         if (!rc)
369                 return sprintf(buf, "%llu\n", osfs.os_ffree);
370
371         return rc;
372 }
373 LUSTRE_RO_ATTR(filesfree);
374
375 ssize_t conn_uuid_show(struct kobject *kobj, struct attribute *attr, char *buf)
376 {
377         struct obd_device *obd = container_of(kobj, struct obd_device,
378                                               obd_kset.kobj);
379         struct obd_import *imp;
380         struct ptlrpc_connection *conn;
381         ssize_t count;
382
383         with_imp_locked(obd, imp, count) {
384                 conn = imp->imp_connection;
385                 if (conn)
386                         count = sprintf(buf, "%s\n", conn->c_remote_uuid.uuid);
387                 else
388                         count = sprintf(buf, "%s\n", "<none>");
389         }
390
391         return count;
392 }
393 EXPORT_SYMBOL(conn_uuid_show);
394
395 int lprocfs_server_uuid_seq_show(struct seq_file *m, void *data)
396 {
397         struct obd_device *obd = data;
398         struct obd_import *imp;
399         const char *imp_state_name = NULL;
400         int rc = 0;
401
402         LASSERT(obd != NULL);
403         with_imp_locked(obd, imp, rc) {
404                 imp_state_name = ptlrpc_import_state_name(imp->imp_state);
405                 seq_printf(m, "%s\t%s%s\n", obd2cli_tgt(obd), imp_state_name,
406                            imp->imp_deactive ? "\tDEACTIVATED" : "");
407         }
408
409         return rc;
410 }
411 EXPORT_SYMBOL(lprocfs_server_uuid_seq_show);
412
/* Locking helpers for lprocfs statistics (single global or per-CPU structs) */
414
415 /**
416  * Lock statistics structure for access, possibly only on this CPU.
417  *
418  * The statistics struct may be allocated with per-CPU structures for
419  * efficient concurrent update (usually only on server-wide stats), or
420  * as a single global struct (e.g. for per-client or per-job statistics),
421  * so the required locking depends on the type of structure allocated.
422  *
423  * For per-CPU statistics, pin the thread to the current cpuid so that
424  * will only access the statistics for that CPU.  If the stats structure
425  * for the current CPU has not been allocated (or previously freed),
426  * allocate it now.  The per-CPU statistics do not need locking since
427  * the thread is pinned to the CPU during update.
428  *
429  * For global statistics, lock the stats structure to prevent concurrent update.
430  *
431  * \param[in] stats     statistics structure to lock
432  * \param[in] opc       type of operation:
433  *                      LPROCFS_GET_SMP_ID: "lock" and return current CPU index
434  *                              for incrementing statistics for that CPU
435  *                      LPROCFS_GET_NUM_CPU: "lock" and return number of used
436  *                              CPU indices to iterate over all indices
437  * \param[out] flags    CPU interrupt saved state for IRQ-safe locking
438  *
439  * \retval cpuid of current thread or number of allocated structs
440  * \retval negative on error (only for opc LPROCFS_GET_SMP_ID + per-CPU stats)
441  */
442 int lprocfs_stats_lock(struct lprocfs_stats *stats,
443                        enum lprocfs_stats_lock_ops opc,
444                        unsigned long *flags)
445 {
446         if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
447                 if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
448                         spin_lock_irqsave(&stats->ls_lock, *flags);
449                 else
450                         spin_lock(&stats->ls_lock);
451                 return opc == LPROCFS_GET_NUM_CPU ? 1 : 0;
452         }
453
454         switch (opc) {
455         case LPROCFS_GET_SMP_ID: {
456                 unsigned int cpuid = get_cpu();
457
458                 if (unlikely(!stats->ls_percpu[cpuid])) {
459                         int rc = lprocfs_stats_alloc_one(stats, cpuid);
460
461                         if (rc < 0) {
462                                 put_cpu();
463                                 return rc;
464                         }
465                 }
466                 return cpuid;
467         }
468         case LPROCFS_GET_NUM_CPU:
469                 return stats->ls_biggest_alloc_num;
470         default:
471                 LBUG();
472         }
473 }
474
475 /**
476  * Unlock statistics structure after access.
477  *
478  * Unlock the lock acquired via lprocfs_stats_lock() for global statistics,
479  * or unpin this thread from the current cpuid for per-CPU statistics.
480  *
481  * This function must be called using the same arguments as used when calling
482  * lprocfs_stats_lock() so that the correct operation can be performed.
483  *
484  * \param[in] stats     statistics structure to unlock
485  * \param[in] opc       type of operation (current cpuid or number of structs)
486  * \param[in] flags     CPU interrupt saved state for IRQ-safe locking
487  */
488 void lprocfs_stats_unlock(struct lprocfs_stats *stats,
489                           enum lprocfs_stats_lock_ops opc,
490                           unsigned long *flags)
491 {
492         if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
493                 if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
494                         spin_unlock_irqrestore(&stats->ls_lock, *flags);
495                 else
496                         spin_unlock(&stats->ls_lock);
497         } else if (opc == LPROCFS_GET_SMP_ID) {
498                 put_cpu();
499         }
500 }
501
502 /** add up per-cpu counters */
503 void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
504                            struct lprocfs_counter *cnt)
505 {
506         unsigned int num_entry;
507         struct lprocfs_counter *percpu_cntr;
508         int i;
509         unsigned long flags = 0;
510
511         memset(cnt, 0, sizeof(*cnt));
512
513         if (!stats) {
514                 /* set count to 1 to avoid divide-by-zero errs in callers */
515                 cnt->lc_count = 1;
516                 return;
517         }
518
519         cnt->lc_min = LC_MIN_INIT;
520
521         num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
522
523         for (i = 0; i < num_entry; i++) {
524                 if (!stats->ls_percpu[i])
525                         continue;
526                 percpu_cntr = lprocfs_stats_counter_get(stats, i, idx);
527
528                 cnt->lc_count += percpu_cntr->lc_count;
529                 cnt->lc_sum += percpu_cntr->lc_sum;
530                 if (percpu_cntr->lc_min < cnt->lc_min)
531                         cnt->lc_min = percpu_cntr->lc_min;
532                 if (percpu_cntr->lc_max > cnt->lc_max)
533                         cnt->lc_max = percpu_cntr->lc_max;
534                 cnt->lc_sumsquare += percpu_cntr->lc_sumsquare;
535         }
536
537         lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
538 }
539
/*
 * Print the flags of \a imp into \a m: "no_recov" when the owning obd has
 * obd_no_recov set, followed by one flag2str() invocation per import flag.
 *
 * NOTE(review): flag2str() is defined outside this file section.  It
 * presumably tests imp->imp_<flag> and uses the locals 'm' and 'first'
 * by name to print a separator -- confirm before renaming either.
 */
static void obd_import_flags2str(struct obd_import *imp, struct seq_file *m)
{
        /* cleared once something was printed */
        bool first = true;

        if (imp->imp_obd->obd_no_recov) {
                seq_printf(m, "no_recov");
                first = false;
        }

        flag2str(imp, invalid);
        flag2str(imp, deactive);
        flag2str(imp, replayable);
        flag2str(imp, delayed_recovery);
        flag2str(imp, vbr_failed);
        flag2str(imp, pingable);
        flag2str(imp, resend_replay);
        flag2str(imp, no_pinger_recover);
        flag2str(imp, connect_tried);
}
559
/*
 * Names of the connect flag bits, indexed by bit position.
 *
 * Entries 0..63 name bits 0..63 of the first flags word, entries from 64
 * on name the bits of the second flags word starting at bit 0.  The table
 * is NULL terminated.  The order must not change: the printing functions
 * below map entry i to mask (1 << i) resp. (1 << (i - 64)).
 */
static const char *const obd_connect_names[] = {
        "read_only",                    /* 0x01 */
        "lov_index",                    /* 0x02 */
        "connect_from_mds",             /* 0x04 */
        "write_grant",                  /* 0x08 */
        "server_lock",                  /* 0x10 */
        "version",                      /* 0x20 */
        "request_portal",               /* 0x40 */
        "acl",                          /* 0x80 */
        "xattr",                        /* 0x100 */
        "create_on_write",              /* 0x200 */
        "truncate_lock",                /* 0x400 */
        "initial_transno",              /* 0x800 */
        "inode_bit_locks",              /* 0x1000 */
        "barrier",                      /* 0x2000 */
        "getattr_by_fid",               /* 0x4000 */
        "no_oh_for_devices",            /* 0x8000 */
        "remote_client",                /* 0x10000 */
        "remote_client_by_force",       /* 0x20000 */
        "max_byte_per_rpc",             /* 0x40000 */
        "64bit_qdata",                  /* 0x80000 */
        "mds_capability",               /* 0x100000 */
        "oss_capability",               /* 0x200000 */
        "early_lock_cancel",            /* 0x400000 */
        "som",                          /* 0x800000 */
        "adaptive_timeouts",            /* 0x1000000 */
        "lru_resize",                   /* 0x2000000 */
        "mds_mds_connection",           /* 0x4000000 */
        "real_conn",                    /* 0x8000000 */
        "change_qunit_size",            /* 0x10000000 */
        "alt_checksum_algorithm",       /* 0x20000000 */
        "fid_is_enabled",               /* 0x40000000 */
        "version_recovery",             /* 0x80000000 */
        "pools",                        /* 0x100000000 */
        "grant_shrink",                 /* 0x200000000 */
        "skip_orphan",                  /* 0x400000000 */
        "large_ea",                     /* 0x800000000 */
        "full20",                       /* 0x1000000000 */
        "layout_lock",                  /* 0x2000000000 */
        "64bithash",                    /* 0x4000000000 */
        "object_max_bytes",             /* 0x8000000000 */
        "imp_recov",                    /* 0x10000000000 */
        "jobstats",                     /* 0x20000000000 */
        "umask",                        /* 0x40000000000 */
        "einprogress",                  /* 0x80000000000 */
        "grant_param",                  /* 0x100000000000 */
        "flock_owner",                  /* 0x200000000000 */
        "lvb_type",                     /* 0x400000000000 */
        "nanoseconds_times",            /* 0x800000000000 */
        "lightweight_conn",             /* 0x1000000000000 */
        "short_io",                     /* 0x2000000000000 */
        "pingless",                     /* 0x4000000000000 */
        "flock_deadlock",               /* 0x8000000000000 */
        "disp_stripe",                  /* 0x10000000000000 */
        "open_by_fid",                  /* 0x20000000000000 */
        "lfsck",                        /* 0x40000000000000 */
        "unknown",                      /* 0x80000000000000 */
        "unlink_close",                 /* 0x100000000000000 */
        "multi_mod_rpcs",               /* 0x200000000000000 */
        "dir_stripe",                   /* 0x400000000000000 */
        "subtree",                      /* 0x800000000000000 */
        "lockahead",                    /* 0x1000000000000000 */
        "bulk_mbits",                   /* 0x2000000000000000 */
        "compact_obdo",                 /* 0x4000000000000000 */
        "second_flags",                 /* 0x8000000000000000 */
        /* ocd_connect_flags2 names */
        "file_secctx",                  /* 0x01 */
        "lockaheadv2",                  /* 0x02 */
        "dir_migrate",                  /* 0x04 */
        "sum_statfs",                   /* 0x08 */
        "overstriping",                 /* 0x10 */
        "flr",                          /* 0x20 */
        "wbc",                          /* 0x40 */
        "lock_convert",                 /* 0x80 */
        "archive_id_array",             /* 0x100 */
        "increasing_xid",               /* 0x200 */
        "selinux_policy",               /* 0x400 */
        "lsom",                         /* 0x800 */
        "pcc",                          /* 0x1000 */
        "crush",                        /* 0x2000 */
        "async_discard",                /* 0x4000 */
        "client_encryption",            /* 0x8000 */
        "fidmap",                       /* 0x10000 */
        "getattr_pfid",                 /* 0x20000 */
        "lseek",                        /* 0x40000 */
        "dom_lvb",                      /* 0x80000 */
        "reply_mbits",                  /* 0x100000 */
        "mode_convert",                 /* 0x200000 */
        "batch_rpc",                    /* 0x400000 */
        "pcc_ro",                       /* 0x800000 */
        "mne_nid_type",                 /* 0x1000000 */
        "lock_contend",                 /* 0x2000000 */
        "atomic_open_lock",             /* 0x4000000 */
        "name_encryption",              /* 0x8000000 */
        "mkdir_replay",                 /* 0x10000000 */
        "dmv_imp_inherit",              /* 0x20000000 */
        "encryption_fid2path",          /* 0x40000000 */
        "replay_create",                /* 0x80000000 */
        "large_nid",                    /* 0x100000000 */
        "compressed_file",              /* 0x200000000 */
        "unaligned_dio",                /* 0x400000000 */
        "conn_policy",                  /* 0x800000000 */
        NULL
};
664
665 void obd_connect_seq_flags2str(struct seq_file *m, __u64 flags, __u64 flags2,
666                                const char *sep)
667 {
668         bool first = true;
669         __u64 mask;
670         int i;
671
672         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
673                 if (flags & mask) {
674                         seq_printf(m, "%s%s",
675                                    first ? "" : sep, obd_connect_names[i]);
676                         first = false;
677                 }
678         }
679
680         if (flags & ~(mask - 1)) {
681                 seq_printf(m, "%sunknown_%#llx",
682                            first ? "" : sep, flags & ~(mask - 1));
683                 first = false;
684         }
685
686         if (!(flags & OBD_CONNECT_FLAGS2) || flags2 == 0)
687                 return;
688
689         for (i = 64, mask = 1; obd_connect_names[i] != NULL; i++, mask <<= 1) {
690                 if (flags2 & mask) {
691                         seq_printf(m, "%s%s",
692                                    first ? "" : sep, obd_connect_names[i]);
693                         first = false;
694                 }
695         }
696
697         if (flags2 & ~(mask - 1)) {
698                 seq_printf(m, "%sunknown2_%#llx",
699                            first ? "" : sep, flags2 & ~(mask - 1));
700                 first = false;
701         }
702 }
703 EXPORT_SYMBOL(obd_connect_seq_flags2str);
704
705 int obd_connect_flags2str(char *page, int count, __u64 flags, __u64 flags2,
706                           const char *sep)
707 {
708         __u64 mask;
709         int i, ret = 0;
710
711         for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
712                 if (flags & mask)
713                         ret += snprintf(page + ret, count - ret, "%s%s",
714                                         ret ? sep : "", obd_connect_names[i]);
715         }
716
717         if (flags & ~(mask - 1))
718                 ret += snprintf(page + ret, count - ret,
719                                 "%sunknown_%#llx",
720                                 ret ? sep : "", flags & ~(mask - 1));
721
722         if (!(flags & OBD_CONNECT_FLAGS2) || flags2 == 0)
723                 return ret;
724
725         for (i = 64, mask = 1; obd_connect_names[i] != NULL; i++, mask <<= 1) {
726                 if (flags2 & mask)
727                         ret += snprintf(page + ret, count - ret, "%s%s",
728                                         ret ? sep : "", obd_connect_names[i]);
729         }
730
731         if (flags2 & ~(mask - 1))
732                 ret += snprintf(page + ret, count - ret,
733                                 "%sunknown2_%#llx",
734                                 ret ? sep : "", flags2 & ~(mask - 1));
735
736         return ret;
737 }
738 EXPORT_SYMBOL(obd_connect_flags2str);
739
740 void
741 obd_connect_data_seqprint(struct seq_file *m, struct obd_connect_data *ocd)
742 {
743         __u64 flags;
744
745         LASSERT(ocd != NULL);
746         flags = ocd->ocd_connect_flags;
747
748         seq_printf(m, "    connect_data:\n"
749                    "       flags: %#llx\n"
750                    "       instance: %u\n",
751                    ocd->ocd_connect_flags,
752                    ocd->ocd_instance);
753         if (flags & OBD_CONNECT_VERSION)
754                 seq_printf(m, "       target_version: %u.%u.%u.%u\n",
755                            OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
756                            OBD_OCD_VERSION_MINOR(ocd->ocd_version),
757                            OBD_OCD_VERSION_PATCH(ocd->ocd_version),
758                            OBD_OCD_VERSION_FIX(ocd->ocd_version));
759         if (flags & OBD_CONNECT_MDS)
760                 seq_printf(m, "       mdt_index: %d\n", ocd->ocd_group);
761         if (flags & OBD_CONNECT_GRANT)
762                 seq_printf(m, "       initial_grant: %d\n", ocd->ocd_grant);
763         if (flags & OBD_CONNECT_INDEX)
764                 seq_printf(m, "       target_index: %u\n", ocd->ocd_index);
765         if (flags & OBD_CONNECT_BRW_SIZE)
766                 seq_printf(m, "       max_brw_size: %d\n", ocd->ocd_brw_size);
767         if (flags & OBD_CONNECT_IBITS)
768                 seq_printf(m, "       ibits_known: %#llx\n",
769                            ocd->ocd_ibits_known);
770         if (flags & OBD_CONNECT_GRANT_PARAM)
771                 seq_printf(m, "       grant_block_size: %d\n"
772                            "       grant_inode_size: %d\n"
773                            "       grant_max_extent_size: %d\n"
774                            "       grant_extent_tax: %d\n",
775                            1 << ocd->ocd_grant_blkbits,
776                            1 << ocd->ocd_grant_inobits,
777                            ocd->ocd_grant_max_blks << ocd->ocd_grant_blkbits,
778                            ocd->ocd_grant_tax_kb << 10);
779         if (flags & OBD_CONNECT_TRANSNO)
780                 seq_printf(m, "       first_transno: %#llx\n",
781                            ocd->ocd_transno);
782         if (flags & OBD_CONNECT_CKSUM)
783                 seq_printf(m, "       cksum_types: %#x\n",
784                            ocd->ocd_cksum_types);
785         if (flags & OBD_CONNECT_MAX_EASIZE)
786                 seq_printf(m, "       max_easize: %d\n", ocd->ocd_max_easize);
787         if (flags & OBD_CONNECT_MAXBYTES)
788                 seq_printf(m, "       max_object_bytes: %llu\n",
789                            ocd->ocd_maxbytes);
790         if (flags & OBD_CONNECT_MULTIMODRPCS)
791                 seq_printf(m, "       max_mod_rpcs: %hu\n",
792                            ocd->ocd_maxmodrpcs);
793 }
794
795 static void lprocfs_import_seq_show_locked(struct seq_file *m,
796                                            struct obd_device *obd,
797                                            struct obd_import *imp)
798 {
799         char nidstr[LNET_NIDSTR_SIZE];
800         struct lprocfs_counter ret;
801         struct lprocfs_counter_header *header;
802         struct obd_import_conn *conn;
803         struct obd_connect_data *ocd;
804         int j;
805         int k;
806         int rw = 0;
807
808         ocd = &imp->imp_connect_data;
809
810         seq_printf(m, "import:\n"
811                    "    name: %s\n"
812                    "    target: %s\n"
813                    "    state: %s\n"
814                    "    connect_flags: [ ",
815                    obd->obd_name,
816                    obd2cli_tgt(obd),
817                    ptlrpc_import_state_name(imp->imp_state));
818         obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
819                                   imp->imp_connect_data.ocd_connect_flags2,
820                                   ", ");
821         seq_printf(m, " ]\n");
822         obd_connect_data_seqprint(m, ocd);
823         seq_printf(m, "    import_flags: [ ");
824         obd_import_flags2str(imp, m);
825
826         seq_printf(m, " ]\n"
827                    "    connection:\n"
828                    "       failover_nids: [ ");
829         spin_lock(&imp->imp_lock);
830         j = 0;
831         list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
832                 libcfs_nidstr_r(&conn->oic_conn->c_peer.nid,
833                                   nidstr, sizeof(nidstr));
834                 if (j)
835                         seq_puts(m, ", ");
836                 /* Place nidstr in quotes */
837                 seq_printf(m, "\"%s\"", nidstr);
838                 j++;
839         }
840         if (imp->imp_connection)
841                 libcfs_nidstr_r(&imp->imp_connection->c_peer.nid,
842                                   nidstr, sizeof(nidstr));
843         else
844                 strncpy(nidstr, "<none>", sizeof(nidstr));
845         seq_printf(m, " ]\n"
846                    "       current_connection: \"%s\"\n"
847                    "       connection_attempts: %u\n"
848                    "       generation: %u\n"
849                    "       in-progress_invalidations: %u\n"
850                    "       idle: %lld sec\n",
851                    nidstr,
852                    imp->imp_conn_cnt,
853                    imp->imp_generation,
854                    atomic_read(&imp->imp_inval_count),
855                    ktime_get_real_seconds() - imp->imp_last_reply_time);
856         spin_unlock(&imp->imp_lock);
857
858         if (!obd->obd_svc_stats)
859                 return;
860
861         header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR];
862         lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret);
863         if (ret.lc_count != 0)
864                 ret.lc_sum = div64_s64(ret.lc_sum, ret.lc_count);
865         else
866                 ret.lc_sum = 0;
867         seq_printf(m, "    rpcs:\n"
868                    "       inflight: %u\n"
869                    "       unregistering: %u\n"
870                    "       timeouts: %u\n"
871                    "       avg_waittime: %llu %s\n",
872                    atomic_read(&imp->imp_inflight),
873                    atomic_read(&imp->imp_unregistering),
874                    atomic_read(&imp->imp_timeouts),
875                    ret.lc_sum, header->lc_units);
876
877         k = 0;
878         for(j = 0; j < IMP_AT_MAX_PORTALS; j++) {
879                 if (imp->imp_at.iat_portal[j] == 0)
880                         break;
881                 k = max_t(unsigned int, k,
882                           obd_at_get(imp->imp_obd,
883                                      &imp->imp_at.iat_service_estimate[j]));
884         }
885         seq_printf(m, "    service_estimates:\n"
886                    "       services: %u sec\n"
887                    "       network: %d sec\n",
888                    k,
889                    obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency));
890
891         seq_printf(m, "    transactions:\n"
892                    "       last_replay: %llu\n"
893                    "       peer_committed: %llu\n"
894                    "       last_checked: %llu\n",
895                    imp->imp_last_replay_transno,
896                    imp->imp_peer_committed_transno,
897                    imp->imp_last_transno_checked);
898
899         /* avg data rates */
900         for (rw = 0; rw <= 1; rw++) {
901                 lprocfs_stats_collect(obd->obd_svc_stats,
902                                       PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw,
903                                       &ret);
904                 if (ret.lc_sum > 0 && ret.lc_count > 0) {
905                         ret.lc_sum = div64_s64(ret.lc_sum, ret.lc_count);
906                         seq_printf(m, "    %s_data_averages:\n"
907                                    "       bytes_per_rpc: %llu\n",
908                                    rw ? "write" : "read",
909                                    ret.lc_sum);
910                 }
911                 k = (int)ret.lc_sum;
912                 j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES;
913                 header = &obd->obd_svc_stats->ls_cnt_header[j];
914                 lprocfs_stats_collect(obd->obd_svc_stats, j, &ret);
915                 if (ret.lc_sum > 0 && ret.lc_count != 0) {
916                         ret.lc_sum = div64_s64(ret.lc_sum, ret.lc_count);
917                         seq_printf(m, "       %s_per_rpc: %llu\n",
918                                    header->lc_units, ret.lc_sum);
919                         j = (int)ret.lc_sum;
920                         if (j > 0)
921                                 seq_printf(m, "       MB_per_sec: %u.%.02u\n",
922                                            k / j, (100 * k / j) % 100);
923                 }
924         }
925 }
926
927 int lprocfs_import_seq_show(struct seq_file *m, void *data)
928 {
929         struct obd_device *obd = (struct obd_device *)data;
930         struct obd_import *imp;
931         int rv;
932
933         LASSERT(obd != NULL);
934         with_imp_locked(obd, imp, rv)
935                 lprocfs_import_seq_show_locked(m, obd, imp);
936         return rv;
937 }
938 EXPORT_SYMBOL(lprocfs_import_seq_show);
939
940 int lprocfs_state_seq_show(struct seq_file *m, void *data)
941 {
942         struct obd_device *obd = (struct obd_device *)data;
943         struct obd_import *imp;
944         int j, k;
945         int rc;
946
947         LASSERT(obd != NULL);
948         with_imp_locked(obd, imp, rc) {
949                 seq_printf(m, "current_state: %s\n",
950                            ptlrpc_import_state_name(imp->imp_state));
951                 seq_printf(m, "state_history:\n");
952                 k = imp->imp_state_hist_idx;
953                 for (j = 0; j < IMP_STATE_HIST_LEN; j++) {
954                         struct import_state_hist *ish =
955                                 &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN];
956                         if (ish->ish_state == 0)
957                                 continue;
958                         seq_printf(m, " - [ %lld, %s ]\n", (s64)ish->ish_time,
959                                    ptlrpc_import_state_name(ish->ish_state));
960                 }
961         }
962
963         return rc;
964 }
965 EXPORT_SYMBOL(lprocfs_state_seq_show);
966
967 int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at)
968 {
969         int i;
970         for (i = 0; i < AT_BINS; i++)
971                 seq_printf(m, "%3u ", at->at_hist[i]);
972         seq_printf(m, "\n");
973         return 0;
974 }
975 EXPORT_SYMBOL(lprocfs_at_hist_helper);
976
977 /* See also ptlrpc_lprocfs_timeouts_show_seq */
978 static void lprocfs_timeouts_seq_show_locked(struct seq_file *m,
979                                              struct obd_device *obd,
980                                              struct obd_import *imp)
981 {
982         timeout_t cur_timeout, worst_timeout;
983         time64_t now, worst_timestamp;
984         int i;
985
986         LASSERT(obd != NULL);
987
988         now = ktime_get_real_seconds();
989
990         /* Some network health info for kicks */
991         seq_printf(m, "%-10s : %lld, %llds ago\n",
992                    "last reply", (s64)imp->imp_last_reply_time,
993                    (s64)(now - imp->imp_last_reply_time));
994
995         cur_timeout = obd_at_get(imp->imp_obd, &imp->imp_at.iat_net_latency);
996         worst_timeout = imp->imp_at.iat_net_latency.at_worst_timeout_ever;
997         worst_timestamp = imp->imp_at.iat_net_latency.at_worst_timestamp;
998         seq_printf(m, "%-10s : cur %3u  worst %3u (at %lld, %llds ago) ",
999                    "network", cur_timeout, worst_timeout, worst_timestamp,
1000                    now - worst_timestamp);
1001         lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency);
1002
1003         for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
1004                 struct adaptive_timeout *service_est;
1005
1006                 if (imp->imp_at.iat_portal[i] == 0)
1007                         break;
1008
1009                 service_est = &imp->imp_at.iat_service_estimate[i];
1010                 cur_timeout = obd_at_get(imp->imp_obd, service_est);
1011                 worst_timeout = service_est->at_worst_timeout_ever;
1012                 worst_timestamp = service_est->at_worst_timestamp;
1013                 seq_printf(m, "portal %-2d  : cur %3u  worst %3u (at %lld, %llds ago) ",
1014                            imp->imp_at.iat_portal[i], cur_timeout,
1015                            worst_timeout, worst_timestamp,
1016                            now - worst_timestamp);
1017                 lprocfs_at_hist_helper(m, service_est);
1018         }
1019 }
1020
1021 int lprocfs_timeouts_seq_show(struct seq_file *m, void *data)
1022 {
1023         struct obd_device *obd = (struct obd_device *)data;
1024         struct obd_import *imp;
1025         int rc;
1026
1027         with_imp_locked(obd, imp, rc)
1028                 lprocfs_timeouts_seq_show_locked(m, obd, imp);
1029         return rc;
1030 }
1031 EXPORT_SYMBOL(lprocfs_timeouts_seq_show);
1032
1033 int lprocfs_connect_flags_seq_show(struct seq_file *m, void *data)
1034 {
1035         struct obd_device *obd = data;
1036         __u64 flags;
1037         __u64 flags2;
1038         struct obd_import *imp;
1039         int rc;
1040
1041         with_imp_locked(obd, imp, rc) {
1042                 flags = imp->imp_connect_data.ocd_connect_flags;
1043                 flags2 = imp->imp_connect_data.ocd_connect_flags2;
1044                 seq_printf(m, "flags=%#llx\n", flags);
1045                 seq_printf(m, "flags2=%#llx\n", flags2);
1046                 obd_connect_seq_flags2str(m, flags, flags2, "\n");
1047                 seq_printf(m, "\n");
1048         }
1049
1050         return rc;
1051 }
1052 EXPORT_SYMBOL(lprocfs_connect_flags_seq_show);
1053
1054 static const struct attribute *obd_def_uuid_attrs[] = {
1055         &lustre_attr_uuid.attr,
1056         NULL,
1057 };
1058
1059 static const struct attribute *obd_def_attrs[] = {
1060         &lustre_attr_blocksize.attr,
1061         &lustre_attr_kbytestotal.attr,
1062         &lustre_attr_kbytesfree.attr,
1063         &lustre_attr_kbytesavail.attr,
1064         &lustre_attr_filestotal.attr,
1065         &lustre_attr_filesfree.attr,
1066         &lustre_attr_uuid.attr,
1067         NULL,
1068 };
1069
1070 static void obd_sysfs_release(struct kobject *kobj)
1071 {
1072         struct obd_device *obd = container_of(kobj, struct obd_device,
1073                                               obd_kset.kobj);
1074
1075         complete(&obd->obd_kobj_unregister);
1076 }
1077
1078 int lprocfs_obd_setup(struct obd_device *obd, bool uuid_only)
1079 {
1080         struct ldebugfs_vars *debugfs_vars = NULL;
1081         int rc;
1082
1083         if (!obd || obd->obd_magic != OBD_DEVICE_MAGIC)
1084                 return -ENODEV;
1085
1086         rc = kobject_set_name(&obd->obd_kset.kobj, "%s", obd->obd_name);
1087         if (rc)
1088                 return rc;
1089
1090         obd->obd_ktype.sysfs_ops = &lustre_sysfs_ops;
1091         obd->obd_ktype.release = obd_sysfs_release;
1092
1093         obd->obd_kset.kobj.parent = &obd->obd_type->typ_kobj;
1094         obd->obd_kset.kobj.ktype = &obd->obd_ktype;
1095         init_completion(&obd->obd_kobj_unregister);
1096         rc = kset_register(&obd->obd_kset);
1097         if (rc)
1098                 return rc;
1099
1100         if (uuid_only)
1101                 obd->obd_attrs = obd_def_uuid_attrs;
1102         else
1103                 obd->obd_attrs = obd_def_attrs;
1104
1105         rc = sysfs_create_files(&obd->obd_kset.kobj, obd->obd_attrs);
1106         if (rc) {
1107                 kset_unregister(&obd->obd_kset);
1108                 return rc;
1109         }
1110
1111         if (!obd->obd_type->typ_procroot)
1112                 debugfs_vars = obd->obd_debugfs_vars;
1113         obd->obd_debugfs_entry = debugfs_create_dir(
1114                 obd->obd_name, obd->obd_type->typ_debugfs_entry);
1115         ldebugfs_add_vars(obd->obd_debugfs_entry, debugfs_vars, obd);
1116
1117         if (obd->obd_proc_entry || !obd->obd_type->typ_procroot)
1118                 GOTO(already_registered, rc);
1119
1120         obd->obd_proc_entry = lprocfs_register(obd->obd_name,
1121                                                obd->obd_type->typ_procroot,
1122                                                obd->obd_vars, obd);
1123         if (IS_ERR(obd->obd_proc_entry)) {
1124                 rc = PTR_ERR(obd->obd_proc_entry);
1125                 CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name);
1126                 obd->obd_proc_entry = NULL;
1127
1128                 debugfs_remove_recursive(obd->obd_debugfs_entry);
1129                 obd->obd_debugfs_entry = NULL;
1130
1131                 sysfs_remove_files(&obd->obd_kset.kobj, obd->obd_attrs);
1132                 obd->obd_attrs = NULL;
1133                 kset_unregister(&obd->obd_kset);
1134                 return rc;
1135         }
1136 already_registered:
1137         return rc;
1138 }
1139 EXPORT_SYMBOL(lprocfs_obd_setup);
1140
1141 int lprocfs_obd_cleanup(struct obd_device *obd)
1142 {
1143         if (!obd)
1144                 return -EINVAL;
1145
1146         debugfs_remove_recursive(obd->obd_debugfs_gss_dir);
1147         obd->obd_debugfs_gss_dir = NULL;
1148
1149         if (obd->obd_proc_exports_entry) {
1150                 /* Should be no exports left */
1151                 lprocfs_remove(&obd->obd_proc_exports_entry);
1152                 obd->obd_proc_exports_entry = NULL;
1153         }
1154
1155         if (obd->obd_proc_entry) {
1156                 lprocfs_remove(&obd->obd_proc_entry);
1157                 obd->obd_proc_entry = NULL;
1158         }
1159
1160         debugfs_remove_recursive(obd->obd_debugfs_entry);
1161         obd->obd_debugfs_entry = NULL;
1162
1163         /* obd device never allocated a kset */
1164         if (!obd->obd_kset.kobj.state_initialized)
1165                 return 0;
1166
1167         if (obd->obd_attrs) {
1168                 sysfs_remove_files(&obd->obd_kset.kobj, obd->obd_attrs);
1169                 obd->obd_attrs = NULL;
1170         }
1171
1172         kset_unregister(&obd->obd_kset);
1173         wait_for_completion(&obd->obd_kobj_unregister);
1174         return 0;
1175 }
1176 EXPORT_SYMBOL(lprocfs_obd_cleanup);
1177
1178 int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
1179 {
1180         struct lprocfs_counter *cntr;
1181         unsigned int percpusize;
1182         int rc = -ENOMEM;
1183         unsigned long flags = 0;
1184         int i;
1185
1186         LASSERT(stats->ls_percpu[cpuid] == NULL);
1187         LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0);
1188
1189         percpusize = lprocfs_stats_counter_size(stats);
1190         LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[cpuid], percpusize);
1191         if (stats->ls_percpu[cpuid]) {
1192                 rc = 0;
1193                 if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) {
1194                         if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
1195                                 spin_lock_irqsave(&stats->ls_lock, flags);
1196                         else
1197                                 spin_lock(&stats->ls_lock);
1198                         if (stats->ls_biggest_alloc_num <= cpuid)
1199                                 stats->ls_biggest_alloc_num = cpuid + 1;
1200                         if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) {
1201                                 spin_unlock_irqrestore(&stats->ls_lock, flags);
1202                         } else {
1203                                 spin_unlock(&stats->ls_lock);
1204                         }
1205                 }
1206                 /* initialize the ls_percpu[cpuid] non-zero counter */
1207                 for (i = 0; i < stats->ls_num; ++i) {
1208                         cntr = lprocfs_stats_counter_get(stats, cpuid, i);
1209                         cntr->lc_min = LC_MIN_INIT;
1210                 }
1211         }
1212         return rc;
1213 }
1214
1215 struct lprocfs_stats *lprocfs_stats_alloc(unsigned int num,
1216                                           enum lprocfs_stats_flags flags)
1217 {
1218         struct lprocfs_stats *stats;
1219         unsigned int num_entry;
1220         unsigned int percpusize = 0;
1221         int i;
1222
1223         if (num == 0)
1224                 return NULL;
1225
1226         if (lprocfs_no_percpu_stats != 0)
1227                 flags |= LPROCFS_STATS_FLAG_NOPERCPU;
1228
1229         if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
1230                 num_entry = 1;
1231         else
1232                 num_entry = num_possible_cpus();
1233
1234         /* alloc percpu pointers for all possible cpu slots */
1235         LIBCFS_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
1236         if (!stats)
1237                 return NULL;
1238
1239         stats->ls_num = num;
1240         stats->ls_flags = flags;
1241         stats->ls_init = ktime_get_real();
1242         spin_lock_init(&stats->ls_lock);
1243
1244         /* alloc num of counter headers */
1245         CFS_ALLOC_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num);
1246         if (!stats->ls_cnt_header)
1247                 goto fail;
1248
1249         if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) {
1250                 /* contains only one set counters */
1251                 percpusize = lprocfs_stats_counter_size(stats);
1252                 LIBCFS_ALLOC_ATOMIC(stats->ls_percpu[0], percpusize);
1253                 if (!stats->ls_percpu[0])
1254                         goto fail;
1255                 stats->ls_biggest_alloc_num = 1;
1256         } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) {
1257                 /* alloc all percpu data, currently only obd_memory use this */
1258                 for (i = 0; i < num_entry; ++i)
1259                         if (lprocfs_stats_alloc_one(stats, i) < 0)
1260                                 goto fail;
1261         }
1262
1263         return stats;
1264
1265 fail:
1266         lprocfs_stats_free(&stats);
1267         return NULL;
1268 }
1269 EXPORT_SYMBOL(lprocfs_stats_alloc);
1270
1271 void lprocfs_stats_free(struct lprocfs_stats **statsh)
1272 {
1273         struct lprocfs_stats *stats = *statsh;
1274         unsigned int num_entry;
1275         unsigned int percpusize;
1276         unsigned int i;
1277
1278         if (!stats || stats->ls_num == 0)
1279                 return;
1280         *statsh = NULL;
1281
1282         if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
1283                 num_entry = 1;
1284         else
1285                 num_entry = num_possible_cpus();
1286
1287         percpusize = lprocfs_stats_counter_size(stats);
1288         for (i = 0; i < num_entry; i++)
1289                 if (stats->ls_percpu[i])
1290                         LIBCFS_FREE(stats->ls_percpu[i], percpusize);
1291
1292         if (stats->ls_cnt_header) {
1293                 for (i = 0; i < stats->ls_num; i++)
1294                         if (stats->ls_cnt_header[i].lc_hist != NULL)
1295                                 CFS_FREE_PTR(stats->ls_cnt_header[i].lc_hist);
1296                 CFS_FREE_PTR_ARRAY(stats->ls_cnt_header, stats->ls_num);
1297         }
1298
1299         LIBCFS_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_entry]));
1300 }
1301 EXPORT_SYMBOL(lprocfs_stats_free);
1302
1303 u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
1304                             enum lprocfs_fields_flags field)
1305 {
1306         unsigned long flags = 0;
1307         unsigned int num_cpu;
1308         unsigned int i;
1309         u64 ret = 0;
1310
1311         LASSERT(stats);
1312
1313         num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
1314         for (i = 0; i < num_cpu; i++) {
1315                 struct lprocfs_counter *cntr;
1316
1317                 if (!stats->ls_percpu[i])
1318                         continue;
1319
1320                 cntr = lprocfs_stats_counter_get(stats, i, idx);
1321                 ret += lprocfs_read_helper(cntr, &stats->ls_cnt_header[idx],
1322                                            stats->ls_flags, field);
1323         }
1324         lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
1325         return ret;
1326 }
1327 EXPORT_SYMBOL(lprocfs_stats_collector);
1328
1329 void lprocfs_stats_clear(struct lprocfs_stats *stats)
1330 {
1331         struct lprocfs_counter *percpu_cntr;
1332         unsigned int num_entry;
1333         unsigned long flags = 0;
1334         int i, j;
1335
1336         num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
1337
1338         /* clear histogram if exists */
1339         for (j = 0; j < stats->ls_num; j++) {
1340                 struct obd_histogram *hist = stats->ls_cnt_header[j].lc_hist;
1341
1342                 if (hist != NULL)
1343                         lprocfs_oh_clear(hist);
1344         }
1345
1346         for (i = 0; i < num_entry; i++) {
1347                 if (!stats->ls_percpu[i])
1348                         continue;
1349                 for (j = 0; j < stats->ls_num; j++) {
1350                         percpu_cntr = lprocfs_stats_counter_get(stats, i, j);
1351                         percpu_cntr->lc_count           = 0;
1352                         percpu_cntr->lc_min             = LC_MIN_INIT;
1353                         percpu_cntr->lc_max             = 0;
1354                         percpu_cntr->lc_sumsquare       = 0;
1355                         percpu_cntr->lc_sum             = 0;
1356                         if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
1357                                 percpu_cntr->lc_sum_irq = 0;
1358                 }
1359         }
1360         stats->ls_init = ktime_get_real();
1361
1362         lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
1363 }
1364 EXPORT_SYMBOL(lprocfs_stats_clear);
1365
1366 static ssize_t lprocfs_stats_seq_write(struct file *file,
1367                                        const char __user *buf,
1368                                        size_t len, loff_t *off)
1369 {
1370         struct seq_file *seq = file->private_data;
1371         struct lprocfs_stats *stats = seq->private;
1372
1373         lprocfs_stats_clear(stats);
1374
1375         return len;
1376 }
1377
1378 static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
1379 {
1380         struct lprocfs_stats *stats = p->private;
1381
1382         return (*pos < stats->ls_num) ? pos : NULL;
1383 }
1384
/* seq_file stop: nothing to do for the stats iterator */
static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
{
}
1388
1389 static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
1390 {
1391         (*pos)++;
1392
1393         return lprocfs_stats_seq_start(p, pos);
1394 }
1395
1396 /**
1397  * print header of stats including snapshot_time, start_time and elapsed_time.
1398  *
1399  * \param seq           the file to print content to
1400  * \param now           end time to calculate elapsed_time
1401  * \param ts_init       start time to calculate elapsed_time
1402  * \param width         the width of key to align them well
1403  * \param colon         "" or ":"
1404  * \param show_units    show units or not
1405  * \param prefix        prefix (indent) before printing each line of header
1406  *                      to align them with other content
1407  */
1408 void lprocfs_stats_header(struct seq_file *seq, ktime_t now, ktime_t ts_init,
1409                           int width, const char *colon, bool show_units,
1410                           const char *prefix)
1411 {
1412         const char *units = show_units ? " secs.nsecs" : "";
1413         struct timespec64 ts;
1414         const char *field;
1415
1416         field = (colon && colon[0]) ? "snapshot_time:" : "snapshot_time";
1417         ts = ktime_to_timespec64(now);
1418         seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field,
1419                    (s64)ts.tv_sec, ts.tv_nsec, units);
1420
1421         if (!obd_enable_stats_header)
1422                 return;
1423
1424         field = (colon && colon[0]) ? "start_time:" : "start_time";
1425         ts = ktime_to_timespec64(ts_init);
1426         seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field,
1427                    (s64)ts.tv_sec, ts.tv_nsec, units);
1428
1429         field = (colon && colon[0]) ? "elapsed_time:" : "elapsed_time";
1430         ts = ktime_to_timespec64(ktime_sub(now, ts_init));
1431         seq_printf(seq, "%s%-*s %llu.%09lu%s\n", prefix, width, field,
1432                    (s64)ts.tv_sec, ts.tv_nsec, units);
1433 }
1434 EXPORT_SYMBOL(lprocfs_stats_header);
1435
1436 /* seq file export of one lprocfs counter */
1437 static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
1438 {
1439         struct lprocfs_stats *stats = p->private;
1440         struct lprocfs_counter_header *hdr;
1441         struct lprocfs_counter ctr;
1442         int idx = *(loff_t *)v;
1443
1444         if (idx == 0)
1445                 lprocfs_stats_header(p, ktime_get_real(), stats->ls_init, 25,
1446                                      "", true, "");
1447
1448         hdr = &stats->ls_cnt_header[idx];
1449         lprocfs_stats_collect(stats, idx, &ctr);
1450
1451         if (ctr.lc_count == 0)
1452                 return 0;
1453
1454         seq_printf(p, "%-25s %lld samples [%s]", hdr->lc_name,
1455                    ctr.lc_count, hdr->lc_units);
1456
1457         if ((hdr->lc_config & LPROCFS_CNTR_AVGMINMAX) && ctr.lc_count > 0) {
1458                 seq_printf(p, " %lld %lld %lld",
1459                            ctr.lc_min, ctr.lc_max, ctr.lc_sum);
1460                 if (hdr->lc_config & LPROCFS_CNTR_STDDEV)
1461                         seq_printf(p, " %llu", ctr.lc_sumsquare);
1462         }
1463         seq_putc(p, '\n');
1464         return 0;
1465 }
1466
1467 static const struct seq_operations lprocfs_stats_seq_sops = {
1468         .start  = lprocfs_stats_seq_start,
1469         .stop   = lprocfs_stats_seq_stop,
1470         .next   = lprocfs_stats_seq_next,
1471         .show   = lprocfs_stats_seq_show,
1472 };
1473
1474 static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
1475 {
1476         struct seq_file *seq;
1477         int rc;
1478
1479         rc = seq_open(file, &lprocfs_stats_seq_sops);
1480         if (rc)
1481                 return rc;
1482         seq = file->private_data;
1483         seq->private = inode->i_private ? inode->i_private : pde_data(inode);
1484         return 0;
1485 }
1486
1487 const struct file_operations ldebugfs_stats_seq_fops = {
1488         .owner   = THIS_MODULE,
1489         .open    = lprocfs_stats_seq_open,
1490         .read    = seq_read,
1491         .write   = lprocfs_stats_seq_write,
1492         .llseek  = seq_lseek,
1493         .release = lprocfs_seq_release,
1494 };
1495 EXPORT_SYMBOL(ldebugfs_stats_seq_fops);
1496
1497 static const struct proc_ops lprocfs_stats_seq_fops = {
1498         PROC_OWNER(THIS_MODULE)
1499         .proc_open      = lprocfs_stats_seq_open,
1500         .proc_read      = seq_read,
1501         .proc_write     = lprocfs_stats_seq_write,
1502         .proc_lseek     = seq_lseek,
1503         .proc_release   = lprocfs_seq_release,
1504 };
1505
1506 int lprocfs_stats_register(struct proc_dir_entry *root, const char *name,
1507                            struct lprocfs_stats *stats)
1508 {
1509         struct proc_dir_entry *entry;
1510
1511         LASSERT(root != NULL);
1512         entry = proc_create_data(name, 0644, root,
1513                                  &lprocfs_stats_seq_fops, stats);
1514         if (!entry)
1515                 return -ENOMEM;
1516
1517         return 0;
1518 }
1519 EXPORT_SYMBOL(lprocfs_stats_register);
1520
1521 static const char *lprocfs_counter_config_units(const char *name,
1522                                          enum lprocfs_counter_config config)
1523 {
1524         const char *units;
1525
1526         switch (config & LPROCFS_TYPE_MASK) {
1527         default:
1528                 units = "reqs"; break;
1529         case LPROCFS_TYPE_BYTES:
1530                 units = "bytes"; break;
1531         case LPROCFS_TYPE_PAGES:
1532                 units = "pages"; break;
1533         case LPROCFS_TYPE_LOCKS:
1534                 units = "locks"; break;
1535         case LPROCFS_TYPE_LOCKSPS:
1536                 units = "locks/s"; break;
1537         case LPROCFS_TYPE_SECS:
1538                 units = "secs"; break;
1539         case LPROCFS_TYPE_USECS:
1540                 units = "usecs"; break;
1541         }
1542
1543         return units;
1544 }
1545
1546 void lprocfs_counter_init_units(struct lprocfs_stats *stats, int index,
1547                                 enum lprocfs_counter_config config,
1548                                 const char *name, const char *units)
1549 {
1550         struct lprocfs_counter_header *header;
1551         struct lprocfs_counter *percpu_cntr;
1552         unsigned long flags = 0;
1553         unsigned int i;
1554         unsigned int num_cpu;
1555
1556         LASSERT(stats != NULL);
1557
1558         header = &stats->ls_cnt_header[index];
1559         LASSERTF(header != NULL, "Failed to allocate stats header:[%d]%s/%s\n",
1560                  index, name, units);
1561
1562         header->lc_config = config;
1563         header->lc_name = name;
1564         header->lc_units = units;
1565
1566         if (config & LPROCFS_CNTR_HISTOGRAM) {
1567                 CFS_ALLOC_PTR(stats->ls_cnt_header[index].lc_hist);
1568                 if (stats->ls_cnt_header[index].lc_hist == NULL)
1569                         CERROR("LprocFS: Failed to allocate histogram:[%d]%s/%s\n",
1570                                index, name, units);
1571                 else
1572                         spin_lock_init(&stats->ls_cnt_header[index].lc_hist->oh_lock);
1573         }
1574         num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags);
1575         for (i = 0; i < num_cpu; ++i) {
1576                 if (!stats->ls_percpu[i])
1577                         continue;
1578                 percpu_cntr = lprocfs_stats_counter_get(stats, i, index);
1579                 percpu_cntr->lc_count           = 0;
1580                 percpu_cntr->lc_min             = LC_MIN_INIT;
1581                 percpu_cntr->lc_max             = 0;
1582                 percpu_cntr->lc_sumsquare       = 0;
1583                 percpu_cntr->lc_sum             = 0;
1584                 if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
1585                         percpu_cntr->lc_sum_irq = 0;
1586         }
1587         lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
1588 }
1589 EXPORT_SYMBOL(lprocfs_counter_init_units);
1590
1591 void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
1592                           enum lprocfs_counter_config config,
1593                           const char *name)
1594 {
1595         lprocfs_counter_init_units(stats, index, config, name,
1596                                    lprocfs_counter_config_units(name, config));
1597 }
1598 EXPORT_SYMBOL(lprocfs_counter_init);
1599
1600 static const char * const mps_stats[] = {
1601         [LPROC_MD_CLOSE]                = "close",
1602         [LPROC_MD_CREATE]               = "create",
1603         [LPROC_MD_ENQUEUE]              = "enqueue",
1604         [LPROC_MD_GETATTR]              = "getattr",
1605         [LPROC_MD_INTENT_LOCK]          = "intent_lock",
1606         [LPROC_MD_LINK]                 = "link",
1607         [LPROC_MD_RENAME]               = "rename",
1608         [LPROC_MD_SETATTR]              = "setattr",
1609         [LPROC_MD_FSYNC]                = "fsync",
1610         [LPROC_MD_READ_PAGE]            = "read_page",
1611         [LPROC_MD_UNLINK]               = "unlink",
1612         [LPROC_MD_SETXATTR]             = "setxattr",
1613         [LPROC_MD_GETXATTR]             = "getxattr",
1614         [LPROC_MD_INTENT_GETATTR_ASYNC] = "intent_getattr_async",
1615         [LPROC_MD_REVALIDATE_LOCK]      = "revalidate_lock",
1616 };
1617
1618 int lprocfs_alloc_md_stats(struct obd_device *obd,
1619                            unsigned int num_private_stats)
1620 {
1621         struct lprocfs_stats *stats;
1622         unsigned int num_stats;
1623         int rc, i;
1624
1625         /*
1626          * TODO Ensure that this function is only used where
1627          * appropriate by adding an assertion to the effect that
1628          * obd->obd_type->typ_md_ops is not NULL. We can't do this now
1629          * because mdt_procfs_init() uses this function to allocate
1630          * the stats backing /proc/fs/lustre/mdt/.../md_stats but the
1631          * mdt layer does not use the md_ops interface. This is
1632          * confusing and a waste of memory. See LU-2484.
1633          */
1634         LASSERT(obd->obd_proc_entry != NULL);
1635         LASSERT(obd->obd_md_stats == NULL);
1636
1637         num_stats = ARRAY_SIZE(mps_stats) + num_private_stats;
1638         stats = lprocfs_stats_alloc(num_stats, 0);
1639         if (!stats)
1640                 return -ENOMEM;
1641
1642         for (i = 0; i < ARRAY_SIZE(mps_stats); i++) {
1643                 lprocfs_counter_init(stats, i, LPROCFS_TYPE_REQS,
1644                                      mps_stats[i]);
1645                 if (!stats->ls_cnt_header[i].lc_name) {
1646                         CERROR("Missing md_stat initializer md_op operation at offset %d. Aborting.\n",
1647                                i);
1648                         LBUG();
1649                 }
1650         }
1651
1652         rc = lprocfs_stats_register(obd->obd_proc_entry, "md_stats", stats);
1653         if (rc < 0) {
1654                 lprocfs_stats_free(&stats);
1655         } else {
1656                 obd->obd_md_stats = stats;
1657         }
1658
1659         return rc;
1660 }
1661 EXPORT_SYMBOL(lprocfs_alloc_md_stats);
1662
1663 void lprocfs_free_md_stats(struct obd_device *obd)
1664 {
1665         struct lprocfs_stats *stats = obd->obd_md_stats;
1666
1667         if (stats) {
1668                 obd->obd_md_stats = NULL;
1669                 lprocfs_stats_free(&stats);
1670         }
1671 }
1672 EXPORT_SYMBOL(lprocfs_free_md_stats);
1673
1674 void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
1675 {
1676         lprocfs_counter_init(ldlm_stats, LDLM_ENQUEUE - LDLM_FIRST_OPC,
1677                              LPROCFS_TYPE_REQS, "ldlm_enqueue");
1678         lprocfs_counter_init(ldlm_stats, LDLM_CONVERT - LDLM_FIRST_OPC,
1679                              LPROCFS_TYPE_REQS, "ldlm_convert");
1680         lprocfs_counter_init(ldlm_stats, LDLM_CANCEL - LDLM_FIRST_OPC,
1681                              LPROCFS_TYPE_REQS, "ldlm_cancel");
1682         lprocfs_counter_init(ldlm_stats, LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
1683                              LPROCFS_TYPE_REQS, "ldlm_bl_callback");
1684         lprocfs_counter_init(ldlm_stats, LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
1685                              LPROCFS_TYPE_REQS, "ldlm_cp_callback");
1686         lprocfs_counter_init(ldlm_stats, LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
1687                              LPROCFS_TYPE_REQS, "ldlm_gl_callback");
1688 }
1689 EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
1690
1691 __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
1692                           struct lprocfs_counter_header *header,
1693                           enum lprocfs_stats_flags flags,
1694                           enum lprocfs_fields_flags field)
1695 {
1696         __s64 ret = 0;
1697
1698         if (!lc || !header)
1699                 RETURN(0);
1700
1701         switch (field) {
1702                 case LPROCFS_FIELDS_FLAGS_CONFIG:
1703                         ret = header->lc_config;
1704                         break;
1705                 case LPROCFS_FIELDS_FLAGS_SUM:
1706                         ret = lc->lc_sum;
1707                         if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
1708                                 ret += lc->lc_sum_irq;
1709                         break;
1710                 case LPROCFS_FIELDS_FLAGS_MIN:
1711                         ret = lc->lc_min;
1712                         break;
1713                 case LPROCFS_FIELDS_FLAGS_MAX:
1714                         ret = lc->lc_max;
1715                         break;
1716                 case LPROCFS_FIELDS_FLAGS_AVG:
1717                         ret = div64_u64((flags & LPROCFS_STATS_FLAG_IRQ_SAFE ?
1718                                          lc->lc_sum_irq : 0) + lc->lc_sum,
1719                                         lc->lc_count);
1720                         break;
1721                 case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
1722                         ret = lc->lc_sumsquare;
1723                         break;
1724                 case LPROCFS_FIELDS_FLAGS_COUNT:
1725                         ret = lc->lc_count;
1726                         break;
1727                 default:
1728                         break;
1729         };
1730         RETURN(ret);
1731 }
1732 EXPORT_SYMBOL(lprocfs_read_helper);
1733
1734 /**
1735  * string_to_size - convert ASCII string representing a numerical
1736  *                  value with optional units to 64-bit binary value
1737  *
1738  * @size:       The numerical value extract out of @buffer
1739  * @buffer:     passed in string to parse
1740  * @count:      length of the @buffer
1741  *
1742  * This function returns a 64-bit binary value if @buffer contains a valid
1743  * numerical string. The string is parsed to 3 significant figures after
1744  * the decimal point. Support the string containing an optional units at
1745  * the end which can be base 2 or base 10 in value. If no units are given
1746  * the string is assumed to just a numerical value.
1747  *
1748  * Returns:     @count if the string is successfully parsed,
1749  *              -errno on invalid input strings. Error values:
1750  *
1751  *  - ``-EINVAL``: @buffer is not a proper numerical string
1752  *  - ``-EOVERFLOW``: results does not fit into 64 bits.
1753  *  - ``-E2BIG ``: @buffer is too large (not a valid number)
1754  */
1755 int string_to_size(u64 *size, const char *buffer, size_t count)
1756 {
1757         /* For string_get_size() it can support values above exabytes,
1758          * (ZiB, YiB) due to breaking the return value into a size and
1759          * bulk size to avoid 64 bit overflow. We don't break the size
1760          * up into block size units so we don't support ZiB or YiB.
1761          */
1762         static const char *const units_10[] = {
1763                 "kB", "MB", "GB", "TB", "PB", "EB",
1764         };
1765         static const char *const units_2[] = {
1766                 "K",  "M",  "G",  "T",  "P",  "E",
1767         };
1768         static const char *const *const units_str[] = {
1769                 [STRING_UNITS_2] = units_2,
1770                 [STRING_UNITS_10] = units_10,
1771         };
1772         static const unsigned int coeff[] = {
1773                 [STRING_UNITS_10] = 1000,
1774                 [STRING_UNITS_2] = 1024,
1775         };
1776         enum string_size_units unit = STRING_UNITS_2;
1777         u64 whole, blk_size = 1;
1778         char kernbuf[22], *end;
1779         size_t len = count;
1780         int rc;
1781         int i;
1782
1783         if (count >= sizeof(kernbuf)) {
1784                 CERROR("count %zd > buffer %zd\n", count, sizeof(kernbuf));
1785                 return -E2BIG;
1786         }
1787
1788         *size = 0;
1789         /* The "iB" suffix is optionally allowed for indicating base-2 numbers.
1790          * If suffix is only "B" and not "iB" then we treat it as base-10.
1791          */
1792         end = strstr(buffer, "B");
1793         if (end && *(end - 1) != 'i')
1794                 unit = STRING_UNITS_10;
1795
1796         i = unit == STRING_UNITS_2 ? ARRAY_SIZE(units_2) - 1 :
1797                                      ARRAY_SIZE(units_10) - 1;
1798         do {
1799                 end = strnstr(buffer, units_str[unit][i], count);
1800                 if (end) {
1801                         for (; i >= 0; i--)
1802                                 blk_size *= coeff[unit];
1803                         len = end - buffer;
1804                         break;
1805                 }
1806         } while (i--);
1807
1808         /* as 'B' is a substring of all units, we need to handle it
1809          * separately.
1810          */
1811         if (!end) {
1812                 /* 'B' is only acceptable letter at this point */
1813                 end = strnchr(buffer, count, 'B');
1814                 if (end) {
1815                         len = end - buffer;
1816
1817                         if (count - len > 2 ||
1818                             (count - len == 2 && strcmp(end, "B\n") != 0)) {
1819                                 CDEBUG(D_INFO, "unknown suffix '%s'\n", buffer);
1820                                 return -EINVAL;
1821                         }
1822                 }
1823                 /* kstrtoull will error out if it has non digits */
1824                 goto numbers_only;
1825         }
1826
1827         end = strnchr(buffer, count, '.');
1828         if (end) {
1829                 /* need to limit 3 decimal places */
1830                 char rem[4] = "000";
1831                 u64 frac = 0;
1832                 size_t off;
1833
1834                 len = end - buffer;
1835                 end++;
1836
1837                 /* limit to 3 decimal points */
1838                 off = min_t(size_t, 3, strspn(end, "0123456789"));
1839                 /* need to limit frac_d to a u32 */
1840                 memcpy(rem, end, off);
1841                 rc = kstrtoull(rem, 10, &frac);
1842                 if (rc)
1843                         return rc;
1844
1845                 if (fls64(frac) + fls64(blk_size) - 1 > 64)
1846                         return -EOVERFLOW;
1847
1848                 frac *= blk_size;
1849                 do_div(frac, 1000);
1850                 *size += frac;
1851         }
1852 numbers_only:
1853         snprintf(kernbuf, sizeof(kernbuf), "%.*s", (int)len, buffer);
1854         rc = kstrtoull(kernbuf, 10, &whole);
1855         if (rc)
1856                 return rc;
1857
1858         if (whole != 0 && fls64(whole) + fls64(blk_size) - 1 > 64)
1859                 return -EOVERFLOW;
1860
1861         *size += whole * blk_size;
1862
1863         return count;
1864 }
1865 EXPORT_SYMBOL(string_to_size);
1866
1867 /**
1868  * sysfs_memparse - parse a ASCII string to 64-bit binary value,
1869  *                  with optional units
1870  *
1871  * @buffer:     kernel pointer to input string
1872  * @count:      number of bytes in the input @buffer
1873  * @val:        (output) binary value returned to caller
1874  * @defunit:    default unit suffix to use if none is provided
1875  *
1876  * Parses a string into a number. The number stored at @buffer is
1877  * potentially suffixed with K, M, G, T, P, E. Besides these other
1878  * valid suffix units are shown in the string_to_size() function.
1879  * If the string lacks a suffix then the defunit is used. The defunit
1880  * should be given as a binary unit (e.g. MiB) as that is the standard
1881  * for tunables in Lustre. If no unit suffix is given (e.g. 'G'), then
1882  * it is assumed to be in binary units.
1883  *
1884  * Returns:     0 on success or -errno on failure.
1885  */
1886 int sysfs_memparse(const char *buffer, size_t count, u64 *val,
1887                    const char *defunit)
1888 {
1889         const char *param = buffer;
1890         char tmp_buf[23];
1891         int rc;
1892
1893         count = strlen(buffer);
1894         while (count > 0 && isspace(buffer[count - 1]))
1895                 count--;
1896
1897         if (!count)
1898                 RETURN(-EINVAL);
1899
1900         /* If there isn't already a unit on this value, append @defunit.
1901          * Units of 'B' don't affect the value, so don't bother adding.
1902          */
1903         if (!isalpha(buffer[count - 1]) && defunit[0] != 'B') {
1904                 if (count + 3 >= sizeof(tmp_buf)) {
1905                         CERROR("count %zd > size %zd\n", count, sizeof(param));
1906                         RETURN(-E2BIG);
1907                 }
1908
1909                 scnprintf(tmp_buf, sizeof(tmp_buf), "%.*s%s", (int)count,
1910                           buffer, defunit);
1911                 param = tmp_buf;
1912                 count = strlen(param);
1913         }
1914
1915         rc = string_to_size(val, param, count);
1916
1917         return rc < 0 ? rc : 0;
1918 }
1919 EXPORT_SYMBOL(sysfs_memparse);
1920
/*
 * Locate the first occurrence of the string @s2 within the first @len
 * bytes of @s1.
 *
 * Returns a pointer to the match, @s1 itself when @s2 is empty, or NULL
 * when @s2 does not occur completely inside those @len bytes.
 */
char *lprocfs_strnstr(const char *s1, const char *s2, size_t len)
{
	const size_t needle_len = strlen(s2);
	const char *last_start;
	const char *pos;

	if (needle_len == 0)
		return (char *)s1;

	if (len < needle_len)
		return NULL;

	/* last position at which the needle still fits into @len bytes */
	last_start = s1 + (len - needle_len);
	for (pos = s1; pos <= last_start; pos++) {
		if (memcmp(pos, s2, needle_len) == 0)
			return (char *)pos;
	}

	return NULL;
}
1936 EXPORT_SYMBOL(lprocfs_strnstr);
1937
1938 /**
1939  * Find the string \a name in the input \a buffer, and return a pointer to the
1940  * value immediately following \a name, reducing \a count appropriately.
1941  * If \a name is not found the original \a buffer is returned.
1942  */
1943 char *lprocfs_find_named_value(const char *buffer, const char *name,
1944                                 size_t *count)
1945 {
1946         char *val;
1947         size_t buflen = *count;
1948
1949         /* there is no strnstr() in rhel5 and ubuntu kernels */
1950         val = lprocfs_strnstr(buffer, name, buflen);
1951         if (!val)
1952                 return (char *)buffer;
1953
1954         val += strlen(name);                             /* skip prefix */
1955         while (val < buffer + buflen && isspace(*val)) /* skip separator */
1956                 val++;
1957
1958         *count = 0;
1959         while (val < buffer + buflen && isalnum(*val)) {
1960                 ++*count;
1961                 ++val;
1962         }
1963
1964         return val - *count;
1965 }
1966 EXPORT_SYMBOL(lprocfs_find_named_value);
1967
1968 int lprocfs_seq_create(struct proc_dir_entry *parent,
1969                        const char *name,
1970                        mode_t mode,
1971                        const struct proc_ops *seq_fops,
1972                        void *data)
1973 {
1974         struct proc_dir_entry *entry;
1975         ENTRY;
1976
1977         /* Disallow secretly (un)writable entries. */
1978         LASSERT(!seq_fops->proc_write == !(mode & 0222));
1979
1980         entry = proc_create_data(name, mode, parent, seq_fops, data);
1981
1982         if (!entry)
1983                 RETURN(-ENOMEM);
1984
1985         RETURN(0);
1986 }
1987 EXPORT_SYMBOL(lprocfs_seq_create);
1988
1989 int lprocfs_obd_seq_create(struct obd_device *obd,
1990                            const char *name,
1991                            mode_t mode,
1992                            const struct proc_ops *seq_fops,
1993                            void *data)
1994 {
1995         return lprocfs_seq_create(obd->obd_proc_entry, name,
1996                                   mode, seq_fops, data);
1997 }
1998 EXPORT_SYMBOL(lprocfs_obd_seq_create);
1999
2000 void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
2001 {
2002         if (value >= OBD_HIST_MAX)
2003                 value = OBD_HIST_MAX - 1;
2004
2005         spin_lock(&oh->oh_lock);
2006         oh->oh_buckets[value]++;
2007         spin_unlock(&oh->oh_lock);
2008 }
2009 EXPORT_SYMBOL(lprocfs_oh_tally);
2010
2011 void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
2012 {
2013         unsigned int val = 0;
2014
2015         if (likely(value != 0))
2016                 val = min(fls(value - 1), OBD_HIST_MAX);
2017
2018         lprocfs_oh_tally(oh, val);
2019 }
2020 EXPORT_SYMBOL(lprocfs_oh_tally_log2);
2021
2022 unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
2023 {
2024         unsigned long ret = 0;
2025         int i;
2026
2027         for (i = 0; i < OBD_HIST_MAX; i++)
2028                 ret +=  oh->oh_buckets[i];
2029         return ret;
2030 }
2031 EXPORT_SYMBOL(lprocfs_oh_sum);
2032
2033 void lprocfs_oh_clear(struct obd_histogram *oh)
2034 {
2035         spin_lock(&oh->oh_lock);
2036         memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
2037         spin_unlock(&oh->oh_lock);
2038 }
2039 EXPORT_SYMBOL(lprocfs_oh_clear);
2040
2041 void lprocfs_oh_tally_pcpu(struct obd_hist_pcpu *oh,
2042                            unsigned int value)
2043 {
2044         if (value >= OBD_HIST_MAX)
2045                 value = OBD_HIST_MAX - 1;
2046
2047         percpu_counter_inc(&oh->oh_pc_buckets[value]);
2048 }
2049 EXPORT_SYMBOL(lprocfs_oh_tally_pcpu);
2050
2051 void lprocfs_oh_tally_log2_pcpu(struct obd_hist_pcpu *oh,
2052                                 unsigned int value)
2053 {
2054         unsigned int val = 0;
2055
2056         if (likely(value != 0))
2057                 val = min(fls(value - 1), OBD_HIST_MAX);
2058
2059         lprocfs_oh_tally_pcpu(oh, val);
2060 }
2061 EXPORT_SYMBOL(lprocfs_oh_tally_log2_pcpu);
2062
2063 unsigned long lprocfs_oh_counter_pcpu(struct obd_hist_pcpu *oh,
2064                                       unsigned int value)
2065 {
2066         return percpu_counter_sum(&oh->oh_pc_buckets[value]);
2067 }
2068 EXPORT_SYMBOL(lprocfs_oh_counter_pcpu);
2069
2070 unsigned long lprocfs_oh_sum_pcpu(struct obd_hist_pcpu *oh)
2071 {
2072         unsigned long ret = 0;
2073         int i;
2074
2075         for (i = 0; i < OBD_HIST_MAX; i++)
2076                 ret += percpu_counter_sum(&oh->oh_pc_buckets[i]);
2077
2078         return ret;
2079 }
2080 EXPORT_SYMBOL(lprocfs_oh_sum_pcpu);
2081
2082 int lprocfs_oh_alloc_pcpu(struct obd_hist_pcpu *oh)
2083 {
2084         int i, rc;
2085
2086         if (oh->oh_initialized)
2087                 return 0;
2088
2089         for (i = 0; i < OBD_HIST_MAX; i++) {
2090                 rc = percpu_counter_init(&oh->oh_pc_buckets[i], 0, GFP_KERNEL);
2091                 if (rc)
2092                         goto out;
2093         }
2094
2095         oh->oh_initialized = true;
2096
2097         return 0;
2098
2099 out:
2100         for (i--; i >= 0; i--)
2101                 percpu_counter_destroy(&oh->oh_pc_buckets[i]);
2102
2103         return rc;
2104 }
2105 EXPORT_SYMBOL(lprocfs_oh_alloc_pcpu);
2106
2107 void lprocfs_oh_clear_pcpu(struct obd_hist_pcpu *oh)
2108 {
2109         int i;
2110
2111         for (i = 0; i < OBD_HIST_MAX; i++)
2112                 percpu_counter_set(&oh->oh_pc_buckets[i], 0);
2113 }
2114 EXPORT_SYMBOL(lprocfs_oh_clear_pcpu);
2115
2116 void lprocfs_oh_release_pcpu(struct obd_hist_pcpu *oh)
2117 {
2118         int i;
2119
2120         if (!oh->oh_initialized)
2121                 return;
2122
2123         for (i = 0; i < OBD_HIST_MAX; i++)
2124                 percpu_counter_destroy(&oh->oh_pc_buckets[i]);
2125
2126         oh->oh_initialized = false;
2127 }
2128 EXPORT_SYMBOL(lprocfs_oh_release_pcpu);
2129
2130 ssize_t lustre_attr_show(struct kobject *kobj,
2131                          struct attribute *attr, char *buf)
2132 {
2133         struct lustre_attr *a = container_of(attr, struct lustre_attr, attr);
2134
2135         return a->show ? a->show(kobj, attr, buf) : 0;
2136 }
2137 EXPORT_SYMBOL_GPL(lustre_attr_show);
2138
2139 ssize_t lustre_attr_store(struct kobject *kobj, struct attribute *attr,
2140                           const char *buf, size_t len)
2141 {
2142         struct lustre_attr *a = container_of(attr, struct lustre_attr, attr);
2143
2144         return a->store ? a->store(kobj, attr, buf, len) : len;
2145 }
2146 EXPORT_SYMBOL_GPL(lustre_attr_store);
2147
2148 const struct sysfs_ops lustre_sysfs_ops = {
2149         .show  = lustre_attr_show,
2150         .store = lustre_attr_store,
2151 };
2152 EXPORT_SYMBOL_GPL(lustre_sysfs_ops);
2153
2154 int lprocfs_obd_max_pages_per_rpc_seq_show(struct seq_file *m, void *data)
2155 {
2156         struct obd_device *obd = data;
2157         struct client_obd *cli = &obd->u.cli;
2158
2159         spin_lock(&cli->cl_loi_list_lock);
2160         seq_printf(m, "%d\n", cli->cl_max_pages_per_rpc);
2161         spin_unlock(&cli->cl_loi_list_lock);
2162         return 0;
2163 }
2164 EXPORT_SYMBOL(lprocfs_obd_max_pages_per_rpc_seq_show);
2165
2166 ssize_t lprocfs_obd_max_pages_per_rpc_seq_write(struct file *file,
2167                                                 const char __user *buffer,
2168                                                 size_t count, loff_t *off)
2169 {
2170         struct seq_file *m = file->private_data;
2171         struct obd_device *obd = m->private;
2172         struct client_obd *cli = &obd->u.cli;
2173         struct obd_import *imp;
2174         struct obd_connect_data *ocd;
2175         int chunk_mask, rc;
2176         char kernbuf[22];
2177         u64 val;
2178
2179         if (count > sizeof(kernbuf) - 1)
2180                 return -EINVAL;
2181
2182         if (copy_from_user(kernbuf, buffer, count))
2183                 return -EFAULT;
2184
2185         kernbuf[count] = '\0';
2186
2187         rc = sysfs_memparse(kernbuf, count, &val, "B");
2188         if (rc)
2189                 return rc;
2190
2191         /* if the max_pages is specified in bytes, convert to pages */
2192         if (val >= ONE_MB_BRW_SIZE)
2193                 val >>= PAGE_SHIFT;
2194
2195         with_imp_locked(obd, imp, rc) {
2196                 ocd = &imp->imp_connect_data;
2197                 chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1);
2198                 /* max_pages_per_rpc must be chunk aligned */
2199                 val = (val + ~chunk_mask) & chunk_mask;
2200                 if (val == 0 || (ocd->ocd_brw_size != 0 &&
2201                                  val > ocd->ocd_brw_size >> PAGE_SHIFT)) {
2202                         rc = -ERANGE;
2203                 } else {
2204                         spin_lock(&cli->cl_loi_list_lock);
2205                         cli->cl_max_pages_per_rpc = val;
2206                         client_adjust_max_dirty(cli);
2207                         spin_unlock(&cli->cl_loi_list_lock);
2208                 }
2209         }
2210
2211         return rc ?: count;
2212 }
2213 EXPORT_SYMBOL(lprocfs_obd_max_pages_per_rpc_seq_write);
2214
2215 ssize_t short_io_bytes_show(struct kobject *kobj, struct attribute *attr,
2216                             char *buf)
2217 {
2218         struct obd_device *obd = container_of(kobj, struct obd_device,
2219                                               obd_kset.kobj);
2220         struct client_obd *cli = &obd->u.cli;
2221         int rc;
2222
2223         spin_lock(&cli->cl_loi_list_lock);
2224         rc = sprintf(buf, "%d\n", cli->cl_max_short_io_bytes);
2225         spin_unlock(&cli->cl_loi_list_lock);
2226         return rc;
2227 }
2228 EXPORT_SYMBOL(short_io_bytes_show);
2229
2230 /* Used to catch people who think they're specifying pages. */
2231 #define MIN_SHORT_IO_BYTES 64U
2232
2233 ssize_t short_io_bytes_store(struct kobject *kobj, struct attribute *attr,
2234                              const char *buffer, size_t count)
2235 {
2236         struct obd_device *obd = container_of(kobj, struct obd_device,
2237                                               obd_kset.kobj);
2238         struct client_obd *cli = &obd->u.cli;
2239         u64 val;
2240         int rc;
2241
2242         if (strcmp(buffer, "-1") == 0) {
2243                 val = OBD_DEF_SHORT_IO_BYTES;
2244         } else {
2245                 rc = sysfs_memparse(buffer, count, &val, "B");
2246                 if (rc)
2247                         GOTO(out, rc);
2248         }
2249
2250         if (val && (val < MIN_SHORT_IO_BYTES || val > LNET_MTU))
2251                 GOTO(out, rc = -ERANGE);
2252
2253         rc = count;
2254
2255         spin_lock(&cli->cl_loi_list_lock);
2256         cli->cl_max_short_io_bytes = min_t(u64, val, OST_MAX_SHORT_IO_BYTES);
2257         spin_unlock(&cli->cl_loi_list_lock);
2258
2259 out:
2260         return rc;
2261 }
2262 EXPORT_SYMBOL(short_io_bytes_store);
2263
2264 int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
2265                            struct root_squash_info *squash, char *name)
2266 {
2267         int rc;
2268         char kernbuf[64], *tmp, *errmsg;
2269         unsigned long uid, gid;
2270         ENTRY;
2271
2272         if (count >= sizeof(kernbuf)) {
2273                 errmsg = "string too long";
2274                 GOTO(failed_noprint, rc = -EINVAL);
2275         }
2276         if (copy_from_user(kernbuf, buffer, count)) {
2277                 errmsg = "bad address";
2278                 GOTO(failed_noprint, rc = -EFAULT);
2279         }
2280         kernbuf[count] = '\0';
2281
2282         /* look for uid gid separator */
2283         tmp = strchr(kernbuf, ':');
2284         if (!tmp) {
2285                 errmsg = "needs uid:gid format";
2286                 GOTO(failed, rc = -EINVAL);
2287         }
2288         *tmp = '\0';
2289         tmp++;
2290
2291         /* parse uid */
2292         if (kstrtoul(kernbuf, 0, &uid) != 0) {
2293                 errmsg = "bad uid";
2294                 GOTO(failed, rc = -EINVAL);
2295         }
2296
2297         /* parse gid */
2298         if (kstrtoul(tmp, 0, &gid) != 0) {
2299                 errmsg = "bad gid";
2300                 GOTO(failed, rc = -EINVAL);
2301         }
2302
2303         squash->rsi_uid = uid;
2304         squash->rsi_gid = gid;
2305
2306         LCONSOLE_INFO("%s: root_squash is set to %u:%u\n",
2307                       name, squash->rsi_uid, squash->rsi_gid);
2308         RETURN(count);
2309
2310 failed:
2311         if (tmp) {
2312                 tmp--;
2313                 *tmp = ':';
2314         }
2315         CWARN("%s: failed to set root_squash to \"%s\", %s, rc = %d\n",
2316               name, kernbuf, errmsg, rc);
2317         RETURN(rc);
2318 failed_noprint:
2319         CWARN("%s: failed to set root_squash due to %s, rc = %d\n",
2320               name, errmsg, rc);
2321         RETURN(rc);
2322 }
2323 EXPORT_SYMBOL(lprocfs_wr_root_squash);
2324
2325
2326 int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
2327                              struct root_squash_info *squash, char *name)
2328 {
2329         int rc;
2330         char *kernbuf = NULL;
2331         char *errmsg;
2332         LIST_HEAD(tmp);
2333         int len = count;
2334         ENTRY;
2335
2336         if (count > 4096) {
2337                 errmsg = "string too long";
2338                 GOTO(failed, rc = -EINVAL);
2339         }
2340
2341         OBD_ALLOC(kernbuf, count + 1);
2342         if (!kernbuf) {
2343                 errmsg = "no memory";
2344                 GOTO(failed, rc = -ENOMEM);
2345         }
2346         if (copy_from_user(kernbuf, buffer, count)) {
2347                 errmsg = "bad address";
2348                 GOTO(failed, rc = -EFAULT);
2349         }
2350         kernbuf[count] = '\0';
2351
2352         if (count > 0 && kernbuf[count - 1] == '\n')
2353                 len = count - 1;
2354
2355         if ((len == 4 && strncmp(kernbuf, "NONE", len) == 0) ||
2356             (len == 5 && strncmp(kernbuf, "clear", len) == 0)) {
2357                 /* empty string is special case */
2358                 spin_lock(&squash->rsi_lock);
2359                 if (!list_empty(&squash->rsi_nosquash_nids))
2360                         cfs_free_nidlist(&squash->rsi_nosquash_nids);
2361                 spin_unlock(&squash->rsi_lock);
2362                 LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name);
2363                 OBD_FREE(kernbuf, count + 1);
2364                 RETURN(count);
2365         }
2366
2367         if (cfs_parse_nidlist(kernbuf, &tmp) < 0) {
2368                 errmsg = "can't parse";
2369                 GOTO(failed, rc = -EINVAL);
2370         }
2371         LCONSOLE_INFO("%s: nosquash_nids set to %s\n",
2372                       name, kernbuf);
2373         OBD_FREE(kernbuf, count + 1);
2374         kernbuf = NULL;
2375
2376         spin_lock(&squash->rsi_lock);
2377         if (!list_empty(&squash->rsi_nosquash_nids))
2378                 cfs_free_nidlist(&squash->rsi_nosquash_nids);
2379         list_splice(&tmp, &squash->rsi_nosquash_nids);
2380         spin_unlock(&squash->rsi_lock);
2381
2382         RETURN(count);
2383
2384 failed:
2385         if (kernbuf) {
2386                 CWARN("%s: failed to set nosquash_nids to \"%s\", %s rc = %d\n",
2387                       name, kernbuf, errmsg, rc);
2388                 OBD_FREE(kernbuf, count + 1);
2389         } else {
2390                 CWARN("%s: failed to set nosquash_nids due to %s rc = %d\n",
2391                       name, errmsg, rc);
2392         }
2393         RETURN(rc);
2394 }
2395 EXPORT_SYMBOL(lprocfs_wr_nosquash_nids);
2396
2397 #endif /* CONFIG_PROC_FS*/