Whamcloud - gitweb
Cleanup for lov objid code, remove scalability problems and
[fs/lustre-release.git] / lustre / llite / lproc_llite.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22 #define DEBUG_SUBSYSTEM S_LLITE
23
24 #include <linux/version.h>
25 #include <lustre_lite.h>
26 #include <lprocfs_status.h>
27 #include <linux/seq_file.h>
28 #include <obd_support.h>
29
30 #include "llite_internal.h"
31
/* Global proc directory entry pointer. It is only defined here (no
 * initializer); presumably it is assigned by the code that creates the
 * Lustre filesystem proc root -- TODO confirm against the callers. */
struct proc_dir_entry *proc_lustre_fs_root;
33
34 #ifdef LPROCFS
/* /proc/lustre/llite mount point registration */
/* File operation tables attached to the per-mount proc files created in
 * lprocfs_register_mountpoint() (dump_page_cache, read_ahead_stats,
 * extents_stats, extents_stats_per_process, offset_stats).  They are
 * declared here without initializers so they can be referenced below;
 * presumably the initialized definitions follow later in this file. */
struct file_operations llite_dump_pgcache_fops;
struct file_operations ll_ra_stats_fops;
struct file_operations ll_rw_extents_stats_fops;
struct file_operations ll_rw_extents_stats_pp_fops;
struct file_operations ll_rw_offset_stats_fops;
41
42 static int ll_rd_blksize(char *page, char **start, off_t off, int count,
43                          int *eof, void *data)
44 {
45         struct super_block *sb = (struct super_block *)data;
46         struct obd_statfs osfs;
47         int rc;
48
49         LASSERT(sb != NULL);
50         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
51         if (!rc) {
52               *eof = 1;
53               rc = snprintf(page, count, "%u\n", osfs.os_bsize);
54         }
55
56         return rc;
57 }
58
59 static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
60                              int *eof, void *data)
61 {
62         struct super_block *sb = (struct super_block *)data;
63         struct obd_statfs osfs;
64         int rc;
65
66         LASSERT(sb != NULL);
67         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
68         if (!rc) {
69                 __u32 blk_size = osfs.os_bsize >> 10;
70                 __u64 result = osfs.os_blocks;
71
72                 while (blk_size >>= 1)
73                         result <<= 1;
74
75                 *eof = 1;
76                 rc = snprintf(page, count, LPU64"\n", result);
77         }
78         return rc;
79
80 }
81
82 static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
83                             int *eof, void *data)
84 {
85         struct super_block *sb = (struct super_block *)data;
86         struct obd_statfs osfs;
87         int rc;
88
89         LASSERT(sb != NULL);
90         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
91         if (!rc) {
92                 __u32 blk_size = osfs.os_bsize >> 10;
93                 __u64 result = osfs.os_bfree;
94
95                 while (blk_size >>= 1)
96                         result <<= 1;
97
98                 *eof = 1;
99                 rc = snprintf(page, count, LPU64"\n", result);
100         }
101         return rc;
102 }
103
104 static int ll_rd_kbytesavail(char *page, char **start, off_t off, int count,
105                              int *eof, void *data)
106 {
107         struct super_block *sb = (struct super_block *)data;
108         struct obd_statfs osfs;
109         int rc;
110
111         LASSERT(sb != NULL);
112         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
113         if (!rc) {
114                 __u32 blk_size = osfs.os_bsize >> 10;
115                 __u64 result = osfs.os_bavail;
116
117                 while (blk_size >>= 1)
118                         result <<= 1;
119
120                 *eof = 1;
121                 rc = snprintf(page, count, LPU64"\n", result);
122         }
123         return rc;
124 }
125
126 static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
127                             int *eof, void *data)
128 {
129         struct super_block *sb = (struct super_block *)data;
130         struct obd_statfs osfs;
131         int rc;
132
133         LASSERT(sb != NULL);
134         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
135         if (!rc) {
136                  *eof = 1;
137                  rc = snprintf(page, count, LPU64"\n", osfs.os_files);
138         }
139         return rc;
140 }
141
142 static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
143                            int *eof, void *data)
144 {
145         struct super_block *sb = (struct super_block *)data;
146         struct obd_statfs osfs;
147         int rc;
148
149         LASSERT(sb != NULL);
150         rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - HZ);
151         if (!rc) {
152                  *eof = 1;
153                  rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
154         }
155         return rc;
156
157 }
158
159 static int ll_rd_fstype(char *page, char **start, off_t off, int count,
160                         int *eof, void *data)
161 {
162         struct super_block *sb = (struct super_block*)data;
163
164         LASSERT(sb != NULL);
165         *eof = 1;
166         return snprintf(page, count, "%s\n", sb->s_type->name);
167 }
168
169 static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
170                          int *eof, void *data)
171 {
172         struct super_block *sb = (struct super_block *)data;
173
174         LASSERT(sb != NULL);
175         *eof = 1;
176         return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
177 }
178
179 static int ll_rd_max_readahead_mb(char *page, char **start, off_t off,
180                                    int count, int *eof, void *data)
181 {
182         struct super_block *sb = data;
183         struct ll_sb_info *sbi = ll_s2sbi(sb);
184         long pages_number;
185         int mult;
186
187         spin_lock(&sbi->ll_lock);
188         pages_number = sbi->ll_ra_info.ra_max_pages;
189         spin_unlock(&sbi->ll_lock);
190
191         mult = 1 << (20 - CFS_PAGE_SHIFT);
192         return lprocfs_read_frac_helper(page, count, pages_number, mult);
193 }
194
195 static int ll_wr_max_readahead_mb(struct file *file, const char *buffer,
196                                    unsigned long count, void *data)
197 {
198         struct super_block *sb = data;
199         struct ll_sb_info *sbi = ll_s2sbi(sb);
200         int mult, rc, pages_number;
201
202         mult = 1 << (20 - CFS_PAGE_SHIFT);
203         rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
204         if (rc)
205                 return rc;
206
207         if (pages_number < 0 || pages_number > num_physpages / 2) {
208                 CERROR("can't set file readahead more than %lu MB\n",
209                         num_physpages >> (20 - CFS_PAGE_SHIFT + 1)); /*1/2 of RAM*/
210                 return -ERANGE;
211         }
212
213         spin_lock(&sbi->ll_lock);
214         sbi->ll_ra_info.ra_max_pages = pages_number;
215         spin_unlock(&sbi->ll_lock);
216
217         return count;
218 }
219
220 static int ll_rd_max_read_ahead_whole_mb(char *page, char **start, off_t off,
221                                        int count, int *eof, void *data)
222 {
223         struct super_block *sb = data;
224         struct ll_sb_info *sbi = ll_s2sbi(sb);
225         long pages_number;
226         int mult;
227
228         spin_lock(&sbi->ll_lock);
229         pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
230         spin_unlock(&sbi->ll_lock);
231
232         mult = 1 << (20 - CFS_PAGE_SHIFT);
233         return lprocfs_read_frac_helper(page, count, pages_number, mult);
234 }
235
236 static int ll_wr_max_read_ahead_whole_mb(struct file *file, const char *buffer,
237                                        unsigned long count, void *data)
238 {
239         struct super_block *sb = data;
240         struct ll_sb_info *sbi = ll_s2sbi(sb);
241         int mult, rc, pages_number;
242
243         mult = 1 << (20 - CFS_PAGE_SHIFT);
244         rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
245         if (rc)
246                 return rc;
247
248         /* Cap this at the current max readahead window size, the readahead
249          * algorithm does this anyway so it's pointless to set it larger. */
250         if (pages_number < 0 || pages_number > sbi->ll_ra_info.ra_max_pages) {
251                 CERROR("can't set max_read_ahead_whole_mb more than "
252                        "max_read_ahead_mb: %lu\n",
253                        sbi->ll_ra_info.ra_max_pages >> (20 - CFS_PAGE_SHIFT));
254                 return -ERANGE;
255         }
256
257         spin_lock(&sbi->ll_lock);
258         sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number;
259         spin_unlock(&sbi->ll_lock);
260
261         return count;
262 }
263
264 static int ll_rd_max_cached_mb(char *page, char **start, off_t off,
265                                int count, int *eof, void *data)
266 {
267         struct super_block *sb = data;
268         struct ll_sb_info *sbi = ll_s2sbi(sb);
269         long pages_number;
270         int mult;
271
272         spin_lock(&sbi->ll_lock);
273         pages_number = sbi->ll_async_page_max;
274         spin_unlock(&sbi->ll_lock);
275
276         mult = 1 << (20 - CFS_PAGE_SHIFT);
277         return lprocfs_read_frac_helper(page, count, pages_number, mult);;
278 }
279
280 static int ll_wr_max_cached_mb(struct file *file, const char *buffer,
281                                   unsigned long count, void *data)
282 {
283         struct super_block *sb = data;
284         struct ll_sb_info *sbi = ll_s2sbi(sb);
285         int mult, rc, pages_number;
286
287         mult = 1 << (20 - CFS_PAGE_SHIFT);
288         rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
289         if (rc)
290                 return rc;
291
292         if (pages_number < 0 || pages_number > num_physpages) {
293                 CERROR("can't set max cache more than %lu MB\n",
294                         num_physpages >> (20 - CFS_PAGE_SHIFT));
295                 return -ERANGE;
296         }
297
298         spin_lock(&sbi->ll_lock);
299         sbi->ll_async_page_max = pages_number ;
300         spin_unlock(&sbi->ll_lock);
301         
302         if (!sbi->ll_osc_exp)
303                 /* Not set up yet, don't call llap_shrink_cache */
304                 return count;
305
306         if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
307                 llap_shrink_cache(sbi, 0);
308
309         return count;
310 }
311
312 static int ll_rd_checksum(char *page, char **start, off_t off,
313                           int count, int *eof, void *data)
314 {
315         struct super_block *sb = data;
316         struct ll_sb_info *sbi = ll_s2sbi(sb);
317
318         return snprintf(page, count, "%u\n",
319                         (sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) ? 1 : 0);
320 }
321
322 static int ll_wr_checksum(struct file *file, const char *buffer,
323                           unsigned long count, void *data)
324 {
325         struct super_block *sb = data;
326         struct ll_sb_info *sbi = ll_s2sbi(sb);
327         int val, rc;
328
329         if (!sbi->ll_osc_exp)
330                 /* Not set up yet */
331                 return -EAGAIN;
332
333         rc = lprocfs_write_helper(buffer, count, &val);
334         if (rc)
335                 return rc;
336         if (val)
337                 sbi->ll_flags |=  (LL_SBI_LLITE_CHECKSUM|LL_SBI_DATA_CHECKSUM);
338         else
339                 sbi->ll_flags &= ~(LL_SBI_LLITE_CHECKSUM|LL_SBI_DATA_CHECKSUM);
340
341         rc = obd_set_info_async(sbi->ll_osc_exp, strlen("checksum"), "checksum",
342                                 sizeof(val), &val, NULL);
343         if (rc)
344                 CWARN("Failed to set OSC checksum flags: %d\n", rc);
345
346         return count;
347 }
348
349 static int ll_rd_max_rw_chunk(char *page, char **start, off_t off,
350                           int count, int *eof, void *data)
351 {
352         struct super_block *sb = data;
353
354         return snprintf(page, count, "%lu\n", ll_s2sbi(sb)->ll_max_rw_chunk);
355 }
356
357 static int ll_wr_max_rw_chunk(struct file *file, const char *buffer,
358                           unsigned long count, void *data)
359 {
360         struct super_block *sb = data;
361         int rc, val;
362
363         rc = lprocfs_write_helper(buffer, count, &val);
364         if (rc)
365                 return rc;
366         ll_s2sbi(sb)->ll_max_rw_chunk = val;
367         return count;
368 }
369
370 static int ll_rd_track_id(char *page, int count, void *data, 
371                           enum stats_track_type type)
372 {
373         struct super_block *sb = data;
374
375         if (ll_s2sbi(sb)->ll_stats_track_type == type) {
376                 return snprintf(page, count, "%d\n",
377                                 ll_s2sbi(sb)->ll_stats_track_id);
378         
379         } else if (ll_s2sbi(sb)->ll_stats_track_type == STATS_TRACK_ALL) {
380                 return snprintf(page, count, "0 (all)\n");
381         } else {
382                 return snprintf(page, count, "untracked\n");
383         }
384 }
385
386 static int ll_wr_track_id(const char *buffer, unsigned long count, void *data,
387                           enum stats_track_type type)
388 {
389         struct super_block *sb = data;
390         int rc, pid;
391
392         rc = lprocfs_write_helper(buffer, count, &pid);
393         if (rc)
394                 return rc;
395         ll_s2sbi(sb)->ll_stats_track_id = pid;
396         if (pid == 0)
397                 ll_s2sbi(sb)->ll_stats_track_type = STATS_TRACK_ALL;
398         else
399                 ll_s2sbi(sb)->ll_stats_track_type = type;
400         lprocfs_clear_stats(ll_s2sbi(sb)->ll_stats);
401         return count;
402 }
403
404 static int ll_rd_track_pid(char *page, char **start, off_t off,
405                           int count, int *eof, void *data)
406 {
407         return (ll_rd_track_id(page, count, data, STATS_TRACK_PID));
408 }
409
410 static int ll_wr_track_pid(struct file *file, const char *buffer,
411                           unsigned long count, void *data)
412 {
413         return (ll_wr_track_id(buffer, count, data, STATS_TRACK_PID));
414 }
415
416 static int ll_rd_track_ppid(char *page, char **start, off_t off,
417                           int count, int *eof, void *data)
418 {
419         return (ll_rd_track_id(page, count, data, STATS_TRACK_PPID));
420 }
421
422 static int ll_wr_track_ppid(struct file *file, const char *buffer,
423                           unsigned long count, void *data)
424 {
425         return (ll_wr_track_id(buffer, count, data, STATS_TRACK_PPID));
426 }
427
428 static int ll_rd_track_gid(char *page, char **start, off_t off,
429                           int count, int *eof, void *data)
430 {
431         return (ll_rd_track_id(page, count, data, STATS_TRACK_GID));
432 }
433
434 static int ll_wr_track_gid(struct file *file, const char *buffer,
435                           unsigned long count, void *data)
436 {                                                                 
437         return (ll_wr_track_id(buffer, count, data, STATS_TRACK_GID));
438 }
439
440 static int ll_rd_contention_time(char *page, char **start, off_t off,
441                                  int count, int *eof, void *data)
442 {
443         struct super_block *sb = data;
444
445         *eof = 1;
446         return snprintf(page, count, "%u\n", ll_s2sbi(sb)->ll_contention_time);
447
448 }
449
450 static int ll_wr_contention_time(struct file *file, const char *buffer,
451                                  unsigned long count, void *data)
452 {
453         struct super_block *sb = data;
454         struct ll_sb_info *sbi = ll_s2sbi(sb);
455
456         return lprocfs_write_helper(buffer, count,&sbi->ll_contention_time) ?:
457                 count;
458 }
459
460 static int ll_rd_statahead_count(char *page, char **start, off_t off,
461                                  int count, int *eof, void *data)
462 {
463         struct super_block *sb = data;
464         struct ll_sb_info *sbi = ll_s2sbi(sb);
465
466         return snprintf(page, count, "%u\n", sbi->ll_sa_count);
467 }
468
469 static int ll_rd_statahead_max(char *page, char **start, off_t off,
470                                int count, int *eof, void *data)
471 {
472         struct super_block *sb = data;
473         struct ll_sb_info *sbi = ll_s2sbi(sb);
474
475         return snprintf(page, count, "%u\n", sbi->ll_sa_max);
476 }
477
478 static int ll_wr_statahead_max(struct file *file, const char *buffer,
479                                unsigned long count, void *data)
480 {
481         struct super_block *sb = data;
482         struct ll_sb_info *sbi = ll_s2sbi(sb);
483         int val, rc;
484
485         rc = lprocfs_write_helper(buffer, count, &val);
486         if (rc)
487                 return rc;
488         if (val >= 0 && val <= LL_STATAHEAD_MAX)
489                 sbi->ll_sa_max = val;
490         else
491                 CERROR("Bad statahead_max value %d. Valid values are in the "
492                        "range [0, %d]\n", val, LL_STATAHEAD_MAX);
493
494         return count;
495 }
496
497 static int ll_rd_statahead_stats(char *page, char **start, off_t off,
498                                  int count, int *eof, void *data)
499 {
500         struct super_block *sb = data;
501         struct ll_sb_info *sbi = ll_s2sbi(sb);
502
503         return snprintf(page, count,
504                         "statahead wrong: %u\n"
505                         "statahead total: %u\n"
506                         "ls blocked:      %llu\n"
507                         "ls total:        %llu\n",
508                         sbi->ll_sa_wrong, sbi->ll_sa_total,
509                         sbi->ll_sa_blocked,
510                         sbi->ll_sa_blocked + sbi->ll_sa_cached);
511 }
512
/*
 * Per-mount proc variables.  Each entry gives the proc file name, its
 * read handler, its write handler (0 when the file has none) and a
 * fourth field left at 0.  The table ends with an all-zero entry and is
 * handed to lprocfs_add_vars() by lprocfs_register_mountpoint() with
 * the super_block as handler data.
 */
static struct lprocfs_vars lprocfs_obd_vars[] = {
        { "uuid",         ll_rd_sb_uuid,          0, 0 },
        //{ "mntpt_path",   ll_rd_path,             0, 0 },
        { "fstype",       ll_rd_fstype,           0, 0 },
        { "blocksize",    ll_rd_blksize,          0, 0 },
        { "kbytestotal",  ll_rd_kbytestotal,      0, 0 },
        { "kbytesfree",   ll_rd_kbytesfree,       0, 0 },
        { "kbytesavail",  ll_rd_kbytesavail,      0, 0 },
        { "filestotal",   ll_rd_filestotal,       0, 0 },
        { "filesfree",    ll_rd_filesfree,        0, 0 },
        //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
        { "max_read_ahead_mb", ll_rd_max_readahead_mb,
                               ll_wr_max_readahead_mb, 0 },
        { "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb,
                                     ll_wr_max_read_ahead_whole_mb, 0 },
        { "max_cached_mb",  ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 },
        { "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 },
        { "max_rw_chunk",   ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 },
        { "stats_track_pid",  ll_rd_track_pid, ll_wr_track_pid, 0 },
        { "stats_track_ppid", ll_rd_track_ppid, ll_wr_track_ppid, 0 },
        { "stats_track_gid",  ll_rd_track_gid, ll_wr_track_gid, 0 },
        { "contention_seconds", ll_rd_contention_time, ll_wr_contention_time, 0},
        { "statahead_count", ll_rd_statahead_count, 0, 0 },
        { "statahead_max",   ll_rd_statahead_max, ll_wr_statahead_max, 0 },
        { "statahead_stats", ll_rd_statahead_stats, 0, 0 },
        { 0 }
};
540
541 #define MAX_STRING_SIZE 128
542
/*
 * Description of one llite statistics counter: the counter index
 * (opcode), a type word combining an LPROCFS_TYPE_* unit flag with an
 * optional LPROCFS_CNTR_AVGMINMAX flag, and the name shown for it.
 *
 * lprocfs_register_mountpoint() walks llite_opcode_table from 0 to
 * LPROC_LL_FILE_OPCODES - 1 and initializes one counter per entry, so
 * the table is expected to carry exactly LPROC_LL_FILE_OPCODES entries.
 */
struct llite_file_opcode {
        __u32       opcode;
        __u32       type;
        const char *opname;
} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
        /* file operation */
        { LPROC_LL_DIRTY_HITS,     LPROCFS_TYPE_REGS, "dirty_pages_hits" },
        { LPROC_LL_DIRTY_MISSES,   LPROCFS_TYPE_REGS, "dirty_pages_misses" },
        { LPROC_LL_WB_WRITEPAGE,   LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "writeback_from_writepage" },
        { LPROC_LL_WB_PRESSURE,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "writeback_from_pressure" },
        { LPROC_LL_WB_OK,          LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "writeback_ok_pages" },
        { LPROC_LL_WB_FAIL,        LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "writeback_failed_pages" },
        { LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
                                   "read_bytes" },
        { LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
                                   "write_bytes" },
        { LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "brw_read" },
        { LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "brw_write" },

        { LPROC_LL_IOCTL,          LPROCFS_TYPE_REGS, "ioctl" },
        { LPROC_LL_OPEN,           LPROCFS_TYPE_REGS, "open" },
        { LPROC_LL_RELEASE,        LPROCFS_TYPE_REGS, "close" },
        { LPROC_LL_MAP,            LPROCFS_TYPE_REGS, "mmap" },
        { LPROC_LL_LLSEEK,         LPROCFS_TYPE_REGS, "seek" },
        { LPROC_LL_FSYNC,          LPROCFS_TYPE_REGS, "fsync" },
        /* inode operation */
        { LPROC_LL_SETATTR,        LPROCFS_TYPE_REGS, "setattr" },
        { LPROC_LL_TRUNC,          LPROCFS_TYPE_REGS, "truncate" },
        { LPROC_LL_FLOCK,          LPROCFS_TYPE_REGS, "flock" },
        /* the "getattr" counter is backed by a different opcode
         * depending on the kernel version */
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
        { LPROC_LL_GETATTR,        LPROCFS_TYPE_REGS, "getattr" },
#else
        { LPROC_LL_REVALIDATE,     LPROCFS_TYPE_REGS, "getattr" },
#endif
        /* special inode operation */
        { LPROC_LL_STAFS,          LPROCFS_TYPE_REGS, "statfs" },
        { LPROC_LL_ALLOC_INODE,    LPROCFS_TYPE_REGS, "alloc_inode" },
        { LPROC_LL_SETXATTR,       LPROCFS_TYPE_REGS, "setxattr" },
        { LPROC_LL_GETXATTR,       LPROCFS_TYPE_REGS, "getxattr" },
        { LPROC_LL_LISTXATTR,      LPROCFS_TYPE_REGS, "listxattr" },
        { LPROC_LL_REMOVEXATTR,    LPROCFS_TYPE_REGS, "removexattr" },
        { LPROC_LL_INODE_PERM,     LPROCFS_TYPE_REGS, "inode_permission" },
        { LPROC_LL_DIRECT_READ,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "direct_read" },
        { LPROC_LL_DIRECT_WRITE,   LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
                                   "direct_write" },
        { LPROC_LL_LOCKLESS_READ,  LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
                                   "lockless_read_bytes" },
        { LPROC_LL_LOCKLESS_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
                                   "lockless_write_bytes" },

};
601
602 void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
603 {
604         if (!sbi->ll_stats)
605                 return;
606         if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
607                 lprocfs_counter_add(sbi->ll_stats, op, count);
608         else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
609                  sbi->ll_stats_track_id == current->pid)
610                 lprocfs_counter_add(sbi->ll_stats, op, count);
611         else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
612                  sbi->ll_stats_track_id == current->p_pptr->pid)
613                 lprocfs_counter_add(sbi->ll_stats, op, count);
614         else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
615                  sbi->ll_stats_track_id == current->gid)
616                 lprocfs_counter_add(sbi->ll_stats, op, count);
617 }
618 EXPORT_SYMBOL(ll_stats_ops_tally);
619
620 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
621                                 struct super_block *sb, char *osc, char *mdc)
622 {
623         struct lprocfs_vars lvars[2];
624         struct lustre_sb_info *lsi = s2lsi(sb);
625         struct ll_sb_info *sbi = ll_s2sbi(sb);
626         struct obd_device *obd;
627         char name[MAX_STRING_SIZE + 1], *ptr;
628         int err, id, len;
629         struct proc_dir_entry *entry;
630         ENTRY;
631
632         memset(lvars, 0, sizeof(lvars));
633
634         name[MAX_STRING_SIZE] = '\0';
635         lvars[0].name = name;
636
637         LASSERT(sbi != NULL);
638         LASSERT(mdc != NULL);
639         LASSERT(osc != NULL);
640
641         /* Get fsname */
642         len = strlen(lsi->lsi_lmd->lmd_profile);
643         ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
644         if (ptr && (strcmp(ptr, "-client") == 0))
645                 len -= 7; 
646         
647         /* Mount info */
648         snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len,
649                  lsi->lsi_lmd->lmd_profile, sb);
650         
651         sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
652         if (IS_ERR(sbi->ll_proc_root)) {
653                 err = PTR_ERR(sbi->ll_proc_root);
654                 sbi->ll_proc_root = NULL;
655                 RETURN(err);
656         }
657
658         entry = create_proc_entry("dump_page_cache", 0444, sbi->ll_proc_root);
659         if (entry == NULL)
660                 GOTO(out, err = -ENOMEM);
661         entry->proc_fops = &llite_dump_pgcache_fops;
662         entry->data = sbi;
663
664         entry = create_proc_entry("read_ahead_stats", 0644, sbi->ll_proc_root);
665         if (entry == NULL)
666                 GOTO(out, err = -ENOMEM);
667         entry->proc_fops = &ll_ra_stats_fops;
668         entry->data = sbi;
669
670         entry = create_proc_entry("extents_stats", 0644, sbi->ll_proc_root);
671         if (entry == NULL)
672                  GOTO(out, err = -ENOMEM);
673         entry->proc_fops = &ll_rw_extents_stats_fops;
674         entry->data = sbi;
675
676         entry = create_proc_entry("extents_stats_per_process", 0644,
677                                   sbi->ll_proc_root);
678         if (entry == NULL)
679                  GOTO(out, err = -ENOMEM);
680         entry->proc_fops = &ll_rw_extents_stats_pp_fops;
681         entry->data = sbi;
682
683         entry = create_proc_entry("offset_stats", 0644, sbi->ll_proc_root);
684         if (entry == NULL)
685                 GOTO(out, err = -ENOMEM);
686         entry->proc_fops = &ll_rw_offset_stats_fops;
687         entry->data = sbi;
688
689         /* File operations stats */
690         sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, 
691                                             LPROCFS_STATS_FLAG_PERCPU);
692         if (sbi->ll_stats == NULL)
693                 GOTO(out, err = -ENOMEM);
694         /* do counter init */
695         for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
696                 __u32 type = llite_opcode_table[id].type;
697                 void *ptr = NULL;
698                 if (type & LPROCFS_TYPE_REGS)
699                         ptr = "regs";
700                 else if (type & LPROCFS_TYPE_BYTES)
701                         ptr = "bytes";
702                 else if (type & LPROCFS_TYPE_PAGES)
703                         ptr = "pages";
704                 lprocfs_counter_init(sbi->ll_stats,
705                                      llite_opcode_table[id].opcode,
706                                      (type & LPROCFS_CNTR_AVGMINMAX),
707                                      llite_opcode_table[id].opname, ptr);
708         }
709         err = lprocfs_register_stats(sbi->ll_proc_root, "stats", sbi->ll_stats);
710         if (err)
711                 GOTO(out, err);
712
713         err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
714         if (err)
715                 GOTO(out, err);
716
717         /* MDC info */
718         obd = class_name2obd(mdc);
719
720         LASSERT(obd != NULL);
721         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
722         LASSERT(obd->obd_type->typ_name != NULL);
723
724         snprintf(name, MAX_STRING_SIZE, "%s/common_name",
725                  obd->obd_type->typ_name);
726         lvars[0].read_fptr = lprocfs_rd_name;
727         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
728         if (err)
729                 GOTO(out, err);
730
731         snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
732         lvars[0].read_fptr = lprocfs_rd_uuid;
733         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
734         if (err)
735                 GOTO(out, err);
736
737         /* OSC */
738         obd = class_name2obd(osc);
739
740         LASSERT(obd != NULL);
741         LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
742         LASSERT(obd->obd_type->typ_name != NULL);
743
744         snprintf(name, MAX_STRING_SIZE, "%s/common_name",
745                  obd->obd_type->typ_name);
746         lvars[0].read_fptr = lprocfs_rd_name;
747         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
748         if (err)
749                 GOTO(out, err);
750
751         snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
752         lvars[0].read_fptr = lprocfs_rd_uuid;
753         err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
754 out:
755         if (err) {
756                 lprocfs_remove(&sbi->ll_proc_root);
757                 lprocfs_free_stats(&sbi->ll_stats);
758         }
759         RETURN(err);
760 }
761
762 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi)
763 {
764         if (sbi->ll_proc_root) {
765                 lprocfs_remove(&sbi->ll_proc_root);
766                 lprocfs_free_stats(&sbi->ll_stats);
767         }
768 }
769 #undef MAX_STRING_SIZE
770
/*
 * Emit the name of page flag @flag to @seq when bit PG_<flag> is set in
 * (page)->flags.  @has_flags is an lvalue tracking whether a flag name
 * has already been written: the first name sets it to 1, every later
 * name is preceded by a '|' separator.
 *
 * The expansion deliberately carries no trailing semicolon, so the
 * macro behaves like a single statement and can be used in an
 * un-braced if/else.
 */
#define seq_page_flag(seq, page, flag, has_flags) do {                  \
                if (test_bit(PG_##flag, &(page)->flags)) {              \
                        if (!(has_flags))                               \
                                (has_flags) = 1;                        \
                        else                                            \
                                seq_putc(seq, '|');                     \
                        seq_puts(seq, #flag);                           \
                }                                                       \
        } while (0)
780
781 static void *llite_dump_pgcache_seq_start(struct seq_file *seq, loff_t *pos)
782 {
783         struct ll_async_page *dummy_llap = seq->private;
784
785         if (dummy_llap->llap_magic == 2)
786                 return NULL;
787
788         return (void *)1;
789 }
790
791 static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
792 {
793         struct ll_async_page *llap, *dummy_llap = seq->private;
794         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
795
796         /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
797          * it in our own state */
798         if (dummy_llap->llap_magic == 0) {
799                 seq_printf(seq, "gener |  llap  cookie  origin wq du wb | page "
800                                 "inode index count [ page flags ]\n");
801                 return 0;
802         }
803
804         spin_lock(&sbi->ll_lock);
805
806         llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
807         if (llap != NULL)  {
808                 int has_flags = 0;
809                 struct page *page = llap->llap_page;
810
811                 LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
812                          llap->llap_origin);
813
814                 seq_printf(seq," %5lu | %p %p %s %s %s %s | %p %lu/%u(%p) "
815                            "%lu %u [",
816                            sbi->ll_pglist_gen,
817                            llap, llap->llap_cookie,
818                            llap_origins[llap->llap_origin],
819                            llap->llap_write_queued ? "wq" : "- ",
820                            llap->llap_defer_uptodate ? "du" : "- ",
821                            PageWriteback(page) ? "wb" : "-",
822                            page, page->mapping->host->i_ino,
823                            page->mapping->host->i_generation,
824                            page->mapping->host, page->index,
825                            page_count(page));
826                 seq_page_flag(seq, page, locked, has_flags);
827                 seq_page_flag(seq, page, error, has_flags);
828                 seq_page_flag(seq, page, referenced, has_flags);
829                 seq_page_flag(seq, page, uptodate, has_flags);
830                 seq_page_flag(seq, page, dirty, has_flags);
831 #if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,12))
832                 seq_page_flag(seq, page, highmem, has_flags);
833 #endif
834                 seq_page_flag(seq, page, writeback, has_flags);
835                 if (!has_flags)
836                         seq_puts(seq, "-]\n");
837                 else
838                         seq_puts(seq, "]\n");
839         }
840
841         spin_unlock(&sbi->ll_lock);
842
843         return 0;
844 }
845
846 static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v, 
847                                          loff_t *pos)
848 {
849         struct ll_async_page *llap, *dummy_llap = seq->private;
850         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
851
852         /* bail if we just displayed the banner */
853         if (dummy_llap->llap_magic == 0) {
854                 dummy_llap->llap_magic = 1;
855                 return dummy_llap;
856         }
857
858         /* we've just displayed the llap that is after us in the list.
859          * we advance to a position beyond it, returning null if there
860          * isn't another llap in the list beyond that new position. */
861         spin_lock(&sbi->ll_lock);
862         llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
863         list_del_init(&dummy_llap->llap_pglist_item);
864         if (llap) {
865                 list_add(&dummy_llap->llap_pglist_item,&llap->llap_pglist_item);
866                 llap =llite_pglist_next_llap(sbi,&dummy_llap->llap_pglist_item);
867         }
868         spin_unlock(&sbi->ll_lock);
869
870         ++*pos;
871         if (llap == NULL) {
872                 dummy_llap->llap_magic = 2;
873                 return NULL;
874         }
875         return dummy_llap;
876 }
877
/* seq_file .stop: intentionally empty.  .show and .next drop ll_lock
 * before they return, so there is nothing left to undo here. */
static void null_stop(struct seq_file *seq, void *v)
{
        return;
}
881
882 struct seq_operations llite_dump_pgcache_seq_sops = {
883         .start = llite_dump_pgcache_seq_start,
884         .stop = null_stop,
885         .next = llite_dump_pgcache_seq_next,
886         .show = llite_dump_pgcache_seq_show,
887 };
888
889 /* we're displaying llaps in a list_head list.  we don't want to hold a lock
890  * while we walk the entire list, and we don't want to have to seek into
891  * the right position in the list as an app advances with many syscalls.  we
892  * allocate a dummy llap and hang it off file->private.  its position in
893  * the list records where the app is currently displaying.  this way our
894  * seq .start and .stop don't actually do anything.  .next returns null
895  * when the dummy hits the end of the list which eventually leads to .release
896  * where we tear down.  this kind of displaying is super-racey, so we put
897  * a generation counter on the list so the output shows when the list
898  * changes between reads.
899  */
900 static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
901 {
902         struct proc_dir_entry *dp = PDE(inode);
903         struct ll_async_page *dummy_llap;
904         struct seq_file *seq;
905         struct ll_sb_info *sbi = dp->data;
906         int rc = -ENOMEM;
907
908         LPROCFS_ENTRY_AND_CHECK(dp);
909
910         OBD_ALLOC_PTR_WAIT(dummy_llap);
911         if (dummy_llap == NULL)
912                 GOTO(out, rc);
913
914         dummy_llap->llap_page = NULL;
915         dummy_llap->llap_cookie = sbi;
916         dummy_llap->llap_magic = 0;
917
918         rc = seq_open(file, &llite_dump_pgcache_seq_sops);
919         if (rc) {
920                 OBD_FREE(dummy_llap, sizeof(*dummy_llap));
921                 GOTO(out, rc);
922         }
923         seq = file->private_data;
924         seq->private = dummy_llap;
925
926         spin_lock(&sbi->ll_lock);
927         list_add(&dummy_llap->llap_pglist_item, &sbi->ll_pglist);
928         spin_unlock(&sbi->ll_lock);
929
930 out:
931         if (rc)
932                 LPROCFS_EXIT();
933         return rc;
934 }
935
936 static int llite_dump_pgcache_seq_release(struct inode *inode,
937                                           struct file *file)
938 {
939         struct seq_file *seq = file->private_data;
940         struct ll_async_page *dummy_llap = seq->private;
941         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
942
943         spin_lock(&sbi->ll_lock);
944         if (!list_empty(&dummy_llap->llap_pglist_item))
945                 list_del_init(&dummy_llap->llap_pglist_item);
946         spin_unlock(&sbi->ll_lock);
947         OBD_FREE(dummy_llap, sizeof(*dummy_llap));
948
949         return lprocfs_seq_release(inode, file);
950 }
951
952 struct file_operations llite_dump_pgcache_fops = {
953         .owner   = THIS_MODULE,
954         .open    = llite_dump_pgcache_seq_open,
955         .read    = seq_read,
956         .release = llite_dump_pgcache_seq_release,
957 };
958
959 static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
960 {
961         struct timeval now;
962         struct ll_sb_info *sbi = seq->private;
963         struct ll_ra_info *ra = &sbi->ll_ra_info;
964         int i;
965         static char *ra_stat_strings[] = {
966                 [RA_STAT_HIT] = "hits",
967                 [RA_STAT_MISS] = "misses",
968                 [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
969                 [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
970                 [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
971                 [RA_STAT_FAILED_MATCH] = "failed lock match",
972                 [RA_STAT_DISCARDED] = "read but discarded",
973                 [RA_STAT_ZERO_LEN] = "zero length file",
974                 [RA_STAT_ZERO_WINDOW] = "zero size window",
975                 [RA_STAT_EOF] = "read-ahead to EOF",
976                 [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
977                 [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
978         };
979
980         do_gettimeofday(&now);
981
982         spin_lock(&sbi->ll_lock);
983
984         seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
985                    now.tv_sec, now.tv_usec);
986         seq_printf(seq, "pending issued pages:           %lu\n",
987                    ra->ra_cur_pages);
988
989         for(i = 0; i < _NR_RA_STAT; i++)
990                 seq_printf(seq, "%-25s %lu\n", ra_stat_strings[i], 
991                            ra->ra_stats[i]);
992
993         spin_unlock(&sbi->ll_lock);
994
995         return 0;
996 }
997
998 static ssize_t ll_ra_stats_seq_write(struct file *file, const char *buf,
999                                        size_t len, loff_t *off)
1000 {
1001         struct seq_file *seq = file->private_data;
1002         struct ll_sb_info *sbi = seq->private;
1003         struct ll_ra_info *ra = &sbi->ll_ra_info;
1004
1005         spin_lock(&sbi->ll_lock);
1006         memset(ra->ra_stats, 0, sizeof(ra->ra_stats));
1007         spin_unlock(&sbi->ll_lock);
1008
1009         return len;
1010 }
1011
1012 LPROC_SEQ_FOPS(ll_ra_stats);
1013
/* Integer percentage of a relative to b; evaluates to 0 when b is 0.
 * Arguments are parenthesized so compound expressions expand correctly;
 * b is evaluated twice, so it must not have side effects. */
#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
1015
1016 static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
1017                                    struct seq_file *seq, int which)
1018 {
1019         unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
1020         unsigned long start, end, r, w;
1021         char *unitp = "KMGTPEZY";
1022         int i, units = 10;
1023         struct per_process_info *pp_info = &io_extents->pp_extents[which];
1024
1025         read_cum = 0;
1026         write_cum = 0;
1027         start = 0;
1028
1029         for(i = 0; i < LL_HIST_MAX; i++) {
1030                 read_tot += pp_info->pp_r_hist.oh_buckets[i];
1031                 write_tot += pp_info->pp_w_hist.oh_buckets[i];
1032         }
1033
1034         for(i = 0; i < LL_HIST_MAX; i++) {
1035                 r = pp_info->pp_r_hist.oh_buckets[i];
1036                 w = pp_info->pp_w_hist.oh_buckets[i];
1037                 read_cum += r;
1038                 write_cum += w;
1039                 end = 1 << (i + LL_HIST_START - units);
1040                 seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu  | "
1041                            "%14lu %4lu %4lu\n", start, *unitp, end, *unitp,
1042                            (i == LL_HIST_MAX - 1) ? '+' : ' ',
1043                            r, pct(r, read_tot), pct(read_cum, read_tot),
1044                            w, pct(w, write_tot), pct(write_cum, write_tot));
1045                 start = end;
1046                 if (start == 1<<10) {
1047                         start = 1;
1048                         units += 10;
1049                         unitp++;
1050                 }
1051                 if (read_cum == read_tot && write_cum == write_tot)
1052                         break;
1053         }
1054 }
1055
1056 static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
1057 {
1058         struct timeval now;
1059         struct ll_sb_info *sbi = seq->private;
1060         struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
1061         int k;
1062
1063         do_gettimeofday(&now);
1064
1065         if (!sbi->ll_rw_stats_on) {
1066                 seq_printf(seq, "Disabled\n"
1067                                 "Write anything in this file to activate\n");
1068                 return 0;
1069         }
1070         seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
1071                    now.tv_sec, now.tv_usec);
1072         seq_printf(seq, "%15s %19s       | %20s\n", " ", "read", "write");
1073         seq_printf(seq, "%13s   %14s %4s %4s  | %14s %4s %4s\n", 
1074                    "extents", "calls", "%", "cum%",
1075                    "calls", "%", "cum%");
1076         spin_lock(&sbi->ll_pp_extent_lock);
1077         for(k = 0; k < LL_PROCESS_HIST_MAX; k++) {
1078                 if(io_extents->pp_extents[k].pid != 0) {
1079                         seq_printf(seq, "\nPID: %d\n",
1080                                    io_extents->pp_extents[k].pid);
1081                         ll_display_extents_info(io_extents, seq, k);
1082                 }
1083         }
1084         spin_unlock(&sbi->ll_pp_extent_lock);
1085         return 0;
1086 }
1087
1088 static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
1089                                                 const char *buf, size_t len,
1090                                                 loff_t *off)
1091 {
1092         struct seq_file *seq = file->private_data;
1093         struct ll_sb_info *sbi = seq->private;
1094         struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
1095         int i;
1096
1097         sbi->ll_rw_stats_on = 1;
1098         spin_lock(&sbi->ll_pp_extent_lock);
1099         for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
1100                 io_extents->pp_extents[i].pid = 0;
1101                 lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
1102                 lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
1103         }
1104         spin_unlock(&sbi->ll_pp_extent_lock);
1105         return len;
1106 }
1107
1108 LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
1109
1110 static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
1111 {
1112         struct timeval now;
1113         struct ll_sb_info *sbi = seq->private;
1114         struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
1115
1116         do_gettimeofday(&now);
1117
1118         if (!sbi->ll_rw_stats_on) {
1119                 seq_printf(seq, "Disabled\n"
1120                                 "Write anything in this file to activate\n");
1121                 return 0;
1122         }
1123         seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
1124                    now.tv_sec, now.tv_usec);
1125
1126         seq_printf(seq, "%15s %19s       | %20s\n", " ", "read", "write");
1127         seq_printf(seq, "%13s   %14s %4s %4s  | %14s %4s %4s\n", 
1128                    "extents", "calls", "%", "cum%",
1129                    "calls", "%", "cum%");
1130         spin_lock(&sbi->ll_lock);
1131         ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
1132         spin_unlock(&sbi->ll_lock);
1133
1134         return 0;
1135 }
1136
1137 static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf,
1138                                         size_t len, loff_t *off)
1139 {
1140         struct seq_file *seq = file->private_data;
1141         struct ll_sb_info *sbi = seq->private;
1142         struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
1143         int i;
1144
1145         sbi->ll_rw_stats_on = 1;
1146         spin_lock(&sbi->ll_pp_extent_lock);
1147         for(i = 0; i <= LL_PROCESS_HIST_MAX; i++)
1148         {
1149                 io_extents->pp_extents[i].pid = 0;
1150                 lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
1151                 lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
1152         }
1153         spin_unlock(&sbi->ll_pp_extent_lock);
1154
1155         return len;
1156 }
1157
1158 LPROC_SEQ_FOPS(ll_rw_extents_stats);
1159
1160 void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
1161                                *file, size_t count, int rw)
1162 {
1163         int i, cur = -1;
1164         struct ll_rw_process_info *process;
1165         struct ll_rw_process_info *offset;
1166         int *off_count = &sbi->ll_rw_offset_entry_count;
1167         int *process_count = &sbi->ll_offset_process_count;
1168         struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
1169
1170         if(!sbi->ll_rw_stats_on)
1171                 return;
1172         process = sbi->ll_rw_process_info;
1173         offset = sbi->ll_rw_offset_info;
1174
1175         spin_lock(&sbi->ll_pp_extent_lock);
1176         /* Extent statistics */
1177         for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
1178                 if(io_extents->pp_extents[i].pid == pid) {
1179                         cur = i;
1180                         break;
1181                 }
1182         }
1183
1184         if (cur == -1) {
1185                 /* new process */
1186                 sbi->ll_extent_process_count = 
1187                         (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
1188                 cur = sbi->ll_extent_process_count;
1189                 io_extents->pp_extents[cur].pid = pid;
1190                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
1191                 lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
1192         }
1193
1194         for(i = 0; (count >= (1 << LL_HIST_START << i)) && 
1195              (i < (LL_HIST_MAX - 1)); i++);
1196         if (rw == 0) {
1197                 io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
1198                 io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
1199         } else {
1200                 io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
1201                 io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
1202         }
1203         spin_unlock(&sbi->ll_pp_extent_lock);
1204
1205         spin_lock(&sbi->ll_process_lock);
1206         /* Offset statistics */
1207         for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
1208                 if (process[i].rw_pid == pid) {
1209                         if (process[i].rw_last_file != file) {
1210                                 process[i].rw_range_start = file->f_pos;
1211                                 process[i].rw_last_file_pos =
1212                                                         file->f_pos + count;
1213                                 process[i].rw_smallest_extent = count;
1214                                 process[i].rw_largest_extent = count;
1215                                 process[i].rw_offset = 0;
1216                                 process[i].rw_last_file = file;
1217                                 spin_unlock(&sbi->ll_process_lock);
1218                                 return;
1219                         }
1220                         if (process[i].rw_last_file_pos != file->f_pos) {
1221                                 *off_count =
1222                                     (*off_count + 1) % LL_OFFSET_HIST_MAX;
1223                                 offset[*off_count].rw_op = process[i].rw_op;
1224                                 offset[*off_count].rw_pid = pid;
1225                                 offset[*off_count].rw_range_start =
1226                                         process[i].rw_range_start;
1227                                 offset[*off_count].rw_range_end =
1228                                         process[i].rw_last_file_pos;
1229                                 offset[*off_count].rw_smallest_extent =
1230                                         process[i].rw_smallest_extent;
1231                                 offset[*off_count].rw_largest_extent =
1232                                         process[i].rw_largest_extent;
1233                                 offset[*off_count].rw_offset =
1234                                         process[i].rw_offset;
1235                                 process[i].rw_op = rw;
1236                                 process[i].rw_range_start = file->f_pos;
1237                                 process[i].rw_smallest_extent = count;
1238                                 process[i].rw_largest_extent = count;
1239                                 process[i].rw_offset = file->f_pos -
1240                                         process[i].rw_last_file_pos;
1241                         }
1242                         if(process[i].rw_smallest_extent > count)
1243                                 process[i].rw_smallest_extent = count;
1244                         if(process[i].rw_largest_extent < count)
1245                                 process[i].rw_largest_extent = count;
1246                         process[i].rw_last_file_pos = file->f_pos + count;
1247                         spin_unlock(&sbi->ll_process_lock);
1248                         return;
1249                 }
1250         }
1251         *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
1252         process[*process_count].rw_pid = pid;
1253         process[*process_count].rw_op = rw;
1254         process[*process_count].rw_range_start = file->f_pos;
1255         process[*process_count].rw_last_file_pos = file->f_pos + count;
1256         process[*process_count].rw_smallest_extent = count;
1257         process[*process_count].rw_largest_extent = count;
1258         process[*process_count].rw_offset = 0;
1259         process[*process_count].rw_last_file = file;
1260         spin_unlock(&sbi->ll_process_lock);
1261 }
1262
1263 char lpszt[] = LPSZ;
1264
1265 static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
1266 {
1267         struct timeval now;
1268         struct ll_sb_info *sbi = seq->private;
1269         struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
1270         struct ll_rw_process_info *process = sbi->ll_rw_process_info;
1271         char format[50];
1272         int i;
1273
1274         do_gettimeofday(&now);
1275
1276         if (!sbi->ll_rw_stats_on) {
1277                 seq_printf(seq, "Disabled\n"
1278                                 "Write anything in this file to activate\n");
1279                 return 0;
1280         }
1281         spin_lock(&sbi->ll_process_lock);
1282
1283         seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
1284                    now.tv_sec, now.tv_usec);
1285         seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
1286                    "R/W", "PID", "RANGE START", "RANGE END",
1287                    "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
1288         sprintf(format, "%s%s%s%s%s\n",
1289                 "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1, " %14Ld");
1290         /* We stored the discontiguous offsets here; print them first */
1291         for(i = 0; i < LL_OFFSET_HIST_MAX; i++) {
1292                 if (offset[i].rw_pid != 0)
1293                         /* Is there a way to snip the '%' off of LPSZ? */
1294                         seq_printf(seq, format,
1295                                    offset[i].rw_op ? 'W' : 'R',
1296                                    offset[i].rw_pid,
1297                                    offset[i].rw_range_start,
1298                                    offset[i].rw_range_end,
1299                                    offset[i].rw_smallest_extent,
1300                                    offset[i].rw_largest_extent,
1301                                    offset[i].rw_offset);
1302         }
1303         /* Then print the current offsets for each process */
1304         for(i = 0; i < LL_PROCESS_HIST_MAX; i++) {
1305                 if (process[i].rw_pid != 0)
1306                         seq_printf(seq, format,
1307                                    process[i].rw_op ? 'W' : 'R',
1308                                    process[i].rw_pid,
1309                                    process[i].rw_range_start,
1310                                    process[i].rw_last_file_pos,
1311                                    process[i].rw_smallest_extent,
1312                                    process[i].rw_largest_extent,
1313                                    process[i].rw_offset);
1314         }
1315         spin_unlock(&sbi->ll_process_lock);
1316
1317         return 0;
1318 }
1319
1320 static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf,
1321                                        size_t len, loff_t *off)
1322 {
1323         struct seq_file *seq = file->private_data;
1324         struct ll_sb_info *sbi = seq->private;
1325         struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
1326         struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
1327
1328         sbi->ll_rw_stats_on = 1;
1329
1330         spin_lock(&sbi->ll_process_lock);
1331         sbi->ll_offset_process_count = 0;
1332         sbi->ll_rw_offset_entry_count = 0;
1333         memset(process_info, 0, sizeof(struct ll_rw_process_info) *
1334                LL_PROCESS_HIST_MAX);
1335         memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
1336                LL_OFFSET_HIST_MAX);
1337         spin_unlock(&sbi->ll_process_lock);
1338
1339         return len;
1340 }
1341
1342 LPROC_SEQ_FOPS(ll_rw_offset_stats);
1343
1344 LPROCFS_INIT_VARS(llite, NULL, lprocfs_obd_vars)
1345 #endif /* LPROCFS */