/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Copyright (C) 2002 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
22 #define DEBUG_SUBSYSTEM S_LLITE
24 #include <linux/version.h>
25 #include <linux/lustre_lite.h>
26 #include <linux/lprocfs_status.h>
27 #include <linux/seq_file.h>
28 #include <linux/obd_support.h>
30 #include "llite_internal.h"
32 /* /proc/lustre/llite mount point registration */
33 struct proc_dir_entry *proc_lustre_fs_root;
34 struct file_operations llite_dump_pgcache_fops;
35 struct file_operations ll_ra_stats_fops;
38 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
39 struct super_block *sb, char *osc, char *mdc)
43 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
/* Number used by lprocfs_register_mountpoint() to build the name of each
 * mount's proc directory ("fs<N>"). */
long long mnt_instance;
48 static int ll_rd_blksize(char *page, char **start, off_t off, int count,
51 struct super_block *sb = (struct super_block *)data;
52 struct obd_statfs osfs;
56 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
59 rc = snprintf(page, count, "%u\n", osfs.os_bsize);
65 static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
68 struct super_block *sb = (struct super_block *)data;
69 struct obd_statfs osfs;
73 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
75 __u32 blk_size = osfs.os_bsize >> 10;
76 __u64 result = osfs.os_blocks;
78 while (blk_size >>= 1)
82 rc = snprintf(page, count, LPU64"\n", result);
88 static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
91 struct super_block *sb = (struct super_block *)data;
92 struct obd_statfs osfs;
96 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
98 __u32 blk_size = osfs.os_bsize >> 10;
99 __u64 result = osfs.os_bfree;
101 while (blk_size >>= 1)
105 rc = snprintf(page, count, LPU64"\n", result);
110 static int ll_rd_kbytesavail(char *page, char **start, off_t off, int count,
111 int *eof, void *data)
113 struct super_block *sb = (struct super_block *)data;
114 struct obd_statfs osfs;
118 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
120 __u32 blk_size = osfs.os_bsize >> 10;
121 __u64 result = osfs.os_bavail;
123 while (blk_size >>= 1)
127 rc = snprintf(page, count, LPU64"\n", result);
132 static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
133 int *eof, void *data)
135 struct super_block *sb = (struct super_block *)data;
136 struct obd_statfs osfs;
140 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
143 rc = snprintf(page, count, LPU64"\n", osfs.os_files);
148 static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
149 int *eof, void *data)
151 struct super_block *sb = (struct super_block *)data;
152 struct obd_statfs osfs;
156 rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
159 rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
165 static int ll_rd_fstype(char *page, char **start, off_t off, int count,
166 int *eof, void *data)
168 struct super_block *sb = (struct super_block*)data;
172 return snprintf(page, count, "%s\n", sb->s_type->name);
175 static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
176 int *eof, void *data)
178 struct super_block *sb = (struct super_block *)data;
182 return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
185 static int ll_rd_read_ahead(char *page, char **start, off_t off, int count,
186 int *eof, void *data)
188 struct super_block *sb = (struct super_block*)data;
189 struct ll_sb_info *sbi = ll_s2sbi(sb);
194 val = (sbi->ll_flags & LL_SBI_READAHEAD) ? 1 : 0;
195 rc = snprintf(page, count, "%d\n", val);
199 static int ll_wr_read_ahead(struct file *file, const char *buffer,
200 unsigned long count, void *data)
202 struct super_block *sb = (struct super_block*)data;
203 struct ll_sb_info *sbi = ll_s2sbi(sb);
207 if (sscanf(buffer, "%d", &readahead) != 1)
211 sbi->ll_flags |= LL_SBI_READAHEAD;
213 sbi->ll_flags &= ~LL_SBI_READAHEAD;
218 static int ll_rd_max_read_ahead_mb(char *page, char **start, off_t off,
219 int count, int *eof, void *data)
221 struct super_block *sb = data;
222 struct ll_sb_info *sbi = ll_s2sbi(sb);
225 spin_lock(&sbi->ll_lock);
226 val = (sbi->ll_ra_info.ra_max_pages << PAGE_CACHE_SHIFT) >> 20;
227 spin_unlock(&sbi->ll_lock);
229 return snprintf(page, count, "%u\n", val);
232 static int ll_wr_max_read_ahead_mb(struct file *file, const char *buffer,
233 unsigned long count, void *data)
235 struct super_block *sb = data;
236 struct ll_sb_info *sbi = ll_s2sbi(sb);
239 rc = lprocfs_write_helper(buffer, count, &val);
243 if (val < 0 || val > (num_physpages << PAGE_SHIFT) >> 20)
246 spin_lock(&sbi->ll_lock);
247 sbi->ll_ra_info.ra_max_pages = (val << 20) >> PAGE_CACHE_SHIFT;
248 spin_unlock(&sbi->ll_lock);
253 static struct lprocfs_vars lprocfs_obd_vars[] = {
254 { "uuid", ll_rd_sb_uuid, 0, 0 },
255 //{ "mntpt_path", ll_rd_path, 0, 0 },
256 { "fstype", ll_rd_fstype, 0, 0 },
257 { "blocksize", ll_rd_blksize, 0, 0 },
258 { "kbytestotal", ll_rd_kbytestotal, 0, 0 },
259 { "kbytesfree", ll_rd_kbytesfree, 0, 0 },
260 { "kbytesavail", ll_rd_kbytesavail, 0, 0 },
261 { "filestotal", ll_rd_filestotal, 0, 0 },
262 { "filesfree", ll_rd_filesfree, 0, 0 },
263 //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
264 { "read_ahead", ll_rd_read_ahead, ll_wr_read_ahead, 0 },
265 { "max_read_ahead_mb", ll_rd_max_read_ahead_mb,
266 ll_wr_max_read_ahead_mb, 0 },
270 #define MAX_STRING_SIZE 128
272 struct llite_file_opcode {
276 } llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
278 { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
279 { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
280 { LPROC_LL_WB_WRITEPAGE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
281 "writeback_from_writepage" },
282 { LPROC_LL_WB_PRESSURE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
283 "writeback_from_pressure" },
284 { LPROC_LL_WB_OK, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
285 "writeback_ok_pages" },
286 { LPROC_LL_WB_FAIL, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
287 "writeback_failed_pages" },
288 { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
290 { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
292 { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
294 { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
297 { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
298 { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
299 { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
300 { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" },
301 { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" },
302 { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" },
303 /* inode operation */
304 { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" },
305 { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "punch" },
306 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
307 { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" },
309 { LPROC_LL_REVALIDATE, LPROCFS_TYPE_REGS, "getattr" },
311 /* special inode operation */
312 { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" },
313 { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
314 { LPROC_LL_DIRECT_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
316 { LPROC_LL_DIRECT_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
321 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
322 struct super_block *sb, char *osc, char *mdc)
324 struct lprocfs_vars lvars[2];
325 struct ll_sb_info *sbi = ll_s2sbi(sb);
326 struct obd_device *obd;
327 char name[MAX_STRING_SIZE + 1];
329 struct lprocfs_stats *svc_stats = NULL;
330 struct proc_dir_entry *entry;
333 memset(lvars, 0, sizeof(lvars));
335 name[MAX_STRING_SIZE] = '\0';
336 lvars[0].name = name;
338 LASSERT(sbi != NULL);
339 LASSERT(mdc != NULL);
340 LASSERT(osc != NULL);
343 snprintf(name, MAX_STRING_SIZE, "fs%llu", mnt_instance);
346 sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL);
347 if (IS_ERR(sbi->ll_proc_root)) {
348 err = PTR_ERR(sbi->ll_proc_root);
349 sbi->ll_proc_root = NULL;
353 entry = create_proc_entry("dump_page_cache", 0444, sbi->ll_proc_root);
355 GOTO(out, err = -ENOMEM);
356 entry->proc_fops = &llite_dump_pgcache_fops;
359 entry = create_proc_entry("read_ahead_stats", 0444, sbi->ll_proc_root);
361 GOTO(out, err = -ENOMEM);
362 entry->proc_fops = &ll_ra_stats_fops;
365 svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES);
366 if (svc_stats == NULL) {
370 /* do counter init */
371 for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
372 __u32 type = llite_opcode_table[id].type;
374 if (type & LPROCFS_TYPE_REGS)
377 if (type & LPROCFS_TYPE_BYTES)
380 if (type & LPROCFS_TYPE_PAGES)
384 lprocfs_counter_init(svc_stats, llite_opcode_table[id].opcode,
385 (type & LPROCFS_CNTR_AVGMINMAX),
386 llite_opcode_table[id].opname, ptr);
388 err = lprocfs_register_stats(sbi->ll_proc_root, "stats", svc_stats);
392 sbi->ll_stats = svc_stats;
393 /* need place to keep svc_stats */
395 /* Static configuration info */
396 err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_obd_vars, sb);
401 obd = class_name2obd(mdc);
403 LASSERT(obd != NULL);
404 LASSERT(obd->obd_type != NULL);
405 LASSERT(obd->obd_type->typ_name != NULL);
407 snprintf(name, MAX_STRING_SIZE, "%s/common_name",
408 obd->obd_type->typ_name);
409 lvars[0].read_fptr = lprocfs_rd_name;
410 err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
414 snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
415 lvars[0].read_fptr = lprocfs_rd_uuid;
416 err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
421 obd = class_name2obd(osc);
423 LASSERT(obd != NULL);
424 LASSERT(obd->obd_type != NULL);
425 LASSERT(obd->obd_type->typ_name != NULL);
427 snprintf(name, MAX_STRING_SIZE, "%s/common_name",
428 obd->obd_type->typ_name);
429 lvars[0].read_fptr = lprocfs_rd_name;
430 err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
434 snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name);
435 lvars[0].read_fptr = lprocfs_rd_uuid;
436 err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd);
440 lprocfs_free_stats(svc_stats);
441 if (sbi->ll_proc_root)
442 lprocfs_remove(sbi->ll_proc_root);
447 void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi)
449 if (sbi->ll_proc_root) {
450 struct proc_dir_entry *file_stats =
451 lprocfs_srch(sbi->ll_proc_root, "stats");
454 lprocfs_free_stats(sbi->ll_stats);
455 lprocfs_remove(file_stats);
459 #undef MAX_STRING_SIZE
461 static struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
462 struct list_head *list)
464 struct ll_async_page *llap;
465 struct list_head *pos;
467 list_for_each(pos, list) {
468 if (pos == &sbi->ll_pglist)
470 llap = list_entry(pos, struct ll_async_page, llap_proc_item);
471 if (llap->llap_page == NULL)
/*
 * Append the name of page flag PG_<flag> to \a seq when it is set on
 * \a page.  \a has_flags must be an int lvalue: it is set to 1 by the
 * first flag printed, and every later flag is preceded by '|'.
 *
 * There is no trailing semicolon after "while (0)", so the macro behaves
 * as a single statement; callers supply the ';'.
 */
#define seq_page_flag(seq, page, flag, has_flags) do {                  \
                if (test_bit(PG_##flag, &(page)->flags)) {              \
                        if (!has_flags)                                 \
                                has_flags = 1;                          \
                        else                                            \
                                seq_putc(seq, '|');                     \
                        seq_puts(seq, #flag);                           \
                }                                                       \
        } while (0)
489 static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
491 struct ll_async_page *llap, *dummy_llap = seq->private;
492 struct ll_sb_info *sbi = dummy_llap->llap_cookie;
494 /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
495 * it in our own state */
496 if (dummy_llap->llap_magic == 0) {
497 seq_printf(seq, "generation | llap .cookie origin | page ");
498 seq_printf(seq, "inode .index [ page flags ]\n");
502 spin_lock(&sbi->ll_lock);
504 llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
507 struct page *page = llap->llap_page;
508 static char *origins[] = {
509 [LLAP_ORIGIN_UNKNOWN] = "--",
510 [LLAP_ORIGIN_READPAGE] = "rp",
511 [LLAP_ORIGIN_READAHEAD] = "ra",
512 [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
513 [LLAP_ORIGIN_WRITEPAGE] = "wp",
516 LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
519 seq_printf(seq, "%lu | %p %p %s | %p %p %lu [",
521 llap, llap->llap_cookie,
522 origins[llap->llap_origin],
523 page, page->mapping->host, page->index);
524 seq_page_flag(seq, page, locked, has_flags);
525 seq_page_flag(seq, page, error, has_flags);
526 seq_page_flag(seq, page, referenced, has_flags);
527 seq_page_flag(seq, page, uptodate, has_flags);
528 seq_page_flag(seq, page, dirty, has_flags);
529 seq_page_flag(seq, page, highmem, has_flags);
531 seq_puts(seq, "-]\n");
533 seq_puts(seq, "]\n");
536 spin_unlock(&sbi->ll_lock);
541 static void *llite_dump_pgcache_seq_start(struct seq_file *seq, loff_t *pos)
543 struct ll_async_page *llap = seq->private;
545 if (llap->llap_magic == 2)
551 static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
554 struct ll_async_page *llap, *dummy_llap = seq->private;
555 struct ll_sb_info *sbi = dummy_llap->llap_cookie;
557 /* bail if we just displayed the banner */
558 if (dummy_llap->llap_magic == 0) {
559 dummy_llap->llap_magic = 1;
563 /* we've just displayed the llap that is after us in the list.
564 * we advance to a position beyond it, returning null if there
565 * isn't another llap in the list beyond that new position. */
566 spin_lock(&sbi->ll_lock);
567 llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
568 list_del_init(&dummy_llap->llap_proc_item);
570 list_add(&dummy_llap->llap_proc_item, &llap->llap_proc_item);
571 llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_proc_item);
573 spin_unlock(&sbi->ll_lock);
577 dummy_llap->llap_magic = 2;
/* seq_file .stop for "dump_page_cache": nothing to release, because the
 * iteration state lives in the cursor llap. */
static void llite_dump_pgcache_seq_stop(struct seq_file *seq, void *v)
{
}
587 struct seq_operations llite_dump_pgcache_seq_sops = {
588 .start = llite_dump_pgcache_seq_start,
589 .stop = llite_dump_pgcache_seq_stop,
590 .next = llite_dump_pgcache_seq_next,
591 .show = llite_dump_pgcache_seq_show,
594 /* we're displaying llaps in a list_head list. we don't want to hold a lock
595 * while we walk the entire list, and we don't want to have to seek into
596 * the right position in the list as an app advances with many syscalls. we
597 * allocate a dummy llap and hang it off file->private. its position in
598 * the list records where the app is currently displaying. this way our
599 * seq .start and .stop don't actually do anything. .next returns null
600 * when the dummy hits the end of the list which eventually leads to .release
601 * where we tear down. this kind of displaying is super-racey, so we put
602 * a generation counter on the list so the output shows when the list
603 * changes between reads.
605 static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
607 struct proc_dir_entry *dp = PDE(inode);
608 struct ll_async_page *llap;
609 struct seq_file *seq;
610 struct ll_sb_info *sbi = dp->data;
613 OBD_ALLOC_GFP(llap, sizeof(*llap), GFP_KERNEL);
616 llap->llap_page = NULL;
617 llap->llap_cookie = sbi;
618 llap->llap_magic = 0;
620 rc = seq_open(file, &llite_dump_pgcache_seq_sops);
622 OBD_FREE(llap, sizeof(*llap));
625 seq = file->private_data;
628 spin_lock(&sbi->ll_lock);
629 list_add(&llap->llap_proc_item, &sbi->ll_pglist);
630 spin_unlock(&sbi->ll_lock);
635 static int llite_dump_pgcache_seq_release(struct inode *inode,
638 struct seq_file *seq = file->private_data;
639 struct ll_async_page *llap = seq->private;
640 struct ll_sb_info *sbi = llap->llap_cookie;
642 spin_lock(&sbi->ll_lock);
643 if (!list_empty(&llap->llap_proc_item))
644 list_del_init(&llap->llap_proc_item);
645 spin_unlock(&sbi->ll_lock);
646 OBD_FREE(llap, sizeof(*llap));
648 return seq_release(inode, file);
651 struct file_operations llite_dump_pgcache_fops = {
652 .owner = THIS_MODULE,
653 .open = llite_dump_pgcache_seq_open,
655 .release = llite_dump_pgcache_seq_release,
658 static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
661 struct ll_sb_info *sbi = seq->private;
662 struct ll_ra_info *ra = &sbi->ll_ra_info;
664 static char *ra_stat_strings[] = {
665 [RA_STAT_HIT] = "hits",
666 [RA_STAT_MISS] = "misses",
667 [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
668 [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
669 [RA_STAT_FAILED_MATCH] = "failed lock match",
670 [RA_STAT_DISCARDED] = "read but discarded",
671 [RA_STAT_ZERO_LEN] = "zero length file",
672 [RA_STAT_ZERO_WINDOW] = "zero size window",
673 [RA_STAT_EOF] = "read-ahead to EOF",
674 [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
677 do_gettimeofday(&now);
679 spin_lock(&sbi->ll_lock);
681 seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
682 now.tv_sec, now.tv_usec);
683 seq_printf(seq, "pending issued pages: %lu\n",
686 for(i = 0; i < _NR_RA_STAT; i++)
687 seq_printf(seq, "%-25s %lu\n", ra_stat_strings[i],
690 spin_unlock(&sbi->ll_lock);
695 static void *ll_ra_stats_seq_start(struct seq_file *p, loff_t *pos)
701 static void *ll_ra_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
/* seq_file .stop for "read_ahead_stats": nothing to release. */
static void ll_ra_stats_seq_stop(struct seq_file *p, void *v)
{
}
709 struct seq_operations ll_ra_stats_seq_sops = {
710 .start = ll_ra_stats_seq_start,
711 .stop = ll_ra_stats_seq_stop,
712 .next = ll_ra_stats_seq_next,
713 .show = ll_ra_stats_seq_show,
716 static int ll_ra_stats_seq_open(struct inode *inode, struct file *file)
718 struct proc_dir_entry *dp = PDE(inode);
719 struct seq_file *seq;
722 rc = seq_open(file, &ll_ra_stats_seq_sops);
725 seq = file->private_data;
726 seq->private = dp->data;
730 static ssize_t ll_ra_stats_seq_write(struct file *file, const char *buf,
731 size_t len, loff_t *off)
733 struct seq_file *seq = file->private_data;
734 struct ll_sb_info *sbi = seq->private;
735 struct ll_ra_info *ra = &sbi->ll_ra_info;
737 spin_lock(&sbi->ll_lock);
738 memset(ra->ra_stats, 0, sizeof(ra->ra_stats));
739 spin_unlock(&sbi->ll_lock);
744 struct file_operations ll_ra_stats_fops = {
745 .owner = THIS_MODULE,
746 .open = ll_ra_stats_seq_open,
748 .write = ll_ra_stats_seq_write,
750 .release = seq_release,