+
+/*
+ * Append the name of page flag PG_<flag> to @seq when that bit is set in
+ * (page)->flags.  @has_flags must be a modifiable int lvalue: it is set to 1
+ * when the first flag name is emitted, and every later name is preceded by
+ * a '|' separator.
+ *
+ * The expansion is a single statement WITHOUT a trailing semicolon; the
+ * caller supplies the ';'.  This keeps the macro usable as the body of an
+ * unbraced if/else.
+ */
+#define seq_page_flag(seq, page, flag, has_flags) do {                  \
+        if (test_bit(PG_##flag, &(page)->flags)) {                      \
+                if (!(has_flags))                                       \
+                        (has_flags) = 1;                                \
+                else                                                    \
+                        seq_putc(seq, '|');                             \
+                seq_puts(seq, #flag);                                   \
+        }                                                               \
+} while (0)
+
+/*
+ * seq_file .start callback for the page-cache dump.
+ *
+ * The iteration state lives in the dummy llap stored in seq->private, not
+ * in *pos.  A llap_magic of 2 means the walk already finished, so NULL is
+ * returned; in every other state a non-NULL placeholder token is returned.
+ */
+static void *llite_dump_pgcache_seq_start(struct seq_file *seq, loff_t *pos)
+{
+        struct ll_async_page *cursor = seq->private;
+
+        return cursor->llap_magic == 2 ? NULL : (void *)1;
+}
+
+/*
+ * seq_file .show callback for the page-cache dump.
+ *
+ * While the dummy llap's magic is still 0 only the column banner is
+ * printed.  Afterwards, with sbi->ll_lock held, the llap that
+ * llite_pglist_next_llap() reports for the dummy's list position is
+ * formatted as one line: list generation, llap fields, page/inode details
+ * and the names of the page flags that are set ("-" when none is).
+ * Nothing is printed when no llap is found.  Always returns 0.
+ */
+static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
+{
+        struct ll_async_page *cursor = seq->private;
+        struct ll_sb_info *sbi = cursor->llap_cookie;
+        struct ll_async_page *llap;
+        struct page *page;
+        int has_flags = 0;
+
+        /* 2.4 doesn't seem to have SEQ_START_TOKEN, so the "banner not yet
+         * printed" state is tracked in our own dummy llap */
+        if (cursor->llap_magic == 0) {
+                seq_printf(seq, "gener | llap cookie origin wq du wb | page "
+                           "inode index count [ page flags ]\n");
+                return 0;
+        }
+
+        spin_lock(&sbi->ll_lock);
+
+        llap = llite_pglist_next_llap(sbi, &cursor->llap_pglist_item);
+        if (llap == NULL)
+                goto out_unlock;
+
+        page = llap->llap_page;
+
+        LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
+                 llap->llap_origin);
+
+        seq_printf(seq," %5lu | %p %p %s %s %s %s | %p %lu/%u(%p) "
+                   "%lu %u [",
+                   sbi->ll_pglist_gen,
+                   llap, llap->llap_cookie,
+                   llap_origins[llap->llap_origin],
+                   llap->llap_write_queued ? "wq" : "- ",
+                   llap->llap_defer_uptodate ? "du" : "- ",
+                   PageWriteback(page) ? "wb" : "-",
+                   page, page->mapping->host->i_ino,
+                   page->mapping->host->i_generation,
+                   page->mapping->host, page->index,
+                   page_count(page));
+
+        /* one name per set flag, '|'-separated */
+        seq_page_flag(seq, page, locked, has_flags);
+        seq_page_flag(seq, page, error, has_flags);
+        seq_page_flag(seq, page, referenced, has_flags);
+        seq_page_flag(seq, page, uptodate, has_flags);
+        seq_page_flag(seq, page, dirty, has_flags);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,12))
+        seq_page_flag(seq, page, highmem, has_flags);
+#endif
+        seq_page_flag(seq, page, writeback, has_flags);
+
+        seq_puts(seq, has_flags ? "]\n" : "-]\n");
+
+out_unlock:
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+/*
+ * seq_file .next callback for the page-cache dump.
+ *
+ * The dummy llap in seq->private is the cursor.  Its llap_magic field is a
+ * small state machine: 0 = banner just shown, 1 = walking the list,
+ * 2 = walk finished (checked by .start).
+ *
+ * Returns the dummy llap while there is more to show, NULL when the walk
+ * is over.
+ */
+static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct ll_async_page *llap, *dummy_llap = seq->private;
+ struct ll_sb_info *sbi = dummy_llap->llap_cookie;
+
+ /* bail if we just displayed the banner */
+ /* NOTE(review): this path returns without incrementing *pos */
+ if (dummy_llap->llap_magic == 0) {
+ dummy_llap->llap_magic = 1;
+ return dummy_llap;
+ }
+
+ /* we've just displayed the llap that is after us in the list.
+ * we advance to a position beyond it, returning null if there
+ * isn't another llap in the list beyond that new position. */
+ spin_lock(&sbi->ll_lock);
+ llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
+ /* unlink the cursor; it stays unlinked if nothing follows it */
+ list_del_init(&dummy_llap->llap_pglist_item);
+ if (llap) {
+ /* re-insert the cursor right after the llap just shown, then
+ * look up what follows the cursor's new position */
+ list_add(&dummy_llap->llap_pglist_item,&llap->llap_pglist_item);
+ llap =llite_pglist_next_llap(sbi,&dummy_llap->llap_pglist_item);
+ }
+ spin_unlock(&sbi->ll_lock);
+
+ ++*pos;
+ if (llap == NULL) {
+ /* remember that the walk ended so .start returns NULL */
+ dummy_llap->llap_magic = 2;
+ return NULL;
+ }
+ return dummy_llap;
+}
+
+/* seq_file .stop callback: intentionally does nothing. */
+static void null_stop(struct seq_file *seq, void *v)
+{
+}
+
+/* seq_file iterator callbacks used by the page-cache dump file. */
+struct seq_operations llite_dump_pgcache_seq_sops = {
+        .start = llite_dump_pgcache_seq_start,
+        .next  = llite_dump_pgcache_seq_next,
+        .stop  = null_stop,
+        .show  = llite_dump_pgcache_seq_show,
+};
+
+/*
+ * Open handler for the page-cache dump file.
+ *
+ * The llaps being displayed live on a list_head list.  We do not want to
+ * hold a lock while walking the whole list, nor to seek to the right
+ * position again on every read syscall.  Instead a dummy llap is allocated,
+ * stored in seq->private and linked into the list; its position in the
+ * list records where the reader currently is.  As a result the seq .start
+ * and .stop callbacks do almost nothing, .next returns NULL once the dummy
+ * reaches the end of the list, and the dummy is torn down in .release.
+ * Dumping this way is inherently racy, so the list carries a generation
+ * counter and the output shows when the list changed between reads.
+ *
+ * Returns 0 on success, -ENOMEM if the dummy llap cannot be allocated, or
+ * the error returned by seq_open().  LPROCFS_EXIT() is invoked here only on
+ * failure.
+ */
+static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
+{
+        struct proc_dir_entry *dp = PDE(inode);
+        struct ll_sb_info *sbi = dp->data;
+        struct ll_async_page *dummy_llap;
+        struct seq_file *seq;
+        int rc = -ENOMEM;
+
+        LPROCFS_ENTRY_AND_CHECK(dp);
+
+        OBD_ALLOC_PTR_WAIT(dummy_llap);
+        if (dummy_llap == NULL)
+                GOTO(out, rc);
+
+        /* magic 0: the banner has not been printed yet */
+        dummy_llap->llap_magic = 0;
+        dummy_llap->llap_cookie = sbi;
+        dummy_llap->llap_page = NULL;
+
+        rc = seq_open(file, &llite_dump_pgcache_seq_sops);
+        if (rc != 0) {
+                OBD_FREE(dummy_llap, sizeof(*dummy_llap));
+                GOTO(out, rc);
+        }
+
+        seq = file->private_data;
+        seq->private = dummy_llap;
+
+        /* park the cursor at the head of the page list */
+        spin_lock(&sbi->ll_lock);
+        list_add(&dummy_llap->llap_pglist_item, &sbi->ll_pglist);
+        spin_unlock(&sbi->ll_lock);
+
+out:
+        if (rc != 0)
+                LPROCFS_EXIT();
+        return rc;
+}
+
+/*
+ * Release handler for the page-cache dump file: unlinks the dummy llap
+ * from the page list if it is still linked (under sbi->ll_lock), frees it,
+ * and returns the result of lprocfs_seq_release().
+ */
+static int llite_dump_pgcache_seq_release(struct inode *inode,
+                                          struct file *file)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_async_page *dummy_llap = seq->private;
+        struct ll_sb_info *sbi = dummy_llap->llap_cookie;
+
+        spin_lock(&sbi->ll_lock);
+        /* .next leaves the cursor unlinked once the walk has finished */
+        if (!list_empty(&dummy_llap->llap_pglist_item)) {
+                list_del_init(&dummy_llap->llap_pglist_item);
+        }
+        spin_unlock(&sbi->ll_lock);
+
+        OBD_FREE(dummy_llap, sizeof(*dummy_llap));
+
+        return lprocfs_seq_release(inode, file);
+}
+
+/* File operations for the page-cache dump file (read-only, seq_file based). */
+struct file_operations llite_dump_pgcache_fops = {
+        .owner   = THIS_MODULE,
+        .open    = llite_dump_pgcache_seq_open,
+        .release = llite_dump_pgcache_seq_release,
+        .read    = seq_read,
+};
+
+/*
+ * Print the read-ahead statistics of the superblock in seq->private: a
+ * timestamp, the current ra_cur_pages value, and one "<label> <count>"
+ * line for each of the _NR_RA_STAT counters.  The counters are read with
+ * sbi->ll_lock held.  Always returns 0.
+ */
+static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
+{
+        /* label for each counter, indexed by its RA_STAT_* value */
+        static char *ra_stat_strings[] = {
+                [RA_STAT_HIT]              = "hits",
+                [RA_STAT_MISS]             = "misses",
+                [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
+                [RA_STAT_MISS_IN_WINDOW]   = "miss inside window",
+                [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
+                [RA_STAT_FAILED_MATCH]     = "failed lock match",
+                [RA_STAT_DISCARDED]        = "read but discarded",
+                [RA_STAT_ZERO_LEN]         = "zero length file",
+                [RA_STAT_ZERO_WINDOW]      = "zero size window",
+                [RA_STAT_EOF]              = "read-ahead to EOF",
+                [RA_STAT_MAX_IN_FLIGHT]    = "hit max r-a issue",
+                [RA_STAT_WRONG_GRAB_PAGE]  = "wrong page from grab_cache_page",
+        };
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_ra_info *ra_info = &sbi->ll_ra_info;
+        struct timeval now;
+        int idx;
+
+        do_gettimeofday(&now);
+
+        spin_lock(&sbi->ll_lock);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "pending issued pages: %lu\n",
+                   ra_info->ra_cur_pages);
+
+        idx = 0;
+        while (idx < _NR_RA_STAT) {
+                seq_printf(seq, "%-25s %lu\n", ra_stat_strings[idx],
+                           ra_info->ra_stats[idx]);
+                idx++;
+        }
+
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+/*
+ * Any write to the read-ahead statistics file zeroes all ra_stats counters
+ * (under sbi->ll_lock).  The written data itself is ignored; @len is
+ * returned so the whole write is reported as consumed.
+ */
+static ssize_t ll_ra_stats_seq_write(struct file *file, const char *buf,
+                                     size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+
+        spin_lock(&sbi->ll_lock);
+        memset(sbi->ll_ra_info.ra_stats, 0,
+               sizeof(sbi->ll_ra_info.ra_stats));
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+LPROC_SEQ_FOPS(ll_ra_stats);
+
+/*
+ * Integer percentage of a relative to b; evaluates to 0 when b is 0.
+ * Both arguments are parenthesized so that expression arguments such as
+ * pct(x + y, t) group correctly.  Note that b is still evaluated twice.
+ */
+#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
+
+/*
+ * Print the read and write extent-size histograms stored in
+ * io_extents->pp_extents[which] to @seq.
+ *
+ * Each line covers one bucket: its size range, then for reads and for
+ * writes the call count, its percentage of the total, and the cumulative
+ * percentage.  Range bounds are scaled through the unit letters in
+ * "KMGTPEZY", stepping to the next letter each time the upper bound
+ * reaches 1024.  The last bucket is marked with '+'.  Output stops early
+ * once every recorded read and write has been accounted for.
+ */
+static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
+                                    struct seq_file *seq, int which)
+{
+        struct per_process_info *pp_info = &io_extents->pp_extents[which];
+        unsigned long read_tot = 0, write_tot = 0;
+        unsigned long read_cum = 0, write_cum = 0;
+        unsigned long start = 0, end, r, w;
+        char *unitp = "KMGTPEZY";
+        int units = 10;
+        int i;
+
+        /* first pass: totals, needed for the percentage columns */
+        for (i = 0; i < LL_HIST_MAX; i++) {
+                read_tot += pp_info->pp_r_hist.oh_buckets[i];
+                write_tot += pp_info->pp_w_hist.oh_buckets[i];
+        }
+
+        /* second pass: one output line per bucket */
+        for (i = 0; i < LL_HIST_MAX; i++) {
+                char last_mark = (i == LL_HIST_MAX - 1) ? '+' : ' ';
+
+                r = pp_info->pp_r_hist.oh_buckets[i];
+                w = pp_info->pp_w_hist.oh_buckets[i];
+                read_cum += r;
+                write_cum += w;
+                end = 1 << (i + LL_HIST_START - units);
+
+                seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | "
+                           "%14lu %4lu %4lu\n", start, *unitp, end, *unitp,
+                           last_mark,
+                           r, pct(r, read_tot), pct(read_cum, read_tot),
+                           w, pct(w, write_tot), pct(write_cum, write_tot));
+
+                start = end;
+                if (start == 1<<10) {
+                        /* rolled over to the next unit letter */
+                        start = 1;
+                        units += 10;
+                        unitp++;
+                }
+
+                if (read_cum == read_tot && write_cum == write_tot)
+                        break;
+        }
+}
+
+/*
+ * Print the per-process extent-size histograms.  When statistics are
+ * switched off only a hint on how to enable them is printed.  Otherwise a
+ * timestamp and column headers are followed, for every pp_extents slot
+ * with a non-zero pid, by "PID: <pid>" and that slot's histogram.  The
+ * slots are read with sbi->ll_pp_extent_lock held.  Always returns 0.
+ */
+static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
+{
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+        struct timeval now;
+        int slot;
+
+        do_gettimeofday(&now);
+
+        if (!sbi->ll_rw_stats_on) {
+                seq_printf(seq, "Disabled\n"
+                           "Write anything in this file to activate\n");
+                return 0;
+        }
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+
+        spin_lock(&sbi->ll_pp_extent_lock);
+        for (slot = 0; slot < LL_PROCESS_HIST_MAX; slot++) {
+                /* a pid of 0 marks an unused slot */
+                if (io_extents->pp_extents[slot].pid == 0)
+                        continue;
+
+                seq_printf(seq, "\nPID: %d\n",
+                           io_extents->pp_extents[slot].pid);
+                ll_display_extents_info(io_extents, seq, slot);
+        }
+        spin_unlock(&sbi->ll_pp_extent_lock);
+
+        return 0;
+}
+
+/*
+ * Any write to the per-process extent statistics file switches statistics
+ * on and resets the first LL_PROCESS_HIST_MAX pp_extents slots (pid and
+ * both histograms) under sbi->ll_pp_extent_lock.  The written data is
+ * ignored; @len is returned.
+ */
+static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
+                                                const char *buf, size_t len,
+                                                loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+        struct per_process_info *pp;
+        int slot;
+
+        sbi->ll_rw_stats_on = 1;
+
+        spin_lock(&sbi->ll_pp_extent_lock);
+        for (slot = 0; slot < LL_PROCESS_HIST_MAX; slot++) {
+                pp = &io_extents->pp_extents[slot];
+                pp->pid = 0;
+                lprocfs_oh_clear(&pp->pp_r_hist);
+                lprocfs_oh_clear(&pp->pp_w_hist);
+        }
+        spin_unlock(&sbi->ll_pp_extent_lock);
+
+        return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
+
+/*
+ * Print the aggregate extent-size histogram, i.e. the pp_extents slot at
+ * index LL_PROCESS_HIST_MAX.  When statistics are switched off only a hint
+ * on how to enable them is printed.  Always returns 0.
+ *
+ * The histogram is read under sbi->ll_pp_extent_lock, the same lock that
+ * ll_rw_stats_tally() and the extent-stats write handlers hold while
+ * updating or clearing it, so the snapshot cannot race with them.
+ */
+static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        do_gettimeofday(&now);
+
+        if (!sbi->ll_rw_stats_on) {
+                seq_printf(seq, "Disabled\n"
+                           "Write anything in this file to activate\n");
+                return 0;
+        }
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+
+        seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
+        seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
+                   "extents", "calls", "%", "cum%",
+                   "calls", "%", "cum%");
+        spin_lock(&sbi->ll_pp_extent_lock);
+        ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
+        spin_unlock(&sbi->ll_pp_extent_lock);
+
+        return 0;
+}
+
+/*
+ * Any write to the extent statistics file switches statistics on and
+ * resets every pp_extents slot from index 0 through LL_PROCESS_HIST_MAX
+ * inclusive, which includes the slot ll_rw_extents_stats_seq_show()
+ * displays.  Runs under sbi->ll_pp_extent_lock.  The written data is
+ * ignored; @len is returned.
+ */
+static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf,
+                                             size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+        struct per_process_info *pp;
+        int slot;
+
+        sbi->ll_rw_stats_on = 1;
+
+        spin_lock(&sbi->ll_pp_extent_lock);
+        /* "<=" is deliberate: the last index is cleared too */
+        for (slot = 0; slot <= LL_PROCESS_HIST_MAX; slot++) {
+                pp = &io_extents->pp_extents[slot];
+                pp->pid = 0;
+                lprocfs_oh_clear(&pp->pp_r_hist);
+                lprocfs_oh_clear(&pp->pp_w_hist);
+        }
+        spin_unlock(&sbi->ll_pp_extent_lock);
+
+        return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_extents_stats);
+
+/*
+ * Record one read or write of @count bytes issued by @pid on @file in the
+ * I/O statistics of @sbi.  Does nothing unless sbi->ll_rw_stats_on is set.
+ *
+ * @rw: 0 is counted as a read, any other value as a write.
+ *
+ * Two sets of statistics are updated, each under its own spinlock:
+ *
+ *  - Extent sizes (ll_pp_extent_lock): the histogram bucket for @count is
+ *    incremented in the slot tracking @pid and in the aggregate slot at
+ *    index LL_PROCESS_HIST_MAX.  An unknown pid takes over the next slot
+ *    in round-robin order, and that slot's histograms are cleared first.
+ *
+ *  - Offsets (ll_process_lock): per pid, the current contiguous range is
+ *    tracked.  When an access on the same file does not start where the
+ *    previous one ended, the finished range is copied into the
+ *    ll_rw_offset_info ring and a new range is started.
+ */
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
+                       *file, size_t count, int rw)
+{
+        int i, cur = -1;
+        struct ll_rw_process_info *process;
+        struct ll_rw_process_info *offset;
+        int *off_count = &sbi->ll_rw_offset_entry_count;
+        int *process_count = &sbi->ll_offset_process_count;
+        struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
+
+        if (!sbi->ll_rw_stats_on)
+                return;
+        process = sbi->ll_rw_process_info;
+        offset = sbi->ll_rw_offset_info;
+
+        spin_lock(&sbi->ll_pp_extent_lock);
+        /* Extent statistics */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (io_extents->pp_extents[i].pid == pid) {
+                        cur = i;
+                        break;
+                }
+        }
+
+        if (cur == -1) {
+                /* new process */
+                sbi->ll_extent_process_count =
+                        (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
+                cur = sbi->ll_extent_process_count;
+                io_extents->pp_extents[cur].pid = pid;
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
+                lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
+        }
+
+        /* Pick the bucket: the first i with count < 2^(LL_HIST_START + i),
+         * capped at the last bucket.  The bound on i is tested before the
+         * shift and the shift is done in unsigned long long, so a large
+         * count can no longer overflow a signed int shift.
+         * NOTE(review): assumes LL_HIST_START + LL_HIST_MAX - 1 < 64. */
+        for (i = 0; i < LL_HIST_MAX - 1 &&
+             (unsigned long long)count >= (1ULL << (LL_HIST_START + i)); i++)
+                ;
+        if (rw == 0) {
+                io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
+        } else {
+                io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
+                io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
+        }
+        spin_unlock(&sbi->ll_pp_extent_lock);
+
+        spin_lock(&sbi->ll_process_lock);
+        /* Offset statistics */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid == pid) {
+                        if (process[i].rw_last_file != file) {
+                                /* pid moved to another file: restart its
+                                 * range without logging the old one */
+                                process[i].rw_range_start = file->f_pos;
+                                process[i].rw_last_file_pos =
+                                                        file->f_pos + count;
+                                process[i].rw_smallest_extent = count;
+                                process[i].rw_largest_extent = count;
+                                process[i].rw_offset = 0;
+                                process[i].rw_last_file = file;
+                                spin_unlock(&sbi->ll_process_lock);
+                                return;
+                        }
+                        if (process[i].rw_last_file_pos != file->f_pos) {
+                                /* discontiguous access: save the finished
+                                 * range in the offset ring ... */
+                                *off_count =
+                                    (*off_count + 1) % LL_OFFSET_HIST_MAX;
+                                offset[*off_count].rw_op = process[i].rw_op;
+                                offset[*off_count].rw_pid = pid;
+                                offset[*off_count].rw_range_start =
+                                        process[i].rw_range_start;
+                                offset[*off_count].rw_range_end =
+                                        process[i].rw_last_file_pos;
+                                offset[*off_count].rw_smallest_extent =
+                                        process[i].rw_smallest_extent;
+                                offset[*off_count].rw_largest_extent =
+                                        process[i].rw_largest_extent;
+                                offset[*off_count].rw_offset =
+                                        process[i].rw_offset;
+                                /* ... and start a new one at f_pos */
+                                process[i].rw_op = rw;
+                                process[i].rw_range_start = file->f_pos;
+                                process[i].rw_smallest_extent = count;
+                                process[i].rw_largest_extent = count;
+                                process[i].rw_offset = file->f_pos -
+                                        process[i].rw_last_file_pos;
+                        }
+                        if (process[i].rw_smallest_extent > count)
+                                process[i].rw_smallest_extent = count;
+                        if (process[i].rw_largest_extent < count)
+                                process[i].rw_largest_extent = count;
+                        process[i].rw_last_file_pos = file->f_pos + count;
+                        spin_unlock(&sbi->ll_process_lock);
+                        return;
+                }
+        }
+        /* pid not tracked yet: take over the next slot in round-robin order */
+        *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
+        process[*process_count].rw_pid = pid;
+        process[*process_count].rw_op = rw;
+        process[*process_count].rw_range_start = file->f_pos;
+        process[*process_count].rw_last_file_pos = file->f_pos + count;
+        process[*process_count].rw_smallest_extent = count;
+        process[*process_count].rw_largest_extent = count;
+        process[*process_count].rw_offset = 0;
+        process[*process_count].rw_last_file = file;
+        spin_unlock(&sbi->ll_process_lock);
+}
+
+/* Array holding the LPSZ format string; ll_rw_offset_stats_seq_show() uses
+ * lpszt + 1 (the string minus its first character) to splice the
+ * conversion behind an explicit field width. */
+char lpszt[] = LPSZ;
+
+/*
+ * Print the offset statistics: first the discontiguous ranges saved in
+ * ll_rw_offset_info, then the range each tracked process is currently in
+ * (ll_rw_process_info).  Entries with a pid of 0 are skipped.  When
+ * statistics are switched off only a hint on how to enable them is
+ * printed.  Always returns 0.
+ *
+ * The row format is assembled at run time because the two extent columns
+ * need the LPSZ conversion combined with a field width: "%17" followed by
+ * lpszt + 1, i.e. LPSZ without its leading '%'.  The format is built with
+ * snprintf() so it can never overrun the buffer, and it is built before
+ * taking the spinlock since it does not depend on the protected data.
+ */
+static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
+        struct ll_rw_process_info *process = sbi->ll_rw_process_info;
+        char format[50];
+        int i;
+
+        do_gettimeofday(&now);
+
+        if (!sbi->ll_rw_stats_on) {
+                seq_printf(seq, "Disabled\n"
+                           "Write anything in this file to activate\n");
+                return 0;
+        }
+
+        snprintf(format, sizeof(format), "%s%s%s%s%s\n",
+                 "%3c %10d %14Lu %14Lu %17", lpszt+1, " %17", lpszt+1,
+                 " %14Ld");
+
+        spin_lock(&sbi->ll_process_lock);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
+                   "R/W", "PID", "RANGE START", "RANGE END",
+                   "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
+        /* We stored the discontiguous offsets here; print them first */
+        for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
+                if (offset[i].rw_pid != 0)
+                        seq_printf(seq, format,
+                                   offset[i].rw_op ? 'W' : 'R',
+                                   offset[i].rw_pid,
+                                   offset[i].rw_range_start,
+                                   offset[i].rw_range_end,
+                                   offset[i].rw_smallest_extent,
+                                   offset[i].rw_largest_extent,
+                                   offset[i].rw_offset);
+        }
+        /* Then print the current offsets for each process */
+        for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
+                if (process[i].rw_pid != 0)
+                        seq_printf(seq, format,
+                                   process[i].rw_op ? 'W' : 'R',
+                                   process[i].rw_pid,
+                                   process[i].rw_range_start,
+                                   process[i].rw_last_file_pos,
+                                   process[i].rw_smallest_extent,
+                                   process[i].rw_largest_extent,
+                                   process[i].rw_offset);
+        }
+        spin_unlock(&sbi->ll_process_lock);
+
+        return 0;
+}
+
+/*
+ * Any write to the offset statistics file switches statistics on and,
+ * under sbi->ll_process_lock, resets both ring indices and zeroes the
+ * per-process and saved-offset tables.  The written data is ignored; @len
+ * is returned.
+ */
+static ssize_t ll_rw_offset_stats_seq_write(struct file *file, const char *buf,
+                                            size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_rw_process_info *proc_tbl = sbi->ll_rw_process_info;
+        struct ll_rw_process_info *off_tbl = sbi->ll_rw_offset_info;
+
+        sbi->ll_rw_stats_on = 1;
+
+        spin_lock(&sbi->ll_process_lock);
+
+        sbi->ll_offset_process_count = 0;
+        sbi->ll_rw_offset_entry_count = 0;
+
+        memset(proc_tbl, 0, sizeof(*proc_tbl) * LL_PROCESS_HIST_MAX);
+        memset(off_tbl, 0, sizeof(*off_tbl) * LL_OFFSET_HIST_MAX);
+
+        spin_unlock(&sbi->ll_process_lock);
+
+        return len;
+}
+
+LPROC_SEQ_FOPS(ll_rw_offset_stats);
+
+/*
+ * Fill @lvars for llite: obd_vars points at lprocfs_llite_obd_vars and
+ * module_vars is set to NULL.
+ */
+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
+{
+        lvars->obd_vars = lprocfs_llite_obd_vars;
+        lvars->module_vars = NULL;
+}