#ifdef LPROCFS
/* /proc/lustre/llite mount point registration */
-struct file_operations llite_dump_pgcache_fops;
+extern struct file_operations vvp_dump_pgcache_file_ops;
struct file_operations ll_rw_extents_stats_fops;
struct file_operations ll_rw_extents_stats_pp_fops;
struct file_operations ll_rw_offset_stats_fops;
return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
}
+static int ll_rd_site_stats(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct super_block *sb = data;
+
+ /*
+ * See description of statistical counters in struct cl_site, and
+ * struct lu_site.
+ */
+ return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site),
+ page, count);
+}
+
static int ll_rd_max_readahead_mb(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
return count;
}
+static int ll_rd_max_readahead_per_file_mb(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct super_block *sb = data;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ long pages_number;
+ int mult;
+
+ spin_lock(&sbi->ll_lock);
+ pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
+ spin_unlock(&sbi->ll_lock);
+
+ mult = 1 << (20 - CFS_PAGE_SHIFT);
+ return lprocfs_read_frac_helper(page, count, pages_number, mult);
+}
+
+static int ll_wr_max_readahead_per_file_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ struct super_block *sb = data;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int mult, rc, pages_number;
+
+ mult = 1 << (20 - CFS_PAGE_SHIFT);
+ rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+ if (rc)
+ return rc;
+
+ if (pages_number < 0 ||
+ pages_number > sbi->ll_ra_info.ra_max_pages) {
+ CERROR("can't set file readahead more than"
+ "max_read_ahead_mb %lu MB\n", sbi->ll_ra_info.ra_max_pages);
+ return -ERANGE;
+ }
+
+ spin_lock(&sbi->ll_lock);
+ sbi->ll_ra_info.ra_max_pages_per_file = pages_number;
+ spin_unlock(&sbi->ll_lock);
+
+ return count;
+}
+
static int ll_rd_max_read_ahead_whole_mb(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
/* Cap this at the current max readahead window size, the readahead
* algorithm does this anyway so it's pointless to set it larger. */
- if (pages_number < 0 || pages_number > sbi->ll_ra_info.ra_max_pages) {
+ if (pages_number < 0 ||
+ pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
CERROR("can't set max_read_ahead_whole_mb more than "
- "max_read_ahead_mb: %lu\n",
- sbi->ll_ra_info.ra_max_pages >> (20 - CFS_PAGE_SHIFT));
+ "max_read_ahead_per_file_mb: %lu\n",
+ sbi->ll_ra_info.ra_max_pages_per_file >> (20 - CFS_PAGE_SHIFT));
return -ERANGE;
}
{
struct super_block *sb = data;
struct ll_sb_info *sbi = ll_s2sbi(sb);
- unsigned long budget;
- int mult, rc, pages_number, cpu;
+ int mult, rc, pages_number;
mult = 1 << (20 - CFS_PAGE_SHIFT);
rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
/* Not set up yet, don't call llap_shrink_cache */
return count;
- spin_lock(&sbi->ll_async_page_reblnc_lock);
- budget = sbi->ll_async_page_max / num_online_cpus();
- for_each_online_cpu(cpu)
- LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget = budget;
- spin_unlock(&sbi->ll_async_page_reblnc_lock);
-
- if (lcounter_read(&sbi->ll_async_page_count) >= sbi->ll_async_page_max)
- llap_shrink_cache(sbi, -1);
-
return count;
}
-static int ll_rd_pgcache_bnlc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct super_block *sb = data;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct ll_pglist_data *pd;
- unsigned long total_budget = 0;
- int n = 0, cpu;
-
- n += snprintf(page +n, count - n,
- "cpu\tpage count\tbudget\t\treblnc count\tgen\thit\tmiss\tcross\n");
- for_each_online_cpu(cpu) {
- pd = LL_PGLIST_DATA_CPU(sbi, cpu);
- n += snprintf(page + n, count - n,
- "%d\t%-8lu\t%-8lu\t%-8lu\t%lu\t%lu\t%lu\t%lu\n",
- cpu, pd->llpd_count, pd->llpd_budget,
- pd->llpd_reblnc_count, pd->llpd_gen,
- pd->llpd_hit, pd->llpd_miss, pd->llpd_cross);
- total_budget += pd->llpd_budget;
- }
- n += snprintf(page + n, count - n,
- "Total budget: %lu, page max: %lu, rebalance cnt: %lu\n",
- total_budget, sbi->ll_async_page_max,
- sbi->ll_async_page_reblnc_count);
- *eof = 1;
- return n;
-}
-
static int ll_rd_checksum(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
sbi->ll_sa_miss);
}
-static int ll_rd_contention_time(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct super_block *sb = data;
-
- *eof = 1;
- return snprintf(page, count, "%u\n", ll_s2sbi(sb)->ll_contention_time);
-
-}
-
-static int ll_wr_contention_time(struct file *file, const char *buffer,
- unsigned long count, void *data)
+static int ll_rd_lazystatfs(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
{
struct super_block *sb = data;
struct ll_sb_info *sbi = ll_s2sbi(sb);
- return lprocfs_write_helper(buffer, count,&sbi->ll_contention_time) ?:
- count;
-}
-
-static int ll_rd_lockless_truncate(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct super_block *sb = data;
-
- *eof = 1;
return snprintf(page, count, "%u\n",
- ll_s2sbi(sb)->ll_lockless_truncate_enable);
+ (sbi->ll_flags & LL_SBI_LAZYSTATFS) ? 1 : 0);
}
-static int ll_wr_lockless_truncate(struct file *file, const char *buffer,
- unsigned long count, void *data)
+static int ll_wr_lazystatfs(struct file *file, const char *buffer,
+ unsigned long count, void *data)
{
struct super_block *sb = data;
struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int val, rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ if (val)
+ sbi->ll_flags |= LL_SBI_LAZYSTATFS;
+ else
+ sbi->ll_flags &= ~LL_SBI_LAZYSTATFS;
- return lprocfs_write_helper(buffer, count,
- &sbi->ll_lockless_truncate_enable)
- ?: count;
+ return count;
}
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
{ "uuid", ll_rd_sb_uuid, 0, 0 },
//{ "mntpt_path", ll_rd_path, 0, 0 },
{ "fstype", ll_rd_fstype, 0, 0 },
+ { "site", ll_rd_site_stats, 0, 0 },
{ "blocksize", ll_rd_blksize, 0, 0 },
{ "kbytestotal", ll_rd_kbytestotal, 0, 0 },
{ "kbytesfree", ll_rd_kbytesfree, 0, 0 },
//{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
{ "max_read_ahead_mb", ll_rd_max_readahead_mb,
ll_wr_max_readahead_mb, 0 },
+ { "max_read_ahead_per_file_mb", ll_rd_max_readahead_per_file_mb,
+ ll_wr_max_readahead_per_file_mb, 0 },
{ "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb,
ll_wr_max_read_ahead_whole_mb, 0 },
{ "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 },
- { "pgcache_balance",ll_rd_pgcache_bnlc, 0, 0 },
{ "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 },
{ "max_rw_chunk", ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 },
{ "stats_track_pid", ll_rd_track_pid, ll_wr_track_pid, 0 },
{ "stats_track_gid", ll_rd_track_gid, ll_wr_track_gid, 0 },
{ "statahead_max", ll_rd_statahead_max, ll_wr_statahead_max, 0 },
{ "statahead_stats", ll_rd_statahead_stats, 0, 0 },
- { "contention_seconds", ll_rd_contention_time, ll_wr_contention_time, 0},
- { "lockless_truncate", ll_rd_lockless_truncate,
- ll_wr_lockless_truncate, 0},
+ { "lazystatfs", ll_rd_lazystatfs, ll_wr_lazystatfs, 0 },
{ 0 }
};
}
EXPORT_SYMBOL(ll_stats_ops_tally);
+static const char *ra_stat_string[] = {
+ [RA_STAT_HIT] = "hits",
+ [RA_STAT_MISS] = "misses",
+ [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
+ [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
+ [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
+ [RA_STAT_FAILED_MATCH] = "failed lock match",
+ [RA_STAT_DISCARDED] = "read but discarded",
+ [RA_STAT_ZERO_LEN] = "zero length file",
+ [RA_STAT_ZERO_WINDOW] = "zero size window",
+ [RA_STAT_EOF] = "read-ahead to EOF",
+ [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
+ [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+};
+
+
int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
struct super_block *sb, char *osc, char *mdc)
{
struct obd_device *obd;
char name[MAX_STRING_SIZE + 1], *ptr;
int err, id, len, rc;
- static const char *ra_stats_string[] = LL_RA_STAT_STRINGS;
ENTRY;
memset(lvars, 0, sizeof(lvars));
RETURN(err);
}
-
rc = lprocfs_seq_create(sbi->ll_proc_root, "dump_page_cache", 0444,
- &llite_dump_pgcache_fops, sbi);
+ &vvp_dump_pgcache_file_ops, sbi);
if (rc)
CWARN("Error adding the dump_page_cache file\n");
/* File operations stats */
sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
- LPROCFS_STATS_FLAG_PERCPU);
+ LPROCFS_STATS_FLAG_NONE);
if (sbi->ll_stats == NULL)
GOTO(out, err = -ENOMEM);
/* do counter init */
if (err)
GOTO(out, err);
- sbi->ll_ra_stats = lprocfs_alloc_stats(LL_RA_STAT,
- LPROCFS_STATS_FLAG_PERCPU);
+ sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
+ LPROCFS_STATS_FLAG_NONE);
if (sbi->ll_ra_stats == NULL)
GOTO(out, err = -ENOMEM);
- for (id = 0; id < LL_RA_STAT; id++)
+ for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++)
lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
- ra_stats_string[id], "pages");
+ ra_stat_string[id], "pages");
err = lprocfs_register_stats(sbi->ll_proc_root, "read_ahead_stats",
sbi->ll_ra_stats);
if (err)
}
#undef MAX_STRING_SIZE
-#define seq_page_flag(seq, page, flag, has_flags) do { \
- if (test_bit(PG_##flag, &(page)->flags)) { \
- if (!has_flags) \
- has_flags = 1; \
- else \
- seq_putc(seq, '|'); \
- seq_puts(seq, #flag); \
- } \
- } while(0);
-
-static void *llite_dump_pgcache_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct ll_async_page *dummy_llap = seq->private;
-
- if (dummy_llap->llap_magic == 2)
- return NULL;
-
- return (void *)1;
-}
-
-static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
-{
- struct ll_async_page *llap, *dummy_llap = seq->private;
- struct ll_sb_info *sbi = dummy_llap->llap_cookie;
- struct ll_pglist_data *pd;
- int cpu = dummy_llap->llap_pglist_cpu;
-
- /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
- * it in our own state */
- if (dummy_llap->llap_magic == 0) {
- seq_printf(seq, "gener | llap cookie origin wq du wb | page "
- "inode index count [ page flags ]\n");
- return 0;
- }
-
- pd = ll_pglist_cpu_lock(sbi, cpu);
- llap = llite_pglist_next_llap(&pd->llpd_list,
- &dummy_llap->llap_pglist_item);
- if (llap != NULL) {
- int has_flags = 0, i;
- struct page *page = llap->llap_page;
- unsigned long gen = 0UL;
-
- LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
- llap->llap_origin);
-
- for_each_online_cpu(i)
- gen += LL_PGLIST_DATA_CPU(sbi, i)->llpd_gen;
-
- seq_printf(seq," %5lu | %p %p %s %s %s %s | %p %lu/%u(%p) "
- "%lu %u [",
- gen,
- llap, llap->llap_cookie,
- llap_origins[llap->llap_origin],
- llap->llap_write_queued ? "wq" : "- ",
- llap->llap_defer_uptodate ? "du" : "- ",
- PageWriteback(page) ? "wb" : "-",
- page, page->mapping->host->i_ino,
- page->mapping->host->i_generation,
- page->mapping->host, page->index,
- page_count(page));
- seq_page_flag(seq, page, locked, has_flags);
- seq_page_flag(seq, page, error, has_flags);
- seq_page_flag(seq, page, referenced, has_flags);
- seq_page_flag(seq, page, uptodate, has_flags);
- seq_page_flag(seq, page, dirty, has_flags);
-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,12))
- seq_page_flag(seq, page, highmem, has_flags);
-#endif
- seq_page_flag(seq, page, writeback, has_flags);
- if (!has_flags)
- seq_puts(seq, "-]\n");
- else
- seq_puts(seq, "]\n");
- }
- ll_pglist_cpu_unlock(sbi, cpu);
-
- return 0;
-}
-
-static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
- loff_t *pos)
-{
- struct ll_async_page *llap, *dummy_llap = seq->private;
- struct ll_sb_info *sbi = dummy_llap->llap_cookie;
- struct ll_pglist_data *pd, *next;
- int cpu = dummy_llap->llap_pglist_cpu;
-
- /* bail if we just displayed the banner */
- if (dummy_llap->llap_magic == 0) {
- dummy_llap->llap_magic = 1;
- return dummy_llap;
- }
-
- /* we've just displayed the llap that is after us in the list.
- * we advance to a position beyond it, returning null if there
- * isn't another llap in the list beyond that new position. */
- pd = ll_pglist_cpu_lock(sbi, cpu);
- llap = llite_pglist_next_llap(&pd->llpd_list,
- &dummy_llap->llap_pglist_item);
- list_del_init(&dummy_llap->llap_pglist_item);
- if (llap) {
- list_add(&dummy_llap->llap_pglist_item,&llap->llap_pglist_item);
- llap = llite_pglist_next_llap(&pd->llpd_list,
- &dummy_llap->llap_pglist_item);
- }
- if (llap == NULL) {
- int i = cpu + 1;
- for (next = NULL; i < num_possible_cpus(); i++, next = NULL) {
- next = ll_pglist_cpu_lock(sbi, i);
- if (!list_empty(&next->llpd_list))
- break;
- ll_pglist_cpu_unlock(sbi, i);
- }
- if (next != NULL) {
- list_move(&dummy_llap->llap_pglist_item,
- &next->llpd_list);
- dummy_llap->llap_pglist_cpu = i;
- ll_pglist_cpu_unlock(sbi, cpu);
- llap = llite_pglist_next_llap(&next->llpd_list,
- &dummy_llap->llap_pglist_item);
- LASSERT(llap);
- cpu = i;
- }
- }
- ll_pglist_cpu_unlock(sbi, cpu);
-
- ++*pos;
- if (llap == NULL) {
- dummy_llap->llap_magic = 2;
- return NULL;
- }
- return dummy_llap;
-}
-
-static void null_stop(struct seq_file *seq, void *v)
-{
-}
-
-struct seq_operations llite_dump_pgcache_seq_sops = {
- .start = llite_dump_pgcache_seq_start,
- .stop = null_stop,
- .next = llite_dump_pgcache_seq_next,
- .show = llite_dump_pgcache_seq_show,
-};
-
-/* we're displaying llaps in a list_head list. we don't want to hold a lock
- * while we walk the entire list, and we don't want to have to seek into
- * the right position in the list as an app advances with many syscalls. we
- * allocate a dummy llap and hang it off file->private. its position in
- * the list records where the app is currently displaying. this way our
- * seq .start and .stop don't actually do anything. .next returns null
- * when the dummy hits the end of the list which eventually leads to .release
- * where we tear down. this kind of displaying is super-racey, so we put
- * a generation counter on the list so the output shows when the list
- * changes between reads.
- */
-static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
-{
- struct proc_dir_entry *dp = PDE(inode);
- struct ll_async_page *dummy_llap;
- struct seq_file *seq;
- struct ll_sb_info *sbi = dp->data;
- struct ll_pglist_data *pd;
- int rc = -ENOMEM;
-
- LPROCFS_ENTRY_AND_CHECK(dp);
-
- OBD_ALLOC_PTR_WAIT(dummy_llap);
- if (dummy_llap == NULL)
- GOTO(out, rc);
- dummy_llap->llap_page = NULL;
- dummy_llap->llap_cookie = sbi;
- dummy_llap->llap_magic = 0;
- dummy_llap->llap_pglist_cpu = 0;
-
- rc = seq_open(file, &llite_dump_pgcache_seq_sops);
- if (rc) {
- OBD_FREE(dummy_llap, sizeof(*dummy_llap));
- GOTO(out, rc);
- }
- seq = file->private_data;
- seq->private = dummy_llap;
-
- pd = ll_pglist_cpu_lock(sbi, 0);
- list_add(&dummy_llap->llap_pglist_item, &pd->llpd_list);
- ll_pglist_cpu_unlock(sbi, 0);
-
-out:
- if (rc)
- LPROCFS_EXIT();
- return rc;
-}
-
-static int llite_dump_pgcache_seq_release(struct inode *inode,
- struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct ll_async_page *dummy_llap = seq->private;
- struct ll_sb_info *sbi = dummy_llap->llap_cookie;
- int cpu = dummy_llap->llap_pglist_cpu;
-
- ll_pglist_cpu_lock(sbi, cpu);
- if (!list_empty(&dummy_llap->llap_pglist_item))
- list_del_init(&dummy_llap->llap_pglist_item);
- ll_pglist_cpu_unlock(sbi, cpu);
- OBD_FREE(dummy_llap, sizeof(*dummy_llap));
-
- return lprocfs_seq_release(inode, file);
-}
-
-struct file_operations llite_dump_pgcache_fops = {
- .owner = THIS_MODULE,
- .open = llite_dump_pgcache_seq_open,
- .read = seq_read,
- .release = llite_dump_pgcache_seq_release,
-};
-
#define pct(a,b) (b ? a * 100 / b : 0)
static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
LPROC_SEQ_FOPS(ll_rw_extents_stats);
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
- *file, size_t count, int rw)
+void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
+ struct ll_file_data *file, loff_t pos,
+ size_t count, int rw)
{
int i, cur = -1;
struct ll_rw_process_info *process;
for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
if (process[i].rw_pid == pid) {
if (process[i].rw_last_file != file) {
- process[i].rw_range_start = file->f_pos;
- process[i].rw_last_file_pos =
- file->f_pos + count;
+ process[i].rw_range_start = pos;
+ process[i].rw_last_file_pos = pos + count;
process[i].rw_smallest_extent = count;
process[i].rw_largest_extent = count;
process[i].rw_offset = 0;
spin_unlock(&sbi->ll_process_lock);
return;
}
- if (process[i].rw_last_file_pos != file->f_pos) {
+ if (process[i].rw_last_file_pos != pos) {
*off_count =
(*off_count + 1) % LL_OFFSET_HIST_MAX;
offset[*off_count].rw_op = process[i].rw_op;
offset[*off_count].rw_offset =
process[i].rw_offset;
process[i].rw_op = rw;
- process[i].rw_range_start = file->f_pos;
+ process[i].rw_range_start = pos;
process[i].rw_smallest_extent = count;
process[i].rw_largest_extent = count;
- process[i].rw_offset = file->f_pos -
+ process[i].rw_offset = pos -
process[i].rw_last_file_pos;
}
if(process[i].rw_smallest_extent > count)
process[i].rw_smallest_extent = count;
if(process[i].rw_largest_extent < count)
process[i].rw_largest_extent = count;
- process[i].rw_last_file_pos = file->f_pos + count;
+ process[i].rw_last_file_pos = pos + count;
spin_unlock(&sbi->ll_process_lock);
return;
}
*process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
process[*process_count].rw_pid = pid;
process[*process_count].rw_op = rw;
- process[*process_count].rw_range_start = file->f_pos;
- process[*process_count].rw_last_file_pos = file->f_pos + count;
+ process[*process_count].rw_range_start = pos;
+ process[*process_count].rw_last_file_pos = pos + count;
process[*process_count].rw_smallest_extent = count;
process[*process_count].rw_largest_extent = count;
process[*process_count].rw_offset = 0;