Index: linux-2.6.16.46-0.14/include/linux/jbd.h =================================================================== --- linux-2.6.16.46-0.14.orig/include/linux/jbd.h +++ linux-2.6.16.46-0.14/include/linux/jbd.h @@ -422,6 +422,16 @@ struct handle_s }; +/* + * Some stats for checkpoint phase + */ +struct transaction_chp_stats_s { + unsigned long cs_chp_time; + unsigned long cs_forced_to_close; + unsigned long cs_written; + unsigned long cs_dropped; +}; + /* The transaction_t type is the guts of the journaling mechanism. It * tracks a compound transaction through its various states: * @@ -553,6 +563,21 @@ struct transaction_s spinlock_t t_handle_lock; /* + * Longest time some handle had to wait for running transaction + */ + unsigned long t_max_wait; + + /* + * When transaction started + */ + unsigned long t_start; + + /* + * Checkpointing stats [j_checkpoint_sem] + */ + struct transaction_chp_stats_s t_chp_stats; + + /* * Number of outstanding updates running on this transaction * [t_handle_lock] */ @@ -592,6 +617,57 @@ struct transaction_s struct list_head t_jcb; }; +struct transaction_run_stats_s { + unsigned long rs_wait; + unsigned long rs_running; + unsigned long rs_locked; + unsigned long rs_flushing; + unsigned long rs_logging; + + unsigned long rs_handle_count; + unsigned long rs_blocks; + unsigned long rs_blocks_logged; +}; + +struct transaction_stats_s +{ + int ts_type; + unsigned long ts_tid; + union { + struct transaction_run_stats_s run; + struct transaction_chp_stats_s chp; + } u; +}; + +#define JBD_STATS_RUN 1 +#define JBD_STATS_CHECKPOINT 2 + +#define ts_wait u.run.rs_wait +#define ts_running u.run.rs_running +#define ts_locked u.run.rs_locked +#define ts_flushing u.run.rs_flushing +#define ts_logging u.run.rs_logging +#define ts_handle_count u.run.rs_handle_count +#define ts_blocks u.run.rs_blocks +#define ts_blocks_logged u.run.rs_blocks_logged + +#define ts_chp_time u.chp.cs_chp_time +#define ts_forced_to_close u.chp.cs_forced_to_close +#define ts_written u.chp.cs_written +#define ts_dropped u.chp.cs_dropped + +#define CURRENT_MSECS (jiffies_to_msecs(jiffies)) + +static inline unsigned int +jbd_time_diff(unsigned int start, unsigned int end) +{ + if (unlikely(start > end)) + end = end + (~0UL - start); + else + end -= start; + return end; +} + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -845,6 +921,16 @@ struct journal_s pid_t j_last_sync_writer; /* + * + */ + struct transaction_stats_s *j_history; + int j_history_max; + int j_history_cur; + spinlock_t j_history_lock; + struct proc_dir_entry *j_proc_entry; + struct transaction_stats_s j_stats; + + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here */ Index: linux-2.6.16.46-0.14/fs/jbd/transaction.c =================================================================== --- linux-2.6.16.46-0.14.orig/fs/jbd/transaction.c +++ linux-2.6.16.46-0.14/fs/jbd/transaction.c @@ -60,6 +60,8 @@ get_transaction(journal_t *journal, tran J_ASSERT(journal->j_running_transaction == NULL); journal->j_running_transaction = transaction; + transaction->t_max_wait = 0; + transaction->t_start = CURRENT_MSECS; return transaction; } @@ -86,6 +88,7 @@ static int start_this_handle(journal_t * int nblocks = handle->h_buffer_credits; transaction_t *new_transaction = NULL; int ret = 0; + unsigned long ts = CURRENT_MSECS; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -219,6 +222,12 @@ repeat_locked: /* OK, account for the buffers that this operation expects to * use and add the handle to the running transaction. */ + if (time_after(transaction->t_start, ts)) { + ts = jbd_time_diff(ts, transaction->t_start); + if (ts > transaction->t_max_wait) + transaction->t_max_wait= ts; + } + handle->h_transaction = transaction; transaction->t_outstanding_credits += nblocks; transaction->t_updates++; Index: linux-2.6.16.46-0.14/fs/jbd/journal.c =================================================================== --- linux-2.6.16.46-0.14.orig/fs/jbd/journal.c +++ linux-2.6.16.46-0.14/fs/jbd/journal.c @@ -36,6 +36,7 @@ #include #include #include +#include EXPORT_SYMBOL(journal_start); EXPORT_SYMBOL(journal_restart); @@ -637,6 +638,300 @@ struct journal_head *journal_get_descrip return journal_add_journal_head(bh); } +struct jbd_stats_proc_session { + journal_t *journal; + struct transaction_stats_s *stats; + int start; + int max; +}; + +static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s, + struct transaction_stats_s *ts, + int first) +{ + if (ts == s->stats + s->max) + ts = s->stats; + if (!first && ts == s->stats + s->start) + return NULL; + while (ts->ts_type == 0) { + ts++; + if (ts == s->stats + s->max) + ts = s->stats; + if (ts == s->stats + s->start) + return NULL; + } + return ts; + +} + +static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos) +{ + struct jbd_stats_proc_session *s = seq->private; + struct transaction_stats_s *ts; + int l = *pos; + + if (l == 0) + return SEQ_START_TOKEN; + ts = jbd_history_skip_empty(s, s->stats + s->start, 1); + if (!ts) + return NULL; + while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL); + return ts; +} + +static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct jbd_stats_proc_session *s = seq->private; + struct transaction_stats_s *ts = v; + + ++*pos; + if (v == SEQ_START_TOKEN) + return jbd_history_skip_empty(s, s->stats + s->start, 1); + else + return jbd_history_skip_empty(s, ++ts, 0); +} + +static int jbd_seq_history_show(struct seq_file *seq, void *v) +{ + struct transaction_stats_s *ts = v; + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " + "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", + "wait", "run", "lock", "flush", "log", "hndls", + "block", "inlog", "ctime", "write", "drop", + "close"); + return 0; + } + if (ts->ts_type == JBD_STATS_RUN) + seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu " + "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, + ts->ts_wait, ts->ts_running, ts->ts_locked, + ts->ts_flushing, ts->ts_logging, + ts->ts_handle_count, ts->ts_blocks, + ts->ts_blocks_logged); + else if (ts->ts_type == JBD_STATS_CHECKPOINT) + seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n", + "C", ts->ts_tid, " ", ts->ts_chp_time, + ts->ts_written, ts->ts_dropped, + ts->ts_forced_to_close); + else + J_ASSERT(0); + return 0; +} + +static void jbd_seq_history_stop(struct seq_file *seq, void *v) +{ +} + +static struct seq_operations jbd_seq_history_ops = { + .start = jbd_seq_history_start, + .next = jbd_seq_history_next, + .stop = jbd_seq_history_stop, + .show = jbd_seq_history_show, +}; + +static int jbd_seq_history_open(struct inode *inode, struct file *file) +{ + journal_t *journal = PDE(inode)->data; + struct jbd_stats_proc_session *s; + int rc, size; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return -EIO; + size = sizeof(struct transaction_stats_s) * journal->j_history_max; + s->stats = kmalloc(size, GFP_KERNEL); + if (s == NULL) { + kfree(s); + return -EIO; + } + spin_lock(&journal->j_history_lock); + memcpy(s->stats, journal->j_history, size); + s->max = journal->j_history_max; + s->start = journal->j_history_cur % s->max; + spin_unlock(&journal->j_history_lock); + + rc = seq_open(file, &jbd_seq_history_ops); + if (rc == 0) { + struct seq_file *m = (struct seq_file *)file->private_data; + m->private = s; + } else { + kfree(s->stats); + kfree(s); + } + return rc; + +} + +static int jbd_seq_history_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct jbd_stats_proc_session *s = seq->private; + kfree(s->stats); + kfree(s); + return seq_release(inode, file); +} + +static struct file_operations jbd_seq_history_fops = { + .owner = THIS_MODULE, + .open = jbd_seq_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = jbd_seq_history_release, +}; + +static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? NULL : SEQ_START_TOKEN; +} + +static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) +{ + return NULL; +} + +static int jbd_seq_info_show(struct seq_file *seq, void *v) +{ + struct jbd_stats_proc_session *s = seq->private; + if (v != SEQ_START_TOKEN) + return 0; + seq_printf(seq, "%lu transaction, each upto %u blocks\n", + s->stats->ts_tid, + s->journal->j_max_transaction_buffers); + if (s->stats->ts_tid == 0) + return 0; + seq_printf(seq, "average: \n %lums waiting for transaction\n", + s->stats->ts_wait / s->stats->ts_tid); + seq_printf(seq, " %lums running transaction\n", + s->stats->ts_running / s->stats->ts_tid); + seq_printf(seq, " %lums transaction was being locked\n", + s->stats->ts_locked / s->stats->ts_tid); + seq_printf(seq, " %lums flushing data (in ordered mode)\n", + s->stats->ts_flushing / s->stats->ts_tid); + seq_printf(seq, " %lums logging transaction\n", + s->stats->ts_logging / s->stats->ts_tid); + seq_printf(seq, " %lu handles per transaction\n", + s->stats->ts_handle_count / s->stats->ts_tid); + seq_printf(seq, " %lu blocks per transaction\n", + s->stats->ts_blocks / s->stats->ts_tid); + seq_printf(seq, " %lu logged blocks per transaction\n", + s->stats->ts_blocks_logged / s->stats->ts_tid); + return 0; +} + +static void jbd_seq_info_stop(struct seq_file *seq, void *v) +{ +} + +static struct seq_operations jbd_seq_info_ops = { + .start = jbd_seq_info_start, + .next = jbd_seq_info_next, + .stop = jbd_seq_info_stop, + .show = jbd_seq_info_show, +}; + +static int jbd_seq_info_open(struct inode *inode, struct file *file) +{ + journal_t *journal = PDE(inode)->data; + struct jbd_stats_proc_session *s; + int rc, size; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return -EIO; + size = sizeof(struct transaction_stats_s); + s->stats = kmalloc(size, GFP_KERNEL); + if (s == NULL) { + kfree(s); + return -EIO; + } + spin_lock(&journal->j_history_lock); + memcpy(s->stats, &journal->j_stats, size); + s->journal = journal; + spin_unlock(&journal->j_history_lock); + + rc = seq_open(file, &jbd_seq_info_ops); + if (rc == 0) { + struct seq_file *m = (struct seq_file *)file->private_data; + m->private = s; + } else { + kfree(s->stats); + kfree(s); + } + return rc; + +} + +static int jbd_seq_info_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct jbd_stats_proc_session *s = seq->private; + kfree(s->stats); + kfree(s); + return seq_release(inode, file); +} + +static struct file_operations jbd_seq_info_fops = { + .owner = THIS_MODULE, + .open = jbd_seq_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = jbd_seq_info_release, +}; + +static struct proc_dir_entry *proc_jbd_stats = NULL; + +static void jbd_stats_proc_init(journal_t *journal) +{ + char name[64]; + + snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats); + if (journal->j_proc_entry) { + struct proc_dir_entry *p; + p = create_proc_entry("history", S_IRUGO, + journal->j_proc_entry); + if (p) { + p->proc_fops = &jbd_seq_history_fops; + p->data = journal; + p = create_proc_entry("info", S_IRUGO, + journal->j_proc_entry); + if (p) { + p->proc_fops = &jbd_seq_info_fops; + p->data = journal; + } + } + } +} + +static void jbd_stats_proc_exit(journal_t *journal) +{ + char name[64]; + + snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + remove_proc_entry("info", journal->j_proc_entry); + remove_proc_entry("history", journal->j_proc_entry); + remove_proc_entry(name, proc_jbd_stats); +} + +static void journal_init_stats(journal_t *journal) +{ + int size; + + if (proc_jbd_stats == NULL) + return; + + journal->j_history_max = 100; + size = sizeof(struct transaction_stats_s) * journal->j_history_max; + journal->j_history = kmalloc(size, GFP_KERNEL); + if (journal->j_history == NULL) { + journal->j_history_max = 0; + return; + } + memset(journal->j_history, 0, size); + spin_lock_init(&journal->j_history_lock); +} + /* * Management for journal control blocks: functions to create and * destroy journal_t structures, and to initialise and read existing @@ -679,6 +974,9 @@ static journal_t * journal_init_common ( kfree(journal); goto fail; } + + journal_init_stats(journal); + return journal; fail: return NULL; @@ -722,6 +1020,7 @@ journal_t * journal_init_dev(struct bloc journal->j_blk_offset = start; journal->j_maxlen = len; journal->j_blocksize = blocksize; + jbd_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); J_ASSERT(bh != NULL); @@ -771,6 +1070,7 @@ journal_t * journal_init_inode (struct i journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; journal->j_blocksize = inode->i_sb->s_blocksize; + jbd_stats_proc_init(journal); /* journal descriptor can store up to n blocks -bzzz */ n = journal->j_blocksize / sizeof(journal_block_tag_t); @@ -1152,6 +1452,8 @@ void journal_destroy(journal_t *journal) brelse(journal->j_sb_buffer); } + if (journal->j_proc_entry) + jbd_stats_proc_exit(journal); if (journal->j_inode) iput(journal->j_inode); if (journal->j_revoke) @@ -1920,6 +2222,28 @@ static void __exit remove_jbd_proc_entry #endif +#if defined(CONFIG_PROC_FS) + +#define JBD_STATS_PROC_NAME "fs/jbd" + +static void __init create_jbd_stats_proc_entry(void) +{ + proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL); +} + +static void __exit remove_jbd_stats_proc_entry(void) +{ + if (proc_jbd_stats) + remove_proc_entry(JBD_STATS_PROC_NAME, NULL); +} + +#else + +#define create_jbd_stats_proc_entry() do {} while (0) +#define remove_jbd_stats_proc_entry() do {} while (0) + +#endif + kmem_cache_t *jbd_handle_cache; static int __init journal_init_handle_cache(void) @@ -1982,6 +2306,7 @@ static int __init journal_init(void) if (ret != 0) journal_destroy_caches(); create_jbd_proc_entry(); + create_jbd_stats_proc_entry(); return ret; } @@ -1993,6 +2318,7 @@ static void __exit journal_exit(void) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif remove_jbd_proc_entry(); + remove_jbd_stats_proc_entry(); journal_destroy_caches(); } Index: linux-2.6.16.46-0.14/fs/jbd/checkpoint.c =================================================================== --- linux-2.6.16.46-0.14.orig/fs/jbd/checkpoint.c +++ linux-2.6.16.46-0.14/fs/jbd/checkpoint.c @@ -166,6 +166,7 @@ static int __cleanup_transaction(journal transaction_t *t = jh->b_transaction; tid_t tid = t->t_tid; + transaction->t_chp_stats.cs_forced_to_close++; spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); log_start_commit(journal, tid); @@ -226,7 +227,7 @@ __flush_batch(journal_t *journal, struct */ static int __flush_buffer(journal_t *journal, struct journal_head *jh, struct buffer_head **bhs, int *batch_count, - int *drop_count) + int *drop_count, transaction_t *transaction) { struct buffer_head *bh = jh2bh(jh); int ret = 0; @@ -247,6 +248,7 @@ static int __flush_buffer(journal_t *jou set_buffer_jwrite(bh); bhs[*batch_count] = bh; jbd_unlock_bh_state(bh); + transaction->t_chp_stats.cs_written++; (*batch_count)++; if (*batch_count == NR_BATCH) { __flush_batch(journal, bhs, batch_count); @@ -315,6 +317,8 @@ int log_do_checkpoint(journal_t *journal tid_t this_tid; transaction = journal->j_checkpoint_transactions; + if (transaction->t_chp_stats.cs_chp_time == 0) + transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS; this_tid = transaction->t_tid; jh = transaction->t_checkpoint_list; last_jh = jh->b_cpprev; @@ -331,7 +335,8 @@ int log_do_checkpoint(journal_t *journal retry = 1; break; } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); + retry = __flush_buffer(journal, jh, bhs, &batch_count, + &drop_count, transaction); if (cond_resched_lock(&journal->j_list_lock)) { retry = 1; break; @@ -609,6 +614,8 @@ void __journal_insert_checkpoint(struct void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) { + struct transaction_stats_s stats; + assert_spin_locked(&journal->j_list_lock); if (transaction->t_cpnext) { transaction->t_cpnext->t_cpprev = transaction->t_cpprev; @@ -634,5 +641,25 @@ void __journal_drop_transaction(journal_ J_ASSERT(journal->j_running_transaction != transaction); jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); + + /* + * File the transaction for history + */ + if (transaction->t_chp_stats.cs_written != 0 || + transaction->t_chp_stats.cs_chp_time != 0) { + stats.ts_type = JBD_STATS_CHECKPOINT; + stats.ts_tid = transaction->t_tid; + stats.u.chp = transaction->t_chp_stats; + if (stats.ts_chp_time) + stats.ts_chp_time = + jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS); + spin_lock(&journal->j_history_lock); + memcpy(journal->j_history + journal->j_history_cur, &stats, + sizeof(stats)); + if (++journal->j_history_cur == journal->j_history_max) + journal->j_history_cur = 0; + spin_unlock(&journal->j_history_lock); + } + kfree(transaction); } Index: linux-2.6.16.46-0.14/fs/jbd/commit.c =================================================================== --- linux-2.6.16.46-0.14.orig/fs/jbd/commit.c +++ linux-2.6.16.46-0.14/fs/jbd/commit.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -168,6 +169,7 @@ static int journal_write_commit_record(j */ void journal_commit_transaction(journal_t *journal) { + struct transaction_stats_s stats; transaction_t *commit_transaction; struct journal_head *jh, *new_jh, *descriptor; struct buffer_head **wbuf = journal->j_wbuf; @@ -214,6 +216,11 @@ void journal_commit_transaction(journal_ spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + stats.ts_wait = commit_transaction->t_max_wait; + stats.ts_locked = CURRENT_MSECS; + stats.ts_running = jbd_time_diff(commit_transaction->t_start, + stats.ts_locked); + spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -284,6 +291,9 @@ void journal_commit_transaction(journal_ */ journal_switch_revoke_table(journal); + stats.ts_flushing = CURRENT_MSECS; + stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing); + commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; @@ -442,6 +452,11 @@ write_out_data: */ commit_transaction->t_state = T_COMMIT; + stats.ts_logging = CURRENT_MSECS; + stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging); + stats.ts_blocks = commit_transaction->t_outstanding_credits; + stats.ts_blocks_logged = 0; + descriptor = NULL; bufs = 0; while (commit_transaction->t_buffers) { @@ -590,6 +605,7 @@ start_journal_io: submit_bh(WRITE, bh); } cond_resched(); + stats.ts_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -784,6 +800,7 @@ restart_loop: cp_transaction = jh->b_cp_transaction; if (cp_transaction) { JBUFFER_TRACE(jh, "remove from old cp transaction"); + cp_transaction->t_chp_stats.cs_dropped++; __journal_remove_checkpoint(jh); } @@ -858,6 +875,36 @@ restart_loop: J_ASSERT(commit_transaction->t_state == T_COMMIT); + commit_transaction->t_start = CURRENT_MSECS; + stats.ts_logging = jbd_time_diff(stats.ts_logging, + commit_transaction->t_start); + + /* + * File the transaction for history + */ + stats.ts_type = JBD_STATS_RUN; + stats.ts_tid = commit_transaction->t_tid; + stats.ts_handle_count = commit_transaction->t_handle_count; + spin_lock(&journal->j_history_lock); + memcpy(journal->j_history + journal->j_history_cur, &stats, + sizeof(stats)); + if (++journal->j_history_cur == journal->j_history_max) + journal->j_history_cur = 0; + + /* + * Calculate overall stats + */ + journal->j_stats.ts_tid++; + journal->j_stats.ts_wait += stats.ts_wait; + journal->j_stats.ts_running += stats.ts_running; + journal->j_stats.ts_locked += stats.ts_locked; + journal->j_stats.ts_flushing += stats.ts_flushing; + journal->j_stats.ts_logging += stats.ts_logging; + journal->j_stats.ts_handle_count += stats.ts_handle_count; + journal->j_stats.ts_blocks += stats.ts_blocks; + journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged; + spin_unlock(&journal->j_history_lock); + commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid;