Collect per-disk I/O statistics in the SCSI disk (sd) driver.

Adds the SD_IOSTATS Kconfig option (bool, default y, depends on BLK_DEV_SD).
When both CONFIG_SD_IOSTATS and CONFIG_PROC_FS are set:

  * scsi_proc.c makes proc_scsi non-static and exports it, so that sd.c can
    create the directory /proc/scsi/sd_iostats under it.
  * The sd.c hunk that allocates sdkp->stats also creates one proc file per
    disk, /proc/scsi/sd_iostats/<disk_name>.  If the directory or the file
    cannot be created, the stats structure is freed and collection stays
    disabled for that disk (sdkp->stats == NULL).
  * sd_iostats_start_req() and sd_iostats_finish_req() update, under
    iostat_lock, the request-size histograms, the queue-depth tick counters,
    the seek counters and the power-of-two "time in queue" / "process time"
    histograms.  sd_iostats_start_req() overwrites rq->start_time so that
    sd_iostats_finish_req() can measure processing time.
  * Reading the proc file prints the tables; writing anything to it zeroes
    every field located before iostat_lock, then restores the current queue
    depth and restarts the snapshot clock.
  * scsi_disk_release() removes the proc file and frees the stats.

When either option is off, the four sd_iostats_*() entry points are empty
static inlines.

Changes in this revision:
  - add the missing space in the "cannot allocate iostat structure for"
    warning (the disk name used to be glued to "for");
  - print "%" rather than "%%" in the "process time" / "time in queue"
    headers ("%%" passed as a %s argument is emitted literally);
  - clamp an oversized request into the last size bucket instead of calling
    BUG(), matching how the time buckets are already clamped;
  - annotate the write handler's buffer argument with __user.

NOTE(review): the two "# include" targets in sd.c, and the lines of
sd_iostats_seq_show() between the per-size row and the queue-depth loop,
were lost from the copy this revision was prepared from.  They were
reconstructed so that the hunk line counts match the hunk headers; please
compare them with a pristine copy before merging.
NOTE(review): sd_iostats_fini() is only called on the init error path in
this patch; no hunk adds it to the module exit path -- confirm whether
/proc/scsi/sd_iostats should be removed on unload.
NOTE(review): sd_iostats_seq_show() reads the counters without taking
iostat_lock, so a concurrent reset can zero a divisor between the check
and the division -- confirm whether that needs to be closed.

Index: linux-2.6.27.21-0.1/drivers/scsi/Kconfig
===================================================================
--- linux-2.6.27.21-0.1.orig/drivers/scsi/Kconfig	2009-04-23 02:12:56.000000000 -0600
+++ linux-2.6.27.21-0.1/drivers/scsi/Kconfig	2009-05-22 08:38:28.000000000 -0600
@@ -82,6 +82,14 @@
 	  In this case, do not compile the driver for your SCSI host adapter
 	  (below) as a module either.
 
+config SD_IOSTATS
+	bool "Enable SCSI disk I/O stats"
+	depends on BLK_DEV_SD
+	default y
+	---help---
+	  This enables SCSI disk I/O stats collection.  You must also enable
+	  /proc file system support if you want this feature.
+
 config CHR_DEV_ST
 	tristate "SCSI tape support"
 	depends on SCSI
Index: linux-2.6.27.21-0.1/drivers/scsi/scsi_proc.c
===================================================================
--- linux-2.6.27.21-0.1.orig/drivers/scsi/scsi_proc.c	2009-04-23 02:12:56.000000000 -0600
+++ linux-2.6.27.21-0.1/drivers/scsi/scsi_proc.c	2009-05-22 08:38:28.000000000 -0600
@@ -40,7 +40,8 @@
 /* 4K page size, but our output routines, use some slack for overruns */
 #define PROC_BLOCK_SIZE (3*1024)
 
-static struct proc_dir_entry *proc_scsi;
+struct proc_dir_entry *proc_scsi;
+EXPORT_SYMBOL(proc_scsi);
 
 /* Protect sht->present and sht->proc_dir */
 static DEFINE_MUTEX(global_host_template_mutex);
Index: linux-2.6.27.21-0.1/drivers/scsi/sd.c
===================================================================
--- linux-2.6.27.21-0.1.orig/drivers/scsi/sd.c	2009-04-23 02:12:56.000000000 -0600
+++ linux-2.6.27.21-0.1/drivers/scsi/sd.c	2009-05-22 08:38:28.000000000 -0600
@@ -108,6 +108,24 @@
  * object after last put) */
 static DEFINE_MUTEX(sd_ref_mutex);
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+# include <linux/proc_fs.h>
+# include <linux/seq_file.h>
+struct proc_dir_entry *sd_iostats_procdir = NULL;
+char sd_iostats_procdir_name[] = "sd_iostats";
+static struct file_operations sd_iostats_proc_fops;
+
+extern void sd_iostats_init(void);
+extern void sd_iostats_fini(void);
+void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
+void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
+#else
+static inline void sd_iostats_init(void) {}
+static inline void sd_iostats_fini(void) {}
+static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
+static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
+#endif
+
 static const char *sd_cache_types[] = {
 	"write through", "none", "write back",
 	"write back, no read (daft)"
@@ -571,6 +589,8 @@
 	if (sdkp->protection_type || scsi_prot_sg_count(SCpnt))
 		sd_dif_op(SCpnt, sdkp->protection_type, scsi_prot_sg_count(SCpnt));
 
+	sd_iostats_start_req(SCpnt);
+
 	/*
 	 * We shouldn't disconnect in the middle of a sector, so with a dumb
 	 * host adapter, it's safe to assume that we can at least transfer
@@ -1091,6 +1111,7 @@
 		break;
 	}
  out:
+	sd_iostats_finish_req(SCpnt);
 	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
 		sd_dif_complete(SCpnt, good_bytes);
 
@@ -1873,6 +1894,36 @@
 	if (sdp->removable)
 		gd->flags |= GENHD_FL_REMOVABLE;
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+	sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
+	if (!sdkp->stats) {
+		printk(KERN_WARNING "cannot allocate iostat structure for "
+				    "%s\n", gd->disk_name);
+	} else {
+		do_gettimeofday(&sdkp->stats->iostat_timeval);
+		sdkp->stats->iostat_queue_stamp = jiffies;
+		spin_lock_init(&sdkp->stats->iostat_lock);
+		if (sd_iostats_procdir) {
+			struct proc_dir_entry *pde;
+			pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
+						sd_iostats_procdir);
+			if (!pde) {
+				printk(KERN_WARNING "Can't create /proc/scsi/"
+						    "%s/%s\n",
+						    sd_iostats_procdir_name,
+						    gd->disk_name);
+				kfree(sdkp->stats);
+				sdkp->stats = NULL;
+			} else {
+				pde->proc_fops = &sd_iostats_proc_fops;
+				pde->data = gd;
+			}
+		} else {
+			kfree(sdkp->stats);
+			sdkp->stats = NULL;
+		}
+	}
+#endif
 	dev_set_drvdata(dev, sdkp);
 	add_disk(gd);
 	sd_dif_config_host(sdkp);
@@ -1923,6 +1974,366 @@
 	return 0;
 }
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+static int
+sd_iostats_seq_show(struct seq_file *seq, void *v)
+{
+	struct timeval     now;
+	struct gendisk    *disk = seq->private;
+	iostat_stats_t    *stats;
+	unsigned long long read_len;
+	unsigned long long read_len_tot;
+	unsigned long      read_num;
+	unsigned long      read_num_tot;
+	unsigned long long write_len;
+	unsigned long long write_len_tot;
+	unsigned long      write_num;
+	unsigned long      write_num_tot;
+	int                i;
+	int                maxi;
+
+	stats = scsi_disk(disk)->stats;
+	if (stats == NULL) {
+		printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
+		BUG();
+	}
+
+	do_gettimeofday(&now);
+	now.tv_sec -= stats->iostat_timeval.tv_sec;
+	now.tv_usec -= stats->iostat_timeval.tv_usec;
+	if (now.tv_usec < 0) {
+		now.tv_usec += 1000000;
+		now.tv_sec--;
+	}
+
+	/* this sampling races with updates */
+	seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
+		   (unsigned long) scsi_disk(disk)->index,
+		   now.tv_sec, now.tv_usec);
+
+	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
+		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
+		    stats->iostat_write_histogram[i].iostat_count != 0)
+			break;
+	maxi = i;
+
+	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
+		   "reads", "total", "writes", "total");
+
+	read_len_tot = write_len_tot = 0;
+	read_num_tot = write_num_tot = 0;
+	for (i = 0; i <= maxi; i++) {
+		read_len = stats->iostat_read_histogram[i].iostat_size;
+		read_len_tot += read_len;
+		read_num = stats->iostat_read_histogram[i].iostat_count;
+		read_num_tot += read_num;
+
+		write_len = stats->iostat_write_histogram[i].iostat_size;
+		write_len_tot += write_len;
+		write_num = stats->iostat_write_histogram[i].iostat_count;
+		write_num_tot += write_num;
+
+		seq_printf(seq, "%8d %8lu %12llu %8lu %12llu\n",
+			   512<<i, read_num, read_len, write_num, write_len);
+	}
+
+	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
+		   read_num_tot, read_len_tot,
+		   write_num_tot, write_len_tot);
+
+	seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
+	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
+		unsigned long long ticks, percent;
+		ticks = stats->iostat_queue_ticks[i];
+		if (ticks == 0)
+			continue;
+		percent = stats->iostat_queue_ticks[i] * 100;
+		do_div(percent, stats->iostat_queue_ticks_sum);
+		seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
+	}
+
+	if (stats->iostat_reqs != 0) {
+		unsigned long long aveseek = 0, percent = 0;
+
+		if (stats->iostat_seeks) {
+			aveseek = stats->iostat_seek_sectors;
+			do_div(aveseek, stats->iostat_seeks);
+			percent = stats->iostat_seeks * 100;
+			do_div(percent, stats->iostat_reqs);
+		}
+
+		seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
+			   "%llu sectors in ave, %llu%% of all reqs\n",
+			   stats->iostat_sectors, stats->iostat_reqs,
+			   stats->iostat_seeks, aveseek, percent);
+	}
+
+	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
+		   "%", "writes", "%");
+	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
+		unsigned long read_percent = 0, write_percent = 0;
+		if (stats->iostat_wtime[i] == 0 &&
+		    stats->iostat_rtime[i] == 0)
+			continue;
+		if (stats->iostat_read_reqs)
+			read_percent = stats->iostat_rtime[i] * 100 /
+				       stats->iostat_read_reqs;
+		if (stats->iostat_write_reqs)
+			write_percent = stats->iostat_wtime[i] * 100 /
+					stats->iostat_write_reqs;
+		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
+			   jiffies_to_msecs(((1UL << i) >> 1) << 1),
+			   stats->iostat_rtime[i], read_percent,
+			   stats->iostat_wtime[i], write_percent);
+	}
+
+	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
+		   "%", "writes", "%");
+	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
+		unsigned long read_percent = 0, write_percent = 0;
+		if (stats->iostat_wtime_in_queue[i] == 0 &&
+		    stats->iostat_rtime_in_queue[i] == 0)
+			continue;
+		if (stats->iostat_read_reqs)
+			read_percent = stats->iostat_rtime_in_queue[i] * 100 /
+				       stats->iostat_read_reqs;
+		if (stats->iostat_write_reqs)
+			write_percent = stats->iostat_wtime_in_queue[i] * 100 /
+					stats->iostat_write_reqs;
+		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
+			   jiffies_to_msecs(((1UL << i) >> 1) << 1),
+			   stats->iostat_rtime_in_queue[i],
+			   read_percent,
+			   stats->iostat_wtime_in_queue[i],
+			   write_percent);
+	}
+
+	return 0;
+}
+
+static void *
+sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
+{
+	return (*pos == 0) ? (void *)1 : NULL;
+}
+
+static void *
+sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void
+sd_iostats_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static struct seq_operations sd_iostats_seqops = {
+	.start = sd_iostats_seq_start,
+	.stop  = sd_iostats_seq_stop,
+	.next  = sd_iostats_seq_next,
+	.show  = sd_iostats_seq_show,
+};
+
+static int
+sd_iostats_seq_open(struct inode *inode, struct file *file)
+{
+	int rc;
+
+	rc = seq_open(file, &sd_iostats_seqops);
+	if (rc != 0)
+		return rc;
+
+	((struct seq_file *)file->private_data)->private = PDE(inode)->data;
+	return 0;
+}
+
+static ssize_t
+sd_iostats_seq_write(struct file *file, const char __user *buffer,
+		     size_t len, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	struct gendisk *disk = seq->private;
+	iostat_stats_t *stats = scsi_disk(disk)->stats;
+	unsigned long flags;
+	unsigned long qdepth;
+
+
+	spin_lock_irqsave(&stats->iostat_lock, flags);
+	qdepth = stats->iostat_queue_depth;
+	memset(stats, 0, offsetof(iostat_stats_t, iostat_lock));
+	do_gettimeofday(&stats->iostat_timeval);
+	stats->iostat_queue_stamp = jiffies;
+	stats->iostat_queue_depth = qdepth;
+	spin_unlock_irqrestore(&stats->iostat_lock, flags);
+
+	return len;
+}
+
+static struct file_operations sd_iostats_proc_fops = {
+	.owner   = THIS_MODULE,
+	.open    = sd_iostats_seq_open,
+	.read    = seq_read,
+	.write   = sd_iostats_seq_write,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+extern struct proc_dir_entry *proc_scsi;
+
+void
+sd_iostats_init(void)
+{
+	if (proc_scsi == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "proc_scsi is NULL\n");
+		return;
+	}
+
+	sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
+					       S_IFDIR | S_IRUGO | S_IXUGO,
+					       proc_scsi);
+	if (sd_iostats_procdir == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
+		return;
+	}
+}
+
+void sd_iostats_fini(void)
+{
+	if (proc_scsi != NULL && sd_iostats_procdir != NULL)
+		remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
+
+	sd_iostats_procdir = NULL;
+}
+
+void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
+{
+	struct request *rq = SCpnt->request;
+	iostat_stats_t *stats;
+	unsigned long *tcounter;
+	int tbucket;
+	int tmp;
+	unsigned long irqflags;
+	unsigned long i;
+
+	stats = scsi_disk(rq->rq_disk)->stats;
+	if (stats == NULL)
+		return;
+
+	tmp = jiffies - rq->start_time;
+	for (tbucket = 0; tmp > 1; tbucket++)
+		tmp >>= 1;
+	if (tbucket >= IOSTAT_NCOUNTERS)
+		tbucket = IOSTAT_NCOUNTERS - 1;
+	//printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
+
+	tcounter = rq_data_dir(rq) == WRITE ?
+		&stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
+
+	spin_lock_irqsave(&stats->iostat_lock, irqflags);
+
+	/* update delay stats */
+	(*tcounter)++;
+
+	/* update queue depth stats */
+	i = stats->iostat_queue_depth;
+	if (i >= IOSTAT_NCOUNTERS)
+		i = IOSTAT_NCOUNTERS - 1;
+	stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
+	stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
+	BUG_ON(stats->iostat_queue_depth == 0);
+	stats->iostat_queue_depth--;
+
+	/* update seek stats. XXX: not sure about nr_sectors */
+	stats->iostat_sectors += rq->nr_sectors;
+	stats->iostat_reqs++;
+	if (rq->sector != stats->iostat_next_sector) {
+		stats->iostat_seek_sectors +=
+			rq->sector > stats->iostat_next_sector ?
+			rq->sector - stats->iostat_next_sector :
+			stats->iostat_next_sector - rq->sector;
+		stats->iostat_seeks++;
+	}
+	stats->iostat_next_sector = rq->sector + rq->nr_sectors;
+
+	stats->iostat_queue_stamp = jiffies;
+
+	spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
+}
+
+void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
+{
+	struct request *rq = SCpnt->request;
+	iostat_stats_t *stats;
+	iostat_counter_t *counter;
+	int bucket;
+	int tbucket;
+	int tmp;
+	unsigned long irqflags;
+	unsigned long i;
+	int nsect;
+
+	stats = scsi_disk(rq->rq_disk)->stats;
+	if (stats == NULL)
+		return;
+
+	nsect = scsi_bufflen(SCpnt) >> 9;
+	for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
+		tmp >>= 1;
+
+	/* statistics must never take the machine down: account an
+	 * oversized request in the last bucket, as done for tbucket */
+	if (bucket >= IOSTAT_NCOUNTERS)
+		bucket = IOSTAT_NCOUNTERS - 1;
+
+	counter = rq_data_dir(rq) == WRITE ?
+		&stats->iostat_write_histogram[bucket] :
+		&stats->iostat_read_histogram[bucket];
+
+	tmp = jiffies - rq->start_time;
+	for (tbucket = 0; tmp > 1; tbucket++)
+		tmp >>= 1;
+	if (tbucket >= IOSTAT_NCOUNTERS)
+		tbucket = IOSTAT_NCOUNTERS - 1;
+	//printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
+
+	/* an ugly hack to know exact processing time. the right
+	 * solution is to add one more field to struct request
+	 * hopefully it will break nothing ... */
+	rq->start_time = jiffies;
+
+	spin_lock_irqsave(&stats->iostat_lock, irqflags);
+
+	/* update queue depth stats */
+	i = stats->iostat_queue_depth;
+	if (i >= IOSTAT_NCOUNTERS)
+		i = IOSTAT_NCOUNTERS - 1;
+	stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
+	stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
+	stats->iostat_queue_depth++;
+
+	/* update delay stats */
+	if (rq_data_dir(rq) == WRITE) {
+		stats->iostat_wtime_in_queue[tbucket]++;
+		stats->iostat_write_reqs++;
+	} else {
+		stats->iostat_rtime_in_queue[tbucket]++;
+		stats->iostat_read_reqs++;
+	}
+
+	/* update size stats */
+	counter->iostat_size += nsect;
+	counter->iostat_count++;
+
+	stats->iostat_queue_stamp = jiffies;
+
+	spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
+}
+#endif
+
 /**
  *	scsi_disk_release - Called to free the scsi_disk structure
  *	@dev: pointer to embedded class device
@@ -1941,10 +2352,16 @@
 	ida_remove(&sd_index_ida, sdkp->index);
 	spin_unlock(&sd_index_lock);
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+	if (sdkp->stats) {
+		remove_proc_entry(disk->disk_name, sd_iostats_procdir);
+		kfree(sdkp->stats);
+		sdkp->stats = NULL;
+	}
+#endif
 	disk->private_data = NULL;
 	put_disk(disk);
 	put_device(&sdkp->device->sdev_gendev);
-
 	kfree(sdkp);
 }
 
@@ -2061,6 +2478,8 @@
 	if (!majors)
 		return -ENODEV;
 
+	sd_iostats_init();
+
 	err = class_register(&sd_disk_class);
 	if (err)
 		goto err_out;
@@ -2076,6 +2495,7 @@
 err_out:
 	for (i = 0; i < SD_MAJORS; i++)
 		unregister_blkdev(sd_major(i), "sd");
+	sd_iostats_fini();
 
 	return err;
 }
Index: linux-2.6.27.21-0.1/drivers/scsi/sd.h
===================================================================
--- linux-2.6.27.21-0.1.orig/drivers/scsi/sd.h	2009-04-23 02:12:56.000000000 -0600
+++ linux-2.6.27.21-0.1/drivers/scsi/sd.h	2009-05-22 08:38:28.000000000 -0600
@@ -37,6 +37,46 @@
  */
 #define SD_LAST_BUGGY_SECTORS	8
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+typedef struct {
+	unsigned long long iostat_size;
+	unsigned long long iostat_count;
+} iostat_counter_t;
+
+#define IOSTAT_NCOUNTERS 16
+typedef struct {
+	iostat_counter_t	iostat_read_histogram[IOSTAT_NCOUNTERS];
+	iostat_counter_t	iostat_write_histogram[IOSTAT_NCOUNTERS];
+	struct timeval		iostat_timeval;
+
+	/* queue depth: how well the pipe is filled up */
+	unsigned long long	iostat_queue_ticks[IOSTAT_NCOUNTERS];
+	unsigned long long	iostat_queue_ticks_sum;
+	unsigned long		iostat_queue_depth;
+	unsigned long		iostat_queue_stamp;
+
+	/* seeks: how linear the traffic is */
+	unsigned long long	iostat_next_sector;
+	unsigned long long	iostat_seek_sectors;
+	unsigned long long	iostat_seeks;
+	unsigned long long	iostat_sectors;
+	unsigned long long	iostat_reqs;
+	unsigned long		iostat_read_reqs;
+	unsigned long		iostat_write_reqs;
+
+	/* process time: how long it takes to process requests */
+	unsigned long		iostat_rtime[IOSTAT_NCOUNTERS];
+	unsigned long		iostat_wtime[IOSTAT_NCOUNTERS];
+
+	/* queue time: how long process spent in elevator's queue */
+	unsigned long		iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
+	unsigned long		iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
+
+	/* must be the last field, as it's used to know size to be memset'ed */
+	spinlock_t		iostat_lock;
+} ____cacheline_aligned_in_smp iostat_stats_t;
+#endif
+
 struct scsi_disk {
 	struct scsi_driver *driver;	/* always &sd_template */
 	struct scsi_device *device;
@@ -53,6 +93,9 @@
 	unsigned	WCE : 1;	/* state of disk WCE bit */
 	unsigned	RCD : 1;	/* state of disk RCD bit, unused */
 	unsigned	DPOFUA : 1;	/* state of disk DPOFUA bit */
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+	iostat_stats_t	*stats;		/* scsi disk statistics */
+#endif
 };
 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,dev)
 