+Index: linux-2.6.18-53.1.21/drivers/scsi/Kconfig
+===================================================================
+--- linux-2.6.18-53.1.21.orig/drivers/scsi/Kconfig
++++ linux-2.6.18-53.1.21/drivers/scsi/Kconfig
+@@ -66,6 +66,14 @@ config BLK_DEV_SD
+ polling I/O. If it doesn't, LKCD will fall back to ordinary
+ interrupt-driven I/O.
+
++config SD_IOSTATS
++ bool "Enable SCSI disk I/O stats"
++ depends on BLK_DEV_SD
++ default y
++ ---help---
++ This enables SCSI disk I/O stats collection. You must also enable
++ /proc file system support if you want this feature.
++
+ config CHR_DEV_ST
+ tristate "SCSI tape support"
+ depends on SCSI
+Index: linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c
+===================================================================
+--- linux-2.6.18-53.1.21.orig/drivers/scsi/scsi_proc.c
++++ linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c
+@@ -40,7 +40,8 @@
+ /* 4K page size, but our output routines, use some slack for overruns */
+ #define PROC_BLOCK_SIZE (3*1024)
+
+-static struct proc_dir_entry *proc_scsi;
++struct proc_dir_entry *proc_scsi;
++EXPORT_SYMBOL(proc_scsi);
+
+ /* Protect sht->present and sht->proc_dir */
+ static DEFINE_MUTEX(global_host_template_mutex);
+Index: linux-2.6.18-53.1.21/drivers/scsi/sd.c
+===================================================================
+--- linux-2.6.18-53.1.21.orig/drivers/scsi/sd.c
++++ linux-2.6.18-53.1.21/drivers/scsi/sd.c
+@@ -62,6 +62,63 @@
+
+ #include "scsi_logging.h"
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++# include <linux/proc_fs.h>
++# include <linux/seq_file.h>
++
++typedef struct {
++ unsigned long long iostat_size;
++ unsigned long long iostat_count;
++} iostat_counter_t;
++
++#define IOSTAT_NCOUNTERS 16
++typedef struct {
++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
++ struct timeval iostat_timeval;
++
++ /* queue depth: how well the pipe is filled up */
++ unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
++ unsigned long long iostat_queue_ticks_sum;
++ unsigned long iostat_queue_depth;
++ unsigned long iostat_queue_stamp;
++
++ /* seeks: how linear the traffic is */
++ unsigned long long iostat_next_sector;
++ unsigned long long iostat_seek_sectors;
++ unsigned long long iostat_seeks;
++ unsigned long long iostat_sectors;
++ unsigned long long iostat_reqs;
++ unsigned long iostat_read_reqs;
++ unsigned long iostat_write_reqs;
++
++ /* process time: how long it takes to process requests */
++ unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
++ unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
++
++ /* queue time: how long process spent in elevator's queue */
++ unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
++ unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
++
++ /* must be the last field, as it's used to know size to be memset'ed */
++ spinlock_t iostat_lock;
++} ____cacheline_aligned_in_smp iostat_stats_t;
++
++struct proc_dir_entry *sd_iostats_procdir = NULL;
++char sd_iostats_procdir_name[] = "sd_iostats";
++static struct file_operations sd_iostats_proc_fops;
++
++extern void sd_iostats_init(void);
++extern void sd_iostats_fini(void);
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
++#else
++static inline void sd_iostats_init(void) {}
++static inline void sd_iostats_fini(void) {}
++static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
++static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
++#endif
++
+ /*
+ * More than enough for everybody ;) The huge number of majors
+ * is a leftover from 16bit dev_t days, we don't really need that
+@@ -126,6 +183,9 @@ struct scsi_disk {
+ unsigned WCE : 1; /* state of disk WCE bit */
+ unsigned RCD : 1; /* state of disk RCD bit, unused */
+ unsigned DPOFUA : 1; /* state of disk DPOFUA bit */
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++ iostat_stats_t *stats; /* scsi disk statistics */
++#endif
+ };
+ #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)
+
+@@ -557,6 +617,8 @@ static int sd_init_command(struct scsi_c
+ */
+ SCpnt->done = sd_rw_intr;
+
++ sd_iostats_start_req(SCpnt);
++
+ /*
+ * This indicates that the command is ready from our end to be
+ * queued.
+@@ -1040,6 +1102,7 @@ static void sd_rw_intr(struct scsi_cmnd
+ break;
+ }
+ out:
++ sd_iostats_finish_req(SCpnt);
+ scsi_io_completion(SCpnt, good_bytes);
+ }
+
+@@ -1735,6 +1798,36 @@ static int sd_probe(struct device *dev)
+ if (sdp->removable)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++ sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
++ if (!sdkp->stats) {
++ printk(KERN_WARNING "cannot allocate iostat structure for"
++ "%s\n", gd->disk_name);
++ } else {
++ do_gettimeofday(&sdkp->stats->iostat_timeval);
++ sdkp->stats->iostat_queue_stamp = jiffies;
++ spin_lock_init(&sdkp->stats->iostat_lock);
++ if (sd_iostats_procdir) {
++ struct proc_dir_entry *pde;
++ pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
++ sd_iostats_procdir);
++ if (!pde) {
++ printk(KERN_WARNING "Can't create /proc/scsi/"
++ "%s/%s\n",
++ sd_iostats_procdir_name,
++ gd->disk_name);
++ kfree(sdkp->stats);
++ sdkp->stats = NULL;
++ } else {
++ pde->proc_fops = &sd_iostats_proc_fops;
++ pde->data = gd;
++ }
++ } else {
++ kfree(sdkp->stats);
++ sdkp->stats = NULL;
++ }
++ }
++#endif
+ dev_set_drvdata(dev, sdkp);
+ add_disk(gd);
+
+@@ -1778,6 +1871,366 @@ static int sd_remove(struct device *dev)
+ return 0;
+ }
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++static int
++sd_iostats_seq_show(struct seq_file *seq, void *v)
++{
++ struct timeval now;
++ struct gendisk *disk = seq->private;
++ iostat_stats_t *stats;
++ unsigned long long read_len;
++ unsigned long long read_len_tot;
++ unsigned long read_num;
++ unsigned long read_num_tot;
++ unsigned long long write_len;
++ unsigned long long write_len_tot;
++ unsigned long write_num;
++ unsigned long write_num_tot;
++ int i;
++ int maxi;
++
++ stats = scsi_disk(disk)->stats;
++ if (stats == NULL) {
++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
++ BUG();
++ }
++
++ do_gettimeofday(&now);
++ now.tv_sec -= stats->iostat_timeval.tv_sec;
++ now.tv_usec -= stats->iostat_timeval.tv_usec;
++ if (now.tv_usec < 0) {
++ now.tv_usec += 1000000;
++ now.tv_sec--;
++ }
++
++ /* this sampling races with updates */
++ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
++ (unsigned long) scsi_disk(disk)->index,
++ now.tv_sec, now.tv_usec);
++
++ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
++ if (stats->iostat_read_histogram[i].iostat_count != 0 ||
++ stats->iostat_write_histogram[i].iostat_count != 0)
++ break;
++ maxi = i;
++
++ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
++ "reads", "total", "writes", "total");
++
++ read_len_tot = write_len_tot = 0;
++ read_num_tot = write_num_tot = 0;
++ for (i = 0; i <= maxi; i++) {
++ read_len = stats->iostat_read_histogram[i].iostat_size;
++ read_len_tot += read_len;
++ read_num = stats->iostat_read_histogram[i].iostat_count;
++ read_num_tot += read_num;
++
++ write_len = stats->iostat_write_histogram[i].iostat_size;
++ write_len_tot += write_len;
++ write_num = stats->iostat_write_histogram[i].iostat_count;
++ write_num_tot += write_num;
++
++ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
++ 512<<i, read_num, read_len, write_num, write_len);
++ }
++
++ seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
++ read_num_tot, read_len_tot,
++ write_num_tot, write_len_tot);
++
++ seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++ unsigned long long ticks, percent;
++ ticks = stats->iostat_queue_ticks[i];
++ if (ticks == 0)
++ continue;
++ percent = stats->iostat_queue_ticks[i] * 100;
++ do_div(percent, stats->iostat_queue_ticks_sum);
++ seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
++ }
++
++ if (stats->iostat_reqs != 0) {
++ unsigned long long aveseek = 0, percent = 0;
++
++ if (stats->iostat_seeks) {
++ aveseek = stats->iostat_seek_sectors;
++ do_div(aveseek, stats->iostat_seeks);
++ percent = stats->iostat_seeks * 100;
++ do_div(percent, stats->iostat_reqs);
++ }
++
++ seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
++ "%llu sectors in ave, %llu%% of all reqs\n",
++ stats->iostat_sectors, stats->iostat_reqs,
++ stats->iostat_seeks, aveseek, percent);
++ }
++
++ seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
++ "%%", "writes", "%%");
++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++ unsigned long read_percent = 0, write_percent = 0;
++ if (stats->iostat_wtime[i] == 0 &&
++ stats->iostat_rtime[i] == 0)
++ continue;
++ if (stats->iostat_read_reqs)
++ read_percent = stats->iostat_rtime[i] * 100 /
++ stats->iostat_read_reqs;
++ if (stats->iostat_write_reqs)
++ write_percent = stats->iostat_wtime[i] * 100 /
++ stats->iostat_write_reqs;
++ seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++ jiffies_to_msecs(((1UL << i) >> 1) << 1),
++ stats->iostat_rtime[i], read_percent,
++ stats->iostat_wtime[i], write_percent);
++ }
++
++ seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
++ "%%", "writes", "%%");
++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++ unsigned long read_percent = 0, write_percent = 0;
++ if (stats->iostat_wtime_in_queue[i] == 0 &&
++ stats->iostat_rtime_in_queue[i] == 0)
++ continue;
++ if (stats->iostat_read_reqs)
++ read_percent = stats->iostat_rtime_in_queue[i] * 100 /
++ stats->iostat_read_reqs;
++ if (stats->iostat_write_reqs)
++ write_percent = stats->iostat_wtime_in_queue[i] * 100 /
++ stats->iostat_write_reqs;
++ seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++ jiffies_to_msecs(((1UL << i) >> 1) << 1),
++ stats->iostat_rtime_in_queue[i],
++ read_percent,
++ stats->iostat_wtime_in_queue[i],
++ write_percent);
++ }
++
++ return 0;
++}
++
++static void *
++sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
++{
++ return (*pos == 0) ? (void *)1 : NULL;
++}
++
++static void *
++sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
++{
++ ++*pos;
++ return NULL;
++}
++
++static void
++sd_iostats_seq_stop(struct seq_file *p, void *v)
++{
++}
++
++static struct seq_operations sd_iostats_seqops = {
++ .start = sd_iostats_seq_start,
++ .stop = sd_iostats_seq_stop,
++ .next = sd_iostats_seq_next,
++ .show = sd_iostats_seq_show,
++};
++
++static int
++sd_iostats_seq_open (struct inode *inode, struct file *file)
++{
++ int rc;
++
++ rc = seq_open(file, &sd_iostats_seqops);
++ if (rc != 0)
++ return rc;
++
++ ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
++ return 0;
++}
++
++static ssize_t
++sd_iostats_seq_write(struct file *file, const char *buffer,
++ size_t len, loff_t *off)
++{
++ struct seq_file *seq = file->private_data;
++ struct gendisk *disk = seq->private;
++ iostat_stats_t *stats = scsi_disk(disk)->stats;
++ unsigned long flags;
++ unsigned long qdepth;
++
++
++ spin_lock_irqsave (&stats->iostat_lock, flags);
++ qdepth = stats->iostat_queue_depth;
++ memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
++ do_gettimeofday(&stats->iostat_timeval);
++ stats->iostat_queue_stamp = jiffies;
++ stats->iostat_queue_depth = qdepth;
++ spin_unlock_irqrestore (&stats->iostat_lock, flags);
++
++ return len;
++}
++
++static struct file_operations sd_iostats_proc_fops = {
++ .owner = THIS_MODULE,
++ .open = sd_iostats_seq_open,
++ .read = seq_read,
++ .write = sd_iostats_seq_write,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++extern struct proc_dir_entry *proc_scsi;
++
++void
++sd_iostats_init(void)
++{
++ if (proc_scsi == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "proc_scsi is NULL\n");
++ return;
++ }
++
++ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
++ S_IFDIR | S_IRUGO | S_IXUGO,
++ proc_scsi);
++ if (sd_iostats_procdir == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
++ return;
++ }
++}
++
++void sd_iostats_fini(void)
++{
++ if (proc_scsi != NULL && sd_iostats_procdir != NULL)
++ remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
++
++ sd_iostats_procdir = NULL;
++}
++
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
++{
++ struct request *rq = SCpnt->request;
++ iostat_stats_t *stats;
++ unsigned long *tcounter;
++ int tbucket;
++ int tmp;
++ unsigned long irqflags;
++ unsigned long i;
++
++ stats = scsi_disk(rq->rq_disk)->stats;
++ if (stats == NULL)
++ return;
++
++ tmp = jiffies - rq->start_time;
++ for (tbucket = 0; tmp > 1; tbucket++)
++ tmp >>= 1;
++ if (tbucket >= IOSTAT_NCOUNTERS)
++ tbucket = IOSTAT_NCOUNTERS - 1;
++ //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
++
++ tcounter = rq_data_dir(rq) == WRITE ?
++ &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
++
++ spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++ /* update delay stats */
++ (*tcounter)++;
++
++ /* update queue depth stats */
++ i = stats->iostat_queue_depth;
++ if (i >= IOSTAT_NCOUNTERS)
++ i = IOSTAT_NCOUNTERS - 1;
++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++ BUG_ON(stats->iostat_queue_depth == 0);
++ stats->iostat_queue_depth--;
++
++ /* update seek stats. XXX: not sure about nr_sectors */
++ stats->iostat_sectors += rq->nr_sectors;
++ stats->iostat_reqs++;
++ if (rq->sector != stats->iostat_next_sector) {
++ stats->iostat_seek_sectors +=
++ rq->sector > stats->iostat_next_sector ?
++ rq->sector - stats->iostat_next_sector :
++ stats->iostat_next_sector - rq->sector;
++ stats->iostat_seeks++;
++ }
++ stats->iostat_next_sector = rq->sector + rq->nr_sectors;
++
++ stats->iostat_queue_stamp = jiffies;
++
++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
++}
++
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
++{
++ struct request *rq = SCpnt->request;
++ iostat_stats_t *stats;
++ iostat_counter_t *counter;
++ int bucket;
++ int tbucket;
++ int tmp;
++ unsigned long irqflags;
++ unsigned long i;
++ int nsect;
++
++ stats = scsi_disk(rq->rq_disk)->stats;
++ if (stats == NULL)
++ return;
++
++ nsect = SCpnt->request_bufflen >> 9;
++ for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
++ tmp >>= 1;
++
++ if (bucket >= IOSTAT_NCOUNTERS) {
++ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
++ BUG();
++ }
++
++ counter = rq_data_dir(rq) == WRITE ?
++ &stats->iostat_write_histogram[bucket] :
++ &stats->iostat_read_histogram[bucket];
++
++ tmp = jiffies - rq->start_time;
++ for (tbucket = 0; tmp > 1; tbucket++)
++ tmp >>= 1;
++ if (tbucket >= IOSTAT_NCOUNTERS)
++ tbucket = IOSTAT_NCOUNTERS - 1;
++ //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
++
++ /* an ugly hack to know exact processing time. the right
++ * solution is to add one more field to struct request
++ * hopefully it will break nothing ... */
++ rq->start_time = jiffies;
++
++ spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++ /* update queue depth stats */
++ i = stats->iostat_queue_depth;
++ if (i >= IOSTAT_NCOUNTERS)
++ i = IOSTAT_NCOUNTERS - 1;
++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_depth++;
++
++ /* update delay stats */
++ if (rq_data_dir(rq) == WRITE) {
++ stats->iostat_wtime_in_queue[tbucket]++;
++ stats->iostat_write_reqs++;
++ } else {
++ stats->iostat_rtime_in_queue[tbucket]++;
++ stats->iostat_read_reqs++;
++ }
++
++ /* update size stats */
++ counter->iostat_size += nsect;
++ counter->iostat_count++;
++
++ stats->iostat_queue_stamp = jiffies;
++
++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
++}
++#endif
++
+ /**
+ * scsi_disk_release - Called to free the scsi_disk structure
+ * @cdev: pointer to embedded class device
+@@ -1796,10 +2249,16 @@ static void scsi_disk_release(struct cla
+ idr_remove(&sd_index_idr, sdkp->index);
+ spin_unlock(&sd_index_lock);
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++ if (sdkp->stats) {
++ remove_proc_entry(disk->disk_name, sd_iostats_procdir);
++ kfree(sdkp->stats);
++ sdkp->stats = NULL;
++ }
++#endif
+ disk->private_data = NULL;
+ put_disk(disk);
+ put_device(&sdkp->device->sdev_gendev);
+-
+ kfree(sdkp);
+ }
+
+@@ -1907,6 +2366,7 @@ done:
+ static int __init init_sd(void)
+ {
+ int majors = 0, i;
++ int rc = 0;
+
+ SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
+
+@@ -1917,9 +2377,13 @@ static int __init init_sd(void)
+ if (!majors)
+ return -ENODEV;
+
++ sd_iostats_init();
+ class_register(&sd_disk_class);
+
+- return scsi_register_driver(&sd_template.gendrv);
++ rc = scsi_register_driver(&sd_template.gendrv);
++ if (rc)
++ sd_iostats_fini();
++ return rc;
+ }
+
+ /**
+@@ -1938,6 +2402,7 @@ static void __exit exit_sd(void)
+ unregister_blkdev(sd_major(i), "sd");
+
+ class_unregister(&sd_disk_class);
++ sd_iostats_fini();
+ }
+
+ module_init(init_sd);