--- /dev/null
+Index: linux-2.6.22.5/block/blk-core.c
+===================================================================
+--- linux-2.6.22.5.orig/block/blk-core.c
++++ linux-2.6.22.5/block/blk-core.c
+@@ -3101,6 +3101,8 @@ static inline int should_fail_request(st
+
+ #endif /* CONFIG_FAIL_MAKE_REQUEST */
+
++int dev_check_rdonly(struct block_device *bdev);
++
+ /*
+ * Check whether this bio extends beyond the end of the device.
+ */
+@@ -3185,6 +3187,12 @@ end_io:
+
+ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ goto end_io;
++		/* cfs's dev_rdonly check; bi_rw may carry flags besides WRITE */
++		if (bio_data_dir(bio) == WRITE &&
++		    dev_check_rdonly(bio->bi_bdev)) {
++			bio_endio(bio, 0);	/* drop the write, report success */
++			break;
++		}
+
+ if (should_fail_request(bio))
+ goto end_io;
+@@ -3850,6 +3858,91 @@ void swap_io_context(struct io_context *
+ }
+ EXPORT_SYMBOL(kblockd_flush_work);
+
++/*
++ * Debug code for turning block devices "read-only" (writes are silently
++ * discarded but completed without error). For fs crash/recovery testing.
++ */
++struct deventry {
++	dev_t dev;		/* device number marked read-only */
++	struct deventry *next;	/* next entry of the devlist chain */
++};
++
++static struct deventry *devlist;	/* protected by devlock */
++static DEFINE_SPINLOCK(devlock);
++
++/*
++ * Return 1 if @bdev is on the dev_set_rdonly() list, 0 if not (or if NULL).
++ */
++int dev_check_rdonly(struct block_device *bdev)
++{
++	struct deventry *entry;
++	int found = 0;
++
++	if (!bdev)
++		return 0;
++	spin_lock(&devlock);
++	for (entry = devlist; entry && !found; entry = entry->next)
++		found = (entry->dev == bdev->bd_dev);
++	spin_unlock(&devlock);
++	return found;
++}
++
++/* Mark @bdev so that dev_check_rdonly() reports it; no-op if already set. */
++void dev_set_rdonly(struct block_device *bdev)
++{
++	struct deventry *newdev, *cur;
++
++	if (!bdev)
++		return;
++	newdev = kmalloc(sizeof(*newdev), GFP_KERNEL);
++	if (!newdev)
++		return;
++
++	spin_lock(&devlock);
++	for (cur = devlist; cur; cur = cur->next) {
++		if (bdev->bd_dev == cur->dev) {
++			spin_unlock(&devlock);
++			kfree(newdev);	/* already listed: spare entry unused */
++			return;
++		}
++	}
++	newdev->dev = bdev->bd_dev;
++	newdev->next = devlist;
++	devlist = newdev;
++	spin_unlock(&devlock);
++	printk(KERN_WARNING "Turning device %s (%#x) read-only\n",
++	       bdev->bd_disk ? bdev->bd_disk->disk_name : "unknown block",
++	       bdev->bd_dev);
++}
++
++/* Take @bdev off the read-only list; does nothing if it is not listed. */
++void dev_clear_rdonly(struct block_device *bdev)
++{
++	struct deventry **link, *victim = NULL;
++
++	if (!bdev)
++		return;
++	spin_lock(&devlock);
++	for (link = &devlist; *link; link = &(*link)->next) {
++		if ((*link)->dev == bdev->bd_dev) {
++			victim = *link;
++			*link = victim->next;	/* unlink from the chain */
++			break;
++		}
++	}
++	spin_unlock(&devlock);
++	if (!victim)
++		return;
++
++	kfree(victim);
++	printk(KERN_WARNING "Removing read-only on %s (%#x)\n",
++	       bdev->bd_disk ? bdev->bd_disk->disk_name :
++	       "unknown block", bdev->bd_dev);
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
+ int __init blk_dev_init(void)
+ {
+ int i;
+Index: linux-2.6.22.5/fs/block_dev.c
+===================================================================
+--- linux-2.6.22.5.orig/fs/block_dev.c
++++ linux-2.6.22.5/fs/block_dev.c
+@@ -1294,6 +1294,7 @@ static int __blkdev_put(struct block_dev
+ if (bdev != bdev->bd_contains)
+ victim = bdev->bd_contains;
+ bdev->bd_contains = NULL;
++ dev_clear_rdonly(bdev);
+ }
+ unlock_kernel();
+ mutex_unlock(&bdev->bd_mutex);
+Index: linux-2.6.22.5/include/linux/fs.h
+===================================================================
+--- linux-2.6.22.5.orig/include/linux/fs.h
++++ linux-2.6.22.5/include/linux/fs.h
+@@ -1744,6 +1744,10 @@ struct bio;
+ extern void submit_bio(int, struct bio *);
+ extern int bdev_read_only(struct block_device *);
+ #endif
++#define HAVE_CLEAR_RDONLY_ON_PUT
++extern void dev_set_rdonly(struct block_device *bdev);
++extern int dev_check_rdonly(struct block_device *bdev);
++extern void dev_clear_rdonly(struct block_device *bdev);
+ extern int set_blocksize(struct block_device *, int);
+ extern int sb_set_blocksize(struct super_block *, int);
+ extern int sb_min_blocksize(struct super_block *, int);
--- /dev/null
+From: Jan Kara <jack@suse.cz>
+
+Implement conversion functions for the new version (version 1) of the quota
+format, which supports 64-bit block and inode limits and 64-bit inode usage.
+The original implementation was written by Andrew Perepechko.
+
+Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ fs/quota_v2.c | 140 ++++++++++++++++++++++++++++++++++++----------
+ fs/quotaio_v2.h | 26 ++++++--
+ 2 files changed, 132 insertions(+), 34 deletions(-)
+
+diff -puN fs/quota_v2.c~quota-support-64-bit-quota-format fs/quota_v2.c
+--- a/fs/quota_v2.c~quota-support-64-bit-quota-format
++++ a/fs/quota_v2.c
+@@ -23,14 +23,24 @@ MODULE_LICENSE("GPL");
+
+ #define __QUOTA_V2_PARANOIA
+
+-static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
+-static void v2_disk2memdqb(struct dquot *dquot, void *dp);
+-static int v2_is_id(void *dp, struct dquot *dquot);
+-
+-static struct qtree_fmt_operations v2_qtree_ops = {
+- .mem2disk_dqblk = v2_mem2diskdqb,
+- .disk2mem_dqblk = v2_disk2memdqb,
+- .is_id = v2_is_id,
++static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot);
++static void v2r0_disk2memdqb(struct dquot *dquot, void *dp);
++static int v2r0_is_id(void *dp, struct dquot *dquot);
++
++static struct qtree_fmt_operations v2r0_qtree_ops = {
++ .mem2disk_dqblk = v2r0_mem2diskdqb,
++ .disk2mem_dqblk = v2r0_disk2memdqb,
++ .is_id = v2r0_is_id,
++};
++
++static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
++static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
++static int v2r1_is_id(void *dp, struct dquot *dquot);
++
++static struct qtree_fmt_operations v2r1_qtree_ops = {
++ .mem2disk_dqblk = v2r1_mem2diskdqb,
++ .disk2mem_dqblk = v2r1_disk2memdqb,
++ .is_id = v2r1_is_id,
+ };
+
+ #define QUOTABLOCK_BITS 10
+@@ -46,8 +56,7 @@ static inline qsize_t v2_qbtos(qsize_t b
+ return blocks << QUOTABLOCK_BITS;
+ }
+
+-/* Check whether given file is really vfsv0 quotafile */
+-static int v2_check_quota_file(struct super_block *sb, int type)
++static int v2_check_quota_file_header(struct super_block *sb, int type)
+ {
+ struct v2_disk_dqheader dqhead;
+ ssize_t size;
+@@ -58,12 +67,20 @@ static int v2_check_quota_file(struct su
+ if (size != sizeof(struct v2_disk_dqheader)) {
+ printk("quota_v2: failed read expected=%zd got=%zd\n",
+ sizeof(struct v2_disk_dqheader), size);
+- return 0;
++ return -EIO;
+ }
+- if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
+- le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+- return 0;
+- return 1;
++ if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
++ return -ENOENT;
++ if (le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
++ return -EOPNOTSUPP;
++ return le32_to_cpu(dqhead.dqh_version);
++}
++
++
++/* Check whether given file is a quotafile with known magic and version */
++static int v2_check_quota_file(struct super_block *sb, int type)
++{
++ return v2_check_quota_file_header(sb, type) >= 0;
++}
+
+ /* Read information header from quota file */
+@@ -73,7 +90,13 @@ static int v2_read_file_info(struct supe
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct qtree_mem_dqinfo *qinfo;
+ ssize_t size;
++ int version = v2_check_quota_file_header(sb, type);
+
++ if (version < 0) {
++ printk(KERN_WARNING "Cannot identify quota file version on "
++ "device %s: %d\n", sb->s_id, version);
++ return -1;
++ }
+ size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+ sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+ if (size != sizeof(struct v2_disk_dqinfo)) {
+@@ -88,9 +111,14 @@ static int v2_read_file_info(struct supe
+ return -1;
+ }
+ qinfo = info->dqi_priv;
+- /* limits are stored as unsigned 32-bit data */
+- info->dqi_maxblimit = 0xffffffff;
+- info->dqi_maxilimit = 0xffffffff;
++ if (version == 0) {
++ /* limits are stored as unsigned 32-bit data */
++ info->dqi_maxblimit = 0xffffffff;
++ info->dqi_maxilimit = 0xffffffff;
++ } else {
++ info->dqi_maxblimit = 0x7fffffffffffffffULL;
++ info->dqi_maxilimit = 0x7fffffffffffffffULL;
++ }
+ info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+ info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
+ info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
+@@ -102,8 +130,13 @@ static int v2_read_file_info(struct supe
+ qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+ qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+ qinfo->dqi_qtree_depth = qtree_depth(qinfo);
+- qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
+- qinfo->dqi_ops = &v2_qtree_ops;
++ if (version == 0) {
++ qinfo->dqi_entry_size = sizeof(struct v2r0_disk_dqblk);
++ qinfo->dqi_ops = &v2r0_qtree_ops;
++ } else {
++ qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
++ qinfo->dqi_ops = &v2r1_qtree_ops;
++ }
+ return 0;
+ }
+
+@@ -134,9 +167,9 @@ static int v2_write_file_info(struct sup
+ return 0;
+ }
+
+-static void v2_disk2memdqb(struct dquot *dquot, void *dp)
++static void v2r0_disk2memdqb(struct dquot *dquot, void *dp)
+ {
+- struct v2_disk_dqblk *d = dp, empty;
++ struct v2r0_disk_dqblk *d = dp, empty;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
+ m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
+@@ -148,15 +181,15 @@ static void v2_disk2memdqb(struct dquot
+ m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+ m->dqb_btime = le64_to_cpu(d->dqb_btime);
+ /* We need to escape back all-zero structure */
+- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
++ memset(&empty, 0, sizeof(struct v2r0_disk_dqblk));
+ empty.dqb_itime = cpu_to_le64(1);
+- if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
++ if (!memcmp(&empty, dp, sizeof(struct v2r0_disk_dqblk)))
+ m->dqb_itime = 0;
+ }
+
+-static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
++static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
+ {
+- struct v2_disk_dqblk *d = dp;
++ struct v2r0_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+@@ -174,9 +207,60 @@ static void v2_mem2diskdqb(void *dp, str
+ d->dqb_itime = cpu_to_le64(1);
+ }
+
+-static int v2_is_id(void *dp, struct dquot *dquot)
++static int v2r0_is_id(void *dp, struct dquot *dquot)
++{
++ struct v2r0_disk_dqblk *d = dp;
++ struct qtree_mem_dqinfo *info =
++ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
++
++ if (qtree_entry_unused(info, dp))
++ return 0;
++ return le32_to_cpu(d->dqb_id) == dquot->dq_id;
++}
++
++static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
++{
++ struct v2r1_disk_dqblk *d = dp, empty;
++ struct mem_dqblk *m = &dquot->dq_dqb;
++
++ m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
++ m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
++ m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
++ m->dqb_itime = le64_to_cpu(d->dqb_itime);
++ m->dqb_bhardlimit = v2_qbtos(le64_to_cpu(d->dqb_bhardlimit));
++ m->dqb_bsoftlimit = v2_qbtos(le64_to_cpu(d->dqb_bsoftlimit));
++ m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
++ m->dqb_btime = le64_to_cpu(d->dqb_btime);
++ /* Undo the escaping of an all-zero entry (stored with itime == 1) */
++ memset(&empty, 0, sizeof(struct v2r1_disk_dqblk));
++ empty.dqb_itime = cpu_to_le64(1);
++ if (!memcmp(&empty, dp, sizeof(struct v2r1_disk_dqblk)))
++ m->dqb_itime = 0;
++}
++
++static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
++{
++	struct v2r1_disk_dqblk *d = dp;
++	struct mem_dqblk *m = &dquot->dq_dqb;
++	struct mem_dqinfo *dqi = sb_dqinfo(dquot->dq_sb, dquot->dq_type);
++
++	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
++	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
++	d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
++	d->dqb_itime = cpu_to_le64(m->dqb_itime);
++	d->dqb_bhardlimit = cpu_to_le64(v2_stoqb(m->dqb_bhardlimit));
++	d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
++	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
++	d->dqb_btime = cpu_to_le64(m->dqb_btime);
++	d->dqb_id = cpu_to_le32(dquot->dq_id);
++	d->dqb_padding = 0;		/* dp may not be pre-zeroed */
++	if (qtree_entry_unused(dqi->dqi_priv, dp))
++		d->dqb_itime = cpu_to_le64(1);	/* escape all-zero entry */
++}
++
++static int v2r1_is_id(void *dp, struct dquot *dquot)
+ {
+- struct v2_disk_dqblk *d = dp;
++ struct v2r1_disk_dqblk *d = dp;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+diff -puN fs/quotaio_v2.h~quota-support-64-bit-quota-format fs/quotaio_v2.h
+--- a/fs/quotaio_v2.h~quota-support-64-bit-quota-format
++++ a/fs/quotaio_v2.h
+@@ -17,8 +17,8 @@
+ }
+
+ #define V2_INITQVERSIONS {\
+- 0, /* USRQUOTA */\
+- 0 /* GRPQUOTA */\
++ 1, /* USRQUOTA */\
++ 1 /* GRPQUOTA */\
+ }
+
+ /* First generic header */
+@@ -28,11 +28,11 @@ struct v2_disk_dqheader {
+ };
+
+ /*
+- * The following structure defines the format of the disk quota file
+- * (as it appears on disk) - the file is a radix tree whose leaves point
+- * to blocks of these structures.
++ * The following structure defines the format of the disk quota file in version
++ * 0 - the file is a radix tree whose leaves point to blocks of these
++ * structures.
+ */
+-struct v2_disk_dqblk {
++struct v2r0_disk_dqblk {
+ __le32 dqb_id; /* id this quota applies to */
+ __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
+ __le32 dqb_isoftlimit; /* preferred inode limit */
+@@ -44,6 +44,20 @@ struct v2_disk_dqblk {
+ __le64 dqb_itime; /* time limit for excessive inode use */
+ };
+
++/* The same structure in quota file version 1 */
++struct v2r1_disk_dqblk {
++ __le32 dqb_id; /* id this quota applies to */
++ __le32 dqb_padding; /* padding field */
++ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
++ __le64 dqb_isoftlimit; /* preferred inode limit */
++ __le64 dqb_curinodes; /* current # allocated inodes */
++ __le64 dqb_bhardlimit; /* absolute limit on disk space */
++ __le64 dqb_bsoftlimit; /* preferred limit on disk space */
++ __le64 dqb_curspace; /* current space occupied (in bytes) */
++ __le64 dqb_btime; /* time limit for excessive disk use */
++ __le64 dqb_itime; /* time limit for excessive inode use */
++};
++
+ /* Header with type and version specific information */
+ struct v2_disk_dqinfo {
+ __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
+_
--- /dev/null
+Index: linux-2.6.22.19/drivers/scsi/Kconfig
+===================================================================
+--- linux-2.6.22.19.orig/drivers/scsi/Kconfig
++++ linux-2.6.22.19/drivers/scsi/Kconfig
+@@ -76,6 +76,14 @@ config BLK_DEV_SD
+ In this case, do not compile the driver for your SCSI host adapter
+ (below) as a module either.
+
++config SD_IOSTATS
++ bool "Enable SCSI disk I/O stats"
++ depends on BLK_DEV_SD
++ default y
++ ---help---
++ This enables SCSI disk I/O stats collection. You must also enable
++ /proc file system support if you want this feature.
++
+ config CHR_DEV_ST
+ tristate "SCSI tape support"
+ depends on SCSI
+Index: linux-2.6.22.19/drivers/scsi/scsi_proc.c
+===================================================================
+--- linux-2.6.22.19.orig/drivers/scsi/scsi_proc.c
++++ linux-2.6.22.19/drivers/scsi/scsi_proc.c
+@@ -40,7 +40,8 @@
+ /* 4K page size, but our output routines, use some slack for overruns */
+ #define PROC_BLOCK_SIZE (3*1024)
+
+-static struct proc_dir_entry *proc_scsi;
++struct proc_dir_entry *proc_scsi;
++EXPORT_SYMBOL(proc_scsi);
+
+ /* Protect sht->present and sht->proc_dir */
+ static DEFINE_MUTEX(global_host_template_mutex);
+Index: linux-2.6.22.19/drivers/scsi/sd.c
+===================================================================
+--- linux-2.6.22.19.orig/drivers/scsi/sd.c
++++ linux-2.6.22.19/drivers/scsi/sd.c
+@@ -94,6 +94,24 @@ static DEFINE_SPINLOCK(sd_index_lock);
+ * object after last put) */
+ static DEFINE_MUTEX(sd_ref_mutex);
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++# include <linux/proc_fs.h>
++# include <linux/seq_file.h>
++struct proc_dir_entry *sd_iostats_procdir = NULL;
++char sd_iostats_procdir_name[] = "sd_iostats";
++static struct file_operations sd_iostats_proc_fops;
++
++extern void sd_iostats_init(void);
++extern void sd_iostats_fini(void);
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
++#else
++static inline void sd_iostats_init(void) {}
++static inline void sd_iostats_fini(void) {}
++static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
++static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
++#endif
++
+ static const char *sd_cache_types[] = {
+ "write through", "none", "write back",
+ "write back, no read (daft)"
+@@ -498,6 +516,8 @@ static int sd_init_command(struct scsi_c
+ */
+ SCpnt->done = sd_rw_intr;
+
++ sd_iostats_start_req(SCpnt);
++
+ /*
+ * This indicates that the command is ready from our end to be
+ * queued.
+@@ -980,6 +1000,7 @@ static void sd_done(struct scsi_cmnd
+ break;
+ }
+ out:
++ sd_iostats_finish_req(SCpnt);
+ if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
+ sd_dif_complete(SCpnt, good_bytes);
+
+@@ -1666,6 +1687,36 @@ static int sd_probe(struct device *dev)
+ if (sdp->removable)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++	sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
++	if (!sdkp->stats) {
++		printk(KERN_WARNING "cannot allocate iostat structure for "
++				    "%s\n", gd->disk_name);
++	} else {
++		do_gettimeofday(&sdkp->stats->iostat_timeval);
++		sdkp->stats->iostat_queue_stamp = jiffies;
++		spin_lock_init(&sdkp->stats->iostat_lock);
++		if (sd_iostats_procdir) {
++			struct proc_dir_entry *pde;
++			pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
++						sd_iostats_procdir);
++			if (!pde) {
++				printk(KERN_WARNING "Can't create /proc/scsi/"
++						    "%s/%s\n",
++						    sd_iostats_procdir_name,
++						    gd->disk_name);
++				kfree(sdkp->stats);
++				sdkp->stats = NULL;
++			} else {
++				pde->proc_fops = &sd_iostats_proc_fops;
++				pde->data = gd;	/* read back via PDE(inode)->data */
++			}
++		} else {	/* no stats directory: keep no stats for this disk */
++			kfree(sdkp->stats);
++			sdkp->stats = NULL;
++		}
++	}
++#endif
+ dev_set_drvdata(dev, sdkp);
+ add_disk(gd);
+
+@@ -1709,6 +1760,366 @@ static int sd_remove(struct device *dev)
+ return 0;
+ }
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++static int
++sd_iostats_seq_show(struct seq_file *seq, void *v)
++{
++	struct timeval now;
++	struct gendisk *disk = seq->private;
++	iostat_stats_t *stats;
++	unsigned long long read_len;
++	unsigned long long read_len_tot;
++	unsigned long read_num;
++	unsigned long read_num_tot;
++	unsigned long long write_len;
++	unsigned long long write_len_tot;
++	unsigned long write_num;
++	unsigned long write_num_tot;
++	int i;
++	int maxi;
++
++	stats = scsi_disk(disk)->stats;
++	if (stats == NULL) {
++		printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
++		BUG();
++	}
++
++	do_gettimeofday(&now);
++	now.tv_sec -= stats->iostat_timeval.tv_sec;
++	now.tv_usec -= stats->iostat_timeval.tv_usec;
++	if (now.tv_usec < 0) {
++		now.tv_usec += 1000000;
++		now.tv_sec--;
++	}
++
++	/* counters are read without iostat_lock: sampling races with updates */
++	seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
++		   (unsigned long) scsi_disk(disk)->index,
++		   now.tv_sec, now.tv_usec);
++
++	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
++		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
++		    stats->iostat_write_histogram[i].iostat_count != 0)
++			break;
++	maxi = i;	/* highest size bucket worth printing */
++
++	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
++		   "reads", "total", "writes", "total");
++
++	read_len_tot = write_len_tot = 0;
++	read_num_tot = write_num_tot = 0;
++	for (i = 0; i <= maxi; i++) {
++		read_len = stats->iostat_read_histogram[i].iostat_size;
++		read_len_tot += read_len;
++		read_num = stats->iostat_read_histogram[i].iostat_count;
++		read_num_tot += read_num;
++
++		write_len = stats->iostat_write_histogram[i].iostat_size;
++		write_len_tot += write_len;
++		write_num = stats->iostat_write_histogram[i].iostat_count;
++		write_num_tot += write_num;
++
++		seq_printf(seq, "%8d %8lu %12llu %8lu %12llu\n",
++			   512<<i, read_num, read_len, write_num, write_len);
++	}
++
++	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
++		   read_num_tot, read_len_tot,
++		   write_num_tot, write_len_tot);
++
++	seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long long ticks, percent;
++		ticks = stats->iostat_queue_ticks[i];
++		if (ticks == 0)
++			continue;
++		percent = stats->iostat_queue_ticks[i] * 100;
++		do_div(percent, stats->iostat_queue_ticks_sum);
++		seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
++	}
++
++	if (stats->iostat_reqs != 0) {
++		unsigned long long aveseek = 0, percent = 0;
++
++		if (stats->iostat_seeks) {
++			aveseek = stats->iostat_seek_sectors;
++			do_div(aveseek, stats->iostat_seeks);
++			percent = stats->iostat_seeks * 100;
++			do_div(percent, stats->iostat_reqs);
++		}
++
++		seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
++			   "%llu sectors in ave, %llu%% of all reqs\n",
++			   stats->iostat_sectors, stats->iostat_reqs,
++			   stats->iostat_seeks, aveseek, percent);
++	}
++
++	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
++		   "%", "writes", "%");	/* "%" is a %s argument: no escaping */
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long read_percent = 0, write_percent = 0;
++		if (stats->iostat_wtime[i] == 0 &&
++		    stats->iostat_rtime[i] == 0)
++			continue;
++		if (stats->iostat_read_reqs)
++			read_percent = stats->iostat_rtime[i] * 100 /
++				       stats->iostat_read_reqs;
++		if (stats->iostat_write_reqs)
++			write_percent = stats->iostat_wtime[i] * 100 /
++					stats->iostat_write_reqs;
++		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++			   jiffies_to_msecs(((1UL << i) >> 1) << 1),
++			   stats->iostat_rtime[i], read_percent,
++			   stats->iostat_wtime[i], write_percent);
++	}
++
++	seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
++		   "%", "writes", "%");	/* "%" is a %s argument: no escaping */
++	for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
++		unsigned long read_percent = 0, write_percent = 0;
++		if (stats->iostat_wtime_in_queue[i] == 0 &&
++		    stats->iostat_rtime_in_queue[i] == 0)
++			continue;
++		if (stats->iostat_read_reqs)
++			read_percent = stats->iostat_rtime_in_queue[i] * 100 /
++				       stats->iostat_read_reqs;
++		if (stats->iostat_write_reqs)
++			write_percent = stats->iostat_wtime_in_queue[i] * 100 /
++					stats->iostat_write_reqs;
++		seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
++			   jiffies_to_msecs(((1UL << i) >> 1) << 1),
++			   stats->iostat_rtime_in_queue[i],
++			   read_percent,
++			   stats->iostat_wtime_in_queue[i],
++			   write_percent);
++	}
++
++	return 0;
++}
++
++static void *
++sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
++{
++ return (*pos == 0) ? (void *)1 : NULL;
++}
++
++static void *
++sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
++{
++ ++*pos;
++ return NULL;
++}
++
++static void
++sd_iostats_seq_stop(struct seq_file *p, void *v)
++{
++}
++
++static struct seq_operations sd_iostats_seqops = {
++ .start = sd_iostats_seq_start,
++ .stop = sd_iostats_seq_stop,
++ .next = sd_iostats_seq_next,
++ .show = sd_iostats_seq_show,
++};
++
++static int
++sd_iostats_seq_open (struct inode *inode, struct file *file)
++{
++ int rc;
++
++ rc = seq_open(file, &sd_iostats_seqops);
++ if (rc != 0)
++ return rc;
++
++ ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
++ return 0;
++}
++
++static ssize_t
++sd_iostats_seq_write(struct file *file, const char __user *buffer,
++		     size_t len, loff_t *off)
++{
++	struct seq_file *seq = file->private_data;
++	struct gendisk *disk = seq->private;
++	iostat_stats_t *stats = scsi_disk(disk)->stats;
++	unsigned long flags;
++	unsigned long qdepth;
++
++	/* any write resets the counters; the written data is never read */
++	spin_lock_irqsave(&stats->iostat_lock, flags);
++	qdepth = stats->iostat_queue_depth;	/* keep in-flight count */
++	memset(stats, 0, offsetof(iostat_stats_t, iostat_lock));
++	do_gettimeofday(&stats->iostat_timeval);
++	stats->iostat_queue_stamp = jiffies;
++	stats->iostat_queue_depth = qdepth;
++	spin_unlock_irqrestore(&stats->iostat_lock, flags);
++
++	return len;
++}
++
++static struct file_operations sd_iostats_proc_fops = {
++ .owner = THIS_MODULE,
++ .open = sd_iostats_seq_open,
++ .read = seq_read,
++ .write = sd_iostats_seq_write,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++extern struct proc_dir_entry *proc_scsi;
++
++void
++sd_iostats_init(void)
++{
++ if (proc_scsi == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "proc_scsi is NULL\n");
++ return;
++ }
++
++ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
++ S_IFDIR | S_IRUGO | S_IXUGO,
++ proc_scsi);
++ if (sd_iostats_procdir == NULL) {
++ printk(KERN_WARNING "No access to sd iostats: "
++ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
++ return;
++ }
++}
++
++void sd_iostats_fini(void)
++{
++ if (proc_scsi != NULL && sd_iostats_procdir != NULL)
++ remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
++
++ sd_iostats_procdir = NULL;
++}
++
++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
++{
++ struct request *rq = SCpnt->request;
++ iostat_stats_t *stats;
++ unsigned long *tcounter;
++ int tbucket;
++ int tmp;
++ unsigned long irqflags;
++ unsigned long i;
++
++ stats = scsi_disk(rq->rq_disk)->stats;
++ if (stats == NULL)
++ return;
++
++ tmp = jiffies - rq->start_time;
++ for (tbucket = 0; tmp > 1; tbucket++)
++ tmp >>= 1;
++ if (tbucket >= IOSTAT_NCOUNTERS)
++ tbucket = IOSTAT_NCOUNTERS - 1;
++ //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
++
++ tcounter = rq_data_dir(rq) == WRITE ?
++ &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
++
++ spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++ /* update delay stats */
++ (*tcounter)++;
++
++ /* update queue depth stats */
++ i = stats->iostat_queue_depth;
++ if (i >= IOSTAT_NCOUNTERS)
++ i = IOSTAT_NCOUNTERS - 1;
++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++ BUG_ON(stats->iostat_queue_depth == 0);
++ stats->iostat_queue_depth--;
++
++ /* update seek stats. XXX: not sure about nr_sectors */
++ stats->iostat_sectors += rq->nr_sectors;
++ stats->iostat_reqs++;
++ if (rq->sector != stats->iostat_next_sector) {
++ stats->iostat_seek_sectors +=
++ rq->sector > stats->iostat_next_sector ?
++ rq->sector - stats->iostat_next_sector :
++ stats->iostat_next_sector - rq->sector;
++ stats->iostat_seeks++;
++ }
++ stats->iostat_next_sector = rq->sector + rq->nr_sectors;
++
++ stats->iostat_queue_stamp = jiffies;
++
++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
++}
++
++void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
++{
++ struct request *rq = SCpnt->request;
++ iostat_stats_t *stats;
++ iostat_counter_t *counter;
++ int bucket;
++ int tbucket;
++ int tmp;
++ unsigned long irqflags;
++ unsigned long i;
++ int nsect;
++
++ stats = scsi_disk(rq->rq_disk)->stats;
++ if (stats == NULL)
++ return;
++
++ nsect = scsi_bufflen(SCpnt) >> 9;
++ for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
++ tmp >>= 1;
++
++ if (bucket >= IOSTAT_NCOUNTERS) {
++ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
++ BUG();
++ }
++
++ counter = rq_data_dir(rq) == WRITE ?
++ &stats->iostat_write_histogram[bucket] :
++ &stats->iostat_read_histogram[bucket];
++
++ tmp = jiffies - rq->start_time;
++ for (tbucket = 0; tmp > 1; tbucket++)
++ tmp >>= 1;
++ if (tbucket >= IOSTAT_NCOUNTERS)
++ tbucket = IOSTAT_NCOUNTERS - 1;
++ //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
++
++ /* an ugly hack to know exact processing time. the right
++ * solution is to add one more field to struct request
++ * hopefully it will break nothing ... */
++ rq->start_time = jiffies;
++
++ spin_lock_irqsave(&stats->iostat_lock, irqflags);
++
++ /* update queue depth stats */
++ i = stats->iostat_queue_depth;
++ if (i >= IOSTAT_NCOUNTERS)
++ i = IOSTAT_NCOUNTERS - 1;
++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
++ stats->iostat_queue_depth++;
++
++ /* update delay stats */
++ if (rq_data_dir(rq) == WRITE) {
++ stats->iostat_wtime_in_queue[tbucket]++;
++ stats->iostat_write_reqs++;
++ } else {
++ stats->iostat_rtime_in_queue[tbucket]++;
++ stats->iostat_read_reqs++;
++ }
++
++ /* update size stats */
++ counter->iostat_size += nsect;
++ counter->iostat_count++;
++
++ stats->iostat_queue_stamp = jiffies;
++
++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
++}
++#endif
++
+ /**
+ * scsi_disk_release - Called to free the scsi_disk structure
+ * @cdev: pointer to embedded class device
+@@ -1727,10 +2138,16 @@ static void scsi_disk_release(struct cla
+ idr_remove(&sd_index_idr, sdkp->index);
+ spin_unlock(&sd_index_lock);
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++ if (sdkp->stats) {
++ remove_proc_entry(disk->disk_name, sd_iostats_procdir);
++ kfree(sdkp->stats);
++ sdkp->stats = NULL;
++ }
++#endif
+ disk->private_data = NULL;
+ put_disk(disk);
+ put_device(&sdkp->device->sdev_gendev);
+-
+ kfree(sdkp);
+ }
+
+@@ -1845,6 +2262,8 @@ static int __init init_sd(void)
+ if (!majors)
+ return -ENODEV;
+
++ sd_iostats_init();
++
+ err = class_register(&sd_disk_class);
+ if (err)
+ goto err_out;
+@@ -1860,6 +2279,7 @@ err_out_class:
+ err_out:
+ for (i = 0; i < SD_MAJORS; i++)
+ unregister_blkdev(sd_major(i), "sd");
++ sd_iostats_fini();
+ return err;
+ }
+
+Index: linux-2.6.22.19/drivers/scsi/sd.h
+===================================================================
+--- linux-2.6.22.19.orig/drivers/scsi/sd.h
++++ linux-2.6.22.19/drivers/scsi/sd.h
+@@ -31,6 +31,46 @@
+ */
+ #define SD_BUF_SIZE 512
+
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++typedef struct {
++ unsigned long long iostat_size;
++ unsigned long long iostat_count;
++} iostat_counter_t;
++
++#define IOSTAT_NCOUNTERS 16
++typedef struct {
++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
++ struct timeval iostat_timeval;
++
++ /* queue depth: how well the pipe is filled up */
++ unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
++ unsigned long long iostat_queue_ticks_sum;
++ unsigned long iostat_queue_depth;
++ unsigned long iostat_queue_stamp;
++
++ /* seeks: how linear the traffic is */
++ unsigned long long iostat_next_sector;
++ unsigned long long iostat_seek_sectors;
++ unsigned long long iostat_seeks;
++ unsigned long long iostat_sectors;
++ unsigned long long iostat_reqs;
++ unsigned long iostat_read_reqs;
++ unsigned long iostat_write_reqs;
++
++ /* process time: how long it takes to process requests */
++ unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
++ unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
++
++ /* queue time: how long process spent in elevator's queue */
++ unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
++ unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
++
++ /* must be the last field, as it's used to know size to be memset'ed */
++ spinlock_t iostat_lock;
++} ____cacheline_aligned_in_smp iostat_stats_t;
++#endif
++
+ struct scsi_disk {
+ struct scsi_driver *driver; /* always &sd_template */
+ struct scsi_device *device;
+@@ -44,6 +84,9 @@ struct scsi_disk {
+ unsigned WCE : 1; /* state of disk WCE bit */
+ unsigned RCD : 1; /* state of disk RCD bit, unused */
+ unsigned DPOFUA : 1; /* state of disk DPOFUA bit */
++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
++ iostat_stats_t *stats; /* scsi disk statistics */
++#endif
+ };
+ #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)
+