From 47d177baf6cb58df1ea570cbc259e21b4d4215b5 Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 26 Aug 2004 14:54:12 +0000 Subject: [PATCH] - added kernel patch for /dev/sd I/O stats (4385) --- .../patches/sd_iostats-2.4.21-chaos.patch | 442 +++++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch diff --git a/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch b/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch new file mode 100644 index 0000000..c8d2598 --- /dev/null +++ b/lustre/kernel_patches/patches/sd_iostats-2.4.21-chaos.patch @@ -0,0 +1,442 @@ +Index: linux/Documentation/Configure.help +=================================================================== +RCS file: /home/cvs/master/68chaos_eebperf/Documentation/Configure.help,v +retrieving revision 1.1.1.1 +diff -u -p -r1.1.1.1 Configure.help +--- linux/Documentation/Configure.help 20 Aug 2004 18:09:23 -0000 1.1.1.1 ++++ linux/Documentation/Configure.help 26 Aug 2004 12:34:40 -0000 +@@ -7679,6 +7679,11 @@ CONFIG_SCSI_LOGGING + there should be no noticeable performance impact as long as you have + logging turned off. + ++SCSI disk I/O stats ++CONFIG_SD_IOSTATS ++ This enables SCSI disk I/O stats collection. You must also enable ++ /proc file system support if you want this feature. ++ + QDIO base support for IBM S/390 and zSeries + CONFIG_QDIO + This driver provides the Queued Direct I/O base support for the +Index: linux/drivers/scsi/Config.in +=================================================================== +RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/Config.in,v +retrieving revision 1.1.1.1 +diff -u -p -r1.1.1.1 Config.in +--- linux/drivers/scsi/Config.in 20 Aug 2004 18:10:13 -0000 1.1.1.1 ++++ linux/drivers/scsi/Config.in 24 Aug 2004 14:30:08 -0000 +@@ -4,6 +4,7 @@ dep_tristate ' SCSI disk support' CONFI + + if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then + int 'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40 ++ bool 'SCSI disk I/O stats' CONFIG_SD_IOSTATS + fi + + dep_tristate ' SCSI tape support' CONFIG_CHR_DEV_ST $CONFIG_SCSI +Index: linux/drivers/scsi/sd.c +=================================================================== +RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/sd.c,v +retrieving revision 1.1.1.1 +diff -u -p -r1.1.1.1 sd.c +--- linux/drivers/scsi/sd.c 20 Aug 2004 18:10:16 -0000 1.1.1.1 ++++ linux/drivers/scsi/sd.c 26 Aug 2004 13:34:39 -0000 +@@ -65,6 +65,40 @@ + * static const char RCSid[] = "$Header:"; + */ + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++#include ++#include ++ ++typedef struct ++{ ++ unsigned long long iostat_size; ++ unsigned long long iostat_count; ++} iostat_counter_t; ++ ++#define IOSTAT_NCOUNTERS 16 ++typedef struct ++{ ++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; ++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; ++ struct timeval iostat_timeval; ++} iostat_stats_t; ++ ++iostat_stats_t **sd_iostats; ++spinlock_t sd_iostats_lock; ++struct proc_dir_entry *sd_iostats_procdir; ++char sd_iostats_procdir_name[] = "sd_iostats"; ++ ++extern void sd_iostats_init(void); ++extern void sd_iostats_init_disk(int disk); ++extern void sd_iostats_fini(void); ++extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite); ++#else ++static inline void sd_iostats_init(void) {} ++static inline void sd_iostats_init_disk(int disk) {} ++static inline void sd_iostats_fini(void) {} ++static inline void sd_iostats_bump(kdev_t dev, unsigned int nsect, int iswrite) {} ++#endif ++ + /* device number --> sd_gendisks index */ + #define SD_MAJOR_IDX(i) ( ((MAJOR(i) & 0x80) >> 4) + (MAJOR(i) & 7) ) + /* sd_gendisks index --> system major */ +@@ -351,6 +385,8 @@ static int sd_init_command(Scsi_Cmnd * S + SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n", + nbuff, dev, block)); + ++ sd_iostats_bump(dev, this_count, SCpnt->request.cmd == WRITE); ++ + /* + * If we have a 1K hardware sectorsize, prevent access to single + * 512 byte sectors. In theory we could handle this - in fact +@@ -545,7 +581,7 @@ static int sd_open(struct inode *inode, + if (scsi_block_when_processing_errors(SDev)) + scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL); + +- ++ sd_iostats_init_disk(target); + return 0; + + error_out: +@@ -1179,6 +1215,8 @@ static int sd_init() + + memset(sd_varyio, 0, (sd_template.dev_max << 4)); + ++ sd_iostats_init(); ++ + for (i = 0; i < sd_template.dev_max << 4; i++) { + sd_blocksizes[i] = 1024; + sd_hardsizes[i] = 512; +@@ -1243,6 +1281,7 @@ cleanup_gendisks_de_arr: + kfree(sd_gendisks); + sd_gendisks = NULL; + cleanup_sd_gendisks: ++ sd_iostats_fini(); + kfree(sd_varyio); + cleanup_varyio: + kfree(sd_max_sectors); +@@ -1466,6 +1505,316 @@ static void sd_detach(Scsi_Device * SDp) + return; + } + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++static int ++sd_iostats_seq_show(struct seq_file *seq, void *v) ++{ ++ struct timeval now; ++ unsigned long index = (unsigned long)(seq->private); ++ iostat_stats_t *stats; ++ unsigned long long read_len; ++ unsigned long long read_len_tot; ++ unsigned long read_num; ++ unsigned long read_num_tot; ++ unsigned long long write_len; ++ unsigned long long write_len_tot; ++ unsigned long write_num; ++ unsigned long write_num_tot; ++ int i; ++ int maxi; ++ ++ if (sd_iostats == NULL) { ++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n"); ++ BUG(); ++ } ++ ++ stats = sd_iostats[index]; ++ if (stats == NULL) { ++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); ++ BUG(); ++ } ++ ++ do_gettimeofday(&now); ++ now.tv_sec -= stats->iostat_timeval.tv_sec; ++ now.tv_usec -= stats->iostat_timeval.tv_usec; ++ if (now.tv_usec < 0) { ++ now.tv_usec += 1000000; ++ now.tv_sec--; ++ } ++ ++ /* this sampling races with updates */ ++ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n", ++ index, now.tv_sec, now.tv_usec); ++ ++ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--) ++ if (stats->iostat_read_histogram[i].iostat_count != 0 || ++ stats->iostat_write_histogram[i].iostat_count != 0) ++ break; ++ maxi = i; ++ ++ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", ++ "reads", "total", "writes", "total"); ++ ++ read_len_tot = write_len_tot = 0; ++ read_num_tot = write_num_tot = 0; ++ for (i = 0; i <= maxi; i++) { ++ read_len = stats->iostat_read_histogram[i].iostat_size; ++ read_len_tot += read_len; ++ read_num = stats->iostat_read_histogram[i].iostat_count; ++ read_num_tot += read_num; ++ ++ write_len = stats->iostat_write_histogram[i].iostat_size; ++ write_len_tot += write_len; ++ write_num = stats->iostat_write_histogram[i].iostat_count; ++ write_num_tot += write_num; ++ ++ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", ++ 512<private_data)->private = PDE(inode)->data; ++ return 0; ++} ++ ++static int ++sd_iostats_seq_write(struct file *file, const char *buffer, ++ size_t len, loff_t *off) ++{ ++ struct seq_file *seq = file->private_data; ++ unsigned long index = (unsigned long)seq->private; ++ iostat_stats_t *stats = sd_iostats[index]; ++ unsigned long flags; ++ ++ ++ spin_lock_irqsave (&sd_iostats_lock, flags); ++ memset (stats, 0, sizeof(*stats)); ++ do_gettimeofday(&stats->iostat_timeval); ++ spin_unlock_irqrestore (&sd_iostats_lock, flags); ++ ++ return len; ++} ++ ++static struct file_operations sd_iostats_proc_fops = { ++ .owner = THIS_MODULE, ++ .open = sd_iostats_seq_open, ++ .read = seq_read, ++ .write = sd_iostats_seq_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++void ++sd_iostats_init(void) ++{ ++ int maxdevs = sd_template.dev_max; ++ int i; ++ ++ spin_lock_init(&sd_iostats_lock); ++ ++ sd_iostats = kmalloc(maxdevs * sizeof(iostat_stats_t *), GFP_KERNEL); ++ if (sd_iostats == NULL) { ++ printk(KERN_WARNING "Can't keep sd iostats: " ++ "ENOMEM allocating stats array size %d\n", ++ sd_template.dev_max * sizeof(iostat_stats_t *)); ++ return; ++ } ++ ++ for (i = 0; i < maxdevs; i++) ++ sd_iostats[i] = NULL; ++ ++ if (proc_scsi == NULL) { ++ printk(KERN_WARNING "No access to sd iostats: " ++ "proc_scsi is NULL\n"); ++ return; ++ } ++ ++ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name, ++ S_IFDIR | S_IRUGO | S_IXUGO, ++ proc_scsi); ++ if (sd_iostats_procdir == NULL) { ++ printk(KERN_WARNING "No access to sd iostats: " ++ "can't create /proc/scsi/%s\n", sd_iostats_procdir_name); ++ return; ++ } ++} ++ ++void ++sd_iostats_init_disk(int disk) ++{ ++ char name[6]; ++ struct proc_dir_entry *pde; ++ int i; ++ unsigned long flags; ++ iostat_stats_t *stats; ++ int maxdevs = sd_template.dev_max; ++ ++ if (sd_iostats == NULL || ++ sd_iostats_procdir == NULL) ++ return; ++ ++ if (disk > sd_template.dev_max) { ++ printk(KERN_ERR "sd_iostats_init_disk: " ++ "unexpected disk index %d(%d)\n", ++ disk, sd_template.dev_max); ++ BUG(); ++ } ++ ++ if (sd_iostats[disk] != NULL) ++ return; ++ ++ sd_devname(disk, name); ++ stats = kmalloc(sizeof(*stats), GFP_KERNEL); ++ if (stats == NULL) { ++ printk(KERN_WARNING "Can't keep %s iostats: " ++ "ENOMEM allocating stats size %d\n", ++ name, sizeof(*stats)); ++ return; ++ } ++ ++ memset (stats, 0, sizeof(*stats)); ++ do_gettimeofday(&stats->iostat_timeval); ++ ++ spin_lock_irqsave(&sd_iostats_lock, flags); ++ ++ if (sd_iostats[disk] != NULL) { ++ spin_unlock_irqrestore(&sd_iostats_lock, flags); ++ kfree (stats); ++ return; ++ } ++ ++ sd_iostats[disk] = stats; ++ ++ spin_unlock_irqrestore(&sd_iostats_lock, flags); ++ ++ pde = create_proc_entry(name, S_IRUGO | S_IWUSR, ++ sd_iostats_procdir); ++ if (pde == NULL) { ++ printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n", ++ sd_iostats_procdir_name, name); ++ } else { ++ pde->proc_fops = &sd_iostats_proc_fops; ++ pde->data = (void *)((long)disk); ++ } ++} ++ ++void ++sd_iostats_fini(void) ++{ ++ char name[6]; ++ int i; ++ int maxdevs = sd_template.dev_max; ++ ++ if (sd_iostats_procdir != NULL) { ++ for (i = 0; i < maxdevs; i++) { ++ sd_devname(i, name); ++ remove_proc_entry(name, sd_iostats_procdir); ++ } ++ ++ if (proc_scsi == NULL) { ++ printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n"); ++ BUG(); ++ } ++ remove_proc_entry(sd_iostats_procdir_name, ++ proc_scsi); ++ ++ sd_iostats_procdir = NULL; ++ } ++ ++ if (sd_iostats != NULL) { ++ for (i = 0; i < maxdevs; i++) { ++ if (sd_iostats[i] != NULL) ++ kfree (sd_iostats[i]); ++ } ++ ++ kfree(sd_iostats); ++ sd_iostats = NULL; ++ } ++} ++ ++void ++sd_iostats_bump(int disk, unsigned int nsect, int iswrite) ++{ ++ iostat_stats_t *stats; ++ iostat_counter_t *counter; ++ int bucket; ++ int tmp; ++ unsigned long irqflags; ++ ++ if (sd_iostats == NULL) ++ return; ++ ++ if (disk < 0 || disk >= sd_template.dev_max) { ++ printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n", ++ disk, sd_template.dev_max); ++ BUG(); ++ } ++ ++ for (bucket = 0, tmp = nsect; tmp > 1; bucket++) ++ tmp /= 2; ++ ++ if (bucket >= IOSTAT_NCOUNTERS) { ++ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect); ++ BUG(); ++ } ++ ++ spin_lock_irqsave(&sd_iostats_lock, irqflags); ++ ++ stats = sd_iostats[disk]; ++ if (stats != NULL) { ++ counter = iswrite ? ++ &stats->iostat_write_histogram[bucket] : ++ &stats->iostat_read_histogram[bucket]; ++ ++ counter->iostat_size += nsect; ++ counter->iostat_count++; ++ } ++ ++ spin_unlock_irqrestore(&sd_iostats_lock, irqflags); ++} ++#endif ++ + static int __init init_sd(void) + { + sd_template.module = THIS_MODULE; +@@ -1488,6 +1837,7 @@ static void __exit exit_sd(void) + kfree(sd_blocksizes); + kfree(sd_hardsizes); + kfree(sd_varyio); ++ sd_iostats_fini(); + for (i = 0; i < N_USED_SD_MAJORS; i++) { + kfree(sd_gendisks[i].de_arr); + kfree(sd_gendisks[i].flags); -- 1.8.3.1