Add optional per-disk I/O size statistics to the SCSI disk driver (sd.c).

With CONFIG_SD_IOSTATS and CONFIG_PROC_FS enabled, sd.c keeps per-disk
read and write histograms bucketed by power-of-two request size and
exposes them as /proc/scsi/sd_iostats/<disk>.  Reading a file prints the
histogram; writing anything to it zeroes that disk's counters.

Index: linux/Documentation/Configure.help
===================================================================
RCS file: /home/cvs/master/68chaos_eebperf/Documentation/Configure.help,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 Configure.help
--- linux/Documentation/Configure.help	20 Aug 2004 18:09:23 -0000	1.1.1.1
+++ linux/Documentation/Configure.help	26 Aug 2004 12:34:40 -0000
@@ -7679,6 +7679,11 @@ CONFIG_SCSI_LOGGING
   there should be no noticeable performance impact as long as you have
   logging turned off.
 
+SCSI disk I/O stats
+CONFIG_SD_IOSTATS
+  This enables SCSI disk I/O stats collection.  You must also enable
+  /proc file system support if you want this feature.
+
 QDIO base support for IBM S/390 and zSeries
 CONFIG_QDIO
   This driver provides the Queued Direct I/O base support for the
Index: linux/drivers/scsi/Config.in
===================================================================
RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/Config.in,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 Config.in
--- linux/drivers/scsi/Config.in	20 Aug 2004 18:10:13 -0000	1.1.1.1
+++ linux/drivers/scsi/Config.in	24 Aug 2004 14:30:08 -0000
@@ -4,6 +4,7 @@ dep_tristate ' SCSI disk support' CONFI
 
 if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then
    int  'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40
+   bool 'SCSI disk I/O stats' CONFIG_SD_IOSTATS y
 fi
 
 dep_tristate '  SCSI tape support' CONFIG_CHR_DEV_ST $CONFIG_SCSI
Index: linux/drivers/scsi/sd.c
===================================================================
RCS file: /home/cvs/master/68chaos_eebperf/drivers/scsi/sd.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 sd.c
--- linux/drivers/scsi/sd.c	20 Aug 2004 18:10:16 -0000	1.1.1.1
+++ linux/drivers/scsi/sd.c	26 Aug 2004 13:34:39 -0000
@@ -65,6 +65,42 @@
  *	static const char RCSid[] = "$Header:";
  */
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/* One histogram bucket: sectors transferred and number of requests. */
+typedef struct
+{
+	unsigned long long	iostat_size;
+	unsigned long long	iostat_count;
+} iostat_counter_t;
+
+#define IOSTAT_NCOUNTERS 16
+/* Per-disk stats: read/write histograms and the time they were last zeroed. */
+typedef struct
+{
+	iostat_counter_t	iostat_read_histogram[IOSTAT_NCOUNTERS];
+	iostat_counter_t	iostat_write_histogram[IOSTAT_NCOUNTERS];
+	struct timeval		iostat_timeval;
+} iostat_stats_t;
+
+iostat_stats_t **sd_iostats;
+spinlock_t sd_iostats_lock;
+struct proc_dir_entry *sd_iostats_procdir;
+char sd_iostats_procdir_name[] = "sd_iostats";
+
+extern void sd_iostats_init(void);
+extern void sd_iostats_init_disk(int disk);
+extern void sd_iostats_fini(void);
+extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
+#else
+static inline void sd_iostats_init(void) {}
+static inline void sd_iostats_init_disk(int disk) {}
+static inline void sd_iostats_fini(void) {}
+static inline void sd_iostats_bump(int dev, unsigned int nsect, int iswrite) {}
+#endif
+
 /* device number --> sd_gendisks index */
 #define SD_MAJOR_IDX(i)		( ((MAJOR(i) & 0x80) >> 4) + (MAJOR(i) & 7) )
 /* sd_gendisks index --> system major */
@@ -351,6 +387,8 @@ static int sd_init_command(Scsi_Cmnd * S
 	SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n",
 				   nbuff, dev, block));
 
+	sd_iostats_bump(dev, this_count, SCpnt->request.cmd == WRITE);
+
 	/*
 	 * If we have a 1K hardware sectorsize, prevent access to single
 	 * 512 byte sectors.  In theory we could handle this - in fact
@@ -545,7 +583,7 @@ static int sd_open(struct inode *inode, 
 			if (scsi_block_when_processing_errors(SDev))
 				scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL);
 
-	
+	sd_iostats_init_disk(target);
 	return 0;
 
 error_out:
@@ -1179,6 +1217,8 @@ static int sd_init()
 
 	memset(sd_varyio, 0, (sd_template.dev_max << 4));
 
+	sd_iostats_init();
+
 	for (i = 0; i < sd_template.dev_max << 4; i++) {
 		sd_blocksizes[i] = 1024;
 		sd_hardsizes[i] = 512;
@@ -1243,6 +1283,7 @@ cleanup_gendisks_de_arr:
 	kfree(sd_gendisks);
 	sd_gendisks = NULL;
 cleanup_sd_gendisks:
+	sd_iostats_fini();
 	kfree(sd_varyio);
 cleanup_varyio:
 	kfree(sd_max_sectors);
@@ -1466,6 +1507,330 @@ static void sd_detach(Scsi_Device * SDp)
 	return;
 }
 
+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
+/* Print the histogram of the disk whose index is in seq->private. */
+static int
+sd_iostats_seq_show(struct seq_file *seq, void *v)
+{
+	struct timeval now;
+	unsigned long index = (unsigned long)(seq->private);
+	iostat_stats_t *stats;
+	unsigned long long read_len;
+	unsigned long long read_len_tot;
+	unsigned long read_num;
+	unsigned long read_num_tot;
+	unsigned long long write_len;
+	unsigned long long write_len_tot;
+	unsigned long write_num;
+	unsigned long write_num_tot;
+	int i;
+	int maxi;
+
+	if (sd_iostats == NULL) {
+		printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
+		BUG();
+	}
+
+	stats = sd_iostats[index];
+	if (stats == NULL) {
+		printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
+		BUG();
+	}
+
+	/* elapsed time since the counters were last zeroed */
+	do_gettimeofday(&now);
+	now.tv_sec -= stats->iostat_timeval.tv_sec;
+	now.tv_usec -= stats->iostat_timeval.tv_usec;
+	if (now.tv_usec < 0) {
+		now.tv_usec += 1000000;
+		now.tv_sec--;
+	}
+
+	/* this sampling races with updates */
+	seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
+		   index, now.tv_sec, now.tv_usec);
+
+	/* find the highest non-empty bucket; bucket 0 is always shown */
+	for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
+		if (stats->iostat_read_histogram[i].iostat_count != 0 ||
+		    stats->iostat_write_histogram[i].iostat_count != 0)
+			break;
+	maxi = i;
+
+	seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
+		   "reads", "total", "writes", "total");
+
+	read_len_tot = write_len_tot = 0;
+	read_num_tot = write_num_tot = 0;
+	for (i = 0; i <= maxi; i++) {
+		read_len = stats->iostat_read_histogram[i].iostat_size;
+		read_len_tot += read_len;
+		read_num = stats->iostat_read_histogram[i].iostat_count;
+		read_num_tot += read_num;
+
+		write_len = stats->iostat_write_histogram[i].iostat_size;
+		write_len_tot += write_len;
+		write_num = stats->iostat_write_histogram[i].iostat_count;
+		write_num_tot += write_num;
+
+		seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
+			    512<<i, read_num, read_len, write_num, write_len);
+	}
+
+	seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
+		   read_num_tot, read_len_tot,
+		   write_num_tot, write_len_tot);
+	return 0;
+}
+
+/* seq_file iterator yielding a single record, so show() runs once per pass. */
+static void *
+sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
+{
+	return (*pos == 0) ? (void *)1 : NULL;
+}
+
+static void *
+sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void
+sd_iostats_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+static struct seq_operations sd_iostats_seqops = {
+	.start = sd_iostats_seq_start,
+	.stop  = sd_iostats_seq_stop,
+	.next  = sd_iostats_seq_next,
+	.show  = sd_iostats_seq_show,
+};
+
+/* Open the seq_file and pass it the disk index stored in the proc entry. */
+static int
+sd_iostats_seq_open (struct inode *inode, struct file *file)
+{
+	int rc;
+
+	rc = seq_open(file, &sd_iostats_seqops);
+	if (rc != 0)
+		return rc;
+
+	((struct seq_file *)file->private_data)->private = PDE(inode)->data;
+	return 0;
+}
+
+/* Any write zeroes this disk's counters and restarts its snapshot clock. */
+static ssize_t
+sd_iostats_seq_write(struct file *file, const char *buffer,
+		     size_t len, loff_t *off)
+{
+	struct seq_file *seq = file->private_data;
+	unsigned long index = (unsigned long)seq->private;
+	iostat_stats_t *stats = sd_iostats[index];
+	unsigned long flags;
+
+	spin_lock_irqsave (&sd_iostats_lock, flags);
+	memset (stats, 0, sizeof(*stats));
+	do_gettimeofday(&stats->iostat_timeval);
+	spin_unlock_irqrestore (&sd_iostats_lock, flags);
+
+	return len;
+}
+
+static struct file_operations sd_iostats_proc_fops = {
+	.owner   = THIS_MODULE,
+	.open    = sd_iostats_seq_open,
+	.read    = seq_read,
+	.write   = sd_iostats_seq_write,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/* Allocate the per-disk pointer table and create /proc/scsi/sd_iostats. */
+void
+sd_iostats_init(void)
+{
+	int maxdevs = sd_template.dev_max;
+	int i;
+
+	spin_lock_init(&sd_iostats_lock);
+
+	sd_iostats = kmalloc(maxdevs * sizeof(iostat_stats_t *), GFP_KERNEL);
+	if (sd_iostats == NULL) {
+		printk(KERN_WARNING "Can't keep sd iostats: "
+		       "ENOMEM allocating stats array size %lu\n",
+		       (unsigned long)(sd_template.dev_max * sizeof(iostat_stats_t *)));
+		return;
+	}
+
+	for (i = 0; i < maxdevs; i++)
+		sd_iostats[i] = NULL;
+
+	if (proc_scsi == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "proc_scsi is NULL\n");
+		return;
+	}
+
+	sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
+					       S_IFDIR | S_IRUGO | S_IXUGO,
+					       proc_scsi);
+	if (sd_iostats_procdir == NULL) {
+		printk(KERN_WARNING "No access to sd iostats: "
+		       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
+		return;
+	}
+}
+
+/*
+ * Allocate stats for one disk and create its file under
+ * /proc/scsi/sd_iostats; does nothing if the disk already has stats.
+ */
+void
+sd_iostats_init_disk(int disk)
+{
+	char name[6];
+	struct proc_dir_entry *pde;
+	unsigned long flags;
+	iostat_stats_t *stats;
+
+	if (sd_iostats == NULL ||
+	    sd_iostats_procdir == NULL)
+		return;
+
+	/* sd_iostats[] has dev_max entries, so index dev_max is out of range */
+	if (disk < 0 || disk >= sd_template.dev_max) {
+		printk(KERN_ERR "sd_iostats_init_disk: "
+		       "unexpected disk index %d(%d)\n",
+		       disk, sd_template.dev_max);
+		BUG();
+	}
+
+	if (sd_iostats[disk] != NULL)
+		return;
+
+	sd_devname(disk, name);
+	stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+	if (stats == NULL) {
+		printk(KERN_WARNING "Can't keep %s iostats: "
+		       "ENOMEM allocating stats size %lu\n",
+		       name, (unsigned long)sizeof(*stats));
+		return;
+	}
+
+	memset (stats, 0, sizeof(*stats));
+	do_gettimeofday(&stats->iostat_timeval);
+
+	spin_lock_irqsave(&sd_iostats_lock, flags);
+
+	/* re-check under the lock: another caller may have installed stats */
+	if (sd_iostats[disk] != NULL) {
+		spin_unlock_irqrestore(&sd_iostats_lock, flags);
+		kfree (stats);
+		return;
+	}
+
+	sd_iostats[disk] = stats;
+
+	spin_unlock_irqrestore(&sd_iostats_lock, flags);
+
+	pde = create_proc_entry(name, S_IRUGO | S_IWUSR,
+				sd_iostats_procdir);
+	if (pde == NULL) {
+		printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
+		       sd_iostats_procdir_name, name);
+	} else {
+		pde->proc_fops = &sd_iostats_proc_fops;
+		pde->data = (void *)((long)disk);
+	}
+}
+
+/* Remove the proc entries and free every stats allocation. */
+void
+sd_iostats_fini(void)
+{
+	char name[6];
+	int i;
+	int maxdevs = sd_template.dev_max;
+
+	if (sd_iostats_procdir != NULL) {
+		for (i = 0; i < maxdevs; i++) {
+			sd_devname(i, name);
+			remove_proc_entry(name, sd_iostats_procdir);
+		}
+
+		if (proc_scsi == NULL) {
+			printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
+			BUG();
+		}
+		remove_proc_entry(sd_iostats_procdir_name,
+				  proc_scsi);
+
+		sd_iostats_procdir = NULL;
+	}
+
+	if (sd_iostats != NULL) {
+		for (i = 0; i < maxdevs; i++) {
+			if (sd_iostats[i] != NULL)
+				kfree (sd_iostats[i]);
+		}
+
+		kfree(sd_iostats);
+		sd_iostats = NULL;
+	}
+}
+
+/*
+ * Account one request of nsect sectors in disk's read or write
+ * histogram; bucket b holds sizes in [2^b, 2^(b+1)) sectors.
+ */
+void
+sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
+{
+	iostat_stats_t *stats;
+	iostat_counter_t *counter;
+	int bucket;
+	unsigned int tmp;
+	unsigned long irqflags;
+
+	if (sd_iostats == NULL)
+		return;
+
+	if (disk < 0 || disk >= sd_template.dev_max) {
+		printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n",
+		       disk, sd_template.dev_max);
+		BUG();
+	}
+
+	/* bucket = floor(log2(nsect)); 0 and 1 sectors both map to bucket 0 */
+	for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
+		tmp /= 2;
+
+	if (bucket >= IOSTAT_NCOUNTERS) {
+		printk (KERN_ERR "sd_iostats_bump: nsect %u too big\n", nsect);
+		BUG();
+	}
+
+	spin_lock_irqsave(&sd_iostats_lock, irqflags);
+
+	stats = sd_iostats[disk];
+	if (stats != NULL) {
+		counter = iswrite ?
+			&stats->iostat_write_histogram[bucket] :
+			&stats->iostat_read_histogram[bucket];
+
+		counter->iostat_size += nsect;
+		counter->iostat_count++;
+	}
+
+	spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
+}
+#endif
+
 static int __init init_sd(void)
 {
 	sd_template.module = THIS_MODULE;
@@ -1488,6 +1853,7 @@ static void __exit exit_sd(void)
 		kfree(sd_blocksizes);
 		kfree(sd_hardsizes);
 		kfree(sd_varyio);
+		sd_iostats_fini();
 		for (i = 0; i < N_USED_SD_MAJORS; i++) {
 			kfree(sd_gendisks[i].de_arr);
 			kfree(sd_gendisks[i].flags);