From 200722741e8f12869f4fa357224b810fce453fa1 Mon Sep 17 00:00:00 2001 From: yangsheng Date: Thu, 2 Jul 2009 13:41:04 +0000 Subject: [PATCH] Branch b1_8 b=19846 Fork the patch get rid of the fuzz. --- .../patches/sd_iostats-2.6-sles10.patch | 581 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-sles10.series | 2 +- 2 files changed, 582 insertions(+), 1 deletion(-) create mode 100644 lustre/kernel_patches/patches/sd_iostats-2.6-sles10.patch diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-sles10.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-sles10.patch new file mode 100644 index 0000000..ced2c53 --- /dev/null +++ b/lustre/kernel_patches/patches/sd_iostats-2.6-sles10.patch @@ -0,0 +1,581 @@ +Index: linux-2.6.18-53.1.21/drivers/scsi/Kconfig +=================================================================== +--- linux-2.6.18-53.1.21.orig/drivers/scsi/Kconfig ++++ linux-2.6.18-53.1.21/drivers/scsi/Kconfig +@@ -66,6 +66,14 @@ config BLK_DEV_SD + polling I/O. If it doesn't, LKCD will fall back to ordinary + interrupt-driven I/O. + ++config SD_IOSTATS ++ bool "Enable SCSI disk I/O stats" ++ depends on BLK_DEV_SD ++ default y ++ ---help--- ++ This enables SCSI disk I/O stats collection. You must also enable ++ /proc file system support if you want this feature. 
++ + config CHR_DEV_ST + tristate "SCSI tape support" + depends on SCSI +Index: linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c +=================================================================== +--- linux-2.6.18-53.1.21.orig/drivers/scsi/scsi_proc.c ++++ linux-2.6.18-53.1.21/drivers/scsi/scsi_proc.c +@@ -40,7 +40,8 @@ + /* 4K page size, but our output routines, use some slack for overruns */ + #define PROC_BLOCK_SIZE (3*1024) + +-static struct proc_dir_entry *proc_scsi; ++struct proc_dir_entry *proc_scsi; ++EXPORT_SYMBOL(proc_scsi); + + /* Protect sht->present and sht->proc_dir */ + static DEFINE_MUTEX(global_host_template_mutex); +Index: linux-2.6.18-53.1.21/drivers/scsi/sd.c +=================================================================== +--- linux-2.6.18-53.1.21.orig/drivers/scsi/sd.c ++++ linux-2.6.18-53.1.21/drivers/scsi/sd.c +@@ -62,6 +62,63 @@ + + #include "scsi_logging.h" + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++# include <linux/proc_fs.h> ++# include <linux/seq_file.h> ++ ++typedef struct { ++ unsigned long long iostat_size; ++ unsigned long long iostat_count; ++} iostat_counter_t; ++ ++#define IOSTAT_NCOUNTERS 16 ++typedef struct { ++ iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS]; ++ iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS]; ++ struct timeval iostat_timeval; ++ ++ /* queue depth: how well the pipe is filled up */ ++ unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS]; ++ unsigned long long iostat_queue_ticks_sum; ++ unsigned long iostat_queue_depth; ++ unsigned long iostat_queue_stamp; ++ ++ /* seeks: how linear the traffic is */ ++ unsigned long long iostat_next_sector; ++ unsigned long long iostat_seek_sectors; ++ unsigned long long iostat_seeks; ++ unsigned long long iostat_sectors; ++ unsigned long long iostat_reqs; ++ unsigned long iostat_read_reqs; ++ unsigned long iostat_write_reqs; ++ ++ /* process time: how long it takes to process requests */ ++ unsigned long iostat_rtime[IOSTAT_NCOUNTERS]; ++ unsigned long 
iostat_wtime[IOSTAT_NCOUNTERS]; ++ ++ /* queue time: how long process spent in elevator's queue */ ++ unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS]; ++ unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS]; ++ ++ /* must be the last field, as it's used to know size to be memset'ed */ ++ spinlock_t iostat_lock; ++} ____cacheline_aligned_in_smp iostat_stats_t; ++ ++struct proc_dir_entry *sd_iostats_procdir = NULL; ++char sd_iostats_procdir_name[] = "sd_iostats"; ++static struct file_operations sd_iostats_proc_fops; ++ ++extern void sd_iostats_init(void); ++extern void sd_iostats_fini(void); ++void sd_iostats_start_req(struct scsi_cmnd *SCpnt); ++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt); ++#else ++static inline void sd_iostats_init(void) {} ++static inline void sd_iostats_fini(void) {} ++static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {} ++static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {} ++#endif ++ + /* + * More than enough for everybody ;) The huge number of majors + * is a leftover from 16bit dev_t days, we don't really need that +@@ -126,6 +183,9 @@ struct scsi_disk { + unsigned WCE : 1; /* state of disk WCE bit */ + unsigned RCD : 1; /* state of disk RCD bit, unused */ + unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ iostat_stats_t *stats; /* scsi disk statistics */ ++#endif + }; + #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev) + +@@ -557,6 +617,8 @@ static int sd_init_command(struct scsi_c + */ + SCpnt->done = sd_rw_intr; + ++ sd_iostats_start_req(SCpnt); ++ + /* + * This indicates that the command is ready from our end to be + * queued. 
+@@ -1040,6 +1102,7 @@ static void sd_rw_intr(struct scsi_cmnd + break; + } + out: ++ sd_iostats_finish_req(SCpnt); + scsi_io_completion(SCpnt, good_bytes); + } + +@@ -1735,6 +1798,36 @@ static int sd_probe(struct device *dev) + if (sdp->removable) + gd->flags |= GENHD_FL_REMOVABLE; + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL); ++ if (!sdkp->stats) { ++ printk(KERN_WARNING "cannot allocate iostat structure for " ++ "%s\n", gd->disk_name); ++ } else { ++ do_gettimeofday(&sdkp->stats->iostat_timeval); ++ sdkp->stats->iostat_queue_stamp = jiffies; ++ spin_lock_init(&sdkp->stats->iostat_lock); ++ if (sd_iostats_procdir) { ++ struct proc_dir_entry *pde; ++ pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR, ++ sd_iostats_procdir); ++ if (!pde) { ++ printk(KERN_WARNING "Can't create /proc/scsi/" ++ "%s/%s\n", ++ sd_iostats_procdir_name, ++ gd->disk_name); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } else { ++ pde->proc_fops = &sd_iostats_proc_fops; ++ pde->data = gd; ++ } ++ } else { ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++ } ++#endif + dev_set_drvdata(dev, sdkp); + add_disk(gd); + +@@ -1778,6 +1871,366 @@ static int sd_remove(struct device *dev) + return 0; + } + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++static int ++sd_iostats_seq_show(struct seq_file *seq, void *v) ++{ ++ struct timeval now; ++ struct gendisk *disk = seq->private; ++ iostat_stats_t *stats; ++ unsigned long long read_len; ++ unsigned long long read_len_tot; ++ unsigned long read_num; ++ unsigned long read_num_tot; ++ unsigned long long write_len; ++ unsigned long long write_len_tot; ++ unsigned long write_num; ++ unsigned long write_num_tot; ++ int i; ++ int maxi; ++ ++ stats = scsi_disk(disk)->stats; ++ if (stats == NULL) { ++ printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n"); ++ BUG(); ++ } ++ ++ do_gettimeofday(&now); ++ now.tv_sec -= stats->iostat_timeval.tv_sec; ++ 
now.tv_usec -= stats->iostat_timeval.tv_usec; ++ if (now.tv_usec < 0) { ++ now.tv_usec += 1000000; ++ now.tv_sec--; ++ } ++ ++ /* this sampling races with updates */ ++ seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n", ++ (unsigned long) scsi_disk(disk)->index, ++ now.tv_sec, now.tv_usec); ++ ++ for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--) ++ if (stats->iostat_read_histogram[i].iostat_count != 0 || ++ stats->iostat_write_histogram[i].iostat_count != 0) ++ break; ++ maxi = i; ++ ++ seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", ++ "reads", "total", "writes", "total"); ++ ++ read_len_tot = write_len_tot = 0; ++ read_num_tot = write_num_tot = 0; ++ for (i = 0; i <= maxi; i++) { ++ read_len = stats->iostat_read_histogram[i].iostat_size; ++ read_len_tot += read_len; ++ read_num = stats->iostat_read_histogram[i].iostat_count; ++ read_num_tot += read_num; ++ ++ write_len = stats->iostat_write_histogram[i].iostat_size; ++ write_len_tot += write_len; ++ write_num = stats->iostat_write_histogram[i].iostat_count; ++ write_num_tot += write_num; ++ ++ seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", ++ 512<<i, read_num, read_len, write_num, write_len); ++ } ++ ++ seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total", ++ read_num_tot, read_len_tot, ++ write_num_tot, write_len_tot); ++ ++ seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%"); ++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) { ++ unsigned long long ticks, percent; ++ ticks = stats->iostat_queue_ticks[i]; ++ if (ticks == 0) ++ continue; ++ percent = stats->iostat_queue_ticks[i] * 100; ++ do_div(percent, stats->iostat_queue_ticks_sum); ++ seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent); ++ } ++ ++ if (stats->iostat_reqs != 0) { ++ unsigned long long aveseek = 0, percent = 0; ++ ++ if (stats->iostat_seeks) { ++ aveseek = stats->iostat_seek_sectors; ++ do_div(aveseek, stats->iostat_seeks); ++ percent = stats->iostat_seeks * 100; ++ do_div(percent, stats->iostat_reqs); ++ } ++ ++ seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over " ++ "%llu sectors in ave, %llu%% of all reqs\n", ++ stats->iostat_sectors, stats->iostat_reqs, ++ stats->iostat_seeks, aveseek, percent); ++ } ++ ++ seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads", ++ "%%", "writes", "%%"); ++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) { ++ unsigned 
long read_percent = 0, write_percent = 0; ++ if (stats->iostat_wtime[i] == 0 && ++ stats->iostat_rtime[i] == 0) ++ continue; ++ if (stats->iostat_read_reqs) ++ read_percent = stats->iostat_rtime[i] * 100 / ++ stats->iostat_read_reqs; ++ if (stats->iostat_write_reqs) ++ write_percent = stats->iostat_wtime[i] * 100 / ++ stats->iostat_write_reqs; ++ seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n", ++ jiffies_to_msecs(((1UL << i) >> 1) << 1), ++ stats->iostat_rtime[i], read_percent, ++ stats->iostat_wtime[i], write_percent); ++ } ++ ++ seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads", ++ "%%", "writes", "%%"); ++ for (i = 0; i < IOSTAT_NCOUNTERS; i++) { ++ unsigned long read_percent = 0, write_percent = 0; ++ if (stats->iostat_wtime_in_queue[i] == 0 && ++ stats->iostat_rtime_in_queue[i] == 0) ++ continue; ++ if (stats->iostat_read_reqs) ++ read_percent = stats->iostat_rtime_in_queue[i] * 100 / ++ stats->iostat_read_reqs; ++ if (stats->iostat_write_reqs) ++ write_percent = stats->iostat_wtime_in_queue[i] * 100 / ++ stats->iostat_write_reqs; ++ seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n", ++ jiffies_to_msecs(((1UL << i) >> 1) << 1), ++ stats->iostat_rtime_in_queue[i], ++ read_percent, ++ stats->iostat_wtime_in_queue[i], ++ write_percent); ++ } ++ ++ return 0; ++} ++ ++static void * ++sd_iostats_seq_start(struct seq_file *p, loff_t *pos) ++{ ++ return (*pos == 0) ? 
(void *)1 : NULL; ++} ++ ++static void * ++sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos) ++{ ++ ++*pos; ++ return NULL; ++} ++ ++static void ++sd_iostats_seq_stop(struct seq_file *p, void *v) ++{ ++} ++ ++static struct seq_operations sd_iostats_seqops = { ++ .start = sd_iostats_seq_start, ++ .stop = sd_iostats_seq_stop, ++ .next = sd_iostats_seq_next, ++ .show = sd_iostats_seq_show, ++}; ++ ++static int ++sd_iostats_seq_open (struct inode *inode, struct file *file) ++{ ++ int rc; ++ ++ rc = seq_open(file, &sd_iostats_seqops); ++ if (rc != 0) ++ return rc; ++ ++ ((struct seq_file *)file->private_data)->private = PDE(inode)->data; ++ return 0; ++} ++ ++static ssize_t ++sd_iostats_seq_write(struct file *file, const char *buffer, ++ size_t len, loff_t *off) ++{ ++ struct seq_file *seq = file->private_data; ++ struct gendisk *disk = seq->private; ++ iostat_stats_t *stats = scsi_disk(disk)->stats; ++ unsigned long flags; ++ unsigned long qdepth; ++ ++ ++ spin_lock_irqsave (&stats->iostat_lock, flags); ++ qdepth = stats->iostat_queue_depth; ++ memset (stats, 0, offsetof(iostat_stats_t, iostat_lock)); ++ do_gettimeofday(&stats->iostat_timeval); ++ stats->iostat_queue_stamp = jiffies; ++ stats->iostat_queue_depth = qdepth; ++ spin_unlock_irqrestore (&stats->iostat_lock, flags); ++ ++ return len; ++} ++ ++static struct file_operations sd_iostats_proc_fops = { ++ .owner = THIS_MODULE, ++ .open = sd_iostats_seq_open, ++ .read = seq_read, ++ .write = sd_iostats_seq_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++extern struct proc_dir_entry *proc_scsi; ++ ++void ++sd_iostats_init(void) ++{ ++ if (proc_scsi == NULL) { ++ printk(KERN_WARNING "No access to sd iostats: " ++ "proc_scsi is NULL\n"); ++ return; ++ } ++ ++ sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name, ++ S_IFDIR | S_IRUGO | S_IXUGO, ++ proc_scsi); ++ if (sd_iostats_procdir == NULL) { ++ printk(KERN_WARNING "No access to sd iostats: " ++ "can't create 
/proc/scsi/%s\n", sd_iostats_procdir_name); ++ return; ++ } ++} ++ ++void sd_iostats_fini(void) ++{ ++ if (proc_scsi != NULL && sd_iostats_procdir != NULL) ++ remove_proc_entry(sd_iostats_procdir_name, proc_scsi); ++ ++ sd_iostats_procdir = NULL; ++} ++ ++void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) ++{ ++ struct request *rq = SCpnt->request; ++ iostat_stats_t *stats; ++ unsigned long *tcounter; ++ int tbucket; ++ int tmp; ++ unsigned long irqflags; ++ unsigned long i; ++ ++ stats = scsi_disk(rq->rq_disk)->stats; ++ if (stats == NULL) ++ return; ++ ++ tmp = jiffies - rq->start_time; ++ for (tbucket = 0; tmp > 1; tbucket++) ++ tmp >>= 1; ++ if (tbucket >= IOSTAT_NCOUNTERS) ++ tbucket = IOSTAT_NCOUNTERS - 1; ++ //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket); ++ ++ tcounter = rq_data_dir(rq) == WRITE ? ++ &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket]; ++ ++ spin_lock_irqsave(&stats->iostat_lock, irqflags); ++ ++ /* update delay stats */ ++ (*tcounter)++; ++ ++ /* update queue depth stats */ ++ i = stats->iostat_queue_depth; ++ if (i >= IOSTAT_NCOUNTERS) ++ i = IOSTAT_NCOUNTERS - 1; ++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp; ++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp; ++ BUG_ON(stats->iostat_queue_depth == 0); ++ stats->iostat_queue_depth--; ++ ++ /* update seek stats. XXX: not sure about nr_sectors */ ++ stats->iostat_sectors += rq->nr_sectors; ++ stats->iostat_reqs++; ++ if (rq->sector != stats->iostat_next_sector) { ++ stats->iostat_seek_sectors += ++ rq->sector > stats->iostat_next_sector ? 
++ rq->sector - stats->iostat_next_sector : ++ stats->iostat_next_sector - rq->sector; ++ stats->iostat_seeks++; ++ } ++ stats->iostat_next_sector = rq->sector + rq->nr_sectors; ++ ++ stats->iostat_queue_stamp = jiffies; ++ ++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags); ++} ++ ++void sd_iostats_start_req(struct scsi_cmnd *SCpnt) ++{ ++ struct request *rq = SCpnt->request; ++ iostat_stats_t *stats; ++ iostat_counter_t *counter; ++ int bucket; ++ int tbucket; ++ int tmp; ++ unsigned long irqflags; ++ unsigned long i; ++ int nsect; ++ ++ stats = scsi_disk(rq->rq_disk)->stats; ++ if (stats == NULL) ++ return; ++ ++ nsect = SCpnt->request_bufflen >> 9; ++ for (bucket = 0, tmp = nsect; tmp > 1; bucket++) ++ tmp >>= 1; ++ ++ if (bucket >= IOSTAT_NCOUNTERS) { ++ printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect); ++ BUG(); ++ } ++ ++ counter = rq_data_dir(rq) == WRITE ? ++ &stats->iostat_write_histogram[bucket] : ++ &stats->iostat_read_histogram[bucket]; ++ ++ tmp = jiffies - rq->start_time; ++ for (tbucket = 0; tmp > 1; tbucket++) ++ tmp >>= 1; ++ if (tbucket >= IOSTAT_NCOUNTERS) ++ tbucket = IOSTAT_NCOUNTERS - 1; ++ //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket); ++ ++ /* an ugly hack to know exact processing time. the right ++ * solution is to add one more field to struct request ++ * hopefully it will break nothing ... 
*/ ++ rq->start_time = jiffies; ++ ++ spin_lock_irqsave(&stats->iostat_lock, irqflags); ++ ++ /* update queue depth stats */ ++ i = stats->iostat_queue_depth; ++ if (i >= IOSTAT_NCOUNTERS) ++ i = IOSTAT_NCOUNTERS - 1; ++ stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp; ++ stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp; ++ stats->iostat_queue_depth++; ++ ++ /* update delay stats */ ++ if (rq_data_dir(rq) == WRITE) { ++ stats->iostat_wtime_in_queue[tbucket]++; ++ stats->iostat_write_reqs++; ++ } else { ++ stats->iostat_rtime_in_queue[tbucket]++; ++ stats->iostat_read_reqs++; ++ } ++ ++ /* update size stats */ ++ counter->iostat_size += nsect; ++ counter->iostat_count++; ++ ++ stats->iostat_queue_stamp = jiffies; ++ ++ spin_unlock_irqrestore(&stats->iostat_lock, irqflags); ++} ++#endif ++ + /** + * scsi_disk_release - Called to free the scsi_disk structure + * @cdev: pointer to embedded class device +@@ -1796,10 +2249,16 @@ static void scsi_disk_release(struct cla + idr_remove(&sd_index_idr, sdkp->index); + spin_unlock(&sd_index_lock); + ++#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS)) ++ if (sdkp->stats) { ++ remove_proc_entry(disk->disk_name, sd_iostats_procdir); ++ kfree(sdkp->stats); ++ sdkp->stats = NULL; ++ } ++#endif + disk->private_data = NULL; + put_disk(disk); + put_device(&sdkp->device->sdev_gendev); +- + kfree(sdkp); + } + +@@ -1907,6 +2366,7 @@ done: + static int __init init_sd(void) + { + int majors = 0, i; ++ int rc = 0; + + SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); + +@@ -1917,9 +2377,13 @@ static int __init init_sd(void) + if (!majors) + return -ENODEV; + ++ sd_iostats_init(); + class_register(&sd_disk_class); + +- return scsi_register_driver(&sd_template.gendrv); ++ rc = scsi_register_driver(&sd_template.gendrv); ++ if (rc) ++ sd_iostats_fini(); ++ return rc; + } + + /** +@@ -1938,6 +2402,7 @@ static void __exit exit_sd(void) + unregister_blkdev(sd_major(i), "sd"); + + 
class_unregister(&sd_disk_class); ++ sd_iostats_fini(); + } + + module_init(init_sd); diff --git a/lustre/kernel_patches/series/2.6-sles10.series b/lustre/kernel_patches/series/2.6-sles10.series index dcc8c04..7003a52 100644 --- a/lustre/kernel_patches/series/2.6-sles10.series +++ b/lustre/kernel_patches/series/2.6-sles10.series @@ -5,7 +5,7 @@ export_symbols-2.6.12.patch dev_read_only-2.6-fc5.patch export-2.6-fc5.patch export-show_task-2.6-fc5.patch -sd_iostats-2.6-rhel5.patch +sd_iostats-2.6-sles10.patch export_symbol_numa-2.6-fc5.patch blkdev_tunables-2.6-sles10.patch jbd-stats-2.6-sles10.patch -- 1.8.3.1