Whamcloud - gitweb
b=12348
[fs/lustre-release.git] / lustre / kernel_patches / patches / sd_iostats-2.4.21-chaos.patch
1 diff -urp RH_2_4_21_47_0_1.orig/Documentation/Configure.help RH_2_4_21_47_0_1/Documentation/Configure.help
2 --- RH_2_4_21_47_0_1.orig/Documentation/Configure.help  2006-11-20 16:59:49.000000000 +0200
3 +++ RH_2_4_21_47_0_1/Documentation/Configure.help       2007-05-21 19:13:23.000000000 +0300
4 @@ -7620,6 +7620,11 @@ CONFIG_SCSI_LOGGING
5    there should be no noticeable performance impact as long as you have
6    logging turned off.
7  
8 +SCSI disk I/O stats
9 +CONFIG_SD_IOSTATS
10 +  This enables collection of per-disk SCSI I/O size histograms, shown
11 +  under /proc/scsi/sd_iostats/.  Requires /proc file system support.
12 +
13  QDIO base support for IBM S/390 and zSeries
14  CONFIG_QDIO
15    This driver provides the Queued Direct I/O base support for the
16 diff -urp RH_2_4_21_47_0_1.orig/drivers/scsi/Config.in RH_2_4_21_47_0_1/drivers/scsi/Config.in
17 --- RH_2_4_21_47_0_1.orig/drivers/scsi/Config.in        2006-11-20 16:59:49.000000000 +0200
18 +++ RH_2_4_21_47_0_1/drivers/scsi/Config.in     2007-05-21 19:13:23.000000000 +0300
19 @@ -4,6 +4,7 @@ dep_tristate '  SCSI disk support' CONFI
20  
21  if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then
22     int  'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40
23 +   bool 'SCSI disk I/O stats' CONFIG_SD_IOSTATS y
24  fi
25  if [ "$CONFIG_BLK_DEV_SD" != "n" -a "$CONFIG_DISKDUMP" != "n" ]; then
26     dep_tristate '  SCSI dump support' CONFIG_SCSI_DUMP $CONFIG_SCSI
27 diff -urp RH_2_4_21_47_0_1.orig/drivers/scsi/sd.c RH_2_4_21_47_0_1/drivers/scsi/sd.c
28 --- RH_2_4_21_47_0_1.orig/drivers/scsi/sd.c     2006-11-20 16:59:45.000000000 +0200
29 +++ RH_2_4_21_47_0_1/drivers/scsi/sd.c  2007-05-21 19:14:27.000000000 +0300
30 @@ -65,6 +65,40 @@
31   *  static const char RCSid[] = "$Header:";
32   */
33  
34 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
35 +#include <linux/proc_fs.h>
36 +#include <linux/seq_file.h>
37 +/* One histogram bucket: running totals for the requests that fell in it. */
38 +typedef struct
39 +{
40 +        unsigned long long      iostat_size;   /* sum of nsect over those requests */
41 +        unsigned long long      iostat_count;  /* number of requests */
42 +} iostat_counter_t;
43 +/* Number of buckets; sd_iostats_seq_show() labels bucket i as 512<<i. */
44 +#define IOSTAT_NCOUNTERS 16
45 +typedef struct 
46 +{
47 +        iostat_counter_t        iostat_read_histogram[IOSTAT_NCOUNTERS];
48 +        iostat_counter_t        iostat_write_histogram[IOSTAT_NCOUNTERS];
49 +       struct timeval          iostat_timeval; /* when the stats were created or last reset */
50 +} iostat_stats_t;
51 +        
52 +iostat_stats_t       **sd_iostats;          /* per-disk stats; a slot is NULL until sd_iostats_init_disk() */
53 +spinlock_t             sd_iostats_lock;     /* held around histogram updates and resets */
54 +struct proc_dir_entry *sd_iostats_procdir;  /* directory created under proc_scsi */
55 +char                   sd_iostats_procdir_name[] = "sd_iostats";
56 +/* Implemented further down in this file, under the same config guard. */
57 +extern void sd_iostats_init(void);
58 +extern void sd_iostats_init_disk(int disk);
59 +extern void sd_iostats_fini(void);
60 +extern void sd_iostats_bump(int disk, unsigned int nsect, int iswrite);
61 +#else  /* stats compiled out: empty stubs so the sd_iostats_*() calls cost nothing */
62 +static inline void sd_iostats_init(void) {}
63 +static inline void sd_iostats_init_disk(int disk) {}
64 +static inline void sd_iostats_fini(void) {}
65 +static inline void sd_iostats_bump(int dev, unsigned int nsect, int iswrite) {}
66 +#endif
67 +
68  /* device number --> sd_gendisks index */
69  #define SD_MAJOR_IDX(i)                ( ((MAJOR(i) & 0x80) >> 4) + (MAJOR(i) & 7) )
70  /* sd_gendisks index --> system major */
71 @@ -372,6 +406,8 @@ static int sd_init_command(Scsi_Cmnd * S
72         SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n",
73                                    nbuff, dev, block));
74  
75 +       sd_iostats_bump(dev, this_count, SCpnt->request.cmd == WRITE);
76 +
77         /*
78          * If we have a 1K hardware sectorsize, prevent access to single
79          * 512 byte sectors.  In theory we could handle this - in fact
80 @@ -575,7 +611,7 @@ static int sd_open(struct inode *inode, 
81                         if (scsi_block_when_processing_errors(SDev))
82                                 scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL);
83  
84 -       
85 +       sd_iostats_init_disk(target);
86         return 0;
87  
88  error_out:
89 @@ -593,18 +629,31 @@ static int sd_release(struct inode *inod
90  {
91         int target;
92         Scsi_Device * SDev;
93 +       char nbuff[6];
94  
95         target = DEVICE_NR(inode->i_rdev);
96         SDev = rscsi_disks[target].device;
97         if (!SDev)
98                 return -ENODEV;
99  
100 -       SDev->access_count--;
101 -
102 -       if (SDev->removable) {
103 -               if (!SDev->access_count)
104 +       if (!--SDev->access_count) {
105 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
106 +               /* Last close: remove sd_iostats information about this disk.
107 +                * Guarded because the globals exist only with stats enabled. */
108 +               if (sd_iostats_procdir != NULL) {
109 +                       sd_devname(target, nbuff);
110 +                       remove_proc_entry(nbuff, sd_iostats_procdir);
111 +               }
112 +               if (sd_iostats != NULL && sd_iostats[target] != NULL) {
113 +                       kfree (sd_iostats[target]);
114 +                       /* readers treat a NULL slot as "no stats" */
115 +                       sd_iostats[target] = NULL;
116 +               }
117 +#endif
118 +               if (SDev->removable) {
119                         if (scsi_block_when_processing_errors(SDev))
120                                 scsi_ioctl(SDev, SCSI_IOCTL_DOORUNLOCK, NULL);
121 +               }
122         }
123         if (SDev->host->hostt->module)
124                 __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
125 @@ -1260,6 +1309,8 @@ static int sd_init()
126  
127         memset(sd_varyio, 0, (sd_template.dev_max << 4)); 
128  
129 +       sd_iostats_init();
130 +
131         for (i = 0; i < sd_template.dev_max << 4; i++) {
132                 sd_blocksizes[i] = 1024;
133                 sd_hardsizes[i] = 512;
134 @@ -1324,6 +1375,7 @@ cleanup_gendisks_de_arr:
135         kfree(sd_gendisks);
136         sd_gendisks = NULL;
137  cleanup_sd_gendisks:
138 +       sd_iostats_fini();
139         kfree(sd_varyio);
140  cleanup_varyio:
141         kfree(sd_max_sectors);
142 @@ -1547,6 +1599,321 @@ static void sd_detach(Scsi_Device * SDp)
143         return;
144  }
145  
146 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
147 +/* seq_file ->show: print one disk's read/write request-size histograms. */
148 +static int sd_iostats_seq_show(struct seq_file *seq, void *v)
149 +{
150 +       struct timeval     now;
151 +       unsigned long      index;
152 +       iostat_stats_t    *stats;
153 +       unsigned long long read_len;
154 +       unsigned long long read_len_tot;
155 +       unsigned long      read_num;
156 +       unsigned long      read_num_tot;
157 +       unsigned long long write_len;
158 +       unsigned long long write_len_tot;
159 +       unsigned long      write_num;
160 +       unsigned long      write_num_tot;
161 +       int                i;
162 +       int                maxi;
163 +
164 +       if (seq == NULL || sd_iostats == NULL) {
165 +               printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
166 +               BUG();
167 +       }
168 +       index = (unsigned long)seq->private;    /* disk number; read only after the NULL check */
169 +       if (index >= sd_template.dev_max || !rscsi_disks[index].device)
170 +               return -ENXIO;  /* No such device */
171 +
172 +       stats = sd_iostats[index];
173 +       if (stats == NULL) {
174 +               seq_printf(seq, "sd_iostats_seq_show: sd_iostats "
175 +                               "entry %lu does not exist\n",
176 +                               index);
177 +               return 0;
178 +       }
179 +       /* snapshot_time is the time elapsed since iostat_timeval was stamped */
180 +       do_gettimeofday(&now);
181 +       now.tv_sec -= stats->iostat_timeval.tv_sec;
182 +       now.tv_usec -= stats->iostat_timeval.tv_usec;
183 +       if (now.tv_usec < 0) {
184 +               now.tv_usec += 1000000;
185 +               now.tv_sec--;
186 +       }
187 +
188 +       /* this sampling races with updates */
189 +       seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
190 +                  index, now.tv_sec, now.tv_usec);
191 +       /* find the highest bucket with any traffic; bucket 0 is always printed */
192 +       for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
193 +               if (stats->iostat_read_histogram[i].iostat_count != 0 ||
194 +                   stats->iostat_write_histogram[i].iostat_count != 0)
195 +                       break;
196 +       maxi = i;
197 +
198 +       seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", 
199 +                  "reads", "total", "writes", "total");
200 +
201 +       read_len_tot = write_len_tot = 0;
202 +       read_num_tot = write_num_tot = 0;
203 +       for (i = 0; i <= maxi; i++) {
204 +               read_len = stats->iostat_read_histogram[i].iostat_size;
205 +               read_len_tot += read_len;
206 +               read_num = stats->iostat_read_histogram[i].iostat_count;
207 +               read_num_tot += read_num;
208 +
209 +               write_len = stats->iostat_write_histogram[i].iostat_size;
210 +               write_len_tot += write_len;
211 +               write_num = stats->iostat_write_histogram[i].iostat_count;
212 +               write_num_tot += write_num;
213 +
214 +               seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", 
215 +                           512<<i, read_num, read_len, write_num, write_len);
216 +       }
217 +       
218 +       seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n", "total",
219 +                  read_num_tot, read_len_tot, 
220 +                  write_num_tot, write_len_tot);
221 +       return 0;
222 +}
223 +
224 +/* seq_file ->start: one record only, so just offset 0 yields a cookie. */
225 +static void *sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
226 +{
227 +        return *pos ? NULL : (void *)1;
228 +}
229 +
230 +/* seq_file ->next: advance the offset; there is never a second record. */
231 +static void *sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
232 +{
233 +        *pos += 1;
234 +        return NULL;
235 +}
236 +
237 +/* seq_file ->stop: ->start acquires nothing, so there is nothing to undo. */
238 +static void sd_iostats_seq_stop(struct seq_file *p, void *v)
239 +{
240 +}
241 +
242 +static struct seq_operations sd_iostats_seqops = {
243 +        .start = sd_iostats_seq_start,  /* yields a single record at offset 0 */
244 +        .next  = sd_iostats_seq_next,   /* always ends the sequence */
245 +        .stop  = sd_iostats_seq_stop,   /* no-op */
246 +        .show  = sd_iostats_seq_show,   /* formats the histograms */
247 +};
248 +
249 +/* ->open: start a seq_file and hand it the disk index kept in the proc entry. */
250 +static int sd_iostats_seq_open(struct inode *inode, struct file *file)
251 +{
252 +       struct proc_dir_entry *dp = PDE(inode);
253 +       struct seq_file       *seq;
254 +       int                    rc;
255 +
256 +       rc = seq_open(file, &sd_iostats_seqops);
257 +       if (rc != 0)
258 +               return rc;
259 +       seq = file->private_data;       /* the seq_file this function casts private_data to */
260 +       seq->private = dp->data;        /* disk index stored by sd_iostats_init_disk() */
261 +       return 0;
262 +}
263 +
264 +static ssize_t sd_iostats_seq_write(struct file *file, const char *buffer,
265 +                                    size_t len, loff_t *off)
266 +{
267 +       struct seq_file   *seq = file->private_data;
268 +       unsigned long      index = (unsigned long)seq->private;
269 +       iostat_stats_t    *stats;
270 +       unsigned long      flags;
271 +
272 +       spin_lock_irqsave (&sd_iostats_lock, flags);    /* any write resets this disk's stats */
273 +       stats = sd_iostats[index];      /* NULL once sd_release() has dropped the disk */
274 +       if (stats != NULL) {
275 +               memset (stats, 0, sizeof(*stats));
276 +               do_gettimeofday(&stats->iostat_timeval);
277 +       }
278 +       spin_unlock_irqrestore (&sd_iostats_lock, flags);
279 +       return len;     /* the written data itself is ignored */
280 +}
281 +
282 +static struct file_operations sd_iostats_proc_fops = {
283 +        .owner   = THIS_MODULE,
284 +        .open    = sd_iostats_seq_open,     /* binds the disk index to the seq_file */
285 +        .release = seq_release,
286 +        .llseek  = seq_lseek,
287 +        .read    = seq_read,
288 +        .write   = sd_iostats_seq_write,    /* resets the disk's stats */
289 +};
290 +
291 +/* Allocate the per-disk stats pointer array and create /proc/scsi/sd_iostats. */
292 +void sd_iostats_init(void)
293 +{
294 +       int    maxdevs = sd_template.dev_max;
295 +       int    i;
296 +
297 +       spin_lock_init(&sd_iostats_lock);
298 +
299 +       sd_iostats = kmalloc(maxdevs * sizeof(iostat_stats_t *), GFP_KERNEL);
300 +       if (sd_iostats == NULL) {
301 +               printk(KERN_WARNING "Can't keep sd iostats: "
302 +                      "ENOMEM allocating stats array size %lu\n",
303 +                      (unsigned long)(maxdevs * sizeof(iostat_stats_t *)));
304 +               return;
305 +       }
306 +
307 +       for (i = 0; i < maxdevs; i++)
308 +               sd_iostats[i] = NULL;
309 +
310 +       if (proc_scsi == NULL) {
311 +               printk(KERN_WARNING "No access to sd iostats: "
312 +                      "proc_scsi is NULL\n");
313 +               return;
314 +       }
315 +
316 +       sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
317 +                                              S_IFDIR | S_IRUGO | S_IXUGO,
318 +                                              proc_scsi);
319 +       if (sd_iostats_procdir == NULL) {
320 +               printk(KERN_WARNING "No access to sd iostats: "
321 +                      "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
322 +               return;
323 +       }
324 +}
325 +
326 +/* Lazily create the stats buffer and /proc/scsi/sd_iostats/<name> file for one disk. */
327 +void sd_iostats_init_disk(int disk)
328 +{
329 +       char                   name[6];
330 +       struct proc_dir_entry *pde;
331 +       unsigned long          flags;
332 +       iostat_stats_t        *stats;
333 +       int                    maxdevs = sd_template.dev_max;
334 +
335 +       if (sd_iostats == NULL ||
336 +           sd_iostats_procdir == NULL)
337 +               return;
338 +
339 +       /* sd_iostats[] has maxdevs slots, so maxdevs itself is out of range */
340 +       if (disk < 0 || disk >= maxdevs) {
341 +               printk(KERN_ERR "sd_iostats_init_disk: "
342 +                      "unexpected disk index %d(%d)\n",
343 +                      disk, maxdevs);
344 +               BUG();
345 +       }
346 +
347 +       if (sd_iostats[disk] != NULL)   /* already set up by an earlier call */
348 +               return;
349 +
350 +       sd_devname(disk, name);
351 +       stats = kmalloc(sizeof(*stats), GFP_KERNEL);
352 +       if (stats == NULL) {
353 +               printk(KERN_WARNING "Can't keep %s iostats: "
354 +                      "ENOMEM allocating stats size %lu\n", 
355 +                      name, (unsigned long)sizeof(*stats));
356 +               return;
357 +       }
358 +
359 +       memset (stats, 0, sizeof(*stats));
360 +       do_gettimeofday(&stats->iostat_timeval);
361 +
362 +       spin_lock_irqsave(&sd_iostats_lock, flags);
363 +       /* re-check under the lock: a concurrent caller may have filled the slot */
364 +       if (sd_iostats[disk] != NULL) {
365 +               spin_unlock_irqrestore(&sd_iostats_lock, flags);
366 +               kfree (stats);
367 +               return;
368 +       }
369 +
370 +       sd_iostats[disk] = stats;
371 +
372 +       spin_unlock_irqrestore(&sd_iostats_lock, flags);
373 +
374 +       pde = create_proc_entry(name, S_IRUGO | S_IWUSR, 
375 +                               sd_iostats_procdir);
376 +       if (pde == NULL) {
377 +               printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
378 +                      sd_iostats_procdir_name, name);
379 +       } else {
380 +               pde->proc_fops = &sd_iostats_proc_fops;
381 +               pde->data = (void *)((long)disk);       /* read back in sd_iostats_seq_open() */
382 +       }
383 +}
384 +
385 +/*
386 + * Tear down what sd_iostats_init() and sd_iostats_init_disk() set up:
387 + * the per-disk proc files, the proc directory and all stats buffers.
388 + */
389 +void sd_iostats_fini(void)
390 +{
391 +       const int ndisks = sd_template.dev_max;
392 +       char      devname[6];
393 +       int       n;
394 +
395 +       if (sd_iostats_procdir != NULL) {
396 +               /* per-disk entries first, then the directory itself */
397 +               for (n = 0; n < ndisks; n++) {
398 +                       sd_devname(n, devname);
399 +                       remove_proc_entry(devname, sd_iostats_procdir);
400 +               }
401 +               if (proc_scsi == NULL) {
402 +                       printk(KERN_ERR "sd_iostats_fini: proc_scsi NULL\n");
403 +                       BUG();
404 +               }
405 +               remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
406 +               sd_iostats_procdir = NULL;
407 +       }
408 +
409 +       if (sd_iostats != NULL) {
410 +               for (n = 0; n < ndisks; n++) {
411 +                       if (sd_iostats[n] != NULL)
412 +                               kfree (sd_iostats[n]);
413 +               }
414 +               kfree(sd_iostats);
415 +               sd_iostats = NULL;      /* makes a second call a no-op */
416 +       }
417 +}
418 +
419 +/*
420 + * Account one request of nsect sectors in disk's read or write histogram.
421 + * The bucket is floor(log2(nsect)); nsect of 0 or 1 lands in bucket 0.
422 + */
423 +void sd_iostats_bump(int disk, unsigned int nsect, int iswrite)
424 +{
425 +       iostat_stats_t    *stats;
426 +       iostat_counter_t  *counter;
427 +       int                bucket;
428 +       unsigned int       tmp;         /* unsigned like nsect: no sign flip on huge counts */
429 +       unsigned long      irqflags;
430 +
431 +       if (sd_iostats == NULL)
432 +               return;
433 +
434 +       if (disk < 0 || disk >= sd_template.dev_max) {
435 +               printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n",
436 +                      disk, sd_template.dev_max - 1);
437 +               BUG();
438 +       }
439 +
440 +       for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
441 +               tmp /= 2;
442 +
443 +       /* Oversized requests share the top bucket: pure accounting must
444 +        * not BUG() the I/O submission path. */
445 +       if (bucket >= IOSTAT_NCOUNTERS)
446 +               bucket = IOSTAT_NCOUNTERS - 1;
447 +
448 +       spin_lock_irqsave(&sd_iostats_lock, irqflags);
449 +       stats = sd_iostats[disk];       /* NULL when this disk has no stats buffer */
450 +       if (stats != NULL) {
451 +               counter = iswrite ? 
452 +                         &stats->iostat_write_histogram[bucket] :
453 +                         &stats->iostat_read_histogram[bucket];
454 +               counter->iostat_size += nsect;
455 +               counter->iostat_count++;
456 +       }
457 +       spin_unlock_irqrestore(&sd_iostats_lock, irqflags);
458 +}
459 +#endif
460 +
461  static int __init init_sd(void)
462  {
463         sd_template.module = THIS_MODULE;
464 @@ -1569,6 +1936,7 @@ static void __exit exit_sd(void)
465                 kfree(sd_blocksizes);
466                 kfree(sd_hardsizes);
467                 kfree(sd_varyio);
468 +               sd_iostats_fini();
469                 for (i = 0; i < N_USED_SD_MAJORS; i++) {
470                         kfree(sd_gendisks[i].de_arr);
471                         kfree(sd_gendisks[i].flags);