Whamcloud - gitweb
21a273db6e4f6a47558a98a21da6d270c9925a8e
[tools/e2fsprogs.git] / lib / ext2fs / unix_io.c
1 /*
2  * unix_io.c --- This is the Unix (well, really POSIX) implementation
3  *      of the I/O manager.
4  *
5  * Implements a small block cache (write-through only when requested).
6  *
7  * Includes support for Windows NT under Cygwin.
8  *
9  * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10  *      2002 by Theodore Ts'o.
11  *
12  * %Begin-Header%
13  * This file may be redistributed under the terms of the GNU Library
14  * General Public License, version 2.
15  * %End-Header%
16  */
17
18 #define _LARGEFILE_SOURCE
19 #define _LARGEFILE64_SOURCE
20 #ifndef _GNU_SOURCE
21 #define _GNU_SOURCE
22 #endif
23
24 #include <stdio.h>
25 #include <string.h>
26 #if HAVE_UNISTD_H
27 #include <unistd.h>
28 #endif
29 #if HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <fcntl.h>
33 #include <time.h>
34 #ifdef __linux__
35 #include <sys/utsname.h>
36 #endif
37 #ifdef HAVE_SYS_IOCTL_H
38 #include <sys/ioctl.h>
39 #endif
40 #ifdef HAVE_SYS_MOUNT_H
41 #include <sys/mount.h>
42 #endif
43 #if HAVE_SYS_STAT_H
44 #include <sys/stat.h>
45 #endif
46 #if HAVE_SYS_TYPES_H
47 #include <sys/types.h>
48 #endif
49 #if HAVE_SYS_RESOURCE_H
50 #include <sys/resource.h>
51 #endif
52
53 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
54 #define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
55 #endif
56
57 #if defined(__linux__) && defined(_IO) && !defined(BLKSSZGET)
58 #define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
59 #endif
60
61 #undef ALIGN_DEBUG
62
63 #include "ext2_fs.h"
64 #include "ext2fs.h"
65
/*
 * For checking structure magic numbers...
 *
 * Returns `code` from the enclosing function when obj->magic does not
 * match it.  The do/while(0) wrapper makes the macro a single statement,
 * so it is safe inside an unbraced if/else.
 */

#define EXT2_CHECK_MAGIC(obj, code) \
	do { \
		if ((obj)->magic != (code)) \
			return (code); \
	} while (0)
72
/*
 * One entry of the per-channel block cache.
 *
 * `block` is 64 bits wide because the cache routines in this file are
 * handed unsigned long long block numbers; a narrower field would
 * truncate them where unsigned long is 32 bits.
 */
struct unix_cache {
	char			*buf;		/* one block of data */
	unsigned long long	block;		/* block number held in buf */
	int			access_time;	/* stamp used to pick the oldest entry */
	unsigned		dirty:1;	/* buf not yet written to the device */
	unsigned		in_use:1;	/* entry currently maps a block */
};
80
/*
 * Cache geometry.  CACHE_SIZE is the number of entries in
 * unix_private_data.cache[].  Requests larger than the direct-size
 * threshold bypass the cache; READ_DIRECT_SIZE also sizes the reuse[]
 * array in unix_read_blk64().
 */
#define CACHE_SIZE 8
#define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */
84
/* Per-channel state hung off io_channel->private_data by unix_open(). */
struct unix_private_data {
	int	magic;		/* EXT2_ET_MAGIC_UNIX_IO_CHANNEL */
	int	dev;		/* descriptor returned by ext2fs_open_file() */
	int	flags;		/* IO_FLAG_* value passed to unix_open() */
	int	align;		/* required buffer/size alignment; 0 = none */
	int	access_time;	/* counter bumped on every cache hit/reuse */
	ext2_loff_t offset;	/* byte offset added to every device location */
	struct unix_cache cache[CACHE_SIZE];
	void	*bounce;	/* aligned scratch block, allocated when align != 0 */
	struct struct_io_stats io_stats;	/* bytes_read / bytes_written totals */
};
96
/*
 * True when `n` (an integer or a pointer) is a multiple of `align`.
 * The test is a bit mask, so `align` is expected to be a power of two.
 * Both arguments are parenthesized so expression arguments such as
 * `ptr + 1` are evaluated before the cast.
 */
#define IS_ALIGNED(n, align) ((((unsigned long) (n)) & \
			       ((unsigned long) ((align)-1))) == 0)
99
100 static errcode_t unix_open(const char *name, int flags, io_channel *channel);
101 static errcode_t unix_close(io_channel channel);
102 static errcode_t unix_set_blksize(io_channel channel, int blksize);
103 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
104                                int count, void *data);
105 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
106                                 int count, const void *data);
107 static errcode_t unix_flush(io_channel channel);
108 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
109                                 int size, const void *data);
110 static errcode_t unix_set_option(io_channel channel, const char *option,
111                                  const char *arg);
112 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
113 ;
114 static void reuse_cache(io_channel channel, struct unix_private_data *data,
115                  struct unix_cache *cache, unsigned long long block);
116 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
117                                int count, void *data);
118 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
119                                 int count, const void *data);
120 static errcode_t unix_discard(io_channel channel, unsigned long long block,
121                               unsigned long long count);
122
/*
 * Method table for this I/O manager.  The initializer is positional, so
 * the entries must stay in the member order of struct struct_io_manager
 * (declared outside this file).
 */
static struct struct_io_manager struct_unix_manager = {
	EXT2_ET_MAGIC_IO_MANAGER,
	"Unix I/O Manager",
	unix_open,
	unix_close,
	unix_set_blksize,
	unix_read_blk,
	unix_write_blk,
	unix_flush,
	unix_write_byte,
	unix_set_option,
	unix_get_stats,
	unix_read_blk64,
	unix_write_blk64,
	unix_discard,
};

/* Externally visible handle that refers to the table above. */
io_manager unix_io_manager = &struct_unix_manager;
141
142 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
143 {
144         errcode_t       retval = 0;
145
146         struct unix_private_data *data;
147
148         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
149         data = (struct unix_private_data *) channel->private_data;
150         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
151
152         if (stats)
153                 *stats = &data->io_stats;
154
155         return retval;
156 }
157
158 /*
159  * Here are the raw I/O functions
160  */
161 static errcode_t raw_read_blk(io_channel channel,
162                               struct unix_private_data *data,
163                               unsigned long long block,
164                               int count, void *bufv)
165 {
166         errcode_t       retval;
167         ssize_t         size;
168         ext2_loff_t     location;
169         int             actual = 0;
170         unsigned char   *buf = bufv;
171
172         size = (count < 0) ? -count : count * channel->block_size;
173         data->io_stats.bytes_read += size;
174         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
175         if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
176                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
177                 goto error_out;
178         }
179         if ((data->align == 0) ||
180             ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
181                 actual = read(data->dev, buf, size);
182                 if (actual != size) {
183                 short_read:
184                         if (actual < 0)
185                                 actual = 0;
186                         retval = EXT2_ET_SHORT_READ;
187                         goto error_out;
188                 }
189                 return 0;
190         }
191
192 #ifdef ALIGN_DEBUG
193         printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
194                (unsigned long) size);
195 #endif
196
197         /*
198          * The buffer or size which we're trying to read isn't aligned
199          * to the O_DIRECT rules, so we need to do this the hard way...
200          */
201         while (size > 0) {
202                 actual = read(data->dev, data->bounce, channel->block_size);
203                 if (actual != channel->block_size)
204                         goto short_read;
205                 actual = size;
206                 if (size > channel->block_size)
207                         actual = channel->block_size;
208                 memcpy(buf, data->bounce, actual);
209                 size -= actual;
210                 buf += actual;
211         }
212         return 0;
213
214 error_out:
215         memset((char *) buf+actual, 0, size-actual);
216         if (channel->read_error)
217                 retval = (channel->read_error)(channel, block, count, buf,
218                                                size, actual, retval);
219         return retval;
220 }
221
222 static errcode_t raw_write_blk(io_channel channel,
223                                struct unix_private_data *data,
224                                unsigned long long block,
225                                int count, const void *bufv)
226 {
227         ssize_t         size;
228         ext2_loff_t     location;
229         int             actual = 0;
230         errcode_t       retval;
231         const unsigned char *buf = bufv;
232
233         if (count == 1)
234                 size = channel->block_size;
235         else {
236                 if (count < 0)
237                         size = -count;
238                 else
239                         size = count * channel->block_size;
240         }
241         data->io_stats.bytes_written += size;
242
243         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
244         if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
245                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
246                 goto error_out;
247         }
248
249         if ((data->align == 0) ||
250             ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
251                 actual = write(data->dev, buf, size);
252                 if (actual != size) {
253                 short_write:
254                         retval = EXT2_ET_SHORT_WRITE;
255                         goto error_out;
256                 }
257                 return 0;
258         }
259
260 #ifdef ALIGN_DEBUG
261         printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
262                (unsigned long) size);
263 #endif
264         /*
265          * The buffer or size which we're trying to write isn't aligned
266          * to the O_DIRECT rules, so we need to do this the hard way...
267          */
268         while (size > 0) {
269                 if (size < channel->block_size) {
270                         actual = read(data->dev, data->bounce,
271                                       channel->block_size);
272                         if (actual != channel->block_size) {
273                                 retval = EXT2_ET_SHORT_READ;
274                                 goto error_out;
275                         }
276                 }
277                 actual = size;
278                 if (size > channel->block_size)
279                         actual = channel->block_size;
280                 memcpy(data->bounce, buf, actual);
281                 actual = write(data->dev, data->bounce, channel->block_size);
282                 if (actual != channel->block_size)
283                         goto short_write;
284                 size -= actual;
285                 buf += actual;
286         }
287         return 0;
288
289 error_out:
290         if (channel->write_error)
291                 retval = (channel->write_error)(channel, block, count, buf,
292                                                 size, actual, retval);
293         return retval;
294 }
295
296
297 /*
298  * Here we implement the cache functions
299  */
300
301 /* Allocate the cache buffers */
302 static errcode_t alloc_cache(io_channel channel,
303                              struct unix_private_data *data)
304 {
305         errcode_t               retval;
306         struct unix_cache       *cache;
307         int                     i;
308
309         data->access_time = 0;
310         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
311                 cache->block = 0;
312                 cache->access_time = 0;
313                 cache->dirty = 0;
314                 cache->in_use = 0;
315                 if (cache->buf)
316                         ext2fs_free_mem(&cache->buf);
317                 retval = ext2fs_get_memalign(channel->block_size,
318                                              data->align, &cache->buf);
319                 if (retval)
320                         return retval;
321         }
322         if (data->align) {
323                 if (data->bounce)
324                         ext2fs_free_mem(&data->bounce);
325                 retval = ext2fs_get_memalign(channel->block_size, data->align,
326                                              &data->bounce);
327         }
328         return retval;
329 }
330
331 /* Free the cache buffers */
332 static void free_cache(struct unix_private_data *data)
333 {
334         struct unix_cache       *cache;
335         int                     i;
336
337         data->access_time = 0;
338         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
339                 cache->block = 0;
340                 cache->access_time = 0;
341                 cache->dirty = 0;
342                 cache->in_use = 0;
343                 if (cache->buf)
344                         ext2fs_free_mem(&cache->buf);
345         }
346         if (data->bounce)
347                 ext2fs_free_mem(&data->bounce);
348 }
349
350 #ifndef NO_IO_CACHE
351 /*
352  * Try to find a block in the cache.  If the block is not found, and
353  * eldest is a non-zero pointer, then fill in eldest with the cache
354  * entry to that should be reused.
355  */
356 static struct unix_cache *find_cached_block(struct unix_private_data *data,
357                                             unsigned long long block,
358                                             struct unix_cache **eldest)
359 {
360         struct unix_cache       *cache, *unused_cache, *oldest_cache;
361         int                     i;
362
363         unused_cache = oldest_cache = 0;
364         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
365                 if (!cache->in_use) {
366                         if (!unused_cache)
367                                 unused_cache = cache;
368                         continue;
369                 }
370                 if (cache->block == block) {
371                         cache->access_time = ++data->access_time;
372                         return cache;
373                 }
374                 if (!oldest_cache ||
375                     (cache->access_time < oldest_cache->access_time))
376                         oldest_cache = cache;
377         }
378         if (eldest)
379                 *eldest = (unused_cache) ? unused_cache : oldest_cache;
380         return 0;
381 }
382
383 /*
384  * Reuse a particular cache entry for another block.
385  */
386 static void reuse_cache(io_channel channel, struct unix_private_data *data,
387                  struct unix_cache *cache, unsigned long long block)
388 {
389         if (cache->dirty && cache->in_use)
390                 raw_write_blk(channel, data, cache->block, 1, cache->buf);
391
392         cache->in_use = 1;
393         cache->dirty = 0;
394         cache->block = block;
395         cache->access_time = ++data->access_time;
396 }
397
398 /*
399  * Flush all of the blocks in the cache
400  */
401 static errcode_t flush_cached_blocks(io_channel channel,
402                                      struct unix_private_data *data,
403                                      int invalidate)
404
405 {
406         struct unix_cache       *cache;
407         errcode_t               retval, retval2;
408         int                     i;
409
410         retval2 = 0;
411         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
412                 if (!cache->in_use)
413                         continue;
414
415                 if (invalidate)
416                         cache->in_use = 0;
417
418                 if (!cache->dirty)
419                         continue;
420
421                 retval = raw_write_blk(channel, data,
422                                        cache->block, 1, cache->buf);
423                 if (retval)
424                         retval2 = retval;
425                 else
426                         cache->dirty = 0;
427         }
428         return retval2;
429 }
430 #endif /* NO_IO_CACHE */
431
432 #ifdef __linux__
433 #ifndef BLKDISCARDZEROES
434 #define BLKDISCARDZEROES _IO(0x12,124)
435 #endif
436 #endif
437
438 static errcode_t unix_open(const char *name, int flags, io_channel *channel)
439 {
440         io_channel      io = NULL;
441         struct unix_private_data *data = NULL;
442         errcode_t       retval;
443         int             open_flags, zeroes = 0;
444         ext2fs_struct_stat st;
445 #ifdef __linux__
446         struct          utsname ut;
447 #endif
448
449         if (name == 0)
450                 return EXT2_ET_BAD_DEVICE_NAME;
451         retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
452         if (retval)
453                 return retval;
454         memset(io, 0, sizeof(struct struct_io_channel));
455         io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
456         retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
457         if (retval)
458                 goto cleanup;
459
460         io->manager = unix_io_manager;
461         retval = ext2fs_get_mem(strlen(name)+1, &io->name);
462         if (retval)
463                 goto cleanup;
464
465         strcpy(io->name, name);
466         io->private_data = data;
467         io->block_size = 1024;
468         io->read_error = 0;
469         io->write_error = 0;
470         io->refcount = 1;
471
472         memset(data, 0, sizeof(struct unix_private_data));
473         data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
474         data->io_stats.num_fields = 2;
475
476         open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
477         if (flags & IO_FLAG_EXCLUSIVE)
478                 open_flags |= O_EXCL;
479 #ifdef O_DIRECT
480         if (flags & IO_FLAG_DIRECT_IO)
481                 open_flags |= O_DIRECT;
482 #endif
483         data->flags = flags;
484
485         data->dev = ext2fs_open_file(io->name, open_flags);
486         if (data->dev < 0) {
487                 retval = errno;
488                 goto cleanup;
489         }
490
491 #ifdef BLKSSZGET
492         if (flags & IO_FLAG_DIRECT_IO) {
493                 if (ioctl(data->dev, BLKSSZGET, &data->align) != 0)
494                         data->align = io->block_size;
495         }
496 #endif
497
498 #ifdef BLKDISCARDZEROES
499         ioctl(data->dev, BLKDISCARDZEROES, &zeroes);
500         if (zeroes)
501                 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
502 #endif
503
504 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
505         /*
506          * Some operating systems require that the buffers be aligned,
507          * regardless of O_DIRECT
508          */
509         data->align = 512;
510 #endif
511
512
513         if ((retval = alloc_cache(io, data)))
514                 goto cleanup;
515
516 #ifdef BLKROGET
517         if (flags & IO_FLAG_RW) {
518                 int error;
519                 int readonly = 0;
520
521                 /* Is the block device actually writable? */
522                 error = ioctl(data->dev, BLKROGET, &readonly);
523                 if (!error && readonly) {
524                         close(data->dev);
525                         retval = EPERM;
526                         goto cleanup;
527                 }
528         }
529 #endif
530
531 #ifdef __linux__
532 #undef RLIM_INFINITY
533 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
534 #define RLIM_INFINITY   ((unsigned long)(~0UL>>1))
535 #else
536 #define RLIM_INFINITY  (~0UL)
537 #endif
538         /*
539          * Work around a bug in 2.4.10-2.4.18 kernels where writes to
540          * block devices are wrongly getting hit by the filesize
541          * limit.  This workaround isn't perfect, since it won't work
542          * if glibc wasn't built against 2.2 header files.  (Sigh.)
543          *
544          */
545         if ((flags & IO_FLAG_RW) &&
546             (uname(&ut) == 0) &&
547             ((ut.release[0] == '2') && (ut.release[1] == '.') &&
548              (ut.release[2] == '4') && (ut.release[3] == '.') &&
549              (ut.release[4] == '1') && (ut.release[5] >= '0') &&
550              (ut.release[5] < '8')) &&
551             (ext2fs_stat(io->name, &st) == 0) &&
552             (S_ISBLK(st.st_mode))) {
553                 struct rlimit   rlim;
554
555                 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
556                 setrlimit(RLIMIT_FSIZE, &rlim);
557                 getrlimit(RLIMIT_FSIZE, &rlim);
558                 if (((unsigned long) rlim.rlim_cur) <
559                     ((unsigned long) rlim.rlim_max)) {
560                         rlim.rlim_cur = rlim.rlim_max;
561                         setrlimit(RLIMIT_FSIZE, &rlim);
562                 }
563         }
564 #endif
565         *channel = io;
566         return 0;
567
568 cleanup:
569         if (data) {
570                 free_cache(data);
571                 ext2fs_free_mem(&data);
572         }
573         if (io)
574                 ext2fs_free_mem(&io);
575         return retval;
576 }
577
578 static errcode_t unix_close(io_channel channel)
579 {
580         struct unix_private_data *data;
581         errcode_t       retval = 0;
582
583         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
584         data = (struct unix_private_data *) channel->private_data;
585         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
586
587         if (--channel->refcount > 0)
588                 return 0;
589
590 #ifndef NO_IO_CACHE
591         retval = flush_cached_blocks(channel, data, 0);
592 #endif
593
594         if (close(data->dev) < 0)
595                 retval = errno;
596         free_cache(data);
597
598         ext2fs_free_mem(&channel->private_data);
599         if (channel->name)
600                 ext2fs_free_mem(&channel->name);
601         ext2fs_free_mem(&channel);
602         return retval;
603 }
604
605 static errcode_t unix_set_blksize(io_channel channel, int blksize)
606 {
607         struct unix_private_data *data;
608         errcode_t               retval;
609
610         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
611         data = (struct unix_private_data *) channel->private_data;
612         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
613
614         if (channel->block_size != blksize) {
615 #ifndef NO_IO_CACHE
616                 if ((retval = flush_cached_blocks(channel, data, 0)))
617                         return retval;
618 #endif
619
620                 channel->block_size = blksize;
621                 free_cache(data);
622                 if ((retval = alloc_cache(channel, data)))
623                         return retval;
624         }
625         return 0;
626 }
627
628
629 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
630                                int count, void *buf)
631 {
632         struct unix_private_data *data;
633         struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
634         errcode_t       retval;
635         char            *cp;
636         int             i, j;
637
638         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
639         data = (struct unix_private_data *) channel->private_data;
640         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
641
642 #ifdef NO_IO_CACHE
643         return raw_read_blk(channel, data, block, count, buf);
644 #else
645         /*
646          * If we're doing an odd-sized read or a very large read,
647          * flush out the cache and then do a direct read.
648          */
649         if (count < 0 || count > WRITE_DIRECT_SIZE) {
650                 if ((retval = flush_cached_blocks(channel, data, 0)))
651                         return retval;
652                 return raw_read_blk(channel, data, block, count, buf);
653         }
654
655         cp = buf;
656         while (count > 0) {
657                 /* If it's in the cache, use it! */
658                 if ((cache = find_cached_block(data, block, &reuse[0]))) {
659 #ifdef DEBUG
660                         printf("Using cached block %lu\n", block);
661 #endif
662                         memcpy(cp, cache->buf, channel->block_size);
663                         count--;
664                         block++;
665                         cp += channel->block_size;
666                         continue;
667                 }
668                 if (count == 1) {
669                         /*
670                          * Special case where we read directly into the
671                          * cache buffer; important in the O_DIRECT case
672                          */
673                         cache = reuse[0];
674                         reuse_cache(channel, data, cache, block);
675                         if ((retval = raw_read_blk(channel, data, block, 1,
676                                                    cache->buf))) {
677                                 cache->in_use = 0;
678                                 return retval;
679                         }
680                         memcpy(cp, cache->buf, channel->block_size);
681                         return 0;
682                 }
683
684                 /*
685                  * Find the number of uncached blocks so we can do a
686                  * single read request
687                  */
688                 for (i=1; i < count; i++)
689                         if (find_cached_block(data, block+i, &reuse[i]))
690                                 break;
691 #ifdef DEBUG
692                 printf("Reading %d blocks starting at %lu\n", i, block);
693 #endif
694                 if ((retval = raw_read_blk(channel, data, block, i, cp)))
695                         return retval;
696
697                 /* Save the results in the cache */
698                 for (j=0; j < i; j++) {
699                         count--;
700                         cache = reuse[j];
701                         reuse_cache(channel, data, cache, block++);
702                         memcpy(cache->buf, cp, channel->block_size);
703                         cp += channel->block_size;
704                 }
705         }
706         return 0;
707 #endif /* NO_IO_CACHE */
708 }
709
710 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
711                                int count, void *buf)
712 {
713         return unix_read_blk64(channel, block, count, buf);
714 }
715
716 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
717                                 int count, const void *buf)
718 {
719         struct unix_private_data *data;
720         struct unix_cache *cache, *reuse;
721         errcode_t       retval = 0;
722         const char      *cp;
723         int             writethrough;
724
725         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
726         data = (struct unix_private_data *) channel->private_data;
727         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
728
729 #ifdef NO_IO_CACHE
730         return raw_write_blk(channel, data, block, count, buf);
731 #else
732         /*
733          * If we're doing an odd-sized write or a very large write,
734          * flush out the cache completely and then do a direct write.
735          */
736         if (count < 0 || count > WRITE_DIRECT_SIZE) {
737                 if ((retval = flush_cached_blocks(channel, data, 1)))
738                         return retval;
739                 return raw_write_blk(channel, data, block, count, buf);
740         }
741
742         /*
743          * For a moderate-sized multi-block write, first force a write
744          * if we're in write-through cache mode, and then fill the
745          * cache with the blocks.
746          */
747         writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
748         if (writethrough)
749                 retval = raw_write_blk(channel, data, block, count, buf);
750
751         cp = buf;
752         while (count > 0) {
753                 cache = find_cached_block(data, block, &reuse);
754                 if (!cache) {
755                         cache = reuse;
756                         reuse_cache(channel, data, cache, block);
757                 }
758                 memcpy(cache->buf, cp, channel->block_size);
759                 cache->dirty = !writethrough;
760                 count--;
761                 block++;
762                 cp += channel->block_size;
763         }
764         return retval;
765 #endif /* NO_IO_CACHE */
766 }
767
768 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
769                                 int count, const void *buf)
770 {
771         return unix_write_blk64(channel, block, count, buf);
772 }
773
774 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
775                                  int size, const void *buf)
776 {
777         struct unix_private_data *data;
778         errcode_t       retval = 0;
779         ssize_t         actual;
780
781         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
782         data = (struct unix_private_data *) channel->private_data;
783         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
784
785         if (data->align != 0) {
786 #ifdef ALIGN_DEBUG
787                 printf("unix_write_byte: O_DIRECT fallback\n");
788 #endif
789                 return EXT2_ET_UNIMPLEMENTED;
790         }
791
792 #ifndef NO_IO_CACHE
793         /*
794          * Flush out the cache completely
795          */
796         if ((retval = flush_cached_blocks(channel, data, 1)))
797                 return retval;
798 #endif
799
800         if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
801                 return errno;
802
803         actual = write(data->dev, buf, size);
804         if (actual != size)
805                 return EXT2_ET_SHORT_WRITE;
806
807         return 0;
808 }
809
810 /*
811  * Flush data buffers to disk.
812  */
813 static errcode_t unix_flush(io_channel channel)
814 {
815         struct unix_private_data *data;
816         errcode_t retval = 0;
817
818         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
819         data = (struct unix_private_data *) channel->private_data;
820         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
821
822 #ifndef NO_IO_CACHE
823         retval = flush_cached_blocks(channel, data, 0);
824 #endif
825         fsync(data->dev);
826         return retval;
827 }
828
829 static errcode_t unix_set_option(io_channel channel, const char *option,
830                                  const char *arg)
831 {
832         struct unix_private_data *data;
833         unsigned long long tmp;
834         char *end;
835
836         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
837         data = (struct unix_private_data *) channel->private_data;
838         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
839
840         if (!strcmp(option, "offset")) {
841                 if (!arg)
842                         return EXT2_ET_INVALID_ARGUMENT;
843
844                 tmp = strtoull(arg, &end, 0);
845                 if (*end)
846                         return EXT2_ET_INVALID_ARGUMENT;
847                 data->offset = tmp;
848                 if (data->offset < 0)
849                         return EXT2_ET_INVALID_ARGUMENT;
850                 return 0;
851         }
852         return EXT2_ET_INVALID_ARGUMENT;
853 }
854
855 #if defined(__linux__) && !defined(BLKDISCARD)
856 #define BLKDISCARD      _IO(0x12,119)
857 #endif
858
859 static errcode_t unix_discard(io_channel channel, unsigned long long block,
860                               unsigned long long count)
861 {
862 #ifdef BLKDISCARD
863         struct unix_private_data *data;
864         __uint64_t      range[2];
865         int             ret;
866
867         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
868         data = (struct unix_private_data *) channel->private_data;
869         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
870
871         range[0] = (__uint64_t)(block) * channel->block_size;
872         range[1] = (__uint64_t)(count) * channel->block_size;
873
874         ret = ioctl(data->dev, BLKDISCARD, &range);
875         if (ret < 0)
876                 return errno;
877         return 0;
878 #else
879         return EXT2_ET_UNIMPLEMENTED;
880 #endif
881 }