Whamcloud - gitweb
ext2fs: Add Direct I/O support to the ext2fs library
[tools/e2fsprogs.git] / lib / ext2fs / unix_io.c
1 /*
2  * unix_io.c --- This is the Unix (well, really POSIX) implementation
3  *      of the I/O manager.
4  *
5  * Implements a small block cache (CACHE_SIZE entries); it is write-back
5  * unless the channel has CHANNEL_FLAGS_WRITETHROUGH set.
6  *
7  * Includes support for Windows NT under Cygwin.
8  *
9  * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10  *      2002 by Theodore Ts'o.
11  *
12  * %Begin-Header%
13  * This file may be redistributed under the terms of the GNU Library
14  * General Public License, version 2.
15  * %End-Header%
16  */
17
18 #define _LARGEFILE_SOURCE
19 #define _LARGEFILE64_SOURCE
20 #define _GNU_SOURCE
21
22 #include <stdio.h>
23 #include <string.h>
24 #if HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #if HAVE_ERRNO_H
28 #include <errno.h>
29 #endif
30 #include <fcntl.h>
31 #include <time.h>
32 #ifdef __linux__
33 #include <sys/utsname.h>
34 #endif
35 #ifdef HAVE_SYS_IOCTL_H
36 #include <sys/ioctl.h>
37 #endif
38 #ifdef HAVE_SYS_MOUNT_H
39 #include <sys/mount.h>
40 #endif
41 #if HAVE_SYS_STAT_H
42 #include <sys/stat.h>
43 #endif
44 #if HAVE_SYS_TYPES_H
45 #include <sys/types.h>
46 #endif
47 #if HAVE_SYS_RESOURCE_H
48 #include <sys/resource.h>
49 #endif
50
51 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
52 #define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
53 #endif
54
55 #if defined(__linux__) && defined(_IO) && !defined(BLKSSZGET)
56 #define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
57 #endif
58
59 #undef ALIGN_DEBUG
60
61 #include "ext2_fs.h"
62 #include "ext2fs.h"
63
64 /*
65  * For checking structure magic numbers...
66  */
67
/*
 * Return `code` from the enclosing function when obj->magic does not
 * equal `code`.  The body is wrapped in do/while(0) so that the macro
 * acts as a single statement and cannot capture an `else` that follows
 * it in the caller.
 */
#define EXT2_CHECK_MAGIC(obj, code) \
        do { \
                if ((obj)->magic != (code)) \
                        return (code); \
        } while (0)
70
/*
 * One slot of the block cache.
 *
 * `block` is unsigned long long so that it can hold every block number
 * accepted by the 64-bit read/write entry points without truncation on
 * platforms where long is 32 bits.
 */
struct unix_cache {
        char                    *buf;           /* one block of cached data */
        unsigned long long      block;          /* block number held in buf */
        int                     access_time;    /* stamp of the last use; the
                                                 * smallest stamp is evicted
                                                 * first */
        unsigned                dirty:1;        /* buf not yet written out */
        unsigned                in_use:1;       /* slot holds a valid block */
};
78
/*
 * Cache geometry: the number of cache slots, and the largest block
 * counts that the write and read paths will route through the cache
 * (anything larger bypasses it).
 */
#define CACHE_SIZE 8
#define WRITE_DIRECT_SIZE 4     /* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE 4      /* Should be smaller than CACHE_SIZE */

/* Enforce the documented constraint at compile time. */
#if WRITE_DIRECT_SIZE >= CACHE_SIZE
#error "WRITE_DIRECT_SIZE must be smaller than CACHE_SIZE"
#endif
82
83 struct unix_private_data {
84         int     magic;
85         int     dev;
86         int     flags;
87         int     align;
88         int     access_time;
89         ext2_loff_t offset;
90         struct unix_cache cache[CACHE_SIZE];
91         void    *bounce;
92         struct struct_io_stats io_stats;
93 };
94
/*
 * True when n (an integer or pointer expression) is a multiple of
 * align.  align has to be a power of two because the test is a mask.
 * Both arguments are fully parenthesized so that compound expressions
 * such as `ptr + 1` are evaluated before the cast is applied.
 */
#define IS_ALIGNED(n, align) ((((unsigned long) (n)) & \
                               ((unsigned long) ((align) - 1))) == 0)
97
98 static errcode_t unix_open(const char *name, int flags, io_channel *channel);
99 static errcode_t unix_close(io_channel channel);
100 static errcode_t unix_set_blksize(io_channel channel, int blksize);
101 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
102                                int count, void *data);
103 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
104                                 int count, const void *data);
105 static errcode_t unix_flush(io_channel channel);
106 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
107                                 int size, const void *data);
108 static errcode_t unix_set_option(io_channel channel, const char *option,
109                                  const char *arg);
110 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
111 ;
112 static void reuse_cache(io_channel channel, struct unix_private_data *data,
113                  struct unix_cache *cache, unsigned long long block);
114 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
115                                int count, void *data);
116 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
117                                 int count, const void *data);
118
119 static struct struct_io_manager struct_unix_manager = {
120         EXT2_ET_MAGIC_IO_MANAGER,
121         "Unix I/O Manager",
122         unix_open,
123         unix_close,
124         unix_set_blksize,
125         unix_read_blk,
126         unix_write_blk,
127         unix_flush,
128         unix_write_byte,
129         unix_set_option,
130         unix_get_stats,
131         unix_read_blk64,
132         unix_write_blk64,
133 };
134
135 io_manager unix_io_manager = &struct_unix_manager;
136
137 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
138 {
139         errcode_t       retval = 0;
140
141         struct unix_private_data *data;
142
143         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
144         data = (struct unix_private_data *) channel->private_data;
145         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
146
147         if (stats)
148                 *stats = &data->io_stats;
149
150         return retval;
151 }
152
153 /*
154  * Here are the raw I/O functions
155  */
156 static errcode_t raw_read_blk(io_channel channel,
157                               struct unix_private_data *data,
158                               unsigned long long block,
159                               int count, void *buf)
160 {
161         errcode_t       retval;
162         ssize_t         size;
163         ext2_loff_t     location;
164         int             actual = 0;
165
166         size = (count < 0) ? -count : count * channel->block_size;
167         data->io_stats.bytes_read += size;
168         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
169         if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
170                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
171                 goto error_out;
172         }
173         if ((data->align == 0) ||
174             ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
175                 actual = read(data->dev, buf, size);
176                 if (actual != size) {
177                 short_read:
178                         if (actual < 0)
179                                 actual = 0;
180                         retval = EXT2_ET_SHORT_READ;
181                         goto error_out;
182                 }
183                 return 0;
184         }
185
186 #ifdef ALIGN_DEBUG
187         printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
188                (unsigned long) size);
189 #endif
190
191         /*
192          * The buffer or size which we're trying to read isn't aligned
193          * to the O_DIRECT rules, so we need to do this the hard way...
194          */
195         while (size > 0) {
196                 actual = read(data->dev, data->bounce, channel->block_size);
197                 if (actual != channel->block_size)
198                         goto short_read;
199                 actual = size;
200                 if (size > channel->block_size)
201                         actual = channel->block_size;
202                 memcpy(buf, data->bounce, actual);
203                 size -= actual;
204                 buf += actual;
205         }
206         return 0;
207
208 error_out:
209         memset((char *) buf+actual, 0, size-actual);
210         if (channel->read_error)
211                 retval = (channel->read_error)(channel, block, count, buf,
212                                                size, actual, retval);
213         return retval;
214 }
215
216 static errcode_t raw_write_blk(io_channel channel,
217                                struct unix_private_data *data,
218                                unsigned long long block,
219                                int count, const void *buf)
220 {
221         ssize_t         size;
222         ext2_loff_t     location;
223         int             actual = 0;
224         errcode_t       retval;
225
226         if (count == 1)
227                 size = channel->block_size;
228         else {
229                 if (count < 0)
230                         size = -count;
231                 else
232                         size = count * channel->block_size;
233         }
234         data->io_stats.bytes_written += size;
235
236         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
237         if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
238                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
239                 goto error_out;
240         }
241
242         if ((data->align == 0) ||
243             ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) {
244                 actual = write(data->dev, buf, size);
245                 if (actual != size) {
246                 short_write:
247                         retval = EXT2_ET_SHORT_WRITE;
248                         goto error_out;
249                 }
250                 return 0;
251         }
252
253 #ifdef ALIGN_DEBUG
254         printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
255                (unsigned long) size);
256 #endif
257         /*
258          * The buffer or size which we're trying to write isn't aligned
259          * to the O_DIRECT rules, so we need to do this the hard way...
260          */
261         while (size > 0) {
262                 if (size < channel->block_size) {
263                         actual = read(data->dev, data->bounce,
264                                       channel->block_size);
265                         if (actual != channel->block_size) {
266                                 retval = EXT2_ET_SHORT_READ;
267                                 goto error_out;
268                         }
269                 }
270                 actual = size;
271                 if (size > channel->block_size)
272                         actual = channel->block_size;
273                 memcpy(data->bounce, buf, actual);
274                 actual = write(data->dev, data->bounce, channel->block_size);
275                 if (actual != channel->block_size)
276                         goto short_write;
277                 size -= actual;
278                 buf += actual;
279         }
280         return 0;
281
282 error_out:
283         if (channel->write_error)
284                 retval = (channel->write_error)(channel, block, count, buf,
285                                                 size, actual, retval);
286         return retval;
287 }
288
289
290 /*
291  * Here we implement the cache functions
292  */
293
294 /* Allocate the cache buffers */
295 static errcode_t alloc_cache(io_channel channel,
296                              struct unix_private_data *data)
297 {
298         errcode_t               retval;
299         struct unix_cache       *cache;
300         int                     i;
301
302         data->access_time = 0;
303         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
304                 cache->block = 0;
305                 cache->access_time = 0;
306                 cache->dirty = 0;
307                 cache->in_use = 0;
308                 if (cache->buf)
309                         ext2fs_free_mem(&cache->buf);
310                 retval = ext2fs_get_memalign(channel->block_size,
311                                              data->align, &cache->buf);
312                 if (retval)
313                         return retval;
314         }
315         if (data->align) {
316                 if (data->bounce)
317                         ext2fs_free_mem(&data->bounce);
318                 retval = ext2fs_get_memalign(channel->block_size, data->align,
319                                              &data->bounce);
320         }
321         return retval;
322 }
323
324 /* Free the cache buffers */
325 static void free_cache(struct unix_private_data *data)
326 {
327         struct unix_cache       *cache;
328         int                     i;
329
330         data->access_time = 0;
331         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
332                 cache->block = 0;
333                 cache->access_time = 0;
334                 cache->dirty = 0;
335                 cache->in_use = 0;
336                 if (cache->buf)
337                         ext2fs_free_mem(&cache->buf);
338         }
339         if (data->bounce)
340                 ext2fs_free_mem(&data->bounce);
341 }
342
343 #ifndef NO_IO_CACHE
344 /*
345  * Try to find a block in the cache.  If the block is not found, and
346  * eldest is a non-zero pointer, then fill in eldest with the cache
347  * entry to that should be reused.
348  */
349 static struct unix_cache *find_cached_block(struct unix_private_data *data,
350                                             unsigned long long block,
351                                             struct unix_cache **eldest)
352 {
353         struct unix_cache       *cache, *unused_cache, *oldest_cache;
354         int                     i;
355
356         unused_cache = oldest_cache = 0;
357         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
358                 if (!cache->in_use) {
359                         if (!unused_cache)
360                                 unused_cache = cache;
361                         continue;
362                 }
363                 if (cache->block == block) {
364                         cache->access_time = ++data->access_time;
365                         return cache;
366                 }
367                 if (!oldest_cache ||
368                     (cache->access_time < oldest_cache->access_time))
369                         oldest_cache = cache;
370         }
371         if (eldest)
372                 *eldest = (unused_cache) ? unused_cache : oldest_cache;
373         return 0;
374 }
375
376 /*
377  * Reuse a particular cache entry for another block.
378  */
379 static void reuse_cache(io_channel channel, struct unix_private_data *data,
380                  struct unix_cache *cache, unsigned long long block)
381 {
382         if (cache->dirty && cache->in_use)
383                 raw_write_blk(channel, data, cache->block, 1, cache->buf);
384
385         cache->in_use = 1;
386         cache->dirty = 0;
387         cache->block = block;
388         cache->access_time = ++data->access_time;
389 }
390
391 /*
392  * Flush all of the blocks in the cache
393  */
394 static errcode_t flush_cached_blocks(io_channel channel,
395                                      struct unix_private_data *data,
396                                      int invalidate)
397
398 {
399         struct unix_cache       *cache;
400         errcode_t               retval, retval2;
401         int                     i;
402
403         retval2 = 0;
404         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
405                 if (!cache->in_use)
406                         continue;
407
408                 if (invalidate)
409                         cache->in_use = 0;
410
411                 if (!cache->dirty)
412                         continue;
413
414                 retval = raw_write_blk(channel, data,
415                                        cache->block, 1, cache->buf);
416                 if (retval)
417                         retval2 = retval;
418                 else
419                         cache->dirty = 0;
420         }
421         return retval2;
422 }
423 #endif /* NO_IO_CACHE */
424
425 static errcode_t unix_open(const char *name, int flags, io_channel *channel)
426 {
427         io_channel      io = NULL;
428         struct unix_private_data *data = NULL;
429         errcode_t       retval;
430         int             open_flags;
431         struct stat     st;
432 #ifdef __linux__
433         struct          utsname ut;
434 #endif
435
436         if (name == 0)
437                 return EXT2_ET_BAD_DEVICE_NAME;
438         retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
439         if (retval)
440                 return retval;
441         memset(io, 0, sizeof(struct struct_io_channel));
442         io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
443         retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
444         if (retval)
445                 goto cleanup;
446
447         io->manager = unix_io_manager;
448         retval = ext2fs_get_mem(strlen(name)+1, &io->name);
449         if (retval)
450                 goto cleanup;
451
452         strcpy(io->name, name);
453         io->private_data = data;
454         io->block_size = 1024;
455         io->read_error = 0;
456         io->write_error = 0;
457         io->refcount = 1;
458
459         memset(data, 0, sizeof(struct unix_private_data));
460         data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
461         data->io_stats.num_fields = 2;
462
463         open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
464         if (flags & IO_FLAG_EXCLUSIVE)
465                 open_flags |= O_EXCL;
466         if (flags & IO_FLAG_DIRECT_IO)
467                 open_flags |= O_DIRECT;
468         data->flags = flags;
469
470 #ifdef HAVE_OPEN64
471         data->dev = open64(io->name, open_flags);
472 #else
473         data->dev = open(io->name, open_flags);
474 #endif
475         if (data->dev < 0) {
476                 retval = errno;
477                 goto cleanup;
478         }
479
480 #ifdef BLKSSZGET
481         if (flags & IO_FLAG_DIRECT_IO) {
482                 if (ioctl(data->dev, BLKSSZGET, &data->align) != 0)
483                         data->align = io->block_size;
484         }
485 #endif
486
487 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
488         /*
489          * Some operating systems require that the buffers be aligned,
490          * regardless of O_DIRECT
491          */
492         data->align = 512;
493 #endif
494
495
496         if ((retval = alloc_cache(io, data)))
497                 goto cleanup;
498
499 #ifdef BLKROGET
500         if (flags & IO_FLAG_RW) {
501                 int error;
502                 int readonly = 0;
503
504                 /* Is the block device actually writable? */
505                 error = ioctl(data->dev, BLKROGET, &readonly);
506                 if (!error && readonly) {
507                         close(data->dev);
508                         retval = EPERM;
509                         goto cleanup;
510                 }
511         }
512 #endif
513
514 #ifdef __linux__
515 #undef RLIM_INFINITY
516 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
517 #define RLIM_INFINITY   ((unsigned long)(~0UL>>1))
518 #else
519 #define RLIM_INFINITY  (~0UL)
520 #endif
521         /*
522          * Work around a bug in 2.4.10-2.4.18 kernels where writes to
523          * block devices are wrongly getting hit by the filesize
524          * limit.  This workaround isn't perfect, since it won't work
525          * if glibc wasn't built against 2.2 header files.  (Sigh.)
526          *
527          */
528         if ((flags & IO_FLAG_RW) &&
529             (uname(&ut) == 0) &&
530             ((ut.release[0] == '2') && (ut.release[1] == '.') &&
531              (ut.release[2] == '4') && (ut.release[3] == '.') &&
532              (ut.release[4] == '1') && (ut.release[5] >= '0') &&
533              (ut.release[5] < '8')) &&
534             (fstat(data->dev, &st) == 0) &&
535             (S_ISBLK(st.st_mode))) {
536                 struct rlimit   rlim;
537
538                 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
539                 setrlimit(RLIMIT_FSIZE, &rlim);
540                 getrlimit(RLIMIT_FSIZE, &rlim);
541                 if (((unsigned long) rlim.rlim_cur) <
542                     ((unsigned long) rlim.rlim_max)) {
543                         rlim.rlim_cur = rlim.rlim_max;
544                         setrlimit(RLIMIT_FSIZE, &rlim);
545                 }
546         }
547 #endif
548         *channel = io;
549         return 0;
550
551 cleanup:
552         if (data) {
553                 free_cache(data);
554                 ext2fs_free_mem(&data);
555         }
556         if (io)
557                 ext2fs_free_mem(&io);
558         return retval;
559 }
560
561 static errcode_t unix_close(io_channel channel)
562 {
563         struct unix_private_data *data;
564         errcode_t       retval = 0;
565
566         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
567         data = (struct unix_private_data *) channel->private_data;
568         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
569
570         if (--channel->refcount > 0)
571                 return 0;
572
573 #ifndef NO_IO_CACHE
574         retval = flush_cached_blocks(channel, data, 0);
575 #endif
576
577         if (close(data->dev) < 0)
578                 retval = errno;
579         free_cache(data);
580
581         ext2fs_free_mem(&channel->private_data);
582         if (channel->name)
583                 ext2fs_free_mem(&channel->name);
584         ext2fs_free_mem(&channel);
585         return retval;
586 }
587
588 static errcode_t unix_set_blksize(io_channel channel, int blksize)
589 {
590         struct unix_private_data *data;
591         errcode_t               retval;
592
593         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
594         data = (struct unix_private_data *) channel->private_data;
595         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
596
597         if (channel->block_size != blksize) {
598 #ifndef NO_IO_CACHE
599                 if ((retval = flush_cached_blocks(channel, data, 0)))
600                         return retval;
601 #endif
602
603                 channel->block_size = blksize;
604                 free_cache(data);
605                 if ((retval = alloc_cache(channel, data)))
606                         return retval;
607         }
608         return 0;
609 }
610
611
612 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
613                                int count, void *buf)
614 {
615         struct unix_private_data *data;
616         struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
617         errcode_t       retval;
618         char            *cp;
619         int             i, j;
620
621         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
622         data = (struct unix_private_data *) channel->private_data;
623         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
624
625 #ifdef NO_IO_CACHE
626         return raw_read_blk(channel, data, block, count, buf);
627 #else
628         /*
629          * If we're doing an odd-sized read or a very large read,
630          * flush out the cache and then do a direct read.
631          */
632         if (count < 0 || count > WRITE_DIRECT_SIZE) {
633                 if ((retval = flush_cached_blocks(channel, data, 0)))
634                         return retval;
635                 return raw_read_blk(channel, data, block, count, buf);
636         }
637
638         cp = buf;
639         while (count > 0) {
640                 /* If it's in the cache, use it! */
641                 if ((cache = find_cached_block(data, block, &reuse[0]))) {
642 #ifdef DEBUG
643                         printf("Using cached block %lu\n", block);
644 #endif
645                         memcpy(cp, cache->buf, channel->block_size);
646                         count--;
647                         block++;
648                         cp += channel->block_size;
649                         continue;
650                 }
651                 if (count == 1) {
652                         /*
653                          * Special case where we read directly into the
654                          * cache buffer; important in the O_DIRECT case
655                          */
656                         cache = reuse[0];
657                         reuse_cache(channel, data, cache, block);
658                         if ((retval = raw_read_blk(channel, data, block, 1,
659                                                    cache->buf))) {
660                                 cache->in_use = 0;
661                                 return retval;
662                         }
663                         memcpy(cp, cache->buf, channel->block_size);
664                         return 0;
665                 }
666
667                 /*
668                  * Find the number of uncached blocks so we can do a
669                  * single read request
670                  */
671                 for (i=1; i < count; i++)
672                         if (find_cached_block(data, block+i, &reuse[i]))
673                                 break;
674 #ifdef DEBUG
675                 printf("Reading %d blocks starting at %lu\n", i, block);
676 #endif
677                 if ((retval = raw_read_blk(channel, data, block, i, cp)))
678                         return retval;
679
680                 /* Save the results in the cache */
681                 for (j=0; j < i; j++) {
682                         count--;
683                         cache = reuse[j];
684                         reuse_cache(channel, data, cache, block++);
685                         memcpy(cache->buf, cp, channel->block_size);
686                         cp += channel->block_size;
687                 }
688         }
689         return 0;
690 #endif /* NO_IO_CACHE */
691 }
692
693 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
694                                int count, void *buf)
695 {
696         return unix_read_blk64(channel, block, count, buf);
697 }
698
699 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
700                                 int count, const void *buf)
701 {
702         struct unix_private_data *data;
703         struct unix_cache *cache, *reuse;
704         errcode_t       retval = 0;
705         const char      *cp;
706         int             writethrough;
707
708         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
709         data = (struct unix_private_data *) channel->private_data;
710         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
711
712 #ifdef NO_IO_CACHE
713         return raw_write_blk(channel, data, block, count, buf);
714 #else
715         /*
716          * If we're doing an odd-sized write or a very large write,
717          * flush out the cache completely and then do a direct write.
718          */
719         if (count < 0 || count > WRITE_DIRECT_SIZE) {
720                 if ((retval = flush_cached_blocks(channel, data, 1)))
721                         return retval;
722                 return raw_write_blk(channel, data, block, count, buf);
723         }
724
725         /*
726          * For a moderate-sized multi-block write, first force a write
727          * if we're in write-through cache mode, and then fill the
728          * cache with the blocks.
729          */
730         writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
731         if (writethrough)
732                 retval = raw_write_blk(channel, data, block, count, buf);
733
734         cp = buf;
735         while (count > 0) {
736                 cache = find_cached_block(data, block, &reuse);
737                 if (!cache) {
738                         cache = reuse;
739                         reuse_cache(channel, data, cache, block);
740                 }
741                 memcpy(cache->buf, cp, channel->block_size);
742                 cache->dirty = !writethrough;
743                 count--;
744                 block++;
745                 cp += channel->block_size;
746         }
747         return retval;
748 #endif /* NO_IO_CACHE */
749 }
750
751 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
752                                 int count, const void *buf)
753 {
754         return unix_write_blk64(channel, block, count, buf);
755 }
756
757 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
758                                  int size, const void *buf)
759 {
760         struct unix_private_data *data;
761         errcode_t       retval = 0;
762         ssize_t         actual;
763
764         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
765         data = (struct unix_private_data *) channel->private_data;
766         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
767
768         if (data->align != 0) {
769 #ifdef ALIGN_DEBUG
770                 printf("unix_write_byte: O_DIRECT fallback\n");
771 #endif
772                 return EXT2_ET_UNIMPLEMENTED;
773         }
774
775 #ifndef NO_IO_CACHE
776         /*
777          * Flush out the cache completely
778          */
779         if ((retval = flush_cached_blocks(channel, data, 1)))
780                 return retval;
781 #endif
782
783         if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
784                 return errno;
785
786         actual = write(data->dev, buf, size);
787         if (actual != size)
788                 return EXT2_ET_SHORT_WRITE;
789
790         return 0;
791 }
792
793 /*
794  * Flush data buffers to disk.
795  */
796 static errcode_t unix_flush(io_channel channel)
797 {
798         struct unix_private_data *data;
799         errcode_t retval = 0;
800
801         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
802         data = (struct unix_private_data *) channel->private_data;
803         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
804
805 #ifndef NO_IO_CACHE
806         retval = flush_cached_blocks(channel, data, 0);
807 #endif
808         fsync(data->dev);
809         return retval;
810 }
811
812 static errcode_t unix_set_option(io_channel channel, const char *option,
813                                  const char *arg)
814 {
815         struct unix_private_data *data;
816         unsigned long long tmp;
817         char *end;
818
819         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
820         data = (struct unix_private_data *) channel->private_data;
821         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
822
823         if (!strcmp(option, "offset")) {
824                 if (!arg)
825                         return EXT2_ET_INVALID_ARGUMENT;
826
827                 tmp = strtoull(arg, &end, 0);
828                 if (*end)
829                         return EXT2_ET_INVALID_ARGUMENT;
830                 data->offset = tmp;
831                 if (data->offset < 0)
832                         return EXT2_ET_INVALID_ARGUMENT;
833                 return 0;
834         }
835         return EXT2_ET_INVALID_ARGUMENT;
836 }