Whamcloud - gitweb
libext2fs: unix_io: fix potential error path deadlock in reuse_cache()
[tools/e2fsprogs.git] / lib / ext2fs / unix_io.c
1 /*
2  * unix_io.c --- This is the Unix (well, really POSIX) implementation
3  *      of the I/O manager.
4  *
5  * Implements a one-block write-through cache.
6  *
7  * Includes support for Windows NT under Cygwin.
8  *
9  * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10  *      2002 by Theodore Ts'o.
11  *
12  * %Begin-Header%
13  * This file may be redistributed under the terms of the GNU Library
14  * General Public License, version 2.
15  * %End-Header%
16  */
17
18 #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
19 #define _XOPEN_SOURCE 600
20 #define _DARWIN_C_SOURCE
21 #define _FILE_OFFSET_BITS 64
22 #ifndef _LARGEFILE_SOURCE
23 #define _LARGEFILE_SOURCE
24 #endif
25 #ifndef _LARGEFILE64_SOURCE
26 #define _LARGEFILE64_SOURCE
27 #endif
28 #ifndef _GNU_SOURCE
29 #define _GNU_SOURCE
30 #endif
31 #endif
32
33 #include "config.h"
34 #include <stdio.h>
35 #include <string.h>
36 #if HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 #if HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #include <fcntl.h>
43 #include <time.h>
44 #ifdef __linux__
45 #include <sys/utsname.h>
46 #endif
47 #if HAVE_SYS_TYPES_H
48 #include <sys/types.h>
49 #endif
50 #ifdef HAVE_SYS_IOCTL_H
51 #include <sys/ioctl.h>
52 #endif
53 #ifdef HAVE_SYS_MOUNT_H
54 #include <sys/mount.h>
55 #endif
56 #ifdef HAVE_SYS_PRCTL_H
57 #include <sys/prctl.h>
58 #else
59 #define PR_GET_DUMPABLE 3
60 #endif
61 #if HAVE_SYS_STAT_H
62 #include <sys/stat.h>
63 #endif
64 #if HAVE_SYS_RESOURCE_H
65 #include <sys/resource.h>
66 #endif
67 #if HAVE_LINUX_FALLOC_H
68 #include <linux/falloc.h>
69 #endif
70 #ifdef HAVE_PTHREAD
71 #include <pthread.h>
72 #endif
73
74 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
75 #define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
76 #endif
77
78 #undef ALIGN_DEBUG
79
80 #include "ext2_fs.h"
81 #include "ext2fs.h"
82 #include "ext2fsP.h"
83
84 /*
85  * For checking structure magic numbers...
86  */
87
/*
 * Return `code` from the enclosing function when the object's magic field
 * does not equal `code`.  The body is wrapped in do/while(0) so the macro
 * expands to exactly one statement and cannot capture a following `else`.
 */
#define EXT2_CHECK_MAGIC(struct, code) \
	do { \
		if ((struct)->magic != (code)) \
			return (code); \
	} while (0)
90
/* One slot of the per-channel block cache. */
struct unix_cache {
	char			*buf;		/* holds the data of one block */
	unsigned long long	block;		/* block number cached in this slot */
	int			access_time;	/* stamp taken from the channel's counter */
	unsigned int		dirty:1;	/* buf still has to be written out */
	unsigned int		in_use:1;	/* slot currently holds a block */
	unsigned int		write_err:1;	/* write-back of this slot failed */
};
99
100 #define CACHE_SIZE 8
101 #define WRITE_DIRECT_SIZE 4     /* Must be smaller than CACHE_SIZE */
102 #define READ_DIRECT_SIZE 4      /* Should be smaller than CACHE_SIZE */
103
104 struct unix_private_data {
105         int     magic;
106         int     dev;
107         int     flags;
108         int     align;
109         int     access_time;
110         ext2_loff_t offset;
111         struct unix_cache cache[CACHE_SIZE];
112         void    *bounce;
113         struct struct_io_stats io_stats;
114 #ifdef HAVE_PTHREAD
115         pthread_mutex_t cache_mutex;
116         pthread_mutex_t bounce_mutex;
117         pthread_mutex_t stats_mutex;
118 #endif
119 };
120
/*
 * True when n (an integer or a pointer) is a multiple of align.  The mask
 * test is only meaningful for power-of-two alignments.  The argument is
 * parenthesized before the cast so that an expression such as `p + 1` is
 * converted as a whole rather than having only `p` cast.
 */
#define IS_ALIGNED(n, align) ((((uintptr_t) (n)) & \
			       ((uintptr_t) ((align)-1))) == 0)
123
/* Names the per-channel mutex that a locking helper should act on. */
typedef enum lock_kind {
	CACHE_MTX,
	BOUNCE_MTX,
	STATS_MTX
} kind_t;
127
128 #ifdef HAVE_PTHREAD
129 static inline pthread_mutex_t *get_mutex(struct unix_private_data *data,
130                                          kind_t kind)
131 {
132         if (data->flags & IO_FLAG_THREADS) {
133                 switch (kind) {
134                 case CACHE_MTX:
135                         return &data->cache_mutex;
136                 case BOUNCE_MTX:
137                         return &data->bounce_mutex;
138                 case STATS_MTX:
139                         return &data->stats_mutex;
140                 }
141         }
142         return NULL;
143 }
144 #endif
145
146 static inline void mutex_lock(struct unix_private_data *data, kind_t kind)
147 {
148 #ifdef HAVE_PTHREAD
149         pthread_mutex_t *mtx = get_mutex(data,kind);
150
151         if (mtx)
152                 pthread_mutex_lock(mtx);
153 #endif
154 }
155
156 static inline void mutex_unlock(struct unix_private_data *data, kind_t kind)
157 {
158 #ifdef HAVE_PTHREAD
159         pthread_mutex_t *mtx = get_mutex(data,kind);
160
161         if (mtx)
162                 pthread_mutex_unlock(mtx);
163 #endif
164 }
165
166 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
167 {
168         errcode_t       retval = 0;
169
170         struct unix_private_data *data;
171
172         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
173         data = (struct unix_private_data *) channel->private_data;
174         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
175
176         if (stats) {
177                 mutex_lock(data, STATS_MTX);
178                 *stats = &data->io_stats;
179                 mutex_unlock(data, STATS_MTX);
180         }
181
182         return retval;
183 }
184
/*
 * getenv() wrapper that refuses to consult the environment when the
 * real and effective user or group ids differ, or (where prctl is
 * available) when PR_GET_DUMPABLE reports 0.  Returns NULL in those
 * cases; otherwise returns whatever the platform's secure_getenv /
 * __secure_getenv / getenv returns for arg.
 */
static char *safe_getenv(const char *arg)
{
	int ids_differ = (getuid() != geteuid()) || (getgid() != getegid());

	if (ids_differ)
		return NULL;
#ifdef HAVE_PRCTL
	if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0) == 0)
		return NULL;
#elif (defined(linux) && defined(SYS_prctl))
	if (syscall(SYS_prctl, PR_GET_DUMPABLE, 0, 0, 0, 0) == 0)
		return NULL;
#endif

#if defined(HAVE_SECURE_GETENV)
	return secure_getenv(arg);
#elif defined(HAVE___SECURE_GETENV)
	return __secure_getenv(arg);
#else
	return getenv(arg);
#endif
}
207
208 /*
209  * Here are the raw I/O functions
210  */
211 static errcode_t raw_read_blk(io_channel channel,
212                               struct unix_private_data *data,
213                               unsigned long long block,
214                               int count, void *bufv)
215 {
216         errcode_t       retval;
217         ssize_t         size;
218         ext2_loff_t     location;
219         int             actual = 0;
220         unsigned char   *buf = bufv;
221         ssize_t         really_read = 0;
222         unsigned long long aligned_blk;
223         int             align_size, offset;
224
225         size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size;
226         mutex_lock(data, STATS_MTX);
227         data->io_stats.bytes_read += size;
228         mutex_unlock(data, STATS_MTX);
229         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
230
231         if (data->flags & IO_FLAG_FORCE_BOUNCE)
232                 goto bounce_read;
233
234 #ifdef HAVE_PREAD64
235         /* Try an aligned pread */
236         if ((channel->align == 0) ||
237             (IS_ALIGNED(buf, channel->align) &&
238              IS_ALIGNED(location, channel->align) &&
239              IS_ALIGNED(size, channel->align))) {
240                 actual = pread64(data->dev, buf, size, location);
241                 if (actual == size)
242                         return 0;
243                 actual = 0;
244         }
245 #elif HAVE_PREAD
246         /* Try an aligned pread */
247         if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
248             ((channel->align == 0) ||
249              (IS_ALIGNED(buf, channel->align) &&
250               IS_ALIGNED(location, channel->align) &&
251               IS_ALIGNED(size, channel->align)))) {
252                 actual = pread(data->dev, buf, size, location);
253                 if (actual == size)
254                         return 0;
255                 actual = 0;
256         }
257 #endif /* HAVE_PREAD */
258
259         if ((channel->align == 0) ||
260             (IS_ALIGNED(buf, channel->align) &&
261              IS_ALIGNED(location, channel->align) &&
262              IS_ALIGNED(size, channel->align))) {
263                 mutex_lock(data, BOUNCE_MTX);
264                 if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
265                         retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
266                         goto error_unlock;
267                 }
268                 actual = read(data->dev, buf, size);
269                 if (actual != size) {
270                 short_read:
271                         if (actual < 0) {
272                                 retval = errno;
273                                 actual = 0;
274                         } else
275                                 retval = EXT2_ET_SHORT_READ;
276                         goto error_unlock;
277                 }
278                 goto success_unlock;
279         }
280
281 #ifdef ALIGN_DEBUG
282         printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
283                (unsigned long) size);
284 #endif
285
286         /*
287          * The buffer or size which we're trying to read isn't aligned
288          * to the O_DIRECT rules, so we need to do this the hard way...
289          */
290 bounce_read:
291         if (channel->align == 0)
292                 channel->align = 1;
293         if ((channel->block_size > channel->align) &&
294             (channel->block_size % channel->align) == 0)
295                 align_size = channel->block_size;
296         else
297                 align_size = channel->align;
298         aligned_blk = location / align_size;
299         offset = location % align_size;
300
301         mutex_lock(data, BOUNCE_MTX);
302         if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
303                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
304                 goto error_unlock;
305         }
306         while (size > 0) {
307                 actual = read(data->dev, data->bounce, align_size);
308                 if (actual != align_size) {
309                         mutex_unlock(data, BOUNCE_MTX);
310                         actual = really_read;
311                         buf -= really_read;
312                         size += really_read;
313                         goto short_read;
314                 }
315                 if ((actual + offset) > align_size)
316                         actual = align_size - offset;
317                 if (actual > size)
318                         actual = size;
319                 memcpy(buf, (char *)data->bounce + offset, actual);
320
321                 really_read += actual;
322                 size -= actual;
323                 buf += actual;
324                 offset = 0;
325                 aligned_blk++;
326         }
327 success_unlock:
328         mutex_unlock(data, BOUNCE_MTX);
329         return 0;
330
331 error_unlock:
332         mutex_unlock(data, BOUNCE_MTX);
333         if (actual >= 0 && actual < size)
334                 memset((char *) buf+actual, 0, size-actual);
335         if (channel->read_error)
336                 retval = (channel->read_error)(channel, block, count, buf,
337                                                size, actual, retval);
338         return retval;
339 }
340
341 #define RAW_WRITE_NO_HANDLER    1
342
343 static errcode_t raw_write_blk(io_channel channel,
344                                struct unix_private_data *data,
345                                unsigned long long block,
346                                int count, const void *bufv,
347                                int flags)
348 {
349         ssize_t         size;
350         ext2_loff_t     location;
351         int             actual = 0;
352         errcode_t       retval;
353         const unsigned char *buf = bufv;
354         unsigned long long aligned_blk;
355         int             align_size, offset;
356
357         if (count == 1)
358                 size = channel->block_size;
359         else {
360                 if (count < 0)
361                         size = -count;
362                 else
363                         size = (ext2_loff_t) count * channel->block_size;
364         }
365         mutex_lock(data, STATS_MTX);
366         data->io_stats.bytes_written += size;
367         mutex_unlock(data, STATS_MTX);
368
369         location = ((ext2_loff_t) block * channel->block_size) + data->offset;
370
371         if (data->flags & IO_FLAG_FORCE_BOUNCE)
372                 goto bounce_write;
373
374 #ifdef HAVE_PWRITE64
375         /* Try an aligned pwrite */
376         if ((channel->align == 0) ||
377             (IS_ALIGNED(buf, channel->align) &&
378              IS_ALIGNED(location, channel->align) &&
379              IS_ALIGNED(size, channel->align))) {
380                 actual = pwrite64(data->dev, buf, size, location);
381                 if (actual == size)
382                         return 0;
383         }
384 #elif HAVE_PWRITE
385         /* Try an aligned pwrite */
386         if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
387             ((channel->align == 0) ||
388              (IS_ALIGNED(buf, channel->align) &&
389               IS_ALIGNED(location, channel->align) &&
390               IS_ALIGNED(size, channel->align)))) {
391                 actual = pwrite(data->dev, buf, size, location);
392                 if (actual == size)
393                         return 0;
394         }
395 #endif /* HAVE_PWRITE */
396
397         if ((channel->align == 0) ||
398             (IS_ALIGNED(buf, channel->align) &&
399              IS_ALIGNED(location, channel->align) &&
400              IS_ALIGNED(size, channel->align))) {
401                 mutex_lock(data, BOUNCE_MTX);
402                 if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
403                         retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
404                         goto error_unlock;
405                 }
406                 actual = write(data->dev, buf, size);
407                 mutex_unlock(data, BOUNCE_MTX);
408                 if (actual < 0) {
409                         retval = errno;
410                         goto error_out;
411                 }
412                 if (actual != size) {
413                 short_write:
414                         retval = EXT2_ET_SHORT_WRITE;
415                         goto error_out;
416                 }
417                 return 0;
418         }
419
420 #ifdef ALIGN_DEBUG
421         printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
422                (unsigned long) size);
423 #endif
424         /*
425          * The buffer or size which we're trying to write isn't aligned
426          * to the O_DIRECT rules, so we need to do this the hard way...
427          */
428 bounce_write:
429         if (channel->align == 0)
430                 channel->align = 1;
431         if ((channel->block_size > channel->align) &&
432             (channel->block_size % channel->align) == 0)
433                 align_size = channel->block_size;
434         else
435                 align_size = channel->align;
436         aligned_blk = location / align_size;
437         offset = location % align_size;
438
439         while (size > 0) {
440                 int actual_w;
441
442                 mutex_lock(data, BOUNCE_MTX);
443                 if (size < align_size || offset) {
444                         if (ext2fs_llseek(data->dev, aligned_blk * align_size,
445                                           SEEK_SET) < 0) {
446                                 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
447                                 goto error_unlock;
448                         }
449                         actual = read(data->dev, data->bounce,
450                                       align_size);
451                         if (actual != align_size) {
452                                 if (actual < 0) {
453                                         retval = errno;
454                                         goto error_unlock;
455                                 }
456                                 memset((char *) data->bounce + actual, 0,
457                                        align_size - actual);
458                         }
459                 }
460                 actual = size;
461                 if ((actual + offset) > align_size)
462                         actual = align_size - offset;
463                 if (actual > size)
464                         actual = size;
465                 memcpy(((char *)data->bounce) + offset, buf, actual);
466                 if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
467                         retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
468                         goto error_unlock;
469                 }
470                 actual_w = write(data->dev, data->bounce, align_size);
471                 mutex_unlock(data, BOUNCE_MTX);
472                 if (actual_w < 0) {
473                         retval = errno;
474                         goto error_out;
475                 }
476                 if (actual_w != align_size)
477                         goto short_write;
478                 size -= actual;
479                 buf += actual;
480                 location += actual;
481                 aligned_blk++;
482                 offset = 0;
483         }
484         return 0;
485
486 error_unlock:
487         mutex_unlock(data, BOUNCE_MTX);
488 error_out:
489         if (((flags & RAW_WRITE_NO_HANDLER) == 0) && channel->write_error)
490                 retval = (channel->write_error)(channel, block, count, buf,
491                                                 size, actual, retval);
492         return retval;
493 }
494
495
496 /*
497  * Here we implement the cache functions
498  */
499
500 /* Allocate the cache buffers */
501 static errcode_t alloc_cache(io_channel channel,
502                              struct unix_private_data *data)
503 {
504         errcode_t               retval;
505         struct unix_cache       *cache;
506         int                     i;
507
508         data->access_time = 0;
509         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
510                 cache->block = 0;
511                 cache->access_time = 0;
512                 cache->dirty = 0;
513                 cache->in_use = 0;
514                 if (cache->buf)
515                         ext2fs_free_mem(&cache->buf);
516                 retval = io_channel_alloc_buf(channel, 0, &cache->buf);
517                 if (retval)
518                         return retval;
519         }
520         if (channel->align || data->flags & IO_FLAG_FORCE_BOUNCE) {
521                 if (data->bounce)
522                         ext2fs_free_mem(&data->bounce);
523                 retval = io_channel_alloc_buf(channel, 0, &data->bounce);
524         }
525         return retval;
526 }
527
528 /* Free the cache buffers */
529 static void free_cache(struct unix_private_data *data)
530 {
531         struct unix_cache       *cache;
532         int                     i;
533
534         data->access_time = 0;
535         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
536                 cache->block = 0;
537                 cache->access_time = 0;
538                 cache->dirty = 0;
539                 cache->in_use = 0;
540                 if (cache->buf)
541                         ext2fs_free_mem(&cache->buf);
542         }
543         if (data->bounce)
544                 ext2fs_free_mem(&data->bounce);
545 }
546
547 #ifndef NO_IO_CACHE
548 /*
549  * Try to find a block in the cache.  If the block is not found, and
550  * eldest is a non-zero pointer, then fill in eldest with the cache
551  * entry that should be reused.
552  */
553 static struct unix_cache *find_cached_block(struct unix_private_data *data,
554                                             unsigned long long block,
555                                             struct unix_cache **eldest)
556 {
557         struct unix_cache       *cache, *unused_cache, *oldest_cache;
558         int                     i;
559
560         unused_cache = oldest_cache = 0;
561         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
562                 if (!cache->in_use) {
563                         if (!unused_cache)
564                                 unused_cache = cache;
565                         continue;
566                 }
567                 if (cache->block == block) {
568                         cache->access_time = ++data->access_time;
569                         return cache;
570                 }
571                 if (!oldest_cache ||
572                     (cache->access_time < oldest_cache->access_time))
573                         oldest_cache = cache;
574         }
575         if (eldest)
576                 *eldest = (unused_cache) ? unused_cache : oldest_cache;
577         return 0;
578 }
579
580 /*
581  * Reuse a particular cache entry for another block.
582  */
583 static errcode_t reuse_cache(io_channel channel,
584                 struct unix_private_data *data, struct unix_cache *cache,
585                 unsigned long long block)
586 {
587         if (cache->dirty && cache->in_use) {
588                 errcode_t retval;
589
590                 retval = raw_write_blk(channel, data, cache->block, 1,
591                                        cache->buf, RAW_WRITE_NO_HANDLER);
592                 if (retval) {
593                         cache->write_err = 1;
594                         return retval;
595                 }
596         }
597
598         cache->in_use = 1;
599         cache->dirty = 0;
600         cache->write_err = 0;
601         cache->block = block;
602         cache->access_time = ++data->access_time;
603         return 0;
604 }
605
606 #define FLUSH_INVALIDATE        0x01
607 #define FLUSH_NOLOCK            0x02
608
609 /*
610  * Flush all of the blocks in the cache
611  */
612 static errcode_t flush_cached_blocks(io_channel channel,
613                                      struct unix_private_data *data,
614                                      int flags)
615 {
616         struct unix_cache       *cache;
617         errcode_t               retval, retval2;
618         int                     i;
619
620         retval2 = 0;
621         if ((flags & FLUSH_NOLOCK) == 0)
622                 mutex_lock(data, CACHE_MTX);
623         for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
624                 if (!cache->in_use)
625                         continue;
626
627                 if (flags & FLUSH_INVALIDATE)
628                         cache->in_use = 0;
629
630                 if (!cache->dirty)
631                         continue;
632
633                 retval = raw_write_blk(channel, data,
634                                        cache->block, 1, cache->buf, 0);
635                 if (retval)
636                         retval2 = retval;
637                 else
638                         cache->dirty = 0;
639         }
640         if ((flags & FLUSH_NOLOCK) == 0)
641                 mutex_unlock(data, CACHE_MTX);
642         return retval2;
643 }
644 #endif /* NO_IO_CACHE */
645
646 #ifdef __linux__
647 #ifndef BLKDISCARDZEROES
648 #define BLKDISCARDZEROES _IO(0x12,124)
649 #endif
650 #endif
651
/*
 * Open pathname with open64() where available (and not on the platform
 * flagged by __OSX_AVAILABLE_BUT_DEPRECATED), otherwise with open().
 * The mode argument is only passed through when it is non-zero.
 * Returns whatever the underlying open call returns.
 */
int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
{
#if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
	return mode ? open64(pathname, flags, mode)
		    : open64(pathname, flags);
#else
	return mode ? open(pathname, flags, mode)
		    : open(pathname, flags);
#endif
}
665
666 int ext2fs_stat(const char *path, ext2fs_struct_stat *buf)
667 {
668 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
669         return stat64(path, buf);
670 #else
671         return stat(path, buf);
672 #endif
673 }
674
675 int ext2fs_fstat(int fd, ext2fs_struct_stat *buf)
676 {
677 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
678         return fstat64(fd, buf);
679 #else
680         return fstat(fd, buf);
681 #endif
682 }
683
684
685 static errcode_t unix_open_channel(const char *name, int fd,
686                                    int flags, io_channel *channel,
687                                    io_manager io_mgr)
688 {
689         io_channel      io = NULL;
690         struct unix_private_data *data = NULL;
691         errcode_t       retval;
692         ext2fs_struct_stat st;
693 #ifdef __linux__
694         struct          utsname ut;
695 #endif
696
697         if (safe_getenv("UNIX_IO_FORCE_BOUNCE"))
698                 flags |= IO_FLAG_FORCE_BOUNCE;
699
700 #ifdef __linux__
701         /*
702          * We need to make sure any previous errors in the block
703          * device are thrown away, sigh.
704          */
705         (void) fsync(fd);
706 #endif
707
708         retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
709         if (retval)
710                 goto cleanup;
711         memset(io, 0, sizeof(struct struct_io_channel));
712         io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
713         retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
714         if (retval)
715                 goto cleanup;
716
717         io->manager = io_mgr;
718         retval = ext2fs_get_mem(strlen(name)+1, &io->name);
719         if (retval)
720                 goto cleanup;
721
722         strcpy(io->name, name);
723         io->private_data = data;
724         io->block_size = 1024;
725         io->read_error = 0;
726         io->write_error = 0;
727         io->refcount = 1;
728         io->flags = 0;
729
730         memset(data, 0, sizeof(struct unix_private_data));
731         data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
732         data->io_stats.num_fields = 2;
733         data->flags = flags;
734         data->dev = fd;
735
736 #if defined(O_DIRECT)
737         if (flags & IO_FLAG_DIRECT_IO)
738                 io->align = ext2fs_get_dio_alignment(data->dev);
739 #elif defined(F_NOCACHE)
740         if (flags & IO_FLAG_DIRECT_IO)
741                 io->align = 4096;
742 #endif
743
744         /*
745          * If the device is really a block device, then set the
746          * appropriate flag, otherwise we can set DISCARD_ZEROES flag
747          * because we are going to use punch hole instead of discard
748          * and if it succeed, subsequent read from sparse area returns
749          * zero.
750          */
751         if (ext2fs_fstat(data->dev, &st) == 0) {
752                 if (ext2fsP_is_disk_device(st.st_mode))
753                         io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
754                 else
755                         io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
756         }
757
758 #ifdef BLKDISCARDZEROES
759         {
760                 int zeroes = 0;
761                 if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 &&
762                     zeroes)
763                         io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
764         }
765 #endif
766
767 #if defined(__CYGWIN__)
768         /*
769          * Some operating systems require that the buffers be aligned,
770          * regardless of O_DIRECT
771          */
772         if (!io->align)
773                 io->align = 512;
774 #endif
775
776 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
777         if (io->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
778                 int dio_align = ext2fs_get_dio_alignment(fd);
779
780                 if (io->align < dio_align)
781                         io->align = dio_align;
782         }
783 #endif
784
785         if ((retval = alloc_cache(io, data)))
786                 goto cleanup;
787
788 #ifdef BLKROGET
789         if (flags & IO_FLAG_RW) {
790                 int error;
791                 int readonly = 0;
792
793                 /* Is the block device actually writable? */
794                 error = ioctl(data->dev, BLKROGET, &readonly);
795                 if (!error && readonly) {
796                         retval = EPERM;
797                         goto cleanup;
798                 }
799         }
800 #endif
801
802 #ifdef __linux__
803 #undef RLIM_INFINITY
804 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
805 #define RLIM_INFINITY   ((unsigned long)(~0UL>>1))
806 #else
807 #define RLIM_INFINITY  (~0UL)
808 #endif
809         /*
810          * Work around a bug in 2.4.10-2.4.18 kernels where writes to
811          * block devices are wrongly getting hit by the filesize
812          * limit.  This workaround isn't perfect, since it won't work
813          * if glibc wasn't built against 2.2 header files.  (Sigh.)
814          *
815          */
816         if ((flags & IO_FLAG_RW) &&
817             (uname(&ut) == 0) &&
818             ((ut.release[0] == '2') && (ut.release[1] == '.') &&
819              (ut.release[2] == '4') && (ut.release[3] == '.') &&
820              (ut.release[4] == '1') && (ut.release[5] >= '0') &&
821              (ut.release[5] < '8')) &&
822             (ext2fs_fstat(data->dev, &st) == 0) &&
823             (ext2fsP_is_disk_device(st.st_mode))) {
824                 struct rlimit   rlim;
825
826                 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
827                 setrlimit(RLIMIT_FSIZE, &rlim);
828                 getrlimit(RLIMIT_FSIZE, &rlim);
829                 if (((unsigned long) rlim.rlim_cur) <
830                     ((unsigned long) rlim.rlim_max)) {
831                         rlim.rlim_cur = rlim.rlim_max;
832                         setrlimit(RLIMIT_FSIZE, &rlim);
833                 }
834         }
835 #endif
836 #ifdef HAVE_PTHREAD
837         if (flags & IO_FLAG_THREADS) {
838                 io->flags |= CHANNEL_FLAGS_THREADS;
839                 retval = pthread_mutex_init(&data->cache_mutex, NULL);
840                 if (retval)
841                         goto cleanup;
842                 retval = pthread_mutex_init(&data->bounce_mutex, NULL);
843                 if (retval) {
844                         pthread_mutex_destroy(&data->cache_mutex);
845                         goto cleanup;
846                 }
847                 retval = pthread_mutex_init(&data->stats_mutex, NULL);
848                 if (retval) {
849                         pthread_mutex_destroy(&data->cache_mutex);
850                         pthread_mutex_destroy(&data->bounce_mutex);
851                         goto cleanup;
852                 }
853         }
854 #endif
855         *channel = io;
856         return 0;
857
858 cleanup:
859         if (data) {
860                 if (data->dev >= 0)
861                         close(data->dev);
862                 free_cache(data);
863                 ext2fs_free_mem(&data);
864         }
865         if (io) {
866                 if (io->name) {
867                         ext2fs_free_mem(&io->name);
868                 }
869                 ext2fs_free_mem(&io);
870         }
871         return retval;
872 }
873
874 static errcode_t unixfd_open(const char *str_fd, int flags,
875                              io_channel *channel)
876 {
877         int fd;
878         int fd_flags;
879
880         fd = atoi(str_fd);
881 #if defined(HAVE_FCNTL)
882         fd_flags = fcntl(fd, F_GETFD);
883         if (fd_flags == -1)
884                 return EBADF;
885
886         flags = 0;
887         if (fd_flags & O_RDWR)
888                 flags |= IO_FLAG_RW;
889         if (fd_flags & O_EXCL)
890                 flags |= IO_FLAG_EXCLUSIVE;
891 #if defined(O_DIRECT)
892         if (fd_flags & O_DIRECT)
893                 flags |= IO_FLAG_DIRECT_IO;
894 #endif
895 #endif  /* HAVE_FCNTL */
896
897         return unix_open_channel(str_fd, fd, flags, channel, unixfd_io_manager);
898 }
899
900 static errcode_t unix_open(const char *name, int flags,
901                            io_channel *channel)
902 {
903         int fd = -1;
904         int open_flags;
905
906         if (name == 0)
907                 return EXT2_ET_BAD_DEVICE_NAME;
908
909         open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
910         if (flags & IO_FLAG_EXCLUSIVE)
911                 open_flags |= O_EXCL;
912 #if defined(O_DIRECT)
913         if (flags & IO_FLAG_DIRECT_IO)
914                 open_flags |= O_DIRECT;
915 #endif
916         fd = ext2fs_open_file(name, open_flags, 0);
917         if (fd < 0)
918                 return errno;
919 #if defined(F_NOCACHE) && !defined(IO_DIRECT)
920         if (flags & IO_FLAG_DIRECT_IO) {
921                 if (fcntl(fd, F_NOCACHE, 1) < 0)
922                         return errno;
923         }
924 #endif
925         return unix_open_channel(name, fd, flags, channel, unix_io_manager);
926 }
927
928 static errcode_t unix_close(io_channel channel)
929 {
930         struct unix_private_data *data;
931         errcode_t       retval = 0;
932
933         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
934         data = (struct unix_private_data *) channel->private_data;
935         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
936
937         if (--channel->refcount > 0)
938                 return 0;
939
940 #ifndef NO_IO_CACHE
941         retval = flush_cached_blocks(channel, data, 0);
942 #endif
943
944         if (close(data->dev) < 0)
945                 retval = errno;
946         free_cache(data);
947 #ifdef HAVE_PTHREAD
948         if (data->flags & IO_FLAG_THREADS) {
949                 pthread_mutex_destroy(&data->cache_mutex);
950                 pthread_mutex_destroy(&data->bounce_mutex);
951                 pthread_mutex_destroy(&data->stats_mutex);
952         }
953 #endif
954
955         ext2fs_free_mem(&channel->private_data);
956         if (channel->name)
957                 ext2fs_free_mem(&channel->name);
958         ext2fs_free_mem(&channel);
959         return retval;
960 }
961
962 static errcode_t unix_set_blksize(io_channel channel, int blksize)
963 {
964         struct unix_private_data *data;
965         errcode_t               retval = 0;
966
967         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
968         data = (struct unix_private_data *) channel->private_data;
969         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
970
971         if (channel->block_size != blksize) {
972                 mutex_lock(data, CACHE_MTX);
973                 mutex_lock(data, BOUNCE_MTX);
974 #ifndef NO_IO_CACHE
975                 if ((retval = flush_cached_blocks(channel, data, FLUSH_NOLOCK))){
976                         mutex_unlock(data, BOUNCE_MTX);
977                         mutex_unlock(data, CACHE_MTX);
978                         return retval;
979                 }
980 #endif
981
982                 channel->block_size = blksize;
983                 free_cache(data);
984                 retval = alloc_cache(channel, data);
985                 mutex_unlock(data, BOUNCE_MTX);
986                 mutex_unlock(data, CACHE_MTX);
987         }
988         return retval;
989 }
990
991 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
992                                int count, void *buf)
993 {
994         struct unix_private_data *data;
995         struct unix_cache *cache;
996         errcode_t       retval;
997         char            *cp;
998         int             i, j;
999
1000         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1001         data = (struct unix_private_data *) channel->private_data;
1002         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1003
1004 #ifdef NO_IO_CACHE
1005         return raw_read_blk(channel, data, block, count, buf);
1006 #else
1007         if (data->flags & IO_FLAG_NOCACHE)
1008                 return raw_read_blk(channel, data, block, count, buf);
1009         /*
1010          * If we're doing an odd-sized read or a very large read,
1011          * flush out the cache and then do a direct read.
1012          */
1013         if (count < 0 || count > WRITE_DIRECT_SIZE) {
1014                 if ((retval = flush_cached_blocks(channel, data, 0)))
1015                         return retval;
1016                 return raw_read_blk(channel, data, block, count, buf);
1017         }
1018
1019         cp = buf;
1020         mutex_lock(data, CACHE_MTX);
1021         while (count > 0) {
1022                 /* If it's in the cache, use it! */
1023                 if ((cache = find_cached_block(data, block, NULL))) {
1024 #ifdef DEBUG
1025                         printf("Using cached block %lu\n", block);
1026 #endif
1027                         memcpy(cp, cache->buf, channel->block_size);
1028                         count--;
1029                         block++;
1030                         cp += channel->block_size;
1031                         continue;
1032                 }
1033
1034                 /*
1035                  * Find the number of uncached blocks so we can do a
1036                  * single read request
1037                  */
1038                 for (i=1; i < count; i++)
1039                         if (find_cached_block(data, block+i, NULL))
1040                                 break;
1041 #ifdef DEBUG
1042                 printf("Reading %d blocks starting at %lu\n", i, block);
1043 #endif
1044                 mutex_unlock(data, CACHE_MTX);
1045                 if ((retval = raw_read_blk(channel, data, block, i, cp)))
1046                         return retval;
1047                 mutex_lock(data, CACHE_MTX);
1048
1049                 /* Save the results in the cache */
1050                 for (j=0; j < i; j++) {
1051                         if (!find_cached_block(data, block, &cache)) {
1052                                 retval = reuse_cache(channel, data,
1053                                                      cache, block);
1054                                 if (retval)
1055                                         goto call_write_handler;
1056                                 memcpy(cache->buf, cp, channel->block_size);
1057                         }
1058                         count--;
1059                         block++;
1060                         cp += channel->block_size;
1061                 }
1062         }
1063         mutex_unlock(data, CACHE_MTX);
1064         return 0;
1065
1066 call_write_handler:
1067         if (cache->write_err && channel->write_error) {
1068                 char *err_buf = NULL;
1069                 unsigned long long err_block = cache->block;
1070
1071                 cache->dirty = 0;
1072                 cache->in_use = 0;
1073                 cache->write_err = 0;
1074                 if (io_channel_alloc_buf(channel, 0, &err_buf))
1075                         err_buf = NULL;
1076                 else
1077                         memcpy(err_buf, cache->buf, channel->block_size);
1078                 mutex_unlock(data, CACHE_MTX);
1079                 (channel->write_error)(channel, err_block, 1, err_buf,
1080                                        channel->block_size, -1,
1081                                        retval);
1082                 if (err_buf)
1083                         ext2fs_free_mem(&err_buf);
1084         } else
1085                 mutex_unlock(data, CACHE_MTX);
1086         return retval;
1087 #endif /* NO_IO_CACHE */
1088 }
1089
1090 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
1091                                int count, void *buf)
1092 {
1093         return unix_read_blk64(channel, block, count, buf);
1094 }
1095
1096 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
1097                                 int count, const void *buf)
1098 {
1099         struct unix_private_data *data;
1100         struct unix_cache *cache, *reuse;
1101         errcode_t       retval = 0;
1102         const char      *cp;
1103         int             writethrough;
1104
1105         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1106         data = (struct unix_private_data *) channel->private_data;
1107         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1108
1109 #ifdef NO_IO_CACHE
1110         return raw_write_blk(channel, data, block, count, buf, 0);
1111 #else
1112         if (data->flags & IO_FLAG_NOCACHE)
1113                 return raw_write_blk(channel, data, block, count, buf, 0);
1114         /*
1115          * If we're doing an odd-sized write or a very large write,
1116          * flush out the cache completely and then do a direct write.
1117          */
1118         if (count < 0 || count > WRITE_DIRECT_SIZE) {
1119                 if ((retval = flush_cached_blocks(channel, data,
1120                                                   FLUSH_INVALIDATE)))
1121                         return retval;
1122                 return raw_write_blk(channel, data, block, count, buf, 0);
1123         }
1124
1125         /*
1126          * For a moderate-sized multi-block write, first force a write
1127          * if we're in write-through cache mode, and then fill the
1128          * cache with the blocks.
1129          */
1130         writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
1131         if (writethrough)
1132                 retval = raw_write_blk(channel, data, block, count, buf, 0);
1133
1134         cp = buf;
1135         mutex_lock(data, CACHE_MTX);
1136         while (count > 0) {
1137                 cache = find_cached_block(data, block, &reuse);
1138                 if (!cache) {
1139                         errcode_t err;
1140
1141                         cache = reuse;
1142                         err = reuse_cache(channel, data, cache, block);
1143                         if (err)
1144                                 goto call_write_handler;
1145                 }
1146                 if (cache->buf != cp)
1147                         memcpy(cache->buf, cp, channel->block_size);
1148                 cache->dirty = !writethrough;
1149                 count--;
1150                 block++;
1151                 cp += channel->block_size;
1152         }
1153         mutex_unlock(data, CACHE_MTX);
1154         return retval;
1155
1156 call_write_handler:
1157         if (cache->write_err && channel->write_error) {
1158                 char *err_buf = NULL;
1159                 unsigned long long err_block = cache->block;
1160
1161                 cache->dirty = 0;
1162                 cache->in_use = 0;
1163                 cache->write_err = 0;
1164                 if (io_channel_alloc_buf(channel, 0, &err_buf))
1165                         err_buf = NULL;
1166                 else
1167                         memcpy(err_buf, cache->buf, channel->block_size);
1168                 mutex_unlock(data, CACHE_MTX);
1169                 (channel->write_error)(channel, err_block, 1, err_buf,
1170                                        channel->block_size, -1,
1171                                        retval);
1172                 if (err_buf)
1173                         ext2fs_free_mem(&err_buf);
1174         } else
1175                 mutex_unlock(data, CACHE_MTX);
1176         return retval;
1177 #endif /* NO_IO_CACHE */
1178 }
1179
1180 static errcode_t unix_cache_readahead(io_channel channel,
1181                                       unsigned long long block,
1182                                       unsigned long long count)
1183 {
1184 #ifdef POSIX_FADV_WILLNEED
1185         struct unix_private_data *data;
1186
1187         data = (struct unix_private_data *)channel->private_data;
1188         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1189         return posix_fadvise(data->dev,
1190                              (ext2_loff_t)block * channel->block_size + data->offset,
1191                              (ext2_loff_t)count * channel->block_size,
1192                              POSIX_FADV_WILLNEED);
1193 #else
1194         return EXT2_ET_OP_NOT_SUPPORTED;
1195 #endif
1196 }
1197
1198 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
1199                                 int count, const void *buf)
1200 {
1201         return unix_write_blk64(channel, block, count, buf);
1202 }
1203
1204 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
1205                                  int size, const void *buf)
1206 {
1207         struct unix_private_data *data;
1208         errcode_t       retval = 0;
1209         ssize_t         actual;
1210
1211         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1212         data = (struct unix_private_data *) channel->private_data;
1213         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1214
1215         if (channel->align != 0) {
1216 #ifdef ALIGN_DEBUG
1217                 printf("unix_write_byte: O_DIRECT fallback\n");
1218 #endif
1219                 return EXT2_ET_UNIMPLEMENTED;
1220         }
1221
1222 #ifndef NO_IO_CACHE
1223         /*
1224          * Flush out the cache completely
1225          */
1226         if ((retval = flush_cached_blocks(channel, data, FLUSH_INVALIDATE)))
1227                 return retval;
1228 #endif
1229
1230         if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
1231                 return errno;
1232
1233         actual = write(data->dev, buf, size);
1234         if (actual < 0)
1235                 return errno;
1236         if (actual != size)
1237                 return EXT2_ET_SHORT_WRITE;
1238
1239         return 0;
1240 }
1241
1242 /*
1243  * Flush data buffers to disk.
1244  */
1245 static errcode_t unix_flush(io_channel channel)
1246 {
1247         struct unix_private_data *data;
1248         errcode_t retval = 0;
1249
1250         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1251         data = (struct unix_private_data *) channel->private_data;
1252         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1253
1254 #ifndef NO_IO_CACHE
1255         retval = flush_cached_blocks(channel, data, 0);
1256 #endif
1257 #ifdef HAVE_FSYNC
1258         if (!retval && fsync(data->dev) != 0)
1259                 return errno;
1260 #endif
1261         return retval;
1262 }
1263
1264 static errcode_t unix_set_option(io_channel channel, const char *option,
1265                                  const char *arg)
1266 {
1267         struct unix_private_data *data;
1268         unsigned long long tmp;
1269         errcode_t retval;
1270         char *end;
1271
1272         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1273         data = (struct unix_private_data *) channel->private_data;
1274         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1275
1276         if (!strcmp(option, "offset")) {
1277                 if (!arg)
1278                         return EXT2_ET_INVALID_ARGUMENT;
1279
1280                 tmp = strtoull(arg, &end, 0);
1281                 if (*end)
1282                         return EXT2_ET_INVALID_ARGUMENT;
1283                 data->offset = tmp;
1284                 if (data->offset < 0)
1285                         return EXT2_ET_INVALID_ARGUMENT;
1286                 return 0;
1287         }
1288         if (!strcmp(option, "cache")) {
1289                 if (!arg)
1290                         return EXT2_ET_INVALID_ARGUMENT;
1291                 if (!strcmp(arg, "on")) {
1292                         data->flags &= ~IO_FLAG_NOCACHE;
1293                         return 0;
1294                 }
1295                 if (!strcmp(arg, "off")) {
1296                         retval = flush_cached_blocks(channel, data, 0);
1297                         data->flags |= IO_FLAG_NOCACHE;
1298                         return retval;
1299                 }
1300                 return EXT2_ET_INVALID_ARGUMENT;
1301         }
1302         return EXT2_ET_INVALID_ARGUMENT;
1303 }
1304
1305 #if defined(__linux__) && !defined(BLKDISCARD)
1306 #define BLKDISCARD              _IO(0x12,119)
1307 #endif
1308
1309 static errcode_t unix_discard(io_channel channel, unsigned long long block,
1310                               unsigned long long count)
1311 {
1312         struct unix_private_data *data;
1313         int             ret;
1314
1315         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1316         data = (struct unix_private_data *) channel->private_data;
1317         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1318
1319         if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
1320 #ifdef BLKDISCARD
1321                 __u64 range[2];
1322
1323                 range[0] = (__u64)(block) * channel->block_size + data->offset;
1324                 range[1] = (__u64)(count) * channel->block_size;
1325
1326                 ret = ioctl(data->dev, BLKDISCARD, &range);
1327 #else
1328                 goto unimplemented;
1329 #endif
1330         } else {
1331 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
1332                 /*
1333                  * If we are not on block device, try to use punch hole
1334                  * to reclaim free space.
1335                  */
1336                 ret = fallocate(data->dev,
1337                                 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1338                                 (off_t)(block) * channel->block_size + data->offset,
1339                                 (off_t)(count) * channel->block_size);
1340 #else
1341                 goto unimplemented;
1342 #endif
1343         }
1344         if (ret < 0) {
1345                 if (errno == EOPNOTSUPP)
1346                         goto unimplemented;
1347                 return errno;
1348         }
1349         return 0;
1350 unimplemented:
1351         return EXT2_ET_UNIMPLEMENTED;
1352 }
1353
/*
 * Zero a byte range of fd using fallocate().
 *
 * ZERO_RANGE is attempted first, where known, because it does not unmap
 * preallocated blocks; PUNCH_HOLE is the fallback.  fallocate is preferred
 * because it always invalidates the page cache, and libext2fs requires
 * that reads after a zeroout return zeroes.
 *
 * Returns 0 as soon as one method succeeds.  Otherwise returns the last
 * failing result (-1 when no method was compiled in) with errno forced to
 * EOPNOTSUPP.
 */
static int __unix_zeroout(int fd, off_t offset, off_t len)
{
	int rc = -1;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
	rc = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
	if (!rc)
		return 0;
#endif
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	rc = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		       offset, len);
	if (!rc)
		return 0;
#endif
	/* Every failure is reported to the caller as "not supported" */
	errno = EOPNOTSUPP;
	return rc;
}
1378
1379 /* parameters might not be used if OS doesn't support zeroout */
1380 #if __GNUC_PREREQ (4, 6)
1381 #pragma GCC diagnostic push
1382 #pragma GCC diagnostic ignored "-Wunused-parameter"
1383 #endif
1384 static errcode_t unix_zeroout(io_channel channel, unsigned long long block,
1385                               unsigned long long count)
1386 {
1387         struct unix_private_data *data;
1388         int             ret;
1389
1390         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1391         data = (struct unix_private_data *) channel->private_data;
1392         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1393
1394         if (safe_getenv("UNIX_IO_NOZEROOUT"))
1395                 goto unimplemented;
1396
1397         if (!(channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE)) {
1398                 /* Regular file, try to use truncate/punch/zero. */
1399                 struct stat statbuf;
1400
1401                 if (count == 0)
1402                         return 0;
1403                 /*
1404                  * If we're trying to zero a range past the end of the file,
1405                  * extend the file size, then truncate everything.
1406                  */
1407                 ret = fstat(data->dev, &statbuf);
1408                 if (ret)
1409                         goto err;
1410                 if ((unsigned long long) statbuf.st_size <
1411                         (block + count) * channel->block_size + data->offset) {
1412                         ret = ftruncate(data->dev,
1413                                         (block + count) * channel->block_size + data->offset);
1414                         if (ret)
1415                                 goto err;
1416                 }
1417         }
1418
1419         ret = __unix_zeroout(data->dev,
1420                         (off_t)(block) * channel->block_size + data->offset,
1421                         (off_t)(count) * channel->block_size);
1422 err:
1423         if (ret < 0) {
1424                 if (errno == EOPNOTSUPP)
1425                         goto unimplemented;
1426                 return errno;
1427         }
1428         return 0;
1429 unimplemented:
1430         return EXT2_ET_UNIMPLEMENTED;
1431 }
1432 #if __GNUC_PREREQ (4, 6)
1433 #pragma GCC diagnostic pop
1434 #endif
1435
1436 static struct struct_io_manager struct_unix_manager = {
1437         .magic          = EXT2_ET_MAGIC_IO_MANAGER,
1438         .name           = "Unix I/O Manager",
1439         .open           = unix_open,
1440         .close          = unix_close,
1441         .set_blksize    = unix_set_blksize,
1442         .read_blk       = unix_read_blk,
1443         .write_blk      = unix_write_blk,
1444         .flush          = unix_flush,
1445         .write_byte     = unix_write_byte,
1446         .set_option     = unix_set_option,
1447         .get_stats      = unix_get_stats,
1448         .read_blk64     = unix_read_blk64,
1449         .write_blk64    = unix_write_blk64,
1450         .discard        = unix_discard,
1451         .cache_readahead        = unix_cache_readahead,
1452         .zeroout        = unix_zeroout,
1453 };
1454
1455 io_manager unix_io_manager = &struct_unix_manager;
1456
1457 static struct struct_io_manager struct_unixfd_manager = {
1458         .magic          = EXT2_ET_MAGIC_IO_MANAGER,
1459         .name           = "Unix fd I/O Manager",
1460         .open           = unixfd_open,
1461         .close          = unix_close,
1462         .set_blksize    = unix_set_blksize,
1463         .read_blk       = unix_read_blk,
1464         .write_blk      = unix_write_blk,
1465         .flush          = unix_flush,
1466         .write_byte     = unix_write_byte,
1467         .set_option     = unix_set_option,
1468         .get_stats      = unix_get_stats,
1469         .read_blk64     = unix_read_blk64,
1470         .write_blk64    = unix_write_blk64,
1471         .discard        = unix_discard,
1472         .cache_readahead        = unix_cache_readahead,
1473         .zeroout        = unix_zeroout,
1474 };
1475
1476 io_manager unixfd_io_manager = &struct_unixfd_manager;