Whamcloud - gitweb
libext2fs: fix Direct I/O fallback code so it implements RMW correctly
[tools/e2fsprogs.git] / lib / ext2fs / undo_io.c
1 /*
2  * undo_io.c --- This is the undo io manager that copies the old data
3  * being overwritten into a tdb database
4  *
5  * Copyright IBM Corporation, 2007
6  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
7  *
8  * %Begin-Header%
9  * This file may be redistributed under the terms of the GNU Library
10  * General Public License, version 2.
11  * %End-Header%
12  */
13
14 #define _LARGEFILE_SOURCE
15 #define _LARGEFILE64_SOURCE
16
17 #include "config.h"
18 #include <stdio.h>
19 #include <string.h>
20 #if HAVE_UNISTD_H
21 #include <unistd.h>
22 #endif
23 #if HAVE_ERRNO_H
24 #include <errno.h>
25 #endif
26 #include <fcntl.h>
27 #include <time.h>
28 #ifdef __linux__
29 #include <sys/utsname.h>
30 #endif
31 #if HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #if HAVE_SYS_TYPES_H
35 #include <sys/types.h>
36 #endif
37 #if HAVE_SYS_RESOURCE_H
38 #include <sys/resource.h>
39 #endif
40
41 #include "tdb.h"
42
43 #include "ext2_fs.h"
44 #include "ext2fs.h"
45
46 #ifdef __GNUC__
47 #define ATTR(x) __attribute__(x)
48 #else
49 #define ATTR(x)
50 #endif
51
/*
 * For checking structure magic numbers...
 *
 * EXT2_CHECK_MAGIC(struct, code) makes the *calling* function return
 * `code` when (struct)->magic differs from `code`; otherwise execution
 * simply continues.  The body is wrapped in do { } while (0) so that the
 * macro expands to exactly one statement and cannot capture an `else`
 * that follows it in an unbraced if/else.
 */

#define EXT2_CHECK_MAGIC(struct, code) \
        do { \
                if ((struct)->magic != (code)) \
                        return (code); \
        } while (0)
58
59 struct undo_private_data {
60         int     magic;
61         TDB_CONTEXT *tdb;
62         char *tdb_file;
63
64         /* The backing io channel */
65         io_channel real;
66
67         int tdb_data_size;
68         int tdb_written;
69
70         /* to support offset in unix I/O manager */
71         ext2_loff_t offset;
72 };
73
74 static io_manager undo_io_backing_manager;
75 static char *tdb_file;
76 static int actual_size;
77
78 static unsigned char mtime_key[] = "filesystem MTIME";
79 static unsigned char blksize_key[] = "filesystem BLKSIZE";
80 static unsigned char uuid_key[] = "filesystem UUID";
81
82 errcode_t set_undo_io_backing_manager(io_manager manager)
83 {
84         /*
85          * We may want to do some validation later
86          */
87         undo_io_backing_manager = manager;
88         return 0;
89 }
90
91 errcode_t set_undo_io_backup_file(char *file_name)
92 {
93         tdb_file = strdup(file_name);
94
95         if (tdb_file == NULL) {
96                 return EXT2_ET_NO_MEMORY;
97         }
98
99         return 0;
100 }
101
102 static errcode_t write_file_system_identity(io_channel undo_channel,
103                                                         TDB_CONTEXT *tdb)
104 {
105         errcode_t retval;
106         struct ext2_super_block super;
107         TDB_DATA tdb_key, tdb_data;
108         struct undo_private_data *data;
109         io_channel channel;
110         int block_size ;
111
112         data = (struct undo_private_data *) undo_channel->private_data;
113         channel = data->real;
114         block_size = channel->block_size;
115
116         io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
117         retval = io_channel_read_blk64(channel, 1, -SUPERBLOCK_SIZE, &super);
118         if (retval)
119                 goto err_out;
120
121         /* Write to tdb file in the file system byte order */
122         tdb_key.dptr = mtime_key;
123         tdb_key.dsize = sizeof(mtime_key);
124         tdb_data.dptr = (unsigned char *) &(super.s_mtime);
125         tdb_data.dsize = sizeof(super.s_mtime);
126
127         retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
128         if (retval == -1) {
129                 retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
130                 goto err_out;
131         }
132
133         tdb_key.dptr = uuid_key;
134         tdb_key.dsize = sizeof(uuid_key);
135         tdb_data.dptr = (unsigned char *)&(super.s_uuid);
136         tdb_data.dsize = sizeof(super.s_uuid);
137
138         retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
139         if (retval == -1) {
140                 retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
141         }
142
143 err_out:
144         io_channel_set_blksize(channel, block_size);
145         return retval;
146 }
147
148 static errcode_t write_block_size(TDB_CONTEXT *tdb, int block_size)
149 {
150         errcode_t retval;
151         TDB_DATA tdb_key, tdb_data;
152
153         tdb_key.dptr = blksize_key;
154         tdb_key.dsize = sizeof(blksize_key);
155         tdb_data.dptr = (unsigned char *)&(block_size);
156         tdb_data.dsize = sizeof(block_size);
157
158         retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
159         if (retval == -1) {
160                 retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
161         }
162
163         return retval;
164 }
165
166 static errcode_t undo_write_tdb(io_channel channel,
167                                 unsigned long long block, int count)
168
169 {
170         int size, sz;
171         unsigned long long block_num, backing_blk_num;
172         errcode_t retval = 0;
173         ext2_loff_t offset;
174         struct undo_private_data *data;
175         TDB_DATA tdb_key, tdb_data;
176         unsigned char *read_ptr;
177         unsigned long long end_block;
178
179         data = (struct undo_private_data *) channel->private_data;
180
181         if (data->tdb == NULL) {
182                 /*
183                  * Transaction database not initialized
184                  */
185                 return 0;
186         }
187
188         if (count == 1)
189                 size = channel->block_size;
190         else {
191                 if (count < 0)
192                         size = -count;
193                 else
194                         size = count * channel->block_size;
195         }
196         /*
197          * Data is stored in tdb database as blocks of tdb_data_size size
198          * This helps in efficient lookup further.
199          *
200          * We divide the disk to blocks of tdb_data_size.
201          */
202         offset = (block * channel->block_size) + data->offset ;
203         block_num = offset / data->tdb_data_size;
204         end_block = (offset + size) / data->tdb_data_size;
205
206         tdb_transaction_start(data->tdb);
207         while (block_num <= end_block ) {
208
209                 tdb_key.dptr = (unsigned char *)&block_num;
210                 tdb_key.dsize = sizeof(block_num);
211                 /*
212                  * Check if we have the record already
213                  */
214                 if (tdb_exists(data->tdb, tdb_key)) {
215                         /* Try the next block */
216                         block_num++;
217                         continue;
218                 }
219                 /*
220                  * Read one block using the backing I/O manager
221                  * The backing I/O manager block size may be
222                  * different from the tdb_data_size.
223                  * Also we need to recalcuate the block number with respect
224                  * to the backing I/O manager.
225                  */
226                 offset = block_num * data->tdb_data_size;
227                 backing_blk_num = (offset - data->offset) / channel->block_size;
228
229                 count = data->tdb_data_size +
230                                 ((offset - data->offset) % channel->block_size);
231                 retval = ext2fs_get_mem(count, &read_ptr);
232                 if (retval) {
233                         tdb_transaction_cancel(data->tdb);
234                         return retval;
235                 }
236
237                 memset(read_ptr, 0, count);
238                 actual_size = 0;
239                 if ((count % channel->block_size) == 0)
240                         sz = count / channel->block_size;
241                 else
242                         sz = -count;
243                 retval = io_channel_read_blk64(data->real, backing_blk_num,
244                                              sz, read_ptr);
245                 if (retval) {
246                         if (retval != EXT2_ET_SHORT_READ) {
247                                 free(read_ptr);
248                                 tdb_transaction_cancel(data->tdb);
249                                 return retval;
250                         }
251                         /*
252                          * short read so update the record size
253                          * accordingly
254                          */
255                         tdb_data.dsize = actual_size;
256                 } else {
257                         tdb_data.dsize = data->tdb_data_size;
258                 }
259                 tdb_data.dptr = read_ptr +
260                                 ((offset - data->offset) % channel->block_size);
261 #ifdef DEBUG
262                 printf("Printing with key %lld data %x and size %d\n",
263                        block_num,
264                        tdb_data.dptr,
265                        tdb_data.dsize);
266 #endif
267                 if (!data->tdb_written) {
268                         data->tdb_written = 1;
269                         /* Write the blocksize to tdb file */
270                         retval = write_block_size(data->tdb,
271                                                   data->tdb_data_size);
272                         if (retval) {
273                                 tdb_transaction_cancel(data->tdb);
274                                 retval = EXT2_ET_TDB_ERR_IO;
275                                 free(read_ptr);
276                                 return retval;
277                         }
278                 }
279                 retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_INSERT);
280                 if (retval == -1) {
281                         /*
282                          * TDB_ERR_EXISTS cannot happen because we
283                          * have already verified it doesn't exist
284                          */
285                         tdb_transaction_cancel(data->tdb);
286                         retval = EXT2_ET_TDB_ERR_IO;
287                         free(read_ptr);
288                         return retval;
289                 }
290                 free(read_ptr);
291                 /* Next block */
292                 block_num++;
293         }
294         tdb_transaction_commit(data->tdb);
295
296         return retval;
297 }
298
299 static errcode_t undo_io_read_error(io_channel channel ATTR((unused)),
300                                     unsigned long block ATTR((unused)),
301                                     int count ATTR((unused)),
302                                     void *data ATTR((unused)),
303                                     size_t size ATTR((unused)),
304                                     int actual,
305                                     errcode_t error ATTR((unused)))
306 {
307         actual_size = actual;
308         return error;
309 }
310
311 static void undo_err_handler_init(io_channel channel)
312 {
313         channel->read_error = undo_io_read_error;
314 }
315
316 static errcode_t undo_open(const char *name, int flags, io_channel *channel)
317 {
318         io_channel      io = NULL;
319         struct undo_private_data *data = NULL;
320         errcode_t       retval;
321
322         if (name == 0)
323                 return EXT2_ET_BAD_DEVICE_NAME;
324         retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
325         if (retval)
326                 goto cleanup;
327         memset(io, 0, sizeof(struct struct_io_channel));
328         io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
329         retval = ext2fs_get_mem(sizeof(struct undo_private_data), &data);
330         if (retval)
331                 goto cleanup;
332
333         io->manager = undo_io_manager;
334         retval = ext2fs_get_mem(strlen(name)+1, &io->name);
335         if (retval)
336                 goto cleanup;
337
338         strcpy(io->name, name);
339         io->private_data = data;
340         io->block_size = 1024;
341         io->read_error = 0;
342         io->write_error = 0;
343         io->refcount = 1;
344
345         memset(data, 0, sizeof(struct undo_private_data));
346         data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
347
348         if (undo_io_backing_manager) {
349                 retval = undo_io_backing_manager->open(name, flags,
350                                                        &data->real);
351                 if (retval)
352                         goto cleanup;
353         } else {
354                 data->real = 0;
355         }
356
357         /* setup the tdb file */
358         data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST,
359                              O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
360         if (!data->tdb) {
361                 retval = errno;
362                 goto cleanup;
363         }
364
365         /*
366          * setup err handler for read so that we know
367          * when the backing manager fails do short read
368          */
369         if (data->real)
370                 undo_err_handler_init(data->real);
371
372         *channel = io;
373         return 0;
374
375 cleanup:
376         if (data && data->real)
377                 io_channel_close(data->real);
378         if (data)
379                 ext2fs_free_mem(&data);
380         if (io)
381                 ext2fs_free_mem(&io);
382         return retval;
383 }
384
385 static errcode_t undo_close(io_channel channel)
386 {
387         struct undo_private_data *data;
388         errcode_t       retval = 0;
389
390         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
391         data = (struct undo_private_data *) channel->private_data;
392         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
393
394         if (--channel->refcount > 0)
395                 return 0;
396         /* Before closing write the file system identity */
397         retval = write_file_system_identity(channel, data->tdb);
398         if (retval)
399                 return retval;
400         if (data->real)
401                 retval = io_channel_close(data->real);
402         if (data->tdb)
403                 tdb_close(data->tdb);
404         ext2fs_free_mem(&channel->private_data);
405         if (channel->name)
406                 ext2fs_free_mem(&channel->name);
407         ext2fs_free_mem(&channel);
408
409         return retval;
410 }
411
412 static errcode_t undo_set_blksize(io_channel channel, int blksize)
413 {
414         struct undo_private_data *data;
415         errcode_t               retval = 0;
416
417         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
418         data = (struct undo_private_data *) channel->private_data;
419         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
420
421         if (data->real)
422                 retval = io_channel_set_blksize(data->real, blksize);
423         /*
424          * Set the block size used for tdb
425          */
426         if (!data->tdb_data_size) {
427                 data->tdb_data_size = blksize;
428         }
429         channel->block_size = blksize;
430         return retval;
431 }
432
433 static errcode_t undo_read_blk64(io_channel channel, unsigned long long block,
434                                int count, void *buf)
435 {
436         errcode_t       retval = 0;
437         struct undo_private_data *data;
438
439         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
440         data = (struct undo_private_data *) channel->private_data;
441         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
442
443         if (data->real)
444                 retval = io_channel_read_blk64(data->real, block, count, buf);
445
446         return retval;
447 }
448
449 static errcode_t undo_read_blk(io_channel channel, unsigned long block,
450                                int count, void *buf)
451 {
452         return undo_read_blk64(channel, block, count, buf);
453 }
454
455 static errcode_t undo_write_blk64(io_channel channel, unsigned long long block,
456                                 int count, const void *buf)
457 {
458         struct undo_private_data *data;
459         errcode_t       retval = 0;
460
461         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
462         data = (struct undo_private_data *) channel->private_data;
463         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
464         /*
465          * First write the existing content into database
466          */
467         retval = undo_write_tdb(channel, block, count);
468         if (retval)
469                  return retval;
470         if (data->real)
471                 retval = io_channel_write_blk64(data->real, block, count, buf);
472
473         return retval;
474 }
475
476 static errcode_t undo_write_blk(io_channel channel, unsigned long block,
477                                 int count, const void *buf)
478 {
479         return undo_write_blk64(channel, block, count, buf);
480 }
481
482 static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
483                                  int size, const void *buf)
484 {
485         struct undo_private_data *data;
486         errcode_t       retval = 0;
487         ext2_loff_t     location;
488         unsigned long blk_num, count;;
489
490         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
491         data = (struct undo_private_data *) channel->private_data;
492         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
493
494         location = offset + data->offset;
495         blk_num = location/channel->block_size;
496         /*
497          * the size specified may spread across multiple blocks
498          * also make sure we account for the fact that block start
499          * offset for tdb is different from the backing I/O manager
500          * due to possible different block size
501          */
502         count = (size + (location % channel->block_size) +
503                         channel->block_size  -1)/channel->block_size;
504         retval = undo_write_tdb(channel, blk_num, count);
505         if (retval)
506                 return retval;
507         if (data->real && data->real->manager->write_byte)
508                 retval = io_channel_write_byte(data->real, offset, size, buf);
509
510         return retval;
511 }
512
513 /*
514  * Flush data buffers to disk.
515  */
516 static errcode_t undo_flush(io_channel channel)
517 {
518         errcode_t       retval = 0;
519         struct undo_private_data *data;
520
521         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
522         data = (struct undo_private_data *) channel->private_data;
523         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
524
525         if (data->real)
526                 retval = io_channel_flush(data->real);
527
528         return retval;
529 }
530
531 static errcode_t undo_set_option(io_channel channel, const char *option,
532                                  const char *arg)
533 {
534         errcode_t       retval = 0;
535         struct undo_private_data *data;
536         unsigned long tmp;
537         char *end;
538
539         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
540         data = (struct undo_private_data *) channel->private_data;
541         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
542
543         if (!strcmp(option, "tdb_data_size")) {
544                 if (!arg)
545                         return EXT2_ET_INVALID_ARGUMENT;
546
547                 tmp = strtoul(arg, &end, 0);
548                 if (*end)
549                         return EXT2_ET_INVALID_ARGUMENT;
550                 if (!data->tdb_data_size || !data->tdb_written) {
551                         data->tdb_data_size = tmp;
552                 }
553                 return 0;
554         }
555         /*
556          * Need to support offset option to work with
557          * Unix I/O manager
558          */
559         if (data->real && data->real->manager->set_option) {
560                 retval = data->real->manager->set_option(data->real,
561                                                         option, arg);
562         }
563         if (!retval && !strcmp(option, "offset")) {
564                 if (!arg)
565                         return EXT2_ET_INVALID_ARGUMENT;
566
567                 tmp = strtoul(arg, &end, 0);
568                 if (*end)
569                         return EXT2_ET_INVALID_ARGUMENT;
570                 data->offset = tmp;
571         }
572         return retval;
573 }
574
575 static errcode_t undo_get_stats(io_channel channel, io_stats *stats)
576 {
577         errcode_t       retval = 0;
578         struct undo_private_data *data;
579
580         EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
581         data = (struct undo_private_data *) channel->private_data;
582         EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
583
584         if (data->real)
585                 retval = (data->real->manager->get_stats)(data->real, stats);
586
587         return retval;
588 }
589
590 static struct struct_io_manager struct_undo_manager = {
591         .magic          = EXT2_ET_MAGIC_IO_MANAGER,
592         .name           = "Undo I/O Manager",
593         .open           = undo_open,
594         .close          = undo_close,
595         .set_blksize    = undo_set_blksize,
596         .read_blk       = undo_read_blk,
597         .write_blk      = undo_write_blk,
598         .flush          = undo_flush,
599         .write_byte     = undo_write_byte,
600         .set_option     = undo_set_option,
601         .get_stats      = undo_get_stats,
602         .read_blk64     = undo_read_blk64,
603         .write_blk64    = undo_write_blk64,
604 };
605
606 io_manager undo_io_manager = &struct_undo_manager;