2 Index: linux-2.4.20-8/fs/ext3/snap.c
3 ===================================================================
4 --- linux-2.4.20-8.orig/fs/ext3/snap.c 2003-01-30 18:24:37.000000000 +0800
5 +++ linux-2.4.20-8/fs/ext3/snap.c 2004-01-05 10:54:25.000000000 +0800
9 + * Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
10 + * started by Andreas Dilger <adilger@turbolinux.com>
11 + * Peter Braam <braam@mountainviewdata.com>
12 + * Harrison Xing <harrisonx@mountainviewdata.com>
13 + * Eric Mei <Ericm@clusterfs.com>
15 + * port to 2.4 by Wang Di <wangdi@clusterfs.com>
16 + * Eric Mei <ericm@clusterfs.com>
18 + * Functions for implementing snapshots in the ext3 filesystem. They are
19 + * intended to hide the internals of the filesystem from the caller in
20 + * such a way that the caller doesn't need to know about inode numbers,
21 + * how the redirectors are implemented or stored, etc. It may not do that
22 + * all yet, but it tries.
24 + * The snapshot inode redirection is stored in the primary/direct inode as
25 + * an extended attribute $snap, in the form of little-endian u32 inode
30 +#define EXPORT_SYMTAB
31 +#include <linux/module.h>
33 +#include <linux/sched.h>
34 +#include <linux/jbd.h>
35 +#include <linux/mm.h>
36 +#include <linux/slab.h>
37 +#include <linux/locks.h>
38 +#include <linux/snap.h>
39 +#include <linux/ext3_jbd.h>
40 +#include <linux/ext3_fs.h>
41 +#include <linux/ext3_xattr.h>
43 +#define EXT3_SNAP_ATTR "@snap"
44 +#define EXT3_SNAP_GENERATION_ATTR "@snap_generation"
45 +#define EXT3_MAX_SNAPS 20
46 +#define EXT3_MAX_SNAP_DATA (sizeof(struct snap_ea))
47 +#define EXT3_SNAP_INDEX EXT3_XATTR_INDEX_LUSTRE
49 +#ifdef EXT3_SNAP_DEBUG
50 + static long snap_kmem = 0;
51 + #define snap_debug(f, a...) \
53 + printk (KERN_INFO "SNAP DEBUG: (%s, %d): %s: ", \
54 + __FILE__, __LINE__, __FUNCTION__); \
58 + #define snap_err(f, a...) \
60 + printk (KERN_ERR "SNAP ERROR: (%s, %d): %s: ", \
61 + __FILE__, __LINE__, __FUNCTION__); \
66 + #define snap_debug(f, a...) do {} while (0)
67 + #define snap_err(f, a...) \
69 + printk (KERN_ERR "SNAP ERROR: (%s, %d): ", \
70 + __FILE__, __LINE__); \
76 +#ifdef EXT3_SNAP_DEBUG
77 + #define ALLOC(ptr, cast, size) \
79 + ptr = (cast)kmalloc((size_t) size, GFP_KERNEL); \
81 + printk(KERN_ERR "kmalloc returns 0 at %s:%d\n", \
82 + __FILE__, __LINE__); \
84 + snap_kmem += size; \
85 + printk(KERN_INFO "snap_alloc %d, kmem %ld\n", \
86 + (size_t)size, snap_kmem); \
90 + #define FREE(ptr,size) \
93 + snap_kmem -= size; \
94 + printk(KERN_INFO "snap_free %d, kmem %ld\n", \
95 + (size_t)size, snap_kmem); \
99 + #define ALLOC(ptr, cast, size) \
101 + ptr = (cast)kmalloc((size_t) size, GFP_KERNEL); \
104 + #define FREE(ptr,size) \
109 +#endif /* EXT3_SNAP_DEBUG */
111 +#ifdef EXT3_SNAP_DEBUG
112 + /* modestr: convert inode mode to a string (debug function) */
113 + static char * modestr ( umode_t mode )
115 + if( S_ISREG(mode) )
117 + else if(S_ISDIR(mode))
119 + else if(S_ISLNK(mode))
121 + else if(S_ISCHR(mode))
123 + else if(S_ISBLK(mode))
125 + else if(S_ISFIFO(mode))
127 + else if(S_ISSOCK(mode))
130 + return "non-known";
132 +#define DEBUG_INODE(inode) \
133 + if(inode && !IS_ERR(inode)) { \
134 + snap_debug("%s ino %lu, i_nlink %u, i_count %d, i_mode %u, i_size %lld, i_blocks %lu\n", \
135 + modestr(inode->i_mode), inode->i_ino, inode->i_nlink, \
136 + atomic_read(&inode->i_count), inode->i_mode, inode->i_size, \
137 + inode->i_blocks); }
139 + #define modestr(mode) do {} while (0)
140 + #define DEBUG_INODE(inode)
142 +#endif /* EXT3_SNAP_DEBUG */
143 +/* do file cow on: dir, symlink, regular but fs has filecow flag */
145 +#define IS_FILECOW_TYPE(inode) \
146 + (S_ISDIR(inode->i_mode) || \
147 + S_ISLNK(inode->i_mode) || \
148 + (S_ISREG(inode->i_mode) && \
149 + !SNAP_HAS_COMPAT_FEATURE(inode->i_sb, SNAP_FEATURE_COMPAT_BLOCKCOW)))
151 +#define SNAP_ERROR(err) ((err) < 0 ? (err) : (-(err)))
152 +/* SNAP_ERROR(err): Make sure we return negative errors for Linux ( return positive errors) */
157 +# define CDEBUG_STACK (THREAD_SIZE - \
158 + ((unsigned long)__builtin_dwarf_cfa() & \
159 + (THREAD_SIZE - 1)))
161 +# define CDEBUG_STACK (THREAD_SIZE - \
162 + ((unsigned long)__builtin_frame_address(0) & \
163 + (THREAD_SIZE - 1)))
166 +#define snap_debug_msg(file, fn, line, stack, format, a...) \
167 + printf("(%s:%s,l. %d %d %lu): " format, file, fn, line, \
168 + getpid() , stack, ## a);
171 +#define CDEBUG(mask, format, a...) \
173 + CHECK_STACK(CDEBUG_STACK); \
174 + if (!(mask) || ((mask) & (D_ERROR | D_EMERG))) \
175 + snap_debug_msg(__FILE__, __FUNCTION__, __LINE__, \
176 + CDEBUG_STACK, format, ## a); \
179 +#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) /* function-like, same shape as CERROR/CEMERG */
180 +#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
181 +#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
183 +#define RETURN(rc) \
185 + typeof(rc) RETURN__ret = (rc); \
186 + CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
187 + (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
188 + return RETURN__ret; \
193 + CDEBUG(D_TRACE, "Process entered\n"); \
198 + CDEBUG(D_TRACE, "Process leaving\n"); \
201 +#define CDEBUG(mask, format, a...) do { } while (0)
202 +#define CWARN(format, a...) do { } while (0)
203 +#define CERROR(format, a...) printk("<3>" format, ## a)
204 +#define CEMERG(format, a...) printk("<0>" format, ## a)
205 +#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
206 +#define RETURN(rc) return (rc)
207 +#define ENTRY do { } while (0)
208 +#define EXIT do { } while (0)
211 +#define SNAP_ATTR_BUF_CNT 10
213 +#define SB_LAST_COWED_INO(sb) (EXT3_SB(sb)->s_es->s_last_cowed_pri_ino)
214 +#define SB_FIRST_COWED_INO(sb) (EXT3_SB(sb)->s_es->s_first_cowed_pri_ino)
215 +#define SB_SNAPTABLE_INO(sb) (EXT3_SB(sb)->s_es->s_snaptable_ino)
216 +#define SB_SNAP_LIST_SEM(sb) (EXT3_SB(sb)->s_snap_list_sem)
217 +#define SB_FEATURE_COMPAT(sb) (EXT3_SB(sb)->s_es->s_feature_compat)
219 +#define SNAP_HAS_COMPAT_FEATURE(sb,mask) \
220 + (SB_FEATURE_COMPAT(sb) & cpu_to_le32(mask))
222 +/* NOTE: these macros are closely dependent on the structure of snap ea */
223 +#define SNAP_CNT_FROM_SIZE(size) ((((size)-sizeof(ino_t)*2)/2)/sizeof(ino_t))
224 +#define SNAP_EA_SIZE_FROM_INDEX(index) (sizeof(ino_t)*2 + 2*sizeof(ino_t)*((index)+1))
226 +#define SNAP_EA_INO_BLOCK_SIZE(size) (((size)-sizeof(ino_t)*2)/2)
227 +#define SNAP_EA_PARENT_OFFSET(size) (sizeof(ino_t)*2 + SNAP_EA_INO_BLOCK_SIZE((size)))
229 +extern int ext3_bmap(struct address_space *mapping, long block);
230 +extern int ext3_load_inode_bitmap (struct super_block * sb, unsigned int block_group);
231 +/* helper functions to manipulate field 'parent' in snap_ea */
234 +set_parent_ino(struct snap_ea *pea, int size, int index, ino_t val)
236 + char * p = (char*) pea;
239 + offset = sizeof(ino_t)*2 + (size - sizeof(ino_t)*2)/2;
240 + offset += sizeof(ino_t) * index;
241 + *(ino_t*)(p+offset) = val;
246 +get_parent_ino(struct snap_ea *pea, int size, int index)
248 + char * p = (char*)pea;
251 + offset = sizeof(ino_t)*2 + (size - sizeof(ino_t)*2)/2;
252 + offset += sizeof(ino_t) * index;
253 + return *(ino_t*)(p+offset);
255 +static inline void snap_double_lock(struct inode *i1, struct inode *i2)
257 + double_down(&i1->i_sem, &i2->i_sem);
260 +static inline void snap_double_unlock(struct inode *i1, struct inode *i2)
262 + double_up(&i1->i_sem, &i2->i_sem);
265 +/* ext3_iterate_cowed_inode:
266 + * iterate over all the cowed inodes with the same index and
267 + * run the associated function @repeat on each of them
269 + * For @repeat, if it returns non-zero value, it will exit the iterator
271 + * return value: 0 or positive: success
272 + * negative: failure
273 + * additional: if the return value is positive, it must be the return value
274 + * of function @repeat.
277 +static int ext3_iterate_cowed_inode(
278 + struct super_block *sb,
279 + int (*repeat)(struct inode *inode, void *priv),
280 + struct inode **start,
283 + struct inode *list_inode = NULL;
284 + char buf[EXT3_MAX_SNAP_DATA];
285 + struct snap_ea *snaps;
288 + if (SB_FIRST_COWED_INO(sb) == 0) {
289 + snap_debug("no cowed inode in the list\n");
293 + /* get head inode in the list */
294 + if (start != NULL && *start != NULL && (*start)->i_ino)
295 + list_inode = iget(sb, (*start)->i_ino);
297 + list_inode = iget (sb, le32_to_cpu( SB_FIRST_COWED_INO(sb) ));
299 + /* loop for all inode in list */
300 + while (list_inode) {
301 + if (!list_inode->i_nlink || is_bad_inode(list_inode)) {
302 + snap_err("inode %p, ino %lu, mode %o, nlink %d\n",
305 + list_inode->i_mode,
306 + list_inode->i_nlink);
311 + err = ext3_xattr_get(list_inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
312 + buf, EXT3_MAX_SNAP_DATA);
313 + if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
314 + snap_err("inode %lu, error %d\n", list_inode->i_ino, err);
318 + if ((err = (*repeat)(list_inode, priv)) != 0)
323 + snaps = (struct snap_ea *) buf;
324 + if (le32_to_cpu (snaps->next_ino) != 0) {
325 + list_inode = iget(sb, le32_to_cpu(snaps->next_ino));
328 + snap_debug ("cowed inode list end, exit\n");
338 +static int get_cowed_ino(struct inode *pri, void *param)
340 + ino_t *find = param;
341 + (*find) = pri->i_ino;
345 +/* Return 0 for error. */
346 +static int get_cowed_ino_end (struct inode *inode)
351 + rc = ext3_iterate_cowed_inode(inode->i_sb, &get_cowed_ino, &inode, &ino);
359 +/* find the end of the primary inode, iterate if needed
360 + * return 0 if any error found */
361 +static inline ino_t find_last_cowed_ino(struct super_block *sb)
363 + struct inode *inode = NULL;
364 + ino_t first, last = 0;
366 + last = le32_to_cpu(SB_LAST_COWED_INO(sb));
370 + first = le32_to_cpu(SB_FIRST_COWED_INO(sb));
373 + snap_err("first cowed inode is NULL\n");
377 + inode = iget(sb, first);
379 + if (is_bad_inode(inode)) {
380 + snap_err("bad inode %lu\n", first);
384 + last = get_cowed_ino_end(inode);
392 +/* Insert the primary inode to the cowed inode list
393 + * Append it to the list end
395 + * @pri: inode to insert
396 + * @buf_pri: the valid ea buf for @pri inode ( excluding the next_ino field) ,
397 + * it's used to write the ea for @pri inode
399 + * To avoid breaking the list in abnormal cases, it will first write the ea for @pri
400 + * inode, and then write the ea for the list end inode. Thus a broken list is
401 + * avoided even if there are errors when writing the ea.
403 +static int insert_cowed_ino_to_list (handle_t *handle, struct inode *pri, char *buf_pri)
405 + char buf[EXT3_MAX_SNAP_DATA];
406 + struct snap_ea *snaps;
407 + struct snap_ea *snaps_pri;
408 + struct inode *last_inode = NULL;
409 + struct ext3_sb_info *sbi = EXT3_SB(pri->i_sb);
412 + snaps_pri = (struct snap_ea *)buf_pri;
414 + if (!SB_FIRST_COWED_INO(pri->i_sb)) {
415 + /* we set the next_ino and write ea for pri inode */
416 + snaps_pri->next_ino = cpu_to_le32(0);
417 + snaps_pri->prev_ino = cpu_to_le32(0);
419 + err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
420 + buf_pri, EXT3_MAX_SNAP_DATA, 0);
422 + snap_err("ino %lu, set_ext_attr err %d\n", pri->i_ino, err);
425 + lock_super(pri->i_sb);
426 + ext3_journal_get_write_access(handle, sbi->s_sbh);
427 + sbi->s_es->s_first_cowed_pri_ino = cpu_to_le32(pri->i_ino);
428 + SB_LAST_COWED_INO(pri->i_sb) = cpu_to_le32(pri->i_ino);
429 + pri->i_sb->s_dirt = 1;
430 + ext3_journal_dirty_metadata(handle, sbi->s_sbh);
431 + unlock_super(pri->i_sb);
432 + EXT3_I(pri)->i_flags |= EXT3_SNAP_PRI_FLAG;
436 + if (!SB_LAST_COWED_INO(pri->i_sb)){
437 + SB_LAST_COWED_INO(pri->i_sb) = find_last_cowed_ino(pri->i_sb);
438 + if (!SB_LAST_COWED_INO(pri->i_sb) ){
439 + snap_err("error, last cowed inode is NULL\n");
444 + last_inode = iget(pri->i_sb, SB_LAST_COWED_INO(pri->i_sb));
445 + if (!last_inode || is_bad_inode(last_inode)) {
449 + err = ext3_xattr_get(last_inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
450 + buf, EXT3_MAX_SNAP_DATA);
451 + if (err == -ENOENT) {
452 + snap_debug("no existing attributes - zeroing\n");
453 + memset(buf, 0, EXT3_MAX_SNAP_DATA);
454 + } else if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
455 + snap_debug("got err %d when reading attributes\n", err);
458 + /*set primary inode EA*/
459 + snaps_pri->next_ino = 0;
460 + snaps_pri->prev_ino = cpu_to_le32(last_inode->i_ino);
462 + err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
463 + buf_pri, EXT3_MAX_SNAP_DATA, 0);
465 + snap_debug("set attributes error for inode %lu\n",
466 + (ulong)pri->i_ino);
470 + /*set last inode EA*/
471 + snaps = (struct snap_ea *) buf;
472 + snaps->next_ino = cpu_to_le32(pri->i_ino);
473 + err = ext3_xattr_set(handle, last_inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
474 + buf, EXT3_MAX_SNAP_DATA, 0);
476 + snap_debug("set attributes error for inode %lu\n",
477 + (ulong)last_inode->i_ino);
481 + EXT3_I(pri)->i_flags |= EXT3_SNAP_PRI_FLAG;
483 + /* we update the new cowed ino list end in memory */
484 + SB_LAST_COWED_INO(pri->i_sb) = cpu_to_le32(pri->i_ino);
485 + snap_debug("cowed_inode_list_end %lu, append ino=%d\n",
486 + last_inode->i_ino, le32_to_cpu(snaps->ino[index]));
494 +/* delete the ino from cowed inode list */
495 +static int delete_cowed_ino_from_list (handle_t *handle, struct inode *inode)
497 + ino_t prev_ino = 0, next_ino = 0;
498 + struct inode *prev_inode = NULL;
499 + struct inode *next_inode = NULL;
500 + struct snap_ea *snaps;
501 + char buf[EXT3_MAX_SNAP_DATA];
504 + err = ext3_xattr_get(inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
505 + buf, EXT3_MAX_SNAP_DATA);
506 + if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
507 + snap_err("get attr inode %lu, error %d\n", inode->i_ino, err);
511 + snaps = (struct snap_ea *) buf;
512 + next_ino = le32_to_cpu(snaps->next_ino);
513 + prev_ino = le32_to_cpu(snaps->prev_ino);
515 + /* if this is the first cowed ino */
516 + if (inode->i_ino == le32_to_cpu(SB_FIRST_COWED_INO(inode->i_sb))) {
517 + SB_FIRST_COWED_INO(inode->i_sb) = cpu_to_le32(next_ino);
518 + EXT3_I(inode)->i_flags &= ~EXT3_SNAP_PRI_FLAG;
524 + /* find previous inode and read its ea */
525 + prev_inode = iget(inode->i_sb, prev_ino);
526 + if (!prev_inode || is_bad_inode(prev_inode))
529 + err = ext3_xattr_get(prev_inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
530 + buf, EXT3_MAX_SNAP_DATA);
531 + if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
532 + snap_err("get attr inode %lu, error %d\n", prev_inode->i_ino, err);
536 + /* make the previous inode point to the next inode,
537 + * but ignore errors because in the current version we
538 + * don't use the previous pointer */
539 + snaps = (struct snap_ea *) buf;
540 + snaps->next_ino = cpu_to_le32(next_ino);
542 + snap_debug("delete ino %lu from list\n", inode->i_ino);
544 + err = ext3_xattr_set(handle, prev_inode, EXT3_SNAP_INDEX,
545 + EXT3_SNAP_ATTR, buf, EXT3_MAX_SNAP_DATA, 0);
547 + snap_err("err %d setting ea for ino %lu\n", err, prev_inode->i_ino);
551 + if (next_ino == 0) {
552 + SB_LAST_COWED_INO(inode->i_sb) = prev_ino;
556 + /* make the next inode point to the previous one */
557 + next_inode = iget(inode->i_sb, next_ino);
558 + if (!next_inode || is_bad_inode(next_inode))
561 + err = ext3_xattr_get(next_inode, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
562 + buf, EXT3_MAX_SNAP_DATA);
563 + if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
564 + snap_err("set attr inode %lu, error %d\n", next_inode->i_ino, err);
567 + snaps = ( struct snap_ea *) buf;
568 + snaps->prev_ino = cpu_to_le32(prev_ino);
570 + err = ext3_xattr_set(handle, next_inode, EXT3_SNAP_INDEX,
571 + EXT3_SNAP_ATTR, buf, EXT3_MAX_SNAP_DATA, 0);
573 + snap_err("err %d setting attributes for ino %lu\n",
574 + err, next_inode->i_ino);
583 +static inline void lock_list(struct super_block *sb)
585 + down(&SB_SNAP_LIST_SEM(sb));
588 +static inline void unlock_list(struct super_block *sb)
590 + up(&SB_SNAP_LIST_SEM(sb));
593 +static int ext3_snap_feature (struct super_block *sb, int feature, int op) {
598 + case SNAP_SET_FEATURE:
599 + handle = ext3_journal_start(sb->s_root->d_inode, 1);
601 + ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
602 + SB_FEATURE_COMPAT(sb) |= cpu_to_le32(feature);
604 + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
606 + ext3_journal_stop(handle, sb->s_root->d_inode);
608 + case SNAP_CLEAR_FEATURE:
609 + handle = ext3_journal_start(sb->s_root->d_inode, 1);
611 + ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
612 + SB_FEATURE_COMPAT(sb) &= ~cpu_to_le32(feature);
613 + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
616 + ext3_journal_stop(handle, sb->s_root->d_inode);
618 + case SNAP_HAS_FEATURE:
619 + /*FIXME should lock super or not*/
620 + rc = SNAP_HAS_COMPAT_FEATURE(sb, feature);
628 +#ifdef _DEVICE_FAIL_TEST
630 +extern int loop_discard_io(kdev_t dev, long arg);
632 + * modify failpos to let loop fail at certain point
633 + * let pos=0 mean no fail point
635 +static int failpos = 0;
636 +#define loopfail(pos) \
638 + if( pos == failpos ){ \
640 + printk(KERN_EMERG "SNAP; hit fail point %d\n", failpos);\
641 + for( i=0; i<15; i++ ) \
642 + loop_discard_io( MKDEV(7,i), 1 ); \
646 +#define loopfail(pos) do{}while(0)
649 +/* Save the indirect inode in the snapshot table of the primary inode. */
650 +static int ext3_set_indirect(struct inode *pri, int index, ino_t ind_ino, ino_t parent_ino )
652 + char buf[EXT3_MAX_SNAP_DATA];
653 + struct snap_ea *snaps;
654 + int err = 0, inlist = 1;
656 + handle_t *handle = NULL;
658 + snap_debug("(ino %lu, parent %lu): saving ind %lu to index %d\n",
659 + pri->i_ino, parent_ino, ind_ino, index);
661 + if (index < 0 || index > MAX_SNAPS || !pri)
663 + /* need lock the list before get_attr() to avoid race */
664 + lock_list(pri->i_sb);
665 + /* read ea at first */
666 + err = ext3_xattr_get(pri, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
667 + buf, EXT3_MAX_SNAP_DATA);
668 + if (err == -ENOENT || err == -ENOATTR) {
669 + snap_debug("no extended attributes - zeroing\n");
670 + memset(buf, 0, EXT3_MAX_SNAP_DATA);
672 + * To judge whether an inode is in the list, we only check if it has a snap ea.
673 + * So take care of snap ea of primary inodes very carefully.
674 + * Is it right in snapfs EXT3, check it later?
677 + // ea_size = SNAP_EA_SIZE_FROM_INDEX(index);
678 + } else if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
682 + handle = ext3_journal_start(pri, SNAP_SETIND_TRANS_BLOCKS);
684 + err = PTR_ERR(handle);
688 + snaps = (struct snap_ea *)buf;
689 + snaps->ino[index] = cpu_to_le32 (ind_ino);
690 + ea_size = EXT3_MAX_SNAP_DATA;
692 + set_parent_ino(snaps, ea_size, index, cpu_to_le32(parent_ino));
694 + snap_debug("saving attributes\n");
697 + err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
698 + buf, EXT3_MAX_SNAP_DATA, 0);
701 + /* This will also write the ea for the pri inode, like above */
702 + err = insert_cowed_ino_to_list(handle, pri, buf);
704 + ext3_mark_inode_dirty(handle, pri);
705 + ext3_journal_stop(handle, pri);
707 + unlock_list(pri->i_sb);
712 + * is_redirector - determines if a primary inode is a redirector
713 + * @inode: primary inode to test
715 + * Returns 1 if the inode is a redirector, 0 otherwise.
717 +static int is_redirector(struct inode *inode)
719 + int is_redirector = 0;
722 + rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
724 + if (rc > 0 && rc <= MAX_SNAP_DATA)
726 + snap_debug("inode %lu %s redirector\n", inode->i_ino,
727 + is_redirector ? "is" : "isn't");
728 + return is_redirector;
731 +/* whether it's an indirect inode or not */
732 +static int is_indirect(struct inode *inode)
734 + if (EXT3_I(inode)->i_flags |= EXT3_COW_FL)
740 + * Copy inode metadata from one inode to another, excluding blocks and size.
741 + * FIXME do we copy EA data - ACLs and such (excluding snapshot data)?
743 +static void ext3_copy_meta(struct inode *dst, struct inode *src)
747 + dst->i_mode = src->i_mode;
748 + dst->i_nlink = src->i_nlink;
749 + dst->i_uid = src->i_uid;
750 + dst->i_gid = src->i_gid;
751 + dst->i_atime = src->i_atime;
752 + dst->i_mtime = src->i_mtime;
753 + dst->i_ctime = src->i_ctime;
754 +// dst->i_version = src->i_version;
755 + dst->i_attr_flags = src->i_attr_flags;
756 + dst->i_generation = src->i_generation;
757 + dst->u.ext3_i.i_dtime = src->u.ext3_i.i_dtime;
758 + dst->u.ext3_i.i_flags = src->u.ext3_i.i_flags | EXT3_COW_FL;
759 +#ifdef EXT3_FRAGMENTS
760 + dst->u.ext3_i.i_faddr = src->u.ext3_i.i_faddr;
761 + dst->u.ext3_i.i_frag_no = src->u.ext3_i.i_frag_no;
762 + dst->u.ext3_i.i_frag_size = src->u.ext3_i.i_frag_size;
764 + if ((size = ext3_xattr_list(src, NULL, 0)) > 0) {
769 + if (ext3_xattr_list(src, names, 0) < 0)
772 + * the list of attribute names are stored as NUL terminated
773 + * strings, with a double NUL string at the end.
776 + while ((namelen = strlen(name))) {
779 + handle_t *handle = ext3_journal_current_handle();
781 + /* don't copy snap data */
782 + if (!strcmp(name, EXT3_SNAP_ATTR)) {
783 + snap_debug("skipping %s item\n", name);
786 + snap_debug("copying %s item\n", name);
787 + attrlen = ext3_xattr_get(src, EXT3_SNAP_INDEX,
788 + EXT3_SNAP_ATTR, NULL, 0);
791 + if ((buf = kmalloc(attrlen, GFP_ATOMIC)) == NULL)
793 + if (ext3_xattr_get(src, EXT3_SNAP_INDEX,
794 + EXT3_SNAP_ATTR, buf, attrlen) < 0)
796 + if (ext3_xattr_set(handle, dst, EXT3_SNAP_INDEX,
797 + EXT3_SNAP_ATTR, buf, attrlen, 0) < 0)
800 + name += namelen + 1; /* skip name and trailing NUL */
805 +static inline int ext3_has_ea(struct inode *inode)
807 + return (EXT3_I(inode)->i_file_acl != 0);
810 +/* ext3_migrate_data:
811 + * MOVE all the data blocks from inode src to inode dst as well as
812 + * COPY all attributes(meta data) from inode src to inode dst.
813 + * For extended attributes(EA), we COPY all the EAs but skip the Snap EA from src to dst.
814 + * If the dst has Snap EA, then we CAN'T overwrite it. We CAN'T copy the src Snap EA.
815 + * XXX for EA, can we change it to MOVE all the EAs(exclude Snap EA) to dst and copy it back to src ?
816 + * This is for LAN free backup later.
819 +static int ext3_migrate_data (struct inode *dst, struct inode *src)
821 + unsigned long err = 0;
822 + handle_t *handle = NULL;
823 + /* 512 byte disk blocks per inode block */
824 + int bpib = src->i_sb->s_blocksize >> 9;
826 + if((!dst) || (!src))
829 + if (dst->i_ino == src->i_ino)
831 + handle = ext3_journal_start(dst, SNAP_MIGRATEDATA_TRANS_BLOCKS);
834 + ext3_copy_meta(dst, src);
836 + snap_debug("migrating %ld data blocks from %lu to %lu\n",
837 + blocks, src->i_ino, dst->i_ino);
838 + /* Can't check blocks in case of EAs */
839 + memcpy(EXT3_I(dst)->i_data, EXT3_I(src)->i_data,
840 + sizeof(EXT3_I(src)->i_data));
841 + memset(EXT3_I(src)->i_data, 0, sizeof(EXT3_I(src)->i_data));
843 + ext3_discard_prealloc(src);
845 + dst->i_size = EXT3_I(dst)->i_disksize = EXT3_I(src)->i_disksize;
846 + src->i_size = EXT3_I(src)->i_disksize = 0;
848 + dst->i_blocks = src->i_blocks;
850 + /* Check EA blocks here to modify i_blocks correctly */
851 + if(ext3_has_ea (src)) {
852 + src->i_blocks += bpib;
853 + if( ! ext3_has_ea (dst) )
854 + if( dst->i_blocks >= bpib )
855 + dst->i_blocks -= bpib;
857 + if( ext3_has_ea (dst))
858 + dst->i_blocks += bpib;
861 + snap_debug("migrate data from ino %lu to ino %lu\n",
862 + src->i_ino, dst->i_ino);
863 + ext3_mark_inode_dirty(handle, src);
864 + ext3_mark_inode_dirty(handle, dst);
866 + ext3_journal_stop(handle, dst);
868 + return SNAP_ERROR(err);
872 + * ext3_get_indirect - get a specific indirect inode from a primary inode
873 + * @primary: primary (direct) inode
874 + * @table: table of @slot + 1 indices in reverse chronological order
875 + * @slot: starting slot number to check for indirect inode number
877 + * We locate an indirect inode from a primary inode using the redirection
878 + * table stored in the primary inode. Because the desired inode may actually
879 + * be in a "newer" slot number than the supplied slot, we are given a table
880 + * of indices in chronological order to search for the correct inode number.
881 + * We walk table from @slot to 0 looking for a non-zero inode to load.
883 + * To only load a specific index (and fail if it does not exist), you can
884 + * pass @table = NULL, and the index number in @slot. If @slot == 0, the
885 + * primary inode data is returned.
887 + * We return a pointer to an inode, or an error. If the indirect inode for
888 + * the given index does not exist, NULL is returned.
890 +static struct inode *ext3_get_indirect(struct inode *primary, int *table,
893 + char buf[EXT3_MAX_SNAP_DATA];
894 + struct snap_ea *snaps;
896 + struct inode *inode = NULL;
897 + int err = 0, index = 0;
899 + if (slot < 0 || slot > EXT3_MAX_SNAPS || !primary)
902 + snap_debug("ino %lu, table %p, slot %d\n", primary->i_ino, table,slot);
904 + err = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
905 + buf, EXT3_MAX_SNAP_DATA);
906 + if (err == -ENOENT) {
908 + } else if (err < 0) {
909 + snap_debug(" attribute read error\n");
912 + snaps = (struct snap_ea *)buf;
914 + /* if table is NULL and there is a slot */
915 + if( !table && slot ) {
917 + ino = le32_to_cpu ( snaps->ino[index] );
918 + if(ino) inode = iget(primary->i_sb, ino);
921 + /* if table is not NULL */
922 + while ( !inode && slot > 0) {
923 + index = table[slot];
924 + ino = le32_to_cpu ( snaps->ino[index] );
926 + snap_debug("snap inode at slot %d is %lu\n", slot, ino);
931 + inode = iget(primary->i_sb, ino);
934 + if( slot == 0 && table ) {
935 + snap_debug("redirector not found, using primary\n");
936 + inode = iget(primary->i_sb, primary->i_ino);
942 +/* get the indirect ino at index of the primary inode
943 + * return value: positive: indirect ino number
944 + * negative or 0: error
946 +static ino_t ext3_get_indirect_ino(struct inode *primary, int index)
948 + char buf[EXT3_MAX_SNAP_DATA];
949 + struct snap_ea *snaps;
953 + if (index < 0 || index > EXT3_MAX_SNAPS || !primary)
956 + err = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
957 + buf, EXT3_MAX_SNAP_DATA);
958 + if (err == -ENOATTR) {
961 + } else if (err < 0) {
962 + snap_err(EXT3_SNAP_ATTR " attribute read error\n");
967 + snaps = (struct snap_ea *)buf;
968 + ino = le32_to_cpu (snaps->ino[index]);
969 + snap_debug("snap ino for %ld at index %d is %lu\n",
970 + primary->i_ino, index, ino);
974 +/* ext3_copy_block - copy one data block from inode @src to @dst.
975 + No lock here. User should do the lock.
976 + User should check the return value to see if the result is correct.
978 + 1: The block has been copied successfully
979 + 0: No block is copied, usually this is because src has no such blk
983 +static int ext3_copy_block (struct inode *dst, struct inode *src, int blk)
985 + struct buffer_head *bh_dst, *bh_src;
988 + handle_t *handle = NULL;
990 + if (!ext3_bmap(src->i_mapping, blk))
994 + * ext3_getblk() require handle!=NULL
996 + journal_data = !S_ISREG(src->i_mode);
998 + handle = ext3_journal_start(dst, SNAP_COPYBLOCK_TRANS_BLOCKS);
1002 + bh_src = ext3_bread(handle, src, blk, 0, &err);
1004 + snap_err("error for src blk %d, error %d\n", blk, err);
1008 + bh_dst = ext3_getblk(handle, dst, blk, 1, &err);
1010 + snap_err("error for dst blk %d, error %d\n", blk, err);
1012 + goto exit_rels_src;
1014 + snap_debug("copy block %lu to %lu (%ld bytes)\n",
1015 + bh_src->b_blocknr, bh_dst->b_blocknr,
1016 + src->i_sb->s_blocksize);
1019 + ext3_journal_get_write_access(handle, bh_dst);
1022 + memcpy(bh_dst->b_data, bh_src->b_data, src->i_sb->s_blocksize);
1025 + ext3_journal_dirty_metadata(handle, bh_dst);
1027 + mark_buffer_dirty(bh_dst);
1028 + if (IS_SYNC(src)) {
1029 + ll_rw_block (WRITE, 1, &bh_dst);
1030 + wait_on_buffer (bh_dst);
1037 + ext3_journal_stop(handle, dst);
1041 +#ifdef EXT3_ENABLE_SNAP_ORPHAN
1043 + * add one inode to superblock's snap_orphan chain
1044 + * only add on-disk data for simplicity
1046 +static void add_snap_orphan(handle_t *handle, struct inode *pri, struct inode *ind)
1048 + struct ext3_sb_info *sb = &pri->i_sb->u.ext3_sb;
1049 + struct ext3_iloc iloc;
1051 + if( ext3_get_inode_loc(ind, &iloc) ){
1052 + snap_debug("--- get ind loc fail\n");
1057 + snap_debug("add new ind inode %lu into orphan list,"
1058 + " primary %lu, last orphan %u\n",
1059 + ind->i_ino, pri->i_ino,
1060 + sb->s_es->s_last_snap_orphan);
1061 + lock_super(pri->i_sb);
1062 + iloc.raw_inode->i_next_snap_orphan = sb->s_es->s_last_snap_orphan;
1063 + iloc.raw_inode->i_snap_primary = pri->i_ino;
1064 + ext3_mark_inode_dirty(handle, ind);
1066 + ext3_journal_get_write_access(handle, sb->s_sbh);
1067 + sb->s_es->s_last_snap_orphan = ind->i_ino;
1068 + pri->i_sb->s_dirt = 1;
1069 + ext3_journal_dirty_metadata(handle, sb->s_sbh);
1070 + unlock_super(pri->i_sb);
1075 + * counterpart of add_snap_orphan
1077 +static void remove_snap_orphan(handle_t *handle, struct inode *ind)
1079 + struct ext3_sb_info *sb = &ind->i_sb->u.ext3_sb;
1080 + struct inode *pre = NULL, *inode = NULL;
1081 + struct ext3_iloc iloc, pre_iloc;
1084 + lock_super(ind->i_sb);
1085 + for(ino = sb->s_es->s_last_snap_orphan; ino; ){
1086 + snap_debug("found an orphan, ino=%lu\n", ino);
1087 + inode = iget( ind->i_sb, ino );
1089 + snap_debug("iget %lu fail\n", ino);
1092 + if( ext3_get_inode_loc(inode, &iloc) ){
1093 + snap_debug("get_inode_loc %lu fail\n", ino);
1096 + if( ino == ind->i_ino ){
1098 + snap_debug("found at head of orphan chain\n");
1099 + ext3_journal_get_write_access(handle, sb->s_sbh);
1100 + sb->s_es->s_last_snap_orphan =
1101 + iloc.raw_inode->i_next_snap_orphan;
1102 + ext3_journal_dirty_metadata(handle, sb->s_sbh);
1103 + snap_debug("set new last orphan: %u\n",
1104 + sb->s_es->s_last_snap_orphan);
1108 + snap_debug("found in middle of orphan chain\n");
1109 + if( ext3_get_inode_loc(pre, &pre_iloc) ){
1110 + snap_err("get pre_inode loc %lu fail\n", pre->i_ino);
1113 + pre_iloc.raw_inode->i_next_snap_orphan =
1114 + iloc.raw_inode->i_next_snap_orphan;
1115 + ext3_mark_inode_dirty(handle, pre);
1116 + brelse(pre_iloc.bh);
1122 + ino = iloc.raw_inode->i_next_snap_orphan;
1127 + unlock_super(ind->i_sb);
1132 + * FIXME: what if it crashes again during recovery?
1134 +void snap_orphan_cleanup(struct super_block *sb)
1136 + ino_t ind_ino, pri_ino;
1137 + struct inode *ind = NULL, *pri = NULL;
1138 + struct ext3_iloc ind_iloc;
1140 + if( (ind_ino = sb->u.ext3_sb.s_es->s_last_snap_orphan) == 0 ){
1141 + snap_debug("snap_orphan_cleanup: nothing to do\n");
1145 + snap_debug("------ begin cleanup snap orphans ------\n");
1147 + ind = iget( sb, ind_ino );
1149 + snap_err("snap_orphan_cleanup: get "
1150 + "ind %lu fail\n", ind_ino);
1154 + if( ext3_get_inode_loc(ind, &ind_iloc) ){
1155 + snap_err("snap_orphan_cleanup: get "
1156 + "iloc %lu fail\n", ind_ino);
1161 + ind_ino = sb->u.ext3_sb.s_es->s_last_snap_orphan =
1162 + ind_iloc.raw_inode->i_next_snap_orphan;
1163 + pri_ino = ind_iloc.raw_inode->i_snap_primary;
1165 + pri = iget( sb, pri_ino );
1167 + snap_err("snap_orphan_cleanup: get primary "
1168 + "%lu fail\n", pri_ino);
1171 + restore_snap_inode(pri, ind);
1172 + }while( ind_ino );
1173 + snap_debug("------ end cleanup snap orphans ------\n");
1175 + sb->u.ext3_sb.s_es->s_last_snap_orphan = 0;
1180 + * reverse operation of set_indirect()
1181 + * we should determine whether we had put pri into primary inode chain,
1182 + * if not, don't touch it
/*
 * unset_indirect - drop @ind from the snapshot EA stored on @pri.
 * @handle: journal handle handed to ext3_xattr_set() and
 *          delete_cowed_ino_from_list()
 * @pri: primary inode whose EXT3_SNAP_ATTR attribute is read and rewritten
 * @ind: indirect inode whose i_ino is looked up in the EA
 *
 * The EA is read into a stack buffer and snaps->ino[] is scanned from index
 * EXT3_MAX_SNAPS-1 down to 0 for ind->i_ino. The ino and parent_ino slots at
 * index found are zeroed and the buffer is written back with
 * ext3_xattr_set(). delete_cowed_ino_from_list() is called for @pri between
 * lock_list() and unlock_list().
 *
 * NOTE(review): this excerpt lacks several lines of the function (braces,
 * returns, the assignments to found and alone), so the exact condition
 * guarding each step cannot be confirmed from here.
 * NOTE(review): the result of ext3_xattr_set() is discarded on the visible
 * call -- confirm whether a failure should be reported.
 * NOTE(review): snaps->ino[index] is compared with ind->i_ino without
 * le32_to_cpu(), while ext3_destroy_indirect() converts the same field with
 * le32_to_cpu() -- confirm the intended byte order handling.
 */
1184 +static void unset_indirect(handle_t *handle, struct inode *pri, struct inode *ind)
1186 + char buf[EXT3_MAX_SNAP_DATA];
1187 + struct snap_ea *snaps;
1188 + int err, alone=1, index, found;
1190 + snap_debug("pri %lu, ind %lu\n", pri->i_ino, ind->i_ino);
1191 + err = ext3_xattr_get(pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR, buf,
1192 + EXT3_MAX_SNAP_DATA);
1194 + if( err == -ENOATTR ){
1195 + snap_debug("primary inode has not EA\n");
1198 + snap_debug("get EA error on primary inode,"
1199 + "returned value %d\n", err);
1204 + /* find ind's item in the ea */
1205 + snaps = (struct snap_ea*)buf;
1206 + for(index=EXT3_MAX_SNAPS-1, found=-1; index>=0; index--) {
1207 + if( snaps->ino[index] == ind->i_ino )
1209 + else if( snaps->ino[index] )
1214 + snap_debug("remove from primary inode's EA\n");
1215 + snaps->ino[found] = 0;
1216 + snaps->parent_ino[found] = 0;
1217 + ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
1218 + buf, EXT3_MAX_SNAP_DATA, 0);
1220 + snap_debug("delete from primary inodes chain\n");
1221 + lock_list(pri->i_sb);
1222 + delete_cowed_ino_from_list(handle, pri);
1223 + unlock_list(pri->i_sb);
1226 + snap_debug("didn't found ind in pri's EA, do nothing\n");
1235 + * restore all data in @ind to @pri after free data blocks of @pri.
1236 + * then release @ind
/*
 * restore_snap_inode - move the data of @ind back into @pri.
 * @pri: primary inode that receives the data
 * @ind: indirect inode that currently holds the data
 *
 * Runs inside its own journal handle, started on @pri with
 * SNAP_RESTOREORPHAN_TRANS_BLOCKS credits and stopped at the end.
 * Visible steps, in order:
 *   1. unset_indirect() is called for the pair;
 *   2. if pri->i_blocks is non-zero, a scratch inode is obtained from
 *      ext3_new_inode() and ext3_migrate_data(tmp, pri) is called on it;
 *   3. ext3_migrate_data(pri, ind) is called;
 *   4. the "final: delete ind inode" step, whose statements are not present
 *      in this excerpt.
 *
 * NOTE(review): the declaration of handle, the error checks after
 * ext3_journal_start() and ext3_new_inode(), and the disposal of tmp are not
 * visible here -- confirm them against the full file.
 * NOTE(review): the results of both ext3_migrate_data() calls are discarded
 * on the visible lines.
 */
1238 +static void restore_snap_inode(struct inode *pri, struct inode *ind)
1241 + struct inode *tmp;
1243 + snap_debug("restore from indirect %lu to primary %lu\n",
1244 + ind->i_ino, pri->i_ino);
1246 + handle = ext3_journal_start(pri, SNAP_RESTOREORPHAN_TRANS_BLOCKS);
1250 + /* first: taken from pri's ea, or from fs-wide primary inode chain */
1251 + unset_indirect(handle, pri, ind);
1253 + /* second: throw out half-copied data in pri */
1254 + if( pri->i_blocks ){
1255 + tmp = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
1257 + snap_debug("ext3_new_inode error\n");
1261 + ext3_migrate_data(tmp, pri);
1262 + snap_debug("freeing half-copied %lu blocks\n", tmp->i_blocks );
1267 + /* third: restore ind inode to pri inode */
1268 + snap_debug("restore %lu blocks to primary inode %lu\n",
1269 + ind->i_blocks, pri->i_ino);
1270 + ext3_migrate_data(pri, ind);
1272 + /* final: delete ind inode */
1278 + ext3_journal_stop(handle, pri);
/*
 * ext3_copy_data - copy the mapped blocks of @src into @dst.
 * @handle: running journal handle; it may be stopped and replaced here
 * @dst: inode receiving the copies
 * @src: inode whose blocks are copied
 * @has_orphan: flag read before a handle restart; when it is zero, @dst is
 *              first passed to add_snap_orphan() or ext3_orphan_add()
 *              (selected by EXT3_ENABLE_SNAP_ORPHAN)
 *
 * The loop covers logical blocks 0 .. ceil(src->i_size / blocksize) - 1,
 * tests each one with ext3_bmap() on src and copies with ext3_copy_block().
 * cur_blks tracks the 512-byte sector count added per copied block. When
 * handle->h_buffer_credits has dropped to low_credits or below, more credits
 * are requested with journal_extend(), capped at
 * 4 * SNAP_COPYBLOCK_TRANS_BLOCKS. If the extension fails, the progress so
 * far is recorded in @dst (i_disksize, i_blocks, i_mtime), the handle is
 * stopped with h_ref forced to 1 and a new handle is started with
 * low_credits + needed credits; the saved h_ref is then put back.
 * After the loop dst->i_size and i_disksize are set to src->i_size.
 *
 * The declared return type is handle_t * and the callers in
 * ext3_create_indirect() assign the result back to their handle; presumably
 * the possibly-restarted handle is returned -- the return statements are not
 * present in this excerpt, TODO confirm.
 *
 * NOTE(review): needed is an int computed from the unsigned long expression
 * (blocks - blk) * EXT3_DATA_TRANS_BLOCKS before the cap is applied; for very
 * large files the value may not fit in int -- confirm.
 * NOTE(review): low_credits is h_buffer_credits - SNAP_BIGCOPY_TRANS_BLOCKS
 * and no visible line checks it for being negative before it is used in
 * ext3_journal_start(dst, low_credits + needed).
 */
1284 +static handle_t * ext3_copy_data(handle_t *handle, struct inode *dst,
1285 + struct inode *src, int *has_orphan)
1287 + unsigned long blocks, blk, cur_blks;
1288 + int low_credits, save_ref;
1290 + blocks =(src->i_size + src->i_sb->s_blocksize-1) >>
1291 + src->i_sb->s_blocksize_bits;
1292 + low_credits = handle->h_buffer_credits - SNAP_BIGCOPY_TRANS_BLOCKS;
1294 + snap_debug("%lu blocks need to be copied,"
1295 + "low credits limit %d\n", blocks, low_credits);
1296 + for (blk = 0, cur_blks= dst->i_blocks; blk < blocks; blk++) {
1297 + if (!ext3_bmap(src->i_mapping, blk))
1299 + if(handle->h_buffer_credits <= low_credits) {
1300 + int needed = (blocks - blk) * EXT3_DATA_TRANS_BLOCKS;
1301 + if (needed > 4 * SNAP_COPYBLOCK_TRANS_BLOCKS)
1302 + needed = 4 * SNAP_COPYBLOCK_TRANS_BLOCKS;
1303 + if (journal_extend(handle, needed)) {
1304 + snap_debug("create_indirect:fail to extend "
1305 + "journal, restart trans\n");
1307 + if( !*has_orphan ){
1308 +#ifdef EXT3_ENABLE_SNAP_ORPHAN
1309 + add_snap_orphan(handle, dst, src);
1311 + ext3_orphan_add(handle, dst);
1315 + dst->u.ext3_i.i_disksize =
1316 + blk * dst->i_sb->s_blocksize;
1317 + dst->i_blocks = cur_blks;
1318 + dst->i_mtime = CURRENT_TIME;
1319 + ext3_mark_inode_dirty(handle, dst);
1322 + * We can be sure the last handle was stoped
1323 + * ONLY if the handle's reference count is 1
1325 + save_ref = handle->h_ref;
1326 + handle->h_ref = 1;
1327 + if( ext3_journal_stop(handle, dst) ){
1328 + snap_err("fail to stop journal\n");
1333 + handle = ext3_journal_start(dst,
1334 + low_credits + needed);
1336 + snap_err("fail to restart handle\n");
1339 + handle->h_ref = save_ref;
1342 + if (ext3_copy_block( dst, src, blk) < 0 )
1344 + cur_blks += dst->i_sb->s_blocksize / 512;
1346 + dst->i_size = dst->u.ext3_i.i_disksize = src->i_size;
/*
 * ext3_set_generation - store @gen in the EXT3_SNAP_GENERATION_ATTR xattr.
 * @inode: inode that receives the attribute
 * @gen: generation value to store
 *
 * Starts its own journal handle with EXT3_XATTR_TRANS_BLOCKS credits, calls
 * ext3_xattr_set() and stops the handle. snap_err() reports the inode number
 * and err (presumably only when err is non-zero; the guarding line and the
 * return statements are not present in this excerpt).
 *
 * NOTE(review): @gen is unsigned long but only sizeof(int) bytes starting at
 * &gen are written. Where long is wider than int this stores just the first
 * sizeof(int) bytes of the object (the high-order bytes on big-endian
 * machines), while ext3_get_generation() reads sizeof(gen) of its own local
 * -- confirm that both sides agree on the stored width.
 */
1351 +static int ext3_set_generation(struct inode *inode, unsigned long gen)
1356 + handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
1358 + err = ext3_xattr_set(handle, inode, EXT3_SNAP_INDEX, EXT3_SNAP_GENERATION_ATTR,
1359 + (char*)&gen, sizeof(int), 0);
1361 + snap_err("ino %lu, set_ext_attr err %d\n", inode->i_ino, err);
1365 + ext3_journal_stop(handle, inode);
/*
 * ext3_get_generation - read the EXT3_SNAP_GENERATION_ATTR xattr of @inode.
 * @inode: inode to query
 *
 * Calls ext3_xattr_get() with a buffer of sizeof(gen) bytes at &gen. An
 * -ENODATA result has its own branch, and snap_err() reports a read that
 * could not be completed. The declaration of gen, the body of the -ENODATA
 * branch and the return statements are not present in this excerpt.
 *
 * NOTE(review): a missing attribute is tested as -ENODATA here, as -ENOATTR
 * in unset_indirect() and as -ENOENT in ext3_destroy_indirect() -- confirm
 * which value ext3_xattr_get() returns in this tree.
 */
1369 +static int ext3_get_generation(struct inode *inode)
1373 + err = ext3_xattr_get(inode, EXT3_SNAP_INDEX, EXT3_SNAP_GENERATION_ATTR,
1374 + (char*)&gen, sizeof(gen));
1376 + if (err == -ENODATA) {
1379 + snap_err("can not get generation from %lu \n", inode->i_ino);
1386 + * ext3_create_indirect - copy data, attributes from primary to new indir inode
1387 + * @pri: primary (source) inode
1388 + * @index: index in snapshot table where indirect inode should be stored
1389 + * @delete: flag that the primary inode is being deleted
1391 + * We copy all of the data blocks from the @*src inode to the @*dst inode, as
1392 + * well as copying the attributes from @*src to @*dst. If @delete == 1, then
1393 + * the primary inode will only be a redirector and will appear deleted.
1395 + * FIXME do we move EAs, only non-snap EAs, what?
1396 + * FIXME we could do readpage/writepage, but we would have to handle block
1397 + * allocation then, and it ruins sparse files for 1k/2k filesystems,
1398 + * at the expense of doing a memcpy.
/*
 * ext3_create_indirect - create the indirect inode for @pri at a snapshot
 * index and move or copy the data of @pri into it.
 *
 * The body uses index, gen, parent_ino and del, whose parameter declarations
 * are not present in this excerpt (only @pri is visible in the signature).
 *
 * Visible flow:
 *   - the journal inode is refused with a KERN_EMERG message;
 *   - ext3_get_indirect(pri, NULL, index) looks for an existing indirect
 *     inode and a handle is started with SNAP_CREATEIND_TRANS_BLOCKS;
 *   - del set and an indirect inode already present: for non-directories the
 *     data of @pri is migrated into a scratch inode from ext3_new_inode()
 *     while holding its i_sem; i_dtime of @pri is set and @pri marked dirty;
 *   - an indirect inode already present otherwise: reported via snap_err();
 *   - else a new inode is allocated, takes i_op from @pri and gets its
 *     generation through ext3_set_generation();
 *   - delete path: ext3_migrate_data(ind, pri) under ind->i_sem, then
 *     ext3_set_indirect(); for directories the blocks are copied back with
 *     ext3_copy_data(); EXT3_DEL_FL is set on @pri and EXT3_COW_FL on ind,
 *     i_nlink of a regular file is set to 1 and i_dtime is stamped;
 *   - preserve path: ext3_migrate_data(ind, pri), then either block-level
 *     COW (feature EXT3_FEATURE_COMPAT_BLOCKCOW and a regular file; only
 *     i_size is restored) or file COW, where a fast symlink gets i_data
 *     copied back with memcpy() and other inodes go through
 *     ext3_copy_data(); EXT3_COW_FL is set on ind and cleared on @pri, then
 *     ext3_set_indirect() is called;
 *   - EXT3_FEATURE_COMPAT_SNAPFS is set in the superblock under lock_super()
 *     if it was not set;
 *   - the orphan entry of ind is removed (remove_snap_orphan() or
 *     ext3_orphan_del()) and the handle is stopped;
 *   - a second ext3_journal_stop() followed by the "exiting with error"
 *     message forms what looks like the error exit.
 *
 * NOTE(review): braces, else lines, error checks, labels and returns are
 * missing from this excerpt, so the nesting above is inferred from the
 * visible statements and comments -- confirm against the full file.
 * NOTE(review): the results of ext3_set_generation() and of
 * ext3_journal_get_write_access() are discarded on the visible lines.
 * NOTE(review): the KERN_EMERG message spells JOURNAL as JOUNRAL; it is a
 * runtime string and is left untouched here.
 */
1401 +static struct inode *ext3_create_indirect(
1402 + struct inode *pri,
1408 + struct inode *ind;
1409 + handle_t *handle = NULL;
1411 + int has_orphan = 0;
1413 + if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
1414 + printk( KERN_EMERG "TRY TO COW JOUNRAL\n");
1417 + snap_debug("creating indirect inode for %lu at index %d, %s pri\n",
1418 + pri->i_ino, index, del ? "deleting" : "preserve");
1420 + ind = ext3_get_indirect(pri, NULL, index);
1424 + handle = ext3_journal_start(pri, SNAP_CREATEIND_TRANS_BLOCKS);
1427 + /* XXX ? We should pass an err argument to get_indirect and precisely
1428 + * detect the errors, for some errors, we should exit right away.
1431 + /* if the option is SNAP_DEL_PRI_WITH_IND and there is an indirect,
1432 + * we just free the primary data blocks and mark this inode delete
1434 + if((del) && ind && !IS_ERR(ind)) {
1435 + struct inode *tmp;
1436 + /* for directory, we don't free the data blocks,
1437 + * or ext3_rmdir will report errors "bad dir, no data blocks"
1439 + snap_debug("del==SNAP_DEL_PRI_WITH_IND && ind\n");
1440 + if(!S_ISDIR(pri->i_mode)) {
1441 + /*Here delete the data of that pri inode.
1442 + * FIXME later, should throw the blocks of
1443 + * primary inode directly
1445 + tmp = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
1447 + down(&tmp->i_sem);
1448 + ext3_migrate_data(tmp, pri);
1454 + snap_err("ext3_new_inode error\n");
1459 + pri->u.ext3_i.i_dtime = CURRENT_TIME;
1460 + ext3_mark_inode_dirty(handle, pri);
1465 + if (ind && !IS_ERR(ind)) {
1466 + snap_err("existing indirect ino %lu for %lu: index %d\n",
1467 + ind->i_ino, pri->i_ino, index);
1471 + /* XXX: check this, ext3_new_inode, the first arg should be "dir" */
1472 + ind = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
1478 + snap_debug("got new inode %lu\n", ind->i_ino);
1479 + ind->i_op = pri->i_op;
1480 + ext3_set_generation(ind, (unsigned long)gen);
1481 + /* If we are deleting the primary inode, we want to ensure that it is
1482 + * written to disk with a non-zero link count, otherwise the next iget
1483 + * and iput will mark the inode as free (which we don't want, we want
1484 + * it to stay a redirector). We fix this in ext3_destroy_indirect()
1485 + * when the last indirect inode is removed.
1487 + * We then do what ext3_delete_inode() does so that the metadata will
1488 + * appear the same as a deleted inode, and we can detect it later.
1491 + snap_debug("deleting primary inode\n");
1493 + down(&ind->i_sem);
1494 + err = ext3_migrate_data(ind, pri);
1498 + err = ext3_set_indirect(pri, index, ind->i_ino, parent_ino);
1502 + /* XXX for directory, we copy the block back
1503 + * or ext3_rmdir will report errors "bad dir, no data blocks"
1505 + if( S_ISDIR(pri->i_mode)) {
1506 + handle = ext3_copy_data( handle, pri, ind, &has_orphan );
1511 + pri->u.ext3_i.i_flags |= EXT3_DEL_FL;
1512 + ind->u.ext3_i.i_flags |= EXT3_COW_FL;
1513 + if(S_ISREG(pri->i_mode)) pri->i_nlink = 1;
1514 + pri->u.ext3_i.i_dtime = CURRENT_TIME;
1515 + //pri->u.ext3_i.i_generation++;
1516 + ext3_mark_inode_dirty(handle, pri);
1517 + ext3_mark_inode_dirty(handle, ind);
1520 + down(&ind->i_sem);
1521 + err = ext3_migrate_data(ind, pri);
1525 + /* for regular files we do blocklevel COW's maybe */
1526 + if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW)
1527 + && S_ISREG(pri->i_mode)) {
1529 + snap_debug("ino %lu, do block cow\n",pri->i_ino);
1530 + /* because after migrate_data , pri->i_size is 0 */
1531 + pri->i_size = ind->i_size;
1534 + int bpib = pri->i_sb->s_blocksize >> 9;
1535 + snap_debug("ino %lu, do file cow\n", pri->i_ino);
1537 + /* XXX: can we do this better?
1538 + * If it's a fast symlink, we should copy i_data back!
1539 + * The criteria to determine a fast symlink is:
1540 + * 1) it's a link and its i_blocks is 0
1541 + * 2) it's a link and its i_blocks is bpib ( the case
1542 + * it has been cowed and has ea )
1544 + if( S_ISLNK(ind->i_mode) &&
1545 + (( ind->i_blocks == 0) || (ext3_has_ea(ind) && ind->i_blocks == bpib )) ){
1546 + snap_debug("ino %lu is fast symlink\n",
1548 + memcpy(EXT3_I(pri)->i_data,
1549 + EXT3_I(ind)->i_data,
1550 + sizeof(EXT3_I(ind)->i_data));
1551 + pri->i_size = ind->i_size;
1554 + handle = ext3_copy_data(handle, pri, ind, &has_orphan);
1559 + /* set cow flag for ind */
1560 + ind->u.ext3_i.i_flags |= EXT3_COW_FL;
1561 + pri->u.ext3_i.i_flags &= ~EXT3_COW_FL;
1563 + ext3_mark_inode_dirty(handle, pri);
1564 + ext3_mark_inode_dirty(handle, ind);
1566 + err = ext3_set_indirect(pri, index, ind->i_ino, parent_ino);
1573 + if (!EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
1574 + EXT3_FEATURE_COMPAT_SNAPFS)) {
1575 + lock_super(pri->i_sb);
1576 + ext3_journal_get_write_access(handle, pri->i_sb->u.ext3_sb.s_sbh);
1577 + pri->i_sb->u.ext3_sb.s_es->s_feature_compat |=
1578 + cpu_to_le32(EXT3_FEATURE_COMPAT_SNAPFS);
1579 + ext3_journal_dirty_metadata(handle, pri->i_sb->u.ext3_sb.s_sbh);
1580 + pri->i_sb->s_dirt = 1;
1581 + unlock_super(pri->i_sb);
1584 +#ifdef EXT3_ENABLE_SNAP_ORPHAN
1585 + remove_snap_orphan(handle, ind);
1587 + ext3_orphan_del(handle, ind);
1590 + ext3_journal_stop(handle, pri);
1601 + ext3_journal_stop(handle, pri);
1602 + snap_debug("exiting with error %d\n", err);
1607 +/* The following functions are used by destroy_indirect */
/*
 * inode_bmap(inode, nr) reads slot nr of EXT3_I(inode)->i_data and
 * inode_setbmap(inode, nr, physical) stores physical into that slot.
 * Neither macro performs any byte-order conversion or bounds check on nr.
 */
1608 +#define inode_bmap(inode, nr) (EXT3_I(inode)->i_data[(nr)])
1609 +#define inode_setbmap(inode, nr, physical) (EXT3_I(inode)->i_data[(nr)]=(physical))
/*
 * block_bmap - read entry @nr from the block-pointer array held in @bh.
 * @bh: buffer whose b_data is treated as an array of little-endian u32
 * @nr: index of the entry to read
 *
 * The entry is converted with le32_to_cpu() into tmp.
 * NOTE(review): callers in ext3_migrate_block() pass the result of bread()
 * straight in; whether @bh is checked for NULL or released here cannot be
 * seen in this excerpt -- confirm.
 */
1611 +static inline int block_bmap (struct buffer_head * bh, int nr)
1617 + tmp = le32_to_cpu(((u32 *) bh->b_data)[nr]);
/*
 * block_setbmap - store @physical into entry @nr of the block-pointer array
 * held in @bh, as a journaled metadata update.
 * @handle: journal handle used for the write-access / dirty-metadata pair
 * @bh: buffer whose b_data is treated as an array of little-endian u32
 * @nr: index of the entry to write
 * @physical: block number to store (converted with cpu_to_le32())
 *
 * NOTE(review): the result of ext3_journal_get_write_access() is discarded
 * on the visible line; NULL handling and release of @bh are not visible in
 * this excerpt -- confirm.
 */
1622 +static inline int block_setbmap (handle_t *handle, struct buffer_head * bh, int nr, int physical)
1627 + ext3_journal_get_write_access(handle, bh);
1628 + ((u32 *) bh->b_data)[nr] = cpu_to_le32(physical);
1629 + ext3_journal_dirty_metadata(handle, bh);
/*
 * ext3_migrate_block - hand the mapping of one logical block from @src to
 * @dst by rewriting block pointers instead of copying data.
 * @handle: journal handle passed to block_setbmap()
 * @dst: inode that receives the block pointer
 * @src: inode that gives the block pointer up
 * @block: logical block number within the file
 *
 * The logical block is located in the direct slots, then the single,
 * double and triple indirect trees (EXT3_IND_BLOCK, EXT3_DIND_BLOCK,
 * EXT3_TIND_BLOCK). At each level the visible code reads the slot of @dst
 * and of @src; where the direct and single/double indirect top slots are
 * handled, the pointer of @src is stored into @dst and the slot of @src is
 * set to 0. Inside indirect blocks the same exchange is done with
 * block_bmap() / block_setbmap() on buffers obtained from bread().
 * Several visible paths return 0 when @dst already maps the block or @src
 * has nothing to give.
 *
 * NOTE(review): braces, some conditions and most return statements are
 * missing from this excerpt, so the description follows the visible
 * statements only.
 * NOTE(review): every bread() result is passed directly to block_bmap() or
 * block_setbmap(); whether those helpers check for NULL and release the
 * buffer is not visible here -- confirm there is no buffer_head leak.
 * NOTE(review): in the EXT3_TIND_BLOCK section the if that copies the top
 * pointer has no opening brace and no inode_setbmap(src, ..., 0) is visible,
 * unlike the IND and DIND sections -- confirm this is intended.
 * NOTE(review): the upper bound test shifts the int constant 1 by up to
 * three times addr_per_block_bits; confirm this cannot exceed the width of
 * int for the supported block sizes.
 */
1634 +static int ext3_migrate_block (handle_t *handle, struct inode * dst, struct inode *src, int block)
1636 + int i1_d=0, i1_s=0, i2_d=0, i2_s=0, i3_d=0, i3_s=0;
1637 + int addr_per_block = EXT3_ADDR_PER_BLOCK(src->i_sb);
1638 + int addr_per_block_bits = EXT3_ADDR_PER_BLOCK_BITS(src->i_sb);
1639 + unsigned long blksz = src->i_sb->s_blocksize;
1640 + kdev_t ddev = dst->i_dev;
1641 + kdev_t sdev = src->i_dev;
1645 + ext3_warning (src->i_sb, "ext3_migrate_block", "block < 0");
1648 + if (block >= EXT3_NDIR_BLOCKS + addr_per_block +
1649 + (1 << (addr_per_block_bits * 2)) +
1650 + ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
1651 + ext3_warning (src->i_sb, "ext3_migrate_block", "block > big");
1654 + /* EXT3_NDIR_BLOCK */
1655 + if (block < EXT3_NDIR_BLOCKS) {
1656 + if( inode_bmap(dst, block) ) return 0;
1658 + if( (physical = inode_bmap(src, block)) ) {
1659 + inode_setbmap (dst, block, physical);
1660 + inode_setbmap (src, block, 0);
1667 + /* EXT3_IND_BLOCK */
1668 + block -= EXT3_NDIR_BLOCKS;
1669 + if (block < addr_per_block) {
1670 + i1_d = inode_bmap (dst, EXT3_IND_BLOCK);
1673 + physical = inode_bmap(src, EXT3_IND_BLOCK);
1675 + inode_setbmap (dst, EXT3_IND_BLOCK, physical);
1676 + inode_setbmap (src, EXT3_IND_BLOCK, 0);
1682 + if( block_bmap (bread (ddev, i1_d, blksz), block ))
1685 + i1_s = inode_bmap (src, EXT3_IND_BLOCK);
1686 + if( !i1_s) return 0;
1688 + physical = block_bmap ( bread (sdev, i1_s, blksz), block );
1691 + block_setbmap(handle, bread(ddev, i1_d, blksz),block,physical);
1692 + block_setbmap(handle, bread(sdev, i1_s, blksz), block, 0);
1698 + /* EXT3_DIND_BLOCK */
1699 + block -= addr_per_block;
1700 + if (block < (1 << (addr_per_block_bits * 2))) {
1701 + i1_d = inode_bmap (dst, EXT3_DIND_BLOCK);
1702 + i1_s = inode_bmap (src, EXT3_DIND_BLOCK);
1704 + if( (physical = inode_bmap(src, EXT3_DIND_BLOCK)) ) {
1705 + inode_setbmap (dst, EXT3_DIND_BLOCK, physical);
1706 + inode_setbmap (src, EXT3_DIND_BLOCK, 0);
1712 + i2_d = block_bmap (bread (ddev, i1_d, blksz),
1713 + block >> addr_per_block_bits);
1717 + if( !i1_s) return 0;
1719 + physical = block_bmap (bread (sdev, i1_s, blksz),
1720 + block >> addr_per_block_bits);
1722 + block_setbmap (handle, bread (ddev, i1_d, blksz),
1723 + block >> addr_per_block_bits, physical);
1724 + block_setbmap (handle, bread (sdev, i1_s, blksz),
1725 + block >> addr_per_block_bits, 0);
1731 + physical = block_bmap (bread (ddev, i2_d,
1733 + block & (addr_per_block - 1));
1737 + i2_s = block_bmap (bread (sdev, i1_s,
1739 + block >> addr_per_block_bits);
1740 + if(!i2_s) return 0;
1742 + physical = block_bmap(bread (sdev, i2_s,
1744 + block & (addr_per_block - 1));
1746 + block_setbmap(handle, bread (ddev, i2_d, blksz),
1747 + block & (addr_per_block - 1), physical);
1748 + block_setbmap(handle, bread (sdev, i2_s, blksz),
1749 + block & (addr_per_block - 1), 0);
1757 + /* EXT3_TIND_BLOCK */
1758 + block -= (1 << (addr_per_block_bits * 2));
1759 + i1_d = inode_bmap (dst, EXT3_TIND_BLOCK);
1760 + i1_s = inode_bmap (src, EXT3_TIND_BLOCK);
1762 + if( (physical = inode_bmap(src, EXT3_TIND_BLOCK)) )
1763 + inode_setbmap (dst, EXT3_TIND_BLOCK, physical);
1767 + i2_d = block_bmap (bread (ddev, i1_d, blksz),
1768 + block >> (addr_per_block_bits * 2));
1770 + if(i1_s) i2_s = block_bmap (bread (sdev, i1_s, blksz),
1771 + block >> (addr_per_block_bits * 2));
1775 + if( !i1_s) return 0;
1777 + physical = block_bmap (bread (sdev, i1_s, blksz),
1778 + block >> (addr_per_block_bits * 2));
1780 + block_setbmap (handle, bread (ddev, i1_d, blksz),
1781 + block >> (addr_per_block_bits * 2), physical);
1782 + block_setbmap (handle, bread (sdev, i1_s, blksz),
1783 + block >> (addr_per_block_bits * 2), 0);
1789 + i3_d = block_bmap (bread (ddev, i2_d, blksz),
1790 + (block >> addr_per_block_bits) & (addr_per_block - 1));
1791 + if( i2_s) i3_s = block_bmap (bread (sdev, i2_s, blksz),
1792 + (block >> addr_per_block_bits) & (addr_per_block - 1));
1795 + if (!i2_s) return 0;
1796 + physical = block_bmap (bread (sdev, i2_s, blksz),
1797 + (block >> addr_per_block_bits) & (addr_per_block - 1));
1799 + block_setbmap (handle, bread (ddev, i2_d, blksz),
1800 + (block >> addr_per_block_bits) & (addr_per_block - 1),
1802 + block_setbmap (handle, bread (sdev, i2_s, blksz),
1803 + (block >> addr_per_block_bits) & (addr_per_block - 1),
1810 + physical = block_bmap (bread (ddev, i3_d, blksz),
1811 + block & (addr_per_block - 1)) ;
1812 + if(physical) return 0;
1814 + if(!i3_s) return 0;
1815 + physical = block_bmap (bread (sdev, i3_s, blksz),
1816 + block & (addr_per_block - 1)) ;
1818 + block_setbmap (handle, bread (ddev, i3_d, blksz),
1819 + block & (addr_per_block - 1), physical);
1820 + block_setbmap (handle, bread (sdev, i3_s, blksz),
1821 + block & (addr_per_block - 1), 0);
1829 +/* Generate i_blocks from blocks for an inode .
1830 + * We also calculate EA block here.
/*
 * calculate_i_blocks - recompute an i_blocks value (512-byte units) for
 * @inode from its mapped data blocks plus an estimate of metadata blocks.
 * @inode: inode to examine
 * @blocks: number of logical blocks to scan; a negative value means derive
 *          it from inode->i_size
 *
 * Logical blocks 0 .. blocks-1 are tested with ext3_bmap(); the loop body
 * that accumulates the mapped ones is not present in this excerpt. Metadata
 * blocks are then estimated by subtracting EXT3_NDIR_BLOCKS and
 * addr_per_block steps from blocks, one more block is added when
 * ext3_has_ea() is true, and meta_blocks * bpib is added to i_blocks.
 *
 * NOTE(review): bpib and addr_per_block are initialised from inode->i_sb
 * before the if( !inode ) test, so the NULL check cannot protect those
 * dereferences -- confirm callers never pass NULL or move the check first.
 * NOTE(review): declarations of i and j, the increments inside the
 * indirect-level loops and the return statement are missing from this
 * excerpt.
 */
1832 +static unsigned long calculate_i_blocks(struct inode *inode, int blocks)
1834 + /* 512 byte disk blocks per inode block */
1835 + int bpib = inode->i_sb->s_blocksize >> 9;
1836 + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
1837 + unsigned long i_blocks = 0;
1840 + int meta_blocks = 0;
1842 + if( !inode ) return 0;
1844 + if( blocks < 0 ) {
1845 + /* re-calculate blocks here */
1846 + blocks = (inode->i_size + inode->i_sb->s_blocksize-1)
1847 + >> inode->i_sb->s_blocksize_bits;
1850 + /* calculate data blocks */
1851 + for(i = 0; i < blocks; i++ ) {
1852 + if(ext3_bmap(inode->i_mapping, i))
1855 + /* calculate meta blocks */
1856 + blocks -= EXT3_NDIR_BLOCKS;
1857 + if( blocks > 0 ) {
1859 + blocks -= addr_per_block;
1861 + if( blocks > 0 ) meta_blocks++;
1863 + while( (blocks > 0) && (i < addr_per_block) ) {
1865 + blocks -= addr_per_block;
1868 + if ( blocks > 0 ) meta_blocks += 2;
1871 + while( blocks > 0) {
1873 + blocks -= addr_per_block;
1875 + if(i >= addr_per_block ) {
1879 + if( j >= addr_per_block) {
1884 + /* calculate EA blocks */
1885 + if( ext3_has_ea (inode) ) meta_blocks++;
1887 + i_blocks += meta_blocks * bpib;
1888 + snap_debug("ino %lu, get i_blocks %lu\n", inode->i_ino, i_blocks);
1893 + * ext3_destroy_indirect - delete an indirect inode from the table
1894 + * @pri: primary inode
1895 + * @ind: indirect inode
1896 + * @index: index of inode that should be deleted
1898 + * We delete the @*ind inode, and remove it from the snapshot table. If @*ind
1899 + * is NULL, we use the inode at @index.
/*
 * ext3_destroy_indirect - remove the indirect inode recorded at @index in
 * the snapshot EA of @pri.
 * @pri: primary inode carrying the EXT3_SNAP_ATTR attribute
 * @index: slot in snaps->ino[] to clear
 * @next_ind: inode that takes over blocks in the block-level COW case; when
 *            it is NULL, @pri is used instead
 *
 * Visible flow:
 *   - @index is range checked and the journal inode is refused;
 *   - a handle is started with SNAP_DESTROY_TRANS_BLOCKS and the EA is read;
 *   - the indirect inode is fetched with iget() from snaps->ino[index];
 *   - with EXT3_FEATURE_COMPAT_BLOCKCOW on a regular file, every block that
 *     next_ind lacks and ind maps is moved with ext3_migrate_block() under
 *     snap_double_lock(), then i_blocks of next_ind is recomputed with
 *     calculate_i_blocks();
 *   - snaps->ino[index] is cleared and the remaining entries are summed into
 *     save;
 *   - delete_cowed_ino_from_list() is called for @pri under lock_list();
 *   - when SB_FIRST_COWED_INO() is zero, the SNAPFS, BLOCKCOW and EXT_ATTR
 *     compat feature bits are cleared in the superblock under lock_super();
 *   - when save is zero and pri->u.ext3_i.i_dtime is set, the branch that
 *     logs "deleting primary" runs; the other visible branch rewrites the EA
 *     (NULL buffer when save is zero) and restores i_ctime of @pri around
 *     the call;
 *   - the handle is stopped.
 *
 * FAST_MIGRATE_BLOCK is defined immediately before its #ifdef, so the first
 * variant of the block-migration code is the one compiled; the later
 * variants (using double_unlock() and other argument lists) look like
 * disabled alternatives -- the #else / #endif lines are not present in this
 * excerpt, TODO confirm.
 *
 * NOTE(review): the range check accepts index == EXT3_MAX_SNAPS, while
 * unset_indirect() treats EXT3_MAX_SNAPS-1 as the highest slot of
 * snaps->ino[] -- this looks like an off-by-one, confirm against the
 * definition of struct snap_ea.
 * NOTE(review): snap_debug() dereferences ind right after iget(), before the
 * !ind / IS_ERR / is_bad_inode test; this only matters when snap_debug is
 * compiled in.
 * NOTE(review): a missing attribute is tested as -ENOENT here, as -ENOATTR
 * in unset_indirect() and as -ENODATA in ext3_get_generation().
 * NOTE(review): the result of ext3_migrate_block() is discarded.
 */
1901 +static int ext3_destroy_indirect(struct inode *pri, int index,
1902 + struct inode *next_ind)
1904 + char buf[EXT3_MAX_SNAP_DATA];
1905 + struct snap_ea *snaps;
1906 + struct inode *ind;
1910 + handle_t *handle=NULL;
1913 + if (index < 0 || index > EXT3_MAX_SNAPS)
1916 + if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
1917 + printk( KERN_EMERG "TRY TO DESTROY JOURNAL'S IND\n");
1921 + handle = ext3_journal_start(pri, SNAP_DESTROY_TRANS_BLOCKS);
1925 + err = ext3_xattr_get(pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
1926 + buf, EXT3_MAX_SNAP_DATA);
1928 + if (err == -ENOENT)
1929 + snap_err("inode %lu is not a redirector\n", pri->i_ino);
1931 + snap_err(EXT3_SNAP_ATTR " attribute read error\n");
1935 + snaps = (struct snap_ea *)buf;
1936 + if ( !snaps->ino[index] ) {
1937 + snap_err("for pri ino %lu, index %d, redirect ino is 0\n",
1938 + pri->i_ino, index);
1943 + snap_debug("for pri ino %lu, reading inode %lu at index %d\n",
1944 + pri->i_ino, (ulong)le32_to_cpu(snaps->ino[index]), index);
1946 + ind = iget(pri->i_sb, le32_to_cpu (snaps->ino[index]) );
1947 + snap_debug("iget ind %lu, ref count = %d\n", ind->i_ino, ind->i_count);
1949 + if ( !ind || IS_ERR(ind) || is_bad_inode(ind) ) {
1954 + /* if it's block level cow, first copy the blocks back */
1955 + if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW) &&
1956 + S_ISREG(pri->i_mode)) {
1959 + if( !next_ind ) next_ind = pri;
1960 + blocks = (next_ind->i_size + next_ind->i_sb->s_blocksize-1)
1961 + >> next_ind->i_sb->s_blocksize_bits;
1963 +#define FAST_MIGRATE_BLOCK
1964 +#ifdef FAST_MIGRATE_BLOCK
1965 + snap_debug("migrate block back from ino %lu to %lu\n",
1966 + ind->i_ino, next_ind->i_ino);
1968 + snap_double_lock(next_ind, ind);
1969 + for(i = 0; i < blocks; i++) {
1970 + if( ext3_bmap(next_ind->i_mapping, i) )
1972 + if( !ext3_bmap(ind->i_mapping, i) )
1974 + ext3_migrate_block(handle, next_ind, ind, i) ;
1976 + /* Now re-compute the i_blocks */
1977 + /* XXX shall we take care of ind here? probably not */
1978 + next_ind->i_blocks = calculate_i_blocks( next_ind, blocks);
1979 + ext3_mark_inode_dirty(handle, next_ind);
1981 + snap_double_unlock(next_ind, ind);
1984 + snap_double_lock(pri, ind);
1986 + for(i = 0; i < blocks; i++) {
1987 + if( ext3_bmap(pri, i) ) continue;
1988 + if( !ext3_bmap(ind, i) ) continue;
1989 + ext3_migrate_block( pri, ind, i) ;
1991 + /* Now re-compute the i_blocks */
1992 + /* XXX shall we take care of ind here? probably not */
1993 + pri->i_blocks = calculate_i_blocks( pri, blocks);
1994 + mark_inode_dirty(pri);
1996 + double_unlock(pri, ind);
1999 + snap_double_lock(next_ind, ind);
2000 + for (i = 0; i < blocks; i++) {
2001 + if (ext3_bmap (next_ind->i_mapping, i) )
2003 + if (ext3_copy_block (next_ind, ind, i ) < 0) break;
2005 + mark_inode_dirty(next_ind);
2006 + double_unlock(next_ind, ind);
2011 + snap_debug("delete indirect ino %lu\n", ind->i_ino);
2012 + snap_debug("iput ind %lu, ref count = %d\n", ind->i_ino, ind->i_count);
2016 + snaps->ino[index] = cpu_to_le32(0);
2017 + for (i = 0; i < EXT3_MAX_SNAPS; i++)
2018 + save += snaps->ino[i];
2021 + lock_list(pri->i_sb);
2022 + delete_cowed_ino_from_list(handle, pri);
2023 + unlock_list(pri->i_sb);
2026 + /* if there are no cowed inode left, then remove snapfs feature */
2027 + if(!SB_FIRST_COWED_INO(pri->i_sb)) {
2029 + lock_super(pri->i_sb);
2031 + ext3_journal_get_write_access(handle, pri->i_sb->u.ext3_sb.s_sbh);
2032 + if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
2033 + EXT3_FEATURE_COMPAT_SNAPFS)) {
2034 + pri->i_sb->u.ext3_sb.s_es->s_feature_compat &=
2035 + cpu_to_le32(~EXT3_FEATURE_COMPAT_SNAPFS);
2037 + /* clean up block level cow feature */
2038 + if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
2039 + EXT3_FEATURE_COMPAT_BLOCKCOW)) {
2040 + pri->i_sb->u.ext3_sb.s_es->s_feature_compat &=
2041 + cpu_to_le32(~EXT3_FEATURE_COMPAT_BLOCKCOW);
2043 + /* XXX clean the extended attribute feature,
2044 + * this is not safe, find a better way
2046 + if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
2047 + EXT3_FEATURE_COMPAT_EXT_ATTR)) {
2048 + pri->i_sb->u.ext3_sb.s_es->s_feature_compat &=
2049 + cpu_to_le32(~EXT3_FEATURE_COMPAT_EXT_ATTR);
2052 + ext3_journal_dirty_metadata(handle, pri->i_sb->u.ext3_sb.s_sbh);
2053 + pri->i_sb->s_dirt = 1;
2054 + unlock_super(pri->i_sb);
2058 + * If we are deleting the last indirect inode, and the primary inode
2059 + * has already been deleted, then mark the primary for deletion also.
2060 + * Otherwise, if we are deleting the last indirect inode remove the
2061 + * snaptable from the inode. XXX
2063 + if (!save && pri->u.ext3_i.i_dtime) {
2064 + snap_debug("deleting primary %lu\n", pri->i_ino);
2066 + /* reset err to 0 now */
2069 + snap_debug("%s redirector table\n",
2070 + save ? "saving" : "deleting");
2071 + /* XXX: since set ea will modify i_ctime of pri,
2072 + so save/restore i_ctime. Need this necessary ? */
2073 + ctime = pri->i_ctime;
2074 + err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
2075 + save ? buf : NULL, EXT3_MAX_SNAP_DATA, 0);
2076 + pri->i_ctime = ctime;
2077 + ext3_mark_inode_dirty(handle, pri);
2080 + ext3_journal_stop(handle, pri);
2084 +/* restore a primary inode with the indirect inode at index */
/*
 * ext3_restore_indirect - replace the data of @pri with the data held by the
 * indirect inode at @index.
 * @pri: primary inode to restore
 * @index: snapshot slot whose indirect inode supplies the data
 *
 * Visible flow: @index is range checked, the journal inode is refused, the
 * indirect inode is looked up with ext3_get_indirect(), and a handle is
 * started with SNAP_RESTORE_TRANS_BLOCKS. The current data of @pri is
 * migrated into a scratch inode from ext3_new_inode() under
 * snap_double_lock(), then ext3_migrate_data(pri, ind) moves the snapshot
 * data in, EXT3_COW_FL is cleared on @pri and @pri is marked dirty. The
 * handle is stopped at the end. The call to ext3_destroy_indirect() is
 * commented out.
 *
 * NOTE(review): the range check accepts index == EXT3_MAX_SNAPS, the same
 * bound used in ext3_destroy_indirect() -- confirm it should not be >=.
 * NOTE(review): error checks after ext3_get_indirect() and ext3_new_inode(),
 * the disposal of tmp and ind, and the return statements are missing from
 * this excerpt.
 * NOTE(review): the debug message prints pri->i_ino as the source and
 * ind->i_ino as the target, the reverse of the data flow below it.
 */
2085 +static int ext3_restore_indirect(struct inode *pri, int index)
2087 + struct inode *ind;
2088 + struct inode *tmp;
2090 + handle_t *handle = NULL;
2092 + if (index < 0 || index > EXT3_MAX_SNAPS)
2095 + if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
2096 + printk( KERN_EMERG "TRY TO RESTORE JOURNAL\n");
2099 + snap_debug("pri ino %lu, index %d\n", pri->i_ino, index);
2101 + ind = ext3_get_indirect(pri, NULL, index);
2106 + snap_debug("restore ino %lu to %lu\n", pri->i_ino, ind->i_ino);
2108 + handle = ext3_journal_start(pri, SNAP_RESTORE_TRANS_BLOCKS);
2111 + /* first destroy all the data blocks in primary inode */
2112 + /* XXX: check this, ext3_new_inode, the first arg should be "dir" */
2113 + tmp = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
2115 + snap_double_lock(pri, tmp);
2116 + ext3_migrate_data(tmp, pri);
2117 + snap_double_unlock(pri, tmp);
2123 + snap_err("restore_indirect, new_inode err\n");
2125 + snap_double_lock(pri, ind);
2126 + ext3_migrate_data(pri, ind);
2127 + /* clear the cow flag for pri because ind has it */
2128 + pri->u.ext3_i.i_flags &= ~EXT3_COW_FL;
2129 + ext3_mark_inode_dirty(handle, pri);
2130 + snap_double_unlock(pri, ind);
2134 +// ext3_destroy_indirect(pri, index);
2136 + ext3_journal_stop(handle, pri);
2142 + * ext3_snap_iterate - iterate through all of the inodes
2143 + * @sb: filesystem superblock
2144 + * @repeat: pointer to function called on each valid inode
2145 + * @start: inode to start iterating at
2146 + * @priv: private data to the caller/repeat function
2148 + * If @start is NULL, then we do not return an inode pointer. If @*start is
2149 + * NULL, then we start at the beginning of the filesystem, and iterate over
2150 + * all of the inodes in the system. If @*start is non-NULL, then we start
2151 + * iterating at this inode.
2153 + * We call the repeat function for each inode that is in use. The repeat
2154 + * function must check if this is a redirector (with is_redirector) if it
2155 + * only wants to operate on redirector inodes. If there is an error or
2156 + * the repeat function returns non-zero, we return the last inode operated
2157 + * on in the @*start parameter. This allows the caller to restart the
2158 + * iteration at this inode if desired, by returning a positive value.
2159 + * Negative return values indicate an error.
2161 + * NOTE we cannot simply traverse the existing filesystem tree from the root
2162 + * inode, as there may be disconnected trees from deleted files/dirs
2164 + * FIXME If there was a list of inodes with EAs, we could simply walk the list
2165 + * instead of reading every inode. This is an internal implementation issue.
/*
 * ext3_iterate_all - call @repeat on in-use inodes found through the inode
 * bitmaps of every block group.
 * @sb: filesystem superblock
 * @repeat: callback invoked with each inode and @priv
 * @start: in/out pointer to the inode to start from; overwritten with iget()
 *         results as the walk advances
 * @priv: opaque value forwarded to @repeat
 *
 * Visible flow: a starting inode is fetched (EXT3_ROOT_INO in the first
 * iget() call) and validated with is_bad_inode() and against
 * s_inodes_count. A start below EXT3_FIRST_INO(sb) is handed to @repeat and
 * then replaced by EXT3_FIRST_INO(sb). The group and in-group offset of the
 * start inode are derived from i_ino - 1, and for each group from there the
 * inode bitmap is loaded and scanned byte by byte and bit by bit; inode
 * numbers are formed as ibase + (ibyte << 3) + i with i running 1..8. Set
 * bits at or beyond the start inode are fetched with iget() and passed to
 * @repeat.
 *
 * NOTE(review): in the first @repeat call the closing parenthesis is placed
 * so that err receives the result of the != 0 comparison (0 or 1) instead
 * of the callback value; the call inside the bitmap loop is parenthesised
 * the other way. Confirm and align the first call with the second.
 * NOTE(review): iput() calls, the handling of tmp, the conditions that
 * select the first iget() and all return statements are missing from this
 * excerpt.
 */
2168 +static int ext3_iterate_all(struct super_block *sb,
2169 + int (*repeat)(struct inode *inode, void *priv),
2170 + struct inode **start, void *priv)
2172 + struct inode *tmp = NULL;
2174 + ino_t istart, ibase;
2180 + *start = iget(sb, EXT3_ROOT_INO);
2185 + if (is_bad_inode(*start)) {
2190 + if ((*start)->i_ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
2191 + snap_debug("invalid starting inode %ld\n",(*start)->i_ino);
2195 + if ((*start)->i_ino < EXT3_FIRST_INO(sb)) {
2196 + if ((err = (*repeat)(*start, priv) != 0))
2199 + *start = iget(sb, EXT3_FIRST_INO(sb));
2204 + if (is_bad_inode(*start)) {
2210 + gstart = ((*start)->i_ino - 1) / EXT3_INODES_PER_GROUP(sb);
2211 + istart = ((*start)->i_ino - 1) % EXT3_INODES_PER_GROUP(sb);
2212 + ibase = gstart * EXT3_INODES_PER_GROUP(sb);
2213 + for (gnum = gstart; gnum < EXT3_SB(sb)->s_groups_count;
2214 + gnum++, ibase += EXT3_INODES_PER_GROUP(sb)) {
2215 + struct ext3_group_desc * gdp;
2220 + gdp = ext3_get_group_desc (sb, gnum, NULL);
2221 + if (!gdp || le16_to_cpu(gdp->bg_free_inodes_count) ==
2222 + EXT3_INODES_PER_GROUP(sb))
2225 + bitmap_nr = ext3_load_inode_bitmap(sb, gnum);
2226 + if (bitmap_nr < 0)
2229 + bitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]->b_data;
2230 + for (ibyte = istart >> 3;
2231 + ibyte < EXT3_INODES_PER_GROUP(sb) >> 3;
2237 + if (!bitmap[ibyte])
2240 + /* FIXME need to verify if bit endianness will
2241 + * work properly here for all architectures.
2243 + for (i = 1, bit = 1; i <= 8; i++, bit <<= 1) {
2244 + ino_t ino = ibase + (ibyte << 3) + i;
2246 + if ((bitmap[ibyte] & bit) == 0)
2249 + if (ino < (*start)->i_ino)
2252 + *start = iget(sb, ino);
2257 + if (is_bad_inode(*start)) {
2262 + if ((err = (*repeat)(*start, priv)) != 0)
/*
 * ext3_iterate - dispatch an inode iteration according to @flag.
 * @sb, @repeat, @start, @priv: forwarded unchanged to the chosen iterator
 * @flag: SNAP_ITERATE_ALL_INODE selects ext3_iterate_all(),
 *        SNAP_ITERATE_COWED_INODE selects ext3_iterate_cowed_inode()
 *
 * The switch line and any default case are not present in this excerpt, so
 * the result for other @flag values cannot be stated from here.
 */
2275 +static int ext3_iterate(struct super_block *sb,
2276 + int (*repeat)(struct inode *inode, void *priv),
2277 + struct inode **start, void *priv, int flag)
2280 + case SNAP_ITERATE_ALL_INODE:
2281 + return ext3_iterate_all (sb, repeat, start, priv);
2283 + case SNAP_ITERATE_COWED_INODE:
2284 + return ext3_iterate_cowed_inode (sb, repeat, start,priv);
/*
 * find_snap_meta_index - look up a named entry in the snap meta table.
 * @snap_meta: table to search
 *
 * Scans array[0 .. TABLE_ITEM_COUNT-1], considers only entries whose name
 * starts with a non-NUL byte and compares them with name using strcmp().
 * Returns -1 when the loop finishes without a hit.
 *
 * NOTE(review): the second parameter (name) is used in the body but its
 * declaration is missing from this excerpt, as are the statements that
 * follow the strcmp() test; presumably a matching entry returns its index i
 * -- TODO confirm the polarity of the strcmp() branch.
 */
2292 +static int find_snap_meta_index(
2293 + struct table_snap_meta_data *snap_meta,
2298 + /* table max length is null*/
2299 + for( i = 0; i < TABLE_ITEM_COUNT; i++){
2300 + /*compare name Max name Length 15*/
2301 + if (snap_meta->array[i].name[0]){
2302 + if(strcmp(snap_meta->array[i].name, name))
2307 + return -1; /* can not find */
/*
 * set_snap_meta_index - claim the first free entry of the snap meta table.
 * @snap_meta: table to update
 *
 * The first entry whose name starts with a NUL byte is filled in: name is
 * copied with strcpy(), count is incremented, start is set to
 * i * TABLE_ITEM_SIZE + 1 and len to size. Returns -1 when no free entry is
 * found; the return taken on success is not present in this excerpt
 * (ext3_set_meta_attr() uses the result as an array index).
 *
 * NOTE(review): name and size are used in the body but their parameter
 * declarations are missing from this excerpt.
 * NOTE(review): strcpy() is unbounded; ext3_set_meta_attr() clears names
 * with TABLE_ITEM_NAME_SIZE bytes, which suggests that is the field size --
 * confirm callers cannot pass a longer name.
 * NOTE(review): unlike find_snap_meta_index() this function is not declared
 * static -- confirm external linkage is intended.
 */
2310 +int set_snap_meta_index(
2311 + struct table_snap_meta_data *snap_meta,
2317 + for( i = 0; i < TABLE_ITEM_COUNT; i++){
2318 + /*compare name Max name Length 15*/
2319 + if (! snap_meta->array[i].name[0]){
2320 + strcpy(snap_meta->array[i].name, name);
2321 + snap_meta->count ++;
2322 + snap_meta->array[i].start = i * TABLE_ITEM_SIZE + 1;
2323 + snap_meta->array[i].len = size;
2327 + return -1; /* can not find */
/*
 * ext3_get_meta_attr (table-file variant) - read a named snap meta
 * attribute from the table inode referenced by SB_SNAPTABLE_INO(sb).
 * @sb: filesystem superblock
 * @name: attribute name looked up with find_snap_meta_index()
 * @buf: destination buffer; may be NULL to query the length
 *
 * Visible flow: the table inode is fetched with iget(), block 0 is read
 * with ext3_bread() and interpreted as struct table_snap_meta_data. When
 * @buf is NULL or *size is smaller than the stored len, the stored len is
 * placed in error. Otherwise the blocks starting at array[index].start are
 * read one by one and copied into @buf with memcpy(), a full block at a
 * time while left_size allows, and *size is set to the stored len.
 *
 * A second definition with the same name appears later in the file;
 * presumably the two are selected by preprocessor lines that are not
 * present in this excerpt -- TODO confirm.
 *
 * NOTE(review): index is declared unsigned long but receives the int result
 * of find_snap_meta_index(), which returns -1 on a miss; a test of the form
 * index < 0 could never be true -- confirm how the missing line tests it.
 * NOTE(review): no visible line advances @buf between memcpy() calls and
 * left_size is unsigned, so subtracting a block size from a smaller value
 * would wrap -- confirm against the missing lines.
 * NOTE(review): the size parameter declaration, brelse()/iput() calls and
 * the return statements are missing from this excerpt.
 */
2330 +static int ext3_get_meta_attr(struct super_block *sb,
2331 + char* name, char* buf,
2335 + struct inode *inode;
2336 + struct buffer_head *bh = NULL;
2337 + struct table_snap_meta_data *s_attr;
2338 + unsigned long map_len = 0, index = 0, left_size;
2341 + ino = SB_SNAPTABLE_INO(sb);
2343 + snap_err("No table file \n");
2346 + inode = iget(sb, ino);
2347 + if(!inode || is_bad_inode(inode)){
2348 + snap_err("unable to get table ino %lu\n", ino);
2352 + /*read the table from the table inode*/
2353 + bh = ext3_bread(NULL, inode, 0, 0, &error);
2355 + snap_err("read table ino %lu, error %d\n", ino, error);
2359 + s_attr = (struct table_snap_meta_data *)(bh->b_data);
2360 + index = find_snap_meta_index(s_attr, name);
2362 + snap_debug("not exit %s meta attr of table ino %llu \n",
2367 + if (!buf || *size < s_attr->array[index].len) {
2368 + /*return the size of this meta attr */
2369 + error = s_attr->array[index].len;
2372 + map_len = (s_attr->array[index].len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
2373 + left_size = *size;
2374 + for(i = 0; i < map_len; i++) {
2375 + struct buffer_head *array_bh = NULL;
2377 + array_bh = ext3_bread(NULL, inode,
2378 + s_attr->array[index].start + i,
2381 + snap_err("ino %lu read snap attr offset %d error %d \n",
2382 + inode->i_ino, (s_attr->array[index].start + i),
2386 + if (left_size >= sb->s_blocksize) {
2387 + memcpy(buf, array_bh->b_data, sb->s_blocksize);
2389 + memcpy(buf, array_bh->b_data, left_size);
2390 + left_size -= sb->s_blocksize;
2393 + *size = s_attr->array[index].len;
/*
 * ext3_set_meta_attr (table-file variant) - create, replace or delete a
 * named snap meta attribute stored in the table inode.
 * @sb: filesystem superblock
 * @name: attribute name
 * @buf: new value
 * @size: length of @buf in bytes
 *
 * Visible flow: a handle is started on the root inode with
 * 2 * EXT3_SETMETA_TRANS_BLOCKS credits. A table inode is either allocated
 * with ext3_new_inode() and recorded in SB_SNAPTABLE_INO(sb) under journal
 * write access to the superblock buffer, or fetched with iget(). Block 0 is
 * read (created if absent) and searched with find_snap_meta_index(). The
 * delete path clears the entry name and len; the set path claims an entry
 * with set_snap_meta_index(), stores size in len and writes @buf into the
 * blocks starting at array[index].start, one block per iteration. The
 * handle is stopped at the end.
 *
 * A second definition with the same name appears later in the file;
 * presumably the two are selected by preprocessor lines that are not
 * present in this excerpt -- TODO confirm. The role of a NULL @buf as the
 * delete request is inferred from the ino == 0 && !buf and
 * index < 0 && !buf tests.
 *
 * NOTE(review): the "delete the meta attr" snap_debug() call passes *name (a
 * single char) and inode->ino to two %s conversions; other lines use
 * inode->i_ino, so this would not compile once snap_debug is enabled.
 * NOTE(review): one call uses journal_dirty_metadata() directly while every
 * other site uses ext3_journal_dirty_metadata() -- confirm.
 * NOTE(review): the copy loop tests left_size > blocksize while the reader
 * tests >=; no visible line initialises left_size or advances @buf.
 * NOTE(review): error checks, brelse()/iput() calls and return statements
 * are missing from this excerpt.
 */
2400 +static int ext3_set_meta_attr(struct super_block *sb, char* name,
2401 + char* buf, int size)
2403 + struct inode *inode = NULL;
2404 + handle_t *handle = NULL;
2405 + struct buffer_head *bh = NULL;
2406 + struct table_snap_meta_data *s_attr = NULL;
2407 + unsigned long ino;
2408 + int i, index = 0, error = 0;
2409 + unsigned long new_len = 0, left_size;
2411 + ino = SB_SNAPTABLE_INO(sb);
2413 + if (ino == 0 && !buf) {
2414 + snap_debug("no table ino \n");
2418 + handle = ext3_journal_start(sb->s_root->d_inode, 2*EXT3_SETMETA_TRANS_BLOCKS);
2423 + /*create table inode update table ino*/
2424 + inode = ext3_new_inode(handle, sb->s_root->d_inode, (int)S_IFREG, 0);
2428 + ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
2429 + SB_SNAPTABLE_INO(sb) = inode->i_ino;
2430 + ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
2435 + inode = iget(sb, ino);
2436 + if (!inode || !inode->i_nlink || is_bad_inode(inode)) {
2437 + snap_err("unable to get table ino %lu\n", ino);
2442 + /*read the table from the table inode,
2443 + * If can not find the block just create it*/
2444 + bh = ext3_bread(handle, inode, 0, 1, &error);
2446 + snap_err("read table ino %lu, error %d\n", ino, error);
2450 + s_attr = (struct table_snap_meta_data *)(bh->b_data);
2451 + index = find_snap_meta_index(s_attr, name);
2452 + if (index < 0 && !buf) {
2453 + snap_debug("%s meta attr of table ino %llu do not exist\n",
2454 + name, inode->i_ino);
2459 + snap_debug("delete the meta attr %s in the table ino %s",
2460 + *name, inode->ino);
2461 + /*Here we only delete the entry of the attr
2462 + *FIXME, should we also delete the block of
2465 + ext3_journal_get_write_access(handle, bh);
2466 + memset(s_attr->array[index].name, 0, TABLE_ITEM_NAME_SIZE);
2467 + s_attr->array[index].len = 0;
2469 + ext3_journal_dirty_metadata(handle, bh);
2472 + new_len = (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
2473 + /*find the place to put this attr in that index*/
2474 + ext3_journal_get_write_access(handle, bh);
2476 + index = set_snap_meta_index(s_attr, name, size);
2478 + snap_err("table full of ino %lu \n", inode->i_ino);
2483 + s_attr->array[index].len = size;
2484 + journal_dirty_metadata(handle, bh);
2485 + /*put this attr to the snap table*/
2487 + for(i = 0; i < new_len; i++) {
2488 + struct buffer_head *array_bh = NULL;
2490 + array_bh = ext3_bread(handle, inode,
2491 + s_attr->array[index].start + i, 1, &error);
2493 + snap_err("inode %lu Can not get the block of attr %s\n",
2494 + inode->i_ino, name);
2498 + ext3_journal_get_write_access(handle, array_bh);
2499 + if (left_size > inode->i_sb->s_blocksize)
2500 + memcpy(array_bh->b_data, buf, inode->i_sb->s_blocksize);
2502 + memcpy(array_bh->b_data, buf, left_size);
2503 + ext3_journal_dirty_metadata(handle, array_bh);
2504 + left_size -= inode->i_sb->s_blocksize;
2507 + ext3_journal_stop(handle, sb->s_root->d_inode);
2514 +static int ext3_get_meta_attr(struct super_block *sb,
2515 + char* name, char* buf,
2518 + struct inode *root_inode = sb->s_root->d_inode;
2525 + err = ext3_xattr_get(root_inode, EXT3_SNAP_INDEX, name,
2527 + if (err == -ERANGE || !buf){
2528 + /*get the size of the buf*/
2529 + *size = ext3_xattr_get(root_inode, EXT3_SNAP_INDEX, name,
2536 + * set the meta info of the snap system
2538 + * buf == NULL delete "name" meta attr
2539 + * != NULL set "name" meta attr
2545 +static int ext3_set_meta_attr(struct super_block *sb, char* name,
2546 + char* buf, int size)
2548 + struct inode *root_inode = sb->s_root->d_inode;
2549 + handle_t *handle = NULL;
2553 + handle = ext3_journal_start(root_inode, EXT3_XATTR_TRANS_BLOCKS);
2557 + error = ext3_xattr_set(handle, root_inode, EXT3_SNAP_INDEX, name, buf, size, 0);
2559 + ext3_journal_stop(handle, root_inode);
2565 +struct snapshot_operations ext3_snap_operations = {
2566 + ops_version: SNAP_VERSION(2,0,2),
2567 + is_redirector: is_redirector,
2568 + is_indirect: is_indirect,
2569 + create_indirect: ext3_create_indirect,
2570 + get_indirect: ext3_get_indirect,
2571 + get_indirect_ino: ext3_get_indirect_ino,
2572 + destroy_indirect: ext3_destroy_indirect,
2573 + restore_indirect: ext3_restore_indirect,
2574 + iterate: ext3_iterate,
2575 + copy_block: ext3_copy_block,
2576 + set_indirect: ext3_set_indirect,
2577 + snap_feature: ext3_snap_feature,
2578 + get_generation: ext3_get_generation,
2579 + set_generation: ext3_set_generation,
2580 + get_meta_attr: ext3_get_meta_attr,
2581 + set_meta_attr: ext3_set_meta_attr,
2584 +EXPORT_SYMBOL(ext3_snap_operations);
2585 +#ifdef SNAP_PROFILE
2586 +EXPORT_SYMBOL(prof_snapdel);
2589 +#ifdef SNAP_DEBUG_IOC
2591 +static int print_inode(struct inode *pri, void *index_val)
2595 + struct snap_ea *snaps;
2596 + char buf[EXT3_MAX_SNAP_DATA];
2597 + int index = *(int *)index_val;
2599 + err = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
2600 + buf, EXT3_MAX_SNAP_DATA);
2602 + if (err == -ENOENT) {
2603 + memset(buf, 0, EXT3_MAX_SNAP_DATA);
2605 + else if (err < 0) {
2606 + snap_err("got err %d when reading attributes\n", err);
2610 + snaps = (struct snap_ea *) buf;
2612 + if( le32_to_cpu(snaps->ino[index]) == 0 ) {
2613 + snap_debug("no redirected ino for primary inode %lu\n",
2617 + snap_debug("primary inode %lu , redirected ino=%d\n",
2618 + primary->i_ino,le32_to_cpu(snaps->ino[index]));
2624 +int snap_print(struct super_block *sb, int index)
2626 + ext3_iterate_cowed_inode(sb, &print_inode, NULL, &index);
2630 +static int ext3_snap_destroy_inode(struct inode *primary,void *index_val)
2632 + int index = *(int *)index_val;
2634 + printk("delete_inode for index %d\n",index);
2635 + rc = ext3_destroy_indirect(primary,index, NULL);
2637 + printk("ERROR:ext3_destroy_indirect(ino %lu,index %d),ret %d\n",
2638 + primary->i_ino, index, rc);
2642 +int ext3_snap_delete(struct super_block *sb, int index)
2644 + ext3_iterate(sb, &ext3_snap_destroy_inode, NULL, &index,
2645 + SNAP_ITERATE_COWED_INODE);
2657 Index: linux-2.4.20-8/fs/ext3/ioctl.c
2658 ===================================================================
2659 --- linux-2.4.20-8.orig/fs/ext3/ioctl.c 2004-01-05 10:54:00.000000000 +0800
2660 +++ linux-2.4.20-8/fs/ext3/ioctl.c 2004-01-05 10:54:25.000000000 +0800
2662 #include <linux/ext3_jbd.h>
2663 #include <linux/sched.h>
2664 #include <asm/uaccess.h>
2665 +#include <linux/locks.h>
2667 +#include <linux/snap.h>
2668 +extern struct snapshot_operations ext3_snap_operations;
2669 +extern int ext3_snap_print(struct super_block *sb, int index);
2670 +extern int ext3_snap_delete(struct super_block *sb, int index);
2673 int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
2674 @@ -189,6 +195,103 @@
2679 + case EXT3_IOC_SNAP_SETFILECOW: {
2680 + printk(KERN_INFO "set file cow on dev %x\n",inode->i_dev);
2682 + /* clear block cow feature*/
2683 + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
2684 + EXT3_FEATURE_COMPAT_BLOCKCOW)) {
2685 + handle_t *handle = ext3_journal_start(inode, 1);
2687 + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
2688 + EXT3_FEATURE_COMPAT_SNAPFS)) {
2689 + printk(KERN_INFO "can't change cow level while snapfs feature exist");
2692 + lock_super(inode->i_sb);
2693 + journal_get_write_access(handle, EXT3_SB(inode->i_sb)->s_sbh);
2694 + inode->i_sb->u.ext3_sb.s_es->s_feature_compat &=
2695 + cpu_to_le32(~EXT3_FEATURE_COMPAT_BLOCKCOW);
2696 + inode->i_sb->s_dirt = 1;
2697 + journal_dirty_metadata(handle, EXT3_SB(inode->i_sb)->s_sbh);
2698 + unlock_super(inode->i_sb);
2699 + ext3_journal_stop(handle, inode);
2703 + case EXT3_IOC_CREATE_INDIR: {
2704 + struct inode *ind;
2705 + printk(KERN_INFO "create indirect on inode %lu\n",inode->i_ino);
2706 + ind = ext3_snap_operations.create_indirect(inode, 0, 1, 0);
2707 + if (!ind || IS_ERR(ind))
2708 + return PTR_ERR(ind);
2709 + printk(KERN_INFO "got indirect inode %lu\n",ind->i_ino);
2710 + put_user(ind->i_ino,(int *) arg);
2714 + case EXT3_IOC_GET_INDIR: {
2715 + struct inode *ind;
2717 + if (get_user(index, (int *) arg))
2720 + printk(KERN_INFO "get indirect on inode %lu, index %d\n",
2721 + inode->i_ino, index);
2722 + ind = ext3_snap_operations.get_indirect(inode, NULL, index);
2723 + if (!ind || IS_ERR(ind)) {
2724 + put_user(0,(int *) arg);
2725 + return PTR_ERR(ind);
2727 + printk(KERN_INFO "got indirect inode %lu for index %d\n",
2728 + ind->i_ino, index);
2729 + put_user(ind->i_ino,(int *) arg);
2733 + case EXT3_IOC_IS_REDIR: {
2734 + int is_redirector = 0;
2735 + printk(KERN_INFO "checking if inode %lu is redirector via\n",
2737 + is_redirector = ext3_snap_operations.is_redirector(inode);
2738 + printk(KERN_INFO "redirector: %s\n",is_redirector ? "yes":"no");
2739 + put_user(is_redirector,(int *) arg);
2743 + case EXT3_IOC_RESTORE_INDIR: {
2744 + printk(KERN_INFO "restore indirect on inode %lu\n",inode->i_ino);
2745 + return ext3_snap_operations.restore_indirect(inode, 1);
2747 + case EXT3_IOC_SNAP_PRINT: {
2749 + if (get_user(index, (int *) arg))
2751 + printk(KERN_INFO "print snap for index %d\n",index);
2753 + return ext3_snap_print(inode->i_sb, 1);
2755 + case EXT3_IOC_SNAP_DELETE: {
2757 + if (get_user(index, (int *) arg))
2760 + // XXX: debug code , always set index = 1
2761 + if(index !=1) index=1;
2762 + printk(KERN_INFO "delete all cowed inode for index %d\n",index);
2763 + return ext3_snap_delete(inode->i_sb, index);
2766 + case EXT3_IOC_DESTROY_INDIR: {
2768 + if (get_user(index, (int *) arg))
2770 + printk(KERN_INFO "destroy indirect on ino %lu, index %d\n",
2771 + inode->i_ino, index);
2772 + return ext3_snap_operations.destroy_indirect(inode, index, NULL);
2778 Index: linux-2.4.20-8/fs/ext3/Makefile
2779 ===================================================================
2780 --- linux-2.4.20-8.orig/fs/ext3/Makefile 2004-01-05 10:54:03.000000000 +0800
2781 +++ linux-2.4.20-8/fs/ext3/Makefile 2004-01-05 10:54:25.000000000 +0800
2784 obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
2785 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
2787 + xattr_trusted.o snap.o
2788 obj-m := $(O_TARGET)
2790 export-objs += xattr.o
2791 Index: linux-2.4.20-8/fs/ext3/inode.c
2792 ===================================================================
2793 --- linux-2.4.20-8.orig/fs/ext3/inode.c 2004-01-05 10:54:03.000000000 +0800
2794 +++ linux-2.4.20-8/fs/ext3/inode.c 2004-01-05 10:54:25.000000000 +0800
2795 @@ -1191,7 +1191,7 @@
2796 * So, if we see any bmap calls here on a modified, data-journaled file,
2797 * take extra steps to flush any blocks which might be in the cache.
2799 -static int ext3_bmap(struct address_space *mapping, long block)
2800 +int ext3_bmap(struct address_space *mapping, long block)
2802 struct inode *inode = mapping->host;
2804 Index: linux-2.4.20-8/fs/ext3/ialloc.c
2805 ===================================================================
2806 --- linux-2.4.20-8.orig/fs/ext3/ialloc.c 2004-01-05 10:54:03.000000000 +0800
2807 +++ linux-2.4.20-8/fs/ext3/ialloc.c 2004-01-05 10:54:25.000000000 +0800
2809 sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh;
2814 * load_inode_bitmap loads the inode bitmap for a blocks group
2816 @@ -160,6 +159,13 @@
2820 +/* Export load_inode_bitmap*/
2821 +int ext3_load_inode_bitmap (struct super_block * sb,
2822 + unsigned int block_group)
2824 + return load_inode_bitmap(sb, block_group);
2828 * NOTE! When we get the inode, we're the only people
2829 * that have access to it, and as such there are no
2830 Index: linux-2.4.20-8/include/linux/snap.h
2831 ===================================================================
2832 --- linux-2.4.20-8.orig/include/linux/snap.h 2003-01-30 18:24:37.000000000 +0800
2833 +++ linux-2.4.20-8/include/linux/snap.h 2004-01-05 10:54:25.000000000 +0800
2836 + * Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
2837 + * started by Andreas Dilger <adilger@turbolinux.com>
2838 + * Peter Braam <braam@mountainviewdata.com>
2839 + * Harrison Xing <harrisonx@mountainviewdata.com>
2841 + * Redesigned 2003 by Peter Braam <braam@clusterfs.com>
2842 + * Eric Mei <Ericm@clusterfs.com>
2843 + * Wang Di <wangdi@clusterfs.com>
2845 + * Rewritten 2003 by Wang Di <wangdi@clusterfs.com>
2846 + * Eric Mei <ericm@clusterfs.com>
2848 + * Functions for implementing snapshots in the ext3 filesystem. They are
2849 + * intended to hide the internals of the filesystem from the caller in
2850 + * such a way that the caller doesn't need to know about inode numbers,
2851 + * how the redirectors are implemented or stored, etc. It may not do that
2852 + * all yet, but it tries.
2854 + * The snapshot inode redirection is stored in the primary/direct inode as
2855 + * an extended attribute @snap, in the form of little-endian u32 inode
2860 +#ifndef _LINUX_SNAP_H
2861 +#define _LINUX_SNAP_H
2863 +#include <linux/fs.h>
2865 +/* maximum number of snapshots available for users */
2866 +#define MAX_SNAPS 20
2868 +/* snap extended attributes definition */
2869 +#define SNAP_ATTR "@snap"
2874 + ino_t ino[MAX_SNAPS+1]; /* including current snapshot */
2875 + ino_t parent_ino[MAX_SNAPS+1];
2877 +#define MAX_SNAP_DATA (sizeof(struct snap_ea))
2879 +/* for compatibility with old 128 max snapshots */
2880 +#define MAX_SNAP128_DATA (sizeof(struct snap_ea) - (sizeof(ino_t) * 128 * 2))
2881 +#define ZERO_SNAP_ATTR_TOP(buf) \
2883 + struct snap_ea *p = (struct snap_ea*)buf; \
2884 + memset(&p->ino[129], 0, sizeof(ino_t)*128); \
2885 + memset(&p->parent_ino[129], 0, sizeof(ino_t)*128); \
2888 +/* snap new EA definition, for logging of new inodes */
2889 +#define SNAP_NEW_INO_ATTR "@snap_new"
2890 +struct snap_new_ea{
2891 + ino_t prev_ino; /* reserved. save the inode to a linked list */
2893 + int new_index; /* indicate for which index this is a new inode */
2895 +#define NULL_NEW_INDEX -1 /* null new index, to clear the snap_new_ea */
2897 +/* EA to identify an indirect inode's information */
2898 +#define SNAP_INDIRECT_INFO_ATTR "@snap_indirect_inode_info"
2899 +struct snap_indirect_info {
2900 + __u32 index; /* which index belongs to */
2901 + __u32 reserved[3]; /* reserved */
2905 +/* snapfs meta data stored in extended attributes of root ino */
2906 +#define DISK_SNAP_META_ATTR "@disk_snap_meta_attr"
2907 +struct disk_snap_meta_data {
2908 + ino_t snap_first_cowed_ino;
2909 + ino_t snap_table_ino;
2910 + __u32 snap_feature_compat;
2912 +/*snapfs quota info */
2914 +#define SNAP_USR_QUOTA 0
2915 +#define SNAP_GRP_QUOTA 1
2916 +#define DISK_SNAP_QUOTA_INFO "@disk_snap_quota_info"
2917 +struct quota_info_len {
2918 + int uid_len; /*uid quota info length */
2919 + int gid_len; /*gid quota info length */
2922 + * Check if the EA @name is Snap EA or not.
2923 + * Snap EA includes the SNAP_ATTR, SNAP_NEW_INO_ATTR and DISK_SNAP_META_ATTR
2926 +#define IS_SNAP_EA(name) ( (!strcmp((name), SNAP_ATTR)) || \
2927 + (!strcmp((name), DISK_SNAP_META_ATTR)))
2931 +/* file system features */
2932 +#define SNAP_FEATURE_COMPAT_SNAPFS 0x0010
2933 +#define SNAP_FEATURE_COMPAT_BLOCKCOW 0x0020
2935 +/* constants for snap_feature operations */
2936 +#define SNAP_CLEAR_FEATURE 0x0
2937 +#define SNAP_SET_FEATURE 0x1
2938 +#define SNAP_HAS_FEATURE 0x2
2940 +/* snap flags for inode, within 1 byte range, each occupy 1 bit */
2941 +#define SNAP_INO_MAGIC 0x88 /* magic for snap inode */
2942 +#define SNAP_COW_FLAG 0x01 /* snap redirected inode */
2943 +#define SNAP_DEL_FLAG 0x02 /* snap deleted inode */
2944 +#define SNAP_TABLE_FLAG 0x04 /* snap table inode */
2945 +#define SNAP_PRI_FLAG 0x08 /* primary inode */
2947 +/* no snapfs attributes for get_indirect_ino */
2948 +#define ENOSNAPATTR 320
2950 +/* constants used by iterator */
2951 +#define SNAP_ITERATE_ALL_INODE 0x0
2952 +#define SNAP_ITERATE_COWED_INODE 0x1
2954 +/* constants used by create_indirect */
2955 +#define SNAP_CREATE_IND_NORMAL 0x0
2956 +#define SNAP_CREATE_IND_DEL_PRI 0x1
2958 +/* the data structure represented in the xfs_dinode.pad
2959 + offset 0: magic (1 byte)
2960 + offset 1: flag (1 byte)
2961 + offset 2: gen (4 bytes)
2964 +#define SIZEOF_MAGIC 1
2965 +#define SIZEOF_FLAG 1
2966 +#define SIZEOF_GENERATION 4
2968 +#define MAGIC_OFFSET 0
2969 +#define FLAG_OFFSET 1
2970 +#define GENERATION_OFFSET 2
2972 +#define SNAP_GET_DINODE_MAGIC(dinode) /* read the magic byte of di_pad */ \
2973 + (((__u8*)(dinode)->di_pad)[MAGIC_OFFSET])
2974 +#define SNAP_SET_DINODE_MAGIC(dinode) /* store SNAP_INO_MAGIC; whole expansion parenthesized */ \
2975 + (((__u8*)(dinode)->di_pad)[MAGIC_OFFSET] = (SNAP_INO_MAGIC))
2976 +#define SNAP_GET_DINODE_FLAG(dinode) /* read the flag byte of di_pad */ \
2977 + (((__u8*)(dinode)->di_pad)[FLAG_OFFSET])
2978 +#define SNAP_SET_DINODE_FLAG(dinode, flag) /* OR flag bits into the flag byte */ \
2979 + (((__u8*)(dinode)->di_pad)[FLAG_OFFSET] |= (flag))
2980 +#define SNAP_CLEAR_DINODE_FLAG(dinode, flag) /* clear flag bits in the flag byte */ \
2981 + (((__u8*)(dinode)->di_pad)[FLAG_OFFSET] &= ~(flag))
2982 +#define SNAP_GET_DINODE_GEN(dinode) /* little-endian u32 at GENERATION_OFFSET; NOTE(review): byte offset 2 may be unaligned - confirm */ \
2983 + (le32_to_cpu(*(__u32*)(&((__u8*)(dinode)->di_pad)[GENERATION_OFFSET])))
2984 +#define SNAP_SET_DINODE_GEN(dinode, gen) /* store gen as little-endian u32; whole expansion parenthesized */ \
2985 + (*(__u32*)(&((__u8*)(dinode)->di_pad)[GENERATION_OFFSET]) = cpu_to_le32(gen))
2988 +/* header of saving snaptable */
2990 + unsigned int size; /* buffer size passed by */
2991 + char data[0]; /* followed by actual data */
2994 +/* header of on-disk table data */
2995 +struct disk_snap_table_header {
3001 +/* table magic and version constant */
3002 +#define SNAP_TABLE_MAGIC 0xB3A2957F
3003 +#define SNAP_TABLE_VERSION 1
3006 +#define SNAPTABLE_BLOCKS(sb,size) /* block count: (size - sizeof(__u32) + header size) >> blocksize bits, plus one */ \
3007 + ((((size)-sizeof(__u32)+sizeof(struct disk_snap_table_header)) \
3008 + >> (sb)->s_blocksize_bits)+1)
3011 +#define SNAP_VERSION(a,b,c) /* pack major/minor/release, 8 bits each; args parenthesized */ \
3012 + ((((a) & 0xFF) << 16) | (((b) & 0xFF) << 8) | ((c) & 0xFF))
3013 +#define SNAP_VERSION_MAJOR(v) /* bits 16-23 of a packed version */ \
3014 + (((v) >> 16) & 0xFF)
3015 +#define SNAP_VERSION_MINOR(v) \
3017 +#define SNAP_VERSION_REL(v) \
3020 +/* for snap meta attr table */
3021 +#define TABLE_ITEM_COUNT 200
3022 +#define TABLE_ITEM_SIZE 1000
3023 +#define TABLE_ITEM_NAME_SIZE 16
3025 +/*snap table array */
3026 +struct snap_meta_array {
3027 + char name[TABLE_ITEM_NAME_SIZE];
3028 + int start; /* where is the start of the array */
3029 + int len; /* the len of the array */
3031 +/* snap table structure for recording the information */
3032 +struct table_snap_meta_data {
3034 + struct snap_meta_array array[TABLE_ITEM_COUNT];
3039 +#define SNAP_PROFILE
3041 +#undef SNAP_PROFILE
3044 +#ifdef SNAP_PROFILE
3045 +struct profile_snapdel_stat
3047 + unsigned long total_tick; /* total time */
3048 + unsigned long inodes; /* primary inodes */
3050 + unsigned long yield_count; /* for yield cpu */
3051 + unsigned long yield_tick;
3052 + unsigned long yield_max_tick;
3054 + unsigned long getea_count; /* for get ea */
3055 + unsigned long getea_tick;
3056 + unsigned long getea_max_tick;
3058 + unsigned long setea_count; /* for set ea */
3059 + unsigned long setea_tick;
3060 + unsigned long setea_max_tick;
3062 + unsigned long converge_count; /* for converge */
3063 + unsigned long converge_tick;
3064 + unsigned long converge_max_tick;
3069 +/* snapshot operations */
3070 +struct snapshot_operations {
3071 + unsigned int ops_version;
3072 + int (*is_redirector) (struct inode *inode);
3073 + int (*is_indirect) (struct inode *inode);
3074 + struct inode * (*create_indirect) (struct inode *pri, int index,
3075 + unsigned int gen, ino_t parent_ino,
3077 + struct inode * (*get_indirect) (struct inode *pri, int *table,int slot);
3078 + ino_t (*get_indirect_ino) (struct inode *pri, int index);
3079 + int (*destroy_indirect) (struct inode *pri, int index,
3080 + struct inode *next_ind);
3081 + int (*restore_indirect) (struct inode *pri, int index);
3082 + int (*iterate) (struct super_block *sb,
3083 + int (*repeat)(struct inode *inode, void *priv),
3084 + struct inode **start, void *priv, int flag);
3085 + int (*copy_block) ( struct inode *dst, struct inode *src, int blk);
3086 + int (*has_block) (struct inode *dst, int blk);
3087 + int (*set_indirect) (struct inode *pri, int index,
3088 + ino_t ind_ino, ino_t parent_ino );
3089 + int (*snap_feature) (struct super_block *sb, int feature, int op);
3090 + int (*get_generation) (struct inode *pri);
3091 + int (*set_generation) (struct inode *pri, unsigned long new_gen);
3092 + int (*has_del_flag) (struct inode *inode);
3093 + int (*clear_del_flag) (struct inode *inode);
3094 + int (*set_meta_attr)(struct super_block *sb, char *name,
3095 + char *buf, int size);
3096 + int (*get_meta_attr)(struct super_block *sb, char *name,
3097 + char *buf, int *size);
3101 Index: linux-2.4.20-8/include/linux/ext3_fs.h
3102 ===================================================================
3103 --- linux-2.4.20-8.orig/include/linux/ext3_fs.h 2004-01-05 10:54:03.000000000 +0800
3104 +++ linux-2.4.20-8/include/linux/ext3_fs.h 2004-01-05 10:54:25.000000000 +0800
3105 @@ -183,7 +183,13 @@
3106 #define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
3107 #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
3108 #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
3109 -#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
3110 +/* For snapfs in EXT3 flags --- FIXME will find other ways to store it*/
3111 +#define EXT3_COW_FL 0x00008000 /* inode is snapshot cow */
3112 +#define EXT3_DEL_FL 0x00010000 /* inode is deleting in snapshot */
3113 +#define EXT3_SNAP_TABLE_FLAG 0x00020000 /* snap table inode */
3114 +/* FIXME For debugging will be removed later*/
3115 +#define EXT3_SNAP_PRI_FLAG 0x00040000 /* primary inode */
3118 #define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */
3119 #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */
3120 @@ -205,10 +211,25 @@
3121 /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
3122 #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
3123 #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
3124 +/* the following are for temporary test */
3125 +/* snapfs ioctls */
3126 +#define EXT3_IOC_CREATE_INDIR _IOW('v', 3, long)
3127 +#define EXT3_IOC_GET_INDIR _IOW('v', 4, long)
3128 +#define EXT3_IOC_DESTROY_INDIR _IOW('v', 5, long)
3129 +#define EXT3_IOC_IS_REDIR _IOW('v', 6, long)
3130 +#define EXT3_IOC_RESTORE_INDIR _IOW('v', 7, long)
3132 +#define EXT3_IOC_SNAP_SETFILECOW _IOW('v', 10, long)
3134 +/* XXX: the following are for temporary test, can be removed later */
3135 +#define EXT3_IOC_SNAP_PRINT _IOW('v', 11, long)
3136 +#define EXT3_IOC_SNAP_DELETE _IOW('v', 12, long)
3137 +#define EXT3_IOC_SNAP_RESTORE _IOW('v', 13, long)
3140 #ifdef CONFIG_JBD_DEBUG
3141 #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
3145 * Structure of an inode on the disk
3147 @@ -429,7 +450,14 @@
3148 __u8 s_def_hash_version; /* Default hash version to use */
3149 __u8 s_reserved_char_pad;
3150 __u16 s_reserved_word_pad;
3151 - __u32 s_reserved[192]; /* Padding to the end of the block */
3153 + __u32 s_first_meta_bg; /* First metablock group */
3154 + __u32 s_mkfs_time; /* When the filesystem was created */
3155 + __u32 s_first_cowed_pri_ino; /* For snapfs,the first cowed primary inode */
3156 + __u32 s_last_cowed_pri_ino; /* last cowed ino in memory */
3157 + __u32 s_snaptable_ino; /* snaptable ino in memory */
3158 + __u32 s_last_snap_orphan; /* SnapFS: start of cowing indirect inode */
3159 + __u32 s_reserved[186]; /* Padding to the end of the block,originally 204 */
3164 #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
3165 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
3167 +#define EXT3_FEATURE_COMPAT_SNAPFS 0x0010
3168 +#define EXT3_FEATURE_COMPAT_BLOCKCOW 0x0020
3170 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
3171 #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
3172 EXT3_FEATURE_INCOMPAT_RECOVER)
3173 Index: linux-2.4.20-8/include/linux/ext3_fs_sb.h
3174 ===================================================================
3175 --- linux-2.4.20-8.orig/include/linux/ext3_fs_sb.h 2004-01-05 10:54:00.000000000 +0800
3176 +++ linux-2.4.20-8/include/linux/ext3_fs_sb.h 2004-01-05 10:54:25.000000000 +0800
3178 wait_queue_head_t s_delete_thread_queue;
3179 wait_queue_head_t s_delete_waiter_queue;
3181 +#define EXT3_SNAP_FS
3182 +#ifdef EXT3_SNAP_FS
3183 + struct semaphore s_snap_list_sem;
3184 + unsigned long s_first_cowed_pri_ino;/* For snapfs,the first cowed primary inode */
3185 + unsigned long s_last_cowed_pri_ino; /* last cowed ino in memory */
3186 + unsigned long s_snaptable_ino; /* snaptable ino in memory */
3190 #endif /* _LINUX_EXT3_FS_SB */
3191 Index: linux-2.4.20-8/include/linux/ext3_jbd.h
3192 ===================================================================
3193 --- linux-2.4.20-8.orig/include/linux/ext3_jbd.h 2004-01-05 10:53:59.000000000 +0800
3194 +++ linux-2.4.20-8/include/linux/ext3_jbd.h 2004-01-05 10:54:25.000000000 +0800
3197 #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
3199 +/*snapshot transaction blocks*/
3201 +#define EXT3_EA_TRANS_BLOCKS EXT3_DATA_TRANS_BLOCKS
3202 +#define EXT3_SETMETA_TRANS_BLOCKS EXT3_DATA_TRANS_BLOCKS
3203 +#define EXT3_NEWINODE_TRANS_BLOCKS 10
3204 +#define SNAP_INSERTLIST_TRANS_BLOCKS (2 * EXT3_EA_TRANS_BLOCKS + 1)
3205 +#define SNAP_DELETELIST_TRANS_BLOCKS (2 * EXT3_EA_TRANS_BLOCKS + 2)
3206 +#define SNAP_COPYBLOCK_TRANS_BLOCKS (EXT3_DATA_TRANS_BLOCKS)
3207 +#define SNAP_MIGRATEDATA_TRANS_BLOCKS 2
3208 +#define SNAP_SETIND_TRANS_BLOCKS (SNAP_INSERTLIST_TRANS_BLOCKS + 1)
3209 +#define SNAP_ADDORPHAN_TRANS_BLOCKS 2
3210 +#define SNAP_REMOVEORPHAN_TRANS_BLOCKS 1
3211 +#define SNAP_RESTOREORPHAN_TRANS_BLOCKS (EXT3_EA_TRANS_BLOCKS + \
3212 + SNAP_DELETELIST_TRANS_BLOCKS + \
3213 + EXT3_NEWINODE_TRANS_BLOCKS + \
3214 + 2 * SNAP_MIGRATEDATA_TRANS_BLOCKS)
3215 +#define SNAP_BIGCOPY_TRANS_BLOCKS (2 * EXT3_DATA_TRANS_BLOCKS)
3216 +#define SNAP_CREATEIND_TRANS_BLOCKS (EXT3_NEWINODE_TRANS_BLOCKS + \
3217 + SNAP_MIGRATEDATA_TRANS_BLOCKS + \
3218 + SNAP_SETIND_TRANS_BLOCKS + \
3219 + SNAP_BIGCOPY_TRANS_BLOCKS + 3)
3220 +#define SNAP_MIGRATEBLK_TRANS_BLOCKS 2
3221 +#define SNAP_DESTROY_TRANS_BLOCKS (SNAP_DELETELIST_TRANS_BLOCKS + \
3222 + EXT3_EA_TRANS_BLOCKS + 2)
3223 +#define SNAP_RESTORE_TRANS_BLOCKS (EXT3_NEWINODE_TRANS_BLOCKS + \
3224 + 2 * SNAP_MIGRATEDATA_TRANS_BLOCKS + 1)
3227 ext3_mark_iloc_dirty(handle_t *handle,
3228 struct inode *inode,
3231 fs/ext3/Makefile | 2
3232 fs/ext3/ialloc.c | 8
3234 fs/ext3/ioctl.c | 103 +
3235 fs/ext3/snap.c | 2650 +++++++++++++++++++++++++++++++++++++++++++++
3236 include/linux/ext3_fs.h | 37
3237 include/linux/ext3_fs_sb.h | 7
3238 include/linux/ext3_jbd.h | 27
3239 include/linux/snap.h | 266 ++++
3240 9 files changed, 3096 insertions(+), 6 deletions(-)