1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre filesystem abstraction routines
6 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
7 * Author: Andreas Dilger <adilger@clusterfs.com>
9 * This file is part of Lustre, http://www.lustre.org.
11 * Lustre is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU General Public
13 * License as published by the Free Software Foundation.
15 * Lustre is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with Lustre; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_FILTER
26 #include <linux/init.h>
27 #include <linux/module.h>
29 #include <linux/jbd.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <linux/quotaops.h>
33 #include <linux/ext3_fs.h>
34 #include <linux/ext3_jbd.h>
35 #include <linux/ext3_extents.h>
36 #include <linux/locks.h>
37 #include <linux/version.h>
38 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
39 #include <linux/ext3_xattr.h>
41 #include <ext3/xattr.h>
44 #include <linux/kp30.h>
45 #include <linux/lustre_fsfilt.h>
46 #include <linux/obd.h>
47 #include <linux/obd_class.h>
48 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
49 #include <linux/module.h>
50 #include <linux/iobuf.h>
52 #include <linux/lustre_snap.h>
54 /* For snapfs in EXT3 flags --- FIXME: find another way to store these */
55 #define EXT3_COW_FL 0x00100000 /* inode is snapshot cow */
56 #define EXT3_DEL_FL 0x00200000 /* inode is deleting in snapshot */
/* Extended attribute names used for the snapshot table and the snapshot
 * generation; both are accessed with the EXT3_SNAP_INDEX name index.
 * EXT3_MAX_SNAP_DATA is the size of one struct snap_ea and is used as the
 * size of the on-stack EA buffers in this file. */
58 #define EXT3_SNAP_ATTR "@snap"
59 #define EXT3_SNAP_GENERATION_ATTR "@snap_generation"
60 #define EXT3_MAX_SNAPS 20
61 #define EXT3_MAX_SNAP_DATA (sizeof(struct snap_ea))
62 #define EXT3_SNAP_INDEX EXT3_XATTR_INDEX_LUSTRE
/* Accessors for fields of the on-disk superblock (s_es). */
64 #define SB_SNAPTABLE_INO(sb) (EXT3_SB(sb)->s_es->s_snaptable_ino)
65 #define SB_FEATURE_COMPAT(sb) (EXT3_SB(sb)->s_es->s_feature_compat)
/* Masks the compat feature word with mask converted to little-endian; the
 * result is the masked word itself, not a normalised 0/1. */
67 #define SNAP_HAS_COMPAT_FEATURE(sb,mask) \
68 (SB_FEATURE_COMPAT(sb) & cpu_to_le32(mask))
70 #define EXT3_FEATURE_COMPAT_SNAPFS 0x0010
71 #define EXT3_FEATURE_COMPAT_BLOCKCOW 0x0020
73 /* NOTE: these macros depend closely on the layout of struct snap_ea.
 * As written they skip a header of 2 * sizeof(ino_t) bytes and treat the
 * rest of the EA as two equal halves; SNAP_EA_PARENT_OFFSET() is the byte
 * offset of the second half. */
74 #define SNAP_CNT_FROM_SIZE(size) ((((size)-sizeof(ino_t)*2)/2)/sizeof(ino_t))
75 #define SNAP_EA_SIZE_FROM_INDEX(index) (sizeof(ino_t)*2 + 2*sizeof(ino_t)*((index)+1))
77 #define SNAP_EA_INO_BLOCK_SIZE(size) (((size)-sizeof(ino_t)*2)/2)
78 #define SNAP_EA_PARENT_OFFSET(size) (sizeof(ino_t)*2 + SNAP_EA_INO_BLOCK_SIZE((size)))
80 /* helper functions to manipulate field 'parent' in snap_ea */
/*
 * Store val in slot index of the parent array inside the snap EA at pea.
 * The array is taken to start at byte offset
 * sizeof(ino_t)*2 + (size - sizeof(ino_t)*2)/2, where size is the EA size in
 * bytes; this is the same value SNAP_EA_PARENT_OFFSET(size) computes.
 * No range check on index is visible here.
 */
82 set_parent_ino(struct snap_ea *pea, int size, int index, ino_t val)
84 char * p = (char*) pea;
87 offset = sizeof(ino_t)*2 + (size - sizeof(ino_t)*2)/2;
88 offset += sizeof(ino_t) * index;
89 *(ino_t*)(p+offset) = val;
/*
 * NOTE(review): only the first line of this definition is visible in this
 * excerpt.  The one visible caller, fsfilt_ext3_set_indirect(), passes
 * (handle, pri, buf) and stores the result in err.
 */
93 static int add_primary_inode_to_cowed_dir(handle_t *handle, struct inode *pri,
/*
 * NOTE(review): only the signature line of this definition is visible in
 * this excerpt.  The one visible caller, fsfilt_ext3_destroy_indirect(),
 * passes (handle, pri) and does not use the return value.
 */
100 static int del_primary_inode_to_cowed_dir(handle_t *handle, struct inode *pri)
106 * fsfilt_ext3_get_indirect - get a specific indirect inode from a primary inode
107 * @primary: primary (direct) inode
108 * @table: table of @slot + 1 indices in reverse chronological order
109 * @slot: starting slot number to check for indirect inode number
111 * We locate an indirect inode from a primary inode using the redirection
112 * table stored in the primary inode. Because the desired inode may actually
113 * be in a "newer" slot number than the supplied slot, we are given a table
114 * of indices in chronological order to search for the correct inode number.
115 * We walk table from @slot to 0 looking for a non-zero inode to load.
117 * To only load a specific index (and fail if it does not exist), you can
118 * pass @table = NULL, and the index number in @slot. If @slot == 0, the
119 * primary inode data is returned.
121 * We return a pointer to an inode, or an error. If the indirect inode for
122 * the given index does not exist, NULL is returned.
/*
 * Visible flow: validate slot (0..EXT3_MAX_SNAPS) and primary, read the
 * EXT3_SNAP_ATTR extended attribute of primary into an on-stack buffer and
 * treat it as a struct snap_ea, then iget() the inode number stored
 * little-endian in snaps->ino[index].  With table == NULL and a non-zero
 * slot a single lookup is done; the loop form keeps trying while no inode
 * has been found and slot > 0, and the primary itself is loaded through
 * iget() when slot ends at 0 with a table.
 * NOTE(review): how index is derived from table/slot, and how slot is
 * stepped, is not visible in this excerpt -- confirm before relying on the
 * description above.
 */
124 static struct inode *fsfilt_ext3_get_indirect(struct inode *primary, int *table,
127 char buf[EXT3_MAX_SNAP_DATA];
128 struct snap_ea *snaps;
130 struct inode *inode = NULL;
131 int rc = 0, index = 0;
135 if (slot < 0 || slot > EXT3_MAX_SNAPS || !primary)
138 CDEBUG(D_INODE, "ino %lu, table %p, slot %d\n", primary->i_ino, table,
140 rc = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR, buf,
142 if (rc == -ENODATA) {
145 CERROR("attribute read rc=%d \n", rc);
148 snaps = (struct snap_ea *)buf;
150 /* if table is NULL and there is a slot */
151 if( !table && slot ) {
153 ino = le32_to_cpu ( snaps->ino[index] );
155 inode = iget(primary->i_sb, ino);
158 /* if table is not NULL */
159 while ( !inode && slot > 0) {
161 ino = le32_to_cpu ( snaps->ino[index] );
163 CDEBUG(D_INODE, "snap inode at slot %d is %lu\n", slot, ino);
168 inode = iget(primary->i_sb, ino);
171 if( slot == 0 && table ) {
172 CDEBUG(D_INODE, "redirector not found, using primary\n");
173 inode = iget(primary->i_sb, primary->i_ino);
179 /* Save the indirect inode in the snapshot table of the primary inode. */
/*
 * Visible flow: read the current EXT3_SNAP_ATTR EA of pri (zero-filling the
 * buffer when the attribute does not exist), store ind_ino little-endian in
 * snaps->ino[index], record the parent inode number through
 * set_parent_ino(), and write the whole EXT3_MAX_SNAP_DATA-byte buffer back
 * under a journal handle of SNAP_SETIND_TRANS_BLOCKS credits.
 * NOTE(review): pri->i_ino is dereferenced in the CDEBUG before the !pri
 * test below, so that test cannot protect against a NULL pri.
 * NOTE(review): the range check uses MAX_SNAPS while the sibling functions
 * in this file use EXT3_MAX_SNAPS -- confirm both name the same limit.
 */
180 static int fsfilt_ext3_set_indirect(struct inode *pri, int index, ino_t ind_ino,
183 char buf[EXT3_MAX_SNAP_DATA];
184 struct snap_ea *snaps;
185 int err = 0, inlist = 1;
187 handle_t *handle = NULL;
190 CDEBUG(D_INODE, "(ino %lu, parent %lu): saving ind %lu to index %d\n",
191 pri->i_ino, parent_ino, ind_ino, index);
193 if (index < 0 || index > MAX_SNAPS || !pri)
195 /* need lock the list before get_attr() to avoid race */
196 /* read ea at first */
197 err = ext3_xattr_get(pri, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
198 buf, EXT3_MAX_SNAP_DATA);
199 if (err == -ENODATA || err == -ENOATTR) {
200 CDEBUG(D_INODE, "no extended attributes - zeroing\n");
201 memset(buf, 0, EXT3_MAX_SNAP_DATA);
203 * To judge a inode in list, we only see if it has snap ea.
204 * So take care of snap ea of primary inodes very carefully.
205 * Is it right in snapfs EXT3, check it later?
208 } else if (err < 0 || err > EXT3_MAX_SNAP_DATA) {
209 GOTO(out_unlock, err);
212 handle = ext3_journal_start(pri, SNAP_SETIND_TRANS_BLOCKS);
214 GOTO(out_unlock, err = PTR_ERR(handle));
216 snaps = (struct snap_ea *)buf;
217 snaps->ino[index] = cpu_to_le32 (ind_ino);
218 ea_size = EXT3_MAX_SNAP_DATA;
220 set_parent_ino(snaps, ea_size, index, cpu_to_le32(parent_ino));
223 err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
224 buf, EXT3_MAX_SNAP_DATA, 0);
227 err = add_primary_inode_to_cowed_dir(handle, pri, buf);
229 ext3_mark_inode_dirty(handle, pri);
230 ext3_journal_stop(handle, pri);
/*
 * Store gen in the EXT3_SNAP_GENERATION_ATTR EA of inode under a journal
 * handle of EXT3_XATTR_TRANS_BLOCKS credits; a failure of the set is logged
 * with CERROR.
 * NOTE(review): gen is an unsigned long but only sizeof(int) bytes of it are
 * written.  Where long is wider than int and the machine is big-endian those
 * are the high-order bytes -- confirm against the reader,
 * fsfilt_ext3_get_generation().
 */
235 static int fsfilt_ext3_set_generation(struct inode *inode, unsigned long gen)
241 handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
245 err = ext3_xattr_set(handle, inode, EXT3_SNAP_INDEX,
246 EXT3_SNAP_GENERATION_ATTR,
247 (char*)&gen, sizeof(int), 0);
249 CERROR("ino %lu, set_ext_attr err %d\n", inode->i_ino, err);
253 ext3_journal_stop(handle, inode);
/*
 * Read the EXT3_SNAP_GENERATION_ATTR EA of inode into a local named gen
 * (sizeof(gen) bytes).  -ENODATA from ext3_xattr_get() is tested on its own;
 * a CERROR message exists for the failure case.
 * NOTE(review): the declaration of gen and the return statements are not
 * visible in this excerpt.
 */
257 static int fsfilt_ext3_get_generation(struct inode *inode)
262 err = ext3_xattr_get(inode, EXT3_SNAP_INDEX, EXT3_SNAP_GENERATION_ATTR,
263 (char*)&gen, sizeof(gen));
265 if (err == -ENODATA) {
268 CERROR("can not get generation from %lu \n",
278 * Copy inode metadata from one inode to another, excluding blocks and size.
279 * FIXME do we copy EA data - ACLs and such (excluding snapshot data)?
/*
 * Copy the in-core inode fields listed below from src to dst (EXT3_COW_FL is
 * OR-ed into the copied ext3 flags), then walk the extended-attribute name
 * list of src and copy attributes to dst under handle.
 * NOTE(review): the second ext3_xattr_list() call passes a buffer length of
 * 0, the same as the size-probing call above it; it looks like the probed
 * size was meant to be passed so that names actually gets filled in.
 * NOTE(review): the per-name copy fetches and stores
 * EXT3_SNAP_INDEX/EXT3_SNAP_ATTR rather than the attribute called name, even
 * though the snap attribute is the one the loop says it skips -- confirm.
 */
281 static void ext3_copy_meta(handle_t *handle, struct inode *dst, struct inode *src)
285 dst->i_mode = src->i_mode;
286 dst->i_nlink = src->i_nlink;
287 dst->i_uid = src->i_uid;
288 dst->i_gid = src->i_gid;
289 dst->i_atime = src->i_atime;
290 dst->i_mtime = src->i_mtime;
291 dst->i_ctime = src->i_ctime;
292 // dst->i_version = src->i_version;
293 dst->i_attr_flags = src->i_attr_flags;
294 dst->i_generation = src->i_generation;
295 dst->u.ext3_i.i_dtime = src->u.ext3_i.i_dtime;
296 dst->u.ext3_i.i_flags = src->u.ext3_i.i_flags | EXT3_COW_FL;
297 #ifdef EXT3_FRAGMENTS
298 dst->u.ext3_i.i_faddr = src->u.ext3_i.i_faddr;
299 dst->u.ext3_i.i_frag_no = src->u.ext3_i.i_frag_no;
300 dst->u.ext3_i.i_frag_size = src->u.ext3_i.i_frag_size;
302 if ((size = ext3_xattr_list(src, NULL, 0)) > 0) {
307 if (ext3_xattr_list(src, names, 0) < 0)
310 * the list of attribute names are stored as NUL terminated
311 * strings, with a double NUL string at the end.
314 while ((namelen = strlen(name))) {
318 /* don't copy snap data */
319 if (!strcmp(name, EXT3_SNAP_ATTR)) {
320 CDEBUG(D_INFO, "skipping %s item\n", name);
323 CDEBUG(D_INODE, "copying %s item\n", name);
324 attrlen = ext3_xattr_get(src, EXT3_SNAP_INDEX,
325 EXT3_SNAP_ATTR, NULL, 0);
328 OBD_ALLOC(buf, attrlen);
334 if (ext3_xattr_get(src, EXT3_SNAP_INDEX,
335 EXT3_SNAP_ATTR, buf, attrlen) < 0)
337 if (ext3_xattr_set(handle, dst, EXT3_SNAP_INDEX,
338 EXT3_SNAP_ATTR, buf, attrlen, 0) < 0)
340 OBD_FREE(buf, attrlen);
341 name += namelen + 1; /* skip name and trailing NUL */
345 /* fsfilt_ext3_copy_block - copy one data block from inode @src to @dst.
346 No lock here. User should do the lock.
347 User should check the return value to see if the result is correct.
349 1: The block has been copied successfully
350 0: No block is copied, usually this is because src has no such blk
/*
 * Copy logical block blk of src into the same logical block of dst:
 * ext3_bread() on the source, ext3_getblk() with create = 1 on the
 * destination, then a memcpy of one filesystem block that is journalled as
 * metadata.  A journal handle is started on dst with
 * SNAP_COPYBLOCK_TRANS_BLOCKS credits and stopped at the end; both buffer
 * heads are released with brelse() when non-NULL.
 * NOTE(review): the return value of ext3_journal_get_write_access() is not
 * checked before the buffer is modified.
 * NOTE(review): what the S_ISREG(src->i_mode) test selects is not visible in
 * this excerpt.
 */
354 static int fsfilt_ext3_copy_block (struct inode *dst, struct inode *src, int blk)
356 struct buffer_head *bh_dst = NULL, *bh_src = NULL;
358 handle_t *handle = NULL;
360 CDEBUG(D_INODE, "copy blk %d from %lu to %lu \n", blk, src->i_ino,
363 * ext3_getblk() require handle!=NULL
365 if (S_ISREG(src->i_mode))
368 handle = ext3_journal_start(dst, SNAP_COPYBLOCK_TRANS_BLOCKS);
372 bh_src = ext3_bread(handle, src, blk, 0, &err);
374 CERROR("error for src blk %d, error %d\n", blk, err);
375 GOTO(exit_relese, err);
377 bh_dst = ext3_getblk(handle, dst, blk, 1, &err);
379 CERROR("error for dst blk %d, error %d\n", blk, err);
380 GOTO(exit_relese, err);
382 CDEBUG(D_INODE, "copy block %lu to %lu (%ld bytes)\n",
383 bh_src->b_blocknr, bh_dst->b_blocknr, src->i_sb->s_blocksize);
385 ext3_journal_get_write_access(handle, bh_dst);
386 memcpy(bh_dst->b_data, bh_src->b_data, src->i_sb->s_blocksize);
387 ext3_journal_dirty_metadata(handle, bh_dst);
391 if (bh_src) brelse(bh_src);
392 if (bh_dst) brelse(bh_dst);
394 ext3_journal_stop(handle, dst);
/*
 * Report whether i_file_acl of the ext3 inode is non-zero.  Callers in this
 * file use the result to decide whether one extra filesystem block has to be
 * accounted for in i_blocks.
 */
398 static inline int ext3_has_ea(struct inode *inode)
400 return (EXT3_I(inode)->i_file_acl != 0);
402 /* XXX: This function has a very bad effect on
403 * filesystem performance;
404 * another way to fix this should be found.
/*
 * When inode has a non-zero i_blocks and a mapping, call
 * fsync_inode_data_buffers() on it and then truncate_inode_pages() on the
 * mapping from offset 0.  handle is not used in the visible lines.
 */
406 static void fs_flushinval_pages(handle_t *handle, struct inode* inode)
408 if (inode->i_blocks > 0 && inode->i_mapping) {
409 fsync_inode_data_buffers(inode);
410 truncate_inode_pages(inode->i_mapping, 0);
413 /* ext3_migrate_data:
414 * MOVE all the data blocks from inode src to inode dst as well as
415 * COPY all attributes(meta data) from inode src to inode dst.
416 * For extended attributes(EA), we COPY all the EAs but skip the Snap EA from
417 * src to dst. If the dst has Snap EA, then we CAN'T overwrite it. We CAN'T
418 * copy the src Snap EA. XXX for EA, can we change it to MOVE all the EAs
419 * (exclude Snap EA) to dst and copy it back to src ? This is for LAN free
/*
 * Visible steps: flush and drop the page cache of src, copy metadata with
 * ext3_copy_meta(), move the i_data block map from src to dst (memcpy, then
 * zero the source), discard preallocation on src, move i_disksize into
 * i_size/i_disksize of dst and zero both on src, copy i_blocks, then adjust
 * i_blocks by bpib (512-byte units per filesystem block) depending on which
 * inode has an EA block, and mark both inodes dirty.
 * NOTE(review): err is declared unsigned long in a function returning int.
 * NOTE(review): what happens when dst and src are the same inode is not
 * visible in this excerpt.
 */
422 static int ext3_migrate_data(handle_t *handle, struct inode *dst,
425 unsigned long err = 0;
426 /* 512 byte disk blocks per inode block */
427 int bpib = src->i_sb->s_blocksize >> 9;
434 if (dst->i_ino == src->i_ino)
437 fs_flushinval_pages(handle, src);
439 ext3_copy_meta(handle, dst, src);
441 CDEBUG(D_INODE, "migrating data blocks from %lu to %lu\n",
442 src->i_ino, dst->i_ino);
443 /* Can't check blocks in case of EAs */
445 memcpy(EXT3_I(dst)->i_data, EXT3_I(src)->i_data,
446 sizeof(EXT3_I(src)->i_data));
447 memset(EXT3_I(src)->i_data, 0, sizeof(EXT3_I(src)->i_data));
449 ext3_discard_prealloc(src);
451 dst->i_size = EXT3_I(dst)->i_disksize = EXT3_I(src)->i_disksize;
452 src->i_size = EXT3_I(src)->i_disksize = 0;
454 dst->i_blocks = src->i_blocks;
456 /* Check EA blocks here to modify i_blocks correctly */
457 if(ext3_has_ea (src)) {
458 src->i_blocks += bpib;
459 if( ! ext3_has_ea (dst) )
460 if( dst->i_blocks >= bpib )
461 dst->i_blocks -= bpib;
463 if( ext3_has_ea (dst))
464 dst->i_blocks += bpib;
467 CDEBUG(D_INODE, "migrate data from ino %lu to ino %lu\n", src->i_ino,
469 ext3_mark_inode_dirty(handle, src);
470 ext3_mark_inode_dirty(handle, dst);
/*
 * Copy the mapped blocks of src into dst, one fsfilt_ext3_copy_block() call
 * per logical block; the block count is i_size of src rounded up to whole
 * blocks.  When the credits left on handle drop to low_credits the handle is
 * extended with journal_extend(); if that fails, dst has its
 * i_disksize/i_blocks/i_mtime brought up to date, the handle is stopped and
 * a new one is started.  The function has a handle_t pointer return type and
 * visible callers assign the result back to their handle variable.
 * NOTE(review): h_ref of the handle is saved before the stop and written
 * into the new handle; the lines in between, and the condition under which
 * dst is added to the orphan list, are not visible here.
 */
474 static handle_t * ext3_copy_data(handle_t *handle, struct inode *dst,
475 struct inode *src, int *has_orphan)
477 unsigned long blocks, blk, cur_blks;
478 int low_credits, save_ref;
481 blocks =(src->i_size + src->i_sb->s_blocksize-1) >>
482 src->i_sb->s_blocksize_bits;
483 low_credits = handle->h_buffer_credits - SNAP_BIGCOPY_TRANS_BLOCKS;
485 CDEBUG(D_INODE, "%lu blocks need to be copied,low credits limit %d\n",
486 blocks, low_credits);
488 for (blk = 0, cur_blks= dst->i_blocks; blk < blocks; blk++) {
489 if (!ext3_bmap(src->i_mapping, blk))
491 if(handle->h_buffer_credits <= low_credits) {
492 int needed = (blocks - blk) * EXT3_DATA_TRANS_BLOCKS;
493 if (needed > 4 * SNAP_COPYBLOCK_TRANS_BLOCKS)
494 needed = 4 * SNAP_COPYBLOCK_TRANS_BLOCKS;
495 if (journal_extend(handle, needed)) {
496 CDEBUG(D_INFO, "create_indirect:fail to extend "
497 "journal, restart trans\n");
500 CDEBUG(D_INODE, "add orphan ino %lu"
501 "nlink %d to orphan list \n",
502 dst->i_ino, dst->i_nlink);
503 ext3_orphan_add(handle, dst);
506 dst->u.ext3_i.i_disksize =
507 blk * dst->i_sb->s_blocksize;
508 dst->i_blocks = cur_blks;
509 dst->i_mtime = CURRENT_TIME;
510 ext3_mark_inode_dirty(handle, dst);
512 * We can be sure the last handle was stoped
513 * ONLY if the handle's reference count is 1
515 save_ref = handle->h_ref;
517 if( ext3_journal_stop(handle, dst) ){
518 CERROR("fail to stop journal\n");
522 handle = ext3_journal_start(dst,
523 low_credits + needed);
525 CERROR("fail to restart handle\n");
528 handle->h_ref = save_ref;
531 if (fsfilt_ext3_copy_block( dst, src, blk) < 0 )
533 cur_blks += dst->i_sb->s_blocksize / 512;
536 dst->i_size = dst->u.ext3_i.i_disksize = src->i_size;
541 * fsfilt_ext3_create_indirect - copy data, attributes from primary to new indir inode
542 * @pri: primary (source) inode
543 * @index: index in snapshot table where indirect inode should be stored
544 * @delete: flag that the primary inode is being deleted
546 * We copy all of the data blocks from the @*src inode to the @*dst inode, as
547 * well as copying the attributes from @*src to @*dst. If @delete == 1, then
548 * the primary inode will only be a redirector and will appear deleted.
550 * FIXME do we move EAs, only non-snap EAs, what?
551 * FIXME we could do readpage/writepage, but we would have to handle block
552 * allocation then, and it ruins sparse files for 1k/2k filesystems,
553 * at the expense of doing a memcpy.
/*
 * Visible outline:
 *  - an attempt to COW the journal inode is logged with CERROR;
 *  - an existing indirect inode for index is looked up and a
 *    SNAP_CREATEIND_TRANS_BLOCKS handle is started;
 *  - if del is set and an indirect inode already exists, the primary data is
 *    moved into a temporary inode (non-directories only) and the primary
 *    gets its i_dtime set;
 *  - otherwise a new inode is allocated, data and metadata are migrated from
 *    pri to it, and it is recorded in the snapshot table of pri through
 *    fsfilt_ext3_set_indirect();
 *  - in the delete case directories get their data copied back with
 *    ext3_copy_data(); in the other case the data is copied back unless
 *    block-level COW applies or the inode is a fast symlink;
 *  - EXT3_FEATURE_COMPAT_SNAPFS is set in the superblock if it was clear.
 * NOTE(review): several branch, label and return lines are missing from this
 * excerpt, so the exact nesting of the steps above should be confirmed.
 */
555 static struct inode* fsfilt_ext3_create_indirect(struct inode *pri, int index,
557 struct inode* parent,
561 handle_t *handle = NULL;
566 if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
567 CERROR("TRY TO COW JOUNRAL\n");
570 CDEBUG(D_INODE, "creating indirect inode for %lu at index %d, %s pri\n",
571 pri->i_ino, index, del ? "deleting" : "preserve");
573 ind = fsfilt_ext3_get_indirect(pri, NULL, index);
575 handle = ext3_journal_start(pri, SNAP_CREATEIND_TRANS_BLOCKS);
578 /* XXX ? We should pass an err argument to get_indirect and precisely
579 * detect the errors, for some errors, we should exit right away.
582 /* if the option is SNAP_DEL_PRI_WITH_IND and there is an indirect,
583 * we just free the primary data blocks and mark this inode delete
585 if((del) && ind && !IS_ERR(ind)) {
587 /* for directory, we don't free the data blocks,
588 * or ext3_rmdir will report errors "bad dir, no data blocks"
590 CDEBUG(D_INODE, "del==SNAP_DEL_PRI_WITH_IND && ind\n");
591 if(!S_ISDIR(pri->i_mode)) {
592 /*Here delete the data of that pri inode.
593 * FIXME later, should throw the blocks of
594 * primary inode directly
596 tmp = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
599 ext3_migrate_data(handle, tmp, pri);
604 CERROR("ext3_new_inode error\n");
605 GOTO(exit, err=-EIO);
609 pri->u.ext3_i.i_dtime = CURRENT_TIME;
610 ext3_mark_inode_dirty(handle, pri);
614 if (ind && !IS_ERR(ind)) {
615 CDEBUG(D_INODE, "existing indirect ino %lu for %lu: index %d\n",
616 ind->i_ino, pri->i_ino, index);
619 /* XXX: check this, ext3_new_inode, the first arg should be "dir" */
620 ind = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
624 CDEBUG(D_INODE, "got new inode %lu\n", ind->i_ino);
625 ind->i_rdev = pri->i_rdev;
626 ind->i_op = pri->i_op;
627 fsfilt_ext3_set_generation(ind, (unsigned long)gen);
628 /* If we are deleting the primary inode, we want to ensure that it is
629 * written to disk with a non-zero link count, otherwise the next iget
630 * and iput will mark the inode as free (which we don't want, we want
631 * it to stay a redirector). We fix this in ext3_destroy_indirect()
632 * when the last indirect inode is removed.
634 * We then do what ext3_delete_inode() does so that the metadata will
635 * appear the same as a deleted inode, and we can detect it later.
638 CDEBUG(D_INODE, "deleting primary inode\n");
641 err = ext3_migrate_data(handle, ind, pri);
643 GOTO(exit_unlock, err);
645 err = fsfilt_ext3_set_indirect(pri, index, ind->i_ino, parent->i_ino);
647 GOTO(exit_unlock, err);
649 /* XXX for directory, we copy the block back
650 * or ext3_rmdir will report errors "bad dir, no data blocks"
652 if( S_ISDIR(pri->i_mode)) {
653 handle = ext3_copy_data(handle, pri, ind, &has_orphan);
655 GOTO(exit_unlock, err= -EINVAL);
658 pri->u.ext3_i.i_flags |= EXT3_DEL_FL;
659 ind->u.ext3_i.i_flags |= EXT3_COW_FL;
660 if(S_ISREG(pri->i_mode)) pri->i_nlink = 1;
661 pri->u.ext3_i.i_dtime = CURRENT_TIME;
662 //pri->u.ext3_i.i_generation++;
663 ext3_mark_inode_dirty(handle, pri);
664 ext3_mark_inode_dirty(handle, ind);
668 err = ext3_migrate_data(handle, ind, pri);
672 /* for regular files we do blocklevel COW's maybe */
673 if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW)
674 && S_ISREG(pri->i_mode)) {
676 CDEBUG(D_INODE, "ino %lu, do block cow\n", pri->i_ino);
677 /* because after migrate_data , pri->i_size is 0 */
678 pri->i_size = ind->i_size;
681 int bpib = pri->i_sb->s_blocksize >> 9;
682 CDEBUG(D_INODE, "ino %lu, do file cow\n", pri->i_ino);
684 /* XXX: can we do this better?
685 * If it's a fast symlink, we should copy i_data back!
686 * The criteria to determine a fast symlink is:
687 * 1) it's a link and its i_blocks is 0
688 * 2) it's a link and its i_blocks is bpib ( the case
689 * it has been cowed and has ea )
691 if( S_ISLNK(ind->i_mode) && ((ind->i_blocks == 0) ||
692 (ext3_has_ea(ind) && ind->i_blocks == bpib))) {
693 CDEBUG(D_INODE, "ino %lu is fast symlink\n", pri->i_ino);
694 memcpy(EXT3_I(pri)->i_data, EXT3_I(ind)->i_data,
695 sizeof(EXT3_I(ind)->i_data));
696 pri->i_size = ind->i_size;
699 handle = ext3_copy_data(handle, pri, ind, &has_orphan);
701 GOTO(exit_unlock, err);
704 /* set cow flag for ind */
705 ind->u.ext3_i.i_flags |= EXT3_COW_FL;
706 pri->u.ext3_i.i_flags &= ~EXT3_COW_FL;
708 ext3_mark_inode_dirty(handle, pri);
709 ext3_mark_inode_dirty(handle, ind);
711 err = fsfilt_ext3_set_indirect(pri, index, ind->i_ino, parent->i_ino);
713 GOTO(exit_unlock, err);
717 if (!EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
718 EXT3_FEATURE_COMPAT_SNAPFS)) {
719 lock_super(pri->i_sb);
720 ext3_journal_get_write_access(handle, pri->i_sb->u.ext3_sb.s_sbh);
721 pri->i_sb->u.ext3_sb.s_es->s_feature_compat |=
722 cpu_to_le32(EXT3_FEATURE_COMPAT_SNAPFS);
723 ext3_journal_dirty_metadata(handle, pri->i_sb->u.ext3_sb.s_sbh);
724 pri->i_sb->s_dirt = 1;
725 unlock_super(pri->i_sb);
728 CDEBUG(D_INODE, "del %lu nlink %d from orphan list\n",
729 ind->i_ino, ind->i_nlink);
730 ext3_orphan_del(handle, ind);
732 ext3_journal_stop(handle, pri);
741 CDEBUG(D_INODE, "del %lu nlink %d from orphan list\n",
742 ind->i_ino, ind->i_nlink);
743 ext3_orphan_del(handle, ind);
746 ext3_journal_stop(handle, pri);
748 CERROR("exiting with error %d\n", err);
/*
 * Set, clear or test bits of the compat feature word in the on-disk
 * superblock, selected by op (SNAP_SET_FEATURE, SNAP_CLEAR_FEATURE,
 * SNAP_HAS_FEATURE).  Set and clear run under a one-credit journal handle
 * started on the root inode and journal the superblock buffer around the
 * update; feature is converted with cpu_to_le32() before being applied.
 * NOTE(review): for SNAP_HAS_FEATURE, rc receives the masked little-endian
 * word rather than a normalised 0/1 -- confirm what callers expect.
 */
752 static int fsfilt_ext3_snap_feature (struct super_block *sb, int feature, int op) {
759 case SNAP_SET_FEATURE:
760 handle = ext3_journal_start(sb->s_root->d_inode, 1);
762 ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
763 SB_FEATURE_COMPAT(sb) |= cpu_to_le32(feature);
765 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
767 ext3_journal_stop(handle, sb->s_root->d_inode);
769 case SNAP_CLEAR_FEATURE:
770 handle = ext3_journal_start(sb->s_root->d_inode, 1);
772 ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
773 SB_FEATURE_COMPAT(sb) &= ~cpu_to_le32(feature);
774 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
777 ext3_journal_stop(handle, sb->s_root->d_inode);
779 case SNAP_HAS_FEATURE:
780 /*FIXME should lock super or not*/
781 rc = SNAP_HAS_COMPAT_FEATURE(sb, feature);
789 * is_redirector - determines if a primary inode is a redirector
790 * @inode: primary inode to test
792 * Returns 1 if the inode is a redirector, 0 otherwise.
/*
 * Decide whether inode is a redirector from the length returned when its
 * EXT3_SNAP_ATTR EA is read: the visible test is rc > 0 && rc <=
 * MAX_SNAP_DATA.  The verdict is logged and returned through RETURN().
 * NOTE(review): the bound here is MAX_SNAP_DATA, while the buffer-size macro
 * defined at the top of this file is EXT3_MAX_SNAP_DATA -- confirm both name
 * the same value.
 */
794 static int fsfilt_ext3_is_redirector(struct inode *inode)
796 int is_redirector = 0;
800 rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
802 if (rc > 0 && rc <= MAX_SNAP_DATA)
804 CDEBUG(D_INODE, "inode %lu %s redirector\n", inode->i_ino,
805 is_redirector ? "is" : "isn't");
806 RETURN(is_redirector);
808 /* whether the inode is an indirect inode or not */
/*
 * NOTE(review): the test below uses |= rather than &.  As written it sets
 * EXT3_COW_FL in the ext3 inode flags on every call and the condition is
 * always non-zero; a read-only test of the flag would use & instead.
 */
809 static int fsfilt_ext3_is_indirect(struct inode *inode)
811 if (EXT3_I(inode)->i_flags |= EXT3_COW_FL)
817 /* get the indirect ino at index of the primary inode
818 * return value: positive: indirect ino number
819 * negative or 0: error
/*
 * Read the EXT3_SNAP_ATTR EA of primary into an on-stack buffer and take the
 * inode number stored little-endian in snaps->ino[index].  index must be in
 * 0..EXT3_MAX_SNAPS and primary non-NULL.
 * NOTE(review): the missing-attribute test here is -ENOATTR only, while
 * fsfilt_ext3_get_indirect() tests -ENODATA for the same read.
 * NOTE(review): negative errno values are assigned to ino and the function
 * has return type ino_t -- confirm callers can tell them from inode numbers.
 */
821 static ino_t fsfilt_ext3_get_indirect_ino(struct inode *primary, int index)
823 char buf[EXT3_MAX_SNAP_DATA];
824 struct snap_ea *snaps;
828 if (index < 0 || index > EXT3_MAX_SNAPS || !primary)
831 err = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
832 buf, EXT3_MAX_SNAP_DATA);
833 if (err == -ENOATTR) {
834 GOTO(err_free, ino = -ENOATTR);
835 } else if (err < 0) {
836 CERROR(" attribute read error err=%d\n", err);
837 GOTO(err_free, ino = err);
839 snaps = (struct snap_ea *)buf;
840 ino = le32_to_cpu (snaps->ino[index]);
841 CDEBUG(D_INODE, "snap ino for %ld at index %d is %lu\n",
842 primary->i_ino, index, ino);
848 /* The following functions are used by destroy_indirect */
/* Read (inode_bmap) or write (inode_setbmap) entry nr of the i_data[] array
 * held in the ext3 in-core inode.  The stored value is used as-is, with no
 * byte-order conversion. */
849 #define inode_bmap(inode, nr) (EXT3_I(inode)->i_data[(nr)])
850 #define inode_setbmap(inode, nr, physical) (EXT3_I(inode)->i_data[(nr)]=(physical))
/*
 * Read entry nr of the array of 32-bit values held in bh->b_data, converted
 * from little-endian.
 * NOTE(review): callers pass the result of bread() directly; whether bh is
 * NULL-checked or released in here is not visible in this excerpt.
 */
851 static inline int block_bmap(struct buffer_head * bh, int nr)
857 tmp = le32_to_cpu(((u32 *) bh->b_data)[nr]);
/*
 * Store physical, converted to little-endian, in entry nr of the array of
 * 32-bit values held in bh->b_data.  The store is bracketed by
 * ext3_journal_get_write_access() and ext3_journal_dirty_metadata() on bh.
 * NOTE(review): the return value of ext3_journal_get_write_access() is not
 * used in the visible lines.
 */
862 static inline int block_setbmap(handle_t *handle, struct buffer_head * bh,
863 int nr, int physical)
868 ext3_journal_get_write_access(handle, bh);
869 ((u32 *) bh->b_data)[nr] = cpu_to_le32(physical);
870 ext3_journal_dirty_metadata(handle, bh);
/*
 * Move the mapping of logical block number block from src to dst by editing
 * the block maps directly: direct slots in i_data[] first, then the single,
 * double and triple indirect levels.  At each indirect level the code reads
 * the dst and src pointers, and either hands a whole src subtree pointer
 * over to dst or moves a single entry and zeroes it in src.
 * NOTE(review): the conditions selecting between those two actions, and the
 * early returns when dst already maps the block, are on lines missing from
 * this excerpt -- confirm.
 * NOTE(review): every bread() result is passed straight to block_bmap() or
 * block_setbmap(); no brelse() or NULL check is visible in this function.
 * NOTE(review): in the triple-indirect case the top-level pointer is copied
 * to dst but no matching clear of the src slot is visible.
 */
875 static int ext3_migrate_block(handle_t *handle, struct inode * dst,
876 struct inode *src, int block)
878 int i1_d=0, i1_s=0, i2_d=0, i2_s=0, i3_d=0, i3_s=0;
879 int addr_per_block = EXT3_ADDR_PER_BLOCK(src->i_sb);
880 int addr_per_block_bits = EXT3_ADDR_PER_BLOCK_BITS(src->i_sb);
881 unsigned long blksz = src->i_sb->s_blocksize;
882 kdev_t ddev = dst->i_dev;
883 kdev_t sdev = src->i_dev;
888 CWARN("ext3_migrate_block block < 0 %p \n", src->i_sb);
891 if (block >= EXT3_NDIR_BLOCKS + addr_per_block +
892 (1 << (addr_per_block_bits * 2)) +
893 ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
894 CWARN("ext3_migrate_block block > big %p \n", src->i_sb);
897 /* EXT3_NDIR_BLOCK */
898 if (block < EXT3_NDIR_BLOCKS) {
899 if(inode_bmap(dst, block))
902 if( (physical = inode_bmap(src, block)) ) {
903 inode_setbmap (dst, block, physical);
904 inode_setbmap (src, block, 0);
912 block -= EXT3_NDIR_BLOCKS;
913 if (block < addr_per_block) {
914 i1_d = inode_bmap (dst, EXT3_IND_BLOCK);
916 physical = inode_bmap(src, EXT3_IND_BLOCK);
918 inode_setbmap (dst, EXT3_IND_BLOCK, physical);
919 inode_setbmap (src, EXT3_IND_BLOCK, 0);
925 if(block_bmap(bread(ddev, i1_d, blksz), block))
928 i1_s = inode_bmap (src, EXT3_IND_BLOCK);
929 if( !i1_s) RETURN(0);
931 physical = block_bmap(bread(sdev, i1_s, blksz), block);
934 block_setbmap(handle, bread(ddev, i1_d, blksz),block,
936 block_setbmap(handle, bread(sdev, i1_s, blksz),block,0);
942 /* EXT3_DIND_BLOCK */
943 block -= addr_per_block;
944 if (block < (1 << (addr_per_block_bits * 2))) {
945 i1_d = inode_bmap (dst, EXT3_DIND_BLOCK);
946 i1_s = inode_bmap (src, EXT3_DIND_BLOCK);
948 if( (physical = inode_bmap(src, EXT3_DIND_BLOCK)) ) {
949 inode_setbmap (dst, EXT3_DIND_BLOCK, physical);
950 inode_setbmap (src, EXT3_DIND_BLOCK, 0);
956 i2_d = block_bmap (bread (ddev, i1_d, blksz),
957 block >> addr_per_block_bits);
963 physical = block_bmap(bread (sdev, i1_s, blksz),
964 block >> addr_per_block_bits);
966 block_setbmap(handle, bread (ddev, i1_d,blksz),
967 block >> addr_per_block_bits,
969 block_setbmap(handle, bread (sdev, i1_s,blksz),
970 block >> addr_per_block_bits, 0);
976 physical = block_bmap(bread (ddev, i2_d, blksz),
977 block & (addr_per_block - 1));
981 i2_s = block_bmap (bread (sdev, i1_s, blksz),
982 block >> addr_per_block_bits);
985 physical = block_bmap(bread (sdev, i2_s, blksz),
986 block & (addr_per_block - 1));
988 block_setbmap(handle, bread (ddev, i2_d, blksz),
989 block & (addr_per_block - 1), physical);
990 block_setbmap(handle, bread (sdev, i2_s, blksz),
991 block & (addr_per_block - 1), 0);
999 /* EXT3_TIND_BLOCK */
1000 block -= (1 << (addr_per_block_bits * 2));
1001 i1_d = inode_bmap (dst, EXT3_TIND_BLOCK);
1002 i1_s = inode_bmap (src, EXT3_TIND_BLOCK);
1004 if((physical = inode_bmap(src, EXT3_TIND_BLOCK)) )
1005 inode_setbmap (dst, EXT3_TIND_BLOCK, physical);
1009 i2_d = block_bmap(bread (ddev, i1_d, blksz),
1010 block >> (addr_per_block_bits * 2));
1012 if(i1_s) i2_s = block_bmap(bread(sdev, i1_s, blksz),
1013 block >> (addr_per_block_bits * 2));
1016 if( !i1_s) RETURN(0);
1018 physical = block_bmap(bread (sdev, i1_s, blksz),
1019 block >> (addr_per_block_bits * 2));
1021 block_setbmap(handle, bread (ddev, i1_d, blksz),
1022 block >> (addr_per_block_bits * 2), physical);
1023 block_setbmap(handle, bread (sdev, i1_s, blksz),
1024 block >> (addr_per_block_bits * 2), 0);
1030 i3_d = block_bmap (bread (ddev, i2_d, blksz),
1031 (block >> addr_per_block_bits) & (addr_per_block - 1));
1032 if( i2_s) i3_s = block_bmap (bread (sdev, i2_s, blksz),
1033 (block >> addr_per_block_bits) & (addr_per_block - 1));
1036 if (!i2_s) RETURN(0);
1037 physical = block_bmap (bread (sdev, i2_s, blksz),
1038 (block >> addr_per_block_bits) & (addr_per_block - 1));
1040 block_setbmap (handle, bread (ddev, i2_d, blksz),
1041 (block >> addr_per_block_bits) &
1042 (addr_per_block - 1), physical);
1043 block_setbmap (handle, bread (sdev, i2_s, blksz),
1044 (block >> addr_per_block_bits) &
1045 (addr_per_block - 1),0);
1051 physical = block_bmap (bread (ddev, i3_d, blksz),
1052 block & (addr_per_block - 1)) ;
1058 physical = block_bmap(bread(sdev, i3_s, blksz),
1059 block & (addr_per_block - 1));
1061 block_setbmap (handle, bread (ddev, i3_d, blksz),
1062 block & (addr_per_block - 1), physical);
1063 block_setbmap (handle, bread (sdev, i3_s, blksz),
1064 block & (addr_per_block - 1), 0);
1072 /* Generate i_blocks from blocks for an inode .
1073 * We also calculate EA block here.
/*
 * Recompute an i_blocks value for inode in 512-byte units (bpib is the
 * filesystem block size divided by 512).  Data blocks are probed one by one
 * with ext3_bmap(); the number of indirect (meta) blocks is then derived
 * from the block count by walking the direct, single, double and triple
 * indirect ranges; ext3_has_ea() is consulted for the EA block.  The result
 * is logged with CDEBUG.
 * NOTE(review): the passed-in blocks is overwritten from i_size in the
 * visible code; the condition guarding that, and the per-block increments,
 * are on lines missing from this excerpt.
 */
1075 static unsigned long calculate_i_blocks(struct inode *inode, int blocks)
1077 /* 512 byte disk blocks per inode block */
1078 int bpib = inode->i_sb->s_blocksize >> 9;
1079 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
1080 unsigned long i_blocks = 0;
1081 int i=0, j=0, meta_blocks = 0;
1087 /* re-calculate blocks here */
1088 blocks = (inode->i_size + inode->i_sb->s_blocksize-1)
1089 >> inode->i_sb->s_blocksize_bits;
1092 /* calculate data blocks */
1093 for(i = 0; i < blocks; i++) {
1094 if(ext3_bmap(inode->i_mapping, i))
1097 /* calculate meta blocks */
1098 blocks -= EXT3_NDIR_BLOCKS;
1101 blocks -= addr_per_block;
1103 if( blocks > 0 ) meta_blocks++;
1106 while( (blocks > 0) && (i < addr_per_block) ) {
1108 blocks -= addr_per_block;
1112 if ( blocks > 0 ) meta_blocks += 2;
1115 while( blocks > 0) {
1117 blocks -= addr_per_block;
1119 if(i >= addr_per_block ) {
1123 if( j >= addr_per_block) {
1128 /* calculate EA blocks */
1129 if(ext3_has_ea(inode))
1132 i_blocks += meta_blocks * bpib;
1133 CDEBUG(D_INODE, "ino %lu, get i_blocks %lu\n", inode->i_ino, i_blocks);
1139 * fsfilt_ext3_destroy_indirect - delete an indirect inode from the table
1140 * @pri: primary inode
1141 * @ind: indirect inode
1142 * @index: index of inode that should be deleted
1144 * We delete the @*ind inode, and remove it from the snapshot table. If @*ind
1145 * is NULL, we use the inode at @index.
/*
 * Visible outline: range-check index, log an attempt on the journal inode,
 * read the snap EA of pri, iget() the indirect inode recorded at index and
 * start a SNAP_DESTROY_TRANS_BLOCKS handle.  With block-level COW on a
 * regular file, blocks are migrated from the indirect inode to next_ind
 * under both i_sem semaphores and i_blocks of next_ind is recomputed.  The
 * table slot is then zeroed and the remaining slots are summed into save;
 * the EA is rewritten with the buffer when save is non-zero and with a NULL
 * value otherwise, and i_ctime of pri is saved and restored around that.
 * NOTE(review): the slot scan covers i < EXT3_MAX_SNAPS while the entry
 * check accepts index == EXT3_MAX_SNAPS -- confirm against the size of the
 * ino array in struct snap_ea.
 * NOTE(review): the per-block skip conditions in the migrate loop end on
 * lines missing from this excerpt.
 */
1147 static int fsfilt_ext3_destroy_indirect(struct inode *pri, int index,
1148 struct inode *next_ind)
1150 char buf[EXT3_MAX_SNAP_DATA];
1151 struct snap_ea *snaps;
1153 int save = 0, i=0, err = 0;
1154 handle_t *handle=NULL;
1158 if (index < 0 || index > EXT3_MAX_SNAPS)
1161 if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
1162 CERROR("TRY TO DESTROY JOURNAL'S IND\n");
1166 err = ext3_xattr_get(pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
1167 buf, EXT3_MAX_SNAP_DATA);
1169 CERROR("inode %lu attribute read error\n", pri->i_ino);
1173 snaps = (struct snap_ea *)buf;
1174 if ( !snaps->ino[index] ) {
1175 CERROR("for pri ino %lu, index %d, redirect ino is 0\n",
1180 CDEBUG(D_INODE, "for pri ino %lu, reading inode %lu at index %d\n",
1181 pri->i_ino, (ulong)le32_to_cpu(snaps->ino[index]), index);
1183 ind = iget(pri->i_sb, le32_to_cpu (snaps->ino[index]));
1185 if ( !ind || IS_ERR(ind) || is_bad_inode(ind))
1188 CDEBUG(D_INODE, "iget ind %lu, ref count = %d\n",
1189 ind->i_ino, atomic_read(&ind->i_count));
1191 handle = ext3_journal_start(pri, SNAP_DESTROY_TRANS_BLOCKS);
1196 /* if it's block level cow, first copy the blocks back */
1197 if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW) &&
1198 S_ISREG(pri->i_mode)) {
1205 double_down(&next_ind->i_sem, &ind->i_sem);
1207 blocks = (next_ind->i_size + next_ind->i_sb->s_blocksize-1)
1208 >> next_ind->i_sb->s_blocksize_bits;
1210 CDEBUG(D_INODE, "migrate block back from ino %lu to %lu\n",
1211 ind->i_ino, next_ind->i_ino);
1213 for(i = 0; i < blocks; i++) {
1214 if( ext3_bmap(next_ind->i_mapping, i) )
1216 if( !ext3_bmap(ind->i_mapping, i) )
1218 ext3_migrate_block(handle, next_ind, ind, i) ;
1220 /* Now re-compute the i_blocks */
1221 /* XXX shall we take care of ind here? probably not */
1222 next_ind->i_blocks = calculate_i_blocks( next_ind, blocks);
1223 ext3_mark_inode_dirty(handle, next_ind);
1225 if (next_ind == pri)
1228 double_up(&next_ind->i_sem, &ind->i_sem);
1232 CDEBUG(D_INODE, "delete indirect ino %lu\n", ind->i_ino);
1233 CDEBUG(D_INODE, "iput ind %lu, ref count = %d\n", ind->i_ino,
1234 atomic_read(&ind->i_count));
1239 snaps->ino[index] = cpu_to_le32(0);
1240 for (i = 0; i < EXT3_MAX_SNAPS; i++)
1241 save += snaps->ino[i];
1244 del_primary_inode_to_cowed_dir(handle, pri);
1246 /*Should we remove snap feature here*/
1248 * If we are deleting the last indirect inode, and the primary inode
1249 * has already been deleted, then mark the primary for deletion also.
1250 * Otherwise, if we are deleting the last indirect inode remove the
1251 * snaptable from the inode. XXX
1253 if (!save && pri->u.ext3_i.i_dtime) {
1254 CDEBUG(D_INODE, "deleting primary %lu\n", pri->i_ino);
1256 /* reset err to 0 now */
1259 CDEBUG(D_INODE, "%s redirector table\n",
1260 save ? "saving" : "deleting");
1261 /* XXX: since set ea will modify i_ctime of pri,
1262 so save/restore i_ctime. Need this necessary ? */
1263 ctime = pri->i_ctime;
1264 err = ext3_xattr_set(handle, pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
1265 save ? buf : NULL, EXT3_MAX_SNAP_DATA, 0);
1266 pri->i_ctime = ctime;
1267 ext3_mark_inode_dirty(handle, pri);
1269 ext3_journal_stop(handle, pri);
1274 /* restore a primary inode with the indirect inode at index */
/*
 * fsfilt_ext3_restore_indirect - replace the contents of @pri with the data
 * of the indirect (snapshot) inode found at slot @index.
 * @pri:   primary inode to restore
 * @index: snapshot slot, validated against 0..EXT3_MAX_SNAPS below
 *
 * Visible steps: reject out-of-range indexes and the journal inode, look up
 * the indirect inode with fsfilt_ext3_get_indirect(), start a journal
 * transaction sized by SNAP_RESTORE_TRANS_BLOCKS, hand the current data of
 * @pri to a freshly allocated temporary inode, migrate the indirect inode's
 * data into @pri, clear EXT3_COW_FL on @pri and stop the transaction.
 *
 * Several lines of this function (braces, returns, error checks) are not
 * present in this listing, so the exact return values cannot be stated here.
 *
 * NOTE(review): the range check uses `>` rather than `>=`, so
 * index == EXT3_MAX_SNAPS is accepted; confirm the snap table really has
 * EXT3_MAX_SNAPS + 1 slots.
 */
1275 static int fsfilt_ext3_restore_indirect(struct inode *pri, int index)
1280 handle_t *handle = NULL;
1283 if (index < 0 || index > EXT3_MAX_SNAPS)
/* The journal inode must never be restored from a snapshot. */
1286 if( pri == pri->i_sb->u.ext3_sb.s_journal_inode ){
1287 CERROR("TRY TO RESTORE JOURNAL\n");
1290 CDEBUG(D_INODE, "pri ino %lu, index %d\n", pri->i_ino, index);
/* Fetch the indirect inode stored for this primary at slot @index. */
1292 ind = fsfilt_ext3_get_indirect(pri, NULL, index);
1297 CDEBUG(D_INODE, "restore ino %lu to %lu\n", pri->i_ino, ind->i_ino);
1299 handle = ext3_journal_start(pri, SNAP_RESTORE_TRANS_BLOCKS);
1302 /* first destroy all the data blocks in primary inode */
1303 /* XXX: check this, ext3_new_inode, the first arg should be "dir" */
1304 tmp = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
/*
 * Both inode semaphores are held across the migration. Presumably
 * ext3_migrate_data() takes (handle, dst, src), i.e. this moves pri's
 * data into tmp -- confirm against its definition.
 */
1306 double_down(&pri->i_sem, &tmp->i_sem);
1307 ext3_migrate_data(handle, tmp, pri);
1308 double_up(&pri->i_sem, &tmp->i_sem);
1313 CERROR("restore_indirect, new_inode err\n");
/* Pull the snapshot data into the primary and drop its COW flag. */
1315 double_down(&pri->i_sem, &ind->i_sem);
1316 ext3_migrate_data(handle, pri, ind);
1317 pri->u.ext3_i.i_flags &= ~EXT3_COW_FL;
1318 ext3_mark_inode_dirty(handle, pri);
1319 double_up(&pri->i_sem, &ind->i_sem);
1322 //fsfilt_ext3_destroy_indirect(pri, index);
1323 ext3_journal_stop(handle, pri);
1329 * ext3_iterate_all - iterate through all of the inodes
1330 * @sb: filesystem superblock
1331 * @repeat: pointer to function called on each valid inode
1332 * @start: inode to start iterating at
1333 * @priv: private data to the caller/repeat function
1335 * If @start is NULL, then we do not return an inode pointer. If @*start is
1336 * NULL, then we start at the beginning of the filesystem, and iterate over
1337 * all of the inodes in the system. If @*start is non-NULL, then we start
1338 * iterating at this inode.
1340 * We call the repeat function for each inode that is in use. The repeat
1341 * function must check if this is a redirector (with is_redirector) if it
1342 * only wants to operate on redirector inodes. If there is an error or
1343 * the repeat function returns non-zero, we return the last inode operated
1344 * on in the @*start parameter. This allows the caller to restart the
1345 * iteration at this inode if desired, by returning a positive value.
1346 * Negative return values indicate an error.
1348 * NOTE we cannot simply traverse the existing filesystem tree from the root
1349 * inode, as there may be disconnected trees from deleted files/dirs
1351 * FIXME If there was a list of inodes with EAs, we could simply walk the list
1352 * instead of reading every inode. This is an internal implementation issue.
/*
 * ext3_iterate_all - call @repeat on in-use inodes by walking the per-group
 * inode bitmaps.
 * @sb:     filesystem superblock
 * @repeat: callback invoked as (*repeat)(inode, @priv) for each inode visited
 * @start:  in/out cursor; *start is replaced with each inode as it is visited
 * @priv:   opaque pointer handed through to @repeat
 *
 * Visible behaviour:
 *  - a starting inode is obtained with iget(sb, EXT3_ROOT_INO) (the guard
 *    for that assignment is on a line not present in this listing);
 *  - -ENOMEM / -EIO / -EINVAL are set through GOTO(exit, ...) for a failed
 *    lookup, a bad inode, or a start inode number above s_inodes_count;
 *  - a start inode below EXT3_FIRST_INO(sb) is passed to @repeat once, then
 *    the cursor jumps to EXT3_FIRST_INO(sb);
 *  - from the start inode's group onward, every group with a usable
 *    descriptor has its inode bitmap loaded and scanned byte by byte and
 *    bit by bit (inode number = ibase + (ibyte << 3) + i, i in 1..8);
 *    inodes whose bit is set and whose number is not below the cursor are
 *    fetched with iget() and passed to @repeat.
 *
 * Fix: the first @repeat call used to be written
 *      (err = (*repeat)(*start, priv) != 0)
 * which stores the result of the comparison (0 or 1) in err. It is now
 * parenthesised like the call in the bitmap loop, so err carries the
 * callback's own return value (negative error or positive "stop here").
 */
1355 static int ext3_iterate_all(struct super_block *sb,
1356 int (*repeat)(struct inode *inode,void *priv),
1357 struct inode **start, void *priv)
1359 struct inode *tmp = NULL;
1360 int gstart, gnum, err = 0;
1361 ino_t istart, ibase;
1367 *start = iget(sb, EXT3_ROOT_INO);
1369 GOTO(exit, err = -ENOMEM);
1371 if (is_bad_inode(*start))
1372 GOTO(exit, err = -EIO);
/* Reject a cursor that lies beyond the inode count in the superblock. */
1374 if ((*start)->i_ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
1375 CERROR("invalid starting inode %ld\n",(*start)->i_ino);
1376 GOTO(exit, err = -EINVAL);
/* Cursor below the first ordinary inode: visit it, then skip ahead. */
1378 if ((*start)->i_ino < EXT3_FIRST_INO(sb)) {
1379 if ((err = (*repeat)(*start, priv)) != 0)
1382 *start = iget(sb, EXT3_FIRST_INO(sb));
1384 GOTO(exit, err = -ENOMEM);
1385 if (is_bad_inode(*start))
1386 GOTO(exit, err = -EIO);
/* Inode numbers are 1-based: split into group number and index in group. */
1389 gstart = ((*start)->i_ino - 1) / EXT3_INODES_PER_GROUP(sb);
1390 istart = ((*start)->i_ino - 1) % EXT3_INODES_PER_GROUP(sb);
1391 ibase = gstart * EXT3_INODES_PER_GROUP(sb);
1392 for (gnum = gstart; gnum < EXT3_SB(sb)->s_groups_count;
1393 gnum++, ibase += EXT3_INODES_PER_GROUP(sb)) {
1394 struct ext3_group_desc * gdp;
1395 int bitmap_nr, ibyte;
/* Test for a missing descriptor or a group whose inodes are all free. */
1398 gdp = ext3_get_group_desc (sb, gnum, NULL);
1399 if (!gdp || le16_to_cpu(gdp->bg_free_inodes_count) ==
1400 EXT3_INODES_PER_GROUP(sb))
1403 bitmap_nr = ext3_load_inode_bitmap(sb, gnum);
1407 bitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]->b_data;
1408 for (ibyte = istart >> 3; ibyte < EXT3_INODES_PER_GROUP(sb) >> 3;
1415 /* FIXME need to verify if bit endianness will
1416 * work properly here for all architectures.
1418 for (i = 1, bit = 1; i <= 8; i++, bit <<= 1) {
1419 ino_t ino = ibase + (ibyte << 3) + i;
1421 if ((bitmap[ibyte] & bit) == 0)
1424 if (ino < (*start)->i_ino)
1427 *start = iget(sb, ino);
1429 GOTO(exit, err = -ENOMEM);
1430 if (is_bad_inode(*start))
1431 GOTO(exit, err = -EIO);
1433 if ((err = (*repeat)(*start, priv)) != 0)
/*
 * fsfilt_ext3_iterate - fsfilt entry point for inode iteration.
 * @sb:     filesystem superblock
 * @repeat: callback applied to each inode
 * @start:  in/out iteration cursor, forwarded unchanged
 * @priv:   opaque pointer forwarded to @repeat
 * @flag:   iteration mode; presumably the value switched on below (the
 *          switch line itself is not present in this listing)
 *
 * For SNAP_ITERATE_ALL_INODE the call is forwarded to ext3_iterate_all()
 * and its result is returned. Handling of other values is not visible here.
 */
1446 static int fsfilt_ext3_iterate(struct super_block *sb,
1447 int (*repeat)(struct inode *inode, void *priv),
1448 struct inode **start, void *priv, int flag)
1451 case SNAP_ITERATE_ALL_INODE:
1452 return ext3_iterate_all (sb, repeat, start, priv);
/*
 * find_snap_meta_index - look up a named attribute in the snap meta table.
 * @snap_meta: table to search
 * The second parameter (its declaration is on a line not present in this
 * listing) is used below as `name`, the attribute name to look for.
 *
 * Walks all TABLE_ITEM_COUNT slots, skipping slots whose stored name is
 * empty, and compares the stored name with `name`. Returns -1 when no slot
 * matches; callers use a non-negative result as an index into
 * snap_meta->array.
 *
 * Fix: the comparison length used to be strlen(name), which made this a
 * prefix match (a lookup of "snap" matched a stored "snaptable", and an
 * empty name matched the first used slot). The comparison is now bounded by
 * TABLE_ITEM_NAME_SIZE -- the size this file uses when clearing the name
 * field -- so both strings must be equal up to their terminator.
 */
1458 static int find_snap_meta_index(
1459 struct table_snap_meta_data *snap_meta,
1464 /* table max length is null*/
1465 for( i = 0; i < TABLE_ITEM_COUNT; i++){
1466 /*compare name Max name Length 15*/
1467 if (snap_meta->array[i].name[0]){
1468 if(!strncmp(snap_meta->array[i].name, name, TABLE_ITEM_NAME_SIZE))
1472 return -1; /* can not find */
/*
 * set_snap_meta_index - claim the first free slot of the snap meta table.
 * @snap_meta: table to update
 * The remaining parameters are declared on lines not present in this
 * listing; the body uses them as `name` (attribute name) and `size`
 * (attribute length).
 *
 * A slot is free when the first byte of its name is 0. The first free slot
 * gets the name copied in, the table's count is incremented, the slot's
 * start is set to i * TABLE_ITEM_SIZE + 1 and its len to size. Returns -1
 * when every slot is in use; callers use a non-negative result as the slot
 * index.
 *
 * NOTE(review): strcpy() does not bound the copy; a name longer than the
 * name field (the comment below says max length 15) would overflow it.
 * NOTE(review): size is stored without any visible check against the space
 * implied by TABLE_ITEM_SIZE -- confirm callers enforce a limit.
 */
1475 int set_snap_meta_index(
1476 struct table_snap_meta_data *snap_meta,
1482 for( i = 0; i < TABLE_ITEM_COUNT; i++){
1483 /*compare name Max name Length 15*/
1484 if (! snap_meta->array[i].name[0]){
1485 strcpy(snap_meta->array[i].name, name);
1486 snap_meta->count ++;
/* Slot i owns a fixed region of the table inode, after the table block. */
1487 snap_meta->array[i].start = i * TABLE_ITEM_SIZE + 1;
1488 snap_meta->array[i].len = size;
1492 return -1; /* can not find */
/*
 * fsfilt_ext3_get_meta_attr - read a named meta attribute from the snap
 * table inode.
 * @sb:   filesystem superblock; the table inode number comes from
 *        SB_SNAPTABLE_INO(sb)
 * @name: attribute name to look up
 * @buf:  destination buffer, may be NULL to query the attribute length
 * @size: in: capacity of @buf; out: set to the attribute length after a copy
 *
 * Visible behaviour:
 *  - block 0 of the table inode is read and treated as a
 *    struct table_snap_meta_data;
 *  - if @name is not found the function leaves through out_iput with
 *    error = 0;
 *  - if @buf is NULL or *size is smaller than the stored length, the stored
 *    length is passed out through `error`;
 *  - otherwise the attribute's blocks, beginning at array[index].start, are
 *    read and copied into @buf, and *size is set to the stored length.
 * Error codes set on the visible lines: -ENOENT (table inode unusable) and
 * -ENODATA (table block unreadable). Return statements and the exit labels
 * are not present in this listing.
 *
 * NOTE(review): both memcpy() calls below write to `buf` with no offset; if
 * buf is not advanced on a line missing from this listing, every block
 * overwrites the previous one for attributes larger than one block.
 * NOTE(review): left_size is unsigned and is reduced by a full block size
 * each pass, so it wraps after a final partial block; its initialisation is
 * not visible here.
 */
1495 static int fsfilt_ext3_get_meta_attr(struct super_block *sb, char* name,
1496 char* buf, int *size)
1498 struct inode *inode;
1499 struct buffer_head *bh = NULL;
1500 struct table_snap_meta_data *s_attr;
1501 unsigned long map_len = 0, left_size;
1502 int i, error = 0, index = 0;
1506 ino = SB_SNAPTABLE_INO(sb);
1508 CERROR("No table file \n");
1512 inode = iget(sb, ino);
1513 if(!inode || is_bad_inode(inode)){
1514 CERROR("unable to get table ino %lu\n", ino);
1515 GOTO(out_iput, error = -ENOENT);
1517 /*read the table from the table inode*/
1518 bh = ext3_bread(NULL, inode, 0, 0, &error);
1520 CERROR("read table ino %lu, error %d\n", ino, error);
1521 GOTO(out_iput, error = -ENODATA);
1523 s_attr = (struct table_snap_meta_data *)(bh->b_data);
1524 index = find_snap_meta_index(s_attr, name);
1526 CDEBUG(D_INFO, "not exit %s meta attr of table ino %lu \n",
1527 name, inode->i_ino);
1528 GOTO(out_iput, error = 0);
/* Size query, or caller's buffer too small: report the stored length. */
1530 if (!buf || *size < s_attr->array[index].len) {
1531 /*return the size of this meta attr */
1532 error = s_attr->array[index].len;
1533 GOTO(out_iput, error);
/* Number of filesystem blocks the attribute occupies, rounded up. */
1535 map_len = (s_attr->array[index].len + sb->s_blocksize - 1)
1536 >> sb->s_blocksize_bits;
1538 for(i = 0; i < map_len; i++) {
1539 struct buffer_head *array_bh = NULL;
1541 array_bh = ext3_bread(NULL, inode,
1542 s_attr->array[index].start + i,
1545 CERROR("ino %lu read snap attr offset %d error %d \n",
1546 inode->i_ino, (s_attr->array[index].start + i),
1548 GOTO(out_iput, error);
/* Copy a whole block, or only the remaining tail on the last block. */
1550 if (left_size >= sb->s_blocksize)
1551 memcpy(buf, array_bh->b_data, sb->s_blocksize);
1553 memcpy(buf, array_bh->b_data, left_size);
1554 left_size -= sb->s_blocksize;
1557 *size = s_attr->array[index].len;
/*
 * fsfilt_ext3_set_meta_attr - store or delete a named meta attribute in the
 * snap table inode.
 * @sb:   filesystem superblock; the table inode number is kept in
 *        SB_SNAPTABLE_INO(sb)
 * @name: attribute name
 * @buf:  attribute data; a NULL @buf is tested on the "nothing to delete"
 *        paths below, so presumably NULL requests deletion -- confirm
 * @size: length of @buf in bytes
 *
 * Visible behaviour:
 *  - with no table inode and no @buf there is nothing to do;
 *  - a journal transaction of 2 * EXT3_SETMETA_TRANS_BLOCKS is started on
 *    the root inode and stopped at the end;
 *  - a new regular inode is allocated and its number written to
 *    SB_SNAPTABLE_INO(sb) under journalled access to the superblock buffer
 *    (presumably only when no table inode exists yet; the condition is on a
 *    line not present in this listing); otherwise the existing inode is
 *    fetched with iget();
 *  - block 0 is read (created if absent) as the table and searched for
 *    @name;
 *  - the delete path zeroes the slot's name and len;
 *  - the store path claims a slot via set_snap_meta_index(), records the
 *    length and writes @buf into the slot's blocks.
 * Error codes set on the visible lines: -ENOENT, -ENODATA, -ENOSPC, and the
 * negative result of set_snap_meta_index() when the table is full.
 *
 * NOTE(review): the data-block loop copies from `buf` with no offset; if
 * buf is not advanced on a line missing from this listing, every block
 * receives the first block's worth of data.
 */
1565 static int fsfilt_ext3_set_meta_attr(struct super_block *sb, char* name,
1566 char* buf, int size)
1568 struct inode *inode = NULL;
1569 handle_t *handle = NULL;
1570 struct buffer_head *bh = NULL;
1571 struct table_snap_meta_data *s_attr = NULL;
1573 int i, index = 0, error = 0;
1574 unsigned long new_len = 0, left_size;
1578 ino = SB_SNAPTABLE_INO(sb);
/* No table inode and no data to store: nothing to delete. */
1580 if (ino == 0 && !buf) {
1581 CDEBUG(D_INODE, "no table ino \n");
1585 handle = ext3_journal_start(sb->s_root->d_inode,
1586 2 * EXT3_SETMETA_TRANS_BLOCKS);
1591 /*create table inode update table ino*/
1592 inode = ext3_new_inode(handle, sb->s_root->d_inode, (int)S_IFREG, 0);
/* Record the new table inode number in the superblock, journalled. */
1596 ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
1597 SB_SNAPTABLE_INO(sb) = inode->i_ino;
1598 ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
1603 inode = iget(sb, ino);
1604 if (!inode || !inode->i_nlink || is_bad_inode(inode)) {
1605 CERROR("unable to get table ino %lu\n", ino);
1606 GOTO(exit, error = -ENOENT);
1609 /*read the table from the table inode,
1610 * If can not find the block just create it*/
1611 bh = ext3_bread(handle, inode, 0, 1, &error);
1613 CERROR("read table ino %lu, error %d\n", ino, error);
1614 GOTO(exit, error = -ENODATA);
1616 s_attr = (struct table_snap_meta_data *)(bh->b_data);
1617 index = find_snap_meta_index(s_attr, name);
/* Deleting an attribute that does not exist is treated as success. */
1618 if (index < 0 && !buf) {
1619 CDEBUG(D_INODE, "%s meta attr of table ino %lu do not exist\n",
1620 name, inode->i_ino);
1622 GOTO(exit, error = 0);
1625 CDEBUG(D_INODE, "delete the meta attr %s in the table ino %lu",
1626 name, inode->i_ino);
1627 /*Here we only delete the entry of the attr
1628 *FIXME, should we also delete the block of
1631 ext3_journal_get_write_access(handle, bh);
1632 memset(s_attr->array[index].name, 0, TABLE_ITEM_NAME_SIZE);
1633 s_attr->array[index].len = 0;
1635 ext3_journal_dirty_metadata(handle, bh);
1639 new_len = (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1640 /*find the place to put this attr in that index*/
1641 ext3_journal_get_write_access(handle, bh);
1643 index = set_snap_meta_index(s_attr, name, size);
1645 CERROR("table full of ino %lu \n", inode->i_ino);
1647 GOTO(exit, error = index);
1650 s_attr->array[index].len = size;
/*
 * NOTE(review): this is the only call in the function that uses
 * journal_dirty_metadata() instead of ext3_journal_dirty_metadata();
 * confirm whether the difference is intentional.
 */
1651 journal_dirty_metadata(handle, bh);
1653 /*put this attr to the snap table*/
1655 for(i = 0; i < new_len; i++) {
1656 struct buffer_head *array_bh = NULL;
/* Read the i-th block of the slot, allocating it if it does not exist. */
1658 array_bh = ext3_bread(handle, inode,
1659 s_attr->array[index].start + i, 1, &error);
1661 CERROR("inode %lu Can not get the block of attr %s\n",
1662 inode->i_ino, name);
1664 GOTO(exit, error = -ENOSPC);
1666 ext3_journal_get_write_access(handle, array_bh);
/* Copy a whole block, or only the remaining tail on the last block. */
1667 if (left_size > inode->i_sb->s_blocksize)
1668 memcpy(array_bh->b_data, buf, inode->i_sb->s_blocksize);
1670 memcpy(array_bh->b_data, buf, left_size);
1671 ext3_journal_dirty_metadata(handle, array_bh);
1672 left_size -= inode->i_sb->s_blocksize;
1677 ext3_journal_stop(handle, sb->s_root->d_inode);
/*
 * Operations table for the "ext3_snap" fsfilt type. Each fs_* hook is bound
 * to the matching fsfilt_ext3_* implementation; the table is handed to
 * fsfilt_register_ops() / fsfilt_unregister_ops() by the module init and
 * exit functions.
 */
1683 struct fsfilt_operations fsfilt_ext3_snap_ops = {
1684 .fs_type = "ext3_snap",
1685 .fs_owner = THIS_MODULE,
1686 .fs_create_indirect = fsfilt_ext3_create_indirect,
1687 .fs_get_indirect = fsfilt_ext3_get_indirect,
1688 .fs_set_indirect = fsfilt_ext3_set_indirect,
1689 .fs_snap_feature = fsfilt_ext3_snap_feature,
1690 .fs_is_redirector = fsfilt_ext3_is_redirector,
1691 .fs_is_indirect = fsfilt_ext3_is_indirect,
1692 .fs_get_indirect_ino = fsfilt_ext3_get_indirect_ino,
1693 .fs_set_generation = fsfilt_ext3_set_generation,
1694 .fs_get_generation = fsfilt_ext3_get_generation,
1695 .fs_destroy_indirect = fsfilt_ext3_destroy_indirect,
1696 .fs_restore_indirect = fsfilt_ext3_restore_indirect,
1697 .fs_iterate = fsfilt_ext3_iterate,
1698 .fs_copy_block = fsfilt_ext3_copy_block,
1699 .fs_set_meta_attr = fsfilt_ext3_set_meta_attr,
1700 .fs_get_meta_attr = fsfilt_ext3_get_meta_attr,
/*
 * Module load hook: registers fsfilt_ext3_snap_ops with the fsfilt layer and
 * keeps the result in rc (presumably returned to the module loader; the
 * return line is not present in this listing).
 */
1703 static int __init fsfilt_ext3_snap_init(void)
1707 rc = fsfilt_register_ops(&fsfilt_ext3_snap_ops);
/*
 * Module unload hook: removes fsfilt_ext3_snap_ops from the fsfilt layer,
 * undoing the registration made at load time.
 */
1712 static void __exit fsfilt_ext3_snap_exit(void)
1715 fsfilt_unregister_ops(&fsfilt_ext3_snap_ops);
/* Bind the load/unload hooks and declare the module's metadata. */
1718 module_init(fsfilt_ext3_snap_init);
1719 module_exit(fsfilt_ext3_snap_exit);
1721 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1722 MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
1723 MODULE_LICENSE("GPL");