Whamcloud - gitweb
* updates to HEAD lustre since landing b_port_step on portals
[fs/lustre-release.git] / lustre / lvfs / fsfilt_snap_ext3.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Lustre filesystem abstraction routines
5  *
6  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
7  *   Author: Andreas Dilger <adilger@clusterfs.com>
8  *
9  *   This file is part of Lustre, http://www.lustre.org.
10  *
11  *   Lustre is free software; you can redistribute it and/or
12  *   modify it under the terms of version 2 of the GNU General Public
13  *   License as published by the Free Software Foundation.
14  *
15  *   Lustre is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *   GNU General Public License for more details.
19  *
20  *   You should have received a copy of the GNU General Public License
21  *   along with Lustre; if not, write to the Free Software
22  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23  */
24 #define DEBUG_SUBSYSTEM S_FILTER
25
26 #include <linux/init.h>
27 #include <linux/module.h>
28 #include <linux/fs.h>
29 #include <linux/jbd.h>
30 #include <linux/slab.h>
31 #include <linux/pagemap.h>
32 #include <linux/quotaops.h>
33 #include <linux/ext3_fs.h>
34 #include <linux/ext3_jbd.h>
35 #include <linux/ext3_extents.h>
36 #include <linux/version.h>
37 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
38 #include <linux/locks.h>
39 #include <linux/ext3_xattr.h>
40 #include <linux/module.h>
41 #include <linux/iobuf.h>
42 #else
43 #include <ext3/xattr.h>
44 #endif
45
46 #include <libcfs/kp30.h>
47 #include <linux/lustre_fsfilt.h>
48 #include <linux/obd.h>
49 #include <linux/obd_class.h>
50 #include <linux/lustre_smfs.h>
51 #include <linux/lustre_snap.h>
52
53 /* For snapfs in EXT3 flags --- FIXME will find other ways to store it*/
54 #define EXT3_COW_FL                     0x00100000 /* inode is snapshot cow */
55 #define EXT3_DEL_FL                     0x00200000 /* inode is deleting in snapshot */
56
57 #define EXT3_SNAP_ATTR "@snap"
58 #define EXT3_SNAP_GENERATION "@snap_generation"
59 #define EXT3_MAX_SNAPS 10
60 #define EXT3_MAX_SNAP_DATA (sizeof(struct snap_ea))
61 #define EXT3_SNAP_INDEX EXT3_XATTR_INDEX_LUSTRE
62 #define EXT3_SNAP_COUNT "@snapcount"
63 #define EXT3_SNAP_ROOT_INO "@snap_rootino"
64
65 #define SB_FEATURE_COMPAT(sb)  (EXT3_SB(sb)->s_es->s_feature_compat)
66                                                                                                                                                                                                      
67 #define SNAP_HAS_COMPAT_FEATURE(sb,mask)        \
68         (SB_FEATURE_COMPAT(sb) & cpu_to_le32(mask))
69
70 #define EXT3_FEATURE_COMPAT_SNAPFS             0x0010
71 #define EXT3_FEATURE_COMPAT_BLOCKCOW           0x0020
72 /*snaptable info for EXT3*/
73 #define EXT3_SNAPTABLE_EA       "@snaptable"
74                                                                                                                                                                                                      
75 /* NOTE: these macros depend closely on the layout of struct snap_ea */
76 #define SNAP_CNT_FROM_SIZE(size)       ((((size)-sizeof(ino_t)*2)/2)/sizeof(ino_t))
77 #define SNAP_EA_SIZE_FROM_INDEX(index) (sizeof(ino_t)*2 + 2*sizeof(ino_t)*((index)+1))
78                                                                                                                                                                                                      
79 #define SNAP_EA_INO_BLOCK_SIZE(size)   (((size)-sizeof(ino_t)*2)/2)
80 #define SNAP_EA_PARENT_OFFSET(size)    (sizeof(ino_t)*2 + SNAP_EA_INO_BLOCK_SIZE((size)))
81
82 #define EXT3_EA_TRANS_BLOCKS            EXT3_DATA_TRANS_BLOCKS
83 #define EXT3_SETMETA_TRANS_BLOCKS       EXT3_DATA_TRANS_BLOCKS
84 #define EXT3_NEWINODE_TRANS_BLOCKS      10
85
86 #define SNAP_COPYBLOCK_TRANS_BLOCKS    (EXT3_DATA_TRANS_BLOCKS)
87 #define SNAP_INSERTLIST_TRANS_BLOCKS    (2 * EXT3_EA_TRANS_BLOCKS + 1)
88 #define SNAP_DELETELIST_TRANS_BLOCKS    (2 * EXT3_EA_TRANS_BLOCKS + 2)
89 #define SNAP_MIGRATEDATA_TRANS_BLOCKS   2
90 #define SNAP_SETIND_TRANS_BLOCKS        (SNAP_INSERTLIST_TRANS_BLOCKS + 1)
91 #define SNAP_ADDORPHAN_TRANS_BLOCKS     2
92 #define SNAP_REMOVEORPHAN_TRANS_BLOCKS  1
93 #define SNAP_RESTOREORPHAN_TRANS_BLOCKS (EXT3_EA_TRANS_BLOCKS + \
94                                          SNAP_DELETELIST_TRANS_BLOCKS + \
95                                          EXT3_NEWINODE_TRANS_BLOCKS + \
96                                          2 * SNAP_MIGRATEDATA_TRANS_BLOCKS)
97 #define SNAP_BIGCOPY_TRANS_BLOCKS       (2 * EXT3_DATA_TRANS_BLOCKS)
98 #define SNAP_CREATEIND_TRANS_BLOCKS     (EXT3_NEWINODE_TRANS_BLOCKS + \
99                                          SNAP_MIGRATEDATA_TRANS_BLOCKS + \
100                                          SNAP_SETIND_TRANS_BLOCKS + \
101                                          SNAP_BIGCOPY_TRANS_BLOCKS + 3)
102 #define SNAP_MIGRATEBLK_TRANS_BLOCKS    2
103 #define SNAP_DESTROY_TRANS_BLOCKS       (SNAP_DELETELIST_TRANS_BLOCKS + \
104                                          EXT3_EA_TRANS_BLOCKS + 2)
105 #define SNAP_RESTORE_TRANS_BLOCKS       (EXT3_NEWINODE_TRANS_BLOCKS + \
106                                          2 * SNAP_MIGRATEDATA_TRANS_BLOCKS + 1)
107
/*
 * EXT3_JOURNAL_START - start a transaction on the journal of @sb.
 * @sb:     super block; EXT3_SB(sb)->s_journal is the journal used
 * @handle: lvalue receiving the value returned by journal_start()
 * @blocks: number of buffer credits to request
 * @rc:     lvalue set to 0 on success or to PTR_ERR(@handle) on failure
 *
 * journal_start() is called between lock_kernel() and unlock_kernel().
 * The arguments are parenthesized and the temporary carries a private name
 * so the macro is safe with arbitrary expressions and in functions that
 * already have a local called "journal".
 */
#define EXT3_JOURNAL_START(sb, handle, blocks, rc)              \
do {                                                            \
        journal_t *snap_jnl_ = EXT3_SB(sb)->s_journal;          \
                                                                \
        lock_kernel();                                          \
        (handle) = journal_start(snap_jnl_, (blocks));          \
        unlock_kernel();                                        \
        if (IS_ERR(handle)) {                                   \
                CERROR("can't start transaction\n");            \
                (rc) = PTR_ERR(handle);                         \
        } else {                                                \
                (rc) = 0;                                       \
        }                                                       \
} while (0)
121
122
123 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
124 static inline void double_lock_inode(struct inode *i1, struct inode *i2)
125 {
126         if (i1 == i2)
127                 down(&i1->i_sem);
128         else
129                 double_down(&i1->i_sem, &i2->i_sem);
130 }
131 static inline void double_unlock_inode(struct inode *i1, struct inode *i2)
132 {
133         if (i1 == i2)
134                 up(&i1->i_sem);
135         else 
136                 double_up(&i1->i_sem, &i2->i_sem);
137 }
138 #else
139 static inline void double_lock_inode(struct inode *i1, struct inode *i2)
140 {
141        struct semaphore *s1 = &i1->i_sem;
142        struct semaphore *s2 = &i2->i_sem;
143
144        if (s1 != s2) {
145                if ((unsigned long) s1 < (unsigned long) s2) {
146                        struct semaphore *tmp = s2;
147                        s2 = s1; s1 = tmp;
148                }
149                down(s1);
150        }
151        down(s2);
152 }
153
154 static inline void double_unlock_inode(struct inode *i1, struct inode *i2)
155 {
156        struct semaphore *s1 = &i1->i_sem;
157        struct semaphore *s2 = &i2->i_sem;
158
159        up(s1);
160        if (s1 != s2)
161                up(s2);
162 }
163
164 #endif
165
/*
 * set_parent_ino - helper to store one entry of field 'parent' in a snap EA.
 * @pea:   start of the EA buffer
 * @size:  total size of the EA buffer in bytes
 * @index: slot in the parent array to write
 * @val:   value to store (the caller does any byte-order conversion)
 *
 * The buffer is treated as a header of two ino_t, followed by two areas of
 * (@size - header) / 2 bytes each; the parent array is the second area.
 *
 * Returns 0 on success, or -EINVAL when @pea is NULL or the requested slot
 * does not lie completely inside the @size bytes of the buffer (previously
 * such a call wrote outside the buffer).
 */
static inline int
set_parent_ino(struct snap_ea *pea, int size, int index, ino_t val)
{
        char *p = (char *)pea;
        size_t hdr = sizeof(ino_t) * 2;
        size_t total, offset;

        if (!pea || size < 0 || index < 0)
                return -EINVAL;

        total = (size_t)size;
        /* also keeps the offset arithmetic below from wrapping around */
        if (total < hdr || (size_t)index >= total / sizeof(ino_t))
                return -EINVAL;

        offset = hdr + (total - hdr) / 2 + sizeof(ino_t) * (size_t)index;
        if (offset + sizeof(ino_t) > total)
                return -EINVAL;

        *(ino_t *)(p + offset) = val;

        return 0;
}
179 /**
180  * fsfilt_ext3_get_indirect - get a specific indirect inode from a primary inode
181  * @primary: primary (direct) inode
182  * @table: table of @slot + 1 indices in reverse chronological order
183  * @slot: starting slot number to check for indirect inode number
184  *
185  * We locate an indirect inode from a primary inode using the redirection
186  * table stored in the primary inode.  Because the desired inode may actually
187  * be in a "newer" slot number than the supplied slot, we are given a table
188  * of indices in chronological order to search for the correct inode number.
189  * We walk table from @slot to 0 looking for a non-zero inode to load.
190  *
191  * To only load a specific index (and fail if it does not exist), you can
192  * pass @table = NULL, and the index number in @slot.  If @slot == 0, the
193  * primary inode data is returned.
194  *
195  * We return a pointer to an inode, or an error.  If the indirect inode for
196  * the given index does not exist, NULL is returned.
197  */
198 static struct inode *fsfilt_ext3_get_indirect(struct inode *primary, int *table,
199                                               int slot)
200 {
201         char buf[EXT3_MAX_SNAP_DATA];
202         struct snap_ea *snaps;
203         ino_t ino;
204         struct inode *inode = NULL;
205         int rc = 0;
206
207         ENTRY;
208
209         if (slot < 0 || slot > EXT3_MAX_SNAPS || !primary)
210                 RETURN(NULL);
211         
212         CDEBUG(D_INODE, "ino %lu, table %p, slot %d\n", primary->i_ino, table,
213                slot);
214         rc = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR, buf, 
215                              EXT3_MAX_SNAP_DATA); 
216         if (rc == -ENODATA) {
217                 slot = -1;
218         } else if (rc < 0) {
219                 CERROR("attribute read rc=%d \n", rc);
220                 RETURN(NULL);
221         }
222         snaps = (struct snap_ea *)buf;
223
224         /* if table is NULL and there is a slot */
225         if( !table && slot >= 0) {
226                 ino = le32_to_cpu(snaps->ino[slot]);
227                 if(ino) 
228                         inode = iget(primary->i_sb, ino);
229                 GOTO(err_free, rc);
230         }
231         /* if table is not NULL */
232         while (!inode && slot >= 0 ) {
233                 ino = le32_to_cpu(snaps->ino[slot]);
234
235                 CDEBUG(D_INODE, "snap inode at slot %d is %lu\n", slot, ino);
236                 if (!ino) {
237                         --slot;
238                         continue;
239                 }
240                 inode = iget(primary->i_sb, ino);
241                 GOTO(err_free, rc);
242         }
243         if(slot == -1 && table) {
244                 CDEBUG(D_INODE, "redirector not found, using primary\n");
245                 inode = iget(primary->i_sb, primary->i_ino);
246         }
247 err_free:
248         RETURN(inode);
249 }
250
251 /* Save the indirect inode in the snapshot table of the primary inode. */
252 static int fsfilt_ext3_set_indirect(struct inode *pri, int index, ino_t ind_ino, 
253                                     ino_t parent_ino )
254 {
255         char buf[EXT3_MAX_SNAP_DATA];
256         struct snap_ea *snaps;
257         int rc = 0, inlist = 1;
258         int ea_size;
259         handle_t *handle = NULL;
260         ENTRY;
261         
262         CDEBUG(D_INODE, "(ino %lu, parent %lu): saving ind %lu to index %d\n", 
263                pri->i_ino, parent_ino, ind_ino, index);
264
265         if (index < 0 || index > MAX_SNAPS || !pri)
266                 RETURN(-EINVAL);
267         /* need lock the list before get_attr() to avoid race */
268         /* read ea at first */
269         rc = ext3_xattr_get(pri, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
270                                           buf, EXT3_MAX_SNAP_DATA);
271         if (rc == -ENODATA || rc == -ENODATA) {
272                 CDEBUG(D_INODE, "no extended attributes - zeroing\n");
273                 memset(buf, 0, EXT3_MAX_SNAP_DATA);
274                 /* XXX
275                  * To judge a inode in list, we only see if it has snap ea.
276                  * So take care of snap ea of primary inodes very carefully.
277                  * Is it right in snapfs EXT3, check it later?
278                  */
279                 inlist = 0;
280                 rc = 0; 
281         } else if (rc < 0 || rc > EXT3_MAX_SNAP_DATA) {
282                 GOTO(out_unlock, rc);
283         }
284         EXT3_JOURNAL_START(pri->i_sb, handle, SNAP_SETIND_TRANS_BLOCKS, rc); 
285         if(rc) 
286                 GOTO(out_unlock, rc = PTR_ERR(handle));
287         
288         snaps = (struct snap_ea *)buf;
289         snaps->ino[index] = cpu_to_le32 (ind_ino);
290         ea_size = EXT3_MAX_SNAP_DATA;
291
292         set_parent_ino(snaps, ea_size, index, cpu_to_le32(parent_ino));
293
294         rc = ext3_xattr_set_handle(handle, pri, EXT3_SNAP_INDEX,EXT3_SNAP_ATTR,
295                                     buf, EXT3_MAX_SNAP_DATA, 0);
296         ext3_mark_inode_dirty(handle, pri);
297         journal_stop(handle);
298 out_unlock:
299         RETURN(rc);
300 }
301
302 static int ext3_set_generation(struct inode *inode, unsigned long gen)
303 {
304         handle_t *handle;
305         int err = 0;
306         ENTRY;
307        
308         EXT3_JOURNAL_START(inode->i_sb, handle, EXT3_XATTR_TRANS_BLOCKS, err);
309         if(err)
310                 RETURN(err);
311         
312         err = ext3_xattr_set_handle(handle, inode, EXT3_SNAP_INDEX, 
313                                     EXT3_SNAP_GENERATION, (char*)&gen, 
314                                     sizeof(int), 0);
315         if (err < 0) {
316                 CERROR("ino %lu, set_ext_attr err %d\n", inode->i_ino, err);
317                 RETURN(err);
318         }
319         
320         journal_stop(handle);
321         RETURN(0);
322 }
323
324 /*
325  * Copy inode metadata from one inode to another, excluding blocks and size.
326  * FIXME do we copy EA data - ACLs and such (excluding snapshot data)?
327  */
328 static void ext3_copy_meta(handle_t *handle, struct inode *dst, struct inode *src)
329 {
330         int size;
331         
332         dst->i_mode = src->i_mode;
333         dst->i_nlink = src->i_nlink;
334         dst->i_uid = src->i_uid;
335         dst->i_gid = src->i_gid;
336         dst->i_atime = src->i_atime;
337         dst->i_mtime = src->i_mtime;
338         dst->i_ctime = src->i_ctime;
339 //      dst->i_version = src->i_version;
340         
341 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
342         dst->i_attr_flags = src->i_attr_flags;
343 #endif
344         dst->i_generation = src->i_generation;
345         EXT3_I(dst)->i_dtime = EXT3_I(src)->i_dtime;
346         EXT3_I(dst)->i_flags = EXT3_I(src)->i_flags | EXT3_COW_FL;
347 #ifdef EXT3_FRAGMENTS
348         EXT3_I(dst)->i_faddr = EXT3_I(src)->i_faddr;
349         EXT3_I(dst)->i_frag_no = EXT3_I(src)->i_frag_no;
350         EXT3_I(dst)->i_frag_size = EXT3_I(src)->i_frag_size;
351 #endif
352         if ((size = ext3_xattr_list(src, NULL, 0)) > 0) {
353                 char names[size];
354                 char *name;
355                 int namelen;
356
357                 if (ext3_xattr_list(src, names, 0) < 0)
358                         return;
359                 /*
360                  * the list of attribute names are stored as NUL terminated
361                  * strings, with a double NUL string at the end.
362                  */
363                 name = names;
364                 while ((namelen = strlen(name))) {
365                         int attrlen;
366                         char *buf;
367                         
368                         /* don't copy snap data */
369                         if (!strcmp(name, EXT3_SNAP_ATTR)) {
370                                 CDEBUG(D_INFO, "skipping %s item\n", name);
371                                 continue;
372                         }
373                         CDEBUG(D_INODE, "copying %s item\n", name);
374                         attrlen = ext3_xattr_get(src, EXT3_SNAP_INDEX, 
375                                                  EXT3_SNAP_ATTR, NULL, 0);
376                         if (attrlen < 0)
377                                 continue;
378                         OBD_ALLOC(buf, attrlen);
379                                 break;
380                         if (!buf) {
381                                 CERROR("No MEM\n");
382                                 break;
383                         }
384                         if (ext3_xattr_get(src, EXT3_SNAP_INDEX,
385                                            EXT3_SNAP_ATTR, buf, attrlen) < 0)
386                                 continue;       
387                         if (ext3_xattr_set_handle(handle, dst, EXT3_SNAP_INDEX,
388                                                   EXT3_SNAP_ATTR, buf, attrlen, 
389                                                   0) < 0)
390                                 break;
391                         OBD_FREE(buf, attrlen);
392                         name += namelen + 1; /* skip name and trailing NUL */
393                 }
394         }
395 }
396 static int ext3_copy_reg_block(struct inode *dst, struct inode *src, int blk)
397 {
398         struct page     *src_page, *dst_page; 
399         loff_t          offset = blk << src->i_sb->s_blocksize_bits;
400         unsigned long   index = offset >> PAGE_CACHE_SHIFT;
401         int             rc = 0;
402         ENTRY;
403         
404         /*read the src page*/
405         src_page = grab_cache_page(src->i_mapping, index);
406         if (src_page == NULL)
407                 RETURN(-ENOMEM);
408
409         if (!PageUptodate(src_page)) {
410                 rc = src->i_mapping->a_ops->readpage(NULL, src_page);
411                 if (rc < 0) {
412                         page_cache_release(src_page);
413                         RETURN(rc);
414                 }
415         }
416         kmap(src_page);
417         /*get dst page*/
418         
419         dst_page = grab_cache_page(dst->i_mapping, index);
420         if (dst_page == NULL)
421                 GOTO(src_page_unlock, rc = -ENOMEM);
422         kmap(dst_page);
423
424         rc = dst->i_mapping->a_ops->prepare_write(NULL, dst_page, 0, 
425                                                   PAGE_CACHE_SIZE - 1);
426         if (rc)
427                 GOTO(dst_page_unlock, rc = -EFAULT);
428         memcpy(page_address(dst_page), page_address(src_page), PAGE_CACHE_SIZE);
429         
430         flush_dcache_page(dst_page);
431         
432         rc = dst->i_mapping->a_ops->commit_write(NULL, dst_page, 0, 
433                                                  PAGE_CACHE_SIZE - 1);
434         if (!rc)
435                 rc = 1;
436 dst_page_unlock:
437         kunmap(dst_page);
438         unlock_page(dst_page);
439         page_cache_release(dst_page);
440 src_page_unlock:
441         kunmap(src_page);
442         page_cache_release(src_page);
443         RETURN(rc);
444 }
445 static int ext3_copy_dir_block(struct inode *dst, struct inode *src, int blk)
446 {
447         struct buffer_head *bh_dst = NULL, *bh_src = NULL;
448         int rc = 0;
449         handle_t *handle = NULL;
450         ENTRY;   
451
452         EXT3_JOURNAL_START(dst->i_sb, handle, SNAP_COPYBLOCK_TRANS_BLOCKS, rc);
453         if(rc)
454                 RETURN(rc);
455                                                                                                                                                                                                      
456         bh_src = ext3_bread(handle, src, blk, 0, &rc);
457         if (!bh_src) {
458                 CERROR("rcor for src blk %d, rcor %d\n", blk, rc);
459                 GOTO(exit_relese, rc);
460         }
461         bh_dst = ext3_getblk(handle, dst, blk, 1, &rc);
462         if (!bh_dst) {
463                 CERROR("rcor for dst blk %d, rcor %d\n", blk, rc);
464                 GOTO(exit_relese, rc);
465         }
466         CDEBUG(D_INODE, "copy block %lu to %lu (%ld bytes)\n",
467                bh_src->b_blocknr, bh_dst->b_blocknr, src->i_sb->s_blocksize);
468         
469         ext3_journal_get_write_access(handle, bh_dst);
470         memcpy(bh_dst->b_data, bh_src->b_data, src->i_sb->s_blocksize);
471         ext3_journal_dirty_metadata(handle, bh_dst);
472         rc = 1;
473
474 exit_relese:
475         if (bh_src) brelse(bh_src);
476         if (bh_dst) brelse(bh_dst);
477         if (handle)
478                 journal_stop(handle);
479         RETURN(rc);
480 }
481 /* fsfilt_ext3_copy_block - copy one data block from inode @src to @dst.
482    No lock here.  User should do the lock.
483    User should check the return value to see if the result is correct.
484    Return value:
485    1:    The block has been copied successfully
486    0:    No block is copied, usually this is because src has no such blk
487   -1:    Error
488 */
489                                                                                                                                                                                                      
490 static int fsfilt_ext3_copy_block (struct inode *dst, struct inode *src, int blk)
491 {
492         int rc = 0;
493         ENTRY;                                                                                                                                                                                             
494         CDEBUG(D_INODE, "copy blk %d from %lu to %lu \n", blk, src->i_ino, 
495                dst->i_ino);
496         /*
497          * ext3_getblk() require handle!=NULL
498          */
499         if (S_ISREG(src->i_mode)) { 
500                 rc = ext3_copy_reg_block(dst, src, blk);
501         } else {
502                 rc = ext3_copy_dir_block(dst, src, blk);
503         }
504
505         RETURN(rc);
506 }
507                                                                                                                                                                                              
508 static inline int ext3_has_ea(struct inode *inode)
509 {
510        return (EXT3_I(inode)->i_file_acl != 0);
511 }
512 /* XXXThis function has a very bad effect to
513  * the performance of filesystem,
514  * will find another way to fix it
515  */
516 static void fs_flushinval_pages(handle_t *handle, struct inode* inode)
517 {
518         if (inode->i_blocks > 0 && inode->i_mapping) {
519 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
520                 fsync_inode_data_buffers(inode);
521 #endif
522                 truncate_inode_pages(inode->i_mapping, 0);
523         }
524 }
525 /*  ext3_migrate_data:
526  *  MOVE all the data blocks from inode src to inode dst as well as
527  *  COPY all attributes(meta data) from inode src to inode dst.
528  *  For extended attributes(EA), we COPY all the EAs but skip the Snap EA from 
529  *  src to dst. If the dst has Snap EA, then we CAN'T overwrite it. We CAN'T 
530  *  copy the src Snap EA. XXX for EA, can we change it to MOVE all the EAs
531  *  (exclude Snap EA) to dst and copy it back to src ? This is for LAN free 
532  *  backup later.
533  */
534 static int ext3_migrate_data(handle_t *handle, struct inode *dst, 
535                              struct inode *src)
536 {
537         unsigned long err = 0;
538         /* 512 byte disk blocks per inode block */
539         int bpib = src->i_sb->s_blocksize >> 9;
540         ENTRY;
541         
542         
543         if((!dst) || (!src)) 
544                 RETURN(-EINVAL);
545         
546         if (dst->i_ino == src->i_ino)
547                 RETURN(0);
548
549         fs_flushinval_pages(handle, src);
550         
551         ext3_copy_meta(handle, dst, src);
552
553         CDEBUG(D_INODE, "migrating data blocks from %lu to %lu\n", 
554                src->i_ino, dst->i_ino);
555         /* Can't check blocks in case of EAs */
556        
557         memcpy(EXT3_I(dst)->i_data, EXT3_I(src)->i_data,
558                sizeof(EXT3_I(src)->i_data));
559         memset(EXT3_I(src)->i_data, 0, sizeof(EXT3_I(src)->i_data));
560         
561         ext3_discard_prealloc(src);
562
563         dst->i_size = EXT3_I(dst)->i_disksize = EXT3_I(src)->i_disksize;
564         src->i_size = EXT3_I(src)->i_disksize = 0;
565
566         dst->i_blocks = src->i_blocks;
567         src->i_blocks = 0;
568         /*  Check EA blocks here to modify i_blocks correctly */
569         if(ext3_has_ea (src)) {
570                 src->i_blocks += bpib;
571                 if( ! ext3_has_ea (dst) )
572                         if( dst->i_blocks >= bpib )
573                                 dst->i_blocks -= bpib;
574         } else {
575                 if( ext3_has_ea (dst))
576                         dst->i_blocks += bpib;
577         }
578         
579         CDEBUG(D_INODE, "migrate data from ino %lu to ino %lu\n", src->i_ino, 
580                dst->i_ino);
581         ext3_mark_inode_dirty(handle, src);
582         ext3_mark_inode_dirty(handle, dst);
583         RETURN(err);
584 }
585
586 static handle_t * ext3_copy_data(handle_t *handle, struct inode *dst,
587                                  struct inode *src, int *has_orphan)
588 {
589         unsigned long blocks, blk, cur_blks;
590         int low_credits, save_ref;
591         int err = 0;
592         ENTRY;
593
594         blocks =(src->i_size + src->i_sb->s_blocksize-1) >>
595                  src->i_sb->s_blocksize_bits;
596         low_credits = handle->h_buffer_credits - SNAP_BIGCOPY_TRANS_BLOCKS;
597         
598         CDEBUG(D_INODE, "%lu blocks need to be copied,low credits limit %d\n", 
599                blocks, low_credits);
600
601         for (blk = 0, cur_blks= dst->i_blocks; blk < blocks; blk++) {
602                 if (!ext3_bmap(src->i_mapping, blk))
603                         continue;
604                 if(handle->h_buffer_credits <= low_credits) {
605                         int needed = (blocks - blk) * EXT3_DATA_TRANS_BLOCKS;
606                         if (needed > 4 * SNAP_COPYBLOCK_TRANS_BLOCKS)
607                                 needed = 4 * SNAP_COPYBLOCK_TRANS_BLOCKS;
608                         if (journal_extend(handle, needed)) {
609                                 CDEBUG(D_INFO, "create_indirect:fail to extend "
610                                        "journal, restart trans\n");
611                                 
612                                 if(!*has_orphan) {
613                                         CDEBUG(D_INODE, "add orphan ino %lu" 
614                                                "nlink %d to orphan list \n",
615                                                 dst->i_ino, dst->i_nlink); 
616                                         ext3_orphan_add(handle, dst);
617                                         *has_orphan = 1;
618                                 }
619                                 EXT3_I(dst)->i_disksize =
620                                         blk * dst->i_sb->s_blocksize;
621                                 dst->i_blocks = cur_blks;
622                                 dst->i_mtime = CURRENT_TIME;
623                                 ext3_mark_inode_dirty(handle, dst);
624                                 /*
625                                  * We can be sure the last handle was stoped
626                                  * ONLY if the handle's reference count is 1
627                                  */
628                                 save_ref = handle->h_ref;
629                                 handle->h_ref = 1;
630                                 if(journal_stop(handle) ){
631                                         CERROR("fail to stop journal\n");
632                                         handle = NULL;
633                                         break;
634                                 }
635                                 EXT3_JOURNAL_START(dst->i_sb, handle, 
636                                                    low_credits + needed, err);
637                                 if(err) break;
638                                 handle->h_ref = save_ref;
639                         }
640                 }
641                 if (fsfilt_ext3_copy_block( dst, src, blk) < 0 )
642                         break;
643                 cur_blks += dst->i_sb->s_blocksize / 512;
644         }
645         
646         dst->i_size = EXT3_I(dst)->i_disksize = src->i_size;
647         RETURN(handle);
648 }
649 /*Here delete the data of that pri inode 
650  *FIXME later, should throw the blocks of 
651  *primary inode directly
652  */
653 static int ext3_throw_inode_data(handle_t *handle, struct inode *inode) 
654 {       
655         struct inode *tmp = NULL;
656         ENTRY;
657         tmp = ext3_new_inode(handle, inode, (int)inode->i_mode, 0);
658         if(tmp) { 
659                 CERROR("ext3_new_inode error\n");
660                 RETURN(-EIO);
661         }                
662         double_lock_inode(inode, tmp);
663         ext3_migrate_data(handle, tmp, inode);
664         double_unlock_inode(inode, tmp);
665         tmp->i_nlink = 0;
666         iput(tmp);      
667         RETURN(0);
668 }
669 /**
670  * fsfilt_ext3_create_indirect - copy data, attributes from primary to new indir inode
671  * @pri: primary (source) inode
672  * @index: index in snapshot table where indirect inode should be stored
673  * @delete: flag that the primary inode is being deleted
674  *
675  * We copy all of the data blocks from the @*src inode to the @*dst inode, as
676  * well as copying the attributes from @*src to @*dst.  If @delete == 1, then
677  * the primary inode will only be a redirector and will appear deleted.
678  *
679  * FIXME do we move EAs, only non-snap EAs, what?
680  * FIXME we could do readpage/writepage, but we would have to handle block
681  *       allocation then, and it ruins sparse files for 1k/2k filesystems,
682  *       at the expense of doing a memcpy.
683  */
684 static struct inode* fsfilt_ext3_create_indirect(struct inode *pri, int index, 
685                                                  unsigned int gen, 
686                                                  struct inode* parent,
687                                                  int del)
688 {
689         struct inode *ind = NULL;
690         handle_t *handle = NULL;
691         int err = 0;
692         int has_orphan = 0;
693         ENTRY;
694         
695         if( pri == EXT3_SB(pri->i_sb)->s_journal_inode ){
696                 CERROR("TRY TO COW JOUNRAL\n");
697                 RETURN(ERR_PTR(-EINVAL));
698         }
699         CDEBUG(D_INODE, "creating indirect inode for %lu at index %d, %s pri\n",
700                pri->i_ino, index, del ? "deleting" : "preserve");
701
702         ind = fsfilt_ext3_get_indirect(pri, NULL, index);
703         
704         EXT3_JOURNAL_START(pri->i_sb, handle, SNAP_CREATEIND_TRANS_BLOCKS,
705                            err);
706         if(err) 
707                 RETURN(ERR_PTR(err));
708         /* XXX ? We should pass an err argument to get_indirect and precisely
709          * detect the errors, for some errors, we should exit right away.
710          */
711
712         /* if the option is SNAP_DEL_PRI_WITH_IND and there is an indirect, 
713          * we just free the primary data blocks and mark this inode delete
714          */
715         if((del) && ind && !IS_ERR(ind)) {
716                 /* for directory, we don't free the data blocks, 
717                  * or ext3_rmdir will report errors "bad dir, no data blocks" 
718                  */
719                 CDEBUG(D_INODE, "del==SNAP_DEL_PRI_WITH_IND && ind\n");
720                 if(!S_ISDIR(pri->i_mode)) {     
721                         err = ext3_throw_inode_data(handle, pri);
722                         if (err)
723                                 GOTO(exit, err);
724                         pri->i_nlink = 1;
725                 }
726                 EXT3_I(pri)->i_dtime = LTIME_S(CURRENT_TIME);
727                 ext3_mark_inode_dirty(handle, pri);
728                 GOTO(exit, err=0);
729         }
730
731         if (ind && !IS_ERR(ind)) {
732                 CDEBUG(D_INODE, "existing indirect ino %lu for %lu: index %d\n",
733                        ind->i_ino, pri->i_ino, index);
734         
735                 GOTO(exit, err=0);
736         }
737         
738         /* XXX: check this, ext3_new_inode, the first arg should be "dir" */ 
739         ind = ext3_new_inode(handle, pri, (int)pri->i_mode, 0);
740
741         if (IS_ERR(ind))
742                 GOTO(exit, err);
743         CDEBUG(D_INODE, "got new inode %lu\n", ind->i_ino);
744         ind->i_rdev = pri->i_rdev;
745         ind->i_op = pri->i_op;
746       
747         /*init ind ops*/ 
748         memcpy(ind->i_op, pri->i_op, sizeof(*pri->i_op));
749         memcpy(ind->i_fop, pri->i_fop, sizeof(*pri->i_fop));
750         memcpy(ind->i_mapping->a_ops, pri->i_mapping->a_ops, 
751                sizeof(*pri->i_mapping->a_ops));
752          
753         ext3_set_generation(ind, (unsigned long)gen);
754         /* If we are deleting the primary inode, we want to ensure that it is
755          * written to disk with a non-zero link count, otherwise the next iget
756          * and iput will mark the inode as free (which we don't want, we want
757          * it to stay a redirector).  We fix this in ext3_destroy_indirect()
758          * when the last indirect inode is removed.
759          *
760          * We then do what ext3_delete_inode() does so that the metadata will
761          * appear the same as a deleted inode, and we can detect it later.
762          */
763         if (del) {
764                 CDEBUG(D_INODE, "deleting primary inode\n");
765                 
766                 down(&ind->i_sem);
767                 err = ext3_migrate_data(handle, ind, pri);
768                 if (err)
769                         GOTO(exit_unlock, err);
770
771                 err = fsfilt_ext3_set_indirect(pri, index, ind->i_ino, parent->i_ino);
772                 if (err)
773                         GOTO(exit_unlock, err);
774
775                 /* XXX for directory, we copy the block back 
776                  * or ext3_rmdir will report errors "bad dir, no data blocks" 
777                  */
778                 if( S_ISDIR(pri->i_mode)) {
779                         handle = ext3_copy_data(handle, pri, ind, &has_orphan);
780                         if(!handle) 
781                                 GOTO(exit_unlock, err= -EINVAL);
782                 }
783
784                 EXT3_I(pri)->i_flags |= EXT3_DEL_FL;
785                 EXT3_I(ind)->i_flags |= EXT3_COW_FL;
786                 if(S_ISREG(pri->i_mode)) pri->i_nlink = 1;
787                 EXT3_I(pri)->i_dtime = LTIME_S(CURRENT_TIME);
788                 //EXT3_I(pri)->i_generation++;
789                 ext3_mark_inode_dirty(handle, pri);
790                 ext3_mark_inode_dirty(handle, ind);
791                 up(&ind->i_sem);
792         } else {
793                 down(&ind->i_sem);
794                 err = ext3_migrate_data(handle, ind, pri);
795                 if (err)
796                         goto exit_unlock;
797
798                 /* for regular files we do blocklevel COW's maybe */
799                 if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW)
800                     && S_ISREG(pri->i_mode)) {
801
802                         CDEBUG(D_INODE, "ino %lu, do block cow\n", pri->i_ino);
803                         /* because after migrate_data , pri->i_size is 0 */
804                         pri->i_size = ind->i_size;
805                 }
806                 else {
807                         int bpib = pri->i_sb->s_blocksize >> 9;
808                         CDEBUG(D_INODE, "ino %lu, do file cow\n", pri->i_ino);
809
810                         /* XXX: can we do this better? 
811                          * If it's a fast symlink, we should copy i_data back!
812                          * The criteria to determine a fast symlink is:
813                          * 1) it's a link and its i_blocks is 0
814                          * 2) it's a link and its i_blocks is bpib ( the case 
815                          *    it has been cowed and has ea )
816                          */
817                         if( S_ISLNK(ind->i_mode) && ((ind->i_blocks == 0) || 
818                             (ext3_has_ea(ind) && ind->i_blocks == bpib))) {
819                                 CDEBUG(D_INODE, "ino %lu is fast symlink\n", pri->i_ino);
820                                 memcpy(EXT3_I(pri)->i_data, EXT3_I(ind)->i_data,
821                                        sizeof(EXT3_I(ind)->i_data));
822                                 pri->i_size = ind->i_size;
823                         }
824                         else {
825                                 handle = ext3_copy_data(handle, pri, ind, &has_orphan);
826                                 if (!handle)
827                                         GOTO(exit_unlock, err);
828                         }
829                 }
830                 /* set cow flag for ind */
831                 EXT3_I(ind)->i_flags |= EXT3_COW_FL;
832                 EXT3_I(pri)->i_flags &= ~EXT3_COW_FL;
833
834                 ext3_mark_inode_dirty(handle, pri);
835                 ext3_mark_inode_dirty(handle, ind);
836
837                 err = fsfilt_ext3_set_indirect(pri, index, ind->i_ino, parent->i_ino);
838                 if (err)
839                         GOTO(exit_unlock, err);
840                 up(&ind->i_sem);
841         }
842
843         if (!EXT3_HAS_COMPAT_FEATURE(pri->i_sb,
844                                      EXT3_FEATURE_COMPAT_SNAPFS)) {
845                 lock_super(pri->i_sb);
846                 ext3_journal_get_write_access(handle, EXT3_SB(pri->i_sb)->s_sbh);
847                 EXT3_SB(pri->i_sb)->s_es->s_feature_compat |=
848                         cpu_to_le32(EXT3_FEATURE_COMPAT_SNAPFS);
849                 ext3_journal_dirty_metadata(handle, EXT3_SB(pri->i_sb)->s_sbh);
850                 pri->i_sb->s_dirt = 1;
851                 unlock_super(pri->i_sb);
852         }
853         if (has_orphan) {
854                 CDEBUG(D_INODE, "del %lu nlink %d from orphan list\n", 
855                        ind->i_ino, ind->i_nlink);
856                 ext3_orphan_del(handle, ind);
857         }
858         journal_stop(handle);
859
860         RETURN(ind);
861
862 exit_unlock:
863         up(&ind->i_sem);
864         ind->i_nlink = 0;
865 exit:
866         if (has_orphan) {
867                 CDEBUG(D_INODE, "del %lu nlink %d from orphan list\n", 
868                        ind->i_ino, ind->i_nlink);
869                 ext3_orphan_del(handle, ind);
870         }
871         iput(ind);
872         journal_stop(handle);
873         
874         RETURN(ERR_PTR(err));
875 }
876
877 static int fsfilt_ext3_snap_feature (struct super_block *sb, int feature, int op) {
878                                                                                                                                                                                                      
879         int rc = -EINVAL;
880         handle_t *handle;
881         ENTRY;
882         
883         switch (op) {
884                 case SNAP_SET_FEATURE:
885                 case SNAP_CLEAR_FEATURE:
886                         EXT3_JOURNAL_START(sb, handle, 1, rc);
887                         if(rc)
888                                 RETURN(rc);
889                         lock_super(sb);
890                         ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
891                         if (op == SNAP_SET_FEATURE) 
892                                 SB_FEATURE_COMPAT(sb) |= cpu_to_le32(feature);
893                         else 
894                                 SB_FEATURE_COMPAT(sb) &= ~cpu_to_le32(feature);
895                         sb->s_dirt = 1;
896                         ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
897                         unlock_super(sb);
898                         journal_stop(handle);
899                         break;
900                 case SNAP_HAS_FEATURE:
901                         /*FIXME should lock super or not*/
902                         rc = SNAP_HAS_COMPAT_FEATURE(sb, feature);
903                         break;
904                 default:
905                         break;
906         }
907         RETURN(rc);
908 }
909 /*
910  * is_redirector - determines if a primary inode is a redirector
911  * @inode: primary inode to test
912  *
913  * Returns 1 if the inode is a redirector, 0 otherwise.
914  */
915 static int fsfilt_ext3_is_redirector(struct inode *inode)
916 {
917         int is_redirector = 0;
918         int rc;
919         ENTRY;
920                                                                                                                                                                                                      
921         rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX ,EXT3_SNAP_ATTR,
922                                           NULL, 0);
923         if (rc > 0 && rc <= MAX_SNAP_DATA)
924                 is_redirector = 1;
925         CDEBUG(D_INODE, "inode %lu %s redirector\n", inode->i_ino,
926                is_redirector ? "is" : "isn't");
927         RETURN(is_redirector);
928 }
929 /*if it's indirect inode or not */
930 static int fsfilt_ext3_is_indirect(struct inode *inode)
931 {
932         if (EXT3_I(inode)->i_flags |= EXT3_COW_FL)
933                 return 1;
934         else
935                 return 0;
936 }
937
938 /* get the indirect ino at index of the primary inode
939  * return value:        postive:        indirect ino number
940  *                      negative or 0:  error
941  */
942 static ino_t fsfilt_ext3_get_indirect_ino(struct super_block *sb, 
943                                           ino_t primary_ino, int index)
944 {
945         char buf[EXT3_MAX_SNAP_DATA];
946         struct inode *primary = NULL;
947         struct snap_ea *snaps;
948         ino_t ino = 0;
949         int err;
950         ENTRY;                                                                                                                                                                                             
951         if (index < 0 || index > EXT3_MAX_SNAPS)
952                 RETURN(0);
953         primary = iget(sb, primary_ino);   
954        
955         if (!primary) {
956                 err = -EIO;
957                 CERROR("attribute read error=%d", err);
958                 GOTO (err_free, ino = err); 
959         }                                                                                                                                                                                              
960         err = ext3_xattr_get(primary, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
961                              buf, EXT3_MAX_SNAP_DATA);
962         if (err == -ENODATA) {
963                 GOTO(err_free, ino = -ENODATA);
964         } else if (err < 0) {
965                 CERROR(" attribute read error err=%d\n", err);
966                 GOTO(err_free, ino = err);
967         }
968         snaps = (struct snap_ea *)buf;
969         ino = le32_to_cpu (snaps->ino[index]);
970         CDEBUG(D_INODE, "snap ino for %ld at index %d is %lu\n",
971                primary->i_ino, index, ino);
972 err_free:
973         if (primary)
974                 iput(primary); 
975         RETURN(ino);
976 }
977                                                                                                                                                                                                      
978
/* The following helpers are used by destroy_indirect.
 * inode_bmap() reads and inode_setbmap() writes one slot of the i_data[]
 * array kept in the inode's ext3 private info. */
#define inode_bmap(ino, slot) (EXT3_I(ino)->i_data[(slot)])
#define inode_setbmap(ino, slot, phys) (EXT3_I(ino)->i_data[(slot)]=(phys))
982 static inline int block_bmap(struct buffer_head * bh, int nr)
983 {
984         int tmp;
985                                                                                                                                                                                                      
986         if (!bh)
987                 return 0;
988         tmp = le32_to_cpu(((u32 *) bh->b_data)[nr]);
989         brelse (bh);
990         return tmp;
991 }
992                                                                                                                                                                                                      
993 static inline int block_setbmap(handle_t *handle, struct buffer_head * bh, 
994                                  int nr, int physical)
995 {
996                                                                                                                                                                                                      
997         if (!bh)
998                 return 0;
999         ext3_journal_get_write_access(handle, bh);
1000         ((u32 *) bh->b_data)[nr] = cpu_to_le32(physical);
1001         ext3_journal_dirty_metadata(handle, bh);
1002         brelse (bh);
1003         return 1;
1004 }
1005
1006 static int ext3_migrate_block(handle_t *handle, struct inode * dst, 
1007                               struct inode *src, int block)
1008 {
1009         int i1_d=0, i1_s=0, i2_d=0, i2_s=0, i3_d=0, i3_s=0;
1010         int addr_per_block = EXT3_ADDR_PER_BLOCK(src->i_sb);
1011         int addr_per_block_bits = EXT3_ADDR_PER_BLOCK_BITS(src->i_sb);
1012         int physical = 0;
1013         ENTRY;        
1014
1015         if (block < 0) {
1016                 CWARN("ext3_migrate_block block < 0 %p \n", src->i_sb);
1017                 RETURN(0);
1018         }
1019         if (block >= EXT3_NDIR_BLOCKS + addr_per_block +
1020                 (1 << (addr_per_block_bits * 2)) +
1021                 ((1 << (addr_per_block_bits * 2)) << addr_per_block_bits)) {
1022                 CWARN("ext3_migrate_block block > big %p \n", src->i_sb);
1023                 RETURN(0);
1024         }
1025         /* EXT3_NDIR_BLOCK */
1026         if (block < EXT3_NDIR_BLOCKS) {
1027                 if(inode_bmap(dst, block))      
1028                         RETURN(0);
1029                 else {
1030                         if( (physical = inode_bmap(src, block)) ) {
1031                                 inode_setbmap (dst, block, physical);
1032                                 inode_setbmap (src, block, 0);
1033                                 RETURN(1);
1034                         }
1035                         else 
1036                                 RETURN(0);
1037                 }
1038         }
1039         /* EXT3_IND_BLOCK */
1040         block -= EXT3_NDIR_BLOCKS;
1041         if (block < addr_per_block) {
1042                 i1_d = inode_bmap (dst, EXT3_IND_BLOCK);
1043                 if (!i1_d) {
1044                         physical = inode_bmap(src, EXT3_IND_BLOCK);
1045                         if( physical ) {
1046                                 inode_setbmap (dst, EXT3_IND_BLOCK, physical);
1047                                 inode_setbmap (src, EXT3_IND_BLOCK, 0);
1048                                 RETURN(1);
1049                         }
1050                         else 
1051                                 RETURN(0);
1052                 }
1053                 if(block_bmap(sb_bread(dst->i_sb, i1_d), block)) 
1054                         RETURN(0);
1055
1056                 i1_s = inode_bmap (src, EXT3_IND_BLOCK);
1057                 if( !i1_s)      RETURN(0);
1058
1059                 physical = block_bmap(sb_bread(src->i_sb, i1_s), block);
1060
1061                 if( physical) {
1062                         block_setbmap(handle, sb_bread(dst->i_sb, i1_d),block,
1063                                       physical); 
1064                         block_setbmap(handle, sb_bread(src->i_sb, i1_s),block,0);
1065                         RETURN(1); 
1066                 }
1067                 else 
1068                         RETURN(0);
1069         }
1070         /* EXT3_DIND_BLOCK */
1071         block -= addr_per_block;
1072         if (block < (1 << (addr_per_block_bits * 2))) {
1073                 i1_d = inode_bmap (dst, EXT3_DIND_BLOCK);
1074                 i1_s = inode_bmap (src, EXT3_DIND_BLOCK);
1075                 if (!i1_d) {
1076                         if( (physical = inode_bmap(src, EXT3_DIND_BLOCK)) ) {
1077                                 inode_setbmap (dst, EXT3_DIND_BLOCK, physical);
1078                                 inode_setbmap (src, EXT3_DIND_BLOCK, 0);
1079                                 RETURN(1);
1080                         }
1081                         else 
1082                                 RETURN(0);
1083                 }
1084                 i2_d = block_bmap (sb_bread (dst->i_sb, i1_d),
1085                                 block >> addr_per_block_bits);
1086
1087                 if (!i2_d) {
1088                         
1089                         if(!i1_s)       RETURN(0);
1090
1091                         physical = block_bmap(sb_bread (src->i_sb, i1_s),
1092                                                block >> addr_per_block_bits);
1093                         if(physical) {
1094                                 block_setbmap(handle, sb_bread(dst->i_sb, i1_d), 
1095                                               block >> addr_per_block_bits, 
1096                                               physical);
1097                                 block_setbmap(handle, sb_bread(src->i_sb, i1_s), 
1098                                               block >> addr_per_block_bits, 0);
1099                                 RETURN(1);
1100                         }
1101                         else
1102                                 RETURN(0);
1103                 }
1104                 physical = block_bmap(sb_bread(dst->i_sb, i2_d),
1105                                       block & (addr_per_block - 1));
1106                 if(physical) 
1107                                 RETURN(0);
1108                 else {
1109                         i2_s =  block_bmap (sb_bread(src->i_sb, i1_s),
1110                                 block >> addr_per_block_bits);
1111                         if(!i2_s)       RETURN(0);
1112         
1113                         physical = block_bmap(sb_bread(src->i_sb, i2_s),
1114                                    block & (addr_per_block - 1));
1115                         if(physical) {
1116                                 block_setbmap(handle, sb_bread(dst->i_sb, i2_d),
1117                                    block & (addr_per_block - 1), physical);
1118                                 block_setbmap(handle, sb_bread(src->i_sb, i2_s),
1119                                    block & (addr_per_block - 1), 0);
1120                                 RETURN(1);
1121                         }
1122                         else 
1123                                 RETURN(0);
1124                 }
1125                 
1126         }
1127         /* EXT3_TIND_BLOCK */
1128         block -= (1 << (addr_per_block_bits * 2));
1129         i1_d = inode_bmap (dst, EXT3_TIND_BLOCK);
1130         i1_s = inode_bmap (src, EXT3_TIND_BLOCK);
1131         if (!i1_d) {
1132                 if((physical = inode_bmap(src, EXT3_TIND_BLOCK)) )
1133                         inode_setbmap (dst, EXT3_TIND_BLOCK, physical);
1134                 else 
1135                         RETURN(0);
1136         }
1137         i2_d = block_bmap(sb_bread (dst->i_sb, i1_d),
1138                            block >> (addr_per_block_bits * 2));
1139
1140         if(i1_s) i2_s = block_bmap(sb_bread(src->i_sb, i1_s),
1141                                    block >> (addr_per_block_bits * 2));
1142
1143         if (!i2_d) {
1144                 if( !i1_s)      RETURN(0);
1145                 
1146                 physical = block_bmap(sb_bread (src->i_sb, i1_s),
1147                                        block >> (addr_per_block_bits * 2));
1148                 if(physical) {
1149                         block_setbmap(handle, sb_bread (dst->i_sb, i1_d),
1150                                       block >> (addr_per_block_bits * 2), physical);
1151                         block_setbmap(handle, sb_bread (src->i_sb, i1_s),
1152                                       block >> (addr_per_block_bits * 2), 0);
1153                         RETURN(1);
1154                 }
1155                 else
1156                         RETURN(0);
1157         }
1158         i3_d = block_bmap (sb_bread (dst->i_sb, i2_d),
1159                         (block >> addr_per_block_bits) & (addr_per_block - 1));
1160         if( i2_s) i3_s = block_bmap (sb_bread (src->i_sb, i2_s),
1161                         (block >> addr_per_block_bits) & (addr_per_block - 1));
1162         
1163         if (!i3_d) {
1164                 if (!i2_s)      RETURN(0);      
1165                 physical = block_bmap (sb_bread (src->i_sb, i2_s),
1166                         (block >> addr_per_block_bits) & (addr_per_block - 1));
1167                 if( physical) {
1168                         block_setbmap (handle, sb_bread (dst->i_sb, i2_d),
1169                                        (block >> addr_per_block_bits) & 
1170                                        (addr_per_block - 1), physical);
1171                         block_setbmap (handle, sb_bread (src->i_sb, i2_s),
1172                                        (block >> addr_per_block_bits) & 
1173                                        (addr_per_block - 1),0);
1174                         RETURN(1);
1175                 }
1176                 else
1177                         RETURN(0);
1178         }
1179         physical = block_bmap (sb_bread (dst->i_sb, i3_d),
1180                            block & (addr_per_block - 1)) ;
1181         if(physical)    
1182                 RETURN(0);
1183         else {
1184                 if(!i3_s)       
1185                         RETURN(0);      
1186                 physical = block_bmap(sb_bread(src->i_sb, i3_s),
1187                                       block & (addr_per_block - 1));
1188                 if(physical) {
1189                         block_setbmap (handle, sb_bread (dst->i_sb, i3_d),
1190                                        block & (addr_per_block - 1), physical);
1191                         block_setbmap (handle, sb_bread (src->i_sb, i3_s),
1192                                        block & (addr_per_block - 1), 0); 
1193                         RETURN(1);
1194                 }
1195                 else
1196                         RETURN(0); 
1197         }
1198 }
1199
1200 /* Generate i_blocks from blocks for an inode .
1201  * We also calculate EA block here.
1202  */
1203 static unsigned long calculate_i_blocks(struct inode *inode, int blocks)
1204 {
1205         /* 512 byte disk blocks per inode block */
1206         int bpib = inode->i_sb->s_blocksize >> 9;
1207         int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
1208         unsigned long i_blocks = 0;
1209         int i=0, j=0, meta_blocks = 0;
1210         ENTRY;                                                                                                                                                                                                     
1211         if(!inode)    
1212                 RETURN(0);
1213         
1214         if( blocks < 0 ) {
1215                 /* re-calculate blocks here */
1216                 blocks = (inode->i_size + inode->i_sb->s_blocksize-1)
1217                           >> inode->i_sb->s_blocksize_bits;
1218         }
1219                                                                                                                                                                                                      
1220         /* calculate data blocks */
1221         for(i = 0; i < blocks; i++) {
1222                 if(ext3_bmap(inode->i_mapping, i))
1223                         i_blocks += bpib;
1224         }
1225         /* calculate meta blocks */
1226         blocks -= EXT3_NDIR_BLOCKS;
1227         if(blocks > 0) {
1228                 meta_blocks++;
1229                 blocks -= addr_per_block;
1230         }
1231         if( blocks > 0 ) meta_blocks++;
1232         i=0;
1233         
1234         while( (blocks > 0) && (i < addr_per_block) ) {
1235                 meta_blocks++;
1236                 blocks -= addr_per_block;
1237                 i++;
1238         }
1239         
1240         if ( blocks > 0 ) meta_blocks += 2;
1241         i=0; j=0;
1242         
1243         while( blocks > 0) {
1244                 meta_blocks++;
1245                 blocks -= addr_per_block;
1246                 i++;
1247                 if(i >= addr_per_block  ) {
1248                         i=0;
1249                         j++;
1250                 }
1251                 if( j >= addr_per_block) {
1252                         j=0;
1253                         meta_blocks++;
1254                 }
1255         }
1256         /* calculate EA blocks */
1257         if(ext3_has_ea(inode))       
1258                 meta_blocks++;
1259                                                                                                                                                                                                      
1260         i_blocks += meta_blocks * bpib;
1261         CDEBUG(D_INODE, "ino %lu, get i_blocks %lu\n", inode->i_ino, i_blocks);
1262         
1263         RETURN(i_blocks);
1264 }
1265
1266 /**
1267  * fsfilt_ext3_destroy_indirect - delete an indirect inode from the table
1268  * @pri: primary inode
1269  * @ind: indirect inode
1270  * @index: index of inode that should be deleted
1271  *
1272  * We delete the @*ind inode, and remove it from the snapshot table.  If @*ind
1273  * is NULL, we use the inode at @index.
1274  */
1275 static int fsfilt_ext3_destroy_indirect(struct inode *pri, int index, 
1276                                         struct inode *next_ind)
1277 {
1278         char buf[EXT3_MAX_SNAP_DATA];
1279         struct snap_ea *snaps;
1280         struct inode *ind;
1281         int save = 0, i=0, err = 0;
1282         handle_t *handle=NULL;
1283         ENTRY;
1284
1285         if (index < 0 || index > EXT3_MAX_SNAPS)
1286                 RETURN(0);
1287
1288         if( pri == EXT3_SB(pri->i_sb)->s_journal_inode ){
1289                 CERROR("TRY TO DESTROY JOURNAL'S IND\n");
1290                 RETURN(-EINVAL);
1291         }
1292
1293         err = ext3_xattr_get(pri, EXT3_SNAP_INDEX, EXT3_SNAP_ATTR,
1294                              buf, EXT3_MAX_SNAP_DATA);
1295         if (err < 0) {
1296                 CERROR("inode %lu attribute read error\n", pri->i_ino);
1297                 RETURN(err);
1298         }
1299         
1300         snaps = (struct snap_ea *)buf;
1301         if ( !snaps->ino[index] ) {
1302                 CERROR("for pri ino %lu, index %d, redirect ino is 0\n",
1303                        pri->i_ino, index);      
1304                 RETURN(-EINVAL);
1305         }
1306
1307         CDEBUG(D_INODE, "for pri ino %lu, reading inode %lu at index %d\n", 
1308                pri->i_ino, (ulong)le32_to_cpu(snaps->ino[index]), index);
1309
1310         ind = iget(pri->i_sb, le32_to_cpu (snaps->ino[index]));
1311
1312         if ( !ind || IS_ERR(ind) || is_bad_inode(ind)) 
1313                 RETURN(-EINVAL);
1314
1315         CDEBUG(D_INODE, "iget ind %lu, ref count = %d\n", 
1316                ind->i_ino, atomic_read(&ind->i_count));
1317         
1318         EXT3_JOURNAL_START(pri->i_sb, handle, SNAP_DESTROY_TRANS_BLOCKS, err);
1319         if (err) {
1320                 iput(ind);
1321                 RETURN(err);
1322         }
1323         /* if it's block level cow, first copy the blocks back */       
1324         if (EXT3_HAS_COMPAT_FEATURE(pri->i_sb, EXT3_FEATURE_COMPAT_BLOCKCOW) &&
1325             S_ISREG(pri->i_mode)) {
1326                 int blocks;
1327                 
1328                 if (!next_ind) {        
1329                         next_ind = pri;
1330                         down(&ind->i_sem);
1331                 } else {
1332                         double_lock_inode(next_ind, ind);
1333                 }
1334                 blocks = (next_ind->i_size + next_ind->i_sb->s_blocksize-1) 
1335                           >> next_ind->i_sb->s_blocksize_bits;
1336
1337                 CDEBUG(D_INODE, "migrate block back from ino %lu to %lu\n",
1338                        ind->i_ino, next_ind->i_ino);
1339
1340                 for(i = 0; i < blocks; i++) {
1341                         if( ext3_bmap(next_ind->i_mapping, i) ) 
1342                                 continue;
1343                         if( !ext3_bmap(ind->i_mapping, i) ) 
1344                                 continue;
1345                         ext3_migrate_block(handle, next_ind, ind, i) ;
1346                 }
1347                 /* Now re-compute the i_blocks */
1348                 /* XXX shall we take care of ind here? probably not */
1349                 next_ind->i_blocks = calculate_i_blocks( next_ind, blocks);
1350                 ext3_mark_inode_dirty(handle, next_ind);
1351
1352                 if (next_ind == pri) 
1353                         up(&ind->i_sem);
1354                 else 
1355                         double_unlock_inode(next_ind, ind);
1356         }
1357         
1358         CDEBUG(D_INODE, "delete indirect ino %lu\n", ind->i_ino);
1359         CDEBUG(D_INODE, "iput ind %lu, ref count = %d\n", ind->i_ino, 
1360                atomic_read(&ind->i_count));
1361         
1362         ind->i_nlink = 0;
1363         iput (ind);
1364
1365         snaps->ino[index] = cpu_to_le32(0);
1366         for (i = 0; i < EXT3_MAX_SNAPS; i++)
1367                 save += snaps->ino[i];
1368
1369
1370         /*Should we remove snap feature here*/
1371         /*
1372          * If we are deleting the last indirect inode, and the primary inode
1373          * has already been deleted, then mark the primary for deletion also.
1374          * Otherwise, if we are deleting the last indirect inode remove the
1375          * snaptable from the inode.    XXX
1376          */
1377         if (!save && EXT3_I(pri)->i_dtime) {
1378                 CDEBUG(D_INODE, "deleting primary %lu\n", pri->i_ino);
1379                 pri->i_nlink = 0;
1380                 /* reset err to 0 now */
1381                 err = 0;
1382         } else {
1383                 CDEBUG(D_INODE, "%s redirector table\n", 
1384                        save ? "saving" : "deleting");
1385                 err = ext3_xattr_set_handle(handle, pri, EXT3_SNAP_INDEX, 
1386                                             EXT3_SNAP_ATTR, save ? buf : NULL, 
1387                                             EXT3_MAX_SNAP_DATA, 0);
1388                 ext3_mark_inode_dirty(handle, pri);
1389         }
1390         journal_stop(handle);
1391         
1392         RETURN(err);
1393 }
1394
1395 /* restore a primary inode with the indirect inode at index */
1396 static int fsfilt_ext3_restore_indirect(struct inode *pri, int index)
1397 {
1398         struct inode *ind;
1399         int err = 0;
1400         handle_t *handle = NULL;
1401         ENTRY;
1402
1403         if (index < 0 || index > EXT3_MAX_SNAPS)
1404                 RETURN(-EINVAL);
1405
1406         if( pri == EXT3_SB(pri->i_sb)->s_journal_inode ){
1407                 CERROR("TRY TO RESTORE JOURNAL\n");
1408                 RETURN(-EINVAL);
1409         }
1410         CDEBUG(D_INODE, "pri ino %lu, index %d\n", pri->i_ino, index);
1411
1412         ind = fsfilt_ext3_get_indirect(pri, NULL, index);
1413
1414         if (!ind) 
1415                 RETURN(-EINVAL);
1416
1417         CDEBUG(D_INODE, "restore ino %lu to %lu\n", pri->i_ino, ind->i_ino);
1418
1419         EXT3_JOURNAL_START(pri->i_sb, handle, SNAP_RESTORE_TRANS_BLOCKS, err); 
1420         if(err)
1421                 RETURN(err);
1422         /* first destroy all the data blocks in primary inode */
1423         /* XXX: check this, ext3_new_inode, the first arg should be "dir" */
1424         err = ext3_throw_inode_data(handle, pri);
1425         if (err) {
1426                 CERROR("restore_indirect, new_inode err\n");
1427                 RETURN(err);
1428         }       
1429         double_lock_inode(pri, ind);
1430         ext3_migrate_data(handle, pri, ind);
1431         EXT3_I(pri)->i_flags &= ~EXT3_COW_FL;
1432         ext3_mark_inode_dirty(handle, pri);
1433         double_unlock_inode(pri, ind);
1434         iput(ind);
1435         
1436         //fsfilt_ext3_destroy_indirect(pri, index);
1437         journal_stop(handle);
1438         
1439         RETURN(err);
1440 }
1441
1442 /**
1443  * ext3_snap_iterate - iterate through all of the inodes
1444  * @sb: filesystem superblock
1445  * @repeat: pointer to function called on each valid inode
1446  * @start: inode to start iterating at
1447  * @priv: private data to the caller/repeat function
1448  *
1449  * If @start is NULL, then we do not return an inode pointer.  If @*start is
1450  * NULL, then we start at the beginning of the filesystem, and iterate over
1451  * all of the inodes in the system.  If @*start is non-NULL, then we start
1452  * iterating at this inode.
1453  *
1454  * We call the repeat function for each inode that is in use.  The repeat
1455  * function must check if this is a redirector (with is_redirector) if it
1456  * only wants to operate on redirector inodes.  If there is an error or
1457  * the repeat function returns non-zero, we return the last inode operated
1458  * on in the @*start parameter.  This allows the caller to restart the
1459  * iteration at this inode if desired, by returning a positive value.
1460  * Negative return values indicate an error.
1461  *
1462  * NOTE we cannot simply traverse the existing filesystem tree from the root
1463  *      inode, as there may be disconnected trees from deleted files/dirs
1464  *
1465  * FIXME If there was a list of inodes with EAs, we could simply walk the list
1466  * intead of reading every inode.  This is an internal implementation issue.
1467  */
1468
1469 static int ext3_iterate_all(struct super_block *sb,
1470                             int (*repeat)(struct inode *inode,void *priv),
1471                             struct inode **start, void *priv)
1472 {
1473         struct inode *tmp = NULL;
1474         int gstart, gnum, err = 0;
1475         ino_t istart, ibase;
1476         ENTRY;
1477
1478         if (!start)
1479                 start = &tmp;
1480         if (!*start) {
1481                 *start = iget(sb, EXT3_ROOT_INO);
1482                 if (!*start) 
1483                         GOTO(exit, err = -ENOMEM);
1484                 
1485                 if (is_bad_inode(*start)) 
1486                         GOTO(exit, err = -EIO);
1487         }
1488         if ((*start)->i_ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
1489                 CERROR("invalid starting inode %ld\n",(*start)->i_ino);
1490                 GOTO(exit, err = -EINVAL); 
1491         }
1492         if ((*start)->i_ino < EXT3_FIRST_INO(sb)) {
1493                 if ((err = (*repeat)(*start, priv) != 0))
1494                         GOTO(exit, err);
1495                 iput(*start);
1496                 *start = iget(sb, EXT3_FIRST_INO(sb));
1497                 if (!*start)
1498                         GOTO(exit, err = -ENOMEM);
1499                 if (is_bad_inode(*start)) 
1500                         GOTO(exit, err = -EIO);
1501         }
1502
1503         gstart = ((*start)->i_ino - 1) / EXT3_INODES_PER_GROUP(sb);
1504         istart = ((*start)->i_ino - 1) % EXT3_INODES_PER_GROUP(sb);
1505         ibase = gstart * EXT3_INODES_PER_GROUP(sb);
1506         for (gnum = gstart; gnum < EXT3_SB(sb)->s_groups_count;
1507              gnum++, ibase += EXT3_INODES_PER_GROUP(sb)) {
1508                 struct buffer_head *bitmap_bh = NULL;
1509                 struct ext3_group_desc * gdp;
1510                 ino_t  ino;
1511                 
1512                 gdp = ext3_get_group_desc (sb, gnum, NULL);
1513                 if (!gdp || le16_to_cpu(gdp->bg_free_inodes_count) ==
1514                     EXT3_INODES_PER_GROUP(sb))
1515                         continue;
1516                 bitmap_bh = read_inode_bitmap(sb, gnum);
1517
1518                 if (!bitmap_bh)
1519                         continue;
1520                 ino = 0;
1521 repeat:
1522 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
1523                 ino = find_next_bit((unsigned long *)bitmap_bh->b_data, 
1524                                     EXT3_INODES_PER_GROUP(sb), ino);
1525 #else
1526                 ino = find_next_bit((unsigned long *)bitmap_bh->b_data, 
1527                                     EXT3_INODES_PER_GROUP(sb), ino);
1528 #warning"FIXME-WANGDI need to port find_next_bit to 2.4" 
1529 #endif                
1530                 if (ino < EXT3_INODES_PER_GROUP(sb)) { 
1531                         ino_t inum = ino + gnum * EXT3_INODES_PER_GROUP(sb) + 1;
1532                         if (*start) {
1533                                 if (inum < (*start)->i_ino)
1534                                         continue;
1535                         } else {
1536                                 *start = iget(sb, inum);
1537                                 if (!*start) 
1538                                         GOTO(exit, err = -ENOMEM);
1539                                 if (is_bad_inode(*start)) 
1540                                         GOTO(exit, err = -EIO);
1541                         }
1542                         if ((err = (*repeat)(*start, priv)) != 0)
1543                                 GOTO(exit, err);
1544                         iput(*start);
1545                         *start = NULL;
1546                         if (++ino < EXT3_INODES_PER_GROUP(sb))
1547                                 goto repeat;
1548                 }
1549                 istart = 0;
1550         }
1551 exit:
1552         iput(tmp);
1553         RETURN(err);
1554 }
1555
1556 static int fsfilt_ext3_iterate(struct super_block *sb,
1557                                int (*repeat)(struct inode *inode, void *priv),
1558                                struct inode **start, void *priv, int flag)
1559 {
1560         switch(flag) {
1561                 case SNAP_ITERATE_ALL_INODE:
1562                         return ext3_iterate_all (sb, repeat, start, priv);
1563                 default:
1564                         return -EINVAL;
1565         }
1566 }
1567
1568 static int fsfilt_ext3_get_snap_info(struct inode *inode, void *key, 
1569                                      __u32 keylen, void *val, 
1570                                      __u32 *vallen) 
1571 {
1572         int rc = 0;
1573         ENTRY;
1574
1575         if (!vallen || !val) {
1576                 CERROR("val and val_size is 0!\n");
1577                 RETURN(-EFAULT);
1578         }
1579         if (keylen >= strlen(MAX_SNAPTABLE_COUNT) 
1580             && strcmp(key, MAX_SNAPTABLE_COUNT) == 0) {
1581                 /*FIXME should get it from the EA_size*/
1582                *((__u32 *)val) = EXT3_MAX_SNAPS; 
1583                *vallen = sizeof(int);
1584                RETURN(rc);
1585         } else if (keylen >= strlen(SNAPTABLE_INFO) 
1586                    && strcmp(key, SNAPTABLE_INFO) == 0) {
1587                 rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX, 
1588                                     EXT3_SNAPTABLE_EA, val, *vallen); 
1589                 RETURN(rc);
1590         } else if (keylen >= strlen(SNAP_GENERATION) 
1591                    && strcmp(key, SNAP_GENERATION) == 0) {
1592                 
1593                 rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX,
1594                                     EXT3_SNAP_GENERATION, (char *)val, *vallen);
1595                 if (rc == -ENODATA) {
1596                         *((__u32 *)val) = 0; 
1597                         *vallen = sizeof(int);
1598                         rc = 0;
1599                 }
1600                 if (rc > 0) {
1601                         rc = 0;
1602                         *vallen = rc;
1603                 }
1604                 RETURN(rc);
1605         } else if (keylen >= strlen(SNAP_COUNT) && 
1606                    strcmp(key, SNAP_COUNT) == 0) {
1607                 rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX,
1608                                     EXT3_SNAP_COUNT, val, *vallen);
1609                 if (rc == -ENODATA) {
1610                         *((__u32 *)val) = 0; 
1611                         *vallen = sizeof(int);
1612                         rc = 0;
1613                 }
1614                 if (rc > 0) {
1615                         rc = 0;
1616                         *vallen = rc;
1617                 }
1618                 RETURN(rc);
1619         } else if (keylen >= strlen(SNAP_ROOT_INO) && 
1620                    (strcmp(key, SNAP_ROOT_INO) == 0)) {
1621                 
1622                 rc = ext3_xattr_get(inode, EXT3_SNAP_INDEX,
1623                                     EXT3_SNAP_ROOT_INO, val, *vallen);
1624                 if (rc > 0) {
1625                         rc = 0;
1626                         *vallen = rc;
1627                 }
1628                 RETURN(rc);
1629         }
1630         RETURN(-EINVAL);
1631
1632
1633 static int fsfilt_ext3_set_snap_info(struct inode *inode, void *key, 
1634                                      __u32 keylen, void *val, 
1635                                      __u32 *vallen)
1636 {
1637         int rc = 0;
1638         ENTRY;
1639         
1640         if (!vallen || !val) {
1641                 CERROR("val and val_size is 0!\n");
1642                 RETURN(-EFAULT);
1643         }
1644
1645         if (keylen >= strlen(SNAPTABLE_INFO) 
1646             && strcmp(key, SNAPTABLE_INFO) == 0) {
1647                 handle_t *handle;
1648                 EXT3_JOURNAL_START(inode->i_sb, handle, EXT3_XATTR_TRANS_BLOCKS,
1649                                     rc); 
1650                 if(rc)
1651                         RETURN(rc);
1652                 rc = ext3_xattr_set_handle(handle, inode, EXT3_SNAP_INDEX, 
1653                                            EXT3_SNAPTABLE_EA, val, *vallen, 0); 
1654                 journal_stop(handle);
1655                 
1656                 RETURN(rc);
1657         } else if (keylen >= strlen(SNAP_GENERATION) 
1658                    && strcmp(key, SNAP_GENERATION) == 0) {
1659                 LASSERT(inode);
1660                 rc = ext3_set_generation(inode, *(int*)val);
1661                 
1662                 RETURN(rc); 
1663         } else if (keylen >= strlen(SNAP_COUNT) && 
1664                    (strcmp(key, SNAP_COUNT) == 0)) {
1665                 handle_t *handle;
1666                 EXT3_JOURNAL_START(inode->i_sb, handle, 
1667                                    EXT3_XATTR_TRANS_BLOCKS, rc); 
1668                 if(rc)
1669                         RETURN(rc);
1670                 rc = ext3_xattr_set_handle(handle, inode, EXT3_SNAP_INDEX, 
1671                                            EXT3_SNAP_COUNT, val, *vallen, 0); 
1672                 journal_stop(handle);
1673                 
1674                 RETURN(rc);
1675         } else if (keylen >= strlen(SNAP_ROOT_INO) && 
1676                    (strcmp(key, SNAP_ROOT_INO) == 0)) {
1677                 handle_t *handle;
1678                 EXT3_JOURNAL_START(inode->i_sb, handle, 
1679                                    EXT3_XATTR_TRANS_BLOCKS, rc); 
1680                 if(rc)
1681                         RETURN(rc);
1682                 rc = ext3_xattr_set_handle(handle, inode, EXT3_SNAP_INDEX, 
1683                                            EXT3_SNAP_ROOT_INO, val, *vallen, 0); 
1684                 journal_stop(handle);
1685                 
1686                 RETURN(rc);
1687         }       
1688  
1689         RETURN(-EINVAL);
1690 }
/*
 * Size of the ext3 directory record needed for @name, computed with
 * EXT3_DIR_REC_LEN() on the name's length.  A NULL @name yields 0.
 */
static int fsfilt_ext3_dir_ent_size(char *name)
{
        if (name == NULL)
                return 0;

        return EXT3_DIR_REC_LEN(strlen(name));
}
1698
1699 static int fsfilt_ext3_set_dir_ent(struct super_block *sb, char *name, 
1700                                    char *buf, int buf_off, int nlen, size_t count)
1701 {
1702         int rc = 0; 
1703         ENTRY;
1704         if (buf_off == 0 && nlen == 0) {
1705                 struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)buf;  
1706                 LASSERT(count == PAGE_CACHE_SIZE);
1707                 de->rec_len = count;
1708                 de->inode = 0;
1709                 RETURN(rc);
1710         } else {
1711                 struct ext3_dir_entry_2 *de, *de1; 
1712                 de = (struct ext3_dir_entry_2 *)(buf + buf_off - nlen); 
1713                 de1 = (struct ext3_dir_entry_2 *)(buf + buf_off); 
1714                 int rlen, nlen;
1715  
1716                 rlen = le16_to_cpu(de->rec_len);
1717                 de->rec_len = cpu_to_le16(nlen);
1718                 
1719                 de1->rec_len = cpu_to_le16(rlen - nlen);
1720                 de1->name_len = strlen(name);
1721                 memcpy (de1->name, name, de->name_len);
1722                 nlen = EXT3_DIR_REC_LEN_DE(de1); 
1723                 LASSERT(nlen == EXT3_DIR_REC_LEN_DE(de));
1724                 RETURN(nlen);
1725         }        
1726
1727 }
1728 struct fsfilt_operations fsfilt_ext3_snap_ops = {
1729         .fs_type                = "ext3_snap",
1730         .fs_owner               = THIS_MODULE,
1731         .fs_create_indirect     = fsfilt_ext3_create_indirect,
1732         .fs_get_indirect        = fsfilt_ext3_get_indirect,
1733         .fs_set_indirect        = fsfilt_ext3_set_indirect,
1734         .fs_snap_feature        = fsfilt_ext3_snap_feature,
1735         .fs_is_redirector       = fsfilt_ext3_is_redirector,
1736         .fs_is_indirect         = fsfilt_ext3_is_indirect,
1737         .fs_get_indirect_ino    = fsfilt_ext3_get_indirect_ino,
1738         .fs_destroy_indirect    = fsfilt_ext3_destroy_indirect,
1739         .fs_restore_indirect    = fsfilt_ext3_restore_indirect,
1740         .fs_iterate             = fsfilt_ext3_iterate,
1741         .fs_copy_block          = fsfilt_ext3_copy_block,
1742         .fs_set_snap_info       = fsfilt_ext3_set_snap_info,
1743         .fs_get_snap_info       = fsfilt_ext3_get_snap_info,
1744         .fs_dir_ent_size        = fsfilt_ext3_dir_ent_size,
1745         .fs_set_dir_ent         = fsfilt_ext3_set_dir_ent,
1746 };
1747
1748
1749 static int __init fsfilt_ext3_snap_init(void)
1750 {
1751         int rc;
1752
1753         rc = fsfilt_register_ops(&fsfilt_ext3_snap_ops);
1754
1755         return rc;
1756 }
1757
1758 static void __exit fsfilt_ext3_snap_exit(void)
1759 {
1760
1761         fsfilt_unregister_ops(&fsfilt_ext3_snap_ops);
1762 }
1763
1764 module_init(fsfilt_ext3_snap_init);
1765 module_exit(fsfilt_ext3_snap_exit);
1766
1767 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
1768 MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1");
1769 MODULE_LICENSE("GPL");