Whamcloud - gitweb
add snapfs to cvs
authorwangdi <wangdi>
Sun, 4 Jan 2004 07:22:40 +0000 (07:22 +0000)
committerwangdi <wangdi>
Sun, 4 Jan 2004 07:22:40 +0000 (07:22 +0000)
15 files changed:
lustre/snapfs/cache.c [new file with mode: 0644]
lustre/snapfs/clonefs.c [new file with mode: 0644]
lustre/snapfs/dcache.c [new file with mode: 0644]
lustre/snapfs/dir.c [new file with mode: 0644]
lustre/snapfs/dotsnap.c [new file with mode: 0644]
lustre/snapfs/file.c [new file with mode: 0644]
lustre/snapfs/filter.c [new file with mode: 0644]
lustre/snapfs/inode.c [new file with mode: 0644]
lustre/snapfs/journal_ext3.c [new file with mode: 0644]
lustre/snapfs/psdev.c [new file with mode: 0644]
lustre/snapfs/snap.c [new file with mode: 0644]
lustre/snapfs/snaptable.c [new file with mode: 0644]
lustre/snapfs/super.c [new file with mode: 0644]
lustre/snapfs/symlink.c [new file with mode: 0644]
lustre/snapfs/sysctl.c [new file with mode: 0644]

diff --git a/lustre/snapfs/cache.c b/lustre/snapfs/cache.c
new file mode 100644 (file)
index 0000000..d0c8f1b
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ *
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *
+ *
+ */
+
+
+#include <stdarg.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/*
+ * XXX - Not sure for snapfs that the cache functions are even needed.
+ * Can't all lookups be done by an inode->superblock->u.generic_sbp
+ * lookup?
+ */
+
+/*
+   This file contains the routines associated with managing a
+   cache of files.  These caches need to be found
+   fast, so they are hashed by the device, with an attempt to have
+   collision chains of length 1.
+*/
+
+/*
+ * Hash table of snap caches, indexed by snap_cache_hash(dev).
+ * The intent of this hash is to have collision chains of length 1.
+ *
+ * CACHES_MASK is fully parenthesized so that it expands safely inside
+ * any expression (e.g. "~CACHES_MASK" or "CACHES_MASK * 2").
+ */
+#define CACHES_BITS 8
+#define CACHES_SIZE (1 << CACHES_BITS)
+#define CACHES_MASK (CACHES_SIZE - 1)
+static struct list_head snap_caches[CACHES_SIZE];
+
+/*
+ * Map a device number to a bucket index in snap_caches[]: the low four
+ * bits of the device number are added to bits 8-11 and the sum is
+ * masked to the table size.
+ */
+static inline int snap_cache_hash(kdev_t dev)
+{
+       int low_nibble = 0x000F & (dev);
+       int high_nibble = (0x0F00 & (dev)) >> 8;
+
+       return (CACHES_MASK) & (low_nibble + high_nibble);
+}
+
+/*
+ * Record 'dev' in the cache and link the cache onto the hash chain
+ * selected by that device number.
+ */
+inline void snap_cache_add(struct snap_cache *cache, kdev_t dev)
+{
+       struct list_head *bucket = &snap_caches[snap_cache_hash(dev)];
+
+       cache->cache_dev = dev;
+       list_add(&cache->cache_chain, bucket);
+}
+
+/* Make every bucket of the snap_caches[] hash table an empty list. */
+inline void snap_init_cache_hash(void)
+{
+       struct list_head *bucket = snap_caches;
+       struct list_head *end = snap_caches + CACHES_SIZE;
+
+       while ( bucket != end ) {
+               INIT_LIST_HEAD(bucket);
+               bucket++;
+       }
+}
+
+/*
+ * Map a device to a cache.
+ *
+ * Walks the hash chain selected by snap_cache_hash(dev) and returns the
+ * snap_cache whose cache_dev equals 'dev', or NULL if the chain holds
+ * no such entry.
+ */
+struct snap_cache *snap_find_cache(kdev_t dev)
+{
+       struct snap_cache *cache;
+       struct list_head *lh, *tmp;
+
+       lh = &(snap_caches[snap_cache_hash(dev)]);
+       /*
+        * Advance from the current element, not from the list head:
+        * re-reading lh->next on every pass would examine the first
+        * entry forever whenever it does not match 'dev'.
+        */
+       for ( tmp = lh->next; tmp != lh; tmp = tmp->next ) {
+               cache = list_entry(tmp, struct snap_cache, cache_chain);
+               if ( cache->cache_dev == dev )
+                       return cache;
+       }
+       return NULL;
+}
+
+
+/*
+ * Map an inode to a cache: look up the snap_cache registered for the
+ * inode's device.  Logs a warning and returns NULL when there is none.
+ */
+struct snap_cache *snap_get_cache(struct inode *inode)
+{
+       struct snap_cache *found = snap_find_cache(inode->i_dev);
+
+       if ( found )
+               return found;
+
+       printk("WARNING: no  cache for dev %d, ino %ld\n",
+              inode->i_dev, inode->i_ino);
+       return NULL;
+}
+
+
+/*
+ * Debugging helper: report whether 'inode' lives on a device that has
+ * a snap cache registered.  Returns 0 for a NULL inode or when no
+ * cache is found, otherwise the result of comparing the inode's device
+ * with the cache's device.
+ */
+int snap_ispresto(struct inode *inode)
+{
+       struct snap_cache *cache = NULL;
+
+       if ( inode )
+               cache = snap_get_cache(inode);
+
+       return cache ? (inode->i_dev == cache->cache_dev) : 0;
+}
+
+/*
+ * Allocate a snap_cache with SNAP_ALLOC, zero it and initialise its two
+ * list heads.  Returns whatever pointer SNAP_ALLOC produced; the
+ * initialisation is skipped when that pointer is NULL.
+ */
+struct snap_cache *snap_init_cache(void)
+{
+       struct snap_cache *cache;
+
+       SNAP_ALLOC(cache, struct snap_cache *, sizeof(struct snap_cache));
+       if ( !cache )
+               return cache;
+
+       memset(cache, 0, sizeof(struct snap_cache));
+       INIT_LIST_HEAD(&cache->cache_chain);
+       INIT_LIST_HEAD(&cache->cache_clone_list);
+       return cache;
+}
+
+
+/*
+ * Release a snap_cache structure with SNAP_FREE.  A NULL pointer is
+ * ignored.
+ */
+inline void snap_free_cache(struct snap_cache *cache)
+{
+       if (cache) {
+               SNAP_FREE(cache, sizeof(struct snap_cache));
+       }
+}
+
diff --git a/lustre/snapfs/clonefs.c b/lustre/snapfs/clonefs.c
new file mode 100644 (file)
index 0000000..1ef8078
--- /dev/null
@@ -0,0 +1,596 @@
+/*
+ * Super block/filesystem wide operations
+ *
+ * Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and
+ * Michael Callahan <callahan@maths.ox.ac.uk>
+ *
+ * Rewritten for Linux 2.1.  Peter Braam <braam@cs.cmu.edu>
+ * Copyright (C) Carnegie Mellon University
+ * 
+ * Copyright (C) 2000, Mountain View Data, Inc, authors
+ * Peter Braam <braam@mountainviewdata.com>, 
+ * Harrison Xing <harrisonx@mountainviewdata.com>
+ * 
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <linux/string.h>
+#include <asm/uaccess.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <asm/segment.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/* Clone is a simple, read-only file system that just follows redirectors.
+   We have placed the entire implementation except clone_read_super in
+   this file.
+
+   The operation tables below are declared up front so that
+   clonefs_read_inode can reference them before their initialisers
+   appear further down.
+ */
+
+struct inode_operations clonefs_dir_inode_operations;
+struct inode_operations clonefs_file_inode_operations;
+struct inode_operations clonefs_symlink_inode_operations;
+struct inode_operations clonefs_special_inode_operations;   /* NOTE(review): no initialiser and no user in the visible code */
+struct file_operations clonefs_dir_file_operations;
+struct file_operations clonefs_file_file_operations;
+struct file_operations clonefs_special_file_operations;     /* NOTE(review): no initialiser and no user in the visible code */
+
+/* support routines for following redirectors */
+
+/*
+ * Return the underlying (likely disk) fs inode for a clonefs inode.
+ *
+ * 'inode' is a clonefs inode.  This may be called before read_inode has
+ * completed on it, so only i_ino and i_sb are used.  The inode with the
+ * same number is fetched from the cache filesystem and handed to
+ * snap_redirect(); this involves handling any redirector inodes found
+ * along the way.
+ *
+ * This function is used by all clone fs interface functions to get an
+ * underlying fs inode.  Callers in this file treat a NULL return as
+ * failure and iput() a non-NULL result when they are done with it.
+ */
+struct inode *clonefs_get_inode(struct inode *inode)
+{
+       struct snap_clone_info *clone_sb;
+       struct inode *cache_inode, *redirected_inode;
+
+       ENTRY;
+
+       /* this only works if snapfs_current does NOT overwrite read_inode */
+       clone_sb = (struct snap_clone_info *) &inode->i_sb->u.generic_sbp;
+
+       /* basic invariant: clone and current ino's are equal */
+       cache_inode = iget(clone_sb->clone_cache->cache_sb, inode->i_ino);
+       if (!cache_inode) {
+               /* nothing to redirect; never dereference a NULL inode below */
+               CDEBUG(D_SNAP, "iget failed for ino %ld\n", inode->i_ino);
+               EXIT;
+               return NULL;
+       }
+
+       redirected_inode = snap_redirect(cache_inode, inode->i_sb);
+
+       CDEBUG(D_SNAP, "redirected_inode: %lx, cache_inode %lx\n",
+              (ulong) redirected_inode, (ulong) cache_inode);
+
+       CDEBUG(D_SNAP, "cache_inode: %lx, ino %ld, sb %lx, count %d\n",
+              (ulong) cache_inode, cache_inode->i_ino,
+              (ulong) cache_inode->i_sb, cache_inode->i_count);
+
+       /* only the redirected inode is handed back to the caller */
+       iput(cache_inode);
+       EXIT;
+       return redirected_inode;
+}
+
+
+/* super operations */
+/*
+ * read_inode for a clone filesystem.
+ *
+ * Finds the underlying inode for 'inode' via clonefs_get_inode(),
+ * copies its attributes with snapfs_cpy_attrs() and selects the inode
+ * operations that match the resulting file type.  If no underlying
+ * inode can be found, 'inode' is marked bad.
+ */
+static void clonefs_read_inode(struct inode *inode)
+{
+       struct inode *cache_inode;
+
+       ENTRY;
+
+       CDEBUG(D_SNAP, "inode: %lx, ino %ld, sb %lx, count %d\n",
+              (ulong) inode , inode->i_ino, (long) inode->i_sb,
+              inode->i_count);
+
+       /* redirecting inode in the cache */
+       cache_inode = clonefs_get_inode(inode);
+       if (!cache_inode) {
+               make_bad_inode(inode);
+               EXIT;
+               return;
+       }
+       /* copy attrs of that inode to our clone inode */
+       snapfs_cpy_attrs(inode, cache_inode);
+
+       if (S_ISREG(inode->i_mode))
+               inode->i_op = &clonefs_file_inode_operations;
+       else if (S_ISDIR(inode->i_mode))
+               inode->i_op = &clonefs_dir_inode_operations;
+       else if (S_ISLNK(inode->i_mode))
+               inode->i_op = &clonefs_symlink_inode_operations;
+       else if (S_ISCHR(inode->i_mode))
+               inode->i_op = &chrdev_inode_operations;
+       else if (S_ISBLK(inode->i_mode))
+               inode->i_op = &blkdev_inode_operations;
+       else if (S_ISFIFO(inode->i_mode))
+               init_fifo(inode);
+
+       /*
+        * Log while our reference is still held; cache_inode must not
+        * be touched once it has been passed to iput().
+        */
+       CDEBUG(D_SNAP, "cache_inode: %lx ino %ld, sb %lx, count %d\n",
+              (ulong) cache_inode, cache_inode->i_ino,
+              (ulong) cache_inode->i_sb, cache_inode->i_count);
+
+       iput(cache_inode);
+
+       EXIT;
+       return;
+}
+
+
+/*
+ * put_super for a clone filesystem: dput() the root dentry of the
+ * cache filesystem, unlink this clone from the list it sits on and
+ * decrement the module use count.
+ */
+static void clonefs_put_super(struct super_block *sb)
+{
+       struct snap_clone_info *clone_sb;
+       struct super_block *cache_sb;
+
+       ENTRY;
+       CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n",
+              (ulong) sb, (ulong) &sb->u.generic_sbp);
+
+       clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp;
+       cache_sb = clone_sb->clone_cache->cache_sb;
+
+       dput( cache_sb->s_root );
+       list_del(&clone_sb->clone_list_entry);
+
+       MOD_DEC_USE_COUNT;
+
+       EXIT;
+}
+
+/*
+ * statfs for a clone filesystem: delegate to the statfs method found
+ * in the cache filter's super operations, called on the cache
+ * filesystem's super block.
+ *
+ * Returns that method's result, or -EINVAL when the clone has no cache
+ * attached.
+ */
+static int clonefs_statfs(struct super_block *sb, struct statfs *buf,
+                       int bufsiz)
+{
+       struct snap_clone_info *clone_sb;
+       struct snap_cache *cache;
+
+       ENTRY;
+       clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp;
+
+       cache = clone_sb->clone_cache;
+       if (!cache) {
+               printk("clone_statfs: no cache\n");
+               /* keep ENTRY/EXIT balanced on the error path as well */
+               EXIT;
+               return -EINVAL;
+       }
+
+       EXIT;
+       return cache->cache_filter->o_caops.cache_sops->statfs
+               (cache->cache_sb, buf, bufsiz);
+}
+
+struct super_operations clone_super_ops =   /* positional initialiser: entries must stay in struct member order */
+{
+       clonefs_read_inode,       /* read_inode: copy attributes from the redirected cache inode */
+       NULL,                   /* write_inode */
+       NULL,                   /* put_inode */
+       NULL,                   /* delete_inode */
+       NULL,                   /* notify_change */
+       clonefs_put_super,      /* put_super: dput the cache root, unlink from the clone list */
+       NULL,                   /* write_super */
+       clonefs_statfs,         /* statfs: delegated to the cache filesystem */
+       NULL                    /* remount_fs */
+};
+
+
+/* ***************** end of clonefs super ops *******************  */ 
+/* ***************** begin clonefs dir ops *******************  */ 
+
+/*
+ * Unhash 'dentry', leave its d_hash list head empty and then dput() it.
+ */
+static void d_unalloc(struct dentry *dentry)
+{
+       struct list_head *hash = &dentry->d_hash;
+
+       list_del(hash);
+       INIT_LIST_HEAD(hash);
+       dput(dentry); /* this will free the dentry memory */
+}
+
+/*
+ * Look up the name in 'dentry' within the clone fs directory 'dir'.
+ *
+ * The name is looked up in the underlying fs directory through a
+ * temporary dentry.  If it resolves to an inode there, the clone fs
+ * inode with the same number is fetched with iget() and attached to
+ * 'dentry'; otherwise 'dentry' is added with a NULL inode.
+ *
+ * Returns NULL on success, or an ERR_PTR:
+ *   -ENOENT   the underlying directory inode cannot be obtained, or
+ *             the underlying lookup method returned non-NULL;
+ *   -ENOTDIR  the underlying inode has no lookup method;
+ *   -ENOMEM   the temporary dentry cannot be allocated.
+ */
+struct dentry *clonefs_lookup(struct inode *dir,  struct dentry *dentry)
+{
+       struct inode            *cache_dir;
+       struct dentry           *cache_dentry;
+       struct inode            *cache_inode;
+       struct dentry           *result;
+       struct inode            *inode;
+
+       ENTRY;
+
+       cache_dir = clonefs_get_inode(dir);
+       if (!cache_dir) {
+               /* no underlying directory: nothing to lock or search */
+               EXIT;
+               return ERR_PTR(-ENOENT);
+       }
+       if (!cache_dir->i_op || !cache_dir->i_op->lookup) {
+               iput(cache_dir);
+               EXIT;
+               return ERR_PTR(-ENOTDIR);
+       }
+
+       cache_dentry = d_alloc(dentry->d_parent, &dentry->d_name);
+       if (!cache_dentry) {
+               iput(cache_dir);
+               EXIT;
+               /* an allocation failure is not "no such entry" */
+               return ERR_PTR(-ENOMEM);
+       }
+
+       /* Lock cache directory inode. */
+       down(&cache_dir->i_sem);
+       /*
+        * Call underlying fs lookup function to set the 'd_inode' pointer
+        * to the corresponding directory inode.
+        *
+        * Note: If the lookup function does not return NULL, return
+        * from 'clone_lookup' with an error.
+        */
+       result = cache_dir->i_op->lookup(cache_dir, cache_dentry);
+       if (result) {
+               dput(cache_dentry);
+               up(&cache_dir->i_sem);
+               iput(cache_dir);
+               dentry->d_inode = NULL;
+               EXIT;
+               return ERR_PTR(-ENOENT);
+       }
+       /* Unlock cache directory inode. */
+       up(&cache_dir->i_sem);
+
+       /*
+        * If there is no inode pointer in the underlying fs 'cache_dentry'
+        * then the directory doesn't have an entry with this name.  In fs/ext2
+        * we see that we return 0 and put dentry->d_inode = NULL;
+        */
+       cache_inode = cache_dentry->d_inode;
+       if ( cache_inode == NULL ) {
+               inode = NULL;
+       } else {
+               /* note, iget below will follow a redirector, since
+                  it calls into clone_read_inode
+               */
+               inode = iget(dir->i_sb, cache_inode->i_ino);
+       }
+
+       /* dput(cache_dentry) will not put the dentry away
+        * immediately, unless we first arrange that its hash list is
+        * empty.
+        */
+
+       if ( cache_inode != NULL ) {
+               CDEBUG(D_INODE, "cache ino %ld, count %d, dir %ld, count %d\n",
+                      cache_inode->i_ino, cache_inode->i_count,
+                      cache_dir->i_ino, cache_dir->i_count);
+       }
+
+       d_unalloc(cache_dentry);
+       iput(cache_dir);
+
+       /*
+        * Add 'inode' to the directory entry 'dentry'.
+        */
+       d_add(dentry, inode);
+
+       EXIT;
+       return NULL;
+}
+
+
+/*
+ * Instantiate a file handle to the cache file: take the file
+ * operations from 'cache_inode', hook 'cache_dentry' (made to point at
+ * 'cache_inode') into 'cache_file', and copy position, mode, flags,
+ * count and owner over from 'clone_file'.  'i' is not used.
+ */
+static void clonefs_prepare_snapfile(struct inode *i,
+                                    struct file *clone_file,
+                                    struct inode *cache_inode,
+                                    struct file *cache_file,
+                                    struct dentry *cache_dentry)
+{
+       ENTRY;
+
+       /* wiring of the handle to the underlying objects */
+       cache_file->f_op = cache_inode->i_op->default_file_ops;
+       cache_dentry->d_inode = cache_inode;
+       cache_file->f_dentry = cache_dentry;
+
+       /* state inherited from the clone fs handle */
+       cache_file->f_owner = clone_file->f_owner;
+       cache_file->f_count = clone_file->f_count;
+       cache_file->f_flags = clone_file->f_flags;
+       cache_file->f_mode = clone_file->f_mode;
+       cache_file->f_pos = clone_file->f_pos;
+
+       EXIT;
+}
+
+/* update the clonefs file struct after IO in cache file
+ *
+ * Copies f_pos from 'clone_file' into 'cache_file' and i_size from
+ * 'clone_inode' into 'cache_inode'.
+ *
+ * NOTE(review): both callers in this file (clonefs_readdir and
+ * clonefs_readpage) pass the clone inode/file as the first two
+ * arguments and the cache inode/temporary file as the last two.  The
+ * data therefore flows from the cache objects back to the clone
+ * objects, as the first sentence says -- the parameter names are the
+ * reverse of what the callers pass.
+ */
+static void clonefs_restore_snapfile(struct inode *cache_inode,
+                                  struct file *cache_file, 
+                                  struct inode *clone_inode,
+                                  struct file *clone_file)
+{
+       ENTRY;
+        cache_file->f_pos = clone_file->f_pos;   /* first-argument file receives the position */
+       cache_inode->i_size = clone_inode->i_size;   /* first-argument inode receives the size */
+       EXIT;
+        return;
+}
+
+/*
+ * readdir for a clone fs directory: forward the call to the readdir
+ * method of the underlying (cache) inode through a temporary file
+ * handle, then copy the resulting state back with
+ * clonefs_restore_snapfile().
+ *
+ * Returns the underlying readdir result, -EINVAL if 'file' has no
+ * inode, -ENOMEM if the underlying inode cannot be obtained, and
+ * -ENOENT if the underlying inode provides no readdir method.
+ */
+static int clonefs_readdir(struct file *file, void *dirent,
+                          filldir_t filldir)
+{
+       int result;
+       struct inode *cache_inode;
+       struct file open_file;
+       struct dentry open_dentry;
+       struct inode *inode=file->f_dentry->d_inode;
+
+       ENTRY;
+
+       if(!inode) {
+               EXIT;
+               return -EINVAL;
+       }
+       cache_inode = clonefs_get_inode(inode);
+
+       if (!cache_inode) {
+               make_bad_inode(inode);
+               EXIT;
+               return -ENOMEM;
+       }
+
+       CDEBUG(D_INODE,"clone ino %ld\n",cache_inode->i_ino);
+
+       /*
+        * The temporary file and dentry live on the stack; clear them so
+        * the underlying fs never sees uninitialised fields.
+        */
+       memset(&open_file, 0, sizeof(open_file));
+       memset(&open_dentry, 0, sizeof(open_dentry));
+
+       clonefs_prepare_snapfile(inode, file, cache_inode, &open_file,
+                             &open_dentry);
+       /* potemkin case: we are handed a directory inode */
+       result = -ENOENT;
+       /* f_op comes from the underlying inode and may be absent */
+       if (open_file.f_op && open_file.f_op->readdir) {
+               down(&cache_inode->i_sem);
+               result = open_file.f_op->readdir(&open_file, dirent, filldir);
+               up(&cache_inode->i_sem);
+       }
+       clonefs_restore_snapfile(inode, file, cache_inode, &open_file);
+       iput(cache_inode);
+       EXIT;
+       return result;
+}
+
+struct file_operations clonefs_dir_file_operations = {   /* positional initialiser: entries must stay in struct member order */
+        NULL,                   /* lseek */
+        NULL,                   /* read -- bad */
+        NULL,                   /* write */
+        clonefs_readdir,        /* readdir: forwarded to the underlying directory inode */
+        NULL,                   /* select */
+        NULL,                   /* ioctl */
+        NULL,                   /* mmap */
+        NULL,                   /* open */
+       NULL,                   /* unlabelled slot; presumably flush -- TODO confirm against struct file_operations */
+        NULL,                   /* release */
+       NULL,                   /* fsync */
+        NULL,                   /* unlabelled slot -- TODO confirm against struct file_operations */
+       NULL,                   /* unlabelled slot -- TODO confirm against struct file_operations */
+       NULL                    /* unlabelled slot -- TODO confirm against struct file_operations */
+};
+
+struct inode_operations clonefs_dir_inode_operations =   /* read-only directory: only lookup and readdir are provided */
+{
+       &clonefs_dir_file_operations,   /* file operations for clonefs directories */
+       NULL,           /* create */
+       clonefs_lookup,   /* lookup: resolves the name in the underlying directory */
+       NULL,           /* link */
+       NULL,           /* unlink */
+       NULL,           /* symlink */
+       NULL,           /* mkdir */
+       NULL,           /* rmdir */
+       NULL,           /* mknod */
+       NULL,           /* rename */
+       NULL,           /* readlink */
+       NULL,           /* follow_link */
+       NULL,           /* readpage */
+       NULL,           /* writepage */
+       NULL,           /* bmap */
+       NULL,           /* truncate */
+       NULL,           /* permission */
+       NULL,           /* smap */
+       NULL,           /* update page */
+        NULL,           /* revalidate */
+};
+
+
+/* ***************** end of clonefs dir ops *******************  */ 
+/* ***************** begin clonefs file ops *******************  */ 
+
+/*
+ * readpage for a clone fs regular file: forward the request to the
+ * readpage method of the underlying (cache) inode through a temporary
+ * file handle whose dentry carries the clone inode number in d_fsdata.
+ *
+ * Returns the underlying readpage result, -ENOMEM if the underlying
+ * inode cannot be obtained, and -EINVAL if the underlying inode has no
+ * readpage method.
+ */
+int clonefs_readpage(struct file *file, struct page *page)
+{
+       int result;
+       struct inode *cache_inode;
+       struct file open_file;
+       struct dentry open_dentry;
+       struct inode *inode;
+
+       ENTRY;
+
+       inode = file->f_dentry->d_inode;
+       cache_inode = clonefs_get_inode(inode);
+       if (!cache_inode) {
+               make_bad_inode(inode);
+               EXIT;
+               return -ENOMEM;
+       }
+
+       /*
+        * The temporary file and dentry live on the stack; clear them so
+        * the underlying fs never sees uninitialised fields.
+        */
+       memset(&open_file, 0, sizeof(open_file));
+       memset(&open_dentry, 0, sizeof(open_dentry));
+
+       clonefs_prepare_snapfile(inode, file, cache_inode, &open_file,
+                             &open_dentry);
+       /* tell currentfs_readpage the primary inode number */
+       open_dentry.d_fsdata = (void*)inode->i_ino;
+
+       /* potemkin case: we are handed a directory inode */
+       down(&cache_inode->i_sem);
+       /* XXX - readpage NULL on directories... */
+       if (cache_inode->i_op->readpage == NULL) {
+               printk("Yes, Grigori, directories are a problem.\n");
+               /* the page was not read: do not report success */
+               result = -EINVAL;
+       } else {
+               /* hand the underlying status back to the caller */
+               result = cache_inode->i_op->readpage(&open_file, page);
+       }
+       up(&cache_inode->i_sem);
+       clonefs_restore_snapfile(inode, file, cache_inode, &open_file);
+       iput(cache_inode);
+       EXIT;
+       return result;
+}
+
+
+struct file_operations clonefs_file_file_operations = {   /* positional initialiser: entries must stay in struct member order */
+        NULL,                   /* lseek */
+        generic_file_read,      /* read -- bad */
+        NULL,                   /* write */
+        NULL,                   /* readdir */
+        NULL,                   /* select */
+        NULL,                   /* ioctl */
+        generic_file_mmap,      /* mmap */
+        NULL,                   /* open */
+       NULL,                   /* unlabelled slot; presumably flush -- TODO confirm against struct file_operations */
+        NULL,                   /* release */
+       NULL,                   /* fsync */
+        NULL,                   /* unlabelled slot -- TODO confirm against struct file_operations */
+       NULL,                   /* unlabelled slot -- TODO confirm against struct file_operations */
+       NULL                    /* unlabelled slot -- TODO confirm against struct file_operations */
+};
+
+struct inode_operations clonefs_file_inode_operations =   /* read-only regular file: only readpage is provided */
+{
+       &clonefs_file_file_operations,   /* file operations for clonefs regular files */
+       NULL,           /* create */
+       NULL,           /* lookup */
+       NULL,           /* link */
+       NULL,           /* unlink */
+       NULL,           /* symlink */
+       NULL,           /* mkdir */
+       NULL,           /* rmdir */
+       NULL,           /* mknod */
+       NULL,           /* rename */
+       NULL,           /* readlink */
+       NULL,           /* follow_link */
+       clonefs_readpage, /* readpage: forwarded to the underlying inode's readpage */
+       NULL,           /* writepage */
+       NULL,           /* bmap */
+       NULL,           /* truncate */
+       NULL,           /* permission */
+       NULL,           /* smap */
+       NULL,           /* update page */
+        NULL,           /* revalidate */
+};
+
+
+
+/* ***************** end of clonefs file ops *******************  */ 
+/* ***************** begin clonefs symlink ops *******************  */ 
+
+/*
+ * readlink for a clone fs symlink.  The dentry's inode is temporarily
+ * replaced by the underlying fs inode so that the underlying readlink
+ * method can be called on the caller's dentry, then put back.
+ *
+ * Returns the underlying readlink result, or -ENOENT when the
+ * underlying inode cannot be obtained or has no readlink method.
+ */
+int clonefs_readlink(struct dentry *dentry, char *buf, int len)
+{
+       struct inode *clone_inode;
+       struct inode *cache_inode;
+       int res = -ENOENT;
+
+       ENTRY;
+
+       cache_inode = clonefs_get_inode(dentry->d_inode);
+       if ( cache_inode == NULL ) {
+               CDEBUG(D_INODE, "clonefs_get_inode failed, NULL\n");
+               EXIT;
+               return res;
+       }
+
+       /* XXX: shall we allocate a new dentry ?
+               The following is safe for ext2, etc. because ext2_readlink only
+               use the inode info */
+
+       /* point the dentry at the cache inode for the duration of the call */
+       clone_inode = dentry->d_inode;
+       dentry->d_inode = cache_inode;
+
+       if ( !cache_inode->i_op->readlink ) {
+               CDEBUG(D_INODE,"NO readlink for ino %lu\n", cache_inode->i_ino);
+       } else {
+               res = cache_inode->i_op->readlink(dentry, buf, len);
+       }
+
+       /* put the clone inode back before dropping the cache inode */
+       dentry->d_inode = clone_inode;
+       iput(cache_inode);
+
+       EXIT;
+       return res;
+}
+
+/*
+ * follow_link for a clone fs symlink.  The dentry's inode is
+ * temporarily replaced by the underlying fs inode so that the
+ * underlying follow_link method can be called on the caller's dentry,
+ * then put back.
+ *
+ * Returns the underlying follow_link result, or ERR_PTR(-ENOENT) when
+ * the underlying inode cannot be obtained or has no follow_link method.
+ */
+struct dentry * clonefs_follow_link(struct dentry * dentry,
+                                        struct dentry *base,
+                                        unsigned int follow)
+{
+       struct inode *clone_inode;
+       struct inode *cache_inode;
+       struct dentry *res;
+
+       ENTRY;
+       res = ERR_PTR(-ENOENT);
+
+       cache_inode = clonefs_get_inode(dentry->d_inode);
+       if ( cache_inode == NULL ) {
+               CDEBUG(D_INODE, "clonefs_get_inode failed, NULL\n");
+               EXIT;
+               return res;
+       }
+
+       /* XXX: shall we allocate a new dentry ?
+               The following is safe for ext2, etc. because ext2_follow_link
+               only use the inode info */
+
+       /* point the dentry at the cache inode for the duration of the call */
+       clone_inode = dentry->d_inode;
+       dentry->d_inode = cache_inode;
+
+       if ( cache_inode->i_op->follow_link != NULL ) {
+               res = cache_inode->i_op->follow_link(dentry, base, follow);
+       }
+
+       /* put the clone inode back before dropping the cache inode */
+       dentry->d_inode = clone_inode;
+       iput(cache_inode);
+
+       EXIT;
+       return res;
+}
+
+struct inode_operations clonefs_symlink_inode_operations =   /* symlinks: only readlink and follow_link are provided */
+{
+       NULL,               /* no file operations */      
+       NULL,               /* create */                  
+       NULL,               /* lookup */                  
+       NULL,               /* link */                    
+       NULL,               /* unlink */                  
+       NULL,               /* symlink */                 
+       NULL,               /* mkdir */                   
+       NULL,               /* rmdir */                   
+       NULL,               /* mknod */                   
+       NULL,               /* rename */                  
+       clonefs_readlink,   /* readlink: forwarded to the underlying inode's readlink */
+       clonefs_follow_link,/* follow_link: forwarded to the underlying inode's follow_link */
+       NULL,               /* readpage */                
+       NULL,               /* writepage */               
+       NULL,               /* bmap */                    
+       NULL,               /* truncate */                
+       NULL,               /* permission */              
+       NULL,               /* smap */                    
+       NULL,               /* update page */             
+        NULL,               /* revalidate */          
+};
+
+
diff --git a/lustre/snapfs/dcache.c b/lustre/snapfs/dcache.c
new file mode 100644 (file)
index 0000000..88ce4bb
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Dentry (dcache) operations for SnapFS filesystem
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/* called when a cache lookup succeeds */
+
+/* XXX PJB: the intent here is to make sure that inodes which are
+   currently primary inodes under .snap directories are dropped when
+   they are COWED.  It seems hard to me to get semantics that are equally
+   good as for mounted snap_clone file systems, but we should try to get
+   close 
+*/
+/*
+ * d_revalidate for currentfs dentries.
+ *
+ * The .snap-aware check is still disabled (see the "#if 0" section), so
+ * every dentry is currently reported as valid: the function always
+ * returns 1.  'de' and 'flag' are not used by the enabled code.
+ */
+static int currentfs_dentry_revalidate(struct dentry *de, int flag)
+{
+//     struct inode *inode = de->d_inode;
+       ENTRY;
+
+       /* unless an ancestor is a .snap directory there is nothing to do */
+#if 0
+       if ( !currentfs_is_under_dotsnap(dentry) ) {
+               EXIT;
+               return 1;
+       }
+       /* XXX PJB get this to work guys! */
+       if ( de->d_parent == "dotsnap inode" && 
+            inode_is_newer_than(find_time_by_name(de->d_parent->d_name.name))){
+               1. drop this dentry 
+               2. make sure the VFS does a new lookup
+                3. probably all you need to do is 
+               return 0;
+       }
+#else 
+       /* keep ENTRY/EXIT balanced, as the disabled branch above does */
+       EXIT;
+       return 1;
+#endif
+}
+
+struct dentry_operations currentfs_dentry_ops = 
+{
+       d_revalidate: currentfs_dentry_revalidate   /* the only operation set in this table */
+};
+
diff --git a/lustre/snapfs/dir.c b/lustre/snapfs/dir.c
new file mode 100644 (file)
index 0000000..0b83fa1
--- /dev/null
@@ -0,0 +1,777 @@
+/*
+ * dir.c
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+#ifdef CONFIG_SNAPFS_EXT3
+void ext3_orphan_del(handle_t *handle, struct inode *inode);
+#endif
+
+/* Returns the inode number of the parent of @inode's first dentry alias,
+ * or 0 when @inode has no alias or that parent dentry has no inode. */
+static ino_t get_parent_ino(struct inode * inode)
+{
+       struct dentry *alias;
+       struct inode *parent;
+       ino_t parent_ino;
+
+       if (list_empty(&inode->i_dentry)) {
+               printk("snapfs ERROR: no dentry for ino %lu\n", inode->i_ino);
+               return 0;
+       }
+
+       alias = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
+       parent = alias->d_parent->d_inode;
+       parent_ino = parent ? parent->i_ino : 0;
+       dput(alias);
+       return parent_ino;
+}
+
+static void d_unadd_iput(struct dentry *dentry)
+{
+       list_del(&dentry->d_alias);             /* unlink from the inode's alias list */
+       INIT_LIST_HEAD(&dentry->d_alias);
+       list_del(&dentry->d_hash);              /* unlink from the d_hash list */
+       INIT_LIST_HEAD(&dentry->d_hash);
+       iput(dentry->d_inode);                  /* release the inode reference */
+       dentry->d_inode = NULL;                 /* leave the dentry without an inode */
+}
+
+/* Look up @dentry in @dir via the cache fs; ".snap" itself is synthesized.
+ * Returns NULL on success, else an ERR_PTR() (including the lower fs's). */
+static struct dentry *currentfs_lookup(struct inode * dir,struct dentry *dentry)
+{
+       struct snap_cache *cache;
+       struct dentry *rc;
+       struct inode_operations *iops;
+       struct inode *cache_inode;
+       int index;
+
+       ENTRY;
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return ERR_PTR(-EINVAL);
+       }
+
+       if ( dentry->d_name.len == strlen(".snap") &&
+            (memcmp(dentry->d_name.name, ".snap", strlen(".snap")) == 0) ) {
+               struct inode *snap;
+               ino_t ino;
+
+               /* Don't permit .snap in clonefs, nor .snap under .snap */
+               if( dentry->d_sb != cache->cache_sb ||
+                   currentfs_is_under_dotsnap(dentry) ) {
+                       EXIT;
+                       return ERR_PTR(-ENOENT);
+               }
+               ino = 0xF0000000 | dir->i_ino;
+               snap = iget(dir->i_sb, ino);
+               if( !snap ) {   /* iget() failed: nothing to d_add() */
+                       EXIT;
+                       return ERR_PTR(-EINVAL);
+               }
+               CDEBUG(D_INODE, ".snap inode ino %ld, mode %o\n", snap->i_ino, snap->i_mode);
+               d_add(dentry, snap);
+               EXIT;
+               return NULL;
+       }
+
+       iops = filter_c2cdiops(cache->cache_filter);
+       if (!iops || !iops->lookup) {
+               EXIT;
+               return ERR_PTR(-EINVAL);
+       }
+
+       rc = iops->lookup(dir, dentry);
+       if ( rc || !dentry->d_inode) {
+               EXIT;
+               return rc;      /* pass a lower-fs ERR_PTR up, do not mask it */
+       }
+
+       /*
+        * If we are under dotsnap, we need to save extra data in
+        * dentry->d_fsdata:  for a dir, only _this_ snapshot's index;
+        * for others, the primary ino, from which the index can be found
+        * later anyway.
+        */
+       cache_inode = dentry->d_inode;
+       index = currentfs_is_under_dotsnap(dentry);
+       if ( index ) {
+               struct snapshot_operations *snapops;
+               struct inode *ind_inode;
+               ino_t pri_ino, ind_ino;
+
+               pri_ino = cache_inode->i_ino;
+               snapops = filter_c2csnapops(cache->cache_filter);
+               if( !snapops )
+                       goto err_out;
+               /* ino_t is unsigned, so "<= 0" cannot see errnos: test the range */
+               ind_ino = snapops->get_indirect_ino(cache_inode, index);
+               if( ind_ino != -ENOATTR &&
+                   (ind_ino == 0 || IS_ERR((void *)ind_ino)) )
+                       goto err_out;
+               if( ind_ino != -ENOATTR ){
+                       ind_inode = iget(cache_inode->i_sb, ind_ino);
+                       if( !ind_inode )
+                               goto err_out;
+                       list_del(&dentry->d_alias);
+                       INIT_LIST_HEAD(&dentry->d_alias);
+                       list_add(&dentry->d_alias, &ind_inode->i_dentry);
+                       dentry->d_inode = ind_inode;
+                       iput(cache_inode);
+               }
+               if( S_ISDIR(dentry->d_inode->i_mode) )
+                       dentry->d_fsdata = (void*)index;
+               else
+                       dentry->d_fsdata = (void*)pri_ino;
+       }
+       EXIT;
+       return NULL;
+#if 0
+       /* XXX: PJB these need to be set up again. See dcache.c */
+       printk("set up dentry ops\n");
+       CDEBUG(D_CACHE, "\n");
+        filter_setup_dentry_ops(cache->cache_filter,
+                                dentry->d_op, &currentfs_dentry_ops);
+        dentry->d_op = filter_c2udops(cache->cache_filter);
+        CDEBUG(D_CACHE, "\n");
+#endif
+
+err_out:
+       d_unadd_iput(dentry);
+       EXIT;
+       return ERR_PTR(-EINVAL);
+}
+
+/* Create @dentry in @dir through the cache fs's create op, COWing @dir first
+ * when snap_needs_cow() asks for it.  Returns -EPERM below .snap. */
+static int currentfs_create(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       void *handle = NULL;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_CREATE);
+       if ( snap_needs_cow(dir) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+
+       iops = filter_c2cdiops(cache->cache_filter);
+       if (!iops || !iops->create) {
+               rc = -EINVAL;
+               goto exit;
+       }
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 2);
+       rc = iops->create(dir, dentry, mode);
+       if ( rc )       /* as in currentfs_mkdir(): a failed create is not "d_inode is NULL" */
+               goto exit;
+
+       /* XXX now set the correct snap_{file,dir,sym}_iops */
+       if ( ! dentry->d_inode) {
+               printk("Error in currentfs_create, dentry->d_inode is NULL\n");
+               goto exit;
+       }
+
+       if ( S_ISDIR(dentry->d_inode->i_mode) )
+               dentry->d_inode->i_op = filter_c2udiops(cache->cache_filter);
+       else if ( S_ISREG(dentry->d_inode->i_mode) ) {
+               if ( !filter_c2cfiops(cache->cache_filter) ) {
+                       filter_setup_file_ops(cache->cache_filter,
+                               dentry->d_inode->i_op, &currentfs_file_iops);
+               }
+               dentry->d_inode->i_op = filter_c2ufiops(cache->cache_filter);
+       }
+       printk("inode %lu, i_op %p\n", dentry->d_inode->i_ino, dentry->d_inode->i_op);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_CREATE, 3);
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+/* Make directory @dentry in @dir through the cache fs's mkdir op; @dir is
+ * COWed first when snap_needs_cow() asks for it.  -EPERM below .snap. */
+static int currentfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct inode_operations *lower_ops;
+       struct snap_cache *cache;
+       void *handle = NULL;
+       int rc;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if (cache == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_MKDIR);
+
+       if (snap_needs_cow(dir) != -1) {
+               CDEBUG(D_FILE, "snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+
+       lower_ops = filter_c2cdiops(cache->cache_filter);
+       if (lower_ops == NULL || lower_ops->mkdir == NULL) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 2);
+       rc = lower_ops->mkdir(dir, dentry, mode);
+       if (rc != 0)
+               goto exit;
+
+       /* XXX now set the correct snap_{file,dir,sym}_iops */
+       if (dentry->d_inode == NULL) {
+               printk("Error in currentfs_mkdir, dentry->d_inode is NULL\n");
+       } else {
+               dentry->d_inode->i_op = filter_c2udiops(cache->cache_filter);
+               printk("inode %lu, i_op %p\n", dentry->d_inode->i_ino, dentry->d_inode->i_op);
+       }
+
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_MKDIR, 3);
+
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+static int currentfs_link (struct dentry * old_dentry, struct inode * dir, 
+                       struct dentry *dentry)
+{
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       void *handle = NULL;
+       /* Hard-link old_dentry's inode as dentry in dir via the cache fs's link op. */
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) { 
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_LINK);
+       /* snap_do_cow() the target dir and the linked inode when snap_needs_cow() != -1 */
+       if ( snap_needs_cow(dir) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+        if ( snap_needs_cow(old_dentry->d_inode) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",old_dentry->d_inode->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 2);
+               snap_do_cow(old_dentry->d_inode, dir->i_ino, 0);
+       }
+       /* NOTE(review): fail point 2 is used above and below; dir (not old parent) is passed as parent ino - confirm */
+       iops = filter_c2cdiops(cache->cache_filter); 
+       if (!iops ||
+           !iops->link) {
+               rc = -EINVAL;
+               goto exit;
+       }
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 2);
+       rc = iops->link(old_dentry,dir, dentry);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_LINK, 3);
+       
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+/* Create symlink @dentry -> @symname in @dir via the cache fs's symlink op. */
+static int currentfs_symlink(struct inode *dir, struct dentry *dentry,
+                       const char * symname)
+{
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       void *handle = NULL;
+
+       ENTRY;
+       if (currentfs_is_under_dotsnap(dentry)) {       /* as in the other modifying ops */
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+       handle = snap_trans_start(cache, dir, SNAP_OP_SYMLINK);
+       if ( snap_needs_cow(dir) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+       iops = filter_c2cdiops(cache->cache_filter);
+       if (!iops || !iops->symlink) {
+               rc = -EINVAL;
+               goto exit;
+       }
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 2);
+       rc = iops->symlink(dir, dentry, symname);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_SYMLINK, 3);
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+/* Create the node described by @mode/@rdev in @dir via the cache fs's mknod op. */
+static int currentfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+                       int rdev)
+{
+       struct inode_operations *lower_ops;
+       struct snap_cache *cache;
+       void *handle = NULL;
+       int rc;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if (cache == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_MKNOD);
+
+       if (snap_needs_cow(dir) != -1) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+
+       lower_ops = filter_c2cdiops(cache->cache_filter);
+       if (lower_ops == NULL || lower_ops->mknod == NULL) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 2);
+       rc = lower_ops->mknod(dir, dentry, mode, rdev);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_MKNOD, 3);
+
+       /* XXX do we need to set the correct snap_{*}_iops */
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+/* Remove directory @dentry from @dir via the cache fs's rmdir op.  If the victim
+ * needs COW or is a redirector, its i_nlink/i_size are restored afterwards. */
+static int currentfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       struct inode *inode = NULL;
+//     time_t i_ctime = 0;
+       nlink_t i_nlink = 0;
+       off_t   i_size = 0;
+       ino_t ino = 0;
+       int keep_inode = 0;
+//     struct dentry_operations *save_dop = NULL;
+       void *handle = NULL;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_RMDIR);
+
+       if ( snap_needs_cow(dir) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+
+       iops = filter_c2cdiops(cache->cache_filter);
+       if (!iops ||
+           !iops->rmdir) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       /* XXX : there are two cases in which we can't remove this inode
+               from disk:
+               1. the inode needs to be cowed.
+               2. the inode is a redirector.
+               Then we must keep this inode (dir), so that rmdir only
+               removes the dentry and the inode itself is not deleted. */
+
+       if( snap_needs_cow(dentry->d_inode) != -1) {
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 2);
+               snap_do_cow (dentry->d_inode, dir->i_ino,
+                               SNAP_DEL_PRI_WITHOUT_IND);
+               keep_inode = 1;
+       }
+       else if( snap_is_redirector(dentry->d_inode) ) {
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 3);
+               snap_do_cow(dentry->d_inode, dir->i_ino, SNAP_DEL_PRI_WITH_IND);
+               keep_inode = 1;
+       }
+#if 0
+       if ( keep_inode ) {     
+                       printk("set up dentry ops, before %p\n",dentry->d_op);
+               save_dop = dentry->d_op;
+
+               filter_setup_dentry_ops(cache->cache_filter,
+                                dentry->d_op, &currentfs_dentry_ops);
+               dentry->d_op = filter_c2udops(cache->cache_filter);
+
+                       printk("set up dentry ops, after %p\n",dentry->d_op);
+
+       }
+#endif
+
+       if( keep_inode && dentry->d_inode ) {
+               ino = dentry->d_inode->i_ino;
+       //      i_ctime = dentry->d_inode->i_ctime;
+               i_nlink = dentry->d_inode->i_nlink;
+               i_size = dentry->d_inode->i_size;
+       }
+
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 4);
+       rc = iops->rmdir(dir, dentry);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 5);
+
+       /* XXX : check this */
+#if 0
+       if ( keep_inode ) {
+               dentry->d_op = save_dop;
+               printk("restore dentry ops, now at %p\n",dentry->d_op);
+       }
+
+#endif
+
+       if( keep_inode && ino) {
+               inode = iget ( dir->i_sb, ino);
+               if( inode) {
+//                     inode->i_ctime = i_ctime;
+                       inode->i_nlink = i_nlink;
+                       inode->i_size = i_size;
+                       mark_inode_dirty(inode);
+#ifdef CONFIG_SNAPFS_EXT3
+                       /*
+                        * In Ext3, rmdir() will put this inode into
+                        * orphan list, we must remove it out. It's ugly!!
+                        */
+                       if( cache->cache_type == FILTER_FS_EXT3 )
+                               ext3_orphan_del(handle, inode);
+#endif
+                       iput( inode);   /* only after the last use of inode above */
+                       snap_debug_device_fail(dir->i_dev, SNAP_OP_RMDIR, 6);
+               }
+       }
+
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+static int currentfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       void *handle = NULL;
+       /* Unlink dentry from dir via the cache fs's unlink op; refused with -EPERM below .snap. */
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) { 
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, dir, SNAP_OP_UNLINK);
+
+       if ( snap_needs_cow(dir) != -1 ) {
+               printk("snap_needs_cow for ino %lu \n",dir->i_ino);
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 1);
+               snap_do_cow(dir, get_parent_ino(dir), 0);
+       }
+
+       iops = filter_c2cdiops(cache->cache_filter); 
+       if (!iops ||
+           !iops->unlink) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       /* XXX : if nlink for this inode is 1, there are two cases that we 
+               can't remove this inode from disk. 
+               1. the inode needs to be cowed. 
+               2. the inode is a redirector.
+               then we increment dentry->d_inode->i_nlink so that the inode 
+               will not be deleted after unlink, will only remove dentry 
+       */
+       /* NOTE(review): the i_nlink bump below is not undone if iops->unlink() fails - confirm intended */
+       if( snap_needs_cow (inode) != -1) {
+               /* call snap_do_cow with DEL_WITHOUT_IND option */
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 2);
+               snap_do_cow(inode, dir->i_ino,SNAP_DEL_PRI_WITHOUT_IND);
+               if( inode->i_nlink == 1 )
+                       inode->i_nlink++;
+       }
+       else if( snap_is_redirector (inode) && inode->i_nlink == 1 ) {
+               /* call snap_do_cow with DEL_WITH_IND option 
+                * just free the blocks of inode, not really delete it
+                */
+               snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 3);
+               snap_do_cow (inode, dir->i_ino, SNAP_DEL_PRI_WITH_IND);
+               inode->i_nlink++;
+       }       
+
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 4);
+       rc = iops->unlink(dir, dentry);
+       snap_debug_device_fail(dir->i_dev, SNAP_OP_UNLINK, 5);
+
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+static int currentfs_rename (struct inode * old_dir, struct dentry *old_dentry,
+                       struct inode * new_dir, struct dentry *new_dentry)
+{
+       struct snap_cache *cache;
+       int rc;
+       struct inode_operations *iops;
+       void *handle = NULL;
+       /* Rename via the cache fs's rename op; -EPERM if either dentry is below .snap. */
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(old_dentry) ||
+           currentfs_is_under_dotsnap(new_dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       cache = snap_find_cache(old_dir->i_dev);
+       if ( !cache ) { 
+               EXIT;
+               return -EINVAL;
+       }
+
+       handle = snap_trans_start(cache, old_dir, SNAP_OP_RENAME);
+       
+        /* Always cow the old dir and old dentry->d_inode */ 
+       if ( snap_needs_cow(old_dir) != -1 ) {
+               printk("rename: needs_cow for old_dir %lu\n",old_dir->i_ino);
+               snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 1);
+               snap_do_cow(old_dir, get_parent_ino(old_dir), 0);
+       }
+       if( snap_needs_cow (old_dentry->d_inode) != -1) {
+               printk("rename: needs_cow for old_dentry, ino %lu\n",
+                       old_dentry->d_inode->i_ino);
+               snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 2);
+               snap_do_cow(old_dentry->d_inode, old_dir->i_ino,0);
+       }
+
+       /* If it's not in the same dir, whether the new_dentry is NULL or not,
+         * we should cow the new_dir. Because rename will use the ino of 
+         * old_dentry as the ino of the new_dentry in new_dir. 
+         */
+       if(( old_dir != new_dir) ) {
+               if( snap_needs_cow(new_dir) !=-1 ){
+                       printk("rename:snap_needs_cow for new_dir %lu\n",
+                               new_dir->i_ino);
+                       snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,3);
+                       snap_do_cow(new_dir, get_parent_ino(new_dir),0);        
+               }
+       }
+
+#if 0
+       if( ( old_dir != new_dir) && ( new_dentry->d_inode )) {
+               if(snap_needs_cow(new_dentry->d_inode) !=-1 ){
+                       printk("rename:needs_cow for new_entry ,ino %lu\n",
+                               new_dentry->d_inode->i_ino);
+                       snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 4);
+                       snap_do_cow (new_dentry->d_inode, 
+                               new_dentry->d_parent->d_inode->i_ino, 0);       
+               }
+       }
+#endif
+        /* The inode for the new_dentry will be freed for normal rename option.
+         * But we should keep this inode since we need to keep it available 
+         * for the clone and for snap rollback
+         */
+        if( new_dentry->d_inode && new_dentry->d_inode->i_nlink == 1 ) {
+               if( snap_needs_cow (new_dentry->d_inode) != -1) {
+                       /* call snap_do_cow with DEL_WITHOUT_IND option */
+                       snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,4);
+                       snap_do_cow(new_dentry->d_inode, new_dir->i_ino,
+                                    SNAP_DEL_PRI_WITHOUT_IND);
+                       new_dentry->d_inode->i_nlink++;
+               }
+               else if( snap_is_redirector (new_dentry->d_inode) ) {
+                       /* call snap_do_cow with DEL_WITH_IND option 
+                        * just free the blocks of inode, not really delete it
+                        */
+                       snap_debug_device_fail(old_dir->i_dev,SNAP_OP_RENAME,4);
+                       snap_do_cow (new_dentry->d_inode, new_dir->i_ino, 
+                                     SNAP_DEL_PRI_WITH_IND);
+                       new_dentry->d_inode->i_nlink++;
+               }       
+        }
+       /* NOTE(review): both branches above use fail point 4, and the i_nlink bump is not undone if rename fails - confirm */
+       iops = filter_c2cdiops(cache->cache_filter); 
+       if (!iops ||
+           !iops->rename) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 5);
+       rc = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
+       snap_debug_device_fail(old_dir->i_dev, SNAP_OP_RENAME, 6);
+
+exit:
+       snap_trans_commit(cache, handle);
+       EXIT;
+       return rc;
+}
+
+/* readdir for currentfs directories: emits a synthetic ".snap" entry first
+ * (when enabled, and not in clonefs or below .snap), then the cache fs's. */
+static int currentfs_readdir(struct file *filp, void *dirent,
+                            filldir_t filldir)
+{
+       struct snap_cache *cache;
+       struct file_operations *fops;
+       int rc;
+       const int dotsnap_slot = strlen(".snap") + 1;  /* f_pos span taken by ".snap" */
+
+       ENTRY;
+       if( !filp || !filp->f_dentry || !filp->f_dentry->d_inode ) {
+               EXIT;
+               return -EINVAL;
+       }
+       cache = snap_find_cache(filp->f_dentry->d_inode->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+       fops = filter_c2cdfops( cache->cache_filter );
+       if( !fops || !fops->readdir ) {
+               EXIT;
+               return -EINVAL;
+       }
+       /* no action if we are under clonefs or .snap */
+       if( cache->cache_show_dotsnap &&
+           (filp->f_dentry->d_sb == cache->cache_sb) &&
+           !currentfs_is_under_dotsnap(filp->f_dentry) ){
+               if( filp->f_pos == 0 ){
+                       /* filldir's namlen must not count the NUL */
+                       if( filldir(dirent, ".snap", strlen(".snap"), filp->f_pos,
+                                   0xF0000000|filp->f_dentry->d_inode->i_ino) ){
+                               EXIT;
+                               return -EINVAL;
+                       }
+                       filp->f_pos += dotsnap_slot;
+               }
+               filp->f_pos -= dotsnap_slot;
+               rc = fops->readdir(filp, dirent, filldir);
+               filp->f_pos += dotsnap_slot;
+       }else
+               rc = fops->readdir(filp, dirent, filldir);
+       EXIT;
+       return rc;
+}
+
+struct file_operations currentfs_dir_fops = {
+       readdir: currentfs_readdir              /* may prepend a synthetic ".snap" entry */
+};
+
+struct inode_operations currentfs_dir_iops = { 
+       default_file_ops: &currentfs_dir_fops,
+       create: currentfs_create,               /* each op below forwards to the cache fs's op */
+       mkdir: currentfs_mkdir,
+       link: currentfs_link,
+       symlink: currentfs_symlink,
+       mknod: currentfs_mknod,
+       rmdir: currentfs_rmdir,
+       unlink: currentfs_unlink,
+       rename: currentfs_rename,
+       lookup: currentfs_lookup                /* also synthesizes ".snap" */
+};
diff --git a/lustre/snapfs/dotsnap.c b/lustre/snapfs/dotsnap.c
new file mode 100644 (file)
index 0000000..19dfc1c
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * dotsnap.c - support for .snap directories
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+struct inode_operations dotsnap_inode_operations;
+struct file_operations dotsnap_file_operations;
+
+/* Walk up from @de.  On meeting a .snap inode (built as 0xF0000000 | ino, so all
+ * four top bits are required) return the d_fsdata index of its child; else 0. */
+int currentfs_is_under_dotsnap(struct dentry *de)
+{
+       int index = 0;
+       while(de && de->d_parent != de) {
+               if ( de->d_inode && (de->d_inode->i_ino & 0xF0000000) == 0xF0000000 ) {
+                       EXIT;
+                       return index;
+               }
+               index = (int)de->d_fsdata;
+               de = de->d_parent;
+       }
+       EXIT;
+       return 0;
+}
+
+/* Fill in the in-core attributes of a synthetic .snap directory inode
+ * from the snapshot table that @cache refers to. */
+void currentfs_dotsnap_read_inode(struct snap_cache *cache,
+                                 struct inode *inode)
+{
+       struct snap_table *tbl;
+
+       ENTRY;
+       tbl = &snap_tables[cache->cache_snap_tableno];
+       inode->i_uid = 0;
+       inode->i_gid = 0;
+       inode->i_mode = S_IFDIR | 0755;
+       inode->i_op = &dotsnap_inode_operations;
+       /* all except current form a subdirectory ... */
+       inode->i_size = tbl->tbl_count - 1;
+       inode->i_nlink = tbl->tbl_count - 1 + 2;        /* ... plus . and .. */
+       EXIT;
+}
+
+/* Look up a snapshot name inside a .snap directory.  On a match @dentry is
+ * bound to the indirect inode for that snapshot (or the primary inode when
+ * there is none) and tagged with the snapshot index in d_fsdata. */
+struct dentry *dotsnap_lookup(struct inode *dir,  struct dentry *dentry)
+{
+       struct snap_table       *table;
+       struct snap_cache       *cache;
+       int i;
+       int index = 0;
+       int tableno;
+       ino_t ino;
+       struct inode *inode;
+       struct snapshot_operations *snapops;
+
+       ENTRY;
+
+       cache = snap_find_cache(dir->i_dev);
+       if ( !cache ) {
+               printk("dotsnap_lookup: cannot find cache\n");
+               make_bad_inode(dir);
+               EXIT;
+               return ERR_PTR(-EINVAL);
+       }
+
+       snapops = filter_c2csnapops(cache->cache_filter);
+       if (!snapops || !snapops->get_indirect_ino) {
+               EXIT;
+               return ERR_PTR(-EINVAL);
+       }
+
+       tableno = cache->cache_snap_tableno;
+       table = &snap_tables[tableno];
+       if( table->tbl_count <= 1 )
+               return NULL;
+
+       for ( i = 1 ; i < table->tbl_count ; i++ ) {
+               if ( (dentry->d_name.len == strlen(table->tbl_name[i])) &&
+                    (memcmp(dentry->d_name.name, table->tbl_name[i],
+                            dentry->d_name.len) == 0) ) {
+                       index = table->tbl_index[i];
+                       break;
+               }
+       }
+       if( i >= table->tbl_count )
+               return ERR_PTR(-ENOENT);
+
+       /* the inode whose number has the 0xF0000000 marker bits cleared */
+       inode = iget(dir->i_sb, dir->i_ino & (~0xF0000000));
+       if ( !inode )
+               return ERR_PTR(-EINVAL);
+
+       ino =  snapops->get_indirect_ino(inode, index);
+       iput(inode);
+
+       if ( ino == -ENOATTR || ino == 0 )
+               ino = dir->i_ino & (~0xF0000000);
+       /* any other errno must not reach iget() as an inode number */
+       if ( IS_ERR((void *)ino) )
+               return ERR_PTR(-EINVAL);
+       CDEBUG(D_INODE, "index %d, ino is %lu\n",index, ino);
+
+       inode = iget(dir->i_sb, ino);
+       if ( !inode )   /* iget() failed: nothing to d_add() or to set i_op on */
+               return ERR_PTR(-EINVAL);
+       d_add(dentry, inode);
+       dentry->d_fsdata = (void*)index;
+       inode->i_op = dentry->d_parent->d_parent->d_inode->i_op;
+       return NULL;
+}
+
+
+/* readdir for a .snap directory: one entry per snapshot in the cache's table
+ * other than slot 0; f_pos counts the entries already emitted. */
+static int dotsnap_readdir(struct file * filp,
+                          void * dirent, filldir_t filldir)
+{
+       unsigned int i;
+       int tableno;
+       struct snap_cache *cache;
+       struct snap_table *table;
+       struct snapshot_operations *snapops;
+
+       ENTRY;
+       cache = snap_find_cache(filp->f_dentry->d_inode->i_dev);
+       if ( !cache ) {
+               printk("dotsnap_readdir: cannot find cache\n");
+               make_bad_inode(filp->f_dentry->d_inode);
+               EXIT;
+               return -EINVAL;
+       }
+
+       snapops = filter_c2csnapops(cache->cache_filter);
+       if (!snapops || !snapops->get_indirect_ino) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       tableno = cache->cache_snap_tableno;
+       table = &snap_tables[tableno];
+       CDEBUG(D_INODE, "\n");
+       /* same guard as dotsnap_lookup(): "tbl_count - 1" below must not wrap */
+       if( table->tbl_count <= 1 ) {
+               EXIT;
+               return 0;
+       }
+       for (i = filp->f_pos ; i < table->tbl_count -1 ; i++) {
+               int index;
+               ino_t ino;
+
+               CDEBUG(D_INODE, "%d\n",i);
+               index = table->tbl_index[i+1];
+               /* NOTE(review): dotsnap_lookup() passes the primary inode here, not the .snap one - confirm */
+               ino =  snapops->get_indirect_ino
+                       (filp->f_dentry->d_inode, index);
+               CDEBUG(D_INODE, "\n");
+               if ( ino == -ENOATTR || ino == 0 )
+                       ino = filp->f_dentry->d_parent->d_inode->i_ino;
+               CDEBUG(D_INODE, "\n");
+               if ( ino == -EINVAL ) {
+                       return -EINVAL;
+               }
+
+               CDEBUG(D_INODE, "Listing %s\n", table->tbl_name[i+1]);
+               if (filldir(dirent, table->tbl_name[i+1],
+                           strlen(table->tbl_name[i+1]),
+                           filp->f_pos, ino) < 0){
+                       CDEBUG(D_INODE, "\n");
+                       break;
+               }
+               filp->f_pos++;
+       }
+       EXIT;
+       return 0;
+}
+
+
+struct file_operations dotsnap_file_operations = {
+        readdir: dotsnap_readdir,        /* lists the snapshot names of the table */
+};
+
+struct inode_operations dotsnap_inode_operations =
+{
+       default_file_ops: &dotsnap_file_operations,
+       lookup: dotsnap_lookup          /* resolves a snapshot name to an inode */
+};
diff --git a/lustre/snapfs/file.c b/lustre/snapfs/file.c
new file mode 100644 (file)
index 0000000..38c5652
--- /dev/null
@@ -0,0 +1,259 @@
+/*
+ * file.c
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/*
+ * Instantiate a temporary file handle onto the cache file.
+ *
+ * inode        - not used here; kept for the existing call signature
+ * clone_file   - file whose f_pos, f_mode, f_flags, f_count and f_owner
+ *                are copied into the temporary handle
+ * cache_inode  - inode the temporary handle refers to; its
+ *                i_op->default_file_ops become cache_file->f_op
+ * cache_file   - caller-provided file struct to fill in
+ * cache_dentry - caller-provided dentry, attached to cache_file and
+ *                pointed at cache_inode
+ */
+static void currentfs_prepare_snapfile(struct inode *inode,
+                                    struct file *clone_file,
+                                    struct inode *cache_inode,
+                                    struct file *cache_file,
+                                    struct dentry *cache_dentry)
+{
+       ENTRY;
+
+       /*
+        * The caller hands in uninitialised stack objects and only a few
+        * fields are set below; zero everything else so the lower file
+        * system never reads stack garbage through this handle.
+        */
+       memset(cache_file, 0, sizeof(*cache_file));
+       memset(cache_dentry, 0, sizeof(*cache_dentry));
+
+       cache_file->f_pos = clone_file->f_pos;
+       cache_file->f_mode = clone_file->f_mode;
+       cache_file->f_flags = clone_file->f_flags;
+       cache_file->f_count = clone_file->f_count;
+       cache_file->f_owner = clone_file->f_owner;
+       cache_file->f_op = cache_inode->i_op->default_file_ops;
+       cache_file->f_dentry = cache_dentry;
+       cache_file->f_dentry->d_inode = cache_inode;
+
+       EXIT;
+       return;
+}
+
+/*
+ * Update the currentfs file struct after IO in the cache file: copies
+ * clone_file->f_pos into cache_file->f_pos.  The two inode arguments
+ * are not used.
+ *
+ * NOTE(review): currentfs_readpage() calls this as
+ * (inode, file, cache_inode, &open_file), so at that call site
+ * "cache_file" is the caller's own file and "clone_file" is the
+ * temporary handle -- the parameter names read backwards there.
+ */
+static void currentfs_restore_snapfile(struct inode *cache_inode,
+                                  struct file *cache_file, 
+                                  struct inode *clone_inode,
+                                  struct file *clone_file)
+{
+       ENTRY;
+        cache_file->f_pos = clone_file->f_pos;
+       EXIT;
+        return;
+}
+
+
+/*
+ * write() for files of the current file system.
+ *
+ * Steps, in order:
+ *  - refuse writes below the dotsnap directory (-ENOSPC);
+ *  - look up the snap cache for the inode's device (-EINVAL if none);
+ *  - run snap_do_cow() when snap_needs_cow() does not return -1;
+ *  - work out the first and last block that the write covers only
+ *    partially; for each such block that the inode does not map itself
+ *    (bmap returns 0), walk the snap table from slot tbl_count down to 1
+ *    and copy the block back from the first indirect inode that maps it;
+ *  - finally hand the write to the cache file system's write method.
+ *
+ * Returns the lower write's result, or a negative errno as above.
+ */
+static ssize_t currentfs_write (struct file *filp, const char *buf,
+                               size_t count, loff_t *ppos)
+{
+       struct snap_cache *cache;
+       struct inode *inode = filp->f_dentry->d_inode;
+       ssize_t rc;
+       struct file_operations *fops;
+       loff_t pos;
+       long block[2]={-1,-1}, mask, i;
+       struct snap_table *table;
+       int slot = 0;
+       int index = 0;
+       struct inode_operations *ciops;
+       struct inode *cache_inode = NULL;
+       struct snapshot_operations *snapops;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(filp->f_dentry)) {
+               EXIT;
+               return -ENOSPC;
+       }
+
+       cache = snap_find_cache(inode->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       if ( snap_needs_cow(inode) != -1 ) {
+               CDEBUG(D_FILE, "snap_needs_cow for ino %lu \n",inode->i_ino);
+               snap_do_cow(inode, filp->f_dentry->d_parent->d_inode->i_ino, 0);
+       }
+
+       fops = filter_c2cffops(cache->cache_filter);
+       if (!fops ||
+           !fops->write) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       if (filp->f_flags & O_APPEND)
+               pos = inode->i_size;
+       else {
+               pos = *ppos;
+               /* both sides are loff_t here; kept from the original */
+               if (pos != *ppos) {
+                       EXIT;
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * we only need to copy back the first and last blocks
+        * (block[0]: write starts mid-block, block[1]: write ends mid-block)
+        */
+       mask = inode->i_sb->s_blocksize-1;
+       if( pos & mask )
+               block[0] = pos >> inode->i_sb->s_blocksize_bits;
+       pos += count - 1;
+       if( (pos+1) &  mask )
+               block[1] = pos >> inode->i_sb->s_blocksize_bits;
+       if( block[0] == block[1] )
+               block[1] = -1;
+
+       ciops = filter_c2cfiops(cache->cache_filter);
+       snapops = filter_c2csnapops(cache->cache_filter);
+
+       /* same treatment as the fops check above: fail instead of oopsing */
+       if (!ciops || !ciops->bmap) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       for( i=0; i<2; i++ ){
+               if( block[i] == -1 || ciops->bmap(inode, block[i]) )
+                       continue;
+
+               table = &snap_tables[cache->cache_snap_tableno];
+               for (slot = table->tbl_count ; slot >= 1; slot--) {
+                       int mapped;
+
+                       index = table->tbl_index[slot];
+                       cache_inode = snap_get_indirect(inode, NULL, index);
+                       if ( !cache_inode )
+                               continue;
+
+                       mapped = cache_inode->i_op &&
+                                cache_inode->i_op->bmap &&
+                                cache_inode->i_op->bmap(cache_inode, block[i]);
+                       if (mapped) {
+                               CDEBUG(D_FILE, "find cache_ino %lu\n",
+                                       cache_inode->i_ino);
+                               if( snapops && snapops->copy_block) {
+                                       snapops->copy_block( inode,
+                                                       cache_inode, block[i]);
+                               }
+                       }
+
+                       /* the reference is dropped on both outcomes */
+                       iput(cache_inode);
+                       if (mapped)
+                               break;
+               }
+       }
+
+       rc = fops->write(filp, buf, count, ppos);
+
+       EXIT;
+       return rc;
+}
+
+/*
+ * readpage for files of the current file system.
+ *
+ * If the inode maps the requested block itself, the cache file system's
+ * readpage is used directly.  Otherwise the snap table is walked from
+ * slot tbl_count down to 1 looking for an indirect inode that maps the
+ * block, and the page is read through a temporary file handle on that
+ * inode.  When the dentry carries d_fsdata, it is taken as the primary
+ * inode number: the walk then starts from the primary inode and only
+ * accepts slots from the one whose inode number equals the original
+ * inode's onwards.
+ *
+ * Returns the lower readpage result, or -EINVAL when no cache, no
+ * primary inode or no indirect inode mapping the block is found.
+ */
+static int currentfs_readpage(struct file *file, struct page *page)
+{
+       int result = 0;
+       struct inode *inode = file->f_dentry->d_inode;
+       unsigned long ind_ino = inode->i_ino;
+       struct inode *pri_inode = NULL;
+       struct inode *cache_inode = NULL;
+       struct file open_file;
+       struct dentry open_dentry ;
+       struct inode_operations *ciops;
+       struct snap_cache *cache;
+       long block;
+       struct snap_table *table;
+       int slot = 0;
+       int index = 0;
+       int search_older = 0;
+
+       ENTRY;
+
+       cache = snap_find_cache(inode->i_dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       ciops = filter_c2cfiops(cache->cache_filter);
+
+       block = page->offset >> inode->i_sb->s_blocksize_bits;
+
+       /* if there is a block in the cache, return the cache readpage */
+       if( inode->i_blocks && ciops->bmap(inode, block) ) {
+               CDEBUG(D_FILE, "block %lu in cache, ino %lu\n",
+                               block, inode->i_ino);
+               result = ciops->readpage(file, page);
+               EXIT;
+               return result;
+       }
+
+       /*
+        * clonefs_readpage will fill this with primary ino number
+        * we need it to follow the cloned chain of primary inode
+        */
+       if( file->f_dentry->d_fsdata ){
+               pri_inode = iget(inode->i_sb, (unsigned long)file->f_dentry->d_fsdata);
+               if( !pri_inode ) {
+                       EXIT;
+                       return -EINVAL;
+               }
+               inode = pri_inode;
+               search_older = 1;
+       }
+
+       table = &snap_tables[cache->cache_snap_tableno];
+
+       for (slot = table->tbl_count ; slot >= 1; slot--)
+       {
+               index = table->tbl_index[slot];
+               cache_inode = snap_get_indirect(inode, NULL, index);
+
+               if ( !cache_inode )  continue;
+
+               /* we only want slots between cache_inode to the oldest one */
+               if( search_older && cache_inode->i_ino == ind_ino )
+                       search_older = 0;
+
+               if ( !search_older && cache_inode->i_op->bmap(cache_inode, block)) {
+                       break;
+               }
+               iput(cache_inode);
+               /*
+                * Forget the inode just released: if the walk ends without
+                * a match, the check below must see NULL rather than a
+                * stale pointer that would be used and put a second time.
+                */
+               cache_inode = NULL;
+       }
+       /* inode may alias pri_inode; it is only passed on, not dereferenced, below */
+       if( pri_inode )
+               iput(pri_inode);
+
+       if ( !cache_inode ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       currentfs_prepare_snapfile(inode, file, cache_inode, &open_file,
+                             &open_dentry);
+
+       down(&cache_inode->i_sem);
+
+       if( ciops->readpage ) {
+               CDEBUG(D_FILE, "block %lu NOT in cache, use redirected ino %lu\n", block, cache_inode->i_ino );
+               result = ciops->readpage(&open_file, page);
+       }else {
+               CDEBUG(D_FILE, "cache ino %lu, readpage is NULL\n",
+                               cache_inode->i_ino);
+       }
+
+       up(&cache_inode->i_sem);
+       currentfs_restore_snapfile(inode, file, cache_inode, &open_file);
+       iput(cache_inode);
+       EXIT;
+       return result;
+}
+/*
+ * File operations supplied by currentfs for regular files: only write
+ * is set.  filter_setup_file_ops() copies the open, release and write
+ * slots that are non-NULL here over the cache file system's methods.
+ */
+struct file_operations currentfs_file_fops = {
+       write:currentfs_write,
+};
+/*
+ * Inode operations supplied by currentfs for regular files: readpage
+ * plus the file operations above.  currentfs_read_inode() passes this
+ * table to filter_setup_file_ops() as the filter's methods.
+ */
+struct inode_operations currentfs_file_iops = {
+       default_file_ops: &currentfs_file_fops,
+       readpage: currentfs_readpage,
+};
diff --git a/lustre/snapfs/filter.c b/lustre/snapfs/filter.c
new file mode 100644 (file)
index 0000000..c63caac
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+ *
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Mountain View Data, Inc.
+ *
+ *
+ */
+
+#include <stdarg.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/filter.h>
+
+int filter_print_entry = 1;      /* presumably switches FENTRY/FEXIT tracing on -- TODO confirm in filter.h */
+int filter_debug = 0xfffffff;    /* presumably the FDEBUG mask; NOTE(review): seven hex digits (28 bits), not 0xffffffff -- confirm intended */
+/*
+ * The functions in this file are responsible for setting up the
+ * correct methods for layered file systems like InterMezzo and SnapFS.
+ */
+
+/* One filter_fs slot per cache file system type; filter_get_filter_fs() hands out pointers into this array. */
+static struct filter_fs filter_oppar[FILTER_FS_TYPES];
+
+/*
+ * Get to the upper methods (intermezzo, snapfs).
+ *
+ * Each accessor returns the address of one operations table embedded in
+ * cache->o_fops; these are the tables the filter_setup_*_ops() routines
+ * in this file fill in.  Name suffixes, as given by the return types:
+ *   sops               - super_operations
+ *   diops/fiops/siops  - inode_operations for directories/files/symlinks
+ *   dfops/ffops/sfops  - file_operations for directories/files/symlinks
+ *   dops               - dentry_operations
+ */
+inline struct super_operations *filter_c2usops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_sops;
+}
+
+inline struct inode_operations *filter_c2udiops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_dir_iops;
+}
+
+inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_file_iops;
+}
+
+inline struct inode_operations *filter_c2usiops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_sym_iops;
+}
+
+inline struct file_operations *filter_c2udfops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_dir_fops;
+}
+
+inline struct file_operations *filter_c2uffops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_file_fops;
+}
+
+inline struct file_operations *filter_c2usfops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_sym_fops;
+}
+
+inline struct dentry_operations *filter_c2udops(struct filter_fs *cache)
+{
+       return &cache->o_fops.filter_dentry_ops;
+}
+
+/*
+ * Get to the cache (lower) methods.
+ *
+ * Each accessor returns a pointer stored in cache->o_caops (or, for the
+ * snapshot operations, cache->o_snapops).  Those pointers are recorded
+ * by the filter_setup_*_ops() routines in this file, so callers have to
+ * cope with NULL when the matching setup routine has not run yet
+ * (currentfs_read_inode() tests filter_c2cfiops()/filter_c2csiops() for
+ * exactly that).  The name suffixes follow the same scheme as the
+ * upper-method accessors.
+ */
+inline struct super_operations *filter_c2csops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_sops;
+}
+
+inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_dir_iops;
+}
+
+inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_file_iops;
+}
+
+inline struct inode_operations *filter_c2csiops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_sym_iops;
+}
+
+inline struct file_operations *filter_c2cdfops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_dir_fops;
+}
+
+inline struct file_operations *filter_c2cffops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_file_fops;
+}
+
+inline struct file_operations *filter_c2csfops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_sym_fops;
+}
+
+inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache)
+{
+       return cache->o_caops.cache_dentry_ops;
+}
+/* snapfs: for snapshot operations; returns the pointer stored by filter_setup_snapshot_ops() */
+inline struct snapshot_operations *filter_c2csnapops(struct filter_fs *cache)
+{
+       return cache->o_snapops;
+}
+
+/*
+ * Find the filter_fs slot for the named cache file system type.
+ *
+ * cache_type must be exactly "ext2", "ext3" or "reiser".  Any other
+ * name logs a message and yields NULL.
+ */
+struct filter_fs *filter_get_filter_fs(const char *cache_type)
+{
+       struct filter_fs *found = NULL;
+       FENTRY;
+
+       if (strcmp(cache_type, "ext2") == 0) {
+               found = &filter_oppar[FILTER_FS_EXT2];
+       } else if (strcmp(cache_type, "ext3") == 0) {
+               found = &filter_oppar[FILTER_FS_EXT3];
+       } else if (strcmp(cache_type, "reiser") == 0) {
+               found = &filter_oppar[FILTER_FS_REISER];
+       }
+
+       if (found != NULL) {
+               FDEBUG(D_SUPER, "ops at %p\n", found);
+       } else {
+               printk("prepare to die: unrecognized cache type for Filter\n");
+       }
+
+       FEXIT;
+       return found;
+}
+
+
+/*
+ *  Frobnicate the InterMezzo/SnapFS operations
+ *    this establishes the link between the InterMezzo/SnapFS file system
+ *    and the underlying file system used for the cache.
+ *
+ *  cache       - filter_fs slot to set up; does nothing if its
+ *                FILTER_DID_SUPER_OPS flag is already set
+ *  cache_sops  - superblock operations of the underlying file system
+ *  filter_sops - superblock operations supplied by the filter
+ */
+
+void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops)
+{
+        /* Get ptr to the shared struct filter_ops structure. */
+       struct filter_ops *uops = &cache->o_fops;
+        /* Get ptr to the shared struct cache_ops structure. */
+       struct cache_ops *caops = &cache->o_caops;
+
+       FENTRY;
+
+       if ( cache->o_flags & FILTER_DID_SUPER_OPS ) {
+               FEXIT;
+               return;
+       }
+       cache->o_flags |= FILTER_DID_SUPER_OPS;
+
+        /* Set the cache superblock operations to point to the
+          superblock operations of the underlying file system.  */
+       caops->cache_sops = cache_sops;
+
+        /*
+         * Copy the cache (real fs) superblock ops to the "filter"
+         * superblock ops as defaults. Some will be changed below
+         */
+       memcpy(&uops->filter_sops, cache_sops, sizeof(*cache_sops));
+
+       /*
+        * now overwrite with filtering ops
+        *
+        * NOTE(review): after the memcpy above, uops->filter_sops.X equals
+        * cache_sops->X, so both halves of each condition below test the
+        * same pointer.  Whenever the cache has method X, the slot is set
+        * to filter_sops->X even if that is NULL.  filter_setup_dir_ops()
+        * tests filter_iops->X instead -- confirm which is intended.
+        */
+       if (cache_sops->put_super && uops->filter_sops.put_super) { 
+               uops->filter_sops.put_super = filter_sops->put_super;
+       }
+       if (cache_sops->read_inode && uops->filter_sops.read_inode) {
+               uops->filter_sops.read_inode = filter_sops->read_inode;
+               FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n",
+                     cache, cache, uops->filter_sops.read_inode);
+       }
+       if (cache_sops->notify_change && uops->filter_sops.notify_change) 
+               uops->filter_sops.notify_change = filter_sops->notify_change;
+       if (cache_sops->remount_fs && uops->filter_sops.remount_fs)
+               uops->filter_sops.remount_fs = filter_sops->remount_fs;
+       FEXIT;
+}
+
+
+/*
+ * Set up the directory inode and file operations of a filter.
+ *
+ * Records the cache file system's directory operations in
+ * cache->o_caops, copies them into the filter's own tables in
+ * cache->o_fops, then replaces a method with the filter's version
+ * wherever both the cache and the filter provide it.  The filter's
+ * open method is installed unconditionally when it exists.  Does
+ * nothing if FILTER_DID_DIR_OPS is already set.
+ *
+ *  cache       - filter_fs slot to set up
+ *  cache_iops  - directory inode operations of the underlying fs
+ *  filter_iops - directory inode operations supplied by the filter
+ */
+void filter_setup_dir_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+       struct inode_operations *u_iops;
+       struct file_operations *u_fops, *c_fops, *f_fops;
+       FENTRY;
+
+       if ( cache->o_flags & FILTER_DID_DIR_OPS ) {
+               FEXIT;
+               return;
+       }
+       FDEBUG(D_SUPER, "\n");
+       cache->o_flags |= FILTER_DID_DIR_OPS;
+
+       /* steal the old ops */
+       cache->o_caops.cache_dir_iops = cache_iops;
+       cache->o_caops.cache_dir_fops =
+               cache_iops->default_file_ops;
+
+       FDEBUG(D_SUPER, "\n");
+       /* abbreviate */
+       u_iops = &cache->o_fops.filter_dir_iops;
+
+       /* setup our dir iops: copy and modify */
+       memcpy(u_iops, cache_iops, sizeof(*cache_iops));
+       FDEBUG(D_SUPER, "\n");
+
+       /* methods that filter if cache filesystem has these ops */
+       if ( cache_iops->lookup && filter_iops->lookup ) {
+               FDEBUG(D_SUPER, "\n");
+               u_iops->lookup = filter_iops->lookup;
+               FDEBUG(D_SUPER, "lookup at %p\n", &filter_iops->lookup);
+       }
+       if (cache_iops->create && filter_iops->create)
+               u_iops->create = filter_iops->create;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->link && filter_iops->link)
+               u_iops->link = filter_iops->link;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->unlink && filter_iops->unlink)
+               u_iops->unlink = filter_iops->unlink;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->mkdir && filter_iops->mkdir)
+               u_iops->mkdir = filter_iops->mkdir;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->rmdir && filter_iops->rmdir)
+               u_iops->rmdir = filter_iops->rmdir;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->symlink && filter_iops->symlink)
+               u_iops->symlink = filter_iops->symlink;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->rename && filter_iops->rename)
+               u_iops->rename = filter_iops->rename;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->mknod && filter_iops->mknod)
+               u_iops->mknod = filter_iops->mknod;
+       FDEBUG(D_SUPER, "\n");
+       if (cache_iops->permission && filter_iops->permission)
+               u_iops->permission = filter_iops->permission;
+
+       /* copy dir fops */
+       FDEBUG(D_SUPER, "\n");
+       u_fops = &cache->o_fops.filter_dir_fops;
+       c_fops = cache_iops->default_file_ops;
+       f_fops = filter_iops->default_file_ops;
+
+       /*
+        * Either table may be absent: the open override below already
+        * allows for a NULL filter table, and filter_setup_symlink_ops()
+        * allows for a NULL cache table, so do the same here instead of
+        * dereferencing blindly.
+        */
+       if (c_fops)
+               memcpy(u_fops, c_fops, sizeof(*c_fops));
+
+       if (c_fops && c_fops->readdir && f_fops && f_fops->readdir)
+               u_fops->readdir = f_fops->readdir;
+
+       /* assign */
+       FDEBUG(D_SUPER, "\n");
+       filter_c2udiops(cache)->default_file_ops = filter_c2udfops(cache);
+       FDEBUG(D_SUPER, "\n");
+
+       /* unconditional filtering operations */
+       if ( filter_iops->default_file_ops &&
+            filter_iops->default_file_ops->open )
+               filter_c2udfops(cache)->open =
+                       filter_iops->default_file_ops->open;
+
+       FEXIT;
+}
+
+
+/*
+ * Set up the regular-file inode and file operations of a filter.
+ *
+ * Records the cache file system's file operations in cache->o_caops,
+ * copies them into the filter's own tables, and then installs the
+ * filter's open, release, write and readpage methods wherever the
+ * filter provides them.  Does nothing if FILTER_DID_FILE_OPS is
+ * already set.
+ */
+void filter_setup_file_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+       struct inode_operations *upper_iops;
+       struct file_operations *upper_fops;
+       struct file_operations *filt_fops;
+       FENTRY;
+
+       if ( cache->o_flags & FILTER_DID_FILE_OPS ) {
+               FEXIT;
+               return;
+       }
+       cache->o_flags |= FILTER_DID_FILE_OPS;
+
+       /* remember the lower file system's tables */
+       cache->o_caops.cache_file_iops = cache_iops;
+       cache->o_caops.cache_file_fops = cache_iops->default_file_ops;
+
+       /* start both upper tables as copies of the lower ones */
+       upper_iops = filter_c2ufiops(cache);
+       upper_fops = filter_c2uffops(cache);
+       memcpy(upper_iops, cache_iops, sizeof(*cache_iops));
+       memcpy(upper_fops, cache_iops->default_file_ops,
+              sizeof(*cache_iops->default_file_ops));
+
+       /* the upper inode table must use the upper file table */
+       upper_iops->default_file_ops = upper_fops;
+
+       /* unconditional filtering operations */
+       filt_fops = filter_iops->default_file_ops;
+       if (filt_fops) {
+               if (filt_fops->open)
+                       upper_fops->open = filt_fops->open;
+               if (filt_fops->release)
+                       upper_fops->release = filt_fops->release;
+               if (filt_fops->write)
+                       upper_fops->write = filt_fops->write;
+       }
+
+       /* set up readpage */
+       if (filter_iops->readpage)
+               upper_iops->readpage = filter_iops->readpage;
+
+       FEXIT;
+}
+
+/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */
+/*
+ * Set up the symlink inode and file operations of a filter.
+ *
+ * Records the cache file system's symlink operations in cache->o_caops,
+ * copies them into the filter's own tables, and replaces readlink and
+ * follow_link with the filter's versions when both sides provide them.
+ * Does nothing if FILTER_DID_SYMLINK_OPS is already set.
+ */
+void filter_setup_symlink_ops(struct filter_fs *cache, struct inode_operations *cache_iops, struct inode_operations *filter_iops)
+{
+       struct inode_operations *upper_iops;
+       struct file_operations *lower_fops;
+       FENTRY;
+
+       if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) {
+               FEXIT;
+               return;
+       }
+       cache->o_flags |= FILTER_DID_SYMLINK_OPS;
+
+       /* remember the lower file system's tables */
+       lower_fops = cache_iops->default_file_ops;
+       cache->o_caops.cache_sym_iops = cache_iops;
+       cache->o_caops.cache_sym_fops = lower_fops;
+
+       /* the upper inode table starts as a copy of the lower one */
+       upper_iops = filter_c2usiops(cache);
+       memcpy(upper_iops, cache_iops, sizeof(*cache_iops));
+
+       /* copy fops - careful for symlinks they might be NULL */
+       if (lower_fops != NULL)
+               memcpy(filter_c2usfops(cache), lower_fops, sizeof(*lower_fops));
+
+       /* the upper inode table must use the upper file table */
+       upper_iops->default_file_ops = filter_c2usfops(cache);
+
+       if (cache_iops->readlink && filter_iops->readlink)
+               upper_iops->readlink = filter_iops->readlink;
+       if (cache_iops->follow_link && filter_iops->follow_link)
+               upper_iops->follow_link = filter_iops->follow_link;
+
+       FEXIT;
+}
+
+/*
+ * Set up the dentry operations of a filter.
+ *
+ * Records cache_dop as the cache file system's dentry operations and
+ * copies filter_dop into the filter's own table.  Prints a warning when
+ * the cache has a d_revalidate method that this replaces.  Does nothing
+ * if FILTER_DID_DENTRY_OPS is already set.
+ *
+ *  cache      - filter_fs slot to set up
+ *  cache_dop  - dentry operations of the underlying fs (may be NULL)
+ *  filter_dop - dentry operations supplied by the filter
+ */
+void filter_setup_dentry_ops(struct filter_fs *cache,
+                            struct dentry_operations *cache_dop,
+                            struct dentry_operations *filter_dop)
+{
+       /* trace entry/exit like the other setup routines do */
+       FENTRY;
+
+       if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) {
+               FEXIT;
+               return;
+       }
+       cache->o_flags |= FILTER_DID_DENTRY_OPS;
+
+       cache->o_caops.cache_dentry_ops = cache_dop;
+       memcpy(&cache->o_fops.filter_dentry_ops,
+              filter_dop, sizeof(*filter_dop));
+
+       if (cache_dop &&  cache_dop != filter_dop && cache_dop->d_revalidate){
+               printk("WARNING: filter overriding revalidation!\n");
+       }
+
+       FEXIT;
+       return;
+}
+/*
+ * snapfs: record the snapshot operations of the cache file system.
+ * Only the first call for a given filter_fs takes effect; later calls
+ * find FILTER_DID_SNAPSHOT_OPS set and change nothing.
+ */
+void filter_setup_snapshot_ops (struct filter_fs *cache,
+                               struct snapshot_operations *cache_snapops)
+{
+       FENTRY;
+
+       if (!(cache->o_flags & FILTER_DID_SNAPSHOT_OPS)) {
+               cache->o_flags |= FILTER_DID_SNAPSHOT_OPS;
+               cache->o_snapops = cache_snapops;
+       }
+
+       FEXIT;
+}
+
+/*
+ * Record the journal operations of the cache file system in
+ * cache->o_trops.  Only the first call for a given filter_fs takes
+ * effect; later calls find FILTER_DID_JOURNAL_OPS set and change
+ * nothing.
+ */
+void filter_setup_journal_ops (struct filter_fs *cache,
+                              struct journal_ops *cache_journal_ops)
+{
+       FENTRY;
+
+       if (!(cache->o_flags & FILTER_DID_JOURNAL_OPS)) {
+               cache->o_flags |= FILTER_DID_JOURNAL_OPS;
+               cache->o_trops = cache_journal_ops;
+       }
+
+       FEXIT;
+}
diff --git a/lustre/snapfs/inode.c b/lustre/snapfs/inode.c
new file mode 100644 (file)
index 0000000..d6434f3
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ *  lustre/snapfs/inode.c
+ *
+ *  A snap shot file system.
+ *
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#ifdef CONFIG_SNAPFS_EXT2
+#include <linux/ext2_fs.h>
+#endif
+#ifdef CONFIG_SNAPFS_EXT3
+#include <linux/ext3_fs.h>
+#endif
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+
+extern int currentfs_remount(struct super_block * sb, int *flags, char *data);
+
+/* XXX PJB: this is exactly what we need to put things under 
+   filters - we don't want the ext2 methods hardcoded, we want them
+   in the filter (in read_super) and then call those methods. 
+   See how InterMezzo gets the journal operations .
+*/
+extern void currentfs_dotsnap_read_inode(struct snap_cache *, struct inode *);
+
+/* Superblock operations. */
+
+/*
+ * read_inode for the current file system.
+ *
+ * Inodes whose number has any of the top four bits set are handed to
+ * currentfs_dotsnap_read_inode().  All others are read by the cache
+ * file system and then given the filter's inode operations according
+ * to their type; the file and symlink tables are set up on first use.
+ * An inode whose device has no snap cache is marked bad.
+ */
+static void currentfs_read_inode(struct inode *inode)
+{
+       struct snap_cache *cache;
+       struct filter_fs *filter;
+       struct super_operations *lower_sops;
+       ENTRY;
+
+       if (inode == NULL) {
+               EXIT;
+               return;
+       }
+
+       CDEBUG(D_INODE, "read_inode ino %lu\n", inode->i_ino);
+
+       cache = snap_find_cache(inode->i_dev);
+       if (cache == NULL) {
+               printk("currentfs_read_inode: cannot find cache\n");
+               make_bad_inode(inode);
+               EXIT;
+               return;
+       }
+
+       if (inode->i_ino & 0xF0000000) {
+               CDEBUG(D_INODE, "\n");
+               currentfs_dotsnap_read_inode(cache, inode);
+               EXIT;
+               return;
+       }
+
+       filter = cache->cache_filter;
+
+       /* let the underlying file system fill in the inode first */
+       lower_sops = filter_c2csops(filter);
+       if (lower_sops)
+               lower_sops->read_inode(inode);
+
+       /* XXX now set the correct snap_{file,dir,sym}_iops */
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFDIR:
+               inode->i_op = filter_c2udiops(filter);
+               break;
+       case S_IFREG:
+               if (!filter_c2cfiops(filter))
+                       filter_setup_file_ops(filter, inode->i_op,
+                                             &currentfs_file_iops);
+               inode->i_op = filter_c2ufiops(filter);
+               printk("inode %lu, i_op at %p\n", inode->i_ino, inode->i_op);
+               break;
+       case S_IFLNK:
+               if (!filter_c2csiops(filter))
+                       filter_setup_symlink_ops(filter, inode->i_op,
+                                                &currentfs_sym_iops);
+               inode->i_op = filter_c2usiops(filter);
+               printk("inode %lu, i_op at %p\n", inode->i_ino, inode->i_op);
+               break;
+       default:
+               /* other inode types keep the operations they were given */
+               break;
+       }
+
+       EXIT;
+       return;
+}
+
+
+/*
+ * notify_change for the current file system.
+ *
+ * Refuses changes below the dotsnap directory (-EPERM), runs
+ * snap_do_cow() on the inode when snap_needs_cow() does not return -1,
+ * and then passes the change to the cache file system's notify_change.
+ * Returns -EINVAL when there is no snap cache for the device or the
+ * cache file system has no notify_change method.
+ */
+static int currentfs_notify_change(struct dentry *dentry, struct iattr *iattr)
+{
+       struct snap_cache *cache;
+       struct super_operations *lower_sops;
+       struct inode *target;
+       int err;
+
+       ENTRY;
+
+       if (currentfs_is_under_dotsnap(dentry)) {
+               EXIT;
+               return -EPERM;
+       }
+
+       target = dentry->d_inode;
+       cache = snap_find_cache(target->i_dev);
+       if (cache == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       /* XXX better alloc a new dentry */
+
+       if (snap_needs_cow(target) != -1) {
+               printk("notify_change:snap_needs_cow for ino %lu \n",
+                       target->i_ino);
+               snap_do_cow(target, dentry->d_parent->d_inode->i_ino, 0);
+       }
+
+       lower_sops = filter_c2csops(cache->cache_filter);
+       if (lower_sops == NULL || lower_sops->notify_change == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       err = lower_sops->notify_change(dentry, iattr);
+
+       EXIT;
+       return err;
+}
+
+
+static void currentfs_put_super(struct super_block *sb)
+{
+
+       struct snap_cache *cache;
+       ENTRY;
+
+       CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n",
+                (ulong) sb, (ulong) sb->u.generic_sbp);
+       cache = snap_find_cache(sb->s_dev);
+       if (!cache) {
+               EXIT;
+               goto exit;
+       }
+       /* handle COMPAT_FEATUREs */
+#ifdef CONFIG_SNAPFS_EXT2
+       else if( cache->cache_type == FILTER_FS_EXT2 ){
+               if( !EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_SNAPFS) ){
+                       sb->u.ext2_sb.s_feature_compat &=
+                               ~EXT2_FEATURE_COMPAT_BLOCKCOW;
+                       sb->u.ext2_sb.s_es->s_feature_compat &=
+                               cpu_to_le32(~EXT2_FEATURE_COMPAT_BLOCKCOW);
+               }
+       }
+#endif
+#ifdef CONFIG_SNAPFS_EXT3
+       else if( cache->cache_type == FILTER_FS_EXT3 ){
+               if( !EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_SNAPFS) ){
+                       sb->u.ext3_sb.s_es->s_feature_compat &=
+                               cpu_to_le32(~EXT3_FEATURE_COMPAT_BLOCKCOW);
+               }
+       }
+#endif
+        /*
+         * If there is a saved 'put_super' function for the underlying
+         * fs then call it.
+         */
+       if (cache->cache_filter->o_caops.cache_sops->put_super) { 
+               cache->cache_filter->o_caops.cache_sops->put_super(sb);
+       }
+       
+       if (!list_empty(&cache->cache_clone_list)) {
+               printk("Warning: snap_put_super: clones exist!\n");
+       }
+
+       list_del(&cache->cache_chain);
+       snap_free_cache(cache);
+
+       CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n",
+                (ulong) sb, (ulong) sb->u.generic_sbp);
+exit:
+       CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+       MOD_DEC_USE_COUNT;
+       EXIT;
+       return ;
+}
+/*
+ * Superblock operations supplied by currentfs.  The initializer is
+ * positional, so the order of entries must match the member order of
+ * struct super_operations.
+ */
+struct super_operations currentfs_super_ops = {
+       currentfs_read_inode,
+       NULL, /* write inode */
+       NULL, /* put inode */
+       NULL, /* delete inode */
+       currentfs_notify_change,
+       currentfs_put_super,
+       NULL, /* write super */
+       NULL, /* NOTE(review): slot unlabelled in the original; presumably statfs -- confirm against struct super_operations */
+       NULL, /* remount */
+};
diff --git a/lustre/snapfs/journal_ext3.c b/lustre/snapfs/journal_ext3.c
new file mode 100644 (file)
index 0000000..8171842
--- /dev/null
@@ -0,0 +1,84 @@
+
+/*
+ * Snapfs. (C) 2000 Peter J. Braam
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/locks.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#ifdef CONFIG_SNAPFS_EXT3
+#include <linux/ext3_jfs.h>
+#endif
+#include "linux/filter.h"
+#include "linux/snapfs.h"
+#include "linux/snapsupport.h"
+
+#ifdef CONFIG_SNAPFS_EXT3
+/* Journal blocks charged for an extended-attribute update: same as a data transaction. */
+#define EXT3_EA_TRANS_BLOCKS EXT3_DATA_TRANS_BLOCKS
+
+/*
+ * Journal blocks reserved for one copy-on-write; used as the base of
+ * every reservation in snap_e3_trans_start().
+ * must follow the changes of ext3_create_indirect() in fs/ext3/snap.c
+ * NOTE(review): the origin of the constant 17 is not visible here.
+ */
+#define COW_CREDITS (2 * EXT3_EA_TRANS_BLOCKS + 17 + 2 * EXT3_DATA_TRANS_BLOCKS )
+
+/*
+ * Start the filesystem journal operations.
+ *
+ * Picks the number of journal blocks to reserve for snapfs operation
+ * 'op' and starts a journal handle on the journal of 'inode'.
+ * Returns whatever journal_start() returns, or NULL when 'op' is not
+ * one of the SNAP_OP_* values handled below.
+ */
+static void *snap_e3_trans_start(struct inode *inode, int op)
+{
+       int credits;
+
+       /* XXX needs to be fixed up when we get reiserfs support */
+       switch (op) {
+       case SNAP_OP_CREATE:
+       case SNAP_OP_MKNOD:
+               credits = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 3;
+               break;
+       case SNAP_OP_MKDIR:
+               credits = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 4;
+               break;
+       case SNAP_OP_SYMLINK:
+               credits = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS + 5;
+               break;
+       case SNAP_OP_LINK:
+               credits = COW_CREDITS + EXT3_DATA_TRANS_BLOCKS;
+               break;
+       case SNAP_OP_UNLINK:
+               credits = COW_CREDITS + EXT3_DELETE_TRANS_BLOCKS;
+               break;
+       case SNAP_OP_RMDIR:
+               credits = 2 * COW_CREDITS + EXT3_DELETE_TRANS_BLOCKS;
+               break;
+       case SNAP_OP_RENAME:
+               credits = 4 * COW_CREDITS + 2 * EXT3_DATA_TRANS_BLOCKS + 2;
+               break;
+       default:
+               CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
+               return NULL;
+       }
+
+       CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", credits);
+       return journal_start(EXT3_JOURNAL(inode), credits);
+}
+/*
+ * Stop the journal handle of the current task.
+ *
+ * NOTE(review): the 'handle' argument is ignored; journal_stop() is
+ * given current->j_handle instead.  Presumably that is the same handle
+ * snap_e3_trans_start() returned -- confirm, in particular for the
+ * case where trans_start returned NULL.
+ */
+static void snap_e3_trans_commit(void *handle)
+{
+       journal_stop(current->j_handle);
+}
+/*
+ * Journal operations for ext3.  The initializer is positional: the
+ * transaction start method first, then the commit method.
+ */
+struct journal_ops snap_ext3_journal_ops = {
+       snap_e3_trans_start,
+       snap_e3_trans_commit
+};
+
+#endif /* CONFIG_SNAPFS_EXT3 */
diff --git a/lustre/snapfs/psdev.c b/lustre/snapfs/psdev.c
new file mode 100644 (file)
index 0000000..424d325
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ *              A file system filter driver in the style of InterMezzo
+ *              to manage file system snapshots
+ *
+ *             Author:  Peter J. Braam <braam@mountainviewdata.com>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#define EXPORT_SYMTAB
+
+#include <linux/config.h> /* for CONFIG_PROC_FS */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+/* #include <linux/kmod.h>    for request_module() */
+#include <linux/sched.h>
+#include <linux/lp.h>
+#include <linux/malloc.h>
+#include <linux/ioport.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/poll.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/*
+ * Debug settings.  With the "#if 1" branch active, entry tracing is on and
+ * every debug bit except D_INFO is set; flip to "#if 0" to silence both.
+ */
+#if 1 /* XXX - enable for debug messages */
+int snap_print_entry = 1;
+int snap_debug_level = ~D_INFO;
+#else
+int snap_print_entry = 0;
+int snap_debug_level = 0;
+#endif
+/* Global counters; not modified in this file (presumably maintained by the
+ * allocation helpers declared in the snapfs headers -- confirm). */
+int snap_inodes = 0;
+long snap_memory = 0;
+
+/* State of the snapfs control device. */
+struct snap_control_device snap_dev;
+
+/* ioctl handler for the control device; defined outside this file. */
+extern int snap_ioctl (struct inode * inode, struct file * filp, 
+                      unsigned int cmd, unsigned long arg);
+
+/*
+ * Called when opening the snapfs control device.
+ *
+ * Returns 0 on success (the module use count is incremented), -EINVAL when
+ * no inode is supplied, and -ENODEV when the minor number of the device
+ * node is not SNAP_PSDEV_MINOR.  Every return path is traced with EXIT so
+ * that it pairs with the ENTRY at the top.
+ */
+static int snap_psdev_open(struct inode * inode, struct file * file)
+{
+       int dev;
+       ENTRY;
+
+       if (!inode) {
+               EXIT;
+               return -EINVAL;
+       }
+       dev = MINOR(inode->i_rdev);
+       if (dev != SNAP_PSDEV_MINOR) {
+               EXIT;
+               return -ENODEV;
+       }
+
+       MOD_INC_USE_COUNT;
+       EXIT;
+       return 0;
+}
+
+/*
+ * Called when closing the snapfs control device.
+ *
+ * Returns 0 on success (the module use count is decremented), -EINVAL when
+ * no inode is supplied, and -ENODEV when the minor number of the device
+ * node is not SNAP_PSDEV_MINOR.  Every return path is traced with EXIT so
+ * that it pairs with the ENTRY at the top.
+ */
+static int snap_psdev_release(struct inode * inode, struct file * file)
+{
+       int dev;
+       ENTRY;
+
+       if (!inode) {
+               EXIT;
+               return -EINVAL;
+       }
+       dev = MINOR(inode->i_rdev);
+       if (dev != SNAP_PSDEV_MINOR) {
+               EXIT;
+               return -ENODEV;
+       }
+
+       MOD_DEC_USE_COUNT;
+
+       EXIT;
+       return 0;
+}
+
+/* XXX need ioctls here to do snap_delete and snap_restore, snap_backup */
+
+
+/*
+ * File operations of the control device.  Only ioctl, open and release are
+ * provided.  The initializers are positional, so each entry must stay in the
+ * slot that struct file_operations of the target kernel assigns to it.
+ */
+static struct file_operations snapcontrol_fops = {
+       NULL,                  /* llseek */
+       NULL,                  /* read */
+       NULL,                  /* write */
+       NULL,                  /* readdir */
+        NULL,                  /* poll */
+       snap_ioctl,            /* ioctl */
+       NULL,                  /* mmap */
+       snap_psdev_open,       /* open */
+       NULL,                  /* presumably flush (slot between open and release) -- confirm */
+       snap_psdev_release,    /* release */
+       NULL,                  /* fsync */
+       NULL,                  /* fasync */
+       NULL                   /* lock -- NOTE(review): verify slot against the kernel's struct */
+};
+
+
+
+/* Minor number under which the control device is registered. */
+#define SNAPFS_MINOR 240
+
+/*
+ * Misc device description: minor, name, file operations (positional).
+ * NOTE(review): snap_psdev_open()/snap_psdev_release() compare the node's
+ * minor against SNAP_PSDEV_MINOR, not SNAPFS_MINOR -- confirm the two agree.
+ */
+static struct miscdevice snapcontrol_dev = {
+       SNAPFS_MINOR,
+       "snapcontrol",
+       &snapcontrol_fops
+};
+
+/*
+ * Register the "snapcontrol" misc device.
+ *
+ * Returns 0 on success, otherwise the error reported by misc_register()
+ * (previously the result was dropped and 0 was always returned, so callers
+ * could not notice a failed registration).
+ */
+int init_snap_psdev(void)
+{
+       int err;
+
+       printk(KERN_INFO "SNAP psdev driver  v0.01, braam@mountainviewdata.com\n");
+
+       err = misc_register( &snapcontrol_dev );
+       if (err)
+               printk(KERN_ERR "SNAP psdev: misc_register failed, %d\n", err);
+
+       return err;
+}
+
+/* Unregister the "snapcontrol" misc device set up by init_snap_psdev(). */
+void snap_cleanup_psdev(void)
+{
+        ENTRY;
+       misc_deregister(&snapcontrol_dev);
+       EXIT;
+}
+
+#ifdef MODULE
+MODULE_AUTHOR("Peter J. Braam <braam@cs.cmu.edu>");
+MODULE_DESCRIPTION("Snapfs file system filters v0.01");
+
+extern int init_snapfs(void);
+extern int cleanup_snapfs(void);
+extern int init_clonefs(void);
+extern int init_snap_sysctl(void); 
+
+/*
+ * Module entry point: bring up the control device, the snapfs filesystem
+ * and the proc/sysctl interface, in that order.
+ *
+ * Returns 0 on success.  On failure the steps that already succeeded are
+ * undone in reverse order and the error code of the failing step is
+ * returned, so a failed load does not leave the misc device or the
+ * filesystem registered.
+ */
+int init_module(void)
+{
+       int err;
+
+       err = init_snap_psdev();
+       if (err) {
+               printk("Error initializing snap_psdev, %d\n", err);
+               return err;
+       }
+
+       err = init_snapfs();
+       if (err) {
+               printk("Error initializing snapfs, %d\n", err);
+               goto out_psdev;
+       }
+
+       err = init_snapfs_proc_sys();
+       if (err) {
+               printk("Error initializing snapfs proc sys, %d\n", err);
+               goto out_snapfs;
+       }
+
+       return 0;
+
+out_snapfs:
+       /* same teardown calls, in the same order, as cleanup_module() */
+       cleanup_snapfs();
+out_psdev:
+       snap_cleanup_psdev();
+       return err;
+}
+
+/*
+ * Module exit point: tear down the filesystem, then the control device.
+ * NOTE(review): nothing here undoes init_snapfs_proc_sys(), which
+ * init_module() calls -- confirm whether a matching cleanup is needed.
+ */
+void cleanup_module(void)
+{
+
+       cleanup_snapfs();
+       snap_cleanup_psdev();
+       
+}
+#endif
+
diff --git a/lustre/snapfs/snap.c b/lustre/snapfs/snap.c
new file mode 100644 (file)
index 0000000..463e0fa
--- /dev/null
@@ -0,0 +1,278 @@
+
+/*
+ *  fs/snap/snap.c
+ *
+ *  A snap shot file system.
+ *
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/*
+ * Ask the snapshot operations of the inode's cache whether 'cache_inode'
+ * is a redirector inode.
+ *
+ * Returns 0 when no cache is known for the inode's device or the cache has
+ * no is_redirector method; otherwise the method's result is returned.
+ */
+int snap_is_redirector(struct inode *cache_inode)
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       cache = snap_find_cache(cache_inode->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->is_redirector == NULL) {
+               EXIT;
+               return 0;
+       }
+
+       CDEBUG(D_SNAP, "ino %ld\n", cache_inode->i_ino);
+       return snapops->is_redirector(cache_inode);
+}
+
+/*
+ * Using a cache inode and clone super block find the real one.
+ *
+ * The snapshot operations are first asked for the indirect inode stored
+ * under the clone's own snapshot index.  If there is none, a private table
+ * is filled with the snapshot indices of slots clone_slot..tbl_count
+ * (stored in entries 1..n; entry 0 is zeroed) and passed to get_indirect()
+ * for a second lookup.
+ *
+ * Returns the inode handed back by get_indirect(), or NULL when no cache or
+ * no get_indirect method exists, when nothing was found, or when the
+ * clone's index is not present in the snapshot table.
+ */
+struct inode *snap_redirect(struct inode *cache_inode, 
+                           struct super_block *clone_sb)
+{
+       struct snap_clone_info *clone_info;
+       struct snap_table *table;
+       struct inode *redirected;
+       struct snap_cache *cache;
+       struct snapshot_operations *snapops;
+       int slot = 0;
+       /* entries 1..tbl_count+1 can be written, and tbl_count may reach
+        * SNAP_MAX, hence the two extra elements */
+       int my_table[SNAP_MAX + 2];
+       int clone_slot;
+
+       ENTRY;
+
+       cache = snap_find_cache(cache_inode->i_dev);
+       if (!cache) {
+               EXIT;
+               return NULL;
+       }
+       snapops = filter_c2csnapops(cache->cache_filter);
+       if (!snapops || !snapops->get_indirect) {
+               EXIT;
+               return NULL;
+       }
+
+       CDEBUG(D_SNAP, "cache ino %ld\n", cache_inode->i_ino);
+       clone_info = (struct snap_clone_info *)&clone_sb->u.generic_sbp;
+
+       table = &snap_tables[clone_info->clone_cache->cache_snap_tableno];
+
+       /* first find if there are indirected at the clone_index */
+       redirected = snapops->get_indirect(cache_inode, NULL, 
+                                       clone_info->clone_index);
+       /* if not found, get the FIRST index after this and before NOW */
+       /* XXX fix this later, now use tbl_count, not NOW */
+       if (!redirected) {
+               clone_slot = snap_index2slot(table, clone_info->clone_index);
+               /*
+                * A negative slot means the clone's index is not in the
+                * table; walking down to it would read tbl_index[-1].  A
+                * count above SNAP_MAX cannot come from a sane table.
+                */
+               if (clone_slot >= 0 && table->tbl_count <= SNAP_MAX) {
+                       my_table[0] = 0;
+                       for (slot = table->tbl_count; slot >= clone_slot; slot--) {
+                               /*
+                                * Slots past the installed entries are kept
+                                * zeroed by snap_install_table(); use 0 as
+                                * well for the one slot that would lie
+                                * outside the array (assumes tbl_index has
+                                * SNAP_MAX entries -- TODO confirm).
+                                */
+                               my_table[slot - clone_slot + 1] =
+                                       (slot < SNAP_MAX) ?
+                                       table->tbl_index[slot] : 0;
+                       }
+                       redirected = snapops->get_indirect(cache_inode,
+                                       my_table,
+                                       table->tbl_count - clone_slot + 1);
+               }
+       }
+
+       if (redirected)
+               CDEBUG(D_SNAP,"redirected ino %ld\n",redirected->i_ino);
+       EXIT;
+       return redirected;
+}
+
+/*
+ * Make a copy of the data and plug a redirector in between if there
+ * is no redirector yet.
+ *
+ * The indirect inode is created under the index reported by snap_last().
+ * Returns 0 when create_indirect() produced an inode (its reference is
+ * dropped again), -EINVAL when there is no cache, no create_indirect
+ * method, or no inode was produced.
+ */
+int snap_do_cow(struct inode *inode, ino_t parent_ino, int del)
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+       struct inode *ind;
+       struct snap snap;
+
+       ENTRY;
+       CDEBUG(D_SNAP, "snap_do_cow, ino %ld\n", inode->i_ino);
+
+       cache = snap_find_cache(inode->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->create_indirect == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       snap_last(cache, &snap);
+       ind = snapops->create_indirect(inode, parent_ino, snap.index, del);
+       EXIT;
+
+       if (ind == NULL)
+               return -EINVAL;
+
+       iput(ind);
+       return 0;
+}
+
+/*
+ * Forward an iteration request for 'sb' to the iterate method of the
+ * snapshot operations belonging to the cache of sb's root inode.
+ *
+ * Returns 0 when no cache or no iterate method is available, otherwise
+ * the method's result.
+ */
+int snap_iterate(struct super_block *sb,
+               int (*repeat)(struct inode *inode, void *priv),
+               struct inode **start, void *priv, int flag)
+{
+       struct inode *root_inode = sb->s_root->d_inode;
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+
+       cache = snap_find_cache(root_inode->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->iterate == NULL) {
+               EXIT;
+               return 0;
+       }
+
+       EXIT;
+       return snapops->iterate(sb, repeat, start, priv, flag);
+}
+
+/*
+ * Forward to the destroy_indirect method of the snapshot operations that
+ * belong to the cache of 'pri'.
+ *
+ * Returns 0 when no cache or no destroy_indirect method is available,
+ * otherwise the method's result.
+ */
+int snap_destroy_indirect(struct inode *pri, int index, struct inode *next_ind )
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+       cache = snap_find_cache(pri->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->destroy_indirect == NULL) {
+               EXIT;
+               return 0;
+       }
+
+       EXIT;
+       return snapops->destroy_indirect(pri, index, next_ind);
+}
+
+/*
+ * Forward to the restore_indirect method of the snapshot operations that
+ * belong to the cache of 'pri'.
+ *
+ * Returns 0 when no cache or no restore_indirect method is available,
+ * otherwise the method's result.
+ */
+int snap_restore_indirect(struct inode *pri, int index )
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+
+       cache = snap_find_cache(pri->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->restore_indirect == NULL) {
+               EXIT;
+               return 0;
+       }
+
+       EXIT;
+       return snapops->restore_indirect(pri, index);
+}
+
+/*
+ * Forward to the get_indirect method of the snapshot operations that
+ * belong to the cache of 'pri'.
+ *
+ * Returns NULL when no cache or no get_indirect method is available,
+ * otherwise the inode pointer the method returns.
+ */
+struct inode *snap_get_indirect(struct inode *pri, int *table, int slot)
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+
+       cache = snap_find_cache(pri->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->get_indirect == NULL) {
+               EXIT;
+               return NULL;
+       }
+
+       EXIT;
+       return snapops->get_indirect(pri, table, slot);
+}
+
+/*
+ * Forward to the migrate_data method of the snapshot operations that
+ * belong to the cache of 'src'.
+ *
+ * Returns 0 when no cache or no migrate_data method is available,
+ * otherwise the method's result.
+ */
+int snap_migrate_data(struct inode *dst, struct inode *src)
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+
+       cache = snap_find_cache(src->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->migrate_data == NULL) {
+               EXIT;
+               return 0;
+       }
+
+       EXIT;
+       return snapops->migrate_data(dst, src);
+}
+
+/*
+ * Forward to the set_indirect method of the snapshot operations that
+ * belong to the cache of 'pri'.
+ *
+ * Returns -EINVAL when no cache or no set_indirect method is available,
+ * otherwise the method's result.
+ */
+int snap_set_indirect(struct inode *pri, ino_t ind_ino, int index, ino_t parent_ino)
+{
+       struct snapshot_operations *snapops = NULL;
+       struct snap_cache *cache;
+
+       ENTRY;
+
+       cache = snap_find_cache(pri->i_dev);
+       if (cache != NULL)
+               snapops = filter_c2csnapops(cache->cache_filter);
+
+       if (snapops == NULL || snapops->set_indirect == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       EXIT;
+       return snapops->set_indirect(pri, ind_ino, index, parent_ino);
+}
+
+
diff --git a/lustre/snapfs/snaptable.c b/lustre/snapfs/snaptable.c
new file mode 100644 (file)
index 0000000..058164b
--- /dev/null
@@ -0,0 +1,1099 @@
+
+/*
+ *  snaptable.c
+ *
+ *  Manipulate snapshot tables
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/malloc.h>
+#include <linux/locks.h>
+#include <linux/errno.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/sysrq.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/quotaops.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+#include <asm/mmu_context.h>
+
+#include <linux/filter.h>
+#include <linux/snapsupport.h>
+#include <linux/snapfs.h>
+
+/* All snapshot tables, indexed by table number (0 .. SNAP_MAX_TABLES-1). */
+struct snap_table snap_tables[SNAP_MAX_TABLES];
+
+/*
+ * Per-table locking helpers.  Compiled out: nothing in this part of the
+ * file takes tbl_lock.
+ */
+#if 0
+static void snap_lock_table(int table_no)
+{
+
+       spin_lock(snap_tables[table_no].tbl_lock);
+
+}
+
+static void snap_unlock_table(int table_no)
+{
+
+       spin_unlock(snap_tables[table_no].tbl_lock);
+
+}
+#endif
+
+/*
+ * Return the first slot (0 .. tbl_count-1) of 'snap_table' whose index
+ * equals 'snap_index', or -1 when the index is not in the table.
+ */
+int snap_index2slot(struct snap_table *snap_table, int snap_index)
+{
+       int slot = 0;
+
+       while (slot < snap_table->tbl_count) {
+               if (snap_table->tbl_index[slot] == snap_index)
+                       return slot;
+               slot++;
+       }
+
+       return -1;
+}
+
+
+
+/* latest snap: returns 
+   -  the index of the latest snapshot before NOW
+   -  hence it returns 0 in case all the volume snapshots lie in the future
+   -  this is the index where a COW will land (will be created) 
+
+   The result is stored in *snap (index and time of the chosen slot).
+   When the cache refers to a table number outside snap_tables[], *snap is
+   set to index -1 / time 0 and no table is touched.  An empty table
+   (tbl_count == 0) is treated like "all snapshots in the future": slot 0
+   is reported.
+ */
+void snap_last(struct snap_cache *info, struct snap *snap)
+{
+       int i ;
+       time_t now = CURRENT_TIME;
+       struct snap_table *table;
+       int tableno = info->cache_snap_tableno;
+
+       ENTRY;
+       /* snap_tables[] has SNAP_MAX_TABLES entries, so the last valid
+          number is SNAP_MAX_TABLES - 1 */
+       if ( tableno < 0 || tableno >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid table no %d\n", tableno);
+               snap->index = -1;
+               snap->time = 0;
+               EXIT;
+               return;
+       }
+       table = &snap_tables[tableno];
+
+       /* start at the highest index in the superblock 
+          snaptime array */ 
+       i = table->tbl_count - 1;
+       if (i < 0)
+               i = 0;  /* empty table: never index slot -1 */
+
+       /* test the bound first so slot i is only read while i is valid */
+       while ( i > 0 && table->tbl_times[i] > now ) {
+               CDEBUG(D_SNAP, "time: %ld, i: %d\n", table->tbl_times[i], i);
+               i--;
+       }
+
+       snap->index = table->tbl_index[i];
+       snap->time = table->tbl_times[i];
+       CDEBUG(D_SNAP, "index: %d, time[i]: %ld, now: %ld\n",
+              snap->index, snap->time, now);
+       EXIT;
+       return;
+}
+
+/*
+ * Decide whether a change to 'inode' has to trigger a COW.
+ *
+ * Returns the index of the snapshot found by snap_last() when both the
+ * inode's mtime and ctime are not later than that snapshot's time, and -1
+ * when no COW is needed or no cache exists for the inode's device.
+ */
+int snap_needs_cow(struct inode *inode)
+{
+       struct snap_cache *cache;
+       struct snap snap;
+       int index;
+       ENTRY;
+
+       cache = snap_find_cache(inode->i_dev);
+       if (cache == NULL) {
+               EXIT;
+               return -1;
+       }
+
+       /* time of the last snapshot is what the inode is compared with */
+       snap_last(cache, &snap);
+
+       /* an object that has not changed since the snapshot was taken
+        * must be copied before it is modified
+        */
+       if (inode->i_mtime > snap.time || inode->i_ctime > snap.time)
+               index = -1;
+       else
+               index = snap.index;
+
+       printk("snap_needs_cow, ino %lu , get index %d\n",inode->i_ino, index);
+
+       EXIT;
+       return index;
+} /* snap_needs_cow */
+
+/*
+ * Compiled out.  Fills *snap with the clone's index and the time stored in
+ * the table slot of that index, and returns the slot.
+ * NOTE(review): if this is ever re-enabled, the table check lets
+ * tableno == SNAP_MAX_TABLES through, execution continues after the
+ * "invalid table" message, and a slot of -1 from snap_index2slot() is used
+ * as an array subscript.
+ */
+#if 0
+int  snap_obd2snap(struct snap_clone_info *info, struct snap *snap)
+{
+       struct snap_table *table;
+       int tableno = info->clone_cache->cache_snap_tableno;
+       int index =  info->clone_index;
+       int slot;
+
+       ENTRY;
+       if ( tableno < 0 || tableno > SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid table no %d\n", tableno);
+               snap->index = -1;
+       }
+       table = &snap_tables[tableno];
+       slot = snap_index2slot(table, index);
+
+       snap->index = index;
+       snap->time = table->tbl_times[slot];
+       EXIT;
+       return slot;
+}
+#endif
+
+/*
+ * at what index is the current snapshot located
+ *
+ * Returns the index stored in slot 0 of the cache's snapshot table.
+ * NOTE(review): cache_snap_tableno is used as a subscript without a range
+ * check.
+ */
+int snap_current(struct snap_cache *cache)
+{
+       int tableno = cache->cache_snap_tableno;
+
+       return snap_tables[tableno].tbl_index[0];
+}
+
+/*
+ * Report whether snapshot index 'snap_index' is marked as in use in
+ * table 'table_no'.
+ *
+ * Returns 1 when the index is in use, 0 when it is not, and -1 when
+ * either argument is out of range.  The "used" result is normalised to 1
+ * so that a set top bit of the mask can never show up as a negative
+ * value, which callers treat as an error.
+ */
+int snap_is_used(int table_no, int snap_index) 
+
+{
+       /* ENTRY; */
+       if ( snap_index < 0 || snap_index >= SNAP_MAX ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               EXIT;
+               return -1;
+       }
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( table_no < 0 || table_no >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               EXIT;
+               return -1;
+       }
+
+       /* EXIT; */
+       return (snap_tables[table_no].tbl_used & (1U << snap_index)) != 0;
+}
+
+/*
+ * Mark snapshot index 'snap_index' as in use in table 'table_no'.
+ *
+ * Nothing happens (apart from a message for range errors) when either
+ * argument is out of range or the index is not present in the table.
+ */
+void snap_use(int table_no, int snap_index) 
+{
+       if ( snap_index < 0 || snap_index >= SNAP_MAX ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               return;
+       }
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( table_no < 0 || table_no >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               return;
+       }
+       if ( snap_index2slot(&snap_tables[table_no], snap_index) < 0 ) 
+               return;
+
+       /* unsigned shift: no overflow into the sign bit for high indices */
+       snap_tables[table_no].tbl_used |=  (1U << snap_index);
+}
+
+/*
+ * Clear the in-use mark of snapshot index 'snap_index' in table
+ * 'table_no'.
+ *
+ * Nothing happens (apart from a message for range errors) when either
+ * argument is out of range or the index is not present in the table.
+ */
+void snap_unuse(int table_no, int snap_index) 
+{
+       if ( snap_index < 0 || snap_index >= SNAP_MAX ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               return;
+       }
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( table_no < 0 || table_no >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid snapno %d,table %d\n",
+                      snap_index, table_no);
+               return;
+       }
+       if ( snap_index2slot(&snap_tables[table_no], snap_index) < 0 ) 
+               return;
+
+       /* unsigned shift: no overflow into the sign bit for high indices */
+       snap_tables[table_no].tbl_used &=  ~(1U << snap_index);
+}
+
+/*
+ * Format a message and copy at most 'buflen' bytes of it to 'buf'.
+ *
+ * buf:    destination; receives the bytes without a terminating NUL
+ * buflen: room left in 'buf'; a value <= 0 copies nothing
+ * fmt:    printf-style format followed by its arguments
+ *
+ * Returns the number of bytes copied (0 .. buflen).
+ *
+ * The message is formatted into a 1024 byte scratch buffer with
+ * vsprintf(), which does not bound its output: callers must keep each
+ * formatted message shorter than that.
+ */
+static int nprint_buf(char *buf, int buflen, char *fmt, ...)
+{
+       va_list args;
+       int n;
+       char local_buf[1024];
+
+       /* a non-positive length would end up as a huge memcpy() size */
+       if (buflen <= 0)
+               return 0;
+
+       va_start(args, fmt);
+       n = vsprintf(local_buf, fmt, args);
+       va_end(args);
+
+       if (n < 0)
+               return 0;
+
+       /* copy no more than the caller has room for, and never more than
+          the scratch buffer holds */
+       if (n > buflen)
+               n = buflen;
+       if (n > (int)sizeof(local_buf))
+               n = sizeof(local_buf);
+
+       memcpy(buf, local_buf, n);
+       return n;
+}
+       
+/*
+ * Dump snapshot table data->tblcmd_no to the kernel log and, as text, into
+ * 'buf'.
+ *
+ * buf:    destination for the text
+ * buflen: in: size of 'buf'; out: number of bytes used when the whole dump
+ *         fitted with room to spare, otherwise left unchanged
+ *
+ * Returns 0, or -EINVAL for a table number outside snap_tables[].
+ */
+int snap_print_table(struct snap_table_data *data, char *buf, int *buflen)
+{
+       int tableno = data->tblcmd_no;
+       int i;
+       struct snap_table *table;
+       char *buf_ptr;
+       int nleft = (*buflen);
+       int nprint = 0;
+
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( tableno < 0 || tableno >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid table number %d\n", tableno);
+               EXIT;
+               return -EINVAL;
+       }
+
+       table = &snap_tables[tableno];
+
+       printk("------- snap table %d\n", tableno);
+       printk("     -- snap count %d\n", table->tbl_count);
+       printk("     -- snap used  0x%x\n", table->tbl_used);
+       for ( i = 0 ; i < SNAP_MAX ; i++ ) {
+               printk("     -- slot %d, idx %d, time %ld, name %s\n",
+                      i, table->tbl_index[i], table->tbl_times[i], 
+                       table->tbl_name[i]);
+       }
+
+       /* nothing can be copied into an empty (or bogus) buffer */
+       if (nleft <= 0)
+               goto exit;
+
+       /* the same text again, this time into the caller's buffer; stop as
+          soon as the buffer is full */
+       buf_ptr = buf;
+       nprint= nprint_buf(buf_ptr, nleft, "------- snap table %d\n", tableno);
+       nleft -= nprint;
+       if( nleft > 0 )  buf_ptr += nprint;
+       else goto exit; 
+       nprint = nprint_buf(buf_ptr, nleft, "     -- snap count %d\n", table->tbl_count);
+       nleft -= nprint;
+       if( nleft > 0 )  buf_ptr += nprint;
+       else goto exit;
+       nprint = nprint_buf(buf_ptr, nleft, "     -- snap used  0x%x\n", table->tbl_used);
+       nleft -= nprint;
+       if( nleft > 0 )  buf_ptr += nprint;
+       else goto exit;
+       for ( i = 0 ; i < SNAP_MAX ; i++ ) {
+               nprint = nprint_buf( buf_ptr, nleft,
+                       "     -- slot %d, idx %d, time %ld, name %s\n",
+                      i, table->tbl_index[i], table->tbl_times[i], 
+                       table->tbl_name[i]);
+               nleft -= nprint;
+               if( nleft > 0 )  buf_ptr += nprint;
+               else goto exit;
+       }
+
+exit:
+       if(nleft > 0) (*buflen) = (*buflen) - nleft;
+
+       return 0;
+}
+
+/*
+ * Replace snapshot table data->tblcmd_no with the snapshot list in 'data'.
+ *
+ * Slot 0 of the installed table always describes the current snapshot
+ * (time 0, index 0, name "current"); the tblcmd_count entries of 'data'
+ * follow in slots 1..tblcmd_count and the remaining slots are cleared.
+ *
+ * The new list is rejected with -EINVAL when
+ *   - the table number is outside snap_tables[],
+ *   - the list does not fit into the table together with slot 0,
+ *   - an index that is currently in use is missing from the list,
+ *   - an index is outside 0..SNAP_MAX-1, or
+ *   - the snapshot times are not strictly increasing.
+ * Returns 0 on success.
+ *
+ * len: not used here.
+ * NOTE(review): nothing checks that 'data' really holds tblcmd_count
+ * entries -- presumably the caller sized it from 'len'; confirm.
+ */
+int snap_install_table(int len, struct snap_table_data *data)
+{
+       int i, j;
+       int tableno = data->tblcmd_no;
+       int count = data->tblcmd_count;
+       struct snap_table *table;
+
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( tableno < 0 || tableno >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid table number %d\n", tableno);
+               EXIT;
+               return -EINVAL;
+       }
+       /* entry i of the list lands in slot i + 1, so at most SNAP_MAX - 1
+          entries fit behind the "current" slot */
+       if ( count < 0 || count >= SNAP_MAX ) {
+               printk(__FUNCTION__ ": invalid snapshot count %d\n", count);
+               return -EINVAL;
+       }
+       table = &snap_tables[tableno];  
+
+       /* for each index that is used by the current table
+          we need to make sure that the new table we are about
+          to put in contains that index too 
+       */
+       for ( i = 0; i < SNAP_MAX ; i++ ) {
+               int foundit;
+               int err;
+
+               if ((err = snap_is_used(tableno, i)) < 0 ) {
+                       printk(__FUNCTION__  ": table %d not used\n", tableno);
+                       EXIT;
+                       return -EINVAL;
+               } else if (err == 0) {
+                       continue;
+               }
+
+               /* index 0 is installed in slot 0 below no matter what the
+                  list contains, so it can never go missing */
+               foundit = (i == 0);
+               /* only entries 0..count-1 of the list are valid */
+               for (j = 0 ; !foundit && j < count ; j++) {
+                       if ( i == data->tblcmd_snaps[j].index )
+                               foundit = 1;
+               }
+               if ( !foundit ) {
+                       printk(__FUNCTION__ ": index %d not in table %d\n", 
+                              i, tableno);
+                       return -EINVAL;
+               }
+       }
+
+       /* we must have:
+           - valid indices 
+          - increasing snapshot times 
+       */
+       CDEBUG(D_SNAP, "snaplist: tblcmd_count %d\n", data->tblcmd_count);
+       for (i = 0 ; i < count ; i++) {
+
+               if ( (data->tblcmd_snaps[i].index < 0) ||
+                    (data->tblcmd_snaps[i].index >= SNAP_MAX) ) {
+                       printk(__FUNCTION__ ": snap_index out of range!\n");
+                       return -EINVAL;
+               }
+
+               if (i>0 && data->tblcmd_snaps[i].time <= 
+                   data->tblcmd_snaps[i-1].time) {
+                       printk(__FUNCTION__ ": times not increasing\n");
+                       return -EINVAL;
+               }
+       }
+
+       /* ready to go: over write the table.
+        * we don't need current snapshot for data:
+        * current snapshot always has slot 0, index 0, name "current" 
+        */
+       table->tbl_times[0] = 0;
+       table->tbl_index[0] = 0;
+       strcpy(table->tbl_name[0], "current");
+
+       i=0;    
+       CDEBUG(D_SNAP, "snaplist: i %d, time %ld, idx %d, name %s\n",
+              i, table->tbl_times[i], table->tbl_index[i], 
+               table->tbl_name[i]);
+
+       for (i = 0 ; i < count ; i++) {
+
+               table->tbl_times[i+1] = data->tblcmd_snaps[i].time;
+               table->tbl_index[i+1] = data->tblcmd_snaps[i].index;
+               memcpy(table->tbl_name[i+1], data->tblcmd_snaps[i].name, 
+                       SNAP_MAX_NAMELEN);
+               /* the name copied from the list may lack a terminator */
+               table->tbl_name[i+1][SNAP_MAX_NAMELEN - 1] = '\0';
+
+               CDEBUG(D_SNAP, "snaplist: i %d, time %ld, idx %d, name %s\n",
+                      i+1, table->tbl_times[i+1], table->tbl_index[i+1], 
+                       table->tbl_name[i+1]);
+       }
+
+       /* clear the slots behind the installed entries */
+       for ( i = count + 1 ; i < SNAP_MAX ; i++ ) {
+               table->tbl_times[i] = 0;
+               table->tbl_index[i] = 0;
+               memset(table->tbl_name[i], 0, SNAP_MAX_NAMELEN);
+       }
+
+       /* set the table count: the list plus the "current" slot */
+       table->tbl_count = count + 1;
+       return 0;
+}
+
+
+/*
+ * Mark snapshot index 'snap_index' of table 'tableno' as in use.
+ *
+ * Returns 0 on success, -EINVAL when the table number is outside
+ * snap_tables[] or the index is not present in the table.
+ */
+int snap_table_attach(int tableno, int snap_index) 
+{
+       struct snap_table *table;
+
+       /* snap_tables[] has SNAP_MAX_TABLES entries: the last valid table
+          number is SNAP_MAX_TABLES - 1 */
+       if ( tableno < 0 || tableno >= SNAP_MAX_TABLES ) {
+               printk(__FUNCTION__ ": invalid table number %d\n", tableno);
+               EXIT;
+               return -EINVAL;
+       }
+       table = &snap_tables[tableno];  
+       
+       if ( snap_index2slot(table, snap_index) < 0 ) {
+               printk(__FUNCTION__ ": snap index %d not present in table %d\n",
+                      snap_index, tableno);
+               return -EINVAL;
+       }
+
+       snap_use(tableno, snap_index);
+       return 0;
+}
+
+/*
+ * Replace the user-space pointer in *data by a kernel copy of the 'len'
+ * bytes it points to.
+ *
+ * A length of 0 stores NULL in *data.  Returns 0 on success, -ENOMEM when
+ * the allocation fails and -EFAULT when the copy from user space fails; in
+ * both error cases *data is left untouched.  The copy is released with
+ * freedata().
+ */
+static int getdata(int len, void **data)
+{
+       void *tmp = NULL;
+
+       if (len == 0) {
+               *data = NULL;
+               return 0;
+       }
+
+       SNAP_ALLOC(tmp, void *, len);
+       if (tmp == NULL)
+               return -ENOMEM;
+
+       CDEBUG(D_MALLOC, "snap_alloc:len %d, add %p\n", len, tmp);
+
+       memset(tmp, 0, len);
+       if (copy_from_user(tmp, *data, len) == 0) {
+               /* success: hand the kernel copy back to the caller */
+               *data = tmp;
+               return 0;
+       }
+
+       SNAP_FREE(tmp, len);
+       CDEBUG(D_MALLOC, "snap_free:len %d, add %p\n", len, tmp);
+       return -EFAULT;
+}
+
+/*
+ * Release a buffer of 'len' bytes obtained through getdata() and log the
+ * release.  'len' must be the length the buffer was allocated with, since
+ * it is passed on to SNAP_FREE().
+ */
+static void freedata(void *data, int len) {
+       SNAP_FREE(data, len);
+       CDEBUG(D_MALLOC, "snap_free:len %d, add %p\n", len, data);
+}
+
+/*
+ * snap_iterate() callback used to step through the iterated inodes one
+ * request at a time.
+ *
+ * 'ino' points to an ino_t.  On entry it holds the inode number the caller
+ * saw last (0 asks for the first inode); on a hit it is overwritten with
+ * the inode number of 'pri'.
+ *
+ * Returns -1 on a hit and 0 otherwise (presumably 0 lets the iteration
+ * continue and -1 ends it -- confirm against the iterate method).
+ *
+ * The number of the previously visited inode is kept in a function-static
+ * variable, so this state survives between iterations and is shared by
+ * all callers.
+ */
+static int get_next_inode(struct inode *pri, void *ino)
+{
+       static ino_t prev_ino = -1 ;    
+       ino_t *result = ino;
+       ino_t wanted = *result;
+       ino_t cur_ino = pri->i_ino;
+
+       if (wanted != 0 && wanted != prev_ino) {
+               /* not there yet: remember this inode and carry on */
+               prev_ino = cur_ino;
+               return 0;
+       }
+
+       /* first inode requested, or the previous inode was the wanted one */
+       *result = cur_ino;
+       return -1;
+}
+
+
+/*
+ * Find the inode that follows data->ino in the SNAP_ITERATE_COWED_INODE
+ * iteration of the cache on device data->dev.
+ *
+ * data:       dev selects the cache; ino is the inode seen last (0 to
+ *             start) and is updated to the inode found, or 0 at the end
+ * found_ino:  receives the inode number found, 0 when there is none
+ * parent_ino: receives the inode number of the parent of the inode's first
+ *             dentry, or 0 when it cannot be determined
+ *
+ * Returns 0, or -EINVAL when no cache exists for the device.
+ */
+static int snap_get_next_inode(struct snap_ino_list_data *data, ino_t *found_ino, ino_t *parent_ino)
+{
+       kdev_t dev = data->dev;
+       ino_t this_ino = data->ino; 
+
+       struct snap_cache *cache;
+
+       struct inode *inode;
+       struct dentry * dentry;
+
+       ENTRY;
+
+       cache = snap_find_cache(dev); 
+       if ( !cache ) {
+                EXIT;
+                return -EINVAL;
+        }
+
+       snap_iterate( cache->cache_sb, &get_next_inode, NULL, &(data->ino), 
+                       SNAP_ITERATE_COWED_INODE);
+
+       /* the callback left data->ino alone: nothing follows this_ino */
+       if( data->ino == this_ino ) {
+               data->ino = 0;
+       }
+
+       *found_ino = data->ino;
+       /* give the caller a defined value on every path */
+       *parent_ino = 0;
+
+       if( !(*found_ino) ) {
+               EXIT;
+               return 0;
+       }
+
+       inode = iget (cache->cache_sb, *found_ino);
+       if (!inode) {
+               /* iget() can fail; report it like the "no dentry" case */
+               printk("Cannot get inode for ino %lu\n",
+                      (unsigned long)*found_ino);
+               EXIT;
+               return 0;
+       }
+       if (list_empty(&inode->i_dentry)) {
+               printk("No dentry for ino %lu, Error(XXX)! \n", inode->i_ino);
+               iput(inode);    
+               EXIT;
+               return 0;
+       }
+
+       dentry = dget(list_entry(inode->i_dentry.next, struct dentry, d_alias));
+       if( dentry->d_parent && dentry->d_parent->d_inode )
+               *parent_ino = dentry->d_parent->d_inode->i_ino;
+
+       dput(dentry);
+       iput(inode);
+
+       EXIT;
+       return 0;
+}
+/*
+static int snap_get_inode_info(struct snap_ino_list_data *data,  int index)
+{
+       kdev_t dev = data->dev;
+       ino_t pri = data->ino; 
+       int index = data->index;
+
+       struct snap_cache *cache;
+
+       struct inode *pri;
+       struct inode *ind;
+       ino_t ind_ino = 0;
+
+       ENTRY;
+
+       cache = snap_find_cache(dev); 
+       if ( !cache ) {
+                EXIT;
+                return -EINVAL;
+        }
+       pri = iget(cache->cache->sb, pri_ino);
+       ind = snap_get_indirect(pri, NULL, index);
+       if(ind) {
+               ind_ino = ind->i_ino;   
+               iput(ind);
+       }
+       return ind_ino;
+}
+*/
+
+/*
+ * snap_iterate() callback: log the inode number of 'pri' at D_SNAP debug
+ * level.  'param' is not used.  Always returns 0.
+ */
+static int print_inode(struct inode *pri,void *param)
+{
+       CDEBUG(D_SNAP, "cowed inode list: ino %lu \n", pri->i_ino);
+       return 0;
+}
+
+/*
+ * Run print_inode() over 'sb' via snap_iterate() with the
+ * SNAP_ITERATE_COWED_INODE flag.  'data' is handed through to the callback,
+ * which ignores it.  The result of snap_iterate() is discarded; always
+ * returns 0.
+ */
+static int snap_print(struct super_block *sb, void *data)
+{
+       snap_iterate(sb, &print_inode, NULL, data, SNAP_ITERATE_COWED_INODE);
+       return 0;
+}
+
+/*
+ * snap_iterate() callback that removes the indirect inode which 'primary'
+ * holds for one snapshot index.
+ *
+ * param: struct snap_iterdata carrying the snapshot index and the table
+ *        number; when NULL, index 0 of table 0 is used.
+ *
+ * Always returns 0; a failure of snap_destroy_indirect() is only logged.
+ */
+static int delete_inode(struct inode *primary, void *param)
+{
+       struct snap_iterdata * data;
+       int tableno = 0;
+       int index = 0;
+       int rc = 0;
+
+       struct inode *redirect;
+       ino_t old_ind = 0;
+       struct snap_table *table;
+       int slot;
+       int delete_slot;
+       int this_index;
+       struct inode *next_ind = NULL;
+       int my_table[SNAP_MAX];
+
+       if(!primary) return 0;
+
+       data = (struct snap_iterdata*) param;
+
+       if(data) {
+               index = data->index;
+               tableno = data->tableno;
+       }
+
+       printk("delete_inode ino %lu, index %d\n", primary->i_ino, index);
+
+       /* NOTE(review): tableno is used as a subscript without a range check */
+       table = &snap_tables[tableno];
+
+       redirect = snap_get_indirect(primary, NULL, index);
+
+       /* nothing stored under this index: nothing to delete */
+       if(!redirect)   
+               return 0;
+
+       old_ind = redirect->i_ino;
+       iput(redirect);
+       /*
+        * Look at the slot in front of the one being deleted (slot 0 is
+        * skipped).  If that snapshot has no indirect inode of its own, the
+        * inode is re-registered under that snapshot's index and the entry
+        * for 'index' is cleared, instead of destroying the inode.
+        */
+       slot = snap_index2slot(table, index) - 1;
+       if( slot > 0 ) {
+               this_index = table->tbl_index[slot];
+               redirect = snap_get_indirect(primary, NULL, this_index);
+               if(redirect)    
+                       iput(redirect);
+               else  {
+                       snap_set_indirect(primary, old_ind, this_index, 0);
+                       snap_set_indirect(primary, 0, index, 0);
+                       return 0;
+               }
+       }
+
+       /* get the FIRST index after this and before NOW */
+       /* used for destroy_indirect and block level cow */
+       /* XXX fix this later, now use tbl_count, not NOW */
+       /*
+        * NOTE(review): delete_slot is -1 when 'index' is not in the table;
+        * the loop below reads tbl_index[tbl_count] and writes entries
+        * 1..tbl_count-delete_slot of my_table[SNAP_MAX] without a bounds
+        * check, and entry 0 is never written.
+        */
+       delete_slot = snap_index2slot(table, index);
+       for(slot = table->tbl_count; slot > delete_slot; slot --)
+       {
+               my_table[slot - delete_slot] = table->tbl_index[slot];
+       }
+       next_ind = snap_get_indirect 
+               (primary, my_table, table->tbl_count - delete_slot );
+       /* the primary itself does not count as a "next" indirect inode */
+       if( next_ind && (next_ind->i_ino == primary->i_ino) ) {
+               iput(next_ind);
+               next_ind = NULL;
+       }
+
+       /* neither does the inode that is about to be destroyed */
+       if( next_ind && (next_ind->i_ino == old_ind) ) {
+               iput(next_ind);
+               next_ind = NULL;
+       }
+
+       rc = snap_destroy_indirect(primary, index, next_ind);
+
+       if(next_ind)    iput(next_ind);
+
+       if(rc != 0)     
+               printk("ERROR:snap_destroy_indirect(ino %lu,index %d),ret %d\n",                        primary->i_ino, index, rc);
+       return 0;
+}
+
+/*
+ * Delete the snapshot described by data from the file system sb by
+ * running delete_inode() on every inode that snap_iterate() visits in
+ * SNAP_ITERATE_COWED_INODE mode.  Always returns 0.
+ */
+static int snap_delete(struct super_block *sb, struct snap_iterdata *data)
+{
+       CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n",
+              data->dev, data->tableno, data->index, data->time);
+
+       snap_iterate(sb, delete_inode, NULL, data, SNAP_ITERATE_COWED_INODE);
+
+       return 0;
+}
+
+/*
+ * Per-inode callback run by snap_restore() over every inode before the
+ * restore pass proper.
+ *
+ * An inode whose mtime or ctime is later than the snapshot time carried
+ * in param (a struct snap_iterdata, may be NULL) has every dentry on its
+ * alias list dropped with d_drop() and its link count set to 0.
+ * Redirector inodes are skipped.  Always returns 0.
+ */
+static int delete_new_inode(struct inode *pri, void *param)
+{
+       struct snap_iterdata *data = (struct snap_iterdata *)param;
+       struct list_head *head, *pos;
+       int index = 1;
+       /*
+        * Without iterdata no inode may look "newer".  The former default
+        * of 0xFFFFFFFF becomes -1 where time_t is a 32-bit signed long,
+        * which made every inode compare as newer; use the largest
+        * positive long instead (time_t is printed with %lu below, so it
+        * is treated as a long here already).
+        */
+       time_t restore_time = (time_t)(~0UL >> 1);
+
+       ENTRY;
+
+       /* every return path pairs the ENTRY above with an EXIT */
+       if (!pri || snap_is_redirector(pri)) {
+               EXIT;
+               return 0;
+       }
+
+       if (data) {
+               index = data->index;
+               restore_time = data->time;
+       }
+
+       CDEBUG(D_SNAP, "ino %lu, index=%d, time=%lu\n",
+                       pri->i_ino, index, restore_time);
+
+       if (pri->i_mtime > restore_time || pri->i_ctime > restore_time) {
+               CDEBUG(D_SNAP, "snap_restore ino %lu is newer, delete \n",pri->i_ino);
+
+               /* drop every dentry that still names this inode */
+               head = &pri->i_dentry;
+               for (pos = head->next; pos != head; pos = pos->next)
+                       d_drop(list_entry(pos, struct dentry, d_alias));
+
+               pri->i_nlink = 0;
+       }
+
+       EXIT;
+       return 0;
+}
+
+/*
+ * Per-inode callback run by snap_restore() over the inodes visited in
+ * SNAP_ITERATE_COWED_INODE mode.
+ *
+ * If pri was modified after the snapshot time, look for an indirect
+ * inode at the requested snapshot index, falling back to the indices
+ * held in the following table slots (up to tbl_count).  When one is
+ * found, snap_restore_indirect() is called for that index and the entry
+ * is then removed with snap_destroy_indirect().
+ *
+ * param is a struct snap_iterdata (may be NULL, in which case index 1 of
+ * table 0 is used and nothing counts as newer).  Always returns 0.
+ */
+static int restore_inode(struct inode *pri, void *param)
+{
+       struct snap_iterdata *data = (struct snap_iterdata *)param;
+       struct snap_table *table;
+       struct inode *ind;
+       /*
+        * Largest positive long: the former default of 0xFFFFFFFF becomes
+        * -1 where time_t is a 32-bit signed long, which made every inode
+        * compare as newer than the snapshot.
+        */
+       time_t restore_time = (time_t)(~0UL >> 1);
+       int tableno = 0;
+       int index = 1;
+       int restore_index;
+       int slot;
+
+       ENTRY;
+
+       if (!pri) {
+               EXIT;
+               return 0;
+       }
+
+       if (data) {
+               index = data->index;
+               tableno  = data->tableno;
+               restore_time = data->time;
+       }
+
+       CDEBUG(D_SNAP, "ino %lu, index=%d, time=%lu, tableno %d\n",
+                       pri->i_ino, index, restore_time, tableno);
+
+       /* XXX: should we have = here? */
+       if (pri->i_mtime <= restore_time && pri->i_ctime <= restore_time) {
+               printk("ino %lu is older, don't restore\n",pri->i_ino);
+               EXIT;
+               return 0;
+       }
+
+       restore_index = index;
+       table = &snap_tables[tableno];
+
+       /* first find if there are indirected at the index */
+       ind = snap_get_indirect(pri, NULL, index);
+
+       /* if not found, get the FIRST index after this and before NOW */
+       /* XXX fix this later, now use tbl_count, not NOW */
+       if (!ind) {
+               /*
+                * snap_index2slot() reports an unknown index with a
+                * negative value (see the check in snap_iterate_func());
+                * never use that as an array subscript.
+                */
+               slot = snap_index2slot(table, index);
+               if (slot >= 0) {
+                       for (; slot <= table->tbl_count; slot++) {
+                               ind = snap_get_indirect(pri, NULL,
+                                               table->tbl_index[slot]);
+                               if (ind) {
+                                       restore_index = table->tbl_index[slot];
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       if (!ind) {
+               CDEBUG(D_SNAP, "ERROR:restore ino %lu\n", pri->i_ino);
+               EXIT;
+               return 0;
+       }
+
+       CDEBUG(D_SNAP, "restore ino %lu with index %d\n",
+                       pri->i_ino, restore_index);
+       iput(ind);
+       snap_restore_indirect(pri, restore_index);
+       /* XXX the restored entry is destroyed directly, not via delete_inode() */
+       snap_destroy_indirect(pri, restore_index, NULL);
+
+       EXIT;
+       return 0;
+}
+
+/*
+ * Restore the file system sb to the snapshot described by data.
+ * Two snap_iterate() passes are made; always returns 0.
+ */
+static int snap_restore(struct super_block *sb, struct snap_iterdata *data)
+{
+       CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n",
+              data->dev, data->tableno, data->index, data->time);
+
+       /* pass 1: delete_new_inode() over SNAP_ITERATE_ALL_INODE */
+       snap_iterate(sb, delete_new_inode, NULL, data, SNAP_ITERATE_ALL_INODE);
+
+       /* pass 2: restore_inode() over SNAP_ITERATE_COWED_INODE */
+       snap_iterate(sb, restore_inode, NULL, data, SNAP_ITERATE_COWED_INODE);
+
+       return 0;
+}
+
+/*
+ * return the index number of a name in a table
+ *
+ * tableno selects the entry of snap_tables[] to search and name is
+ * compared against each slot name over at most SNAP_MAX_NAMELEN bytes.
+ * Returns the tbl_index value of the first matching slot, or -EINVAL
+ * when tableno is out of range, name is NULL or no slot matches.
+ */
+int snap_get_index_from_name(int tableno, char *name)
+{
+       struct snap_table *table;
+       int slot;
+
+       /*
+        * Valid table numbers are taken to be 0 .. SNAP_MAX_TABLES - 1, so
+        * SNAP_MAX_TABLES itself must be rejected as well.
+        * NOTE(review): confirm against the snap_tables[] declaration.
+        */
+       if ( tableno < 0 || tableno >= SNAP_MAX_TABLES ) {
+               printk("%s: invalid table number %d\n", __FUNCTION__, tableno);
+               return -EINVAL;
+       }
+
+       if ( !name )
+               return -EINVAL;
+
+       table = &snap_tables[tableno];
+
+       for ( slot = 0 ; slot < SNAP_MAX ; slot++ ) {
+               if ( strncmp(table->tbl_name[slot], name,
+                            SNAP_MAX_NAMELEN) == 0 )
+                       return table->tbl_index[slot];
+       }
+       return -EINVAL;
+}
+
+/*
+ * Common front end of the IOC_SNAP_DEBUG, IOC_SNAP_DELETE and
+ * IOC_SNAP_RESTORE ioctls.
+ *
+ * ioc_data carries a device number followed by a snapshot name; len is
+ * the size of that buffer, so the name occupies len - sizeof(kdev_t)
+ * bytes (truncated to SNAP_MAX_NAMELEN).  The name is resolved to a
+ * snapshot index in the table of the cache registered for the device,
+ * and the handler selected by cmd is run with the resulting iterdata.
+ *
+ * Returns 0 on success (or the handler's result) and -EINVAL when the
+ * buffer is too short, the device has no cache, the name or its slot
+ * cannot be found, or cmd is not one of the three commands.
+ */
+int snap_iterate_func(int len, struct snap_ioc_data *ioc_data, unsigned int cmd)
+{
+       struct snap_iterdata data;
+       char name[SNAP_MAX_NAMELEN];
+       struct snap_cache *cache;
+       struct snap_table *table;
+       struct super_block *sb;
+       kdev_t dev;
+       int tableno;
+       int name_len;
+       int index;
+       int slot;
+       int rc = 0;
+
+       ENTRY;
+
+       /* the buffer must at least hold the device number read below */
+       if ( !ioc_data || len < (int)sizeof(kdev_t) ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       dev = ioc_data->dev;
+       cache = snap_find_cache(dev);
+       if ( !cache ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       sb = cache->cache_sb;
+       tableno = cache->cache_snap_tableno;
+
+       /* signed arithmetic throughout; len >= sizeof(kdev_t) was checked */
+       name_len = len - (int)sizeof(kdev_t);
+       if ( name_len > SNAP_MAX_NAMELEN )
+               name_len = SNAP_MAX_NAMELEN;
+       memset(name, 0, SNAP_MAX_NAMELEN);
+       memcpy(name, ioc_data->name, name_len);
+
+       index = snap_get_index_from_name(tableno, name);
+       if ( index < 0 ) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       /*
+        * Only form the table pointer once the lookup above has accepted
+        * tableno.
+        */
+       table = &snap_tables[tableno];
+
+       data.dev = dev;
+       data.index = index;
+       data.tableno = tableno;
+       slot = snap_index2slot(table, index);
+       if ( slot < 0 ) {
+               EXIT;
+               return -EINVAL;
+       }
+       data.time = table->tbl_times[slot];
+
+       CDEBUG(D_SNAP, "dev %d, tableno %d, index %d, time %lu\n",
+               data.dev, data.tableno, data.index, data.time );
+
+       switch (cmd) {
+       case IOC_SNAP_DEBUG:
+               snap_print(sb, &data);
+               break;
+       case IOC_SNAP_DELETE:
+               rc = snap_delete(sb, &data);
+               break;
+       case IOC_SNAP_RESTORE:
+               rc = snap_restore(sb, &data);
+               break;
+       default:
+               rc = -EINVAL;
+               break;
+       }
+
+       /* single exit so the EXIT trace is emitted for unknown commands too */
+       EXIT;
+       return rc;
+}
+
+/*
+ * ioctl entry point of the snapfs pseudo device (minor SNAP_PSDEV_MINOR).
+ *
+ * arg points to a user-space { int len; char *data; } descriptor.  The
+ * descriptor is copied in, getdata() is called with its length and data
+ * pointer (presumably producing a kernel copy of len bytes -- confirm
+ * against getdata()), the command is executed and the buffer is released
+ * with freedata().  Several commands write their result back to arg.
+ *
+ * Returns 0 on success, -ENODEV for a wrong minor, -EINVAL for a missing
+ * inode or a bad command or argument, -EFAULT when user memory cannot be
+ * accessed, or the error reported by getdata() or by the command handler.
+ */
+int snap_ioctl (struct inode * inode, struct file * filp, 
+                            unsigned int cmd, unsigned long arg)
+{
+       void *karg;
+       int len;
+       int err;
+       kdev_t dev;
+       struct  {
+               int len;
+               char *data;
+       }input;
+       int rc = 0;
+
+       ENTRY;
+
+       /* validate the inode before it is dereferenced for its minor */
+       if (!inode) {
+               CDEBUG(D_IOCTL, "invalid inode\n");
+               EXIT;
+               return -EINVAL;
+       }
+
+       dev = MINOR(inode->i_rdev);
+       if (dev != SNAP_PSDEV_MINOR) {
+               EXIT;
+               return -ENODEV;
+       }
+
+       if ( _IOC_TYPE(cmd) != IOC_SNAP_TYPE ||
+            _IOC_NR(cmd) < IOC_SNAP_MIN_NR  ||
+            _IOC_NR(cmd) > IOC_SNAP_MAX_NR ) {
+               CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+                               _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+               EXIT;
+               return -EINVAL;
+       }
+
+       /*
+        * get data structures
+        *
+        * copy_from_user() returns the number of bytes it could not copy,
+        * which is not an errno; report -EFAULT instead.
+        */
+       if ( copy_from_user(&input, (void *)arg, sizeof(input)) ) {
+               EXIT;
+               return -EFAULT;
+       }
+       len = input.len;
+
+       karg = input.data;
+       err = getdata(input.len, &karg);
+       if ( err ) {
+               EXIT;
+               return err;
+       }
+
+       switch (cmd) {
+       case IOC_SNAP_SETTABLE:
+               rc = snap_install_table(len, karg);
+               break;
+       case IOC_SNAP_PRINTTABLE: {
+               struct output_data{
+                       int len;
+                       char buf[1024];
+               }output;
+
+               /* never hand uninitialised stack bytes to user space */
+               memset(&output, 0, sizeof(output));
+               output.len = sizeof(output.buf);
+               snap_print_table(karg, output.buf, &(output.len));
+               /* keep the copy below inside the output structure */
+               if (output.len < 0 || output.len > (int)sizeof(output.buf))
+                       output.len = sizeof(output.buf);
+               if (copy_to_user((char *)arg, &output,
+                                output.len + sizeof(int)))
+                       rc = -EFAULT;
+               break;
+       }
+       case IOC_SNAP_GETINDEXFROMNAME: {
+               struct get_index_struct {
+                       kdev_t dev;
+                       char name[SNAP_MAX_NAMELEN];
+               };
+               struct get_index_struct *data = karg;
+               struct snap_cache *cache;
+               /* one spare byte keeps the name terminated for the %s below */
+               char name[SNAP_MAX_NAMELEN + 1];
+               kdev_t snap_dev;
+               int name_len;
+               int tableno;
+               int index;
+
+               /* the buffer must at least hold the device number */
+               if (!data || len < (int)sizeof(kdev_t)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               snap_dev = data->dev;
+               name_len = len - (int)sizeof(kdev_t);
+               if (name_len > SNAP_MAX_NAMELEN)
+                       name_len = SNAP_MAX_NAMELEN;
+               memset(name, 0, sizeof(name));
+               memcpy(name, data->name, name_len);
+               printk("dev %d , len %d, name_len %d, find name is [%s]\n", snap_dev, len, name_len, name);
+
+               cache = snap_find_cache(snap_dev);
+               if ( !cache ) {
+                       rc = -EINVAL;
+                       break;
+               }
+               tableno = cache->cache_snap_tableno;
+
+               /* a failed lookup is passed on to the caller as the index */
+               index = snap_get_index_from_name(tableno, name);
+               if (copy_to_user((char *)arg, &index, sizeof(index)))
+                       rc = -EFAULT;
+               break;
+       }
+       case IOC_SNAP_GET_NEXT_INO: {
+               struct get_ino_struct{
+                       ino_t found_ino;
+                       ino_t parent_ino;
+               }get_ino;
+
+               get_ino.found_ino = 0;
+               get_ino.parent_ino = 0;
+               /*
+                * The status of snap_get_next_inode() has always been
+                * discarded here; the caller only sees the two inode
+                * numbers.  NOTE(review): confirm this is intended.
+                */
+               snap_get_next_inode(karg, &get_ino.found_ino,
+                                   &get_ino.parent_ino);
+               if (copy_to_user((char *)arg, &get_ino, sizeof(get_ino)))
+                       rc = -EFAULT;
+               break;
+       }
+       case IOC_SNAP_GET_INO_INFO: {
+               struct ioc_ino_info{
+                       kdev_t dev;
+                       ino_t ino;
+                       int index;
+               };
+               struct ioc_ino_info *data = karg;
+               struct snap_cache *cache;
+               struct inode *pri;
+               struct inode *ind;
+               ino_t ind_ino = 0;
+
+               /* the whole request structure is read below */
+               if (!data || len < (int)sizeof(*data)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               cache = snap_find_cache(data->dev);
+               if ( !cache ) {
+                       rc = -EINVAL;
+                       break;
+               }
+               printk("get_ino_info, dev %d, ino %lu, index %d\n",
+                        data->dev, data->ino, data->index);
+               pri = iget(cache->cache_sb, data->ino);
+               /* without a primary inode 0 is reported, as for "no indirect" */
+               if (pri) {
+                       ind = snap_get_indirect(pri, NULL, data->index);
+                       if (ind) {
+                               ind_ino = ind->i_ino;
+                               iput(ind);
+                       }
+                       iput(pri);
+               }
+               printk("get_ino_info, get ind %lu\n", ind_ino);
+               if (copy_to_user((char *)arg, &ind_ino, sizeof(ino_t)))
+                       rc = -EFAULT;
+               break;
+       }
+       case IOC_SNAP_DELETE:
+       case IOC_SNAP_RESTORE:
+       case IOC_SNAP_DEBUG:
+               rc = snap_iterate_func(len, karg, cmd);
+               break;
+       case IOC_SNAP_DEVFAIL:
+               snap_debug_failcode = (unsigned int)arg;
+               break;
+       case IOC_SNAP_SHOW_DOTSNAP: {
+               struct ioc_show_info{
+                       kdev_t dev;
+                       int show;
+               };
+               struct ioc_show_info *data = karg;
+               struct snap_cache *cache;
+
+               /* the whole request structure is read below */
+               if (!data || len < (int)sizeof(*data)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               cache = snap_find_cache(data->dev);
+               if( !cache ) {
+                       rc = -EINVAL;
+                       break;
+               }
+               cache->cache_show_dotsnap = (char)data->show;
+               CDEBUG(D_IOCTL, "Set show dotsnap: %s\n",
+                       data->show ? "Yes" : "No");
+               break;
+       }
+
+       default:
+               rc = -EINVAL;
+               break;
+       }
+
+       /* every command path ends here, so the buffer is released once */
+       freedata(karg, input.len);
+       EXIT;
+       return rc;
+}
diff --git a/lustre/snapfs/super.c b/lustre/snapfs/super.c
new file mode 100644 (file)
index 0000000..f101977
--- /dev/null
@@ -0,0 +1,714 @@
+/*
+ *  snap_current
+ *
+ *  Copyright (C) 1998 Peter J. Braam
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Mountain View Data, Inc.
+ *
+ *  Author: Peter J. Braam <braam@mountainviewdata.com>
+ */
+
+
+#include <stdarg.h>
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/dcache.h>
+
+#ifdef CONFIG_SNAPFS_EXT2
+#include <linux/ext2_fs.h>
+#endif
+#ifdef CONFIG_SNAPFS_EXT3
+#include <linux/ext3_fs.h>
+#endif
+
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+/*
+ * Debug-only globals.  snap_kmemory and snap_vmemory are the values
+ * printed as "kmem" and "vmem" by the D_MALLOC messages in this file;
+ * snap_debug_failcode is assigned by the IOC_SNAP_DEVFAIL ioctl.
+ * NOTE(review): they are defined only when SNAP_DEBUG is set, so any
+ * unconditional user relies on that macro being defined -- confirm.
+ */
+#ifdef SNAP_DEBUG
+long snap_vmemory = 0;
+long snap_kmemory = 0;
+unsigned int snap_debug_failcode = 0;
+#endif
+/* Snapshot cache helpers defined outside this file. */
+extern struct snap_cache *snap_init_cache(void);
+extern inline void snap_cache_add(struct snap_cache *, kdev_t);
+extern inline void snap_init_cache_hash(void);
+/* Maps a snapshot name in the given table to its index. */
+extern int snap_get_index_from_name (int tableno, char *name);
+/*
+ * Per-backend snapshot and journal operation tables; snapfs_read_super()
+ * installs the pair matching the cache_type mount option.
+ */
+#ifdef CONFIG_SNAPFS_EXT2
+extern struct snapshot_operations ext2_snap_operations;
+extern struct journal_ops snap_ext2_journal_ops;
+#endif
+
+#ifdef CONFIG_SNAPFS_EXT3
+extern struct snapshot_operations ext3_snap_operations;
+extern struct journal_ops snap_ext3_journal_ops;
+#endif
+
+/*
+ * returns an allocated string, copied out from data if opt is found
+ *
+ * data is one "name=value" mount option token.  The token matches only
+ * when it starts with opt immediately followed by '='; a mere prefix
+ * match (e.g. "tablespace=x" against "table") is rejected.  On a match
+ * the value is copied into a buffer obtained with SNAP_ALLOC, which the
+ * caller releases with SNAP_FREE(ptr, strlen(ptr) + 1).
+ *
+ * Returns NULL when the token does not match or memory is exhausted.
+ */
+static char *read_opt(const char *opt, char *data)
+{
+       size_t optlen = strlen(opt);
+       char *value;
+       char *retval;
+
+       CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
+
+       /* the option name must be followed directly by '=' */
+       if ( strncmp(opt, data, optlen) != 0 || data[optlen] != '=' )
+               return NULL;
+
+       value = data + optlen + 1;
+       SNAP_ALLOC(retval, char *, strlen(value) + 1);
+       if ( !retval ) {
+               printk("snapfs: Out of memory!\n");
+               return NULL;
+       }
+
+       strcpy(retval, value);
+       CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
+       return retval;
+}
+
+/*
+ * Hand the allocated string opt over to *dst, releasing whatever *dst
+ * held before.  With a NULL dst the option is not wanted and opt is
+ * simply released.
+ */
+static inline void store_opt(char **dst, char *opt)
+{
+       if (!dst) {
+               SNAP_FREE(opt, strlen(opt) + 1);
+               return;
+       }
+
+       if (*dst) {
+               SNAP_FREE(*dst, strlen(*dst) + 1);
+       }
+       *dst = opt;
+}
+
+/*
+ * Give *dst a freshly allocated copy of def when the caller wants the
+ * option (dst != NULL) and it has no value yet.  On allocation failure
+ * *dst stays NULL and a message is logged.
+ */
+static void snapfs_default_opt(char **dst, const char *def)
+{
+       if (!dst || *dst)
+               return;
+
+       SNAP_ALLOC(*dst, char *, strlen(def) + 1);
+       if (!*dst) {
+               printk("snapfs: Out of memory!\n");
+               return;
+       }
+       strcpy(*dst, def);
+}
+
+/* Find the options for snapfs in "options", saving them into the
+ * passed pointers.  If the pointer is null, the option is discarded.
+ * Copy out all non-snapfs options into cache_data (to be passed
+ * to the read_super operation of the cache).  The return value will
+ * be a pointer to the end of the cache_data.
+ *
+ * Options that are wanted but not yet set first receive their defaults
+ * ("ext2", "block" and "-1").  A default that cannot be allocated leaves
+ * the pointer NULL, so callers should check the strings before use.
+ * "options" is split in place with strtok().
+ */
+static char *snapfs_options(char *options, char *cache_data,
+                           char **cache_type, char **cow_type,
+                           char **snaptable)
+{
+       char *this_char;
+       char *cache_data_end = cache_data;
+
+       /* set the defaults here */
+       snapfs_default_opt(cache_type, "ext2");
+       snapfs_default_opt(cow_type, "block");
+       snapfs_default_opt(snaptable, "-1");
+
+       if (!options || !cache_data)
+               return cache_data_end;
+
+       CDEBUG(D_SUPER, "parsing options\n");
+       for (this_char = strtok (options, ",");
+            this_char != NULL;
+            this_char = strtok (NULL, ",")) {
+               char *opt;
+               CDEBUG(D_SUPER, "this_char %s\n", this_char);
+
+               if ( (opt = read_opt("cache_type", this_char)) ) {
+                       store_opt(cache_type, opt);
+                       continue;
+               }
+               if ( (opt = read_opt("cow_type", this_char)) ){
+                       store_opt(cow_type, opt);
+                       continue;
+               }
+               if ( (opt = read_opt("table", this_char)) ) {
+                       store_opt(snaptable, opt);
+                       continue;
+               }
+
+               /* not a snapfs option: pass it on, comma separated */
+               cache_data_end += sprintf(cache_data_end, "%s%s",
+                                         cache_data_end != cache_data ? ",":"",
+                                         this_char);
+       }
+
+       return cache_data_end;
+}
+
+/*
+ * Remount handler: strip the snapfs options from data and forward the
+ * remaining options to the remount_fs operation of the underlying file
+ * system, when it has one.
+ *
+ * Returns 0 or the result of the underlying remount_fs, -ENOMEM when the
+ * scratch page cannot be allocated and -ENODEV when no snap cache is
+ * registered for the device of sb.
+ */
+int snapfs_remount(struct super_block * sb, int *flags, char *data)
+{
+       char *cache_data = NULL;
+       char *snapno = NULL;
+       char *cache_data_end;
+       struct snap_cache *cache = NULL;
+       struct super_operations *sops;
+       int err = 0;
+
+       ENTRY;
+       CDEBUG(D_MALLOC, "before remount: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+       CDEBUG(D_SUPER, "remount opts: %s\n", data ? (char *)data : "(none)");
+       if (data) {
+               /* reserve space for the cache's data */
+               SNAP_ALLOC(cache_data, void *, PAGE_SIZE);
+               if ( !cache_data ) {
+                       err = -ENOMEM;
+                       EXIT;
+                       goto out_err;
+               }
+       }
+
+       cache = snap_find_cache(sb->s_dev);
+       if (!cache) {
+               printk("%s: cannot find cache on remount\n", __FUNCTION__);
+               err = -ENODEV;
+               EXIT;
+               goto out_err;
+       }
+
+       /* If an option has not yet been set, we allow it to be set on
+        * remount.  If an option already has a value, we pass NULL for
+        * the option pointer, which means that the snapfs option
+        * will be parsed but discarded.
+        *
+        * The table option is collected in snapno but not applied here;
+        * the string is released at out_err below.
+        */
+       cache_data_end = snapfs_options(data, cache_data, NULL, NULL, &snapno);
+
+       if (cache_data) {
+               if (cache_data_end == cache_data) {
+                       /* nothing left over for the underlying fs */
+                       SNAP_FREE(cache_data, PAGE_SIZE);
+                       cache_data = NULL;
+               } else {
+                       CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
+                              cache_data);
+               }
+       }
+
+
+       sops = filter_c2csops(cache->cache_filter);
+       if (sops->remount_fs) {
+               err = sops->remount_fs(sb, flags, cache_data);
+       }
+
+       CDEBUG(D_MALLOC, "after remount: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+       EXIT;
+out_err:
+       /* snapfs_options() allocated snapno; it used to leak on every call */
+       if (snapno)
+               SNAP_FREE(snapno, strlen(snapno) + 1);
+       if (cache_data)
+               SNAP_FREE(cache_data, PAGE_SIZE);
+       return err;
+}
+
+/* XXXX remount: needed if snapfs was mounted RO at boot time
+   without a snaptable 
+*/ 
+
+
+/*
+ * snapfs super block read.
+ *
+ * Allocate a struct snap_cache, determine the underlying fs type,
+ * read the underlying fs superblock, save the underlying fs ops,
+ * and then replace them with snapfs ops.
+ *
+ * Remove the snapfs options before passing to underlying fs.
+ */
+struct super_block *
+snapfs_read_super (
+        struct super_block *sb,
+        void *data,
+        int silent)
+{
+       struct file_system_type *fstype;
+       struct snap_cache *cache = NULL;
+       char *cache_data = NULL;
+       char *cache_data_end;
+       char *cache_type = NULL;
+       char *cow_type = NULL;
+       char *snapno = NULL;
+       char *endptr;
+       int tableno;
+
+       ENTRY;
+       CDEBUG(D_MALLOC, "before parsing: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+
+       /* reserve space for the cache's data */
+       SNAP_ALLOC(cache_data, void *, PAGE_SIZE);
+       if ( !cache_data ) {
+               printk("snapfs_read_super: Cannot allocate data page.\n");
+               EXIT;
+               goto out_err;
+       }
+
+       CDEBUG(D_SUPER, "mount opts: %s\n", data ? (char *)data : "(none)");
+
+       /* read and validate options */
+       cache_data_end = snapfs_options(data, cache_data, &cache_type, &cow_type, &snapno);
+
+       /* Need to free cache_type and snapno when it's not in use */
+
+       /* was there anything for the cache filesystem in the data? */
+       if (cache_data_end == cache_data) {
+               SNAP_FREE(cache_data, PAGE_SIZE);
+               cache_data = NULL;
+       } else {
+               CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
+                      cache_data);
+       }
+
+       /* set up the cache */
+       cache = snap_init_cache();
+       if ( !cache ) {
+               printk("snapfs_read_super: failure allocating cache.\n");
+               EXIT;
+               goto out_err;
+       }
+
+       fstype = get_fs_type(cache_type);
+       if ( !fstype || !fstype->read_super) {
+               EXIT;
+               goto out_err;
+       }
+       
+       cache->cache_filter = filter_get_filter_fs((const char *)cache_type); 
+       /* XXX if cache->cache_filter==NULL?although it's rare ***/
+
+
+       /*
+         * Read the underlying file system superblock - ext2, ext3, reiser.
+         * This performs the underlying mount operation. The snapfs options
+         * have been removed from 'cache_data'.
+         *
+         * Note: It's assumed that sb is always returned.
+         */
+       CDEBUG(D_SUPER, "\n");
+       if (fstype->read_super(sb, cache_data, silent) != sb) {
+               printk("snapfs: cache mount failure.\n");
+               EXIT;
+               goto out_err;
+        }
+
+       /* XXX now look at the flags in the superblock and determine if this 
+              is a block cow file system or a file cow fs.  Then assign the 
+              snap operations accordingly.  This goes in the sections for ext2/ext3/xfs etc
+        */ 
+
+       /* this might have been freed above */
+       CDEBUG(D_SUPER, "\n");
+       if (cache_data) {
+               SNAP_FREE(cache_data, PAGE_SIZE);
+               cache_data = NULL;
+       }
+
+
+       /*
+         * We now know the dev of the cache: hash the cache.
+         *
+         * 'cache' is the struct snap_cache allocated for this
+         * snapfs mount.
+         */
+       CDEBUG(D_SUPER, "\n");
+       snap_cache_add(cache, sb->s_dev);
+
+       tableno  =  simple_strtoul(snapno, &endptr, 0);
+       cache->cache_snap_tableno = tableno;
+
+       CDEBUG(D_SUPER, "get tableno %d\n", cache->cache_snap_tableno);
+
+       /*
+         * make sure we have our own super operations
+         *
+         * Initialize or re-initialize the cache->cache_ops shared
+         * struct snap_ops structure set based on the underlying
+         * file system type.
+         */
+       CDEBUG(D_SUPER, "\n");
+       filter_setup_super_ops(cache->cache_filter, sb->s_op,
+                              &currentfs_super_ops);
+       CDEBUG(D_SUPER, "\n");
+       sb->s_op = filter_c2usops(cache->cache_filter); 
+        /*
+         * Save pointers in the snap_cache structure to the
+         * snapfs and underlying file system superblocks.
+         */
+       cache->cache_sb = sb; /* Underlying file system superblock. */
+
+       /* set up snapshot ops, handle COMPAT_FEATUREs */
+       if( 0 ){
+       }
+#ifdef CONFIG_SNAPFS_EXT2
+       else if ( strcmp (cache_type,"ext2") == 0 ){
+               cache->cache_type = FILTER_FS_EXT2;
+               filter_setup_snapshot_ops(cache->cache_filter, 
+                                       &ext2_snap_operations);
+               filter_setup_journal_ops(cache->cache_filter,
+                                       &snap_ext2_journal_ops);
+               if( !EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_SNAPFS) ){
+                       if( strcmp(cow_type, "block")==0 ){
+                               sb->u.ext2_sb.s_feature_compat |=
+                                       EXT2_FEATURE_COMPAT_BLOCKCOW;
+                               sb->u.ext2_sb.s_es->s_feature_compat |=
+                                       cpu_to_le32(EXT2_FEATURE_COMPAT_BLOCKCOW);
+                       }
+               }
+                sb->u.ext2_sb.s_last_cowed_ino = 0;
+       }
+#endif
+#ifdef CONFIG_SNAPFS_EXT3
+       else if ( strcmp (cache_type,"ext3") == 0 ){
+               cache->cache_type = FILTER_FS_EXT3;
+               filter_setup_snapshot_ops(cache->cache_filter,
+                                       &ext3_snap_operations);
+               filter_setup_journal_ops(cache->cache_filter,
+                                       &snap_ext3_journal_ops);
+               if( !EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_SNAPFS) ){
+                       if( strcmp(cow_type, "block")==0 ){
+                               sb->u.ext3_sb.s_es->s_feature_compat |=
+                                       cpu_to_le32(EXT3_FEATURE_COMPAT_BLOCKCOW);
+                       }
+               }
+               sb->u.ext3_sb.s_last_cowed_ino = 0;
+       }
+#endif
+
+       CDEBUG(D_SUPER, "\n");
+       /* now get our own directory operations */
+       if ( sb->s_root && sb->s_root->d_inode ) {
+               CDEBUG(D_SUPER, "\n");
+               filter_setup_dir_ops(cache->cache_filter, 
+                                    sb->s_root->d_inode->i_op,
+                                    &currentfs_dir_iops);
+               CDEBUG(D_SUPER, "\n");
+               sb->s_root->d_inode->i_op =filter_c2udiops(cache->cache_filter);
+//     CDEBUG(D_SUPER, "\n");
+//             sb->s_root->d_inode->i_snapop = ext2_snapops();
+
+               CDEBUG(D_SUPER, "lookup at %p\n", 
+                      sb->s_root->d_inode->i_op->lookup);
+#if 0
+               /* XXX is this needed ?? */
+               filter_setup_dentry_ops(cache->cache_filter, 
+                                       sb->s_root->d_op, 
+                                       &currentfs_dentry_ops);
+               sb->s_root->d_op = filter_c2udops(cache->cache_filter);
+#endif
+       }
+        /*
+         * Save a pointer to the snap_cache structure in the
+         * "snap_current" superblock.
+         */
+        (struct snap_cache *) sb->u.generic_sbp = cache;
+       CDEBUG(D_SUPER, "sb %lx, sb->u.generic_sbp: %lx\n",
+                (ulong) sb, (ulong) sb->u.generic_sbp);
+
+       /* we can free snapno and cache_type now, because it's not used */
+       if (snapno) {
+               SNAP_FREE(snapno, strlen(snapno) + 1);
+               snapno = NULL;
+       }
+       if (cache_type) {
+               SNAP_FREE(cache_type, strlen(cache_type) + 1);
+               snapno = NULL;
+       }
+       if (cow_type) {
+               SNAP_FREE(cow_type, strlen(cow_type) + 1);
+               cow_type = NULL;
+       }
+
+       CDEBUG(D_MALLOC, "after mounting: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+
+       MOD_INC_USE_COUNT;
+       EXIT;
+       return sb;
+
+ out_err:
+       CDEBUG(D_SUPER, "out_err called\n");
+       if (cache)
+               SNAP_FREE(cache, sizeof(struct snap_cache));
+       if (cache_data)
+               SNAP_FREE(cache_data, PAGE_SIZE);
+       if (snapno)
+               SNAP_FREE(snapno, strlen(snapno) + 1);
+       if (cache_type)
+               SNAP_FREE(cache_type, strlen(cache_type) + 1);
+       if (cow_type)
+               SNAP_FREE(cow_type, strlen(cow_type) + 1);
+
+       CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+       return NULL;
+}
+
+/*
+ * File system type record for "snap_current".  The initialiser is
+ * positional: the type name, the FS_REQUIRES_DEV flag, the read_super
+ * callback (snapfs_read_super) and a trailing NULL.
+ */
+struct file_system_type snapfs_current_type = {
+       "snap_current",
+       FS_REQUIRES_DEV, /* can use Ibaskets when ext2 does */
+       snapfs_read_super,
+       NULL
+};
+
+
+/*
+ * Parse the mount options of a clone.  The values of "dev=" and "name="
+ * are handed to store_opt() for devstr and namestr; every other
+ * comma-separated token is appended to cache_data.  "options" is split
+ * in place with strtok().
+ *
+ * Returns the end of the text written to cache_data, which equals
+ * cache_data itself when nothing was appended.
+ */
+static char *clonefs_options(char *options, char *cache_data,
+                           char **devstr, char **namestr)
+{
+       char *tail = cache_data;
+       char *token;
+       char *val;
+
+       if (options == NULL || cache_data == NULL)
+               return tail;
+
+       CDEBUG(D_SUPER, "parsing options\n");
+
+       token = strtok(options, ",");
+       while (token != NULL) {
+               CDEBUG(D_SUPER, "this_char %s\n", token);
+
+               if ((val = read_opt("dev", token)) != NULL) {
+                       store_opt(devstr, val);
+               } else if ((val = read_opt("name", token)) != NULL) {
+                       store_opt(namestr, val);
+               } else {
+                       /* unknown here: keep it, comma separated */
+                       tail += sprintf(tail, "%s%s",
+                                       tail != cache_data ? ",":"",
+                                       token);
+               }
+
+               token = strtok(NULL, ",");
+       }
+
+       return tail;
+}
+
+/*
+ * Resolve the path of a block device node to its device number.
+ *
+ * On success *dev receives the i_rdev of the inode behind dev_path and 0
+ * is returned.  Returns the error encoded by lookup_dentry() when the
+ * lookup fails, and -ENODEV when the path has no inode or is not a block
+ * device.  The dentry obtained from lookup_dentry() is released with
+ * dput() before returning; it used to be leaked on every call.
+ */
+static int snapfs_path2dev(char *dev_path, kdev_t *dev)
+{
+       struct dentry *dentry;
+       int rc = 0;
+
+       dentry = lookup_dentry(dev_path, NULL, 0);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (!dentry->d_inode || !S_ISBLK(dentry->d_inode->i_mode))
+               rc = -ENODEV;
+       else
+               *dev = dentry->d_inode->i_rdev;
+
+       dput(dentry);
+       return rc;
+}
+
+
+extern struct super_operations clone_super_ops;
+
+/*
+ * Set up the super block of a "snap_clone" mount.
+ *
+ * The mount data must contain dev=<path of the cache block device> and
+ * name=<snapshot name>.  Any other option is rejected, so no snapfs
+ * option is ever passed on to a bottom FS.  The clone is forced
+ * read-only and its root inode uses the inode number of the cache
+ * filesystem's root.
+ *
+ * Returns sb on success and NULL on failure.  All strings allocated
+ * while parsing the options are released on both paths.
+ */
+struct super_block *
+clone_read_super(
+        struct super_block *sb,
+        void *data,
+        int silent)
+{
+       struct snap_clone_info *clone_sb;
+       struct snap_cache *snap_cache = NULL;
+       int err;
+       char *cache_data = NULL;
+       char *cache_data_end;
+       char *devstr = NULL;
+       kdev_t dev;
+       char *namestr = NULL;
+       int index;
+       ino_t root_ino;
+       struct inode *root_inode;
+
+       ENTRY;
+
+       CDEBUG(D_MALLOC, "before parsing: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+
+       /* reserve space for the cache's data */
+       SNAP_ALLOC(cache_data, void *, PAGE_SIZE);
+       if ( !cache_data ) {
+               printk("clone_read_super: Cannot allocate data page.\n");
+               EXIT;
+               goto out_err;
+       }
+
+       CDEBUG(D_SUPER, "mount opts: %s\n", data ? (char *)data : "(none)");
+
+       /* read and validate options */
+       cache_data_end = clonefs_options(data, cache_data, &devstr, &namestr);
+
+       /* anything left over is an option a clone does not understand */
+       if (cache_data_end != cache_data) {
+               printk("clonefs: invalid mount option %s\n", cache_data);
+               EXIT;
+               goto out_err;
+       }
+       SNAP_FREE(cache_data, PAGE_SIZE);
+       cache_data = NULL;
+
+       if (!namestr || !devstr) {
+               printk("snapfs: mount options name and dev mandatory\n");
+               EXIT;
+               goto out_err;
+       }
+
+       err = snapfs_path2dev(devstr, &dev);
+       if ( err ) {
+               printk("snap: incorrect device option %s\n", devstr);
+               EXIT;
+               goto out_err;
+       }
+
+       snap_cache = snap_find_cache(dev);
+       if ( !snap_cache ) {
+               printk("snap: incorrect device option %s\n", devstr);
+               EXIT;
+               goto out_err;
+       }
+
+       index = snap_get_index_from_name (snap_cache->cache_snap_tableno,
+                                       namestr);
+       CDEBUG(D_SUPER, "tableno %d, name %s, get index %d\n",
+                       snap_cache->cache_snap_tableno, namestr, index);
+
+       if(index < 0 ) {
+               printk("No valid index for name %s passed to mount\n",namestr);
+               EXIT;
+               goto out_err;
+       }
+
+        /*
+         * Force clone fs to be read-only.
+         *
+         * XXX - Is there a way to change the mount options too so
+         * the fs is listed as RO by mount?
+         */
+        sb->s_flags |= MS_RDONLY;
+
+       /* set up the super block */
+       clone_sb = (struct snap_clone_info *)&sb->u.generic_sbp;
+       list_add(&clone_sb->clone_list_entry, &snap_cache->cache_clone_list);
+       clone_sb->clone_cache = snap_cache;
+       clone_sb->clone_index = index;
+       sb->s_op = &clone_super_ops;
+
+       root_ino = snap_cache->cache_sb->s_root->d_inode->i_ino;
+       root_inode = iget(sb, root_ino);
+
+       CDEBUG(D_SUPER, "readinode %p, root ino %ld, root inode at %p\n",
+              sb->s_op->read_inode, root_ino, root_inode);
+
+       sb->s_root = d_alloc_root(root_inode, NULL);
+       if (!sb->s_root) {
+               /*
+                * Undo what was set up above.  No reference on the cache
+                * root and no module use count may be taken for a mount
+                * that fails.
+                */
+               list_del(&clone_sb->clone_list_entry);
+               if (root_inode)
+                       iput(root_inode);
+               EXIT;
+               goto out_err;
+       }
+
+       /* the clone keeps the root of the cache filesystem pinned */
+       dget( snap_cache->cache_sb->s_root );
+
+       SNAP_FREE(devstr, strlen(devstr) + 1);
+       SNAP_FREE(namestr, strlen(namestr) + 1);
+       CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n",
+                (ulong) sb, (ulong) &sb->u.generic_sbp);
+
+       MOD_INC_USE_COUNT;
+       EXIT;
+       return sb;
+
+ out_err:
+       /* release whatever option parsing left behind */
+       if (cache_data)
+               SNAP_FREE(cache_data, PAGE_SIZE);
+       if (devstr)
+               SNAP_FREE(devstr, strlen(devstr) + 1);
+       if (namestr)
+               SNAP_FREE(namestr, strlen(namestr) + 1);
+
+       CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
+              snap_kmemory, snap_vmemory);
+       return NULL;
+}
+
+
+struct file_system_type snapfs_clone_type = { /* registered by init_snapfs() */
+       "snap_clone",           /* presumably the type name given to mount -- confirm against struct file_system_type */
+       0,                      /* unlike snap_current, FS_REQUIRES_DEV is not set; clone_read_super() takes the device from the dev= option */
+       clone_read_super,       /* super block setup routine for this type */
+       NULL
+};
+
+
+/*
+ * Initialise the cache hash and register both snapfs filesystem types.
+ *
+ * Returns 0 on success or the error from register_filesystem().  On
+ * failure neither filesystem type is left registered.
+ */
+int init_snapfs(void)
+{
+       int status;
+
+       snap_init_cache_hash();
+
+       status = register_filesystem(&snapfs_current_type);
+       if (status) {
+               printk("snapfs: failed in register current filesystem!\n");
+               /* do not let the next call overwrite this error */
+               return status;
+       }
+
+       status = register_filesystem(&snapfs_clone_type);
+       if (status) {
+               unregister_filesystem(&snapfs_current_type);
+               printk("snapfs: failed in register clone filesystem!\n");
+       }
+       return status;
+}
+
+
+
+/*
+ * Unregister both snapfs filesystem types, clone first.  A failure is
+ * only reported through printk; the function always returns 0.
+ */
+int cleanup_snapfs(void)
+{
+       int rc;
+
+       ENTRY;
+
+       rc = unregister_filesystem(&snapfs_clone_type);
+       if (rc != 0)
+               printk("snapfs: failed to unregister clone filesystem\n");
+
+       rc = unregister_filesystem(&snapfs_current_type);
+       if (rc != 0)
+               printk("snapfs: failed to unregister filesystem\n");
+
+       return 0;
+}
diff --git a/lustre/snapfs/symlink.c b/lustre/snapfs/symlink.c
new file mode 100644 (file)
index 0000000..41efbd1
--- /dev/null
@@ -0,0 +1,213 @@
+/*
+ *  lustre/snapfs/symlink.c
+ *
+ *  Symbolic link operations for the snapshot file system.
+ *
+ */
+
+#define EXPORT_SYMTAB
+
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+/* Returns 1 when the ext2 i_file_acl field of the inode is non-zero, else 0. */
+static inline int inode_has_ea(struct inode *inode)
+{
+       return inode->u.ext2_i.i_file_acl ? 1 : 0;
+}
+
+/*
+ * readlink for symlinks on the current filesystem: pass the call to the
+ * readlink operation obtained from the cache's filter.
+ *
+ * Returns what the underlying readlink returns, or -EINVAL when no cache
+ * is found for the inode's device or no readlink operation is available.
+ */
+static int currentfs_readlink(struct dentry * dentry, char * buffer, int buflen)
+{
+       struct inode *inode = dentry->d_inode;
+       /* filesystem block size expressed in 512-byte units */
+       int bpib = inode->i_sb->s_blocksize >> 9;
+       struct inode_operations *iops;
+       struct snap_cache *cache;
+       __u32 save_i_blocks;
+       int rc;
+
+       ENTRY;
+
+       cache = snap_find_cache(inode->i_dev);
+       if (cache == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       iops = filter_c2csiops(cache->cache_filter);
+       if (iops == NULL || iops->readlink == NULL) {
+               EXIT;
+               return -EINVAL;
+       }
+
+       /*
+        * A link whose only block is its EA block has to look like a
+        * fast symlink to the underlying readlink, so hide the block
+        * count for the duration of the call.
+        */
+       save_i_blocks = inode->i_blocks;
+       if (inode_has_ea(inode) && inode->i_blocks == bpib)
+               inode->i_blocks = 0;
+
+       rc = iops->readlink(dentry, buffer, buflen);
+
+       /* put the real block count back if it no longer matches */
+       if (inode->i_blocks != save_i_blocks) {
+               inode->i_blocks = save_i_blocks;
+               mark_inode_dirty(inode);
+       }
+
+       EXIT;
+       return rc;
+}
+
+/*
+ * Prepend "str/" to the text that starts at buf[pos], growing the text
+ * towards the front of buf.
+ *
+ * Returns the new start position, or pos unchanged (nothing written)
+ * when there is no room for str plus the separator in front of pos.
+ */
+static int cat_str_ahead(char *buf, int pos, const char* str)
+{
+       int len = strlen(str);
+       int start = pos - len - 1;
+
+       if (start >= 0) {
+               buf[pos - 1] = '/';
+               memcpy(buf + start, str, len);
+               pos = start;
+       }
+
+       return pos;
+}
+
+/*
+ * Adjust the path to follow when the link lies under dotsnap (skip
+ * .snap/clonexx...).
+ *
+ * In the following two cases NULL is returned, base is left untouched
+ * and the caller does the normal follow_link:
+ * (1) the link does not lie in .snap
+ * (2) the link is already in the root's .snap
+ *
+ * In every other case base is released here and the return value is
+ * either the dentry found by looking up the rebuilt path from the root
+ * of the filesystem, or an ERR_PTR.
+ */
+static struct dentry * dotsnap_follow_link(struct dentry *base,
+                                          struct dentry *dentry,
+                                          int follow)
+{
+       struct super_block *sb = dentry->d_inode->i_sb;
+       struct dentry *rc = NULL;
+       struct dentry *de = dentry, *de_save1=NULL, *de_save2=NULL;
+       char *buf = NULL;
+       int pos = D_MAXLEN;
+
+       SNAP_ALLOC(buf, char*, D_MAXLEN);
+       if( !buf ) {
+               /*
+                * The caller treats any non-NULL result as "base has been
+                * consumed", so release it here as the lookup path does.
+                */
+               dput(base);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       /*
+        * iterate upward to construct the path; the path is assembled
+        * back to front, starting at the end of buf
+        */
+       do {
+               if( de_save2 )
+                       pos = cat_str_ahead(buf, pos, de_save2->d_name.name);
+
+               /* inode numbers with any of the top four bits set mark dotsnap */
+               if ( de->d_inode && de->d_inode->i_ino & 0xF0000000 )
+                       goto lookup;
+
+               de_save2 = de_save1;
+               de_save1 = de;
+               de = de->d_parent;
+       } while (de->d_parent != de);
+
+       /* we are not under dotsnap */
+       goto exit;
+
+lookup:
+       /* See if we already under root's .snap */
+       de = de->d_parent;
+       if( de == sb->s_root )
+               goto exit;
+
+       while( (de->d_parent != de) && (de != sb->s_root) ){
+               pos = cat_str_ahead(buf, pos, de->d_name.name);
+               de = de->d_parent;
+       }
+       if( de_save1 )
+               pos = cat_str_ahead(buf, pos, de_save1->d_name.name);
+
+       pos = cat_str_ahead(buf, pos, ".snap");
+       /* terminate the string; this overwrites the trailing '/' */
+       buf[D_MAXLEN-1] = 0;
+       CDEBUG(D_FILE, "constructed path: %s\n", &buf[pos]);
+
+       /* FIXME lookup_dentry will never return NULL ?? */
+       rc = lookup_dentry(&buf[pos], dget(sb->s_root), follow);
+       if( !rc ){
+               rc = ERR_PTR(-ENOENT);
+               CDEBUG(D_FILE, "lookup_dentry return NULL~!@#$^&*\n");
+       }
+       /* the lookup started from the root, so the old base is not needed */
+       dput(base);
+
+exit:
+       SNAP_FREE(buf, D_MAXLEN);
+       return rc;
+}
+
+/*
+ * follow_link for symlinks on the current filesystem.
+ *
+ * Links lying under dotsnap are first offered to dotsnap_follow_link();
+ * if that returns anything but NULL, it is the result.  Otherwise the
+ * call is passed to the follow_link operation obtained from the cache's
+ * filter.
+ *
+ * Returns the resulting dentry or an ERR_PTR.  base is consumed on every
+ * path, including the error returns taken before any lookup is started.
+ */
+static struct dentry * currentfs_follow_link ( struct dentry *dentry,
+                                               struct dentry *base,
+                                               unsigned int follow)
+{
+       struct snap_cache *cache;
+       struct dentry * rc;
+       struct inode_operations *iops;
+       struct inode * inode = dentry->d_inode;
+       /* filesystem block size expressed in 512-byte units */
+       int bpib = inode->i_sb->s_blocksize >> 9;
+       __u32 save_i_blocks;
+
+       ENTRY;
+
+       cache = snap_find_cache(inode->i_dev);
+       if ( !cache ) {
+               /* nobody else will release base on this path */
+               dput(base);
+               EXIT;
+               return ERR_PTR(-EINVAL);
+       }
+
+       iops = filter_c2csiops(cache->cache_filter);
+       if (!iops ||
+           !iops->follow_link) {
+               /* nobody else will release base on this path */
+               dput(base);
+               rc = ERR_PTR(-EINVAL);
+               goto exit;
+       }
+
+       if( currentfs_is_under_dotsnap(dentry) ){
+               /* a non-NULL result means base has already been dealt with */
+               rc = dotsnap_follow_link( base, dentry, follow );
+               if( rc )
+                       goto exit;
+       }
+
+       save_i_blocks = inode->i_blocks;
+       /* If this link has an EA and its i_blocks is the EA's block,
+        * then we should treat it as a fast symlink
+        */
+       if( inode_has_ea(inode) && inode->i_blocks == bpib ) {
+               inode->i_blocks = 0;
+       }
+       rc = iops->follow_link(dentry, base, follow);
+
+       /* put the real block count back if it no longer matches */
+       if( inode->i_blocks != save_i_blocks ){
+               inode->i_blocks = save_i_blocks;
+               mark_inode_dirty(inode);
+       }
+
+exit:
+       EXIT;
+       return rc;
+}
+
+struct inode_operations currentfs_sym_iops = {        /* inode operations for symlinks; only these two are set */
+       readlink:       currentfs_readlink,     /* reads the link through the filter's readlink */
+       follow_link:    currentfs_follow_link   /* handles links under dotsnap, then the filter's follow_link */
+};
+
+struct file_operations currentfs_sym_fops;     /* no initialiser given here; NOTE(review): presumably filled in elsewhere -- confirm */
diff --git a/lustre/snapfs/sysctl.c b/lustre/snapfs/sysctl.c
new file mode 100644 (file)
index 0000000..f7f88a6
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ *  Sysctl and /proc entries for snapfs
+ */
+
+#define __NO_VERSION__
+#include <linux/config.h> /* for CONFIG_PROC_FS */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/swapctl.h>
+#include <linux/proc_fs.h>
+#include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/bitops.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/blk.h>
+#include <linux/filter.h>
+#include <linux/snapfs.h>
+#include <linux/snapsupport.h>
+
+
+/* /proc entries */
+
+#ifdef CONFIG_PROC_FS
+
+
+/*
+ * Use-count hook for the snapfs /proc entry: a non-zero fill takes a
+ * module reference, a zero fill gives one back.  inode is not used.
+ */
+static void snapfs_proc_modcount(struct inode *inode, int fill)
+{
+       if (!fill) {
+               MOD_DEC_USE_COUNT;
+               return;
+       }
+
+       MOD_INC_USE_COUNT;
+}
+
+/*
+ * /proc directory entry "snapfs", registered under proc_root_fs by
+ * init_snapfs_proc_sys().
+ *
+ * The second field is the length of the name that follows it and has to
+ * match the string exactly (it used to be 10, which is not the length of
+ * "snapfs").
+ */
+struct proc_dir_entry proc_fs_snapfs = {
+       0, sizeof("snapfs") - 1, "snapfs",
+       S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0,
+       0, &proc_dir_inode_operations,
+       NULL, NULL,
+       NULL,
+       NULL, NULL
+};
+
+
+#endif
+
+
+/* SYSCTL below */
+
+static struct ctl_table_header *snapfs_table_header = NULL;   /* set by init_snapfs_proc_sys(), cleared by cleanup_snapfs_proc_sys() */
+/* The id of the "snapfs" sysctl directory is kept above 0x100 to avoid any
+ * chance of collisions at any point in the tree with non-directories
+ */
+#define PSDEV_SNAPFS  (0x120)
+
+#define PSDEV_DEBUG       1      /* control debugging */
+#define PSDEV_TRACE       2      /* control enter/leave pattern */
+
+/* These are global control options */
+#define ENTRY_CNT 3    /* the two entries below plus the closing {0} */
+
+/* XXX - doesn't seem to be working in 2.2.15
+ *
+ * "debug" and "trace" expose snap_debug_level and snap_print_entry as
+ * ints with mode 0644, handled by proc_dointvec.
+ */
+static struct ctl_table snapfs_ctltable[ENTRY_CNT] =
+{
+       {PSDEV_DEBUG, "debug", &snap_debug_level, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_TRACE, "trace", &snap_print_entry, sizeof(int), 0644, NULL, &proc_dointvec},
+       {0}
+};
+
+static ctl_table snapfs_table[2] = {   /* top level: the "snapfs" directory holding snapfs_ctltable */
+       {PSDEV_SNAPFS, "snapfs",    NULL, 0, 0555, snapfs_ctltable},
+       {0}
+};
+
+
+/*
+ * Register the snapfs sysctl table (with CONFIG_SYSCTL) and the snapfs
+ * /proc directory entry (with CONFIG_PROC_FS).  Always returns 0.
+ */
+int /* __init */ init_snapfs_proc_sys(void)
+{
+
+#ifdef CONFIG_SYSCTL
+       /* register only once */
+       if ( !snapfs_table_header )
+               snapfs_table_header =
+                       register_sysctl_table(snapfs_table, 0);
+#endif
+#ifdef CONFIG_PROC_FS
+       /* install the use-count hook before the entry can be reached */
+       proc_fs_snapfs.fill_inode = &snapfs_proc_modcount;
+       proc_register(&proc_root_fs, &proc_fs_snapfs);
+#endif
+       return 0;
+}
+
+/*
+ * Undo init_snapfs_proc_sys(): drop the sysctl table if it was
+ * registered and remove the snapfs /proc directory entry.
+ */
+void cleanup_snapfs_proc_sys(void) {
+
+#ifdef CONFIG_SYSCTL
+       if ( snapfs_table_header )
+               unregister_sysctl_table(snapfs_table_header);
+       snapfs_table_header = NULL;
+#endif
+
+       /* same test as used where the entry is defined and registered */
+#ifdef CONFIG_PROC_FS
+       proc_unregister(&proc_root_fs, proc_fs_snapfs.low_ino);
+#endif
+}
+