Branch HEAD

[fs/lustre-release.git] / lustre / lvfs / fsfilt_ext3.c
diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c

index f83949c..ce5532b 100644 (file)
--- a/lustre/lvfs/fsfilt_ext3.c
+++ b/lustre/lvfs/fsfilt_ext3.c
@@ -39,20 +39,12 @@
  #include <linux/quota.h>
  #include <linux/quotaio_v1.h>
  #include <linux/quotaio_v2.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/ext3_xattr.h>
-#else
  #include <ext3/xattr.h>
-#endif
  
  #include <libcfs/kp30.h>
  #include <lustre_fsfilt.h>
  #include <obd.h>
-#include <obd_class.h>
  #include <lustre_quota.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/iobuf.h>
-#endif
  #include <linux/lustre_compat25.h>
  #include <linux/lprocfs_status.h>
  
@@ -68,7 +60,17 @@
  #define FSFILT_DELETE_TRANS_BLOCKS(sb)    EXT3_DELETE_TRANS_BLOCKS(sb)
  #endif
  
-static kmem_cache_t *fcb_cache;
+#ifdef EXT3_SINGLEDATA_TRANS_BLOCKS_HAS_SB
+/* for kernels 2.6.18 and later: the ext3 macro takes the superblock */
+#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)
+#else /* the ext3 macro takes no argument here, so sb is dropped */
+#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS
+#endif
+
+#define fsfilt_ext3_journal_start(inode, nblocks) ext3_journal_start(inode, nblocks)
+#define fsfilt_ext3_journal_stop(handle)          ext3_journal_stop(handle)
+
+static cfs_mem_cache_t *fcb_cache;
  
  struct fsfilt_cb_data {
          struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
@@ -95,9 +97,7 @@ static int fsfilt_ext3_set_label(struct super_block *sb, char *label)
          int err;
  
          journal = EXT3_SB(sb)->s_journal;
-        lock_24kernel();
          handle = journal_start(journal, 1);
-        unlock_24kernel();
          if (IS_ERR(handle)) {
                  CERROR("can't start transaction\n");
                  return(PTR_ERR(handle));
@@ -113,9 +113,7 @@ static int fsfilt_ext3_set_label(struct super_block *sb, char *label)
          err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
  
  out:
-        lock_24kernel();
          journal_stop(handle);
-        unlock_24kernel();
  
          return(err);
  }
@@ -125,6 +123,28 @@ static char *fsfilt_ext3_uuid(struct super_block *sb)
          return EXT3_SB(sb)->s_es->s_uuid;
  }
  
+#ifdef HAVE_DISK_INODE_VERSION
+/* Return the 64-bit version kept in the ext3 in-core inode (i_fs_version). */
+static __u64 fsfilt_ext3_get_version(struct inode *inode)
+{
+        __u64 version = EXT3_I(inode)->i_fs_version;
+
+        return version;
+}
+
+/* Replace the inode's 64-bit version with new_version and hand back the
+ * value that was stored before the update. */
+static __u64 fsfilt_ext3_set_version(struct inode *inode, __u64 new_version)
+{
+        __u64 previous;
+
+        previous = EXT3_I(inode)->i_fs_version;
+        EXT3_I(inode)->i_fs_version = new_version;
+        return previous;
+}
+
+#endif
+
  /*
   * We don't currently need any additional blocks for rmdir and
   * unlink transactions because we are storing the OST oa_id inside
@@ -135,7 +155,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                                 int logs)
  {
          /* For updates to the last received file */
-        int nblocks = EXT3_SINGLEDATA_TRANS_BLOCKS;
+        int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
          journal_t *journal;
          void *handle;
  
@@ -151,11 +171,11 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                  /* delete one file + create/update logs for each stripe */
                  nblocks += FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);
                  nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
-                            EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
+                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                  break;
          case FSFILT_OP_RENAME:
                  /* modify additional directory */
-                nblocks += EXT3_SINGLEDATA_TRANS_BLOCKS;
+                nblocks += FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
                  /* no break */
          case FSFILT_OP_SYMLINK:
                  /* additional block + block bitmap + GDT for long symlink */
@@ -189,7 +209,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                           FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
                  /* create/update logs for each stripe */
                  nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
-                            EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
+                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                  break;
          case FSFILT_OP_SETATTR:
                  /* Setattr on inode */
@@ -198,7 +218,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                           FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
                  /* quota chown log for each stripe */
                  nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
-                            EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
+                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                  break;
          case FSFILT_OP_CANCEL_UNLINK:
                  /* blocks for log header bitmap update OR
@@ -207,14 +227,14 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                          FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs;
                  break;
          case FSFILT_OP_JOIN:
-                /* delete 2 file(file + array id) + create 1 file (array id) 
+                /* delete 2 file(file + array id) + create 1 file (array id)
                   * create/update logs for each stripe */
                  nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);
-               
-                /*create array log for head file*/ 
+
+                /*create array log for head file*/
                  nblocks += 3;
                  nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
-                            EXT3_SINGLEDATA_TRANS_BLOCKS);
+                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb));
                  /*update head file array */
                  nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                           FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
@@ -227,15 +247,13 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
          journal = EXT3_SB(inode->i_sb)->s_journal;
          if (nblocks > journal->j_max_transaction_buffers) {
                  CWARN("too many credits %d for op %ux%u using %d instead\n",
-                      nblocks, op, logs, journal->j_max_transaction_buffers);
+                       nblocks, op, logs, journal->j_max_transaction_buffers);
                  nblocks = journal->j_max_transaction_buffers;
          }
  
   journal_start:
          LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
-        lock_24kernel();
-        handle = journal_start(EXT3_JOURNAL(inode), nblocks);
-        unlock_24kernel();
+        handle = fsfilt_ext3_journal_start(inode, nblocks);
  
          if (!IS_ERR(handle))
                  LASSERT(current->journal_info == handle);
@@ -322,7 +340,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
           * quota file that is active.  This is at least true for now.
           */
          needed += hweight32(sb_any_quota_enabled(sb)) *
-                EXT3_SINGLEDATA_TRANS_BLOCKS;
+                FSFILT_SINGLEDATA_TRANS_BLOCKS(sb);
  #endif
  
          return needed;
@@ -372,9 +390,7 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
          }
  
          LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);
-        lock_24kernel();
-        handle = journal_start(journal, needed);
-        unlock_24kernel();
+        handle = fsfilt_ext3_journal_start(fso->fso_dentry->d_inode, needed);
          if (IS_ERR(handle)) {
                  CERROR("can't get handle for %d credits: rc = %ld\n", needed,
                         PTR_ERR(handle));
@@ -414,9 +430,7 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
          if (force_sync)
                  handle->h_sync = 1; /* recovery likes this */
  
-        lock_24kernel();
-        rc = journal_stop(handle);
-        unlock_24kernel();
+        rc = fsfilt_ext3_journal_stop(handle);
  
          return rc;
  }
@@ -426,36 +440,23 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h,
  {
          unsigned long tid;
          transaction_t *transaction;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
-        unsigned long rtid;
-#endif
          handle_t *handle = h;
          journal_t *journal;
          int rc;
  
          LASSERT(current->journal_info == handle);
  
-        lock_24kernel();
          transaction = handle->h_transaction;
          journal = transaction->t_journal;
          tid = transaction->t_tid;
          /* we don't want to be blocked */
          handle->h_sync = 0;
-        rc = journal_stop(handle);
+        rc = fsfilt_ext3_journal_stop(handle);
          if (rc) {
                  CERROR("error while stopping transaction: %d\n", rc);
-                unlock_24kernel();
                  return rc;
          }
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
-        rtid = log_start_commit(journal, transaction);
-        if (rtid != tid)
-                CERROR("strange race: %lu != %lu\n",
-                       (unsigned long) tid, (unsigned long) rtid);
-#else
          log_start_commit(journal, tid);
-#endif
-        unlock_24kernel();
  
          *wait_handle = (void *) tid;
          CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid);
@@ -484,8 +485,6 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          struct inode *inode = dentry->d_inode;
          int rc = 0;
  
-        lock_kernel();
-
          /* Avoid marking the inode dirty on the superblock list unnecessarily.
           * We are already writing the inode to disk as part of this
           * transaction and want to avoid a lot of extra inode writeout
@@ -493,7 +492,8 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
                  /* ATTR_SIZE would invoke truncate: clear it */
                  iattr->ia_valid &= ~ATTR_SIZE;
-                EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
+                EXT3_I(inode)->i_disksize = iattr->ia_size;
+                i_size_write(inode, iattr->ia_size);
  
                  if (iattr->ia_valid & ATTR_UID)
                          inode->i_uid = iattr->ia_uid;
@@ -535,7 +535,6 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,
          }
  
   out:
-        unlock_kernel();
          RETURN(rc);
  }
  
@@ -566,13 +565,11 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
  
          LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
  
-        lock_24kernel();
          rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED,
                                     name, lmm, lmm_size, 0);
  
-        unlock_24kernel();
  
-        if (rc)
+        if (rc && rc != -EROFS)
                  CERROR("error adding MD data to inode %lu: rc = %d\n",
                         inode->i_ino, rc);
          return rc;
@@ -585,11 +582,9 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size,
          int rc;
  
          LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
-        lock_24kernel();
  
          rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED,
                              name, lmm, lmm_size);
-        unlock_24kernel();
  
          /* This gives us the MD size */
          if (lmm == NULL)
@@ -606,43 +601,11 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size,
          return rc;
  }
  
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
  static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio)
  {
          submit_bio(rw, bio);
          return 0;
  }
-#else
-static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio)
-{
-        int rc, blk_per_page;
-
-        rc = brw_kiovec(rw, 1, &bio, inode->i_dev,
-                        KIOBUF_GET_BLOCKS(bio), 1 << inode->i_blkbits);
-        /*
-         * brw_kiovec() returns number of bytes actually written. If error
-         * occurred after something was written, error code is returned though
-         * kiobuf->errno. (See bug 6854.)
-         */
-
-        blk_per_page = CFS_PAGE_SIZE >> inode->i_blkbits;
-
-        if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page) {
-                CERROR("short write?  expected %d, wrote %d (%d)\n",
-                       (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page,
-                       rc, bio->errno);
-        }
-        if (bio->errno != 0) {
-                CERROR("IO error. Wrote %d of %d (%d)\n",
-                       rc,
-                       (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page,
-                       bio->errno);
-                rc = bio->errno;
-        }
-
-        return rc;
-}
-#endif
  
  static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
                                      loff_t *off)
@@ -662,7 +625,7 @@ static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
                          struct buffer_head *bh;
  
                          bh = NULL;
-                        if (*off < inode->i_size) {
+                        if (*off < i_size_read(inode)) {
                                  int err = 0;
  
                                  bh = ext3_bread(NULL, inode, *off >> blkbits,
@@ -715,7 +678,7 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
  {
          struct fsfilt_cb_data *fcb;
  
-        OBD_SLAB_ALLOC(fcb, fcb_cache, GFP_NOFS, sizeof *fcb);
+        OBD_SLAB_ALLOC(fcb, fcb_cache, CFS_ALLOC_IO, sizeof *fcb);
          if (fcb == NULL)
                  RETURN(-ENOMEM);
  
@@ -725,10 +688,8 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
          fcb->cb_data = cb_data;
  
          CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
-        lock_24kernel();
          journal_callback_set(handle, fsfilt_ext3_cb_func,
                               (struct journal_callback *)fcb);
-        unlock_24kernel();
  
          return 0;
  }
@@ -746,7 +707,8 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
          int rc;
  
          memset(&sfs, 0, sizeof(sfs));
-        rc = ll_do_statfs(sb,&sfs);
+
+        rc = ll_do_statfs(sb, &sfs);
  
          if (!rc && sfs.f_bfree < sfs.f_ffree) {
                  sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
@@ -771,10 +733,7 @@ static int fsfilt_ext3_sync(struct super_block *sb)
  #endif
  
  #ifdef EXT3_MULTIBLOCK_ALLOCATOR
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define ext3_up_truncate_sem(inode)  up_write(&EXT3_I(inode)->truncate_sem);
-#define ext3_down_truncate_sem(inode)  down_write(&EXT3_I(inode)->truncate_sem);
-#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
  #define ext3_up_truncate_sem(inode)  up(&EXT3_I(inode)->truncate_sem);
  #define ext3_down_truncate_sem(inode)  down(&EXT3_I(inode)->truncate_sem);
  #else
@@ -782,6 +741,25 @@ static int fsfilt_ext3_sync(struct super_block *sb)
  #define ext3_down_truncate_sem(inode)  mutex_lock(&EXT3_I(inode)->truncate_mutex);
  #endif
  
+#ifndef EXT_ASSERT
+#define EXT_ASSERT(cond)  BUG_ON(!(cond))
+#endif
+
+#ifdef EXT3_EXT_HAS_NO_TREE
+/* kernels 2.6.18 and later: the extents base is the inode; cbdata is passed */
+#define ext3_ext_base                   inode
+#define ext3_ext_base2inode(inode)      (inode)
+#define EXT_DEPTH(inode)                ext_depth(inode)
+#define EXT_GENERATION(inode)           ext_generation(inode)
+#define fsfilt_ext3_ext_walk_space(inode, block, num, cb, cbdata) \
+                        ext3_ext_walk_space(inode, block, num, cb, cbdata)
+#else /* base is an ext3_extents_tree; cbdata is unused (see tree->private) */
+#define ext3_ext_base                   ext3_extents_tree
+#define ext3_ext_base2inode(tree)       ((tree)->inode)
+#define fsfilt_ext3_ext_walk_space(tree, block, num, cb, cbdata) \
+                        ext3_ext_walk_space(tree, block, num, cb)
+#endif
+
  #include <linux/lustre_version.h>
  #if EXT3_EXT_MAGIC == 0xf301
  #define ee_start e_start
@@ -842,40 +820,80 @@ static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path,
          return bg_start + colour + block;
  }
  
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/locks.h>
-static void ll_unmap_underlying_metadata(struct super_block *sb,
-                                         unsigned long blocknr)
+#define ll_unmap_underlying_metadata(sb, blocknr) \
+        unmap_underlying_metadata((sb)->s_bdev, blocknr)
+
+#ifndef EXT3_MB_HINT_GROUP_ALLOC
+static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
+                                struct ext3_ext_path *path, unsigned long block,
+                                unsigned long *count, int *err)
  {
-        struct buffer_head *old_bh;
-
-        old_bh = get_hash_table(sb->s_dev, blocknr, sb->s_blocksize);
-        if (old_bh) {
-                mark_buffer_clean(old_bh);
-                wait_on_buffer(old_bh);
-                clear_bit(BH_Req, &old_bh->b_state);
-                __brelse(old_bh);
-        }
+        struct inode *inode = ext3_ext_base2inode(base);
+        unsigned long hint;
+        int mb_flags = 0;
+
+        /* ask for an allocation goal; the callee is handed &mb_flags as well */
+        hint = ext3_ext_find_goal(inode, path, block, &mb_flags);
+        mb_flags |= 2; /* blocks have already been reserved */
+
+        return ext3_mb_new_blocks(handle, inode, hint, count, mb_flags, err);
  }
  #else
-#define ll_unmap_underlying_metadata(sb, blocknr) \
-        unmap_underlying_metadata((sb)->s_bdev, blocknr)
+/* Variant for the request-based mballoc API; *count is updated from ar.len. */
+static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
+                                struct ext3_ext_path *path, unsigned long block,
+                                unsigned long *count, int *err)
+{
+        struct inode *inode = ext3_ext_base2inode(base);
+        struct ext3_allocation_request ar;
+        unsigned long pblock;
+        int aflags = 0; /* scratch for ext3_ext_find_goal() */
+
+        /* find neighbour allocated blocks; a failed search returns 0 with *err set */
+        ar.lleft = block;
+        *err = ext3_ext_search_left(base, path, &ar.lleft, &ar.pleft);
+        if (*err)
+                return 0;
+        ar.lright = block;
+        *err = ext3_ext_search_right(base, path, &ar.lright, &ar.pright);
+        if (*err)
+                return 0;
+
+        /* allocate new block */
+        ar.goal = ext3_ext_find_goal(inode, path, block, &aflags);
+        ar.inode = inode;
+        ar.logical = block;
+        ar.len = *count;
+        ar.flags = EXT3_MB_HINT_DATA;
+        pblock = ext3_mb_new_blocks(handle, &ar, err);
+        *count = ar.len;
+        return pblock;
+}
  #endif
  
-static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
+#ifdef EXT3_EXT_HAS_NO_TREE
+static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
+                                  struct ext3_ext_path *path,
+                                  struct ext3_ext_cache *cex,
+                                  void *cbdata)
+{
+        struct bpointers *bp = cbdata;
+#else
+static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
                                    struct ext3_ext_path *path,
                                    struct ext3_ext_cache *cex)
  {
-        struct inode *inode = tree->inode;
-        struct bpointers *bp = tree->private;
+        struct bpointers *bp = base->private;
+#endif
+        struct inode *inode = ext3_ext_base2inode(base);
          struct ext3_extent nex;
-        int count, err, goal;
          unsigned long pblock;
          unsigned long tgen;
+        int err, i;
+        unsigned long count;
          handle_t *handle;
-        int i, aflags = 0;
  
-        i = EXT_DEPTH(tree);
+        i = EXT_DEPTH(base);
          EXT_ASSERT(i == path->p_depth);
          EXT_ASSERT(path[i].p_hdr);
  
@@ -903,33 +921,25 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
                  return EXT_CONTINUE;
          }
  
-        tgen = EXT_GENERATION(tree);
-        count = ext3_ext_calc_credits_for_insert(tree, path);
+        tgen = EXT_GENERATION(base);
+        count = ext3_ext_calc_credits_for_insert(base, path);
          ext3_up_truncate_sem(inode);
  
-        lock_24kernel();
-        handle = journal_start(EXT3_JOURNAL(inode), count+EXT3_ALLOC_NEEDED+1);
-        unlock_24kernel();
+        handle = fsfilt_ext3_journal_start(inode, count+EXT3_ALLOC_NEEDED+1);
          if (IS_ERR(handle)) {
                  ext3_down_truncate_sem(inode);
                  return PTR_ERR(handle);
          }
  
          ext3_down_truncate_sem(inode);
-        if (tgen != EXT_GENERATION(tree)) {
+        if (tgen != EXT_GENERATION(base)) {
                  /* the tree has changed. so path can be invalid at moment */
-                lock_24kernel();
-                journal_stop(handle);
-                unlock_24kernel();
+                fsfilt_ext3_journal_stop(handle);
                  return EXT_REPEAT;
          }
  
          count = cex->ec_len;
-        goal = ext3_ext_find_goal(inode, path, cex->ec_block, &aflags);
-        aflags |= 2; /* block have been already reserved */
-        lock_24kernel();
-        pblock = ext3_mb_new_blocks(handle, inode, goal, &count, aflags, &err);
-        unlock_24kernel();
+        pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
          if (!pblock)
                  goto out;
          EXT_ASSERT(count <= cex->ec_len);
@@ -938,9 +948,13 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
          nex.ee_block = cex->ec_block;
          nex.ee_start = pblock;
          nex.ee_len = count;
-        err = ext3_ext_insert_extent(handle, tree, path, &nex);
-        if (err)
+        err = ext3_ext_insert_extent(handle, base, path, &nex);
+        if (err) {
+                CERROR("can't insert extent: %d\n", err);
+                /* XXX: export ext3_free_blocks() */
+                /*ext3_free_blocks(handle, inode, nex.ee_start, nex.ee_len, 0);*/
                  goto out;
+        }
  
          /*
           * Putting len of the actual extent we just inserted,
@@ -953,9 +967,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
          BUG_ON(nex.ee_block != cex->ec_block);
  
  out:
-        lock_24kernel();
-        journal_stop(handle);
-        unlock_24kernel();
+        fsfilt_ext3_journal_stop(handle);
  map:
          if (err >= 0) {
                  /* map blocks */
@@ -997,15 +1009,22 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block,
                         unsigned long num, unsigned long *blocks,
                         int *created, int create)
  {
+#ifdef EXT3_EXT_HAS_NO_TREE
+        struct ext3_ext_base *base = inode;
+#else
          struct ext3_extents_tree tree;
+        struct ext3_ext_base *base = &tree;
+#endif
          struct bpointers bp;
          int err;
  
          CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n",
                 block, block + num - 1, (unsigned) inode->i_ino);
  
-        ext3_init_tree_desc(&tree, inode);
+#ifndef EXT3_EXT_HAS_NO_TREE
+        ext3_init_tree_desc(base, inode);
          tree.private = &bp;
+#endif
          bp.blocks = blocks;
          bp.created = created;
          bp.start = block;
@@ -1013,8 +1032,9 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block,
          bp.create = create;
  
          ext3_down_truncate_sem(inode);
-        err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb);
-        ext3_ext_invalidate_cache(&tree);
+        err = fsfilt_ext3_ext_walk_space(base, block, num,
+                                         ext3_ext_new_extent_cb, &bp);
+        ext3_ext_invalidate_cache(base);
          ext3_up_truncate_sem(inode);
  
          return err;
@@ -1118,30 +1138,20 @@ int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
          return rc;
  }
  
-extern int ext3_prep_san_write(struct inode *inode, long *blocks,
-                               int nblocks, loff_t newsize);
-static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
-                                      int nblocks, loff_t newsize)
-{
-        return ext3_prep_san_write(inode, blocks, nblocks, newsize);
-}
-
-static int fsfilt_ext3_read_record(struct file * file, void *buf,
-                                   int size, loff_t *offs)
+int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
  {
-        struct inode *inode = file->f_dentry->d_inode;
          unsigned long block;
          struct buffer_head *bh;
-        int err, blocksize, csize, boffs;
+        int err, blocksize, csize, boffs, osize = size;
  
          /* prevent reading after eof */
          lock_kernel();
-        if (inode->i_size < *offs + size) {
-                size = inode->i_size - *offs;
+        if (i_size_read(inode) < *offs + size) {
+                size = osize = i_size_read(inode) - *offs; /* short read */
                  unlock_kernel();
                  if (size < 0) {
                          CERROR("size %llu is too short for read %u@%llu\n",
-                               inode->i_size, size, *offs);
+                               i_size_read(inode), size, *offs);
                          return -EIO;
                  } else if (size == 0) {
                          return 0;
@@ -1169,36 +1179,28 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf,
                  buf += csize;
                  size -= csize;
          }
-        return 0;
+        return osize;
  }
+EXPORT_SYMBOL(fsfilt_ext3_read);
  
-static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
-                                    loff_t *offs, int force_sync)
+static int fsfilt_ext3_read_record(struct file * file, void *buf,
+                                   int size, loff_t *offs)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        int nread = fsfilt_ext3_read(inode, buf, size, offs);
+
+        /* collapse a positive byte count to 0; other results pass through */
+        return nread > 0 ? 0 : nread;
+}
+
+int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
+                                loff_t *offs, handle_t *handle)
  {
          struct buffer_head *bh = NULL;
+        loff_t old_size = i_size_read(inode), offset = *offs;
+        loff_t new_size = i_size_read(inode);
          unsigned long block;
-        struct inode *inode = file->f_dentry->d_inode;
-        loff_t old_size = inode->i_size, offset = *offs;
-        loff_t new_size = inode->i_size;
-        journal_t *journal;
-        handle_t *handle;
-        int err, block_count = 0, blocksize, size, boffs;
-
-        /* Determine how many transaction credits are needed */
-        blocksize = 1 << inode->i_blkbits;
-        block_count = (*offs & (blocksize - 1)) + bufsize;
-        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
-
-        journal = EXT3_SB(inode->i_sb)->s_journal;
-        lock_24kernel();
-        handle = journal_start(journal,
-                               block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
-        unlock_24kernel();
-        if (IS_ERR(handle)) {
-                CERROR("can't start transaction for %d blocks (%d bytes)\n",
-                       block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize);
-                return PTR_ERR(handle);
-        }
+        int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs;
  
          while (bufsize > 0) {
                  if (bh != NULL)
@@ -1210,14 +1212,14 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                  bh = ext3_bread(handle, inode, block, 1, &err);
                  if (!bh) {
                          CERROR("can't read/create block: %d\n", err);
-                        goto out;
+                        break;
                  }
  
                  err = ext3_journal_get_write_access(handle, bh);
                  if (err) {
                          CERROR("journal_get_write_access() returned error %d\n",
                                 err);
-                        goto out;
+                        break;
                  }
                  LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
                  memcpy(bh->b_data + boffs, buf, size);
@@ -1225,7 +1227,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                  if (err) {
                          CERROR("journal_dirty_metadata() returned error %d\n",
                                 err);
-                        goto out;
+                        break;
                  }
                  if (offset + size > new_size)
                          new_size = offset + size;
@@ -1233,33 +1235,56 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                  bufsize -= size;
                  buf += size;
          }
-
-        if (force_sync)
-                handle->h_sync = 1; /* recovery likes this */
-out:
          if (bh)
                  brelse(bh);
  
          /* correct in-core and on-disk sizes */
-        if (new_size > inode->i_size) {
+        if (new_size > i_size_read(inode)) {
                  lock_kernel();
-                if (new_size > inode->i_size)
-                        inode->i_size = new_size;
-                if (inode->i_size > EXT3_I(inode)->i_disksize)
-                        EXT3_I(inode)->i_disksize = inode->i_size;
-                if (inode->i_size > old_size)
+                if (new_size > i_size_read(inode))
+                        i_size_write(inode, new_size);
+                if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
+                        EXT3_I(inode)->i_disksize = i_size_read(inode);
+                if (i_size_read(inode) > old_size)
                          mark_inode_dirty(inode);
                  unlock_kernel();
          }
  
-        lock_24kernel();
-        journal_stop(handle);
-        unlock_24kernel();
-
          if (err == 0)
                  *offs = offset;
          return err;
  }
+EXPORT_SYMBOL(fsfilt_ext3_write_handle);
+
+/* Write @buf to the file at *@offs inside its own journal transaction. */
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
+                                    loff_t *offs, int force_sync)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        int blocksize = 1 << inode->i_blkbits;
+        int block_count, credits, err, rc;
+        handle_t *handle;
+
+        /* Determine how many transaction credits are needed */
+        block_count = (*offs & (blocksize - 1)) + bufsize;
+        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+        credits = block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2;
+
+        handle = fsfilt_ext3_journal_start(inode, credits);
+        if (IS_ERR(handle)) {
+                CERROR("can't start transaction for %d blocks (%d bytes)\n",
+                       credits, bufsize);
+                return PTR_ERR(handle);
+        }
+
+        err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle);
+        if (!err && force_sync)
+                handle->h_sync = 1; /* recovery likes this */
+
+        /* report a failed journal stop unless the write itself already failed */
+        rc = fsfilt_ext3_journal_stop(handle);
+        return err ? err : rc;
+}
  
  static int fsfilt_ext3_setup(struct super_block *sb)
  {
@@ -1274,6 +1299,9 @@ static int fsfilt_ext3_setup(struct super_block *sb)
  #endif
          if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
                  CWARN("filesystem doesn't have dir_index feature enabled\n");
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)) && defined(HAVE_QUOTA_SUPPORT) /* defined(): same test as the #ifdef HAVE_QUOTA_SUPPORT guards in this file */
+        set_opt(EXT3_SB(sb)->s_mount_opt, QUOTA); /* turn on the QUOTA bit in the ext3 mount options */
+#endif /* >= 2.6.13 && HAVE_QUOTA_SUPPORT */
          return 0;
  }
  
@@ -1331,7 +1359,7 @@ do {                                            \
          Q_COPY(out, in, dqb_valid);             \
  } while (0)
  
-      
+
  
  static int fsfilt_ext3_quotactl(struct super_block *sb,
                                  struct obd_quotactl *oqc)
@@ -1749,13 +1777,13 @@ static int commit_chkquot(struct super_block *sb, struct qchk_ctxt *qctxt,
          if (cdqb->dqb_bsoftlimit &&
              toqb(cdqb->dqb_curspace) >= cdqb->dqb_bsoftlimit &&
              !cdqb->dqb_btime)
-                cdqb->dqb_btime =
+                cdqb->dqb_btime = 
                          now + qctxt->qckt_dqinfo[cdqb->dqb_type].dqi_bgrace;
  
          if (cdqb->dqb_isoftlimit &&
              cdqb->dqb_curinodes >= cdqb->dqb_isoftlimit &&
              !cdqb->dqb_itime)
-                cdqb->dqb_itime =
+                cdqb->dqb_itime = 
                          now + qctxt->qckt_dqinfo[cdqb->dqb_type].dqi_igrace;
  
          cdqb->dqb_valid = QIF_ALL;
@@ -1835,7 +1863,7 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
                  bitmap_bh = read_inode_bitmap(sb, group);
                  if (!bitmap_bh) {
                          CERROR("read_inode_bitmap group %d failed", group);
-                        GOTO(out, -EIO);
+                        GOTO(out, rc = -EIO);
                  }
  
                  for (i = 0; i < sbi->s_inodes_per_group; i++, ino++) {
@@ -1915,7 +1943,7 @@ out:
  }
  
  #ifdef HAVE_QUOTA_SUPPORT
-static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, 
+static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type,
                                   int cmd)
  {
          int rc = 0;
@@ -1989,6 +2017,12 @@ static int fsfilt_ext3_dquot(struct lustre_dquot *dquot, int cmd)
  }
  #endif
  
+lvfs_sbdev_type fsfilt_ext3_journal_sbdev(struct super_block *sb)
+{       /* Returns the journal_bdev field of sb's ext3 superblock info (EXT3_SB). */
+        return (EXT3_SB(sb)->journal_bdev);
+}
+EXPORT_SYMBOL(fsfilt_ext3_journal_sbdev);
+
  static struct fsfilt_operations fsfilt_ext3_ops = {
          .fs_type                = "ext3",
          .fs_owner               = THIS_MODULE,
@@ -2010,7 +2044,6 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
          .fs_statfs              = fsfilt_ext3_statfs,
          .fs_sync                = fsfilt_ext3_sync,
          .fs_map_inode_pages     = fsfilt_ext3_map_inode_pages,
-        .fs_prep_san_write      = fsfilt_ext3_prep_san_write,
          .fs_write_record        = fsfilt_ext3_write_record,
          .fs_read_record         = fsfilt_ext3_read_record,
          .fs_setup               = fsfilt_ext3_setup,
@@ -2018,20 +2051,24 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
          .fs_get_op_len          = fsfilt_ext3_get_op_len,
          .fs_quotactl            = fsfilt_ext3_quotactl,
          .fs_quotacheck          = fsfilt_ext3_quotacheck,
+#ifdef HAVE_DISK_INODE_VERSION
+        .fs_get_version         = fsfilt_ext3_get_version,
+        .fs_set_version         = fsfilt_ext3_set_version,
+#endif
  #ifdef HAVE_QUOTA_SUPPORT
          .fs_quotainfo           = fsfilt_ext3_quotainfo,
          .fs_qids                = fsfilt_ext3_qids,
          .fs_dquot               = fsfilt_ext3_dquot,
  #endif
+        .fs_journal_sbdev       = fsfilt_ext3_journal_sbdev,
  };
  
  static int __init fsfilt_ext3_init(void)
  {
          int rc;
  
-        fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
-                                      sizeof(struct fsfilt_cb_data), 0,
-                                      0, NULL, NULL);
+        fcb_cache = cfs_mem_cache_create("fsfilt_ext3_fcb",
+                                         sizeof(struct fsfilt_cb_data), 0, 0);
          if (!fcb_cache) {
                  CERROR("error allocating fsfilt journal callback cache\n");
                  GOTO(out, rc = -ENOMEM);
@@ -2040,7 +2077,7 @@ static int __init fsfilt_ext3_init(void)
          rc = fsfilt_register_ops(&fsfilt_ext3_ops);
  
          if (rc) {
-                int err = kmem_cache_destroy(fcb_cache);
+                int err = cfs_mem_cache_destroy(fcb_cache);
                  LASSERTF(err == 0, "error destroying new cache: rc %d\n", err);
          }
  out:
@@ -2052,7 +2089,7 @@ static void __exit fsfilt_ext3_exit(void)
          int rc;
  
          fsfilt_unregister_ops(&fsfilt_ext3_ops);
-        rc = kmem_cache_destroy(fcb_cache);
+        rc = cfs_mem_cache_destroy(fcb_cache);
          LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n");
  }