Revert "LU-2158 lvfs: remove llog_lvfs.c and other lvfs code from llog"

[fs/lustre-release.git] / lustre / lvfs / fsfilt_ext3.c
diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c

index 8ca8f19..ae23b27 100644 (file)
--- a/lustre/lvfs/fsfilt_ext3.c
+++ b/lustre/lvfs/fsfilt_ext3.c
@@ -65,12 +65,124 @@
  #endif
  
  /* for kernels 2.6.18 and later */
+#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)
+
  #define fsfilt_ext3_ext_insert_extent(handle, inode, path, newext, flag) \
                 ext3_ext_insert_extent(handle, inode, path, newext, flag)
  
  #define ext3_mb_discard_inode_preallocations(inode) \
                   ext3_discard_preallocations(inode)
  
+#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
+#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
+
+static struct kmem_cache *fcb_cache;
+
+struct fsfilt_cb_data {
+       struct ext4_journal_cb_entry cb_jcb; /* private data - MUST BE FIRST */
+       fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
+       struct obd_device *cb_obd;      /* MDS/OBD completion device */
+       __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+       void *cb_data;                  /* MDS/OST completion function data */
+};
+
+static char *fsfilt_ext3_get_label(struct super_block *sb)
+{
+        return EXT3_SB(sb)->s_es->s_volume_name;
+}
+
+/* kernel has ext4_blocks_for_truncate since linux-3.1.1 */
+#ifdef HAVE_BLOCKS_FOR_TRUNCATE
+# include <ext4/truncate.h>
+#else
+static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
+{
+       ext4_lblk_t needed;
+
+       needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
+       if (needed < 2)
+               needed = 2;
+       if (needed > EXT4_MAX_TRANS_DATA)
+               needed = EXT4_MAX_TRANS_DATA;
+       return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
+}
+#endif
+
+/*
+ * We don't currently need any additional blocks for rmdir and
+ * unlink transactions because we are storing the OST oa_id inside
+ * the inode (which we will be changing anyways as part of this
+ * transaction).
+ */
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
+                               int logs)
+{
+        /* For updates to the last received file */
+        int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
+        journal_t *journal;
+        void *handle;
+
+        if (current->journal_info) {
+                CDEBUG(D_INODE, "increasing refcount on %p\n",
+                       current->journal_info);
+                goto journal_start;
+        }
+
+        switch(op) {
+        case FSFILT_OP_UNLINK:
+               /* delete one file + create/update logs for each stripe */
+               nblocks += EXT3_DELETE_TRANS_BLOCKS(inode->i_sb);
+               nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+                           FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
+               break;
+        case FSFILT_OP_CANCEL_UNLINK:
+               LASSERT(logs == 1);
+
+               /* blocks for log header bitmap update OR
+                * blocks for catalog header bitmap update + unlink of logs +
+                * blocks for delete the inode (include blocks truncating). */
+               nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
+                         EXT3_DELETE_TRANS_BLOCKS(inode->i_sb) +
+                         ext4_blocks_for_truncate(inode) + 3;
+               break;
+        default: CERROR("unknown transaction start op %d\n", op);
+                LBUG();
+        }
+
+        LASSERT(current->journal_info == desc_private);
+        journal = EXT3_SB(inode->i_sb)->s_journal;
+        if (nblocks > journal->j_max_transaction_buffers) {
+                CWARN("too many credits %d for op %ux%u using %d instead\n",
+                       nblocks, op, logs, journal->j_max_transaction_buffers);
+                nblocks = journal->j_max_transaction_buffers;
+        }
+
+ journal_start:
+        LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
+        handle = ext3_journal_start(inode, nblocks);
+
+        if (!IS_ERR(handle))
+                LASSERT(current->journal_info == handle);
+        else
+                CERROR("error starting handle for op %u (%u credits): rc %ld\n",
+                       op, nblocks, PTR_ERR(handle));
+        return handle;
+}
+
+static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync)
+{
+        int rc;
+        handle_t *handle = h;
+
+        LASSERT(current->journal_info == handle);
+        if (force_sync)
+                handle->h_sync = 1; /* recovery likes this */
+
+        rc = ext3_journal_stop(handle);
+
+        return rc;
+}
+
  #ifndef EXT3_EXTENTS_FL
  #define EXT3_EXTENTS_FL                 0x00080000 /* Inode uses extents */
  #endif
@@ -444,20 +556,211 @@ int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
          return rc;
  }
  
+int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
+{
+        unsigned long block;
+        struct buffer_head *bh;
+        int err, blocksize, csize, boffs, osize = size;
+
+        /* prevent reading after eof */
+       spin_lock(&inode->i_lock);
+       if (i_size_read(inode) < *offs + size) {
+               size = i_size_read(inode) - *offs;
+               spin_unlock(&inode->i_lock);
+                if (size < 0) {
+                        CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
+                               i_size_read(inode), *offs);
+                        return -EBADR;
+                } else if (size == 0) {
+                        return 0;
+                }
+        } else {
+               spin_unlock(&inode->i_lock);
+        }
+
+        blocksize = 1 << inode->i_blkbits;
+
+        while (size > 0) {
+                block = *offs >> inode->i_blkbits;
+                boffs = *offs & (blocksize - 1);
+                csize = min(blocksize - boffs, size);
+                bh = ext3_bread(NULL, inode, block, 0, &err);
+                if (!bh) {
+                        CERROR("can't read block: %d\n", err);
+                        return err;
+                }
+
+                memcpy(buf, bh->b_data + boffs, csize);
+                brelse(bh);
+
+                *offs += csize;
+                buf += csize;
+                size -= csize;
+        }
+        return osize;
+}
+EXPORT_SYMBOL(fsfilt_ext3_read);
+
+static int fsfilt_ext3_read_record(struct file * file, void *buf,
+                                   int size, loff_t *offs)
+{
+        int rc;
+        rc = fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs);
+        if (rc > 0)
+                rc = 0;
+        return rc;
+}
+
+int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
+                                loff_t *offs, handle_t *handle)
+{
+        struct buffer_head *bh = NULL;
+        loff_t old_size = i_size_read(inode), offset = *offs;
+        loff_t new_size = i_size_read(inode);
+        unsigned long block;
+        int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs;
+
+        while (bufsize > 0) {
+                if (bh != NULL)
+                        brelse(bh);
+
+                block = offset >> inode->i_blkbits;
+                boffs = offset & (blocksize - 1);
+                size = min(blocksize - boffs, bufsize);
+                bh = ext3_bread(handle, inode, block, 1, &err);
+                if (!bh) {
+                        CERROR("can't read/create block: %d\n", err);
+                        break;
+                }
+
+                err = ext3_journal_get_write_access(handle, bh);
+                if (err) {
+                        CERROR("journal_get_write_access() returned error %d\n",
+                               err);
+                        break;
+                }
+                LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
+                memcpy(bh->b_data + boffs, buf, size);
+                err = ext3_journal_dirty_metadata(handle, bh);
+                if (err) {
+                        CERROR("journal_dirty_metadata() returned error %d\n",
+                               err);
+                        break;
+                }
+                if (offset + size > new_size)
+                        new_size = offset + size;
+                offset += size;
+                bufsize -= size;
+                buf += size;
+        }
+        if (bh)
+                brelse(bh);
+
+        /* correct in-core and on-disk sizes */
+        if (new_size > i_size_read(inode)) {
+               spin_lock(&inode->i_lock);
+               if (new_size > i_size_read(inode))
+                       i_size_write(inode, new_size);
+               if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
+                       EXT3_I(inode)->i_disksize = i_size_read(inode);
+               if (i_size_read(inode) > old_size) {
+                       spin_unlock(&inode->i_lock);
+                       mark_inode_dirty(inode);
+               } else {
+                       spin_unlock(&inode->i_lock);
+                }
+        }
+
+        if (err == 0)
+                *offs = offset;
+        return err;
+}
+EXPORT_SYMBOL(fsfilt_ext3_write_handle);
+
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
+                                    loff_t *offs, int force_sync)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        handle_t *handle;
+        int err, block_count = 0, blocksize;
+
+        /* Determine how many transaction credits are needed */
+        blocksize = 1 << inode->i_blkbits;
+        block_count = (*offs & (blocksize - 1)) + bufsize;
+        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+
+       handle = ext3_journal_start(inode,
+                       block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
+       if (IS_ERR(handle)) {
+               CERROR("can't start transaction for %d blocks (%d bytes)\n",
+                      block_count * EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + 2,
+                      bufsize);
+               return PTR_ERR(handle);
+       }
+
+        err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle);
+
+        if (!err && force_sync)
+                handle->h_sync = 1; /* recovery likes this */
+
+        ext3_journal_stop(handle);
+
+        return err;
+}
+
+static int fsfilt_ext3_setup(struct super_block *sb)
+{
+        if (!EXT3_HAS_COMPAT_FEATURE(sb,
+                                EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+                CERROR("ext3 mounted without journal\n");
+                return -EINVAL;
+        }
+
+#ifdef S_PDIROPS
+        CWARN("Enabling PDIROPS\n");
+        set_opt(EXT3_SB(sb)->s_mount_opt, PDIROPS);
+        sb->s_flags |= S_PDIROPS;
+#endif
+        if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+                CWARN("filesystem doesn't have dir_index feature enabled\n");
+        return 0;
+}
  static struct fsfilt_operations fsfilt_ext3_ops = {
-       .fs_type                = "ext3",
-       .fs_owner               = THIS_MODULE,
-       .fs_map_inode_pages     = fsfilt_ext3_map_inode_pages,
+        .fs_type                = "ext3",
+        .fs_owner               = THIS_MODULE,
+        .fs_getlabel            = fsfilt_ext3_get_label,
+        .fs_start               = fsfilt_ext3_start,
+        .fs_commit              = fsfilt_ext3_commit,
+        .fs_map_inode_pages     = fsfilt_ext3_map_inode_pages,
+        .fs_write_record        = fsfilt_ext3_write_record,
+        .fs_read_record         = fsfilt_ext3_read_record,
+        .fs_setup               = fsfilt_ext3_setup,
  };
  
  static int __init fsfilt_ext3_init(void)
  {
-       return fsfilt_register_ops(&fsfilt_ext3_ops);
+       int rc;
+
+       fcb_cache = kmem_cache_create("fsfilt_ext3_fcb",
+                                     sizeof(struct fsfilt_cb_data),
+                                     0, 0, NULL);
+       if (!fcb_cache) {
+               CERROR("error allocating fsfilt journal callback cache\n");
+               GOTO(out, rc = -ENOMEM);
+       }
+
+       rc = fsfilt_register_ops(&fsfilt_ext3_ops);
+
+       if (rc)
+               kmem_cache_destroy(fcb_cache);
+out:
+       return rc;
  }
  
  static void __exit fsfilt_ext3_exit(void)
  {
         fsfilt_unregister_ops(&fsfilt_ext3_ops);
+       kmem_cache_destroy(fcb_cache);
  }
  
  module_init(fsfilt_ext3_init);