#define DEBUG_SUBSYSTEM S_FILTER
+#include <linux/init.h>
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/slab.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <linux/version.h>
-/* XXX ugh */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- #include <linux/ext3_xattr.h>
-#else
- #include <linux/../../fs/ext3/xattr.h>
-#endif
+#include <linux/ext3_xattr.h>
+
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
#include <linux/obd_class.h>
-#include <linux/module.h>
static kmem_cache_t *fcb_cache;
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
#endif
#define XATTR_LUSTRE_MDS_LOV_EA "lov"
-#define EXT3_XATTR_INDEX_LUSTRE 5 /* old */
-#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" /* old */
-
/*
* We don't currently need any additional blocks for rmdir and
* unlink transactions because we are storing the OST oa_id inside
* the inode (which we will be changing anyways as part of this
* transaction).
*/
-static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
+ int logs)
{
/* For updates to the last recieved file */
- int nblocks = EXT3_DATA_TRANS_BLOCKS;
+ int nblocks = EXT3_SINGLEDATA_TRANS_BLOCKS;
+ journal_t *journal;
void *handle;
if (current->journal_info) {
}
switch(op) {
- case FSFILT_OP_CREATE_LOG:
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
- op = FSFILT_OP_CREATE;
- break;
- case FSFILT_OP_UNLINK_LOG:
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
- op = FSFILT_OP_UNLINK;
- break;
- }
-
- switch(op) {
case FSFILT_OP_RMDIR:
case FSFILT_OP_UNLINK:
+ /* delete one file + create/update logs for each stripe */
nblocks += EXT3_DELETE_TRANS_BLOCKS;
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
break;
case FSFILT_OP_RENAME:
/* modify additional directory */
- nblocks += EXT3_DATA_TRANS_BLOCKS;
+ nblocks += EXT3_SINGLEDATA_TRANS_BLOCKS;
/* no break */
case FSFILT_OP_SYMLINK:
/* additional block + block bitmap + GDT for long symlink */
nblocks += 3;
/* no break */
case FSFILT_OP_CREATE:
+ /* create/update logs for each stripe */
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
+ /* no break */
case FSFILT_OP_MKDIR:
case FSFILT_OP_MKNOD:
/* modify one inode + block bitmap + GDT */
/* no break */
case FSFILT_OP_LINK:
/* modify parent directory */
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_DATA_TRANS_BLOCKS;
break;
case FSFILT_OP_SETATTR:
/* Setattr on inode */
nblocks += 1;
break;
- case FSFILT_OP_CANCEL_UNLINK_LOG:
+ case FSFILT_OP_CANCEL_UNLINK:
+ /* blocks for log header bitmap update OR
+ * blocks for catalog header bitmap update + unlink of logs */
nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
- EXT3_DELETE_TRANS_BLOCKS;
+ EXT3_DELETE_TRANS_BLOCKS * logs;
break;
default: CERROR("unknown transaction start op %d\n", op);
LBUG();
}
LASSERT(current->journal_info == desc_private);
+ journal = EXT3_SB(inode->i_sb)->s_journal;
+ if (nblocks > journal->j_max_transaction_buffers) {
+ CERROR("too many credits %d for op %ux%u using %d instead\n",
+ nblocks, op, logs, journal->j_max_transaction_buffers);
+ nblocks = journal->j_max_transaction_buffers;
+ }
journal_start:
lock_kernel();
if (!IS_ERR(handle))
LASSERT(current->journal_info == handle);
+ else
+ CERROR("error starting handle for op %u (%u credits): rc %ld\n",
+ op, nblocks, PTR_ERR(handle));
return handle;
}
*/
static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
int niocount, struct niobuf_local *nb,
- void *desc_private)
+ void *desc_private, int logs)
{
journal_t *journal;
handle_t *handle;
static int fsfilt_ext3_commit_async(struct inode *inode, void *h,
void **wait_handle)
{
+ unsigned long tid;
transaction_t *transaction;
- unsigned long tid, rtid;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+ unsigned long rtid;
+#endif
handle_t *handle = h;
journal_t *journal;
int rc;
RETURN(rc);
}
-#undef INLINE_EA
-#undef OLD_EA
static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
void *lmm, int lmm_size)
{
- int rc, old_ea = 0;
-
-#ifdef INLINE_EA /* can go away before 1.0 - just for testing bug 2097 now */
- /* Nasty hack city - store stripe MD data in the block pointers if
- * it will fit, because putting it in an EA currently kills the MDS
- * performance. We'll fix this with "fast EAs" in the future.
- */
- if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) -
- sizeof(EXT3_I(inode)->i_data[0])) {
- unsigned old_size = EXT3_I(inode)->i_data[0];
- if (old_size != 0) {
- LASSERT(old_size < sizeof(EXT3_I(inode)->i_data));
- CERROR("setting EA on %lu/%u again... interesting\n",
- inode->i_ino, inode->i_generation);
- }
+ int rc;
- EXT3_I(inode)->i_data[0] = cpu_to_le32(lmm_size);
- memcpy(&EXT3_I(inode)->i_data[1], lmm, lmm_size);
- mark_inode_dirty(inode);
- return 0;
- }
-#endif
-#ifdef OLD_EA
/* keep this when we get rid of OLD_EA (too noisy during conversion) */
- if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) {
+ if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */)
CWARN("setting EA on %lu/%u again... interesting\n",
inode->i_ino, inode->i_generation);
- old_ea = 1;
- }
lock_kernel();
- /* this can go away before 1.0. For bug 2097 testing only. */
- rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
- XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
-#else
- lock_kernel();
rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED,
XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size, 0);
- /* This tries to delete the old-format LOV EA, but only as long as we
- * have successfully saved the new-format LOV EA (we can always try
- * the conversion again the next time the file is accessed). It is
- * possible (although unlikely) that the new-format LOV EA couldn't be
- * saved because it ran out of space but we would need a file striped
- * over least 123 OSTs before the two EAs filled a 4kB block.
- *
- * This can be removed when all filesystems have converted to the
- * new EA format, but otherwise adds little if any overhead. If we
- * wanted backward compatibility for existing files, we could keep
- * the old EA around for a while but we'd have to clean it up later. */
- if (rc >= 0 && old_ea) {
- int err = ext3_xattr_set_handle(handle, inode,
- EXT3_XATTR_INDEX_LUSTRE,
- XATTR_LUSTRE_MDS_OBJID,
- NULL, 0, 0);
- if (err)
- CERROR("error deleting old LOV EA on %lu/%u: rc %d\n",
- inode->i_ino, inode->i_generation, err);
- }
-#endif
unlock_kernel();
if (rc)
LASSERT(down_trylock(&inode->i_sem) != 0);
lock_kernel();
- /* Keep support for reading "inline EAs" until we convert
- * users over to new format entirely. See bug 841/2097. */
- if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) {
- unsigned size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
- void *handle;
-
- LASSERT(size < sizeof(EXT3_I(inode)->i_data));
- if (lmm) {
- if (size > lmm_size) {
- CERROR("inline EA on %lu/%u bad size %u > %u\n",
- inode->i_ino, inode->i_generation,
- size, lmm_size);
- return -ERANGE;
- }
- memcpy(lmm, &EXT3_I(inode)->i_data[1], size);
- }
-
-#ifndef INLINE_EA
- /* migrate LOV EA data to external block - keep same format */
- CWARN("DEBUG: migrate inline EA for inode %lu/%u to block\n",
- inode->i_ino, inode->i_generation);
-
- handle = journal_start(EXT3_JOURNAL(inode),
- EXT3_XATTR_TRANS_BLOCKS);
- if (!IS_ERR(handle)) {
- int err;
- rc = fsfilt_ext3_set_md(inode, handle,
- &EXT3_I(inode)->i_data[1],size);
- if (rc == 0) {
- memset(EXT3_I(inode)->i_data, 0,
- sizeof(EXT3_I(inode)->i_data));
- mark_inode_dirty(inode);
- }
- err = journal_stop(handle);
- if (err && rc == 0)
- rc = err;
- } else {
- rc = PTR_ERR(handle);
- }
-#endif
- unlock_kernel();
- return size;
- }
rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED,
XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size);
- /* try old EA type if new one failed - MDS will convert it for us */
- if (rc == -ENODATA) {
- CDEBUG(D_INFO,"failed new LOV EA %d/%s from inode %lu: rc %d\n",
- EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA,
- inode->i_ino, rc);
-
- rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE,
- XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size);
- }
unlock_kernel();
/* This gives us the MD size */
if (rc < 0) {
CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n",
- EXT3_XATTR_INDEX_LUSTRE, XATTR_LUSTRE_MDS_OBJID,
+ EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA,
inode->i_ino, rc);
memset(lmm, 0, lmm_size);
return (rc == -ENODATA) ? 0 : rc;
return 0;
}
+/* Return a length estimate for one FSFILT operation or for a set of fso
+ * objects.
+ *
+ * If fso is NULL, op is an FSFILT operation code:
+ *   FSFILT_OP_CREATE returns 4 + 3 * logs,
+ *   FSFILT_OP_UNLINK returns 3 * logs,
+ *   any other op falls out of the switch and returns 0.
+ * Otherwise op is the number of fso objects, which are read as an array
+ * starting at fso, and the result is the sum of the per-object values
+ * computed in the loop below plus 3 * logs.
+ * Logs is the number of logfiles to update.
+ *
+ * NOTE(review): the unit of the result is presumably filesystem blocks
+ * (journal credits) -- confirm against the callers.
+ * NOTE(review): the superblock is taken from the first object only, and
+ * fso->fso_dentry->d_inode is dereferenced without a NULL check, so this
+ * presumably expects every object to be a positive dentry on the same
+ * filesystem -- verify against the callers.
+ * NOTE(review): the blockpp shift assumes s_blocksize_bits does not exceed
+ * PAGE_CACHE_SHIFT -- confirm. */
+static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs)
+{
+ if ( !fso ) {
+ switch(op) {
+ case FSFILT_OP_CREATE:
+ /* 4 = directory leaf, index, indirect and EA; plus 3 for each log */
+ return 4 + 3 * logs;
+ case FSFILT_OP_UNLINK:
+ return 3 * logs; /* only the per-log part, 3 for each log */
+ }
+ } else {
+ int i;
+ int needed = 0; /* running total over all objects */
+ struct super_block *sb = fso->fso_dentry->d_inode->i_sb;
+ int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* filesystem blocks per page-cache page */
+ int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp; /* EXT3_ADDR_PER_BLOCK scaled by blocks per page */
+ for (i = 0; i < op; i++, fso++) {
+ int nblocks = fso->fso_bufcnt * blockpp; /* fso_bufcnt scaled by blocks per page; presumably fso_bufcnt counts pages -- confirm */
+ int ndindirect = min(nblocks, addrpp + 1); /* capped at addrpp + 1 */
+ int nindir = nblocks + ndindirect + 1; /* nblocks plus the capped term plus one more */
+
+ needed += nindir;
+ }
+ return needed + 3 * logs;
+ }
+
+ return 0; /* reached only when fso is NULL and op matched no case above */
+}
+
static struct fsfilt_operations fsfilt_ext3_ops = {
fs_type: "ext3",
fs_owner: THIS_MODULE,
fs_write_record: fsfilt_ext3_write_record,
fs_read_record: fsfilt_ext3_read_record,
fs_setup: fsfilt_ext3_setup,
+ fs_get_op_len: fsfilt_ext3_get_op_len,
};
static int __init fsfilt_ext3_init(void)