#define DEBUG_SUBSYSTEM S_FILTER
+#include <linux/init.h>
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/slab.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
#include <linux/obd_class.h>
-#include <linux/module.h>
static kmem_cache_t *fcb_cache;
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
* the inode (which we will be changing anyways as part of this
* transaction).
*/
-static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
+ int logs)
{
/* For updates to the last recieved file */
- int nblocks = EXT3_DATA_TRANS_BLOCKS;
+ int nblocks = EXT3_SINGLEDATA_TRANS_BLOCKS;
+ journal_t *journal;
void *handle;
if (current->journal_info) {
- CDEBUG(D_INODE, "increasing refcount on %p\n", current->journal_info);
+ CDEBUG(D_INODE, "increasing refcount on %p\n",
+ current->journal_info);
goto journal_start;
}
switch(op) {
- case FSFILT_OP_CREATE_LOG:
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
- op = FSFILT_OP_CREATE;
- break;
- case FSFILT_OP_UNLINK_LOG:
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
- op = FSFILT_OP_UNLINK;
- break;
- }
-
- switch(op) {
case FSFILT_OP_RMDIR:
case FSFILT_OP_UNLINK:
+ /* delete one file + create/update logs for each stripe */
nblocks += EXT3_DELETE_TRANS_BLOCKS;
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
break;
case FSFILT_OP_RENAME:
/* modify additional directory */
- nblocks += EXT3_DATA_TRANS_BLOCKS;
+ nblocks += EXT3_SINGLEDATA_TRANS_BLOCKS;
/* no break */
case FSFILT_OP_SYMLINK:
/* additional block + block bitmap + GDT for long symlink */
nblocks += 3;
/* no break */
case FSFILT_OP_CREATE:
+ /* create/update logs for each stripe */
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
+ /* no break */
case FSFILT_OP_MKDIR:
case FSFILT_OP_MKNOD:
/* modify one inode + block bitmap + GDT */
/* no break */
case FSFILT_OP_LINK:
/* modify parent directory */
- nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_DATA_TRANS_BLOCKS;
break;
case FSFILT_OP_SETATTR:
/* Setattr on inode */
nblocks += 1;
break;
+ case FSFILT_OP_CANCEL_UNLINK:
+ /* blocks for log header bitmap update OR
+ * blocks for catalog header bitmap update + unlink of logs */
+ nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
+ EXT3_DELETE_TRANS_BLOCKS * logs;
+ break;
default: CERROR("unknown transaction start op %d\n", op);
LBUG();
}
LASSERT(current->journal_info == desc_private);
+ journal = EXT3_SB(inode->i_sb)->s_journal;
+ if (nblocks > journal->j_max_transaction_buffers) {
+ CERROR("too many credits %d for op %ux%u using %d instead\n",
+ nblocks, op, logs, journal->j_max_transaction_buffers);
+ nblocks = journal->j_max_transaction_buffers;
+ }
journal_start:
lock_kernel();
if (!IS_ERR(handle))
LASSERT(current->journal_info == handle);
+ else
+ CERROR("error starting handle for op %u (%u credits): rc %ld\n",
+ op, nblocks, PTR_ERR(handle));
return handle;
}
*
* 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update.
*/
-static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso)
+static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
+ int niocount, struct niobuf_local *nb)
{
struct super_block *sb = fso->fso_dentry->d_inode->i_sb;
- int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
- int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp;
- int nbitmaps = 0;
- int ngdblocks = 0;
- int needed = objcount + 1;
- int i;
-
- for (i = 0; i < objcount; i++, fso++) {
- int nblocks = fso->fso_bufcnt * blockpp;
- int ndindirect = min(nblocks, addrpp + 1);
- int nindir = nblocks + ndindirect + 1;
-
- nbitmaps += nindir + nblocks;
- ngdblocks += nindir + nblocks;
-
- needed += nindir;
+ __u64 next_indir;
+ const int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
+ int nbitmaps = 0, ngdblocks;
+ int needed = objcount + 1; /* inodes + superblock */
+ int i, j;
+
+ for (i = 0, j = 0; i < objcount; i++, fso++) {
+ /* two or more dindirect blocks in case we cross boundary */
+ int ndind = (long)((nb[j + fso->fso_bufcnt - 1].offset -
+ nb[j].offset) >>
+ sb->s_blocksize_bits) /
+ (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb));
+ nbitmaps += min(fso->fso_bufcnt, ndind > 0 ? ndind : 2);
+
+ /* leaf, indirect, tindirect blocks for first block */
+ nbitmaps += blockpp + 2;
+
+ j += fso->fso_bufcnt;
+ }
+
+ next_indir = nb[0].offset +
+ (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits);
+ for (i = 1; i < niocount; i++) {
+ if (nb[i].offset >= next_indir) {
+ nbitmaps++; /* additional indirect */
+ next_indir = nb[i].offset +
+ (EXT3_ADDR_PER_BLOCK(sb)<<sb->s_blocksize_bits);
+ } else if (nb[i].offset != nb[i - 1].offset + sb->s_blocksize) {
+ nbitmaps++; /* additional indirect */
+ }
+ nbitmaps += blockpp; /* each leaf in different group? */
}
- /* Assumes ext3 and ext3 have same sb_info layout at the start. */
+ ngdblocks = nbitmaps;
if (nbitmaps > EXT3_SB(sb)->s_groups_count)
nbitmaps = EXT3_SB(sb)->s_groups_count;
if (ngdblocks > EXT3_SB(sb)->s_gdb_count)
/* last_rcvd update */
needed += EXT3_DATA_TRANS_BLOCKS;
-#ifdef CONFIG_QUOTA
+#if defined(CONFIG_QUOTA) && !defined(__x86_64__) /* XXX */
/* We assume that there will be 1 bit set in s_dquot.flags for each
* quota file that is active. This is at least true for now.
*/
* the pages have been written.
*/
static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
- int niocount, void *desc_private)
+ int niocount, struct niobuf_local *nb,
+ void *desc_private, int logs)
{
journal_t *journal;
handle_t *handle;
LASSERT(current->journal_info == desc_private);
journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
- needed = fsfilt_ext3_credits_needed(objcount, fso);
+ needed = fsfilt_ext3_credits_needed(objcount, fso, niocount, nb);
/* The number of blocks we could _possibly_ dirty can very large.
* We reduce our request if it is absurd (and we couldn't get that
unlock_kernel();
return rc;
}
-
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
rtid = log_start_commit(journal, transaction);
if (rtid != tid)
CERROR("strange race: %lu != %lu\n",
(unsigned long) tid, (unsigned long) rtid);
+#else
+ log_start_commit(journal, transaction->t_tid);
+#endif
unlock_kernel();
*wait_handle = (void *) tid;
static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
struct kstatfs sfs;
- int rc = vfs_statfs(sb, &sfs);
+ int rc;
+
+ memset(&sfs, 0, sizeof(sfs));
+
+ rc = sb->s_op->statfs(sb, &sfs);
if (!rc && sfs.f_bfree < sfs.f_ffree) {
sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
int err, blocksize, csize, boffs;
/* prevent reading after eof */
+ lock_kernel();
if (inode->i_size < *offs + size) {
size = inode->i_size - *offs;
+ unlock_kernel();
if (size < 0) {
CERROR("size %llu is too short for read %u@%llu\n",
- inode->i_size, size, *offs);
+ inode->i_size, size, *offs);
return -EIO;
- } else if (size == 0)
+ } else if (size == 0) {
return 0;
+ }
+ } else {
+ unlock_kernel();
}
blocksize = 1 << inode->i_blkbits;
blocksize = 1 << inode->i_blkbits;
block_count = (*offs & (blocksize - 1)) + bufsize;
block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
-
- down(&inode->i_sem);
+
journal = EXT3_SB(inode->i_sb)->s_journal;
+ lock_kernel();
handle = journal_start(journal,
block_count * EXT3_DATA_TRANS_BLOCKS + 2);
+ unlock_kernel();
if (IS_ERR(handle)) {
CERROR("can't start transaction\n");
- up(&inode->i_sem);
return PTR_ERR(handle);
}
/* correct in-core and on-disk sizes */
if (new_size > inode->i_size) {
+ lock_kernel();
if (new_size > inode->i_size)
inode->i_size = new_size;
if (inode->i_size > EXT3_I(inode)->i_disksize)
EXT3_I(inode)->i_disksize = inode->i_size;
if (inode->i_size > old_size)
mark_inode_dirty(inode);
+ unlock_kernel();
}
+ lock_kernel();
journal_stop(handle);
- up(&inode->i_sem);
+ unlock_kernel();
if (err == 0)
*offs = offset;
return 0;
}
+/* Compute the value returned through the fs_get_op_len method of
+ * fsfilt_ext3_ops.
+ *
+ * If fso is NULL, op is an FSFILT operation code; otherwise op is the number
+ * of entries in the fso array. logs is the number of logfiles to update.
+ *
+ * With fso == NULL only FSFILT_OP_CREATE and FSFILT_OP_UNLINK are handled;
+ * any other op leaves the switch and the function returns 0. With fso != NULL
+ * the result is the sum, over the op entries of fso[], of a per-object count
+ * derived from fso_bufcnt, plus 3 for each log.
+ *
+ * NOTE(review): the unit of the returned value (blocks? journal credits?) is
+ * not visible in this function - confirm against the fs_get_op_len callers. */
+static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs)
+{
+ if ( !fso ) { /* op is an FSFILT_OP_* code */
+ switch(op) {
+ case FSFILT_OP_CREATE:
+ /* directory leaf, index & indirect & EA (presumably the constant 4
+  * below), plus 3 for each log */
+ return 4 + 3 * logs;
+ case FSFILT_OP_UNLINK:
+ return 3 * logs; /* only the per-log term */
+ } /* no default case: other ops reach the final "return 0" */
+ } else { /* op is the number of fso entries to walk */
+ int i;
+ int needed = 0;
+ struct super_block *sb = fso->fso_dentry->d_inode->i_sb; /* taken from the first entry only */
+ int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* fs blocks per page-cache page */
+ int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp; /* EXT3_ADDR_PER_BLOCK scaled by blocks per page */
+ for (i = 0; i < op; i++, fso++) {
+ int nblocks = fso->fso_bufcnt * blockpp; /* presumably fso_bufcnt counts page-sized buffers - TODO confirm */
+ int ndindirect = min(nblocks, addrpp + 1); /* capped at addrpp + 1 */
+ int nindir = nblocks + ndindirect + 1;
+
+ needed += nindir; /* accumulate the per-object count */
+ }
+ return needed + 3 * logs; /* same 3-per-log term as the fso == NULL cases */
+ }
+
+ return 0; /* reached only when fso is NULL and op matched no case */
+}
+
static struct fsfilt_operations fsfilt_ext3_ops = {
fs_type: "ext3",
fs_owner: THIS_MODULE,
fs_write_record: fsfilt_ext3_write_record,
fs_read_record: fsfilt_ext3_read_record,
fs_setup: fsfilt_ext3_setup,
+ fs_get_op_len: fsfilt_ext3_get_op_len,
};
static int __init fsfilt_ext3_init(void)