From 0f22a996cd744ba2dba6288a107b1795dc496736 Mon Sep 17 00:00:00 2001 From: adilger Date: Thu, 12 Dec 2002 10:59:05 +0000 Subject: [PATCH] Add a new fsfilt method which reserves blocks for multiple bulk I/O operations. Fix lock ordering in filter_preprw() so we get O/R i_sem before journal ops. Move obdfilter to using the journal methods in fsfilt. Change fsfilt_statfs() to take struct obd_statfs as a parameter. Add some comments in fsfilt_extN_start() where we fall through on cases. --- lustre/include/linux/lustre_fsfilt.h | 29 ++++++-- lustre/obdclass/fsfilt_extN.c | 139 +++++++++++++++++++++++++++++++++-- 2 files changed, 152 insertions(+), 16 deletions(-) diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index b1ec270..eeae647 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -32,11 +32,18 @@ typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error); +struct fsfilt_objinfo { + struct dentry *fso_dentry; + int fso_bufcnt; +}; + struct fsfilt_operations { struct list_head fs_list; struct module *fs_owner; char *fs_type; void *(* fs_start)(struct inode *inode, int op); + void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_remote *nb); int (* fs_commit)(struct inode *inode, void *handle); int (* fs_setattr)(struct dentry *dentry, void *handle, struct iattr *iattr); @@ -48,7 +55,7 @@ struct fsfilt_operations { int (* fs_journal_data)(struct file *file); int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func); - int (* fs_statfs)(struct super_block *sb, struct statfs *sfs); + int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); }; extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); @@ -72,15 +79,21 @@ static inline void *fsfilt_start(struct obd_device *obd, return obd->obd_fsops->fs_start(inode, op); } +static inline void *fsfilt_brw_start(struct 
obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_remote *nb) +{ + return obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb); +} + static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, - void *handle) + void *handle) { return obd->obd_fsops->fs_commit(inode, handle); } -static inline int fsfilt_setattr(struct obd_device *obd, - struct dentry *dentry, - void *handle, struct iattr *iattr) +static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, + void *handle, struct iattr *iattr) { int rc; /* @@ -98,7 +111,7 @@ static inline int fsfilt_setattr(struct obd_device *obd, } static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, - void *handle, void *md, int size) + void *handle, void *md, int size) { return obd->obd_fsops->fs_set_md(inode, handle, md, size); } @@ -128,9 +141,9 @@ static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, } static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs, - struct statfs *sfs) + struct obd_statfs *osfs) { - return obd->obd_fsops->fs_statfs(fs, sfs); + return obd->obd_fsops->fs_statfs(fs, osfs); } #endif /* __KERNEL__ */ diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c index 54214ed..9b5a1f9 100644 --- a/lustre/obdclass/fsfilt_extN.c +++ b/lustre/obdclass/fsfilt_extN.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -67,18 +69,21 @@ static void *fsfilt_extN_start(struct inode *inode, int op) nblocks += EXTN_DELETE_TRANS_BLOCKS; break; case FSFILT_OP_RENAME: - /* We may be modifying two directories */ + /* modify additional directory */ nblocks += EXTN_DATA_TRANS_BLOCKS; + /* no break */ case FSFILT_OP_SYMLINK: - /* Possible new block + block bitmap + GDT for long symlink */ + /* additional block + block bitmap + GDT for long symlink */ nblocks += 3; + /* no break */ case FSFILT_OP_CREATE: case 
FSFILT_OP_MKDIR: case FSFILT_OP_MKNOD: - /* New inode + block bitmap + GDT for new file */ + /* modify one inode + block bitmap + GDT */ nblocks += 3; + /* no break */ case FSFILT_OP_LINK: - /* Change parent directory */ + /* modify parent directory */ nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; break; case FSFILT_OP_SETATTR: @@ -89,6 +94,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op) LBUG(); } + LASSERT(!current->journal_info); lock_kernel(); handle = journal_start(EXTN_JOURNAL(inode), nblocks); unlock_kernel(); @@ -96,6 +102,120 @@ static void *fsfilt_extN_start(struct inode *inode, int op) return handle; } +/* + * Calculate the number of buffer credits needed to write multiple pages in + * a single extN transaction. No, this shouldn't be here, but as yet extN + * doesn't have a nice API for calculating this sort of thing in advance. + * + * See comment above extN_writepage_trans_blocks for details. We assume + * no data journaling is being done, but it does allow for all of the pages + * being non-contiguous. If we are guaranteed contiguous pages we could + * reduce the number of (d)indirect blocks a lot. 
+ * + * With N blocks per page and P pages, for each inode we have at most: + * N*P indirect + * min(N*P, blocksize/4 + 1) dindirect blocks + * niocount tindirect + * + * For the entire filesystem, we have at most: + * min(sum(nindir + P), ngroups) bitmap blocks (from the above) + * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) + * objcount inode blocks + * 1 superblock + * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files + */ +static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) +{ + struct super_block *sb = fso->fso_dentry->d_inode->i_sb; + int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); + int addrpp = EXTN_ADDR_PER_BLOCK(sb) * blockpp; + int nbitmaps = 0; + int ngdblocks = 0; + int needed = objcount + 1; + int i; + + for (i = 0; i < objcount; i++, fso++) { + int nblocks = fso->fso_bufcnt * blockpp; + int ndindirect = min(nblocks, addrpp + 1); + int nindir = nblocks + ndindirect + 1; + + nbitmaps += nindir + nblocks; + ngdblocks += nindir + nblocks; + + needed += nindir; + } + + /* Assumes ext3 and extN have same sb_info layout at the start. */ + if (nbitmaps > EXTN_SB(sb)->s_groups_count) + nbitmaps = EXTN_SB(sb)->s_groups_count; + if (ngdblocks > EXTN_SB(sb)->s_gdb_count) + ngdblocks = EXTN_SB(sb)->s_gdb_count; + + needed += nbitmaps + ngdblocks; + +#ifdef CONFIG_QUOTA + /* We assume that there will be 1 bit set in s_dquot.flags for each + * quota file that is active. This is at least true for now. + */ + needed += hweight32(sb_any_quota_enabled(sb)) * + EXTN_SINGLEDATA_TRANS_BLOCKS; +#endif + + return needed; +} + +/* We have to start a huge journal transaction here to hold all of the + * metadata for the pages being written here. This is necessitated by + * the fact that we do lots of prepare_write operations before we do + * any of the matching commit_write operations, so even if we split + * up to use "smaller" transactions none of them could complete until + * all of them were opened. 
By having a single journal transaction, + * we eliminate duplicate reservations for common blocks like the + * superblock and group descriptors or bitmaps. + * + * We will start the transaction here, but each prepare_write will + * add a refcount to the transaction, and each commit_write will + * remove a refcount. The transaction will be closed when all of + * the pages have been written. + */ +static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_remote *nb) +{ + journal_t *journal; + handle_t *handle; + int needed; + ENTRY; + + LASSERT(!current->journal_info); + journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; + needed = fsfilt_extN_credits_needed(objcount, fso); + + /* The number of blocks we could _possibly_ dirty can be very large. + * We reduce our request if it is absurd (and we couldn't get that + * many credits for a single handle anyways). + * + * At some point we have to limit the size of I/Os sent at one time, + * increase the size of the journal, or we have to calculate the + * actual journal requirements more carefully by checking all of + * the blocks instead of being maximally pessimistic. It remains to + * be seen if this is a real problem or not. + */ + if (needed > journal->j_max_transaction_buffers) { + CERROR("want too many journal credits (%d) using %d instead\n", + needed, journal->j_max_transaction_buffers); + needed = journal->j_max_transaction_buffers; + } + + lock_kernel(); + handle = journal_start(journal, needed); + unlock_kernel(); + if (IS_ERR(handle)) + CERROR("can't get handle for %d credits: rc = %ld\n", needed, + PTR_ERR(handle)); + + RETURN(handle); +} + static int fsfilt_extN_commit(struct inode *inode, void *handle) { int rc; @@ -258,13 +378,15 @@ static int fsfilt_extN_journal_data(struct file *filp) * * This can be removed when the extN EA code is fixed. 
*/ -static int fsfilt_extN_statfs(struct super_block *sb, struct statfs *sfs) +static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs) { - int rc = vfs_statfs(sb, sfs); + struct statfs sfs; + int rc = vfs_statfs(sb, &sfs); - if (!rc && sfs->f_bfree < sfs->f_ffree) - sfs->f_ffree = sfs->f_bfree; + if (!rc && sfs.f_bfree < sfs.f_ffree) + sfs.f_ffree = sfs.f_bfree; + statfs_pack(osfs, &sfs); return rc; } @@ -272,6 +394,7 @@ static struct fsfilt_operations fsfilt_extN_ops = { fs_type: "extN", fs_owner: THIS_MODULE, fs_start: fsfilt_extN_start, + fs_brw_start: fsfilt_extN_brw_start, fs_commit: fsfilt_extN_commit, fs_setattr: fsfilt_extN_setattr, fs_set_md: fsfilt_extN_set_md, -- 1.8.3.1