X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flvfs%2Ffsfilt_ext3.c;h=4bc107dfc0be54a18a0b85029c554e66290476da;hb=f24218109154516e02e9a60df1e5a6f078c5e63f;hp=3d631dc626c3c9d43232a01b2166024e17c11354;hpb=191061ee668400324f4505cf498f1ee2d57e4962;p=fs%2Flustre-release.git diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 3d631dc..4bc107d 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -42,7 +42,7 @@ #include #endif -#include +#include #include #include #include @@ -52,6 +52,14 @@ #endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)) +# define lock_24kernel() lock_kernel() +# define unlock_24kernel() unlock_kernel() +#else +# define lock_24kernel() do {} while (0) +# define unlock_24kernel() do {} while (0) +#endif + static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); @@ -59,14 +67,18 @@ struct fsfilt_cb_data { struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ fsfilt_cb_t cb_func; /* MDS/OBD completion function */ struct obd_device *cb_obd; /* MDS/OBD completion device */ - __u64 cb_last_rcvd; /* MDS/OST last committed operation */ + __u64 cb_last_num; /* MDS/OST last committed operation */ void *cb_data; /* MDS/OST completion function data */ }; #ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */ #define EXT3_XATTR_INDEX_TRUSTED 4 #endif + #define XATTR_LUSTRE_MDS_LOV_EA "lov" +#define XATTR_LUSTRE_MDS_MEA_EA "mea" +#define XATTR_LUSTRE_MDS_MID_EA "mid" +#define XATTR_LUSTRE_MDS_SID_EA "sid" /* * We don't currently need any additional blocks for rmdir and @@ -88,26 +100,17 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, goto journal_start; } - /* XXX BUG 3188 -- must return to one set of opcodes */ - /* FIXME - cache hook */ - if (op & 0x20) { - nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; - op = op & ~0x20; - } - - /* FIXME - kml */ - if (op & 0x10) { - nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; - op = op & ~0x10; - } - + if (logs) + nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + + EXT3_SINGLEDATA_TRANS_BLOCKS) * logs; + switch(op) { case FSFILT_OP_RMDIR: case FSFILT_OP_UNLINK: /* delete one file + create/update logs for each stripe */ nblocks += EXT3_DELETE_TRANS_BLOCKS; - nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + - EXT3_SINGLEDATA_TRANS_BLOCKS) * logs; + /*nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + + EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;*/ break; case FSFILT_OP_RENAME: /* modify additional directory */ @@ -119,8 +122,8 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, /* no break */ case FSFILT_OP_CREATE: /* create/update logs for each stripe */ - nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + - EXT3_SINGLEDATA_TRANS_BLOCKS) * logs; + /*nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + + EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;*/ /* no break */ case FSFILT_OP_MKDIR: case FSFILT_OP_MKNOD: @@ -158,9 +161,10 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, } journal_start: - lock_kernel(); + LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks); + lock_24kernel(); handle = journal_start(EXT3_JOURNAL(inode), nblocks); - unlock_kernel(); + unlock_24kernel(); if (!IS_ERR(handle)) LASSERT(current->journal_info == handle); @@ -296,9 +300,10 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, needed = journal->j_max_transaction_buffers; } - lock_kernel(); + LASSERTF(needed > 0, "can't start %d credit transaction\n", needed); + lock_24kernel(); handle = journal_start(journal, needed); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); @@ -320,9 +325,9 @@ static int fsfilt_ext3_commit(struct super_block *sb, struct inode *inode, if (force_sync) handle->h_sync = 1; /* recovery likes this */ - lock_kernel(); + lock_24kernel(); rc = journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); return rc; } @@ -441,84 +446,135 @@ static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file, RETURN(rc); } -static int fsfilt_ext3_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) +static int fsfilt_ext3_set_xattr(struct inode * inode, void *handle, char *name, + void *buffer, int buffer_size) { - int rc; - - LASSERT(down_trylock(&inode->i_sem) != 0); - - /* keep this when we get rid of OLD_EA (too noisy during conversion) */ - if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) - CWARN("setting EA on %lu/%u again... interesting\n", - inode->i_ino, inode->i_generation); + int rc = 0; lock_kernel(); - rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, - XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size, 0); + rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, + name, buffer, buffer_size, 0); unlock_kernel(); - if (rc) - CERROR("error adding MD data to inode %lu: rc = %d\n", - inode->i_ino, rc); + CERROR("set xattr %s from inode %lu: rc %d\n", + name, inode->i_ino, rc); return rc; } -/* Must be called with i_sem held */ -static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) +static int fsfilt_ext3_get_xattr(struct inode *inode, char *name, + void *buffer, int buffer_size) { - int rc; - - LASSERT(down_trylock(&inode->i_sem) != 0); + int rc = 0; + lock_kernel(); rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, - XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size); + name, buffer, buffer_size); unlock_kernel(); - /* This gives us the MD size */ - if (lmm == NULL) + if (buffer == NULL) return (rc == -ENODATA) ? 0 : rc; - if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n", - EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA, - inode->i_ino, rc); - memset(lmm, 0, lmm_size); + CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc %d\n", + name, inode->i_ino, rc); + memset(buffer, 0, buffer_size); return (rc == -ENODATA) ? 0 : rc; } return rc; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio) +static int fsfilt_ext3_set_md(struct inode *inode, void *handle, + void *lmm, int lmm_size, + enum ea_type type) { - submit_bio(rw, bio); - return 0; + int rc; + + switch(type) { + case EA_LOV: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_LOV_EA, + lmm, lmm_size); + break; + case EA_MEA: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_MEA_EA, + lmm, lmm_size); + break; + case EA_SID: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_SID_EA, + lmm, lmm_size); + break; + case EA_MID: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_MID_EA, + lmm, lmm_size); + break; + default: + return -EINVAL; + } + + return rc; } -#else -static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio) + +static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, + int lmm_size, enum ea_type type) { - int rc, blocks_per_page; + int rc; + + switch (type) { + case EA_LOV: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_LOV_EA, + lmm, lmm_size); + break; + case EA_MEA: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_MEA_EA, + lmm, lmm_size); + break; + case EA_SID: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_SID_EA, + lmm, lmm_size); + break; + case EA_MID: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_MID_EA, + lmm, lmm_size); + break; + default: + return -EINVAL; + } + + return rc; +} - rc = brw_kiovec(rw, 1, &bio, inode->i_dev, - bio->blocks, 1 << inode->i_blkbits); +static int fsfilt_ext3_send_bio(int rw, struct inode *inode, void *bio) +{ + int rc = 0; +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + submit_bio(rw, (struct bio *)bio); +#else + struct bio *b = (struct kiobuf *)bio; + int blocks_per_page; + + rc = brw_kiovec(rw, 1, &b, inode->i_dev, + b->blocks, 1 << inode->i_blkbits); blocks_per_page = PAGE_SIZE >> inode->i_blkbits; - if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blocks_per_page) { + if (rc != (1 << inode->i_blkbits) * b->nr_pages * blocks_per_page) { CERROR("short write? expected %d, wrote %d\n", - (1 << inode->i_blkbits) * bio->nr_pages * + (1 << inode->i_blkbits) * b->nr_pages * blocks_per_page, rc); } - +#endif return rc; } -#endif -/* FIXME-UMKA: This should be used in 2.6.x io code later. */ static struct page *fsfilt_ext3_getpage(struct inode *inode, long int index) { int rc; @@ -602,7 +658,7 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) { struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error); + fcb->cb_func(fcb->cb_obd, fcb->cb_last_num, fcb->cb_data, error); OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb); atomic_dec(&fcb_cache_count); @@ -610,8 +666,8 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, struct super_block *sb, - __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func, + __u64 last_num, void *handle, + fsfilt_cb_t cb_func, void *cb_data) { struct fsfilt_cb_data *fcb; @@ -623,15 +679,14 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, atomic_inc(&fcb_cache_count); fcb->cb_func = cb_func; fcb->cb_obd = obd; - fcb->cb_last_rcvd = last_rcvd; + fcb->cb_last_num = last_num; fcb->cb_data = cb_data; - CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); + CDEBUG(D_EXT2, "set callback for last_num: "LPD64"\n", last_num); lock_kernel(); journal_callback_set(handle, fsfilt_ext3_cb_func, (struct journal_callback *)fcb); unlock_kernel(); - return 0; } @@ -673,7 +728,7 @@ static int fsfilt_ext3_sync(struct super_block *sb) #define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem); #define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem); #endif - + #include #if EXT3_EXT_MAGIC == 0xf301 #define ee_start e_start @@ -738,16 +793,16 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, loff_t new_i_size; handle_t *handle; int i, aflags = 0; - + i = EXT_DEPTH(tree); EXT_ASSERT(i == path->p_depth); EXT_ASSERT(path[i].p_hdr); - + if (exist) { err = EXT_CONTINUE; goto map; } - + if (bp->create == 0) { i = 0; if (newex->ee_block < bp->start) @@ -769,7 +824,6 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, tgen = EXT_GENERATION(tree); count = ext3_ext_calc_credits_for_insert(tree, path); ext3_up_truncate_sem(inode); - lock_kernel(); handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1); unlock_kernel(); @@ -777,7 +831,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return PTR_ERR(handle); } - + if (tgen != EXT_GENERATION(tree)) { /* the tree has changed. so path can be invalid at moment */ lock_kernel(); @@ -786,7 +840,6 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return EXT_REPEAT; } - ext3_down_truncate_sem(inode); count = newex->ee_len; goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags); @@ -813,9 +866,9 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, } } out: - lock_kernel(); + lock_24kernel(); journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); map: if (err >= 0) { /* map blocks */ @@ -863,12 +916,11 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block, bp.start = block; bp.init_num = bp.num = num; bp.create = create; - + ext3_down_truncate_sem(inode); err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); ext3_ext_invalidate_cache(&tree); ext3_up_truncate_sem(inode); - return err; } @@ -1034,7 +1086,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, loff_t new_size = inode->i_size; journal_t *journal; handle_t *handle; - int err, block_count = 0, blocksize, size, boffs; + int err = 0, block_count = 0, blocksize, size, boffs; /* Determine how many transaction credits are needed */ blocksize = 1 << inode->i_blkbits; @@ -1042,10 +1094,10 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, block_count = (block_count + blocksize - 1) >> inode->i_blkbits; journal = EXT3_SB(inode->i_sb)->s_journal; - lock_kernel(); + lock_24kernel(); handle = journal_start(journal, block_count * EXT3_DATA_TRANS_BLOCKS + 2); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return PTR_ERR(handle); @@ -1103,9 +1155,9 @@ out: unlock_kernel(); } - lock_kernel(); + lock_24kernel(); journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); if (err == 0) *offs = offset; @@ -1128,15 +1180,15 @@ static int fsfilt_ext3_setup(struct obd_device *obd, struct super_block *sb) #endif /* setup mdsnum in underlying fs */ #ifdef EXT3_FEATURE_INCOMPAT_MDSNUM - if (mds->mds_lmv_obd) { + if (mds->mds_md_obd) { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; handle_t *handle; int err; if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_MDSNUM)) { - CWARN("%s: set mdsnum %d in ext3fs\n", - obd->obd_name, mds->mds_num); + CWARN("%s: set mdsnum %d in ext3\n", + obd->obd_name, mds->mds_num); lock_kernel(); handle = journal_start(sbi->s_journal, 1); unlock_kernel(); @@ -1165,11 +1217,12 @@ extern int ext3_add_dir_entry(struct dentry *dentry); extern int ext3_del_dir_entry(struct dentry *dentry); static int fsfilt_ext3_add_dir_entry(struct obd_device *obd, - struct dentry *parent, - char *name, int namelen, - unsigned long ino, - unsigned long generation, - unsigned mds) + struct dentry *parent, + char *name, int namelen, + unsigned long ino, + unsigned long generation, + unsigned long mds, + unsigned long fid) { #ifdef EXT3_FEATURE_INCOMPAT_MDSNUM struct dentry *dentry; @@ -1184,7 +1237,7 @@ static int fsfilt_ext3_add_dir_entry(struct obd_device *obd, (int) PTR_ERR(dentry)); RETURN(PTR_ERR(dentry)); } - if (dentry->d_inode != NULL) { + if (dentry->d_inode != NULL || dentry->d_flags & DCACHE_CROSS_REF) { CERROR("dentry %*s(0x%p) found\n", dentry->d_name.len, dentry->d_name.name, dentry); l_dput(dentry); @@ -1200,6 +1253,7 @@ static int fsfilt_ext3_add_dir_entry(struct obd_device *obd, dentry->d_inum = ino; dentry->d_mdsnum = mds; dentry->d_generation = generation; + dentry->d_fid = fid; lock_kernel(); err = ext3_add_dir_entry(dentry); unlock_kernel(); @@ -1230,45 +1284,6 @@ static int fsfilt_ext3_del_dir_entry(struct obd_device *obd, #endif } -static int fsfilt_ext3_set_xattr(struct inode * inode, void *handle, char *name, - void *buffer, int buffer_size) -{ - int rc = 0; - - lock_kernel(); - - rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, - name, buffer, buffer_size, 0); - unlock_kernel(); - if (rc) - CERROR("set xattr %s from inode %lu: rc %d\n", - name, inode->i_ino, rc); - return rc; -} - -static int fsfilt_ext3_get_xattr(struct inode *inode, char *name, - void *buffer, int buffer_size) -{ - int rc = 0; - - lock_kernel(); - - rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, - name, buffer, buffer_size); - unlock_kernel(); - - if (buffer == NULL) - return (rc == -ENODATA) ? 0 : rc; - if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc %d\n", - name, inode->i_ino, rc); - memset(buffer, 0, buffer_size); - return (rc == -ENODATA) ? 0 : rc; - } - - return rc; -} - /* If fso is NULL, op is FSFILT operation, otherwise op is number of fso objects. Logs is number of logfiles to update */ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs) @@ -1363,33 +1378,33 @@ static int fsfilt_ext3_get_write_extents_num(struct inode *inode, int *size) } static struct fsfilt_operations fsfilt_ext3_ops = { - .fs_type = "ext3", - .fs_owner = THIS_MODULE, - .fs_start = fsfilt_ext3_start, - .fs_brw_start = fsfilt_ext3_brw_start, - .fs_commit = fsfilt_ext3_commit, - .fs_commit_async = fsfilt_ext3_commit_async, - .fs_commit_wait = fsfilt_ext3_commit_wait, - .fs_setattr = fsfilt_ext3_setattr, - .fs_iocontrol = fsfilt_ext3_iocontrol, - .fs_set_md = fsfilt_ext3_set_md, - .fs_get_md = fsfilt_ext3_get_md, - .fs_readpage = fsfilt_ext3_readpage, - .fs_add_journal_cb = fsfilt_ext3_add_journal_cb, - .fs_statfs = fsfilt_ext3_statfs, - .fs_sync = fsfilt_ext3_sync, - .fs_map_inode_pages = fsfilt_ext3_map_inode_pages, - .fs_prep_san_write = fsfilt_ext3_prep_san_write, - .fs_write_record = fsfilt_ext3_write_record, - .fs_read_record = fsfilt_ext3_read_record, - .fs_setup = fsfilt_ext3_setup, - .fs_getpage = fsfilt_ext3_getpage, - .fs_send_bio = fsfilt_ext3_send_bio, - .fs_set_xattr = fsfilt_ext3_set_xattr, - .fs_get_xattr = fsfilt_ext3_get_xattr, - .fs_get_op_len = fsfilt_ext3_get_op_len, - .fs_add_dir_entry = fsfilt_ext3_add_dir_entry, - .fs_del_dir_entry = fsfilt_ext3_del_dir_entry, + .fs_type = "ext3", + .fs_owner = THIS_MODULE, + .fs_start = fsfilt_ext3_start, + .fs_brw_start = fsfilt_ext3_brw_start, + .fs_commit = fsfilt_ext3_commit, + .fs_commit_async = fsfilt_ext3_commit_async, + .fs_commit_wait = fsfilt_ext3_commit_wait, + .fs_setattr = fsfilt_ext3_setattr, + .fs_iocontrol = fsfilt_ext3_iocontrol, + .fs_set_md = fsfilt_ext3_set_md, + .fs_get_md = fsfilt_ext3_get_md, + .fs_readpage = fsfilt_ext3_readpage, + .fs_add_journal_cb = fsfilt_ext3_add_journal_cb, + .fs_statfs = fsfilt_ext3_statfs, + .fs_sync = fsfilt_ext3_sync, + .fs_map_inode_pages = fsfilt_ext3_map_inode_pages, + .fs_prep_san_write = fsfilt_ext3_prep_san_write, + .fs_write_record = fsfilt_ext3_write_record, + .fs_read_record = fsfilt_ext3_read_record, + .fs_setup = fsfilt_ext3_setup, + .fs_getpage = fsfilt_ext3_getpage, + .fs_send_bio = fsfilt_ext3_send_bio, + .fs_set_xattr = fsfilt_ext3_set_xattr, + .fs_get_xattr = fsfilt_ext3_get_xattr, + .fs_get_op_len = fsfilt_ext3_get_op_len, + .fs_add_dir_entry = fsfilt_ext3_add_dir_entry, + .fs_del_dir_entry = fsfilt_ext3_del_dir_entry, .fs_init_extents_ea = fsfilt_ext3_init_extents_ea, .fs_insert_extents_ea = fsfilt_ext3_insert_extents_ea, .fs_remove_extents_ea = fsfilt_ext3_remove_extents_ea,