From: nikita Date: Mon, 23 Oct 2006 22:26:01 +0000 (+0000) Subject: osd: implement osd_{read,write}() on top of fsfilt_ldiskfs buffer-head-based versions. X-Git-Tag: v1_8_0_110~486^2~370 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=425f5f9a294997568cf403a9bcb61555a5de58e1;p=fs%2Flustre-release.git osd: implement osd_{read,write}() on top of fsfilt_ldiskfs buffer-head-based versions. --- diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 2cc74ab..f0e18b8 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -207,11 +207,11 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs; break; case FSFILT_OP_JOIN: - /* delete 2 file(file + array id) + create 1 file (array id) + /* delete 2 file(file + array id) + create 1 file (array id) * create/update logs for each stripe */ nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb); - - /*create array log for head file*/ + + /*create array log for head file*/ nblocks += 3; nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + EXT3_SINGLEDATA_TRANS_BLOCKS); @@ -1119,10 +1119,8 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, return ext3_prep_san_write(inode, blocks, nblocks, newsize); } -static int fsfilt_ext3_read_record(struct file * file, void *buf, - int size, loff_t *offs) +int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs) { - struct inode *inode = file->f_dentry->d_inode; unsigned long block; struct buffer_head *bh; int err, blocksize, csize, boffs; @@ -1164,34 +1162,22 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf, } return 0; } +EXPORT_SYMBOL(fsfilt_ext3_read); -static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, - loff_t *offs, int force_sync) +static int fsfilt_ext3_read_record(struct file * file, void *buf, + int size, loff_t *offs) +{ + return fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs); +} + +int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize, + loff_t *offs, handle_t *handle) { struct buffer_head *bh = NULL; - unsigned long block; - struct inode *inode = file->f_dentry->d_inode; loff_t old_size = inode->i_size, offset = *offs; loff_t new_size = inode->i_size; - journal_t *journal; - handle_t *handle; - int err, block_count = 0, blocksize, size, boffs; - - /* Determine how many transaction credits are needed */ - blocksize = 1 << inode->i_blkbits; - block_count = (*offs & (blocksize - 1)) + bufsize; - block_count = (block_count + blocksize - 1) >> inode->i_blkbits; - - journal = EXT3_SB(inode->i_sb)->s_journal; - lock_24kernel(); - handle = journal_start(journal, - block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2); - unlock_24kernel(); - if (IS_ERR(handle)) { - CERROR("can't start transaction for %d blocks (%d bytes)\n", - block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize); - return PTR_ERR(handle); - } + unsigned long block; + int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs; while (bufsize > 0) { if (bh != NULL) @@ -1203,14 +1189,14 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, bh = ext3_bread(handle, inode, block, 1, &err); if (!bh) { CERROR("can't read/create block: %d\n", err); - goto out; + break; } err = ext3_journal_get_write_access(handle, bh); if (err) { CERROR("journal_get_write_access() returned error %d\n", err); - goto out; + break; } LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size); memcpy(bh->b_data + boffs, buf, size); @@ -1218,7 +1204,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, if (err) { CERROR("journal_dirty_metadata() returned error %d\n", err); - goto out; + break; } if (offset + size > new_size) new_size = offset + size; @@ -1226,10 +1212,6 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, bufsize -= size; buf += size; } - - if (force_sync) - handle->h_sync = 1; /* recovery likes this */ -out: if (bh) brelse(bh); @@ -1245,12 +1227,45 @@ out: unlock_kernel(); } + if (err == 0) + *offs = offset; + return err; +} +EXPORT_SYMBOL(fsfilt_ext3_write_handle); + +static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, + loff_t *offs, int force_sync) +{ + struct inode *inode = file->f_dentry->d_inode; + journal_t *journal; + handle_t *handle; + int err, block_count = 0, blocksize; + + /* Determine how many transaction credits are needed */ + blocksize = 1 << inode->i_blkbits; + block_count = (*offs & (blocksize - 1)) + bufsize; + block_count = (block_count + blocksize - 1) >> inode->i_blkbits; + + journal = EXT3_SB(inode->i_sb)->s_journal; + lock_24kernel(); + handle = journal_start(journal, + block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2); + unlock_24kernel(); + if (IS_ERR(handle)) { + CERROR("can't start transaction for %d blocks (%d bytes)\n", + block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize); + return PTR_ERR(handle); + } + + err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle); + + if (!err && force_sync) + handle->h_sync = 1; /* recovery likes this */ + lock_24kernel(); journal_stop(handle); unlock_24kernel(); - if (err == 0) - *offs = offset; return err; } @@ -1324,7 +1339,7 @@ do { \ Q_COPY(out, in, dqb_valid); \ } while (0) - + static int fsfilt_ext3_quotactl(struct super_block *sb, struct obd_quotactl *oqc) @@ -1908,7 +1923,7 @@ out: } #ifdef HAVE_QUOTA_SUPPORT -static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, +static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, int cmd) { int rc = 0; diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 0dd3312..174f207 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -284,31 +284,6 @@ static int osd_write_locked(const struct lu_env *env, struct osd_object *o) return oti->oti_w_locks > 0 && o->oo_owner == env; } -/* helper to push us into KERNEL_DS context */ -static struct file *osd_rw_init(const struct lu_env *env, - struct inode *inode, mm_segment_t *seg) -{ - struct osd_thread_info *info = lu_context_key_get(&env->le_ctx, &osd_key); - struct dentry *dentry = &info->oti_dentry; - struct file *file = &info->oti_file; - - file->f_dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = inode->i_fop; - file->f_mode = FMODE_WRITE|FMODE_READ; - dentry->d_inode = inode; - - *seg = get_fs(); - set_fs(KERNEL_DS); - return file; -} - -/* helper to pop us from KERNEL_DS context */ -static void osd_rw_fini(mm_segment_t *seg) -{ - set_fs(*seg); -} - static int osd_root_get(const struct lu_env *env, struct dt_device *dev, struct lu_fid *f) { @@ -1536,56 +1511,51 @@ static struct dt_object_operations osd_obj_ops = { * Body operations. */ +/* + * XXX: Another layering violation for now. + * + * We don't want to use ->f_op->read methods, because generic file write + * + * - serializes on ->i_sem, and + * + * - does a lot of extra work like balance_dirty_pages(), + * + * which doesn't work for globally shared files like /last-received. + */ +int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); +int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize, + loff_t *offs, handle_t *handle); + static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt, struct lu_buf *buf, loff_t *pos, struct lustre_capa *capa) { struct inode *inode = osd_dt_obj(dt)->oo_inode; - struct file *file; - mm_segment_t seg; - ssize_t result; if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ)) RETURN(-EACCES); - file = osd_rw_init(env, inode, &seg); - /* - * We'd like to use vfs_read() here, but it messes with - * dnotify_parent() and locks. - */ - if (file->f_op->read) - result = file->f_op->read(file, buf->lb_buf, buf->lb_len, pos); - else { - /* TODO: how to serve symlink readlink()? */ - CERROR("read not implemented currently\n"); - result = -ENOSYS; - } - osd_rw_fini(&seg); - return result; + return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos); } static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, loff_t *pos, struct thandle *handle, struct lustre_capa *capa) { - struct inode *inode = osd_dt_obj(dt)->oo_inode; - struct file *file; - mm_segment_t seg; - ssize_t result; + struct inode *inode = osd_dt_obj(dt)->oo_inode; + struct osd_thandle *oh; + ssize_t result; LASSERT(handle != NULL); if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE)) RETURN(-EACCES); - file = osd_rw_init(env, inode, &seg); - if (file->f_op->write) - result = file->f_op->write(file, buf->lb_buf, buf->lb_len, pos); - else { - CERROR("write not implemented currently\n"); - result = -ENOSYS; - } - osd_rw_fini(&seg); + oh = container_of(handle, struct osd_thandle, ot_super); + result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len, + pos, oh->ot_handle); + if (result == 0) + result = buf->lb_len; return result; } diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 527947a..3929474 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1461,9 +1461,9 @@ int main(int argc, char *argv[]) goto out; } #if 0 - /* + /* * Comment out these 2 checks temporarily, since for multi-MDSes - * in single node only 1 mds node could have mgs service + * in single node only 1 mds node could have mgs service */ if (IS_MDT(ldd) && !IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) { verrprint("No management node specified, adding MGS to this " @@ -1486,8 +1486,6 @@ int main(int argc, char *argv[]) if (IS_MDT(ldd) || IS_MGS(ldd)) strcat(always_mountopts, ",iopen_nopriv,user_xattr"); - if (IS_MDT(ldd)) - strcat(always_mountopts, ",data=journal"); if ((get_os_version() == 24) && IS_OST(ldd)) strcat(always_mountopts, ",asyncdel"); /* NB: Files created while extents are enabled cannot be read