X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fllite%2Fvvp_io.c;h=db53842f9247ea60e47222b198d9ddc93596f29d;hb=3a0bb68dca5f7f28e650cf521d703b2714087830;hp=413fd9496416770aaa8bf62ad0381760afda8368;hpb=74a7b9cbb2fe1729db7bcf74fef031fed73239d7;p=fs%2Flustre-release.git diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 413fd94..db53842 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -27,7 +27,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -65,6 +65,39 @@ int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) return vio->cui_io_subtype == IO_NORMAL; } +/** + * For swapping layout. The file's layout may have changed. + * To avoid populating pages to a wrong stripe, we have to verify the + * correctness of layout. It works because swapping layout processes + * have to acquire group lock. + */ +static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, + struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ccc_io *cio = ccc_env_io(env); + bool rc = true; + + switch (io->ci_type) { + case CIT_READ: + case CIT_WRITE: + /* don't need lock here to check lli_layout_gen as we have held + * extent lock and GROUP lock has to hold to swap layout */ + if (lli->lli_layout_gen != cio->cui_layout_gen) { + io->ci_need_restart = 1; + /* this will return application a short read/write */ + io->ci_continue = 0; + rc = false; + } + case CIT_FAULT: + /* fault is okay because we've already had a page. */ + default: + break; + } + + return rc; +} + /***************************************************************************** * * io operations. @@ -88,14 +121,22 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; struct ccc_io *cio = cl2ccc_io(env, ios); - __u32 gen; CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - /* check layout version */ - ll_layout_refresh(ccc_object_inode(obj), &gen); - if (cio->cui_layout_gen > 0) - io->ci_need_restart = cio->cui_layout_gen == gen; + CDEBUG(D_VFSTRACE, "ignore/verify layout %d/%d, layout version %d.\n", + io->ci_ignore_layout, io->ci_verify_layout, cio->cui_layout_gen); + + if (!io->ci_ignore_layout && io->ci_verify_layout) { + __u32 gen = 0; + + /* check layout version */ + ll_layout_refresh(ccc_object_inode(obj), &gen); + io->ci_need_restart = cio->cui_layout_gen != gen; + if (io->ci_need_restart) + CDEBUG(D_VFSTRACE, "layout changed from %d to %d.\n", + cio->cui_layout_gen, gen); + } } static void vvp_io_fault_fini(const struct lu_env *env, @@ -281,21 +322,6 @@ static int vvp_io_write_lock(const struct lu_env *env, static int vvp_io_setattr_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); - struct inode *inode = ccc_object_inode(ios->cis_obj); - - /* - * We really need to get our PW lock before we change inode->i_size. - * If we don't we can race with other i_size updaters on our node, - * like ll_file_read. We can also race with i_size propogation to - * other nodes through dirtying and writeback of final cached pages. - * This last one is especially bad for racing o_append users on other - * nodes. - */ - if (cl_io_is_trunc(ios->cis_io)) - inode_dio_write_done(inode); - mutex_unlock(&inode->i_mutex); - cio->u.setattr.cui_locks_released = 1; return 0; } @@ -307,10 +333,10 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env, static int vvp_io_setattr_lock(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); - struct cl_io *io = ios->cis_io; - size_t new_size; - __u32 enqflags = 0; + struct ccc_io *cio = ccc_env_io(env); + struct cl_io *io = ios->cis_io; + __u64 new_size; + __u32 enqflags = 0; if (cl_io_is_trunc(io)) { new_size = io->u.ci_setattr.sa_attr.lvb_size; @@ -378,15 +404,10 @@ static int vvp_io_setattr_time(const struct lu_env *env, static int vvp_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = cl2ccc_io(env, ios); struct cl_io *io = ios->cis_io; struct inode *inode = ccc_object_inode(io->ci_obj); - LASSERT(cio->u.setattr.cui_locks_released); - mutex_lock(&inode->i_mutex); - cio->u.setattr.cui_locks_released = 0; - if (cl_io_is_trunc(io)) return vvp_io_setattr_trunc(env, ios, inode, io->u.ci_setattr.sa_attr.lvb_size); @@ -397,30 +418,21 @@ static int vvp_io_setattr_start(const struct lu_env *env, static void vvp_io_setattr_end(const struct lu_env *env, const struct cl_io_slice *ios) { - struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); - - if (!cl_io_is_trunc(io)) - return; + struct cl_io *io = ios->cis_io; + struct inode *inode = ccc_object_inode(io->ci_obj); - /* Truncate in memory pages - they must be clean pages because osc - * has already notified to destroy osc_extents. */ - vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); + if (cl_io_is_trunc(io)) { + /* Truncate in memory pages - they must be clean pages + * because osc has already notified to destroy osc_extents. */ + vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); + inode_dio_write_done(inode); + } + mutex_unlock(&inode->i_mutex); } static void vvp_io_setattr_fini(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); - struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(ios->cis_io->ci_obj); - - if (cio->u.setattr.cui_locks_released) { - mutex_lock(&inode->i_mutex); - if (cl_io_is_trunc(io)) - inode_dio_wait(inode); - cio->u.setattr.cui_locks_released = 0; - } vvp_io_fini(env, ios); } @@ -473,6 +485,9 @@ static int vvp_io_read_start(const struct lu_env *env, CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); + if (!can_populate_pages(env, io, inode)) + return 0; + result = ccc_prep_size(env, obj, io, pos, tot, &exceed); if (result != 0) return result; @@ -563,6 +578,9 @@ static int vvp_io_write_start(const struct lu_env *env, ENTRY; + if (!can_populate_pages(env, io, inode)) + return 0; + if (cl_io_is_append(io)) { /* * PARALLEL IO This has to be changed for parallel IO doing @@ -923,10 +941,10 @@ static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io, * purposes here we can treat it like i_size. */ if (attr->cat_kms <= offset) { - char *kaddr = kmap_atomic(cp->cpg_page, KM_USER0); + char *kaddr = ll_kmap_atomic(cp->cpg_page, KM_USER0); memset(kaddr, 0, cl_page_size(obj)); - kunmap_atomic(kaddr, KM_USER0); + ll_kunmap_atomic(kaddr, KM_USER0); } else if (cp->cpg_defer_uptodate) cp->cpg_ra_used = 1; else @@ -992,6 +1010,7 @@ static int vvp_io_commit_write(const struct lu_env *env, struct cl_page *pg = slice->cpl_page; struct inode *inode = ccc_object_inode(obj); struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ll_inode_info *lli = ll_i2info(inode); cfs_page_t *vmpage = cp->cpg_page; int result; @@ -1072,6 +1091,20 @@ static int vvp_io_commit_write(const struct lu_env *env, } ll_stats_ops_tally(sbi, tallyop, 1); + /* Inode should be marked DIRTY even if no new page was marked DIRTY + * because page could have been not flushed between 2 modifications. + * It is important the file is marked DIRTY as soon as the I/O is done + * Indeed, when cache is flushed, file could be already closed and it + * is too late to warn the MDT. + * It is acceptable that file is marked DIRTY even if I/O is dropped + * for some reasons before being flushed to OST. + */ + if (result == 0) { + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + } + size = cl_offset(obj, pg->cp_index) + to; ll_inode_size_lock(inode); @@ -1172,6 +1205,12 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, io->ci_lockreq = CILR_MANDATORY; } + /* ignore layout change for generic CIT_MISC but not for glimpse. + * io context for glimpse must set ci_verify_layout to true, + * see cl_glimpse_size0() for details. */ + if (io->ci_type == CIT_MISC && !io->ci_verify_layout) + io->ci_ignore_layout = 1; + /* Enqueue layout lock and get layout version. We need to do this * even for operations requiring to open file, such as read and write, * because it might not grant layout lock in IT_OPEN. */