From e8ffe16619baf1ef7c5c6b117d338956372aa752 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Wed, 19 Oct 2011 16:34:26 -0700 Subject: [PATCH] LU-884 clio: client in memory checksum Use page_mkwrite() method from latest kernels to correctly implement RPC checksum functionality. Also OBD_FL_MMAP is removed because it won't be used any more. Change-Id: I6ec5aae14f56c95b1ac6936d21b5a273582fa4e8 Signed-off-by: Jinshan Xiong Reviewed-on: http://review.whamcloud.com/1609 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 8 +- lustre/include/lustre/lustre_idl.h | 4 +- lustre/lclient/lcommon_cl.c | 11 -- lustre/llite/llite_internal.h | 13 +++ lustre/llite/llite_mmap.c | 206 +++++++++++++++++++++++++++++++++---- lustre/llite/vvp_io.c | 131 +++++++++++++---------- lustre/obdclass/cl_io.c | 8 +- lustre/osc/osc_request.c | 21 +--- lustre/tests/mmap_sanity.c | 68 +++++++++++- 9 files changed, 361 insertions(+), 109 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 86966dc..3d44b1c 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -2222,7 +2222,9 @@ enum cl_io_lock_dmd { /** Layers are free to decide between local and global locking. */ CILR_MAYBE, /** Never lock: there is no cache (e.g., liblustre). */ - CILR_NEVER + CILR_NEVER, + /** Peek lock: use existing locks, don't queue new ones */ + CILR_PEEK }; struct cl_io_rw_common { @@ -2283,10 +2285,12 @@ struct cl_io { pgoff_t ft_index; /** bytes valid byte on a faulted page. */ int ft_nob; - /** writable page? */ + /** writable page? for nopage() only */ int ft_writable; /** page of an executable? */ int ft_executable; + /** page_mkwrite() */ + int ft_mkwrite; /** resulting page */ struct cl_page *ft_page; } ci_fault; diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 45d8270..732846c 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1276,7 +1276,9 @@ enum obdo_flags { OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ - OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client */ + OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. 
+                                          * XXX: obsoleted - reserved for old
+                                          * clients prior to 2.2 */
        OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
        OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */

diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c
index 35b1455..c0798ad 100644
--- a/lustre/lclient/lcommon_cl.c
+++ b/lustre/lclient/lcommon_cl.c
@@ -1025,17 +1025,6 @@ void ccc_req_attr_set(const struct lu_env *env,
         }
         obdo_from_inode(oa, inode, &cl_i2info(inode)->lli_fid,
                         valid_flags & flags);
-#ifdef __KERNEL__
-        /* Bug11742 - set the OBD_FL_MMAP flag for memory mapped files */
-        if (cfs_atomic_read(&(cl_inode2ccc(inode)->cob_mmap_cnt)) != 0) {
-                if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
-                        oa->o_valid |= OBD_MD_FLFLAGS;
-                        oa->o_flags = OBD_FL_MMAP;
-                } else {
-                        oa->o_flags |= OBD_FL_MMAP;
-                }
-        }
-#endif
 }
 
 const struct cl_req_operations ccc_req_ops = {
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index e24aa81..5d95c4e 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -975,6 +975,19 @@ void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma,
                      unsigned long addr, size_t count);
 struct vm_area_struct *our_vma(unsigned long addr, size_t count);
 
+static inline void ll_invalidate_page(struct page *vmpage)
+{
+        struct address_space *mapping = vmpage->mapping;
+        loff_t offset = (loff_t)vmpage->index << PAGE_CACHE_SHIFT;
+
+        LASSERT(PageLocked(vmpage));
+        if (mapping == NULL)
+                return;
+
+        ll_teardown_mmaps(mapping, offset, offset + CFS_PAGE_SIZE);
+        truncate_complete_page(mapping, vmpage);
+}
+
 #define ll_s2sbi(sb)        (s2lsi(sb)->lsi_llsbi)
 
 /* don't need an addref as the sb_info should be holding one */
diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c
index 981c7d7..821f96f 100644
--- a/lustre/llite/llite_mmap.c
+++ b/lustre/llite/llite_mmap.c
@@ -64,13 +64,6 @@
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
-#define VMA_DEBUG(vma, fmt, arg...)                                          \
-        CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p) "   \
-               "ino(%lu) iname(%s): " fmt, vma, vma->vm_start, vma->vm_end,  \
-               vma->vm_pgoff, vma->vm_file->f_dentry->d_inode,               \
-               vma->vm_file->f_dentry->d_inode->i_ino,                       \
-               vma->vm_file->f_dentry->d_iname, ## arg);                     \
-
 struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
                        int *type);
 
@@ -128,7 +121,6 @@ struct cl_io *ll_fault_io_init(struct vm_area_struct *vma,
 {
         struct file        *file  = vma->vm_file;
         struct inode       *inode = file->f_dentry->d_inode;
-        const unsigned long writable = VM_SHARED|VM_WRITE;
         struct cl_io       *io;
         struct cl_fault_io *fio;
         struct lu_env      *env;
@@ -157,7 +149,6 @@ struct cl_io *ll_fault_io_init(struct vm_area_struct *vma,
 
         fio = &io->u.ci_fault;
         fio->ft_index      = index;
-        fio->ft_writable   = (vma->vm_flags&writable) == writable;
         fio->ft_executable = vma->vm_flags&VM_EXEC;
 
         /*
          * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
          * the kernel will not read other pages not covered by ldlm in
          * filemap_nopage. we do our readahead in ll_readpage.
         */
-        *ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
+        if (ra_flags != NULL)
+                *ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
         vma->vm_flags &= ~VM_SEQ_READ;
         vma->vm_flags |= VM_RAND_READ;
 
-        CDEBUG(D_INFO, "vm_flags: %lx (%lu %d %d)\n", vma->vm_flags,
-               fio->ft_index, fio->ft_writable, fio->ft_executable);
+        CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
+               fio->ft_index, fio->ft_executable);
 
         if (cl_io_init(env, io, CIT_FAULT, io->ci_obj) == 0) {
                 struct ccc_io *cio = ccc_env_io(env);
@@ -188,6 +180,93 @@ struct cl_io *ll_fault_io_init(struct vm_area_struct *vma,
         return io;
 }
 
+/* Shared page_mkwrite() code for the RHEL5 and RHEL6 kernels */
+static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
+                            bool *retry)
+{
+        struct lu_env *env;
+        struct cl_io *io;
+        struct vvp_io *vio;
+        struct cl_env_nest nest;
+        int result;
+        ENTRY;
+
+        LASSERT(vmpage != NULL);
+
+        io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
+        if (IS_ERR(io))
+                GOTO(out, result = PTR_ERR(io));
+
+        result = io->ci_result;
+        if (result < 0)
+                GOTO(out, result);
+
+        /* Don't enqueue new locks for page_mkwrite().
+         * If the lock has been cancelled then the page must have been
+         * truncated; in that case the kernel will handle it.
+         */
+        io->ci_lockreq = CILR_PEEK;
+        io->u.ci_fault.ft_mkwrite = 1;
+        io->u.ci_fault.ft_writable = 1;
+
+        vio = vvp_env_io(env);
+        vio->u.fault.ft_vma = vma;
+        vio->u.fault.ft_vmpage = vmpage;
+
+        result = cl_io_loop(env, io);
+
+        if (result == -ENODATA) /* peek failed, no lock caching. */
+                CDEBUG(D_MMAP, "race on page_mkwrite: %lx (%lu %p)\n",
+                       vma->vm_flags, io->u.ci_fault.ft_index, vmpage);
+
+        if (result == 0 || result == -ENODATA) {
+                lock_page(vmpage);
+                if (vmpage->mapping == NULL) {
+                        unlock_page(vmpage);
+
+                        /* page was truncated and lock was cancelled, return
+                         * ENODATA so that VM_FAULT_NOPAGE will be returned
+                         * to handle_mm_fault(). */
+                        if (result == 0)
+                                result = -ENODATA;
+                } else if (result == -ENODATA) {
+                        /* Invalidate it if the cl_lock is being revoked.
+                         * This piece of code is definitely needed for RHEL5;
+                         * otherwise SIGBUS would be wrongly returned to
+                         * applications. */
+                        ll_invalidate_page(vmpage);
+                        LASSERT(vmpage->mapping == NULL);
+                        unlock_page(vmpage);
+                } else if (!PageDirty(vmpage)) {
+                        /* race: the page has been cleaned by ptlrpcd after
+                         * it was unlocked, so it has to be added into the
+                         * dirty cache again, otherwise this soon-to-be-dirty
+                         * page won't consume any grants; even worse, if the
+                         * page is being transferred it will break the RPC
+                         * checksum.
+ */ + unlock_page(vmpage); + + CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has " + "been written out, retry.\n", + vmpage, vmpage->index); + + *retry = true; + result = -EAGAIN; + } + } + EXIT; + +out: + cl_io_fini(env, io); + cl_env_nested_put(&nest, env); + + CDEBUG(D_MMAP, "%s mkwrite with %d\n", cfs_current()->comm, result); + + LASSERT(ergo(result == 0, PageLocked(vmpage))); + return(result); +} + + #ifndef HAVE_VM_OP_FAULT /** * Lustre implementation of a vm_operations_struct::nopage() method, called by @@ -214,6 +293,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, unsigned long ra_flags; pgoff_t pg_offset; int result; + const unsigned long writable = VM_SHARED|VM_WRITE; ENTRY; pg_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; @@ -225,17 +305,21 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, if (result < 0) goto out_err; + io->u.ci_fault.ft_writable = (vma->vm_flags&writable) == writable; + vio = vvp_env_io(env); vio->u.fault.ft_vma = vma; vio->u.fault.nopage.ft_address = address; vio->u.fault.nopage.ft_type = type; + vio->u.fault.ft_vmpage = NULL; result = cl_io_loop(env, io); + page = vio->u.fault.ft_vmpage; + if (result != 0 && page != NULL) + page_cache_release(page); out_err: - if (result == 0) - page = vio->u.fault.ft_vmpage; - else if (result == -ENOMEM) + if (result == -ENOMEM) page = NOPAGE_OOM; vma->vm_flags &= ~VM_RAND_READ; @@ -246,6 +330,36 @@ out_err: RETURN(page); } + +static int ll_page_mkwrite(struct vm_area_struct *vma, struct page *vmpage) +{ + int count = 0; + bool printed = false; + bool retry; + int result; + + do { + retry = false; + result = ll_page_mkwrite0(vma, vmpage, &retry); + + if (!printed && ++count > 16) { + CWARN("app(%s): the page %lu of file %lu is under heavy" + " contention.\n", + current->comm, page_index(vmpage), + vma->vm_file->f_dentry->d_inode->i_ino); + printed = true; + } + } while (retry); + + if (result == 0) + unlock_page(vmpage); + else if (result == -ENODATA) + result = 0; /* kernel will know truncate has happened and + * retry */ + + return result; +} + #else /** * Lustre implementation of a vm_operations_struct::fault() method, called by @@ -258,11 +372,12 @@ out_err: * \retval VM_FAULT_ERROR on general error * \retval NOPAGE_OOM not have memory for allocate new page */ -int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) { struct lu_env *env; struct cl_io *io; struct vvp_io *vio = NULL; + struct page *vmpage; unsigned long ra_flags; struct cl_env_nest nest; int result; @@ -283,21 +398,30 @@ int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) vio->u.fault.fault.ft_vmf = vmf; result = cl_io_loop(env, io); + + vmpage = vio->u.fault.ft_vmpage; + if (result != 0 && vmpage != NULL) { + page_cache_release(vmpage); + vmf->page = NULL; + } + fault_ret = vio->u.fault.fault.ft_flags; out_err: - if ((result != 0) && !(fault_ret & VM_FAULT_RETRY)) - fault_ret |= VM_FAULT_ERROR; + if (result != 0 && fault_ret == 0) + fault_ret = VM_FAULT_ERROR; vma->vm_flags |= ra_flags; cl_io_fini(env, io); cl_env_nested_put(&nest, env); + CDEBUG(D_MMAP, "%s fault %d/%d\n", + cfs_current()->comm, fault_ret, result); RETURN(fault_ret); } -int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int count = 0; bool printed = false; @@ -330,6 +454,49 @@ restart: } return result; } + +static int 
ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+        int count = 0;
+        bool printed = false;
+        bool retry;
+        int result;
+
+        do {
+                retry = false;
+                result = ll_page_mkwrite0(vma, vmf->page, &retry);
+
+                if (!printed && ++count > 16) {
+                        CWARN("app(%s): the page %lu of file %lu is under heavy"
+                              " contention.\n",
+                              current->comm, vmf->pgoff,
+                              vma->vm_file->f_dentry->d_inode->i_ino);
+                        printed = true;
+                }
+        } while (retry);
+
+        switch (result) {
+        case 0:
+                LASSERT(PageLocked(vmf->page));
+                result = VM_FAULT_LOCKED;
+                break;
+        case -ENODATA:
+        case -EFAULT:
+                result = VM_FAULT_NOPAGE;
+                break;
+        case -ENOMEM:
+                result = VM_FAULT_OOM;
+                break;
+        case -EAGAIN:
+                result = VM_FAULT_RETRY;
+                break;
+        default:
+                result = VM_FAULT_SIGBUS;
+                break;
+        }
+
+        return result;
+}
 #endif
 
 /**
@@ -412,6 +579,7 @@ static struct vm_operations_struct ll_file_vm_ops = {
 #else
         .fault = ll_fault,
 #endif
+        .page_mkwrite = ll_page_mkwrite,
         .open = ll_vm_open,
         .close = ll_vm_close,
 };
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c
index 9f92f9e..c7e6313 100644
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -638,45 +638,43 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
                      cfio->nopage.ft_address, (long)cfio->nopage.ft_type);
 
         cfio->ft_vmpage = vmpage;
+        lock_page(vmpage);
 
         return 0;
 }
 #else
 static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 {
-        cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, cfio->fault.ft_vmf);
-
-        if (cfio->fault.ft_vmf->page) {
-                LL_CDEBUG_PAGE(D_PAGE, cfio->fault.ft_vmf->page,
-                               "got addr %p type NOPAGE\n",
-                               cfio->fault.ft_vmf->virtual_address);
-                /*XXX workaround to bug in CLIO - he deadlocked with
-                 lock cancel if page locked  */
-                if (likely(cfio->fault.ft_flags & VM_FAULT_LOCKED)) {
-                        unlock_page(cfio->fault.ft_vmf->page);
-                        cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+        struct vm_fault *vmf = cfio->fault.ft_vmf;
+
+        cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
+
+        if (vmf->page) {
+                LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
+                               vmf->virtual_address);
+                if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+                        lock_page(vmf->page);
+                        cfio->fault.ft_flags |= VM_FAULT_LOCKED;
                 }
 
-                cfio->ft_vmpage = cfio->fault.ft_vmf->page;
+                cfio->ft_vmpage = vmf->page;
                 return 0;
         }
 
-        if (unlikely (cfio->fault.ft_flags & VM_FAULT_ERROR)) {
-                CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
-                       cfio->fault.ft_vmf->virtual_address);
+        if (cfio->fault.ft_flags & VM_FAULT_SIGBUS) {
+                CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
                 return -EFAULT;
         }
 
-        if (unlikely (cfio->fault.ft_flags & VM_FAULT_NOPAGE)) {
-                CDEBUG(D_PAGE, "got addr %p - OOM\n",
-                       cfio->fault.ft_vmf->virtual_address);
+        if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+                CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
                 return -ENOMEM;
         }
 
-        if (unlikely(cfio->fault.ft_flags & VM_FAULT_RETRY))
+        if (cfio->fault.ft_flags & VM_FAULT_RETRY)
                 return -EAGAIN;
 
-        CERROR("unknow error in page fault!\n");
+        CERROR("unknown error in page fault %d!\n", cfio->fault.ft_flags);
         return -EINVAL;
 }
 
@@ -692,8 +690,8 @@ static int vvp_io_fault_start(const struct lu_env *env,
         struct cl_fault_io *fio = &io->u.ci_fault;
         struct vvp_fault_io *cfio = &vio->u.fault;
         loff_t offset;
-        int kernel_result = 0;
         int result = 0;
+        cfs_page_t *vmpage = NULL;
         struct cl_page *page;
         loff_t size;
         pgoff_t last; /* last page in a file data region */
@@ -711,63 +709,86 @@
         if (result != 0)
                 return result;
 
-        /* must return unlocked page */
-
kernel_result = vvp_io_kernel_fault(cfio); - if (kernel_result != 0) - return kernel_result; - - if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) { - truncate_inode_pages_range(inode->i_mapping, - cl_offset(obj, fio->ft_index), offset); + /* must return locked page */ + if (fio->ft_mkwrite) { + LASSERT(cfio->ft_vmpage != NULL); + lock_page(cfio->ft_vmpage); + } else { + result = vvp_io_kernel_fault(cfio); + if (result != 0) + return result; } - /* Temporarily lock vmpage to keep cl_page_find() happy. */ - lock_page(cfio->ft_vmpage); + vmpage = cfio->ft_vmpage; + LASSERT(PageLocked(vmpage)); + + if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) + ll_invalidate_page(vmpage); /* Though we have already held a cl_lock upon this page, but * it still can be truncated locally. */ - if (unlikely(cfio->ft_vmpage->mapping == NULL)) { - unlock_page(cfio->ft_vmpage); - + if (unlikely(vmpage->mapping == NULL)) { CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n"); /* return +1 to stop cl_io_loop() and ll_fault() will catch * and retry. */ - return +1; + GOTO(out, result = +1); } - page = cl_page_find(env, obj, fio->ft_index, cfio->ft_vmpage, - CPT_CACHEABLE); - unlock_page(cfio->ft_vmpage); - if (IS_ERR(page)) { - page_cache_release(cfio->ft_vmpage); - cfio->ft_vmpage = NULL; - return PTR_ERR(page); + page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE); + if (IS_ERR(page)) + GOTO(out, result = PTR_ERR(page)); + + /* if page is going to be written, we should add this page into cache + * earlier. */ + if (fio->ft_mkwrite) { + wait_on_page_writeback(vmpage); + if (set_page_dirty(vmpage)) { + struct ccc_page *cp; + + /* vvp_page_assume() calls wait_on_page_writeback(). */ + cl_page_assume(env, io, page); + + cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); + vvp_write_pending(cl2ccc(obj), cp); + + /* Do not set Dirty bit here so that in case IO is + * started before the page is really made dirty, we + * still have chance to detect it. */ + result = cl_page_cache_add(env, io, page, CRT_WRITE); + if (result < 0) { + cl_page_unassume(env, io, page); + cl_page_put(env, page); + + /* we're in big trouble, what can we do now? */ + if (result == -EDQUOT) + result = -ENOSPC; + GOTO(out, result); + } + } } size = i_size_read(inode); last = cl_index(obj, size - 1); + LASSERT(fio->ft_index <= last); if (fio->ft_index == last) /* * Last page is mapped partially. */ fio->ft_nob = size - cl_offset(obj, fio->ft_index); - else + else fio->ft_nob = cl_page_size(obj); - lu_ref_add(&page->cp_reference, "fault", io); - fio->ft_page = page; - /* - * Certain 2.6 kernels return not-NULL from - * filemap_nopage() when page is beyond the file size, - * on the grounds that "An external ptracer can access - * pages that normally aren't accessible.." Don't - * propagate such page fault to the lower layers to - * avoid side-effects like KMS updates. 
- */ - if (fio->ft_index > last) - result = +1; + lu_ref_add(&page->cp_reference, "fault", io); + fio->ft_page = page; + EXIT; +out: + /* return unlocked vmpage to avoid deadlocking */ + unlock_page(vmpage); +#ifdef HAVE_VM_OP_FAULT + cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; +#endif return result; } diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index cf18605..9ebc40d 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -377,7 +377,13 @@ static int cl_lockset_lock_one(const struct lu_env *env, ENTRY; - lock = cl_lock_request(env, io, &link->cill_descr, "io", io); + if (io->ci_lockreq == CILR_PEEK) { + lock = cl_lock_peek(env, io, &link->cill_descr, "io", io); + if (lock == NULL) + lock = ERR_PTR(-ENODATA); + } else + lock = cl_lock_request(env, io, &link->cill_descr, "io", io); + if (!IS_ERR(lock)) { link->cill_lock = lock; cfs_list_move(&link->cill_linkage, &set->cls_curr); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 4c82107..f213c09 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1486,10 +1486,6 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer, return 0; } - /* If this is mmaped file - it can be changed at any time */ - if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & OBD_FL_MMAP) - return 1; - cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ? oa->o_flags : 0); new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE, @@ -2225,20 +2221,9 @@ static int brw_interpret(const struct lu_env *env, rc = osc_brw_fini_request(req, rc); CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); if (osc_recoverable_error(rc)) { - /* Only retry once for mmaped files since the mmaped page - * might be modified at anytime. We have to retry at least - * once in case there WAS really a corruption of the page - * on the network, that was not caused by mmap() modifying - * the page. Bug11742 */ - if ((rc == -EAGAIN) && (aa->aa_resends > 0) && - aa->aa_oa->o_valid & OBD_MD_FLFLAGS && - aa->aa_oa->o_flags & OBD_FL_MMAP) { - rc = 0; - } else { - rc = osc_brw_redo_request(req, aa); - if (rc == 0) - RETURN(0); - } + rc = osc_brw_redo_request(req, aa); + if (rc == 0) + RETURN(0); } if (aa->aa_ocapa) { diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c index 478896e..60f72a9 100644 --- a/lustre/tests/mmap_sanity.c +++ b/lustre/tests/mmap_sanity.c @@ -652,7 +652,7 @@ static int mmap_tst7_func(char *mnt, int rw) rc = errno; goto out; } - buf = mmap(NULL, page_size, + buf = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { perror("mmap"); @@ -688,6 +688,69 @@ static int mmap_tst7(char *mnt) return rc; } +static int mmap_tst8(char *mnt) +{ + char fname[256]; + char *buf = MAP_FAILED; + int fd = -1; + int rc = 0; + pid_t pid; + char xyz[page_size * 2]; + + if (snprintf(fname, 256, "%s/mmap_tst8", mnt) >= 256) { + fprintf(stderr, "dir name too long\n"); + rc = ENAMETOOLONG; + goto out; + } + fd = open(fname, O_RDWR | O_CREAT, 0644); + if (fd == -1) { + perror("open"); + rc = errno; + goto out; + } + if (ftruncate(fd, page_size) == -1) { + perror("truncate"); + rc = errno; + goto out; + } + buf = mmap(NULL, page_size * 2, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + perror("mmap"); + rc = errno; + goto out; + } + + pid = fork(); + if (pid == 0) { /* child */ + memcpy(xyz, buf, page_size * 2); + /* shouldn't reach here. 
 */
+                exit(0);
+        } else if (pid > 0) {        /* parent */
+                int status = 0;
+                pid = waitpid(pid, &status, 0);
+                if (pid < 0) {
+                        perror("wait");
+                        rc = errno;
+                        goto out;
+                }
+
+                rc = EFAULT;
+                if (WIFSIGNALED(status) && SIGBUS == WTERMSIG(status))
+                        rc = 0;
+        } else {
+                perror("fork");
+                rc = errno;
+        }
+
+out:
+        if (buf != MAP_FAILED)
+                munmap(buf, page_size * 2);
+        if (fd != -1)
+                close(fd);
+        return rc;
+}
+
 static int remote_tst(int tc, char *mnt)
 {
         int rc = 0;
@@ -705,7 +768,7 @@ static int remote_tst(int tc, char *mnt)
         }
         return rc;
 }
- 
+
 struct test_case {
         int     tc;                     /* test case number */
         char    *desc;                  /* test description */
@@ -724,6 +787,7 @@ struct test_case tests[] = {
         { 6, "mmap test6: check mmap write/read content on two nodes",
                 mmap_tst6, 2 },
         { 7, "mmap test7: file i/o with an unmapped buffer", mmap_tst7, 1},
+        { 8, "mmap test8: SIGBUS for access beyond file size", mmap_tst8, 1},
 
         { 0, NULL, 0, 0 }
 };
-- 
1.8.3.1
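
The ->page_mkwrite() contract that this patch implements for Lustre is easy to
lose inside the CLIO plumbing above, so here is a minimal sketch of it in
generic 2.6.32-era kernel shape (similar in spirit to filemap_page_mkwrite()).
This is not Lustre code: the function name is hypothetical, and the
Lustre-specific steps (CILR_PEEK lock lookup, grant accounting,
cl_page_cache_add()) are reduced to comments pointing back at the patch.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical ->page_mkwrite() handler, 2.6.32-era signature.  The VM
 * calls it before it lets a read-only PTE on a shared mapping become
 * writable, which is exactly the hook the RPC checksum logic needs. */
static int example_page_mkwrite(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
{
        struct page *page = vmf->page;

        lock_page(page);
        if (page->mapping != vma->vm_file->f_mapping) {
                /* Raced with truncate: bail out and let the VM retry the
                 * whole fault.  ll_page_mkwrite() reaches the same state
                 * by mapping -ENODATA to VM_FAULT_NOPAGE. */
                unlock_page(page);
                return VM_FAULT_NOPAGE;
        }

        /* Re-dirty the page before the PTE becomes writable so that dirty
         * accounting sees it.  This mirrors the ft_mkwrite branch in
         * vvp_io_fault_start(): set_page_dirty() plus cl_page_cache_add()
         * is what keeps grants and the RPC checksum consistent for pages
         * modified through mmap. */
        set_page_dirty(page);
        wait_on_page_writeback(page);

        /* Hand the page back still locked; the caller unlocks it. */
        return VM_FAULT_LOCKED;
}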
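
To see the behaviour that mmap_tst8 asserts without a Lustre mount, the
standalone distillation below should behave identically on any local
filesystem, since SIGBUS on touching a mapped page beyond the last
file-backed page (read or write) is generic VM behaviour rather than
anything Lustre-specific.  The file path is illustrative.

#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/mkwrite_demo", O_RDWR | O_CREAT, 0644);
        char *buf;
        pid_t pid;

        if (fd == -1 || ftruncate(fd, page_size) == -1) {
                perror("open/ftruncate");
                return 1;
        }

        /* Map two pages of a file that only has one. */
        buf = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        pid = fork();
        if (pid == 0) {
                buf[0] = 'x';           /* within EOF: triggers page_mkwrite */
                buf[page_size] = 'x';   /* beyond EOF: SIGBUS expected */
                _exit(0);               /* shouldn't reach here */
        } else if (pid > 0) {
                int status;

                waitpid(pid, &status, 0);
                printf("child %s\n",
                       WIFSIGNALED(status) && WTERMSIG(status) == SIGBUS ?
                       "killed by SIGBUS, as expected" :
                       "was NOT killed by SIGBUS");
        } else {
                perror("fork");
        }

        munmap(buf, page_size * 2);
        close(fd);
        unlink("/tmp/mkwrite_demo");
        return 0;
}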