From: vitaly Date: Mon, 29 Jun 2009 16:06:40 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: v1_9_220~78 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3e57f60221a4550f717e5e81f2e0b0505226abc9 Branch HEAD b=19207 i=zam i=panda direct I/O code checks whether data can fit user buffer now --- diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 32bfa8a..6f42664 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -173,10 +173,10 @@ static int ll_set_page_dirty(struct page *vmpage) #define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL static inline int ll_get_user_pages(int rw, unsigned long user_addr, - size_t size, struct page ***pages) + size_t size, struct page ***pages, + int *max_pages) { int result = -ENOMEM; - int page_count; /* set an arbitrary limit to prevent arithmetic overflow */ if (size > MAX_DIRECTIO_SIZE) { @@ -184,18 +184,18 @@ static inline int ll_get_user_pages(int rw, unsigned long user_addr, return -EFBIG; } - page_count = (user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - page_count -= user_addr >> CFS_PAGE_SHIFT; + *max_pages = (user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + *max_pages -= user_addr >> CFS_PAGE_SHIFT; - OBD_ALLOC_WAIT(*pages, page_count * sizeof(**pages)); + OBD_ALLOC_WAIT(*pages, *max_pages * sizeof(**pages)); if (*pages) { down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - page_count, (rw == READ), 0, *pages, + *max_pages, (rw == READ), 0, *pages, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) - OBD_FREE(*pages, page_count * sizeof(**pages)); + if (unlikely(result <= 0)) + OBD_FREE(*pages, *max_pages * sizeof(**pages)); } return result; @@ -208,6 +208,8 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) int i; for (i = 0; i < npages; i++) { + if (pages[i] == NULL) + break; if (do_dirty) set_page_dirty_lock(pages[i]); page_cache_release(pages[i]); @@ -365,7 +367,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ccc_object *obj = cl_inode2ccc(inode); - ssize_t count = iov_length(iov, nr_segs), tot_bytes = 0; + ssize_t count = iov_length(iov, nr_segs); + ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; unsigned long seg = 0; @@ -418,30 +421,34 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, while (iov_left > 0) { struct page **pages; - int page_count; - ssize_t result; + int page_count, max_pages = 0; + size_t bytes; + bytes = min(size,iov_left); page_count = ll_get_user_pages(rw, user_addr, - min(size, iov_left), - &pages); - LASSERT(page_count != 0); - if (page_count > 0) { + bytes, + &pages, &max_pages); + if (likely(page_count > 0)) { + if (unlikely(page_count < max_pages)) + bytes = page_count << CFS_PAGE_SHIFT; result = ll_direct_IO_26_seg(env, io, rw, inode, file->f_mapping, - min(size,iov_left), + bytes, file_offset, pages, page_count); - ll_free_user_pages(pages, page_count, rw==READ); + ll_free_user_pages(pages, max_pages, rw==READ); + } else if (page_count == 0) { + GOTO(out, result = -EFAULT); } else { - result = 0; + result = page_count; } - if (page_count < 0 || result <= 0) { + if (unlikely(result <= 0)) { /* If we can't allocate a large enough buffer * for the request, shrink it to a smaller * PAGE_SIZE multiple and try again. * We should always be able to kmalloc for a * page worth of page pointers = 4MB on i386. */ - if ((page_count == -ENOMEM||result == -ENOMEM)&& + if (result == -ENOMEM && size > (CFS_PAGE_SIZE / sizeof(*pages)) * CFS_PAGE_SIZE) { size = ((((size / 2) - 1) | @@ -452,9 +459,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, continue; } - if (tot_bytes <= 0) - tot_bytes = page_count < 0 ? page_count : result; - GOTO(out, tot_bytes); + GOTO(out, result); } tot_bytes += result; @@ -477,7 +482,7 @@ out: } cl_env_put(env, &refcheck); - RETURN(tot_bytes); + RETURN(tot_bytes ? : result); } struct address_space_operations ll_aops = { diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c index d2492af..012c8b9 100644 --- a/lustre/tests/mmap_sanity.c +++ b/lustre/tests/mmap_sanity.c @@ -599,6 +599,67 @@ out: return rc; } +static int mmap_tst7_func(char *mnt, int rw) +{ + char fname[256]; + char *buf = MAP_FAILED; + ssize_t bytes; + int fd = -1; + int rc = 0; + + if (snprintf(fname, 256, "%s/mmap_tst7.%s", + mnt, (rw == 0) ? "read":"write") >= 256) { + fprintf(stderr, "dir name too long\n"); + rc = ENAMETOOLONG; + goto out; + } + fd = open(fname, O_RDWR | O_DIRECT | O_CREAT, 0644); + if (fd == -1) { + perror("open"); + rc = errno; + goto out; + } + if (ftruncate(fd, 2 * page_size) == -1) { + perror("truncate"); + rc = errno; + goto out; + } + buf = mmap(NULL, page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) { + perror("mmap"); + rc = errno; + goto out; + } + /* ensure the second page isn't mapped */ + munmap(buf + page_size, page_size); + bytes = (rw == 0) ? read(fd, buf, 2 * page_size) : + write(fd, buf, 2 * page_size); + /* Expected behavior */ + if (bytes == page_size) + goto out; + fprintf(stderr, "%s returned %zd, errno = %d\n", + (rw == 0)?"read":"write", bytes, errno); + rc = EIO; +out: + if (buf != MAP_FAILED) + munmap(buf, page_size); + if (fd != -1) + close(fd); + return rc; +} + +static int mmap_tst7(char *mnt) +{ + int rc; + + rc = mmap_tst7_func(mnt, 0); + if (rc != 0) + return rc; + rc = mmap_tst7_func(mnt, 1); + return rc; +} + static int remote_tst(int tc, char *mnt) { int rc = 0; @@ -634,6 +695,7 @@ struct test_case tests[] = { "which mmapped to just this file", mmap_tst5, 1 }, { 6, "mmap test6: check mmap write/read content on two nodes", mmap_tst6, 2 }, + { 7, "mmap test7: file i/o with an unmapped buffer", mmap_tst7, 1}, { 0, NULL, 0, 0 } }; diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 6a41463..9431de5 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -348,7 +348,7 @@ test_17() { # bug 3513, 3667 run_test 17 "resource creation/LVB creation race ===============" test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 sync; sleep 1; sync } run_test 18 "mmap sanity check ================================="