From 1fd4820bc31a049febb5c76f60a530d4cef5901b Mon Sep 17 00:00:00 2001 From: zam Date: Sun, 24 May 2009 19:33:02 +0000 Subject: [PATCH] b=19207 i=alexey.lyashkov i=andrew.perepechko check for unmapped buffer in lustre direct i/o patch --- lustre/llite/rw26.c | 68 +++++++++++++++++++++++++--------------------- lustre/tests/mmap_sanity.c | 62 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 31 deletions(-) diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 32c0162..6a28589 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -112,10 +112,10 @@ static int ll_releasepage(struct page *page, RELEASEPAGE_ARG_TYPE gfp_mask) #define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL static inline int ll_get_user_pages(int rw, unsigned long user_addr, - size_t size, struct page ***pages) + size_t size, struct page ***pages, + int *max_pages) { int result = -ENOMEM; - int page_count; /* set an arbitrary limit to prevent arithmetic overflow */ if (size > MAX_DIRECTIO_SIZE) { @@ -123,18 +123,18 @@ static inline int ll_get_user_pages(int rw, unsigned long user_addr, return -EFBIG; } - page_count = ((user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT)- + *max_pages = ((user_addr + size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT)- (user_addr >> CFS_PAGE_SHIFT); - OBD_ALLOC_WAIT(*pages, page_count * sizeof(**pages)); + OBD_ALLOC_WAIT(*pages, *max_pages * sizeof(**pages)); if (*pages) { down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - page_count, (rw == READ), 0, *pages, + *max_pages, (rw == READ), 0, *pages, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) - OBD_FREE(*pages, page_count * sizeof(**pages)); + if (unlikely(result < 0)) + OBD_FREE(*pages, *max_pages * sizeof(**pages)); } return result; @@ -147,11 +147,12 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) int i; for (i = 0; i < npages; i++) { + if (pages[i] == NULL) + break; if (do_dirty) set_page_dirty_lock(pages[i]); page_cache_release(pages[i]); } - OBD_FREE(pages, npages * sizeof(*pages)); } @@ -208,7 +209,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - ssize_t count = iov_length(iov, nr_segs), tot_bytes = 0; + ssize_t count = iov_length(iov, nr_segs); + ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; struct ptlrpc_request_set *set; @@ -269,31 +271,35 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, while (iov_left > 0) { struct page **pages; - int page_count; - ssize_t result; + int page_count, max_pages; + size_t bytes; + bytes = min(size,iov_left); page_count = ll_get_user_pages(rw, user_addr, - min(size, iov_left), - &pages); - LASSERT(page_count != 0); - if (page_count > 0) { + bytes, + &pages, &max_pages); + if (likely(page_count > 0)) { + if (unlikely(page_count < max_pages)) + bytes = page_count << CFS_PAGE_SHIFT; result = ll_direct_IO_26_seg(rw, inode, file->f_mapping, &oinfo, set, - min(size,iov_left), + bytes, file_offset, pages, page_count); - ll_free_user_pages(pages, page_count, rw==READ); + ll_free_user_pages(pages, max_pages, rw==READ); + } else if (page_count == 0) { + GOTO(out, result = -EFAULT); } else { - result = 0; + result = page_count; } - if (page_count < 0 || result <= 0) { + if (unlikely(result <= 0)) { /* If we can't allocate a large enough buffer * for the request, shrink it to a smaller * PAGE_SIZE multiple and try again. * We should always be able to kmalloc for a * page worth of page pointers = 4MB on i386. */ - if ((page_count == -ENOMEM||result == -ENOMEM)&& + if (result == -ENOMEM && size > (CFS_PAGE_SIZE / sizeof(*pages)) * CFS_PAGE_SIZE) { size = ((((size / 2) - 1) | @@ -303,31 +309,31 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, (int)size); continue; } - if (tot_bytes > 0) - GOTO(wait_io, tot_bytes); - GOTO(out, tot_bytes = page_count < 0 ? page_count : result); + GOTO(out, result); } - tot_bytes += result; file_offset += result; iov_left -= result; user_addr += result; } } - - if (tot_bytes > 0) { +out: + if (likely(tot_bytes > 0)) { int rc; - wait_io: + rc = ptlrpc_set_wait(set); - if (rc) - GOTO(out, tot_bytes = rc); + if (unlikely(rc != 0)) + GOTO(unlock_mutex, tot_bytes = rc); if (rw == WRITE) { lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2obdexp(inode), lsm, file_offset, 0); + obd_adjust_kms(ll_i2obdexp(inode), + lsm, file_offset, 0); lov_stripe_unlock(lsm); } + } else { + tot_bytes = result; } -out: +unlock_mutex: if (rw == READ) UNLOCK_INODE_MUTEX(inode); diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c index ea0db2d..1cda15a 100644 --- a/lustre/tests/mmap_sanity.c +++ b/lustre/tests/mmap_sanity.c @@ -599,6 +599,67 @@ out: return rc; } +static int mmap_tst7_func(char *mnt, int rw) +{ + char fname[256]; + char *buf = MAP_FAILED; + ssize_t bytes; + int fd = -1; + int rc = 0; + + if (snprintf(fname, 256, "%s/mmap_tst7.%s", + mnt, (rw == 0) ? "read":"write") >= 256) { + fprintf(stderr, "dir name too long\n"); + rc = ENAMETOOLONG; + goto out; + } + fd = open(fname, O_RDWR | O_DIRECT | O_CREAT, 0644); + if (fd == -1) { + perror("open"); + rc = errno; + goto out; + } + if (ftruncate(fd, 2 * page_size) == -1) { + perror("truncate"); + rc = errno; + goto out; + } + buf = mmap(NULL, 2 * page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buf == MAP_FAILED) { + perror("mmap"); + rc = errno; + goto out; + } + /* ensure the second page isn't mapped */ + munmap(buf + page_size, page_size); + bytes = (rw == 0) ? read(fd, buf, 2 * page_size) : + write(fd, buf, 2 * page_size); + /* Expected behavior */ + if (bytes == page_size) + goto out; + fprintf(stderr, "%s returned %zd, errno = %d\n", + (rw == 0)?"read":"write", bytes, errno); + rc = EIO; +out: + if (buf != MAP_FAILED) + munmap(buf, page_size); + if (fd != -1) + close(fd); + return rc; +} + +static int mmap_tst7(char *mnt) +{ + int rc; + + rc = mmap_tst7_func(mnt, 0); + if (rc != 0) + return rc; + rc = mmap_tst7_func(mnt, 1); + return rc; +} + static int remote_tst(int tc, char *mnt) { int rc = 0; @@ -634,6 +695,7 @@ struct test_case tests[] = { "which mmapped to just this file", mmap_tst5, 1 }, { 6, "mmap test6: check mmap write/read content on two nodes", mmap_tst6, 2 }, + { 7, "mmap test7: file i/o with an unmapped buffer", mmap_tst7, 1}, { 0, NULL, 0, 0 } }; -- 1.8.3.1