llap->llap_magic = LLAP_MAGIC;
llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
- /* XXX: for bug 11270 - check for lockless origin here! */
- if (origin == LLAP_ORIGIN_LOCKLESS_IO)
- llap->llap_nocache = 1;
-
rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
(obd_off)page->index << CFS_PAGE_SHIFT,
&ll_async_page_ops, llap, &llap->llap_cookie,
- llap->llap_nocache, lockh);
+ 0, lockh);
if (rc) {
OBD_SLAB_FREE(llap, ll_async_page_slab,
ll_async_page_slab_size);
OSC_DEFAULT_CKSUM);
kunmap_atomic(kaddr, KM_USER0);
if (origin == LLAP_ORIGIN_READAHEAD ||
- origin == LLAP_ORIGIN_READPAGE ||
- origin == LLAP_ORIGIN_LOCKLESS_IO) {
+ origin == LLAP_ORIGIN_READPAGE) {
llap->llap_checksum = 0;
} else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
llap->llap_checksum == 0) {
return;
}
- LASSERT(!llap->llap_lockless_io_page);
- LASSERT(!llap->llap_nocache);
-
LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
__ll_put_llap(page);
if (IS_ERR(llap))
GOTO(out, rc = PTR_ERR(llap));
- LASSERT(!llap->llap_nocache);
LASSERT(!PageWriteback(page));
set_page_writeback(page);
RETURN(rc);
}
-static void ll_file_put_pages(struct page **pages, int numpages)
-{
- int i;
- struct page **pp;
- ENTRY;
-
- for (i = 0, pp = pages; i < numpages; i++, pp++) {
- if (*pp) {
- LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
- __ll_put_llap(*pp);
- if (page_private(*pp))
- CERROR("the llap wasn't freed\n");
- (*pp)->mapping = NULL;
- if (page_count(*pp) != 1)
- CERROR("page %p, flags %#lx, count %i, "
- "private %p\n", (*pp),
- (unsigned long)(*pp)->flags,
- page_count(*pp),
- (void*)page_private(*pp));
- __free_pages(*pp, 0);
- }
- }
- OBD_FREE(pages, numpages * sizeof(struct page*));
- EXIT;
-}
-
-static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
- unsigned long first)
-{
- struct page **pages;
- int i;
- int rc = 0;
- ENTRY;
-
- OBD_ALLOC(pages, sizeof(struct page *) * numpages);
- if (pages == NULL)
- RETURN(ERR_PTR(-ENOMEM));
- for (i = 0; i < numpages; i++) {
- struct page *page;
- struct ll_async_page *llap;
-
- page = alloc_pages(GFP_HIGHUSER, 0);
- if (page == NULL)
- GOTO(err, rc = -ENOMEM);
- pages[i] = page;
- /* llap_from_page needs page index and mapping to be set */
- page->index = first++;
- page->mapping = inode->i_mapping;
- llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
- if (IS_ERR(llap))
- GOTO(err, rc = PTR_ERR(llap));
- llap->llap_lockless_io_page = 1;
- }
- RETURN(pages);
-err:
- ll_file_put_pages(pages, numpages);
- RETURN(ERR_PTR(rc));
- }
-
-static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
- const struct iovec *iov, unsigned long nsegs,
- ssize_t iov_offset, loff_t pos, size_t count,
- int rw)
-{
- ssize_t amount = 0;
- int i;
- int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
- LL_SBI_LLITE_CHECKSUM;
- ENTRY;
-
- for (i = 0; i < numpages; i++) {
- unsigned offset, bytes, left = 0;
- char *vaddr;
-
- vaddr = kmap(pages[i]);
- offset = pos & (CFS_PAGE_SIZE - 1);
- bytes = min_t(unsigned, CFS_PAGE_SIZE - offset, count);
- LL_CDEBUG_PAGE(D_PAGE, pages[i], "op = %s, addr = %p, "
- "bytes = %u\n",
- (rw == WRITE) ? "CFU" : "CTU",
- vaddr + offset, bytes);
- while (bytes > 0 && !left && nsegs) {
- unsigned copy = min_t(ssize_t, bytes,
- iov->iov_len - iov_offset);
- if (rw == WRITE) {
- left = copy_from_user(vaddr + offset,
- iov->iov_base +iov_offset,
- copy);
- if (updatechecksum) {
- struct ll_async_page *llap;
-
- llap = llap_cast_private(pages[i]);
- llap->llap_checksum =
- init_checksum(OSC_DEFAULT_CKSUM);
- llap->llap_checksum =
- compute_checksum(llap->llap_checksum,
- vaddr,CFS_PAGE_SIZE,
- OSC_DEFAULT_CKSUM);
- }
- } else {
- left = copy_to_user(iov->iov_base + iov_offset,
- vaddr + offset, copy);
- }
-
- amount += copy;
- count -= copy;
- pos += copy;
- iov_offset += copy;
- bytes -= copy;
- if (iov_offset == iov->iov_len) {
- iov_offset = 0;
- iov++;
- nsegs--;
- }
- }
- kunmap(pages[i]);
- if (left) {
- amount -= left;
- break;
- }
- }
- if (amount == 0)
- RETURN(-EFAULT);
- RETURN(amount);
-}
-
-static int ll_file_oig_pages(struct inode * inode, struct page **pages,
- int numpages, loff_t pos, size_t count, int rw)
-{
- struct obd_io_group *oig;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_export *exp;
- loff_t org_pos = pos;
- obd_flag brw_flags;
- int rc;
- int i;
- ENTRY;
-
- exp = ll_i2obdexp(inode);
- if (exp == NULL)
- RETURN(-EINVAL);
- rc = oig_init(&oig);
- if (rc)
- RETURN(rc);
- brw_flags = OBD_BRW_SRVLOCK;
- if (cfs_capable(CFS_CAP_SYS_RESOURCE))
- brw_flags |= OBD_BRW_NOQUOTA;
-
- for (i = 0; i < numpages; i++) {
- struct ll_async_page *llap;
- unsigned from, bytes;
-
- from = pos & (CFS_PAGE_SIZE - 1);
- bytes = min_t(unsigned, CFS_PAGE_SIZE - from,
- count - pos + org_pos);
- llap = llap_cast_private(pages[i]);
- LASSERT(llap);
-
- lock_page(pages[i]);
-
- LL_CDEBUG_PAGE(D_PAGE, pages[i], "offset "LPU64","
- " from %u, bytes = %u\n",
- (__u64)pos, from, bytes);
- LASSERTF(pos >> CFS_PAGE_SHIFT == pages[i]->index,
- "wrong page index %lu (%lu)\n",
- pages[i]->index,
- (unsigned long)(pos >> CFS_PAGE_SHIFT));
- rc = obd_queue_group_io(exp, lli->lli_smd, NULL, oig,
- llap->llap_cookie,
- (rw == WRITE) ?
- OBD_BRW_WRITE:OBD_BRW_READ,
- from, bytes, brw_flags,
- ASYNC_READY | ASYNC_URGENT |
- ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
- if (rc) {
- i++;
- GOTO(out, rc);
- }
- pos += bytes;
- }
- rc = obd_trigger_group_io(exp, lli->lli_smd, NULL, oig);
- if (rc)
- GOTO(out, rc);
- rc = oig_wait(oig);
-out:
- while(--i >= 0)
- unlock_page(pages[i]);
- oig_release(oig);
- RETURN(rc);
-}
-
-/* Advance through passed iov, adjust iov pointer as necessary and return
- * starting offset in individual entry we are pointing at. Also reduce
- * nr_segs as needed */
-static ssize_t ll_iov_advance(const struct iovec **iov, unsigned long *nr_segs,
- ssize_t offset)
-{
- while (*nr_segs > 0) {
- if ((*iov)->iov_len > offset)
- return ((*iov)->iov_len - offset);
- offset -= (*iov)->iov_len;
- (*iov)++;
- (*nr_segs)--;
- }
- return 0;
-}
-
-ssize_t ll_file_lockless_io(struct file *file, const struct iovec *iov,
- unsigned long nr_segs,
- loff_t *ppos, int rw, ssize_t count)
-{
- loff_t pos;
- struct inode *inode = file->f_dentry->d_inode;
- ssize_t rc = 0;
- int max_pages;
- size_t amount = 0;
- unsigned long first, last;
- const struct iovec *iv = &iov[0];
- unsigned long nsegs = nr_segs;
- unsigned long offset = 0;
- ENTRY;
-
- if (rw == READ) {
- loff_t isize;
-
- ll_inode_size_lock(inode, 0);
- isize = i_size_read(inode);
- ll_inode_size_unlock(inode, 0);
- if (*ppos >= isize)
- GOTO(out, rc = 0);
- if (*ppos + count >= isize)
- count -= *ppos + count - isize;
- if (count == 0)
- GOTO(out, rc);
- } else {
- rc = generic_write_checks(file, ppos, &count, 0);
- if (rc)
- GOTO(out, rc);
- rc = ll_remove_suid(file, file->f_vfsmnt);
- if (rc)
- GOTO(out, rc);
- }
-
- pos = *ppos;
- first = pos >> CFS_PAGE_SHIFT;
- last = (pos + count - 1) >> CFS_PAGE_SHIFT;
- max_pages = PTLRPC_MAX_BRW_PAGES *
- ll_i2info(inode)->lli_smd->lsm_stripe_count;
- CDEBUG(D_INFO, "%u, stripe_count = %u\n",
- PTLRPC_MAX_BRW_PAGES /* max_pages_per_rpc */,
- ll_i2info(inode)->lli_smd->lsm_stripe_count);
-
- while (first <= last && rc >= 0) {
- int pages_for_io;
- struct page **pages;
- size_t bytes = count - amount;
-
- pages_for_io = min_t(int, last - first + 1, max_pages);
- pages = ll_file_prepare_pages(pages_for_io, inode, first);
- if (IS_ERR(pages)) {
- rc = PTR_ERR(pages);
- break;
- }
- if (rw == WRITE) {
- rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
- offset, pos + amount, bytes,
- rw);
- if (rc < 0)
- GOTO(put_pages, rc);
- offset = ll_iov_advance(&iv, &nsegs, offset + rc);
- bytes = rc;
- }
- rc = ll_file_oig_pages(inode, pages, pages_for_io,
- pos + amount, bytes, rw);
- if (rc)
- GOTO(put_pages, rc);
- if (rw == READ) {
- rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
- offset, pos + amount, bytes, rw);
- if (rc < 0)
- GOTO(put_pages, rc);
- offset = ll_iov_advance(&iv, &nsegs, offset + rc);
- bytes = rc;
- }
- amount += bytes;
-put_pages:
- ll_file_put_pages(pages, pages_for_io);
- first += pages_for_io;
- /* a short read/write check */
- if (pos + amount < ((loff_t)first << CFS_PAGE_SHIFT))
- break;
- /* Check if we are out of userspace buffers. (how that could
- happen?) */
- if (nsegs == 0)
- break;
- }
- /* NOTE: don't update i_size and KMS in absence of LDLM locks even
- * write makes the file large */
- file_accessed(file);
- if (rw == READ && amount < count && rc == 0) {
- unsigned long not_cleared;
-
- while (nsegs > 0) {
- ssize_t to_clear = min_t(ssize_t, count - amount,
- iv->iov_len - offset);
- not_cleared = clear_user(iv->iov_base + offset,
- to_clear);
- amount += to_clear - not_cleared;
- if (not_cleared) {
- rc = -EFAULT;
- break;
- }
- offset = 0;
- iv++;
- nsegs--;
- }
- }
- if (amount > 0) {
- lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
- (rw == WRITE) ?
- LPROC_LL_LOCKLESS_WRITE :
- LPROC_LL_LOCKLESS_READ,
- (long)amount);
- *ppos += amount;
- RETURN(amount);
- }
-out:
- RETURN(rc);
-}