* Author: Mike Pershin <tappro@whamcloud.com>
*/
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
#define DEBUG_SUBSYSTEM S_OSD
#include <lustre_ver.h>
#include <libcfs/libcfs.h>
-#include <lustre_fsfilt.h>
#include <obd_support.h>
#include <lustre_net.h>
#include <obd.h>
#include <obd_class.h>
#include <lustre_disk.h>
#include <lustre_fid.h>
+#include <lustre/lustre_idl.h> /* LLOG_CHUNK_SIZE definition */
#include "osd_internal.h"
size = old_size - *pos;
}
- rc = -dmu_read(osd->od_objset.os, obj->oo_db->db_object, *pos, size,
+ rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
buf->lb_buf, DMU_READ_PREFETCH);
if (rc == 0) {
rc = size;
}
static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt,
- const loff_t size, loff_t pos,
+ const struct lu_buf *buf, loff_t pos,
struct thandle *th)
{
struct osd_object *obj = osd_dt_obj(dt);
dmu_tx_hold_sa_create(oh->ot_tx, ZFS_SA_BASE_ATTR_SIZE);
}
- dmu_tx_hold_write(oh->ot_tx, oid, pos, size);
+ /* XXX: ZFS still lacks support for declaring an append.
+ * -1 means append, which is mostly used by llog; llog
+ * can grow up to LLOG_CHUNK_SIZE*8 records */
+ if (pos == -1)
+ pos = max_t(loff_t, 256 * 8 * LLOG_CHUNK_SIZE,
+ obj->oo_attr.la_size + (2 << 20));
+ dmu_tx_hold_write(oh->ot_tx, oid, pos, buf->lb_len);
/* dt_declare_write() is usually called for system objects, such
* as llog or last_rcvd files. We needn't enforce quota on those
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
uint64_t offset = *pos;
int rc;
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
- dmu_write(osd->od_objset.os, obj->oo_db->db_object, offset,
+ dmu_write(osd->od_os, obj->oo_db->db_object, offset,
(uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
write_lock(&obj->oo_attr_lock);
if (obj->oo_attr.la_size < offset + buf->lb_len) {
/* osd_object_sa_update() will be copying directly from oo_attr
* into dbuf. Any update within a single txg will copy the
* most recent value */
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
&obj->oo_attr.la_size, 8, oh);
if (unlikely(rc))
GOTO(out, rc);
LASSERT(obj->oo_db);
for (i = 0; i < npages; i++) {
- if (lnb[i].page == NULL)
+ if (lnb[i].lnb_page == NULL)
continue;
- if (lnb[i].page->mapping == (void *)obj) {
+ if (lnb[i].lnb_page->mapping == (void *)obj) {
/* this is anonymous page allocated for copy-write */
- lnb[i].page->mapping = NULL;
- __free_page(lnb[i].page);
- cfs_atomic_dec(&osd->od_zerocopy_alloc);
+ lnb[i].lnb_page->mapping = NULL;
+ __free_page(lnb[i].lnb_page);
+ atomic_dec(&osd->od_zerocopy_alloc);
} else {
/* see comment in osd_bufs_get_read() */
- ptr = (unsigned long)lnb[i].dentry;
+ ptr = (unsigned long)lnb[i].lnb_data;
if (ptr & 1UL) {
ptr &= ~1UL;
dmu_buf_rele((void *)ptr, osd_zerocopy_tag);
- cfs_atomic_dec(&osd->od_zerocopy_pin);
- } else if (lnb[i].dentry != NULL) {
- dmu_return_arcbuf((void *)lnb[i].dentry);
- cfs_atomic_dec(&osd->od_zerocopy_loan);
+ atomic_dec(&osd->od_zerocopy_pin);
+ } else if (lnb[i].lnb_data != NULL) {
+ dmu_return_arcbuf(lnb[i].lnb_data);
+ atomic_dec(&osd->od_zerocopy_loan);
}
}
- lnb[i].page = NULL;
- lnb[i].dentry = NULL;
+ lnb[i].lnb_page = NULL;
+ lnb[i].lnb_data = NULL;
}
return 0;
}
-static struct page *kmem_to_page(void *addr)
+static inline struct page *kmem_to_page(void *addr)
{
- struct page *page;
-
- if (kmem_virt(addr))
- page = vmalloc_to_page(addr);
+ if (is_vmalloc_addr(addr))
+ return vmalloc_to_page(addr);
else
- page = virt_to_page(addr);
-
- return page;
+ return virt_to_page(addr);
}
static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
LASSERT(len > 0);
- cfs_atomic_inc(&osd->od_zerocopy_pin);
+ atomic_inc(&osd->od_zerocopy_pin);
bufoff = off - dbp[i]->db_offset;
tocpy = min_t(int, dbp[i]->db_size - bufoff, len);
dbf = (void *) ((unsigned long)dbp[i] | 1);
while (tocpy > 0) {
- thispage = CFS_PAGE_SIZE;
- thispage -= bufoff & (CFS_PAGE_SIZE - 1);
+ thispage = PAGE_CACHE_SIZE;
+ thispage -= bufoff & (PAGE_CACHE_SIZE - 1);
thispage = min(tocpy, thispage);
- lnb->rc = 0;
+ lnb->lnb_rc = 0;
lnb->lnb_file_offset = off;
lnb->lnb_page_offset = bufoff & ~CFS_PAGE_MASK;
- lnb->len = thispage;
- lnb->page = kmem_to_page(dbp[i]->db_data +
- bufoff);
+ lnb->lnb_len = thispage;
+ lnb->lnb_page = kmem_to_page(dbp[i]->db_data +
+ bufoff);
/* mark just a single slot: we need this
* reference to dbuf to be released once */
- lnb->dentry = dbf;
+ lnb->lnb_data = dbf;
dbf = NULL;
tocpy -= thispage;
{
struct osd_device *osd = osd_obj2dev(obj);
int plen, off_in_block, sz_in_block;
- int i = 0, npages = 0;
+ int rc, i = 0, npages = 0;
arc_buf_t *abuf;
uint32_t bs;
uint64_t dummy;
abuf = dmu_request_arcbuf(obj->oo_db, bs);
if (unlikely(abuf == NULL))
- GOTO(out_err, -ENOMEM);
+ GOTO(out_err, rc = -ENOMEM);
- cfs_atomic_inc(&osd->od_zerocopy_loan);
+ atomic_inc(&osd->od_zerocopy_loan);
/* go over pages arcbuf contains, put them as
* local niobufs for ptlrpc's bulks */
while (sz_in_block > 0) {
- plen = min_t(int, sz_in_block, CFS_PAGE_SIZE);
+ plen = min_t(int, sz_in_block, PAGE_CACHE_SIZE);
lnb[i].lnb_file_offset = off;
lnb[i].lnb_page_offset = 0;
- lnb[i].len = plen;
- lnb[i].rc = 0;
+ lnb[i].lnb_len = plen;
+ lnb[i].lnb_rc = 0;
if (sz_in_block == bs)
- lnb[i].dentry = (void *)abuf;
+ lnb[i].lnb_data = abuf;
else
- lnb[i].dentry = NULL;
+ lnb[i].lnb_data = NULL;
/* this one is not supposed to fail */
- lnb[i].page = kmem_to_page(abuf->b_data +
+ lnb[i].lnb_page = kmem_to_page(abuf->b_data +
off_in_block);
- LASSERT(lnb[i].page);
+ LASSERT(lnb[i].lnb_page);
lprocfs_counter_add(osd->od_stats,
LPROC_OSD_ZEROCOPY_IO, 1);
/* can't use zerocopy, allocate temp. buffers */
while (sz_in_block > 0) {
- plen = min_t(int, sz_in_block, CFS_PAGE_SIZE);
+ plen = min_t(int, sz_in_block, PAGE_CACHE_SIZE);
lnb[i].lnb_file_offset = off;
lnb[i].lnb_page_offset = 0;
- lnb[i].len = plen;
- lnb[i].rc = 0;
- lnb[i].dentry = NULL;
+ lnb[i].lnb_len = plen;
+ lnb[i].lnb_rc = 0;
+ lnb[i].lnb_data = NULL;
- lnb[i].page = alloc_page(OSD_GFP_IO);
- if (unlikely(lnb[i].page == NULL))
- GOTO(out_err, -ENOMEM);
+ lnb[i].lnb_page = alloc_page(OSD_GFP_IO);
+ if (unlikely(lnb[i].lnb_page == NULL))
+ GOTO(out_err, rc = -ENOMEM);
- LASSERT(lnb[i].page->mapping == NULL);
- lnb[i].page->mapping = (void *)obj;
+ LASSERT(lnb[i].lnb_page->mapping == NULL);
+ lnb[i].lnb_page->mapping = (void *)obj;
- cfs_atomic_inc(&osd->od_zerocopy_alloc);
+ atomic_inc(&osd->od_zerocopy_alloc);
lprocfs_counter_add(osd->od_stats,
LPROC_OSD_COPY_IO, 1);
out_err:
osd_bufs_put(env, &obj->oo_dt, lnb, npages);
- RETURN(-ENOMEM);
+ RETURN(rc);
}
static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
oh = container_of0(th, struct osd_thandle, ot_super);
for (i = 0; i < npages; i++) {
- if (lnb[i].rc)
+ if (lnb[i].lnb_rc)
/* ENOSPC, network RPC error, etc.
* We don't want to book space for pages which will be
* skipped in osd_write_commit(). Hence we skip pages
*
* XXX we could handle this on per-lnb basis as done by
* grant. */
- if ((lnb[i].flags & OBD_BRW_NOQUOTA) ||
- (lnb[i].flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
+ if ((lnb[i].lnb_flags & OBD_BRW_NOQUOTA) ||
+ (lnb[i].lnb_flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
OBD_BRW_FROM_GRANT)
ignore_quota = true;
if (size == 0) {
/* first valid lnb */
offset = lnb[i].lnb_file_offset;
- size = lnb[i].len;
+ size = lnb[i].lnb_len;
continue;
}
if (offset + size == lnb[i].lnb_file_offset) {
/* this lnb is contiguous to the previous one */
- size += lnb[i].len;
+ size += lnb[i].lnb_len;
continue;
}
- dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object, offset,size);
-
+ dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object,
+ offset, size);
/* estimating space that will be consumed by a write is rather
* complicated with ZFS. As a consequence, we don't account for
* indirect blocks and quota overrun will be adjusted once the
* operation is committed, if required. */
space += osd_count_not_mapped(obj, offset, size);
- offset = lnb->lnb_file_offset;
- size = lnb->len;
+ offset = lnb[i].lnb_file_offset;
+ size = lnb[i].lnb_len;
}
if (size) {
- dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object, offset,size);
+ dmu_tx_hold_write(oh->ot_tx, obj->oo_db->db_object,
+ offset, size);
space += osd_count_not_mapped(obj, offset, size);
}
/* backend zfs filesystem might be configured to store multiple data
* copies */
- space *= osd->od_objset.os->os_copies;
+ space *= osd->od_os->os_copies;
space = toqb(space);
CDEBUG(D_QUOTA, "writting %d pages, reserving "LPD64"K of quota "
"space\n", npages, space);
* now, once we support multiple objects BRW, this code needs to be
* revised. */
if (flags & QUOTA_FL_OVER_USRQUOTA)
- lnb[0].flags |= OBD_BRW_OVER_USRQUOTA;
+ lnb[0].lnb_flags |= OBD_BRW_OVER_USRQUOTA;
if (flags & QUOTA_FL_OVER_GRPQUOTA)
- lnb[0].flags |= OBD_BRW_OVER_GRPQUOTA;
+ lnb[0].lnb_flags |= OBD_BRW_OVER_GRPQUOTA;
RETURN(rc);
}
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
uint64_t new_size = 0;
int i, rc = 0;
for (i = 0; i < npages; i++) {
CDEBUG(D_INODE, "write %u bytes at %u\n",
- (unsigned) lnb[i].len,
+ (unsigned) lnb[i].lnb_len,
(unsigned) lnb[i].lnb_file_offset);
- if (lnb[i].rc) {
+ if (lnb[i].lnb_rc) {
/* ENOSPC, network RPC error, etc.
* Unlike ldiskfs, zfs allocates new blocks on rewrite,
* so we skip this page if lnb_rc is set to -ENOSPC */
CDEBUG(D_INODE, "obj "DFID": skipping lnb[%u]: rc=%d\n",
PFID(lu_object_fid(&dt->do_lu)), i,
- lnb[i].rc);
+ lnb[i].lnb_rc);
continue;
}
- if (lnb[i].page->mapping == (void *)obj) {
- dmu_write(osd->od_objset.os, obj->oo_db->db_object,
- lnb[i].lnb_file_offset, lnb[i].len,
- kmap(lnb[i].page), oh->ot_tx);
- kunmap(lnb[i].page);
- } else if (lnb[i].dentry) {
- LASSERT(((unsigned long)lnb[i].dentry & 1) == 0);
+ if (lnb[i].lnb_page->mapping == (void *)obj) {
+ dmu_write(osd->od_os, obj->oo_db->db_object,
+ lnb[i].lnb_file_offset, lnb[i].lnb_len,
+ kmap(lnb[i].lnb_page), oh->ot_tx);
+ kunmap(lnb[i].lnb_page);
+ } else if (lnb[i].lnb_data) {
+ LASSERT(((unsigned long)lnb[i].lnb_data & 1) == 0);
/* buffer loaned for zerocopy, try to use it.
* note that dmu_assign_arcbuf() is smart
* enough to recognize a changed blocksize;
* in that case it falls back to dmu_write() */
dmu_assign_arcbuf(obj->oo_db, lnb[i].lnb_file_offset,
- (void *)lnb[i].dentry, oh->ot_tx);
+ lnb[i].lnb_data, oh->ot_tx);
/* drop the reference, otherwise osd_bufs_put()
* will be releasing it - bad! */
- lnb[i].dentry = NULL;
- cfs_atomic_dec(&osd->od_zerocopy_loan);
+ lnb[i].lnb_data = NULL;
+ atomic_dec(&osd->od_zerocopy_loan);
}
- if (new_size < lnb[i].lnb_file_offset + lnb[i].len)
- new_size = lnb[i].lnb_file_offset + lnb[i].len;
+ if (new_size < lnb[i].lnb_file_offset + lnb[i].lnb_len)
+ new_size = lnb[i].lnb_file_offset + lnb[i].lnb_len;
}
if (unlikely(new_size == 0)) {
/* osd_object_sa_update() will be copying directly from
* oo_attr into dbuf. Any update within a single txg will copy
* the most recent value */
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
- &obj->oo_attr.la_size, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
+ &obj->oo_attr.la_size, 8, oh);
} else {
write_unlock(&obj->oo_attr_lock);
}
LASSERT(obj->oo_db);
for (i = 0; i < npages; i++) {
- buf.lb_buf = kmap(lnb[i].page);
- buf.lb_len = lnb[i].len;
+ buf.lb_buf = kmap(lnb[i].lnb_page);
+ buf.lb_len = lnb[i].lnb_len;
offset = lnb[i].lnb_file_offset;
CDEBUG(D_OTHER, "read %u bytes at %u\n",
- (unsigned) lnb[i].len,
+ (unsigned) lnb[i].lnb_len,
(unsigned) lnb[i].lnb_file_offset);
- lnb[i].rc = osd_read(env, dt, &buf, &offset, NULL);
- kunmap(lnb[i].page);
+ lnb[i].lnb_rc = osd_read(env, dt, &buf, &offset, NULL);
+ kunmap(lnb[i].lnb_page);
- if (lnb[i].rc < buf.lb_len) {
+ if (lnb[i].lnb_rc < buf.lb_len) {
/* all subsequent rc should be 0 */
while (++i < npages)
- lnb[i].rc = 0;
+ lnb[i].lnb_rc = 0;
break;
}
}
{
struct osd_object *obj = osd_dt_obj(dt);
struct osd_device *osd = osd_obj2dev(obj);
- udmu_objset_t *uos = &osd->od_objset;
struct osd_thandle *oh;
__u64 len;
int rc = 0;
len = end - start;
write_unlock(&obj->oo_attr_lock);
- rc = __osd_object_punch(osd->od_objset.os, obj->oo_db, oh->ot_tx,
+ rc = __osd_object_punch(osd->od_os, obj->oo_db, oh->ot_tx,
obj->oo_attr.la_size, start, len);
/* set new size */
if (len == DMU_OBJECT_END) {
write_lock(&obj->oo_attr_lock);
obj->oo_attr.la_size = start;
write_unlock(&obj->oo_attr_lock);
- rc = osd_object_sa_update(obj, SA_ZPL_SIZE(uos),
- &obj->oo_attr.la_size, 8, oh);
+ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
+ &obj->oo_attr.la_size, 8, oh);
}
RETURN(rc);
}