page = find_or_create_page(inode->i_mapping,
offset >> PAGE_SHIFT, gfp_mask);
- if (likely(page))
- LASSERT(!test_bit(PG_private_2, &page->flags));
- else
+ if (likely(page)) {
+ LASSERT(!PagePrivate2(page));
+ wait_on_page_writeback(page);
+ } else {
lprocfs_counter_add(d->od_stats, LPROC_OSD_NO_PAGE, 1);
+ }
return page;
}
/* consult with pagecache, but do not create new pages */
/* this is normally used once */
page = find_lock_page(inode->i_mapping, offset >> PAGE_SHIFT);
- if (page)
+ if (page) {
+ wait_on_page_writeback(page);
return page;
+ }
}
LASSERT(oti->oti_dio_pages);
cur = oti->oti_dio_pages_used;
+ page = oti->oti_dio_pages[cur];
- if (unlikely(!oti->oti_dio_pages[cur])) {
+ if (unlikely(!page)) {
LASSERT(cur < PTLRPC_MAX_BRW_PAGES);
page = alloc_page(gfp_mask);
if (!page)
return NULL;
oti->oti_dio_pages[cur] = page;
+ SetPagePrivate2(page);
+ lock_page(page);
}
- page = oti->oti_dio_pages[cur];
- LASSERT(!test_bit(PG_private_2, &page->flags));
- set_bit(PG_private_2, &page->flags);
- oti->oti_dio_pages_used++;
-
- LASSERT(!PageLocked(page));
- lock_page(page);
-
- LASSERT(!page->mapping);
- LASSERT(!PageWriteback(page));
ClearPageUptodate(page);
-
page->index = offset >> PAGE_SHIFT;
+ oti->oti_dio_pages_used++;
return page;
}
/* if the page isn't cached, then reset uptodate
* to prevent reuse */
- if (test_bit(PG_private_2, &page->flags)) {
- clear_bit(PG_private_2, &page->flags);
- ClearPageUptodate(page);
- if (lnb[i].lnb_locked)
- unlock_page(page);
+ if (PagePrivate2(page)) {
oti->oti_dio_pages_used--;
} else {
if (lnb[i].lnb_locked)
if (pagevec_add(&pvec, page) == 0)
pagevec_release(&pvec);
}
- dt_object_put(env, dt);
lnb[i].lnb_page = NULL;
}
bypass_checks:
if (!cache && unlikely(!oti->oti_dio_pages)) {
- OBD_ALLOC(oti->oti_dio_pages,
- sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES);
+ OBD_ALLOC_PTR_ARRAY(oti->oti_dio_pages, PTLRPC_MAX_BRW_PAGES);
if (!oti->oti_dio_pages)
return -ENOMEM;
}
GOTO(cleanup, rc = -ENOMEM);
lnb->lnb_locked = 1;
- wait_on_page_writeback(lnb->lnb_page);
- BUG_ON(PageWriteback(lnb->lnb_page));
-
- lu_object_get(&dt->do_lu);
}
#if 0
enum osd_qid_declare_flags declare_flags = OSD_QID_BLK;
ENTRY;
- LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
- LASSERT(oh->ot_handle == NULL);
+ LASSERT(handle != NULL);
+ oh = container_of(handle, struct osd_thandle, ot_super);
+ LASSERT(oh->ot_handle == NULL);
- newblocks = npages;
+ newblocks = npages;
- /* calculate number of extents (probably better to pass nb) */
+ /* calculate number of extents (probably better to pass nb) */
for (i = 0; i < npages; i++) {
if (i && lnb[i].lnb_file_offset !=
lnb[i - 1].lnb_file_offset + lnb[i - 1].lnb_len)
/* Check if a block is allocated or not */
static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
- struct niobuf_local *lnb, int npages,
- struct thandle *thandle)
+ struct niobuf_local *lnb, int npages,
+ struct thandle *thandle, __u64 user_size)
{
- struct osd_thread_info *oti = osd_oti_get(env);
- struct osd_iobuf *iobuf = &oti->oti_iobuf;
- struct inode *inode = osd_dt_obj(dt)->oo_inode;
- struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
- loff_t isize;
- int rc = 0, i;
+ struct osd_thread_info *oti = osd_oti_get(env);
+ struct osd_iobuf *iobuf = &oti->oti_iobuf;
+ struct inode *inode = osd_dt_obj(dt)->oo_inode;
+ struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
+ loff_t disk_size;
+ int rc = 0, i;
- LASSERT(inode);
+ LASSERT(inode);
rc = osd_init_iobuf(osd, iobuf, 1, npages);
if (unlikely(rc != 0))
RETURN(rc);
- isize = i_size_read(inode);
+ disk_size = i_size_read(inode);
+ /* if disk_size is already bigger than specified user_size,
+ * ignore user_size
+ */
+ if (disk_size > user_size)
+ user_size = 0;
dquot_initialize(inode);
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < npages; i++) {
if (lnb[i].lnb_rc == -ENOSPC &&
(lnb[i].lnb_flags & OBD_BRW_MAPPED)) {
/* Allow the write to proceed if overwriting an
LASSERT(PageLocked(lnb[i].lnb_page));
LASSERT(!PageWriteback(lnb[i].lnb_page));
- if (lnb[i].lnb_file_offset + lnb[i].lnb_len > isize)
- isize = lnb[i].lnb_file_offset + lnb[i].lnb_len;
+ if (lnb[i].lnb_file_offset + lnb[i].lnb_len > disk_size)
+ disk_size = lnb[i].lnb_file_offset + lnb[i].lnb_len;
/*
* Since write and truncate are serialized by oo_sem, even
SetPageUptodate(lnb[i].lnb_page);
osd_iobuf_add_page(iobuf, &lnb[i]);
- }
+ }
+ /* if file has grown, take user_size into account */
+ if (user_size && disk_size > user_size)
+ disk_size = user_size;
osd_trans_exec_op(env, thandle, OSD_OT_WRITE);
- if (OBD_FAIL_CHECK(OBD_FAIL_OST_MAPBLK_ENOSPC)) {
- rc = -ENOSPC;
- } else if (iobuf->dr_npages > 0) {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_MAPBLK_ENOSPC)) {
+ rc = -ENOSPC;
+ } else if (iobuf->dr_npages > 0) {
rc = osd_ldiskfs_map_inode_pages(inode, iobuf->dr_pages,
iobuf->dr_npages,
iobuf->dr_blocks, 1);
- } else {
- /* no pages to write, no transno is needed */
- thandle->th_local = 1;
- }
+ } else {
+ /* no pages to write, no transno is needed */
+ thandle->th_local = 1;
+ }
if (likely(rc == 0)) {
spin_lock(&inode->i_lock);
- if (isize > i_size_read(inode)) {
- i_size_write(inode, isize);
- LDISKFS_I(inode)->i_disksize = isize;
+ if (disk_size > i_size_read(inode)) {
+ i_size_write(inode, disk_size);
+ LDISKFS_I(inode)->i_disksize = disk_size;
spin_unlock(&inode->i_lock);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
} else {
spin_unlock(&inode->i_lock);
}
for (i = 0; i < npages; i++) {
if (lnb[i].lnb_page == NULL)
continue;
- LASSERT(PageLocked(lnb[i].lnb_page));
- generic_error_remove_page(inode->i_mapping,
- lnb[i].lnb_page);
+ if (!PagePrivate2(lnb[i].lnb_page)) {
+ LASSERT(PageLocked(lnb[i].lnb_page));
+ generic_error_remove_page(inode->i_mapping,
+ lnb[i].lnb_page);
+ }
}
}
/* early release to let others read data during the bulk */
for (i = 0; i < iobuf->dr_npages; i++) {
LASSERT(PageLocked(iobuf->dr_pages[i]));
- unlock_page(iobuf->dr_pages[i]);
+ if (!PagePrivate2(iobuf->dr_pages[i]))
+ unlock_page(iobuf->dr_pages[i]);
}
}
static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
struct lu_buf *buf, loff_t *pos)
{
- struct inode *inode = osd_dt_obj(dt)->oo_inode;
- int rc;
+ struct inode *inode = osd_dt_obj(dt)->oo_inode;
+ int rc;
- /* Read small symlink from inode body as we need to maintain correct
- * on-disk symlinks for ldiskfs.
- */
- if (S_ISLNK(dt->do_lu.lo_header->loh_attr) &&
- (buf->lb_len < sizeof(LDISKFS_I(inode)->i_data)))
- rc = osd_ldiskfs_readlink(inode, buf->lb_buf, buf->lb_len);
- else
- rc = osd_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+ /* Read small symlink from inode body as we need to maintain correct
+ * on-disk symlinks for ldiskfs. A symlink is read for exactly i_size
+ * bytes; a buffer shorter than that is rejected with -EOVERFLOW.
+ * Targets shorter than i_data go through osd_ldiskfs_readlink().
+ */
+ if (S_ISLNK(dt->do_lu.lo_header->loh_attr)) {
+ loff_t size = i_size_read(inode);
+
+ if (buf->lb_len < size)
+ return -EOVERFLOW;
- return rc;
+ if (size < sizeof(LDISKFS_I(inode)->i_data))
+ rc = osd_ldiskfs_readlink(inode, buf->lb_buf, size);
+ else
+ rc = osd_ldiskfs_read(inode, buf->lb_buf, size, pos);
+ } else {
+ rc = osd_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+ }
+
+ return rc;
}
static inline int osd_extents_enabled(struct super_block *sb,
ENTRY;
LASSERT(buf != NULL);
- LASSERT(handle != NULL);
+ LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
- LASSERT(oh->ot_handle == NULL);
+ oh = container_of(handle, struct osd_thandle, ot_super);
+ LASSERT(oh->ot_handle == NULL);
size = buf->lb_len;
bits = sb->s_blocksize_bits;
LDISKFS_I(inode)->i_disksize = buflen;
i_size_write(inode, buflen);
spin_unlock(&inode->i_lock);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
return 0;
}
loff_t new_size = i_size_read(inode);
unsigned long block;
int blocksize = 1 << inode->i_blkbits;
+ struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
int err = 0;
int size;
int boffs;
int dirty_inode = 0;
- struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
- bool create, sparse;
+ bool create, sparse, sync = false;
if (write_NUL) {
/*
++bufsize;
}
+ dirty_inode = test_and_set_bit(LDISKFS_INODE_JOURNAL_DATA, &ei->i_flags);
+
/* sparse checking is racy, but sparse is very rare case, leave as is */
sparse = (new_size > 0 && (inode->i_blocks >> (inode->i_blkbits - 9)) <
((new_size - 1) >> inode->i_blkbits) + 1);
while (bufsize > 0) {
int credits = handle->h_buffer_credits;
- bool sync;
unsigned long last_block = (new_size == 0) ? 0 :
(new_size - 1) >> inode->i_blkbits;
bh = __ldiskfs_bread(handle, inode, block, flags);
create = true;
} else {
- if (sync)
+ if (sync) {
up(&ei->i_append_sem);
+ sync = false;
+ }
create = false;
}
if (IS_ERR_OR_NULL(bh)) {
boffs, size, (unsigned long)bh->b_size);
if (create) {
memset(bh->b_data, 0, bh->b_size);
- if (sync)
+ if (sync) {
up(&ei->i_append_sem);
+ sync = false;
+ }
}
memcpy(bh->b_data + boffs, buf, size);
err = ldiskfs_handle_dirty_metadata(handle, NULL, bh);
bufsize -= size;
buf += size;
}
- if (bh)
- brelse(bh);
+ if (sync)
+ up(&ei->i_append_sem);
+
+ if (bh)
+ brelse(bh);
if (write_NUL)
--new_size;
spin_lock(&inode->i_lock);
if (new_size > i_size_read(inode))
i_size_write(inode, new_size);
- if (i_size_read(inode) > LDISKFS_I(inode)->i_disksize) {
- LDISKFS_I(inode)->i_disksize = i_size_read(inode);
+ if (i_size_read(inode) > ei->i_disksize) {
+ ei->i_disksize = i_size_read(inode);
dirty_inode = 1;
}
spin_unlock(&inode->i_lock);
- if (dirty_inode)
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
}
+ if (dirty_inode)
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
if (err == 0)
*offs = offset;
return result;
}
+static int osd_declare_fallocate(const struct lu_env *env,
+ struct dt_object *dt, struct thandle *th)
+{
+ struct osd_thandle *oh;
+ struct inode *inode;
+ int rc;
+ ENTRY;
+
+ LASSERT(th);
+ oh = container_of(th, struct osd_thandle, ot_super);
+
+ osd_trans_declare_op(env, oh, OSD_OT_PREALLOC,
+ osd_dto_credits_noquota[DTO_WRITE_BLOCK]); /* DTO_WRITE_BLOCK credit count */
+ inode = osd_dt_obj(dt)->oo_inode;
+ LASSERT(inode);
+
+ rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
+ i_projid_read(inode), 0, oh, osd_dt_obj(dt),
+ NULL, OSD_QID_BLK); /* quota declared against inode's uid/gid/projid */
+ RETURN(rc); /* result of the quota declaration is the return value */
+}
+
+static int osd_fallocate(const struct lu_env *env, struct dt_object *dt,
+ __u64 start, __u64 end, int mode, struct thandle *th)
+{
+ struct osd_object *obj = osd_dt_obj(dt);
+ struct inode *inode = obj->oo_inode;
+ int rc = 0;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct dentry *dentry = &info->oti_obj_dentry;
+ struct file *file = &info->oti_file;
+
+ ENTRY;
+ /*
+ * Only standard preallocation (mode == 0), optionally combined with
+ * FALLOC_FL_KEEP_SIZE, is accepted; any other mode bit is rejected
+ * with -EOPNOTSUPP.
+ */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ RETURN(-EOPNOTSUPP);
+
+ LASSERT(dt_object_exists(dt));
+ LASSERT(osd_invariant(obj));
+ LASSERT(inode != NULL);
+ dquot_initialize(inode);
+
+ LASSERT(th);
+
+ osd_trans_exec_op(env, th, OSD_OT_PREALLOC);
+
+ /*
+ * f_op->fallocate() takes a struct file rather than an inode, so the
+ * dentry/file embedded in osd_thread_info are filled in with just the
+ * fields assigned below before the call. The length passed is
+ * end - start. NOTE(review): assumes end >= start - confirm callers.
+ */
+ dentry->d_inode = inode;
+ dentry->d_sb = inode->i_sb;
+ file->f_path.dentry = dentry;
+ file->f_mapping = inode->i_mapping;
+ file->f_op = inode->i_fop;
+ file->f_inode = inode;
+ rc = file->f_op->fallocate(file, mode, start, end - start);
+
+ RETURN(rc);
+}
+
static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt,
__u64 start, __u64 end, struct thandle *th)
{
bool grow = false;
ENTRY;
- LASSERT(end == OBD_OBJECT_EOF);
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(inode != NULL);
GOTO(out, rc);
}
+ inode_lock(inode);
/* add to orphan list to ensure truncate completion
* if this transaction succeed. ldiskfs_truncate()
* will take the inode out of the list */
rc = ldiskfs_orphan_add(oh->ot_handle, inode);
+ inode_unlock(inode);
if (rc != 0)
GOTO(out, rc);
.dbo_punch = osd_punch,
.dbo_fiemap_get = osd_fiemap_get,
.dbo_ladvise = osd_ladvise,
+ .dbo_declare_fallocate = osd_declare_fallocate,
+ .dbo_fallocate = osd_fallocate,
};
/**
else
down_write(&obj->oo_ext_idx_sem);
al->tl_shared = shared;
+ lu_object_get(&obj->oo_dt.do_lu);
list_add(&al->tl_list, &oh->ot_trunc_locks);
return 0;
}
-void osd_trunc_unlock_all(struct list_head *list)
+void osd_trunc_unlock_all(const struct lu_env *env, struct list_head *list)
{
struct osd_access_lock *al, *tmp;
list_for_each_entry_safe(al, tmp, list, tl_list) {
up_read(&al->tl_obj->oo_ext_idx_sem);
else
up_write(&al->tl_obj->oo_ext_idx_sem);
+ osd_object_put(env, al->tl_obj);
list_del(&al->tl_list);
OBD_FREE_PTR(al);
}
return;
}
+ inode_lock(inode);
ldiskfs_truncate(inode);
+ inode_unlock(inode);
/*
* For a partial-page truncate, flush the page to disk immediately to