]) # LC_HAVE_FOLIO_BATCH_REINIT
#
+# LC_HAVE_INODE_ATTACH_WB_FOLIO
+#
+# linux kernel v6.2-rc4 commit: 9cfb816b1c6c99f4b3c1d4a0fb096162cd17ec71
+# mm/fs: convert inode_attach_wb() to take a folio
+#
+AC_DEFUN([LC_SRC_HAVE_INODE_ATTACH_WB_FOLIO], [
+ LB2_LINUX_TEST_SRC([inode_attach_wb_folio_arg], [
+ #include <linux/writeback.h>
+ ],[
+ struct folio *folio = NULL;
+
+ inode_attach_wb(NULL, folio);
+ ],[-Werror])
+])
+AC_DEFUN([LC_HAVE_INODE_ATTACH_WB_FOLIO], [
+ LB2_MSG_LINUX_TEST_RESULT([if 'inode_attach_wb()' takes a folio],
+ [inode_attach_wb_folio_arg], [
+ AC_DEFINE(HAVE_INODE_ATTACH_WB_FOLIO, 1,
+ ['inode_attach_wb()' takes a folio])
+ ])
+]) # LC_HAVE_INODE_ATTACH_WB_FOLIO
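The guard defined above is consumed from C as a plain preprocessor switch. A minimal sketch of the pattern, for orientation only (the wrapper name ll_inode_attach_wb() is hypothetical; the patch itself calls inode_attach_wb() directly under the same #ifdef in the ll_write_end() hunk further below):

	#include <linux/writeback.h>
	#include <linux/pagemap.h>

	/* Attach @inode to a cgroup writeback domain using whichever
	 * inode_attach_wb() signature the running kernel provides.
	 */
	static inline void ll_inode_attach_wb(struct inode *inode,
					      struct page *vmpage)
	{
	#ifdef HAVE_INODE_ATTACH_WB_FOLIO
		/* newer kernels: inode_attach_wb() takes a folio */
		inode_attach_wb(inode, page_folio(vmpage));
	#else
		/* older kernels: inode_attach_wb() takes a page */
		inode_attach_wb(inode, vmpage);
	#endif
	}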
+
+#
# LC_HAVE_IOV_ITER_IOVEC
#
# linux kernel v6.3-rc4-32-g6eb203e1a868
LC_SRC_HAVE_LOCKS_LOCK_FILE_WAIT_IN_FILELOCK
LC_SRC_HAVE_U64_CAPABILITY
LC_SRC_HAVE_FOLIO_BATCH_REINIT
+ LC_SRC_HAVE_INODE_ATTACH_WB_FOLIO
# 6.4
LC_SRC_HAVE_IOV_ITER_IOVEC
LC_HAVE_LOCKS_LOCK_FILE_WAIT_IN_FILELOCK
LC_HAVE_U64_CAPABILITY
LC_HAVE_FOLIO_BATCH_REINIT
+ LC_HAVE_INODE_ATTACH_WB_FOLIO
# 6.4
LC_HAVE_IOV_ITER_IOVEC
ra->cra_release(env, ra);
}
+enum cl_io_priority {
+ /* Normal I/O, usually just queue the pages in the client side cache. */
+ IO_PRIO_NORMAL = 0,
+ /* I/O is urgent and should flush queued pages to OSTs ASAP. */
+ IO_PRIO_URGENT,
+ /* The memcg is under high memory pressure, and the user process doing
+ * the write has exceeded its dirty limit and is being rate limited in
+ * balance_dirty_pages(). Dirty pages for the corresponding @wb need to
+ * be flushed ASAP.
+ */
+ IO_PRIO_DIRTY_EXCEEDED,
+ /*
+ * I/O is urgent and the pages being flushed are marked with the
+ * OBD_BRW_SOFT_SYNC flag, which may trigger a soft sync on the OSTs
+ * and thus free unstable pages much more quickly.
+ */
+ IO_PRIO_SOFT_SYNC,
+ /*
+ * The system or a certain memcg is under high memory pressure. Dirty
+ * pages must be flushed to the OSTs immediately, and the I/O RPC must
+ * wait synchronously for the write transaction to commit on the OSTs
+ * so that unstable pages can be released.
+ */
+ IO_PRIO_HARD_SYNC,
+ IO_PRIO_MAX,
+};
+
+static inline bool cl_io_high_prio(enum cl_io_priority prio)
+{
+ return prio >= IO_PRIO_URGENT;
+}
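For orientation, the priority levels above are ordered by aggressiveness and cl_io_high_prio() simply splits them at IO_PRIO_URGENT: everything from IO_PRIO_URGENT upward is treated as high priority. A purely illustrative sketch of how a caller might map writeback state onto these levels (the helper name and its inputs are hypothetical; the real selection logic is in the ll_writepages() and ll_write_end() hunks below):

	/* Hypothetical helper: pick a flush priority from writeback state. */
	static enum cl_io_priority example_pick_prio(bool dirty_exceeded,
						     bool hard_mem_pressure)
	{
		if (hard_mem_pressure)
			return IO_PRIO_HARD_SYNC;	/* wait for OST commit */
		if (dirty_exceeded)
			return IO_PRIO_DIRTY_EXCEEDED;	/* rate-limited writer */
		return IO_PRIO_NORMAL;			/* just queue in cache */
	}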
+
/**
* Per-layer io operations.
* \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
int (*cio_commit_async)(const struct lu_env *env,
const struct cl_io_slice *slice,
struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb);
+ cl_commit_cbt cb, enum cl_io_priority prio);
/**
* Release active extent.
*/
void (*cio_extent_release)(const struct lu_env *env,
- const struct cl_io_slice *slice);
+ const struct cl_io_slice *slice,
+ enum cl_io_priority prio);
/**
* Decide maximum read ahead extent
*
struct cl_page *ft_page;
} ci_fault;
struct cl_fsync_io {
- loff_t fi_start;
- loff_t fi_end;
+ loff_t fi_start;
+ loff_t fi_end;
/** file system level fid */
- struct lu_fid *fi_fid;
- enum cl_fsync_mode fi_mode;
+ struct lu_fid *fi_fid;
+ enum cl_fsync_mode fi_mode;
/* how many pages were written/discarded */
- unsigned int fi_nr_written;
+ unsigned int fi_nr_written;
+ enum cl_io_priority fi_prio;
} ci_fsync;
struct cl_ladvise_io {
__u64 lio_start;
long timeout);
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb);
-void cl_io_extent_release(const struct lu_env *env, struct cl_io *io);
+ cl_commit_cbt cb, enum cl_io_priority prio);
+void cl_io_extent_release(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_priority prio);
int cl_io_lru_reserve(const struct lu_env *env, struct cl_io *io,
loff_t pos, size_t bytes);
int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io,
__u64 size, struct osc_extent **extp);
void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext);
int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end, int hp, int discard);
+ pgoff_t start, pgoff_t end, int hp, int discard,
+ enum cl_io_priority prio);
int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
pgoff_t start, pgoff_t end);
int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
int osc_io_commit_async(const struct lu_env *env,
const struct cl_io_slice *ios,
struct cl_page_list *qin, int from, int to,
- cl_commit_cbt cb);
+ cl_commit_cbt cb, enum cl_io_priority prio);
void osc_io_extent_release(const struct lu_env *env,
- const struct cl_io_slice *ios);
+ const struct cl_io_slice *ios,
+ enum cl_io_priority prio);
int osc_io_iter_init(const struct lu_env *env, const struct cl_io_slice *ios);
void osc_io_iter_fini(const struct lu_env *env,
const struct cl_io_slice *ios);
return &osc_export(obj)->exp_obd->u.cli;
}
+static inline char *cli_name(struct client_obd *cli)
+{
+ return cli->cl_import->imp_obd->obd_name;
+}
+
static inline struct osc_object *cl2osc(const struct cl_object *obj)
{
return container_of_safe(obj, struct osc_object, oo_cl);
/* flush local cache first if any */
cl_sync_file_range(inode, offset, OBD_OBJECT_EOF,
- CL_FSYNC_LOCAL, 0);
+ CL_FSYNC_LOCAL, 0, IO_PRIO_NORMAL);
retval = ll_lseek(file, offset, origin);
if (retval < 0)
* Return how many pages have been written.
*/
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout)
+ enum cl_fsync_mode mode, int ignore_layout,
+ enum cl_io_priority prio)
{
struct lu_env *env;
struct cl_io *io;
fio->fi_fid = ll_inode2fid(inode);
fio->fi_mode = mode;
fio->fi_nr_written = 0;
+ fio->fi_prio = prio;
if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
result = cl_io_loop(env, io);
err = pcc_fsync(file, start, end, datasync, &cached);
if (!cached)
err = cl_sync_file_range(inode, start, end,
- CL_FSYNC_ALL, 0);
+ CL_FSYNC_ALL, 0,
+ IO_PRIO_NORMAL);
if (rc == 0 && err < 0)
rc = err;
if (rc < 0)
int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
struct cl_page *page, struct file *file);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_priority prio);
enum lcc_type;
void ll_cl_add(struct inode *inode, const struct lu_env *env, struct cl_io *io,
}
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout);
+ enum cl_fsync_mode mode, int ignore_layout,
+ enum cl_io_priority prio);
static inline int ll_file_nolock(const struct file *file)
{
* unlink, so that file is not opened somewhere else
*/
cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, inode->i_nlink ?
- CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1);
+ CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1,
+ IO_PRIO_NORMAL);
}
ll_truncate_inode_pages_final(inode);
*/
result = cl_sync_file_range(inode, offset,
offset + PAGE_SIZE - 1,
- CL_FSYNC_LOCAL, 1);
+ CL_FSYNC_LOCAL, 1, IO_PRIO_NORMAL);
if (result > 0) {
/* May have written more than one page. decreasing this
* page because the caller will count it.
int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
+ enum cl_io_priority prio = IO_PRIO_NORMAL;
loff_t start;
loff_t end;
enum cl_fsync_mode mode;
wb = inode_to_wb(inode);
if (wbc->for_background ||
(wb->start_all_reason == WB_REASON_VMSCAN &&
- test_bit(WB_start_all, &wb->state)))
+ test_bit(WB_start_all, &wb->state))) {
mode = CL_FSYNC_RECLAIM;
+ if (wb->dirty_exceeded)
+ prio = IO_PRIO_DIRTY_EXCEEDED;
+ }
spin_unlock(&inode->i_lock);
#else
/*
* inside the IO context of write, which will cause deadlock at
* layout_conf since it waits for active IOs to complete.
*/
- result = cl_sync_file_range(inode, start, end, mode, 1);
+ result = cl_sync_file_range(inode, start, end, mode, 1, prio);
if (result > 0) {
wbc->nr_to_write -= result;
result = 0;
}
/* commit pages and then wait for page lock */
- result = vvp_io_write_commit(env, io);
+ result = vvp_io_write_commit(env, io, IO_PRIO_NORMAL);
if (result < 0)
GOTO(out, result);
struct cl_page *page;
struct page *vmpage = wbe_folio_page(vmfolio);
unsigned from = pos & (PAGE_SIZE - 1);
+ enum cl_io_priority prio = IO_PRIO_NORMAL;
bool unplug = false;
int result = 0;
ENTRY;
LASSERT(cl_page_is_owned(page, io));
if (copied > 0) {
struct cl_page_list *plist = &vio->u.readwrite.vui_queue;
+#ifdef SB_I_CGROUPWB
+ struct inode *inode = file_inode(file);
+ struct bdi_writeback *wb;
+
+ spin_lock(&inode->i_lock);
+#ifdef HAVE_INODE_ATTACH_WB_FOLIO
+ inode_attach_wb(inode, page_folio(vmpage));
+#else
+ inode_attach_wb(inode, vmpage);
+#endif
+ wb = inode_to_wb(inode);
+ LASSERTF(wb != NULL, "wb@%pK\n", wb);
+ if (wb->dirty_exceeded) {
+ unplug = true;
+ prio = IO_PRIO_URGENT;
+ CDEBUG(D_IOTRACE, "wb@%pK dirty_ratelimit=%lu balanced_dirty_ratelimit=%lu dirty_exceeded=%d state=%lX last_old_flush=%lu\n",
+ wb, wb->dirty_ratelimit,
+ wb->balanced_dirty_ratelimit,
+ wb->dirty_exceeded, wb->state,
+ wb->last_old_flush);
+ }
+ spin_unlock(&inode->i_lock);
+#endif
lcc->lcc_page = NULL; /* page will be queued */
io->u.ci_rw.crw_pos + io->u.ci_rw.crw_bytes)
unplug = true;
if (unplug)
- result = vvp_io_write_commit(env, io);
+ result = vvp_io_write_commit(env, io, prio);
if (result < 0)
io->ci_result = result;
int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
struct cl_io *io);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_priority prio);
int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
struct cl_page *page, pgoff_t index);
struct lu_object *vvp_object_alloc(const struct lu_env *env,
}
/* Return how many bytes have queued or written */
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_priority prio)
{
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
if (npages == 0)
RETURN(0);
- CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
- npages, vio->u.readwrite.vui_from, vio->u.readwrite.vui_to);
+ CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d, prio %d\n",
+ npages, vio->u.readwrite.vui_from, vio->u.readwrite.vui_to,
+ prio);
LASSERT(page_list_sanity_check(obj, queue));
rc = cl_io_commit_async(env, io, queue,
vio->u.readwrite.vui_from,
vio->u.readwrite.vui_to,
- write_commit_callback);
+ write_commit_callback, prio);
npages -= queue->pl_nr; /* already committed pages */
if (npages > 0) {
/* calculate how many bytes were written */
LASSERT(ergo(rc == 0, queue->pl_nr == 0));
/* out of quota, try sync write */
- if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
+ if ((rc == -EDQUOT && !cl_io_is_mkwrite(io)) || prio > IO_PRIO_NORMAL) {
struct ll_inode_info *lli = ll_i2info(inode);
rc = vvp_io_commit_sync(env, io, queue,
}
if (result > 0) {
- result = vvp_io_write_commit(env, io);
+ result = vvp_io_write_commit(env, io, IO_PRIO_NORMAL);
/* Simulate short commit */
if (CFS_FAULT_CHECK(OBD_FAIL_LLITE_SHORT_COMMIT)) {
vio->u.readwrite.vui_written >>= 1;
* still have chance to detect it.
*/
result = cl_io_commit_async(env, io, plist, 0, to,
- mkwrite_commit_callback);
+ mkwrite_commit_callback,
+ IO_PRIO_NORMAL);
/* Have overquota flag, trying sync write to check
* whether indeed out of quota
*/
cl_page_list_add(plist, page, true);
result = cl_io_commit_async(env, io,
plist, 0, to,
- mkwrite_commit_callback);
+ mkwrite_commit_callback,
+ IO_PRIO_NORMAL);
io->ci_noquota = 0;
} else {
cl_page_put(env, page);
int rc;
ENTRY;
- rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+ rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1,
+ IO_PRIO_NORMAL);
if (rc < 0) {
CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
PFID(lu_object_fid(&obj->co_lu)), rc);
io->u.ci_fsync.fi_end = end;
io->u.ci_fsync.fi_fid = parent->u.ci_fsync.fi_fid;
io->u.ci_fsync.fi_mode = parent->u.ci_fsync.fi_mode;
+ io->u.ci_fsync.fi_prio = parent->u.ci_fsync.fi_prio;
break;
}
case CIT_READ:
static int lov_io_commit_async(const struct lu_env *env,
const struct cl_io_slice *ios,
struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb)
+ cl_commit_cbt cb, enum cl_io_priority prio)
{
struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
struct lov_io *lio = cl2lov_io(env, ios);
+ bool hp = cl_io_high_prio(prio);
struct lov_io_sub *sub;
struct cl_page *page;
int rc = 0;
LASSERT(!IS_ERR(sub));
LASSERT(sub == &lio->lis_single_subio);
rc = cl_io_commit_async(sub->sub_env, &sub->sub_io, queue,
- from, to, cb);
+ from, to, cb, prio);
RETURN(rc);
}
sub = lov_sub_get(env, lio, index);
if (!IS_ERR(sub)) {
rc = cl_io_commit_async(sub->sub_env, &sub->sub_io,
- plist, from, stripe_to, cb);
+ plist, from, stripe_to, cb,
+ prio);
} else {
rc = PTR_ERR(sub);
break;
from = 0;
- if (lov_comp_entry(index) !=
+ if (!hp && lov_comp_entry(index) !=
lov_comp_entry(page->cp_lov_index))
- cl_io_extent_release(sub->sub_env, &sub->sub_io);
+ cl_io_extent_release(sub->sub_env, &sub->sub_io, prio);
+ }
+
+ if (rc == 0 && hp) {
+ list_for_each_entry(sub, &lio->lis_subios, sub_list)
+ cl_io_extent_release(sub->sub_env, &sub->sub_io, prio);
}
/* for error case, add the page back into the qin list */
if (mode == CLM_WRITE) {
result = osc_cache_writeback_range(env, obj, start, end, 1,
- discard);
+ discard, IO_PRIO_NORMAL);
CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
obj, start, end, result,
discard ? "discarded" : "written back");
if (fio->fi_mode == CL_FSYNC_RECLAIM) {
struct client_obd *cli = osc_cli(osc);
- if (!atomic_long_read(&cli->cl_unstable_count)) {
- /* Stop flush when there are no unstable pages? */
- CDEBUG(D_CACHE, "unstable count is zero\n");
+ if (!atomic_read(&osc->oo_nr_ios) &&
+ !atomic_read(&osc->oo_nr_writes) &&
+ !atomic_long_read(&cli->cl_unstable_count)) {
+ /*
+ * No active I/O, no dirty pages needing to be written and no
+ * unstable pages needing to commit.
+ */
+ CDEBUG(D_CACHE,
+ "%s: dirty/unstable counts are both zero\n",
+ cli_name(cli));
RETURN(0);
}
}
* possible range despite of supplied start/end values.
*/
result = osc_cache_writeback_range(env, osc, 0, CL_PAGE_EOF, 0,
- fio->fi_mode == CL_FSYNC_DISCARD);
+ fio->fi_mode == CL_FSYNC_DISCARD,
+ fio->fi_prio);
if (result > 0) {
fio->fi_nr_written += result;
result = 0;
* @from: Starting position
* @to: Ending position
* @cb: callback function
+ * @prio: I/O priority
*
* Returns 0 if all pages committed, or errcode if error occurred.
* see cl_io_operations::cio_commit_async()
*/
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb)
+ cl_commit_cbt cb, enum cl_io_priority prio)
{
const struct cl_io_slice *scan;
int result = 0;
if (scan->cis_iop->cio_commit_async == NULL)
continue;
result = scan->cis_iop->cio_commit_async(env, scan, queue,
- from, to, cb);
+ from, to, cb, prio);
if (result != 0)
break;
}
}
EXPORT_SYMBOL(cl_io_commit_async);
-void cl_io_extent_release(const struct lu_env *env, struct cl_io *io)
+void cl_io_extent_release(const struct lu_env *env, struct cl_io *io,
+ enum cl_io_priority prio)
{
const struct cl_io_slice *scan;
ENTRY;
list_for_each_entry(scan, &io->ci_layers, cis_linkage) {
if (scan->cis_iop->cio_extent_release == NULL)
continue;
- scan->cis_iop->cio_extent_release(env, scan);
+ scan->cis_iop->cio_extent_release(env, scan, prio);
}
EXIT;
}
GOTO(out, rc = 65);
fallthrough;
default:
- if (atomic_read(&ext->oe_users) > 0)
- GOTO(out, rc = 70);
+ break;
}
if (ext->oe_max_end < ext->oe_end || ext->oe_end < ext->oe_start)
/**
- * Drop user count of osc_extent, and unplug IO asynchronously.
+ * Drop user count of osc_extent, and unplug I/O (asynchronously, or
+ * synchronously for high-priority I/O).
*/
-void osc_extent_release(const struct lu_env *env, struct osc_extent *ext)
+void osc_extent_release(const struct lu_env *env, struct osc_extent *ext,
+ enum cl_io_priority prio)
{
struct osc_object *obj = ext->oe_obj;
struct client_obd *cli = osc_cli(obj);
+ bool hp = cl_io_high_prio(prio);
+
ENTRY;
LASSERT(atomic_read(&ext->oe_users) > 0);
LASSERT(ext->oe_grants > 0);
if (atomic_dec_and_lock(&ext->oe_users, &obj->oo_lock)) {
- LASSERT(ext->oe_state == OES_ACTIVE);
if (ext->oe_trunc_pending) {
- /* a truncate process is waiting for this extent.
+ /*
+ * A truncate process is waiting for this extent.
* This may happen due to a race, check
- * osc_cache_truncate_start(). */
+ * osc_cache_truncate_start().
+ */
+ if (ext->oe_state != OES_ACTIVE) {
+ int rc;
+
+ osc_object_unlock(obj);
+ rc = osc_extent_wait(env, ext, OES_INV);
+ if (rc < 0)
+ OSC_EXTENT_DUMP(D_ERROR, ext,
+ "error: %d.\n", rc);
+ osc_object_lock(obj);
+ }
osc_extent_state_set(ext, OES_TRUNC);
ext->oe_trunc_pending = 0;
osc_object_unlock(obj);
- } else {
+ } else if (ext->oe_state == OES_ACTIVE) {
int grant = 0;
osc_extent_state_set(ext, OES_CACHE);
if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
grant += cli->cl_grant_extent_tax;
- if (!ext->oe_rw && ext->oe_dlmlock) {
- bool hp;
-
+ if (!hp && !ext->oe_rw && ext->oe_dlmlock) {
lock_res_and_lock(ext->oe_dlmlock);
hp = ldlm_is_cbpending(ext->oe_dlmlock);
unlock_res_and_lock(ext->oe_dlmlock);
-
- /* HP extent should be written ASAP. */
- if (hp)
- ext->oe_hp = 1;
}
+
+ /* HP extent should be written ASAP. */
+ if (hp)
+ ext->oe_hp = 1;
+
if (ext->oe_hp)
list_move_tail(&ext->oe_link,
&obj->oo_hp_exts);
osc_object_unlock(obj);
if (grant > 0)
osc_unreserve_grant(cli, 0, grant);
+ } else {
+ osc_object_unlock(obj);
}
- osc_io_unplug_async(env, cli, obj);
+ if (unlikely(hp))
+ osc_io_unplug(env, cli, obj);
+ else
+ osc_io_unplug_async(env, cli, obj);
}
osc_extent_put(env, ext);
}
osc_object_unlock(obj);
if (rc == 1)
- osc_extent_release(env, ext);
+ osc_extent_release(env, ext, IO_PRIO_NORMAL);
/* wait for the extent until its state becomes @state */
rc = wait_event_idle_timeout(ext->oe_waitq,
LASSERT(ext->oe_max_end >= index && ext->oe_start <= index);
osc_object_lock(obj);
+ if (ext->oe_state != OES_ACTIVE)
+ GOTO(out, rc = -ESTALE);
+
LASSERT(sanity_check_nolock(ext) == 0);
end_chunk = ext->oe_end >> ppc_bits;
if (chunk > end_chunk + 1)
* 2. otherwise, a new extent will be allocated. */
ext = oio->oi_active;
- if (ext != NULL && ext->oe_start <= index && ext->oe_max_end >= index) {
+ if (ext != NULL && ext->oe_state != OES_ACTIVE) {
+ need_release = 1;
+ } else if (ext != NULL && ext->oe_start <= index &&
+ ext->oe_max_end >= index) {
/* one chunk plus extent overhead must be enough to write this
* page */
grants = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax;
need_release = 1;
}
if (need_release) {
- osc_extent_release(env, ext);
+ osc_extent_release(env, ext, IO_PRIO_NORMAL);
oio->oi_active = NULL;
ext = NULL;
}
grants = tmp;
}
+restart_find:
tmp = grants;
if (rc == 0) {
ext = osc_extent_find(env, osc, index, &tmp);
LASSERT((oap->oap_brw_flags & OBD_BRW_FROM_GRANT) != 0);
osc_object_lock(osc);
+ if (ext->oe_state != OES_ACTIVE) {
+ if (ext->oe_state == OES_CACHE) {
+ osc_extent_state_set(ext, OES_ACTIVE);
+ osc_update_pending(osc, OBD_BRW_WRITE,
+ -ext->oe_nr_pages);
+ list_del_init(&ext->oe_link);
+ } else {
+ osc_object_unlock(osc);
+ osc_extent_get(ext);
+ osc_extent_release(env, ext, IO_PRIO_NORMAL);
+ oio->oi_active = NULL;
+
+ /* Waiting for IO finished. */
+ rc = osc_extent_wait(env, ext, OES_INV);
+ osc_extent_put(env, ext);
+ if (rc < 0)
+ RETURN(rc);
+
+ GOTO(restart_find, rc);
+ }
+ }
+
if (ext->oe_nr_pages == 0)
ext->oe_srvlock = ops->ops_srvlock;
else
* Return how many pages will be issued, or error code if error occurred.
*/
int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
- pgoff_t start, pgoff_t end, int hp, int discard)
+ pgoff_t start, pgoff_t end, int hp, int discard,
+ enum cl_io_priority prio)
{
struct osc_extent *ext;
LIST_HEAD(discard_list);
+ bool active_ext_check = false;
bool unplug = false;
int result = 0;
+
ENTRY;
+repeat:
osc_object_lock(obj);
ext = osc_extent_search(obj, start);
if (ext == NULL)
* grants. We do this for the correctness of fsync. */
LASSERT(hp == 0 && discard == 0);
ext->oe_urgent = 1;
+
+ if (active_ext_check) {
+ osc_extent_state_set(ext, OES_CACHE);
+ list_move_tail(&ext->oe_link,
+ &obj->oo_urgent_exts);
+ osc_update_pending(obj, OBD_BRW_WRITE,
+ ext->oe_nr_pages);
+ unplug = true;
+ }
+
break;
case OES_TRUNC:
/* this extent is being truncated, can't do anything
result = rc;
}
- OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
+ OSC_IO_DEBUG(obj, "pageout [%lu, %lu] npages %lu: rc=%d.\n",
+ start, end, obj->oo_npages, result);
+
+ /*
+ * Try to flush the active I/O extents of the object.
+ * Otherwise, the user process writing the file may have exceeded its
+ * dirty limit and be stuck waiting endlessly in balance_dirty_pages().
+ */
+ if (result == 0 && prio == IO_PRIO_DIRTY_EXCEEDED &&
+ !active_ext_check && atomic_read(&obj->oo_nr_ios) &&
+ obj->oo_npages > 0) {
+ osc_extent_tree_dump(D_CACHE, obj);
+ active_ext_check = true;
+ GOTO(repeat, result);
+ }
+
RETURN(result);
}
EXPORT_SYMBOL(osc_cache_writeback_range);
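To recap the retry introduced above: when the caller requests IO_PRIO_DIRTY_EXCEEDED writeback and the first pass queued nothing while the object still has active I/O and cached pages, the range is scanned once more with active_ext_check set so that OES_ACTIVE extents are moved to the urgent list and flushed. A condensed, purely illustrative view of that control flow (struct obj_sketch, scan_and_queue() and has_active_io() are hypothetical stand-ins for the real osc_object machinery):

	struct obj_sketch;	/* stand-in for struct osc_object */

	static int scan_and_queue(struct obj_sketch *obj, bool force_active);
	static bool has_active_io(struct obj_sketch *obj);

	static int writeback_range_sketch(struct obj_sketch *obj,
					  bool dirty_exceeded)
	{
		bool active_ext_check = false;
		int queued;

	repeat:
		/* The first pass behaves as before; a second pass additionally
		 * pushes OES_ACTIVE extents onto the urgent list.
		 */
		queued = scan_and_queue(obj, active_ext_check);

		if (queued == 0 && dirty_exceeded && !active_ext_check &&
		    has_active_io(obj)) {
			active_ext_check = true;
			goto repeat;	/* at most one extra pass */
		}

		return queued;
	}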
int lru_queue_work(const struct lu_env *env, void *data);
int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
int sent, int rc);
-void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
+void osc_extent_release(const struct lu_env *env, struct osc_extent *ext,
+ enum cl_io_priority prio);
int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
pgoff_t start, pgoff_t end, bool discard);
int osc_ldlm_hp_handle(const struct lu_env *env, struct osc_object *obj,
return cli->cl_r_in_flight + cli->cl_w_in_flight;
}
-static inline char *cli_name(struct client_obd *cli)
-{
- return cli->cl_import->imp_obd->obd_name;
-}
-
static inline char list_empty_marker(struct list_head *list)
{
return list_empty(list) ? '-' : '+';
int osc_io_commit_async(const struct lu_env *env,
const struct cl_io_slice *ios,
struct cl_page_list *qin, int from, int to,
- cl_commit_cbt cb)
+ cl_commit_cbt cb, enum cl_io_priority prio)
{
struct cl_io *io = ios->cis_io;
struct osc_io *oio = cl2osc_io(env, ios);
/* for sync write, kernel will wait for this page to be flushed before
* osc_io_end() is called, so release it earlier.
* for mkwrite(), it's known there is no further pages. */
- if (cl_io_is_sync_write(io) && oio->oi_active != NULL) {
- osc_extent_release(env, oio->oi_active);
+ if (cl_io_is_sync_write(io) && oio->oi_active) {
+ osc_extent_release(env, oio->oi_active, prio);
oio->oi_active = NULL;
}
EXPORT_SYMBOL(osc_io_commit_async);
void osc_io_extent_release(const struct lu_env *env,
- const struct cl_io_slice *ios)
+ const struct cl_io_slice *ios,
+ enum cl_io_priority prio)
{
struct osc_io *oio = cl2osc_io(env, ios);
if (oio->oi_active != NULL) {
- osc_extent_release(env, oio->oi_active);
+ osc_extent_release(env, oio->oi_active, prio);
oio->oi_active = NULL;
}
}
int rc;
ENTRY;
- rc = osc_cache_writeback_range(env, osc, pg_start, pg_end, 1, 0);
+ rc = osc_cache_writeback_range(env, osc, pg_start, pg_end, 1, 0,
+ IO_PRIO_NORMAL);
if (rc < 0)
RETURN(rc);
if (fio->fi_mode == CL_FSYNC_RECLAIM) {
struct client_obd *cli = osc_cli(osc);
- if (!atomic_long_read(&cli->cl_unstable_count)) {
- /* Stop flush when there are no unstable pages? */
- CDEBUG(D_CACHE, "unstable count is zero\n");
+ if (!atomic_read(&osc->oo_nr_ios) &&
+ !atomic_read(&osc->oo_nr_writes) &&
+ !atomic_long_read(&cli->cl_unstable_count)) {
+ /*
+ * No active I/O, no dirty pages needing to be written and
+ * no unstable pages needing to commit.
+ */
+ CDEBUG(D_CACHE,
+ "%s: unstable/dirty counts are both zero\n",
+ cli_name(cli));
RETURN(0);
}
}
end = CL_PAGE_EOF;
result = osc_cache_writeback_range(env, osc, start, end, 0,
- fio->fi_mode == CL_FSYNC_DISCARD);
+ fio->fi_mode == CL_FSYNC_DISCARD,
+ fio->fi_prio);
if (result < 0 && fio->fi_mode == CL_FSYNC_DISCARD) {
CDEBUG(D_CACHE,
"%s: ignore error %d on discarding "DFID":[%lu-%lu]\n",
struct osc_io *oio = cl2osc_io(env, slice);
if (oio->oi_active) {
- osc_extent_release(env, oio->oi_active);
+ osc_extent_release(env, oio->oi_active, IO_PRIO_NORMAL);
oio->oi_active = NULL;
}
}
if (mode == CLM_WRITE) {
rc = osc_cache_writeback_range(env, obj, start, end, 1,
- discard);
+ discard, IO_PRIO_NORMAL);
CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
obj, start, end, rc,
discard ? "discarded" : "written back");