*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* Implementation of cl_page for OSC layer.
*
policy->l_extent.end = cl_offset(obj, end + 1) - 1;
}
-static const char *osc_list(struct list_head *head)
-{
- return list_empty(head) ? "-" : "+";
-}
-
static inline s64 osc_submit_duration(struct osc_page *opg)
{
if (ktime_to_ns(opg->ops_submit_time) == 0)
}
static int osc_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
+ const struct cl_page_slice *slice,
+ void *cookie, lu_printer_t printer)
{
- struct osc_page *opg = cl2osc_page(slice);
- struct osc_async_page *oap = &opg->ops_oap;
- struct osc_object *obj = cl2osc(slice->cpl_obj);
- struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_async_page *oap = &opg->ops_oap;
+ struct osc_object *obj = cl2osc(slice->cpl_obj);
+ struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
return (*printer)(env, cookie, LUSTRE_OSC_NAME"-page@%p %lu: "
- "1< %#x %d %u %s %s > "
+ "1< %#x %d %c %c > "
"2< %lld %u %u %#x %#x | %p %p %p > "
"3< %d %lld %d > "
- "4< %d %d %d %lu %s | %s %s %s %s > "
- "5< %s %s %s %s | %d %s | %d %s %s>\n",
+ "4< %d %d %d %lu %c | %c %c %c %c > "
+ "5< %c %c %c %c | %d %c | %d %c %c>\n",
opg, osc_index(opg),
- /* 1 */
- oap->oap_magic, oap->oap_cmd,
- oap->oap_interrupted,
- osc_list(&oap->oap_pending_item),
- osc_list(&oap->oap_rpc_item),
- /* 2 */
- oap->oap_obj_off, oap->oap_page_off, oap->oap_count,
- oap->oap_async_flags, oap->oap_brw_flags,
+ /* 1 */
+ oap->oap_magic, oap->oap_cmd,
+ list_empty_marker(&oap->oap_pending_item),
+ list_empty_marker(&oap->oap_rpc_item),
+ /* 2 */
+ oap->oap_obj_off, oap->oap_page_off, oap->oap_count,
+ oap->oap_async_flags, oap->oap_brw_flags,
oap->oap_request, oap->oap_cli, obj,
/* 3 */
opg->ops_transfer_pinned,
osc_submit_duration(opg), opg->ops_srvlock,
- /* 4 */
- cli->cl_r_in_flight, cli->cl_w_in_flight,
- cli->cl_max_rpcs_in_flight,
- cli->cl_avail_grant,
- osc_list(&cli->cl_cache_waiters),
- osc_list(&cli->cl_loi_ready_list),
- osc_list(&cli->cl_loi_hp_ready_list),
- osc_list(&cli->cl_loi_write_list),
- osc_list(&cli->cl_loi_read_list),
- /* 5 */
- osc_list(&obj->oo_ready_item),
- osc_list(&obj->oo_hp_ready_item),
- osc_list(&obj->oo_write_item),
- osc_list(&obj->oo_read_item),
+ /* 4 */
+ cli->cl_r_in_flight, cli->cl_w_in_flight,
+ cli->cl_max_rpcs_in_flight,
+ cli->cl_avail_grant,
+ waitqueue_active(&cli->cl_cache_waiters) ? '+' : '-',
+ list_empty_marker(&cli->cl_loi_ready_list),
+ list_empty_marker(&cli->cl_loi_hp_ready_list),
+ list_empty_marker(&cli->cl_loi_write_list),
+ list_empty_marker(&cli->cl_loi_read_list),
+ /* 5 */
+ list_empty_marker(&obj->oo_ready_item),
+ list_empty_marker(&obj->oo_hp_ready_item),
+ list_empty_marker(&obj->oo_write_item),
+ list_empty_marker(&obj->oo_read_item),
atomic_read(&obj->oo_nr_reads),
- osc_list(&obj->oo_reading_exts),
+ list_empty_marker(&obj->oo_reading_exts),
atomic_read(&obj->oo_nr_writes),
- osc_list(&obj->oo_hp_exts),
- osc_list(&obj->oo_urgent_exts));
+ list_empty_marker(&obj->oo_hp_exts),
+ list_empty_marker(&obj->oo_urgent_exts));
}
static void osc_page_delete(const struct lu_env *env,
struct osc_async_page *oap = &opg->ops_oap;
opg->ops_from = from;
- opg->ops_to = to;
+ /* argument @to is exclusive, but @ops_to is inclusive */
+ opg->ops_to = to - 1;
+ /* This isn't really necessary for transient pages, but we also don't
+ * call clip on transient pages often, so it's OK.
+ */
spin_lock(&oap->oap_lock);
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
spin_unlock(&oap->oap_lock);
}
-static int osc_page_cancel(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct osc_page *opg = cl2osc_page(slice);
- int rc = 0;
-
- /* Check if the transferring against this page
- * is completed, or not even queued. */
- if (opg->ops_transfer_pinned)
- /* FIXME: may not be interrupted.. */
- rc = osc_cancel_async_page(env, opg);
- LASSERT(ergo(rc == 0, opg->ops_transfer_pinned == 0));
- return rc;
-}
-
static int osc_page_flush(const struct lu_env *env,
const struct cl_page_slice *slice,
struct cl_io *io)
.cpo_print = osc_page_print,
.cpo_delete = osc_page_delete,
.cpo_clip = osc_page_clip,
- .cpo_cancel = osc_page_cancel,
.cpo_flush = osc_page_flush,
.cpo_page_touch = osc_page_touch,
};
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
+ struct cl_page *cl_page, pgoff_t index)
{
struct osc_object *osc = cl2osc(obj);
- struct osc_page *opg = cl_object_page_slice(obj, page);
+ struct osc_page *opg = cl_object_page_slice(obj, cl_page);
struct osc_io *oio = osc_env_io(env);
int result;
opg->ops_from = 0;
- opg->ops_to = PAGE_SIZE;
+ opg->ops_to = PAGE_SIZE - 1;
INIT_LIST_HEAD(&opg->ops_lru);
- result = osc_prep_async_page(osc, opg, page->cp_vmpage,
- cl_offset(obj, index));
+ result = osc_prep_async_page(osc, opg, cl_page, cl_offset(obj, index));
if (result != 0)
return result;
opg->ops_srvlock = osc_io_srvlock(oio);
- cl_page_slice_add(page, &opg->ops_cl, obj, index,
- &osc_page_ops);
-
+ cl_page_slice_add(cl_page, &opg->ops_cl, obj, &osc_page_ops);
+ cl_page->cp_osc_index = index;
/* reserve an LRU space for this page */
- if (page->cp_type == CPT_CACHEABLE) {
+ if (cl_page->cp_type == CPT_CACHEABLE) {
result = osc_lru_alloc(env, osc_cli(osc), opg);
if (result == 0) {
result = radix_tree_preload(GFP_NOFS);
* transfer (i.e., transferred synchronously).
*/
void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
- enum cl_req_type crt, int brw_flags)
+ enum cl_req_type crt, int brw_flags, ktime_t submit_time)
{
struct osc_io *oio = osc_env_io(env);
struct osc_async_page *oap = &opg->ops_oap;
LASSERT(oap->oap_async_flags & ASYNC_READY);
LASSERT(oap->oap_async_flags & ASYNC_COUNT_STABLE);
- oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
- oap->oap_page_off = opg->ops_from;
- oap->oap_count = opg->ops_to - opg->ops_from;
+ oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
+ oap->oap_page_off = opg->ops_from;
+ oap->oap_count = opg->ops_to - opg->ops_from + 1;
oap->oap_brw_flags = OBD_BRW_SYNC | brw_flags;
if (oio->oi_cap_sys_resource) {
- oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
- oap->oap_cmd |= OBD_BRW_NOQUOTA;
+ oap->oap_brw_flags |= OBD_BRW_SYS_RESOURCE;
+ oap->oap_cmd |= OBD_BRW_SYS_RESOURCE;
}
- opg->ops_submit_time = ktime_get();
+ opg->ops_submit_time = submit_time;
osc_page_transfer_get(opg, "transfer\0imm");
osc_page_transfer_add(env, opg, crt);
}
void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
{
- struct list_head lru = LIST_HEAD_INIT(lru);
+ LIST_HEAD(lru);
struct osc_async_page *oap;
long npages = 0;
if (--maxscan < 0)
break;
- opg = list_entry(cli->cl_lru_list.next, struct osc_page,
- ops_lru);
+ opg = list_first_entry(&cli->cl_lru_list, struct osc_page,
+ ops_lru);
page = opg->ops_cl.cpl_page;
if (lru_page_busy(cli, page)) {
list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
atomic_dec(&cli->cl_lru_shrinkers);
if (count > 0) {
atomic_long_add(count, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
+ wake_up(&osc_lru_waitq);
}
RETURN(count > 0 ? count : rc);
}
{
struct lu_env *env;
struct cl_client_cache *cache = cli->cl_cache;
+ struct client_obd *scan;
int max_scans;
__u16 refcheck;
long rc = 0;
list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
max_scans = atomic_read(&cache->ccc_users) - 2;
- while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
- cli = list_entry(cache->ccc_lru.next, struct client_obd,
- cl_lru_osc);
-
+ while (--max_scans > 0 &&
+ (scan = list_first_entry_or_null(&cache->ccc_lru,
+ struct client_obd,
+ cl_lru_osc)) != NULL) {
CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
- cli_name(cli), cli,
- atomic_long_read(&cli->cl_lru_in_list),
- atomic_long_read(&cli->cl_lru_busy));
+ cli_name(scan), scan,
+ atomic_long_read(&scan->cl_lru_in_list),
+ atomic_long_read(&scan->cl_lru_busy));
- list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
- if (osc_cache_too_much(cli) > 0) {
+ list_move_tail(&scan->cl_lru_osc, &cache->ccc_lru);
+ if (osc_cache_too_much(scan) > 0) {
spin_unlock(&cache->ccc_lru_lock);
- rc = osc_lru_shrink(env, cli, npages, true);
+ rc = osc_lru_shrink(env, scan, npages, true);
spin_lock(&cache->ccc_lru_lock);
if (rc >= npages)
break;
out:
cl_env_put(env, &refcheck);
CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
- cli_name(cli), cli, rc);
+ cli_name(cli), cli, rc);
return rc;
}
static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli,
struct osc_page *opg)
{
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
struct osc_io *oio = osc_env_io(env);
int rc = 0;
break;
if (rc > 0)
continue;
+ /* IO was issued by readahead: don't try hard, give up with
+ * -EBUSY as soon as no LRU slot is left
+ */
+ if (oio->oi_is_readahead) {
+ if (atomic_long_read(cli->cl_lru_left) > 0)
+ continue;
+ rc = -EBUSY;
+ break;
+ }
cond_resched();
- rc = l_wait_event(osc_lru_waitq,
- atomic_long_read(cli->cl_lru_left) > 0,
- &lwi);
- if (rc < 0)
+ rc = l_wait_event_abortable(
+ osc_lru_waitq,
+ atomic_long_read(cli->cl_lru_left) > 0);
+ if (rc < 0) {
+ rc = -EINTR;
break;
+ }
}
out:
unsigned long reserved = 0;
unsigned long max_pages;
unsigned long c;
+ int rc;
- /* reserve a full RPC window at most to avoid that a thread accidentally
- * consumes too many LRU slots */
- max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
- if (npages > max_pages)
- npages = max_pages;
-
+again:
c = atomic_long_read(cli->cl_lru_left);
if (c < npages && osc_lru_reclaim(cli, npages) > 0)
c = atomic_long_read(cli->cl_lru_left);
+
+ if (c < npages) {
+ /*
+ * Still short of LRU slots: trigger writeback in the hope
+ * that some slots can be freed.
+ */
+ rc = ptlrpcd_queue_work(cli->cl_writeback_work);
+ if (rc)
+ return 0;
+ }
+
while (c >= npages) {
if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
reserved = npages;
}
c = atomic_long_read(cli->cl_lru_left);
}
+
+ if (reserved != npages) {
+ cond_resched();
+ rc = l_wait_event_abortable(
+ osc_lru_waitq,
+ atomic_long_read(cli->cl_lru_left) > 0);
+ goto again;
+ }
+
+ max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
if (atomic_long_read(cli->cl_lru_left) < max_pages) {
/* If there aren't enough pages in the per-OSC LRU then
* wake up the LRU thread to try and clear out space, so
void osc_lru_unreserve(struct client_obd *cli, unsigned long npages)
{
atomic_long_add(npages, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
+ wake_up(&osc_lru_waitq);
}
/**
* In practice this can work pretty good because the pages in the same RPC
* are likely from the same page zone.
*/
+#ifdef HAVE_NR_UNSTABLE_NFS
+/* Old kernels use a separate counter for unstable pages, while
+ * newer kernels treat them like any other writeback; on old kernels
+ * map NR_WRITEBACK onto NR_UNSTABLE_NFS.
+ */
+#define NR_WRITEBACK NR_UNSTABLE_NFS
+#endif
+
static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
struct osc_brw_async_args *aa,
int factor)
int i;
if (desc != NULL) {
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
page_count = desc->bd_iov_count;
} else {
page_count = aa->aa_page_count;
for (i = 0; i < page_count; i++) {
void *pz;
if (desc)
- pz = page_zone(BD_GET_KIOV(desc, i).kiov_page);
+ pz = page_zone(desc->bd_vec[i].bv_page);
else
pz = page_zone(aa->aa_ppga[i]->pg);
}
if (count > 0) {
- mod_zone_page_state(zone, NR_UNSTABLE_NFS,
+ mod_zone_page_state(zone, NR_WRITEBACK,
factor * count);
count = 0;
}
++count;
}
if (count > 0)
- mod_zone_page_state(zone, NR_UNSTABLE_NFS, factor * count);
+ mod_zone_page_state(zone, NR_WRITEBACK, factor * count);
}
static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
&cli->cl_cache->ccc_unstable_nr);
LASSERT(unstable_count >= 0);
if (unstable_count == 0)
- wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+ wake_up(&cli->cl_cache->ccc_unstable_waitq);
if (waitqueue_active(&osc_lru_waitq))
(void)ptlrpcd_queue_work(cli->cl_lru_work);
return SHRINK_STOP;
spin_lock(&osc_shrink_lock);
- while (!list_empty(&osc_shrink_list)) {
- cli = list_entry(osc_shrink_list.next, struct client_obd,
- cl_shrink_list);
-
+ while ((cli = list_first_entry_or_null(&osc_shrink_list,
+ struct client_obd,
+ cl_shrink_list)) != NULL) {
if (stop_anchor == NULL)
stop_anchor = cli;
else if (cli == stop_anchor)