*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* Implementation of cl_page for OSC layer.
*
cli->cl_r_in_flight, cli->cl_w_in_flight,
cli->cl_max_rpcs_in_flight,
cli->cl_avail_grant,
- list_empty_marker(&cli->cl_cache_waiters),
+ waitqueue_active(&cli->cl_cache_waiters) ? '+' : '-',
list_empty_marker(&cli->cl_loi_ready_list),
list_empty_marker(&cli->cl_loi_hp_ready_list),
list_empty_marker(&cli->cl_loi_write_list),
struct osc_async_page *oap = &opg->ops_oap;
opg->ops_from = from;
- opg->ops_to = to;
+ /* argument @to is exclusive, but @ops_to is inclusive */
+ opg->ops_to = to - 1;
+ /* This isn't really necessary for transient pages, but we also don't
+ * call clip on transient pages often, so it's OK.
+ */
spin_lock(&oap->oap_lock);
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
spin_unlock(&oap->oap_lock);
};
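The clip hunk above changes the meaning of ops_to from an exclusive end offset to the last valid byte (inclusive), so a range's length becomes to - from + 1 (see the matching oap_count change in osc_page_submit() below). A minimal userspace sketch of the two conventions, where the struct and helper names (page_slice, slice_clip) are purely illustrative and not the Lustre types:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* Hypothetical slice mirroring ops_from/ops_to: 'to' stores the
 * last valid byte (inclusive), so a full page still fits even if
 * the field is narrower than PAGE_SIZE. */
struct page_slice {
	unsigned int from;	/* first valid byte */
	unsigned int to;	/* last valid byte, inclusive */
};

/* Caller-facing clip takes an exclusive end, as osc_page_clip() does. */
static void slice_clip(struct page_slice *s, unsigned int from, unsigned int to)
{
	s->from = from;
	s->to = to - 1;		/* exclusive -> inclusive */
}

static unsigned int slice_bytes(const struct page_slice *s)
{
	return s->to - s->from + 1;	/* inclusive range length */
}

int main(void)
{
	struct page_slice s;

	slice_clip(&s, 0, PAGE_SIZE);	/* whole page */
	assert(slice_bytes(&s) == PAGE_SIZE);
	printf("bytes=%u last=%u\n", slice_bytes(&s), s.to);
	return 0;
}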
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
+ struct cl_page *cl_page, pgoff_t index)
{
struct osc_object *osc = cl2osc(obj);
- struct osc_page *opg = cl_object_page_slice(obj, page);
+ struct osc_page *opg = cl_object_page_slice(obj, cl_page);
struct osc_io *oio = osc_env_io(env);
int result;
opg->ops_from = 0;
- opg->ops_to = PAGE_SIZE;
+ opg->ops_to = PAGE_SIZE - 1;
INIT_LIST_HEAD(&opg->ops_lru);
- result = osc_prep_async_page(osc, opg, page->cp_vmpage,
- cl_offset(obj, index));
+ result = osc_prep_async_page(osc, opg, cl_page, cl_offset(obj, index));
if (result != 0)
return result;
opg->ops_srvlock = osc_io_srvlock(oio);
- cl_page_slice_add(page, &opg->ops_cl, obj, index,
- &osc_page_ops);
-
+ cl_page_slice_add(cl_page, &opg->ops_cl, obj, &osc_page_ops);
+ cl_page->cp_osc_index = index;
/* reserve LRU space for this page */
- if (page->cp_type == CPT_CACHEABLE) {
+ if (cl_page->cp_type == CPT_CACHEABLE) {
result = osc_lru_alloc(env, osc_cli(osc), opg);
if (result == 0) {
result = radix_tree_preload(GFP_NOFS);
* transfer (i.e., transferred synchronously).
*/
void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
- enum cl_req_type crt, int brw_flags)
+ enum cl_req_type crt, int brw_flags, ktime_t submit_time)
{
struct osc_io *oio = osc_env_io(env);
struct osc_async_page *oap = &opg->ops_oap;
LASSERT(oap->oap_async_flags & ASYNC_READY);
LASSERT(oap->oap_async_flags & ASYNC_COUNT_STABLE);
- oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
- oap->oap_page_off = opg->ops_from;
- oap->oap_count = opg->ops_to - opg->ops_from;
+ oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
+ oap->oap_page_off = opg->ops_from;
+ oap->oap_count = opg->ops_to - opg->ops_from + 1;
oap->oap_brw_flags = OBD_BRW_SYNC | brw_flags;
if (oio->oi_cap_sys_resource) {
- oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
- oap->oap_cmd |= OBD_BRW_NOQUOTA;
+ oap->oap_brw_flags |= OBD_BRW_SYS_RESOURCE;
+ oap->oap_cmd |= OBD_BRW_SYS_RESOURCE;
}
- opg->ops_submit_time = ktime_get();
+ opg->ops_submit_time = submit_time;
osc_page_transfer_get(opg, "transfer\0imm");
osc_page_transfer_add(env, opg, crt);
}
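osc_page_submit() now takes the submission timestamp as an argument instead of calling ktime_get() itself, presumably so the caller can sample the clock once per batch rather than once per page. A userspace model of that contract under this assumption; ktime_get_ns(), page_state, and page_submit() below are stand-ins, not patch code:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Userspace stand-in for ktime_get(): monotonic nanoseconds. */
static int64_t ktime_get_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

struct page_state { int64_t submit_time; };

/* Mirrors the new contract: the caller samples the clock once and
 * hands the same timestamp to every page in the batch. */
static void page_submit(struct page_state *pg, int64_t submit_time)
{
	pg->submit_time = submit_time;
}

int main(void)
{
	struct page_state pages[256];
	int64_t now = ktime_get_ns();	/* one clock read for the whole batch */

	for (int i = 0; i < 256; i++)
		page_submit(&pages[i], now);
	printf("batch stamped at %lld ns\n", (long long)pages[0].submit_time);
	return 0;
}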
if (--maxscan < 0)
break;
- opg = list_entry(cli->cl_lru_list.next, struct osc_page,
- ops_lru);
+ opg = list_first_entry(&cli->cl_lru_list, struct osc_page,
+ ops_lru);
page = opg->ops_cl.cpl_page;
if (lru_page_busy(cli, page)) {
list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
atomic_dec(&cli->cl_lru_shrinkers);
if (count > 0) {
atomic_long_add(count, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
+ wake_up(&osc_lru_waitq);
}
RETURN(count > 0 ? count : rc);
}
{
struct lu_env *env;
struct cl_client_cache *cache = cli->cl_cache;
+ struct client_obd *scan;
int max_scans;
__u16 refcheck;
long rc = 0;
list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
max_scans = atomic_read(&cache->ccc_users) - 2;
- while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) {
- cli = list_entry(cache->ccc_lru.next, struct client_obd,
- cl_lru_osc);
-
+ while (--max_scans > 0 &&
+ (scan = list_first_entry_or_null(&cache->ccc_lru,
+ struct client_obd,
+ cl_lru_osc)) != NULL) {
CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
- cli_name(cli), cli,
- atomic_long_read(&cli->cl_lru_in_list),
- atomic_long_read(&cli->cl_lru_busy));
+ cli_name(scan), scan,
+ atomic_long_read(&scan->cl_lru_in_list),
+ atomic_long_read(&scan->cl_lru_busy));
- list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
- if (osc_cache_too_much(cli) > 0) {
+ list_move_tail(&scan->cl_lru_osc, &cache->ccc_lru);
+ if (osc_cache_too_much(scan) > 0) {
spin_unlock(&cache->ccc_lru_lock);
- rc = osc_lru_shrink(env, cli, npages, true);
+ rc = osc_lru_shrink(env, scan, npages, true);
spin_lock(&cache->ccc_lru_lock);
if (rc >= npages)
break;
out:
cl_env_put(env, &refcheck);
CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
- cli_name(cli), cli, rc);
+ cli_name(cli), cli, rc);
return rc;
}
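The reclaim hunk introduces a dedicated cursor variable (scan) instead of reusing the function-scope cli, so the CDEBUG at out: reports the client the caller passed in rather than whichever OSC the loop visited last. A self-contained model of the same rotate-and-scan pattern with a separate cursor; the list primitives are reimplemented here for illustration and are not the kernel's:

#include <stddef.h>
#include <stdio.h>

/* Minimal intrusive list, modeled on the kernel's list_head. */
struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_move_tail(struct list_head *n, struct list_head *h)
{
	/* unlink */
	n->prev->next = n->next;
	n->next->prev = n->prev;
	/* insert before head (tail position) */
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct client { int id; struct list_head lru; };

int main(void)
{
	struct list_head lru;
	struct client c[3] = { {0}, {1}, {2} };
	struct client *caller = &c[0];	/* the handle we must not lose */

	list_init(&lru);
	for (int i = 0; i < 3; i++) {
		list_init(&c[i].lru);
		list_move_tail(&c[i].lru, &lru);
	}

	/* Round-robin scan with a separate cursor, as in the fixed
	 * osc_lru_reclaim(): the caller's pointer stays intact. */
	for (int n = 0; n < 3; n++) {
		struct client *scan =
			container_of(lru.next, struct client, lru);

		list_move_tail(&scan->lru, &lru);	/* rotate to tail */
		printf("visited %d\n", scan->id);
	}
	printf("caller still %d\n", caller->id);
	return 0;
}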
break;
if (rc > 0)
continue;
+ /* IO issued by readahead, don't try hard */
+ if (oio->oi_is_readahead) {
+ if (atomic_long_read(cli->cl_lru_left) > 0)
+ continue;
+ rc = -EBUSY;
+ break;
+ }
cond_resched();
rc = l_wait_event_abortable(
unsigned long reserved = 0;
unsigned long max_pages;
unsigned long c;
+ int rc;
- /* reserve a full RPC window at most to avoid that a thread accidentally
- * consumes too many LRU slots */
- max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
- if (npages > max_pages)
- npages = max_pages;
-
+again:
c = atomic_long_read(cli->cl_lru_left);
if (c < npages && osc_lru_reclaim(cli, npages) > 0)
c = atomic_long_read(cli->cl_lru_left);
+
+ if (c < npages) {
+ /*
+ * Trigger writeback in the hope some LRU slot could
+ * be freed.
+ */
+ rc = ptlrpcd_queue_work(cli->cl_writeback_work);
+ if (rc)
+ return 0;
+ }
+
while (c >= npages) {
if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
reserved = npages;
}
c = atomic_long_read(cli->cl_lru_left);
}
+
+ if (reserved != npages) {
+ cond_resched();
+ rc = l_wait_event_abortable(
+ osc_lru_waitq,
+ atomic_long_read(cli->cl_lru_left) > 0);
+ goto again;
+ }
+
+ max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
if (atomic_long_read(cli->cl_lru_left) < max_pages) {
/* If there aren't enough pages in the per-OSC LRU then
* wake up the LRU thread to try and clear out space, so
void osc_lru_unreserve(struct client_obd *cli, unsigned long npages)
{
atomic_long_add(npages, cli->cl_lru_left);
- wake_up_all(&osc_lru_waitq);
+ wake_up(&osc_lru_waitq);
}
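osc_lru_reserve() is reworked into a retry loop: attempt a cmpxchg-based grab of npages slots, trigger writeback and wait on osc_lru_waitq when the pool runs short, then goto again. The lock-free grab can be modeled with C11 atomics; note that compare_exchange reloads the observed value on failure, which takes the place of the explicit atomic_long_read() in the kernel loop (names below are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Free LRU slots; stands in for cli->cl_lru_left. */
static atomic_long lru_left = 100;

/* Try to grab 'npages' slots without blocking, mirroring the
 * atomic_long_cmpxchg() loop in osc_lru_reserve(). */
static bool lru_try_reserve(long npages)
{
	long c = atomic_load(&lru_left);

	while (c >= npages) {
		/* on failure, compare_exchange updates c to the current
		 * value, so the loop condition re-checks automatically */
		if (atomic_compare_exchange_weak(&lru_left, &c, c - npages))
			return true;
	}
	return false;	/* caller would reclaim or wait, then retry */
}

int main(void)
{
	printf("first 64: %s\n", lru_try_reserve(64) ? "ok" : "busy");
	printf("next 64: %s\n", lru_try_reserve(64) ? "ok" : "busy");
	printf("left: %ld\n", atomic_load(&lru_left));
	return 0;
}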
/**
* In practice this can work pretty well because the pages in the same RPC
* are likely from the same page zone.
*/
+#ifdef HAVE_NR_UNSTABLE_NFS
+/* Old kernels use a separate counter for unstable pages;
+ * newer kernels treat them like any other writeback.
+ */
+#define NR_WRITEBACK NR_UNSTABLE_NFS
+#endif
+
static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
struct osc_brw_async_args *aa,
int factor)
int i;
if (desc != NULL) {
- LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type));
page_count = desc->bd_iov_count;
} else {
page_count = aa->aa_page_count;
for (i = 0; i < page_count; i++) {
void *pz;
if (desc)
- pz = page_zone(BD_GET_KIOV(desc, i).kiov_page);
+ pz = page_zone(desc->bd_vec[i].bv_page);
else
pz = page_zone(aa->aa_ppga[i]->pg);
}
if (count > 0) {
- mod_zone_page_state(zone, NR_UNSTABLE_NFS,
+ mod_zone_page_state(zone, NR_WRITEBACK,
factor * count);
count = 0;
}
++count;
}
if (count > 0)
- mod_zone_page_state(zone, NR_UNSTABLE_NFS, factor * count);
+ mod_zone_page_state(zone, NR_WRITEBACK, factor * count);
}
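unstable_page_accounting() batches counter updates: it walks the pages in submission order and issues one mod_zone_page_state() call per run of same-zone pages, which is cheap when an RPC's pages mostly come from one zone, as the comment above notes. A compact userspace model of the coalescing loop; the zone is reduced to an int and the counter update to a printf:

#include <stdio.h>

struct page { int zone; };

static void mod_zone_state(int zone, int delta)
{
	/* stands in for mod_zone_page_state() */
	printf("zone %d: %+d\n", zone, delta);
}

/* Flush one counter update per run of same-zone pages instead of
 * touching the per-zone counter once per page. */
static void account(struct page *pages, int page_count, int factor)
{
	int zone = -1, count = 0;

	for (int i = 0; i < page_count; i++) {
		if (pages[i].zone == zone) {
			count++;
			continue;
		}
		if (count > 0)
			mod_zone_state(zone, factor * count);
		zone = pages[i].zone;
		count = 1;
	}
	if (count > 0)
		mod_zone_state(zone, factor * count);
}

int main(void)
{
	struct page pages[] = { {0}, {0}, {0}, {1}, {1}, {0} };

	account(pages, 6, 1);	/* expect: zone 0:+3, zone 1:+2, zone 0:+1 */
	return 0;
}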
static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
&cli->cl_cache->ccc_unstable_nr);
LASSERT(unstable_count >= 0);
if (unstable_count == 0)
- wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+ wake_up(&cli->cl_cache->ccc_unstable_waitq);
if (waitqueue_active(&osc_lru_waitq))
(void)ptlrpcd_queue_work(cli->cl_lru_work);
return SHRINK_STOP;
spin_lock(&osc_shrink_lock);
- while (!list_empty(&osc_shrink_list)) {
- cli = list_entry(osc_shrink_list.next, struct client_obd,
- cl_shrink_list);
-
+ while ((cli = list_first_entry_or_null(&osc_shrink_list,
+ struct client_obd,
+ cl_shrink_list)) != NULL) {
if (stop_anchor == NULL)
stop_anchor = cli;
else if (cli == stop_anchor)