/* ----- part 2 ----- */ \
__ext->oe_grants, __ext->oe_nr_pages, \
list_empty_marker(&__ext->oe_pages), \
- cfs_waitq_active(&__ext->oe_waitq) ? '+' : '-', \
+ waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \
__ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \
/* ----- part 4 ----- */ \
## __VA_ARGS__); \
/* TODO: validate the state machine */
ext->oe_state = state;
- cfs_waitq_broadcast(&ext->oe_waitq);
+ wake_up_all(&ext->oe_waitq);
}
static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
{
struct osc_extent *ext;
- OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, CFS_ALLOC_STD);
+ OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_IOFS);
if (ext == NULL)
return NULL;
CFS_INIT_LIST_HEAD(&ext->oe_link);
ext->oe_state = OES_INV;
CFS_INIT_LIST_HEAD(&ext->oe_pages);
- cfs_waitq_init(&ext->oe_waitq);
+ init_waitqueue_head(&ext->oe_waitq);
ext->oe_osclock = NULL;
return ext;
return -ERANGE;
LASSERT(cur->oe_osclock == victim->oe_osclock);
- ppc_bits = osc_cli(obj)->cl_chunkbits - CFS_PAGE_SHIFT;
+ ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
chunk_start = cur->oe_start >> ppc_bits;
chunk_end = cur->oe_end >> ppc_bits;
if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
LASSERT(lock != NULL);
LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
- LASSERT(cli->cl_chunkbits >= CFS_PAGE_SHIFT);
- ppc_bits = cli->cl_chunkbits - CFS_PAGE_SHIFT;
+ LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
+ ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
chunk_mask = ~((1 << ppc_bits) - 1);
chunksize = 1 << cli->cl_chunkbits;
chunk = index >> ppc_bits;
if (!sent) {
lost_grant = ext->oe_grants;
- } else if (blocksize < CFS_PAGE_SIZE &&
- last_count != CFS_PAGE_SIZE) {
+ } else if (blocksize < PAGE_CACHE_SIZE &&
+ last_count != PAGE_CACHE_SIZE) {
/* For short writes we shouldn't count parts of pages that
* span a whole chunk on the OST side, or our accounting goes
* wrong. Should match the code in filter_grant_check. */
if (end)
count += blocksize - end;
- lost_grant = CFS_PAGE_SIZE - count;
+ lost_grant = PAGE_CACHE_SIZE - count;
}
if (ext->oe_grants > 0)
osc_free_grant(cli, nr_pages, lost_grant);
"%s: wait ext to %d timedout, recovery in progress?\n",
osc_export(obj)->exp_obd->obd_name, state);
- lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
&lwi);
}
struct osc_async_page *oap;
struct osc_async_page *tmp;
int pages_in_chunk = 0;
- int ppc_bits = cli->cl_chunkbits - CFS_PAGE_SHIFT;
+ int ppc_bits = cli->cl_chunkbits -
+ PAGE_CACHE_SHIFT;
__u64 trunc_chunk = trunc_index >> ppc_bits;
int grants = 0;
int nr_pages = 0;
cfs_list_del_init(&oap->oap_pending_item);
cl_page_get(page);
- lu_ref_add(&page->cp_reference, "truncate", cfs_current());
+ lu_ref_add(&page->cp_reference, "truncate", current);
if (cl_page_own(env, io, page) == 0) {
cl_page_unmap(env, io, page);
LASSERT(0);
}
- lu_ref_del(&page->cp_reference, "truncate", cfs_current());
+ lu_ref_del(&page->cp_reference, "truncate", current);
cl_page_put(env, page);
--ext->oe_nr_pages;
if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
LASSERT(last->oap_count > 0);
- LASSERT(last->oap_page_off + last->oap_count <= CFS_PAGE_SIZE);
+ LASSERT(last->oap_page_off + last->oap_count <= PAGE_CACHE_SIZE);
+ spin_lock(&last->oap_lock);
last->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&last->oap_lock);
}
/* for the rest of pages, we don't need to call osf_refresh_count()
* because it's known they are not the last page */
cfs_list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
- oap->oap_count = CFS_PAGE_SIZE - oap->oap_page_off;
+ oap->oap_count = PAGE_CACHE_SIZE - oap->oap_page_off;
+ spin_lock(&oap->oap_lock);
oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+ spin_unlock(&oap->oap_lock);
}
}
struct osc_object *obj = ext->oe_obj;
struct client_obd *cli = osc_cli(obj);
struct osc_extent *next;
- int ppc_bits = cli->cl_chunkbits - CFS_PAGE_SHIFT;
+ int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
pgoff_t chunk = index >> ppc_bits;
pgoff_t end_chunk;
pgoff_t end_index;
return 0;
else if (cl_offset(obj, page->cp_index + 1) > kms)
/* catch sub-page write at end of file */
- return kms % CFS_PAGE_SIZE;
+ return kms % PAGE_CACHE_SIZE;
else
- return CFS_PAGE_SIZE;
+ return PAGE_CACHE_SIZE;
}
static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
ENTRY;
cmd &= ~OBD_BRW_NOQUOTA;
- LASSERT(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ));
- LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE));
+ LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ),
+ "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
+ LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE),
+ "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
LASSERT(opg->ops_transfer_pinned);
/*
RETURN(0);
}
-#define OSC_DUMP_GRANT(cli, fmt, args...) do { \
+#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
struct client_obd *__tmp = (cli); \
- CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \
- "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \
+ CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \
+ "unstable_pages: %d/%d dropped: %ld avail: %ld, " \
+ "reserved: %ld, flight: %d } lru {in list: %d, " \
+ "left: %d, waiters: %d }" fmt, \
__tmp->cl_import->imp_obd->obd_name, \
__tmp->cl_dirty, __tmp->cl_dirty_max, \
cfs_atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
+ cfs_atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
- __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \
+ __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
+ cfs_atomic_read(&__tmp->cl_lru_in_list), \
+ cfs_atomic_read(&__tmp->cl_lru_busy), \
+ cfs_atomic_read(&__tmp->cl_lru_shrinkers), ##args); \
} while (0)
/* caller must hold loi_list_lock */
static void osc_consume_write_grant(struct client_obd *cli,
struct brw_page *pga)
{
- LASSERT_SPIN_LOCKED(&cli->cl_loi_list_lock.lock);
+ LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock));
LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
cfs_atomic_inc(&obd_dirty_pages);
- cli->cl_dirty += CFS_PAGE_SIZE;
+ cli->cl_dirty += PAGE_CACHE_SIZE;
pga->flag |= OBD_BRW_FROM_GRANT;
CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
- CFS_PAGE_SIZE, pga, pga->pg);
+ PAGE_CACHE_SIZE, pga, pga->pg);
osc_update_next_shrink(cli);
}
{
ENTRY;
- LASSERT_SPIN_LOCKED(&cli->cl_loi_list_lock.lock);
+ LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock));
if (!(pga->flag & OBD_BRW_FROM_GRANT)) {
EXIT;
return;
pga->flag &= ~OBD_BRW_FROM_GRANT;
cfs_atomic_dec(&obd_dirty_pages);
- cli->cl_dirty -= CFS_PAGE_SIZE;
+ cli->cl_dirty -= PAGE_CACHE_SIZE;
if (pga->flag & OBD_BRW_NOCACHE) {
pga->flag &= ~OBD_BRW_NOCACHE;
cfs_atomic_dec(&obd_dirty_transit_pages);
- cli->cl_dirty_transit -= CFS_PAGE_SIZE;
+ cli->cl_dirty_transit -= PAGE_CACHE_SIZE;
}
EXIT;
}
* used, we should return these grants to OST. There're two cases where grants
* can be lost:
* 1. truncate;
- * 2. blocksize at OST is less than CFS_PAGE_SIZE and a partial page was
+ * 2. blocksize at OST is less than PAGE_CACHE_SIZE and a partial page was
* written. In this case OST may use less chunks to serve this partial
* write. OSTs don't actually know the page size on the client side. so
* clients have to calculate lost grant by the blocksize on the OST.
client_obd_list_lock(&cli->cl_loi_list_lock);
cfs_atomic_sub(nr_pages, &obd_dirty_pages);
- cli->cl_dirty -= nr_pages << CFS_PAGE_SHIFT;
+ cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT;
cli->cl_lost_grant += lost_grant;
if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
/* borrow some grant from truncate to avoid the case that
{
int rc;
- OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
rc = osc_reserve_grant(cli, bytes);
if (rc < 0)
return 0;
- if (cli->cl_dirty + CFS_PAGE_SIZE <= cli->cl_dirty_max &&
- cfs_atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
+ if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
+ cfs_atomic_read(&obd_unstable_pages) + 1 +
+ cfs_atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
osc_consume_write_grant(cli, &oap->oap_brw_page);
if (transient) {
- cli->cl_dirty_transit += CFS_PAGE_SIZE;
+ cli->cl_dirty_transit += PAGE_CACHE_SIZE;
cfs_atomic_inc(&obd_dirty_transit_pages);
oap->oap_brw_flags |= OBD_BRW_NOCACHE;
}
struct osc_object *osc = oap->oap_obj;
struct lov_oinfo *loi = osc->oo_oinfo;
struct osc_cache_waiter ocw;
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+ LWI_ON_SIGNAL_NOOP, NULL);
int rc = -EDQUOT;
ENTRY;
- OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+ OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
client_obd_list_lock(&cli->cl_loi_list_lock);
/* force the caller to try sync io. this can jump the list
* of queued writes and create a discontiguous rpc stream */
if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
- cli->cl_dirty_max < CFS_PAGE_SIZE ||
+ cli->cl_dirty_max < PAGE_CACHE_SIZE ||
cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync)
GOTO(out, rc = -EDQUOT);
* RPC size will be.
* The exiting condition is no avail grants and no dirty pages caching,
* that really means there is no space on the OST. */
- cfs_waitq_init(&ocw.ocw_waitq);
+ init_waitqueue_head(&ocw.ocw_waitq);
ocw.ocw_oap = oap;
ocw.ocw_grant = bytes;
while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
client_obd_list_lock(&cli->cl_loi_list_lock);
- /* l_wait_event is interrupted by signal */
+ /* l_wait_event is interrupted by signal, or timed out */
if (rc < 0) {
+ switch (rc) {
+ case -ETIMEDOUT:
+ OSC_DUMP_GRANT(D_ERROR, cli,
+ "try to reserve %d.\n", bytes);
+ osc_extent_tree_dump(D_ERROR, osc);
+ rc = -EDQUOT;
+ break;
+ case -EINTR:
+ /* Ensures restartability - LU-3581 */
+ rc = -ERESTARTSYS;
+ break;
+ default:
+ CDEBUG(D_CACHE, "%s: event for cache space @"
+ " %p never arrived due to %d\n",
+ cli->cl_import->imp_obd->obd_name,
+ &ocw, rc);
+ break;
+ }
cfs_list_del_init(&ocw.ocw_entry);
GOTO(out, rc);
}
EXIT;
out:
client_obd_list_unlock(&cli->cl_loi_list_lock);
- OSC_DUMP_GRANT(cli, "returned %d.\n", rc);
+ OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
RETURN(rc);
}
ocw->ocw_rc = -EDQUOT;
/* we can't dirty more */
- if ((cli->cl_dirty + CFS_PAGE_SIZE > cli->cl_dirty_max) ||
- (cfs_atomic_read(&obd_dirty_pages) + 1 >
- obd_max_dirty_pages)) {
+ if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
+ (cfs_atomic_read(&obd_unstable_pages) + 1 +
+ cfs_atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
CDEBUG(D_CACHE, "no dirty room: dirty: %ld "
"osc max %ld, sys max %d\n", cli->cl_dirty,
cli->cl_dirty_max, obd_max_dirty_pages);
CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n",
ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc);
- cfs_waitq_signal(&ocw->ocw_waitq);
+ wake_up(&ocw->ocw_waitq);
}
EXIT;
ar->ar_force_sync = 0;
}
+/* Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable. */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ obd_count page_count = desc->bd_iov_count;
+ int i;
+
+ /* No unstable page tracking */
+ if (cli->cl_cache == NULL)
+ return;
+
+ LASSERT(page_count >= 0);
+
+ for (i = 0; i < page_count; i++)
+ dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
+
+ cfs_atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
+ LASSERT(cfs_atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+
+ cfs_atomic_sub(page_count, &cli->cl_unstable_count);
+ LASSERT(cfs_atomic_read(&cli->cl_unstable_count) >= 0);
+
+ cfs_atomic_sub(page_count, &obd_unstable_pages);
+ LASSERT(cfs_atomic_read(&obd_unstable_pages) >= 0);
+
+ spin_lock(&req->rq_lock);
+ req->rq_committed = 1;
+ req->rq_unstable = 0;
+ spin_unlock(&req->rq_lock);
+
+ wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+}
+
+/* "unstable" page accounting. See: osc_dec_unstable_pages. */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ obd_count page_count = desc->bd_iov_count;
+ int i;
+
+ /* No unstable page tracking */
+ if (cli->cl_cache == NULL)
+ return;
+
+ LASSERT(page_count >= 0);
+
+ for (i = 0; i < page_count; i++)
+ inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
+
+ LASSERT(cfs_atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+ cfs_atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+ LASSERT(cfs_atomic_read(&cli->cl_unstable_count) >= 0);
+ cfs_atomic_add(page_count, &cli->cl_unstable_count);
+
+ LASSERT(cfs_atomic_read(&obd_unstable_pages) >= 0);
+ cfs_atomic_add(page_count, &obd_unstable_pages);
+
+ spin_lock(&req->rq_lock);
+
+ /* If the request has already been committed (i.e. brw_commit
+ * called via rq_commit_cb), we need to undo the unstable page
+ * increments we just performed because rq_commit_cb wont be
+ * called again. Otherwise, just set the commit callback so the
+ * unstable page accounting is properly updated when the request
+ * is committed */
+ if (req->rq_committed) {
+ /* Drop lock before calling osc_dec_unstable_pages */
+ spin_unlock(&req->rq_lock);
+ osc_dec_unstable_pages(req);
+ spin_lock(&req->rq_lock);
+ } else {
+ req->rq_unstable = 1;
+ req->rq_commit_cb = osc_dec_unstable_pages;
+ }
+
+ spin_unlock(&req->rq_lock);
+}
+
/* this must be called holding the loi list lock to give coverage to exit_cache,
* async_flag maintenance, and oap_request */
static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
ENTRY;
if (oap->oap_request != NULL) {
+ if (rc == 0)
+ osc_inc_unstable_pages(oap->oap_request);
+
xid = ptlrpc_req_xid(oap->oap_request);
ptlrpc_req_finished(oap->oap_request);
oap->oap_request = NULL;
RETURN(0);
cfs_list_for_each_entry(tmp, rpclist, oe_link) {
- EASSERT(tmp->oe_owner == cfs_current(), tmp);
+ EASSERT(tmp->oe_owner == current, tmp);
#if 0
if (overlapped(tmp, ext)) {
OSC_EXTENT_DUMP(D_ERROR, tmp, "overlapped %p.\n", ext);
*pc += ext->oe_nr_pages;
cfs_list_move_tail(&ext->oe_link, rpclist);
- ext->oe_owner = cfs_current();
+ ext->oe_owner = current;
RETURN(1);
}
while ((osc = osc_next_obj(cli)) != NULL) {
struct cl_object *obj = osc2cl(osc);
- struct lu_ref_link *link;
+ struct lu_ref_link link;
OSC_IO_DEBUG(osc, "%lu in flight\n", rpcs_in_flight(cli));
cl_object_get(obj);
client_obd_list_unlock(&cli->cl_loi_list_lock);
- link = lu_object_ref_add(&obj->co_lu, "check", cfs_current());
+ lu_object_ref_add_at(&obj->co_lu, &link, "check",
+ current);
/* attempt some read/write balancing by alternating between
* reads and writes in an object. The makes_rpc checks here
osc_object_unlock(osc);
osc_list_maint(cli, osc);
- lu_object_ref_del_at(&obj->co_lu, link, "check", cfs_current());
+ lu_object_ref_del_at(&obj->co_lu, &link, "check",
+ current);
cl_object_put(env, obj);
client_obd_list_lock(&cli->cl_loi_list_lock);
}
int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
- cfs_page_t *page, loff_t offset)
+ struct page *page, loff_t offset)
{
struct obd_export *exp = osc_export(osc);
struct osc_async_page *oap = &ops->ops_oap;
RETURN(rc);
}
+ if (osc_over_unstable_soft_limit(cli))
+ brw_flags |= OBD_BRW_SOFT_SYNC;
+
oap->oap_cmd = cmd;
oap->oap_page_off = ops->ops_from;
oap->oap_count = ops->ops_to - ops->ops_from;
+ /* No need to hold a lock here,
+ * since this page is not in any list yet. */
oap->oap_async_flags = 0;
oap->oap_brw_flags = brw_flags;
oap->oap_async_flags |= ASYNC_READY|ASYNC_URGENT;
spin_unlock(&oap->oap_lock);
- if (cfs_memory_pressure_get())
+ if (memory_pressure_get())
ext->oe_memalloc = 1;
ext->oe_urgent = 1;
* should take care of it. */
rc = osc_extent_wait(env, waiting, OES_INV);
if (rc < 0)
- OSC_EXTENT_DUMP(D_CACHE, ext, "wait error: %d.\n", rc);
+ OSC_EXTENT_DUMP(D_CACHE, waiting, "error: %d.\n", rc);
osc_extent_put(env, waiting);
waiting = NULL;
EASSERT(ext->oe_start >= start &&
ext->oe_max_end <= end, ext);
osc_extent_state_set(ext, OES_LOCKING);
- ext->oe_owner = cfs_current();
+ ext->oe_owner = current;
cfs_list_move_tail(&ext->oe_link,
&discard_list);
osc_update_pending(obj, OBD_BRW_WRITE,
result = rc;
}
- OSC_IO_DEBUG(obj, "cache page out.\n");
+ OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
RETURN(result);
}