* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2014, Intel Corporation.
+ * Copyright (c) 2012, 2015, Intel Corporation.
*
*/
/*
static int osc_io_unplug_async(const struct lu_env *env,
struct client_obd *cli, struct osc_object *osc);
static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
- unsigned int lost_grant);
+ unsigned int lost_grant, unsigned int dirty_grant);
static void osc_extent_tree_dump0(int level, struct osc_object *obj,
const char *func, int line);
#define osc_extent_tree_dump(lvl, obj) \
osc_extent_tree_dump0(lvl, obj, __func__, __LINE__)
+static void osc_unreserve_grant(struct client_obd *cli, unsigned int reserved,
+ unsigned int unused);
+
/** \addtogroup osc
* @{
*/
/* ----- part 4 ----- */ \
## __VA_ARGS__); \
if (lvl == D_ERROR && __ext->oe_dlmlock != NULL) \
- LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext); \
+ LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \
else \
- LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext); \
+ LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \
} while (0)
#undef EASSERTF
{
struct osc_extent *ext;
- OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_IOFS);
+ OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_NOFS);
if (ext == NULL)
return NULL;
RB_CLEAR_NODE(&ext->oe_node);
ext->oe_obj = obj;
+ cl_object_get(osc2cl(obj));
atomic_set(&ext->oe_refc, 1);
atomic_set(&ext->oe_users, 0);
INIT_LIST_HEAD(&ext->oe_link);
LDLM_LOCK_PUT(ext->oe_dlmlock);
ext->oe_dlmlock = NULL;
}
+ cl_object_put(env, osc2cl(ext->oe_obj));
osc_extent_free(ext);
}
}
/**
* This function is used to merge extents to get better performance. It checks
 * if @cur and @victim are contiguous at chunk level.
*/
static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
struct osc_extent *victim)
{
- struct osc_object *obj = cur->oe_obj;
- pgoff_t chunk_start;
- pgoff_t chunk_end;
- int ppc_bits;
+ struct osc_object *obj = cur->oe_obj;
+ struct client_obd *cli = osc_cli(obj);
+ pgoff_t chunk_start;
+ pgoff_t chunk_end;
+ int ppc_bits;
LASSERT(cur->oe_state == OES_CACHE);
LASSERT(osc_object_is_locked(obj));
chunk_end + 1 != victim->oe_start >> ppc_bits)
return -ERANGE;
+ /* overall extent size should not exceed the max supported limit
+ * reported by the server */
+ if (cur->oe_end - cur->oe_start + 1 +
+ victim->oe_end - victim->oe_start + 1 > cli->cl_max_extent_pages)
+ return -ERANGE;
+
OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur);
cur->oe_start = min(cur->oe_start, victim->oe_start);
cur->oe_end = max(cur->oe_end, victim->oe_end);
- cur->oe_grants += victim->oe_grants;
+ /* per-extent tax should be accounted only once for the whole extent */
+ cur->oe_grants += victim->oe_grants - cli->cl_grant_extent_tax;
cur->oe_nr_pages += victim->oe_nr_pages;
/* only the following bits are needed to merge */
cur->oe_urgent |= victim->oe_urgent;
int osc_extent_release(const struct lu_env *env, struct osc_extent *ext)
{
struct osc_object *obj = ext->oe_obj;
+ struct client_obd *cli = osc_cli(obj);
int rc = 0;
ENTRY;
osc_extent_state_set(ext, OES_TRUNC);
ext->oe_trunc_pending = 0;
} else {
+ int grant = 0;
+
osc_extent_state_set(ext, OES_CACHE);
osc_update_pending(obj, OBD_BRW_WRITE,
ext->oe_nr_pages);
/* try to merge the previous and next extent. */
- osc_extent_merge(env, ext, prev_extent(ext));
- osc_extent_merge(env, ext, next_extent(ext));
+ if (osc_extent_merge(env, ext, prev_extent(ext)) == 0)
+ grant += cli->cl_grant_extent_tax;
+ if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
+ grant += cli->cl_grant_extent_tax;
+ if (grant > 0)
+ osc_unreserve_grant(cli, 0, grant);
if (ext->oe_urgent)
list_move_tail(&ext->oe_link,
}
osc_object_unlock(obj);
- osc_io_unplug_async(env, osc_cli(obj), obj);
+ osc_io_unplug_async(env, cli, obj);
}
osc_extent_put(env, ext);
RETURN(rc);
descr = &olck->ols_cl.cls_lock->cll_descr;
LASSERT(descr->cld_mode >= CLM_WRITE);
- LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
- ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+ LASSERTF(cli->cl_chunkbits >= PAGE_SHIFT,
+ "chunkbits: %u\n", cli->cl_chunkbits);
+ ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
chunk_mask = ~((1 << ppc_bits) - 1);
chunksize = 1 << cli->cl_chunkbits;
chunk = index >> ppc_bits;
- /* align end to rpc edge, rpc size may not be a power 2 integer. */
+ /* align end to RPC edge. */
max_pages = cli->cl_max_pages_per_rpc;
- LASSERT((max_pages & ~chunk_mask) == 0);
+ if ((max_pages & ~chunk_mask) != 0) {
+ CERROR("max_pages: %#x chunkbits: %u chunk_mask: %#lx\n",
+ max_pages, cli->cl_chunkbits, chunk_mask);
+ RETURN(ERR_PTR(-EINVAL));
+ }
max_end = index - (index % max_pages) + max_pages - 1;
max_end = min_t(pgoff_t, max_end, descr->cld_end);
}
/* grants have been allocated by the caller */
- LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
- "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax);
+ LASSERTF(*grants >= chunksize + cli->cl_grant_extent_tax,
+ "%u/%u/%u.\n", *grants, chunksize, cli->cl_grant_extent_tax);
LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR"\n",
EXTPARA(cur));
continue;
}
+ /* check whether maximum extent size will be hit */
+ if ((ext_chk_end - ext_chk_start + 1 + 1) << ppc_bits >
+ cli->cl_max_extent_pages) {
+ ext = next_extent(ext);
+ continue;
+ }
+
/* it's required that an extent must be contiguous at chunk
* level so that we know the whole extent is covered by grant
* (the pages in the extent are NOT required to be contiguous).
* in a gap */
if (osc_extent_merge(env, ext, next_extent(ext)) == 0)
/* we can save extent tax from next extent */
- *grants += cli->cl_extent_tax;
+ *grants += cli->cl_grant_extent_tax;
found = osc_extent_hold(ext);
}
} else if (conflict == NULL) {
/* create a new extent */
EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur);
- cur->oe_grants = chunksize + cli->cl_extent_tax;
+ cur->oe_grants = chunksize + cli->cl_grant_extent_tax;
*grants -= cur->oe_grants;
LASSERT(*grants >= 0);
lost_grant = PAGE_CACHE_SIZE - count;
}
if (ext->oe_grants > 0)
- osc_free_grant(cli, nr_pages, lost_grant);
+ osc_free_grant(cli, nr_pages, lost_grant, ext->oe_grants);
osc_extent_remove(ext);
/* put the refcount for RPC */
static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
bool partial)
{
- struct cl_env_nest nest;
struct lu_env *env;
struct cl_io *io;
struct osc_object *obj = ext->oe_obj;
int grants = 0;
int nr_pages = 0;
int rc = 0;
+ __u16 refcheck;
ENTRY;
LASSERT(sanity_check(ext) == 0);
/* Request new lu_env.
* We can't use that env from osc_cache_truncate_start() because
* it's from lov_io_sub and not fully initialized. */
- env = cl_env_nested_get(&nest);
+ env = cl_env_get(&refcheck);
io = &osc_env_info(env)->oti_io;
io->ci_obj = cl_object_top(osc2cl(obj));
+ io->ci_ignore_layout = 1;
rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
if (rc < 0)
GOTO(out, rc);
osc_object_unlock(obj);
if (grants > 0 || nr_pages > 0)
- osc_free_grant(cli, nr_pages, grants);
+ osc_free_grant(cli, nr_pages, grants, grants);
out:
cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
+ cl_env_put(env, &refcheck);
RETURN(rc);
}
GOTO(out, rc = 0);
LASSERT(end_chunk + 1 == chunk);
+
/* try to expand this extent to cover @index */
end_index = min(ext->oe_max_end, ((chunk + 1) << ppc_bits) - 1);
+ /* don't go over the maximum extent size reported by server */
+ if (end_index - ext->oe_start + 1 > cli->cl_max_extent_pages)
+ GOTO(out, rc = -ERANGE);
+
next = next_extent(ext);
if (next != NULL && next->oe_start <= end_index)
/* complex mode - overlapped with the next extent,
#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \
struct client_obd *__tmp = (cli); \
- CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \
- "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \
- "lru {in list: %ld, left: %ld, waiters: %d }"fmt"\n", \
+ CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu " \
+ "dropped: %ld avail: %ld, dirty_grant: %ld, " \
+ "reserved: %ld, flight: %d } lru {in list: %ld, " \
+ "left: %ld, waiters: %d }" fmt "\n", \
cli_name(__tmp), \
__tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \
atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
+ __tmp->cl_dirty_grant, \
__tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \
atomic_long_read(&__tmp->cl_lru_in_list), \
atomic_long_read(&__tmp->cl_lru_busy), \
if (unused > reserved) {
cli->cl_avail_grant += reserved;
cli->cl_lost_grant += unused - reserved;
+ cli->cl_dirty_grant -= unused - reserved;
} else {
cli->cl_avail_grant += unused;
+ cli->cl_dirty_grant += reserved - unused;
}
}
* See filter_grant_check() for details.
*/
static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
- unsigned int lost_grant)
+ unsigned int lost_grant, unsigned int dirty_grant)
{
- unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ unsigned long grant;
+
+ grant = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax;
spin_lock(&cli->cl_loi_list_lock);
atomic_long_sub(nr_pages, &obd_dirty_pages);
cli->cl_dirty_pages -= nr_pages;
cli->cl_lost_grant += lost_grant;
+ cli->cl_dirty_grant -= dirty_grant;
if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
/* borrow some grant from truncate to avoid the case that
* truncate uses up all avail grant */
}
osc_wake_cache_waiters(cli);
spin_unlock(&cli->cl_loi_list_lock);
- CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
+ CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu/%lu\n",
lost_grant, cli->cl_lost_grant,
- cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
+ cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT,
+ cli->cl_dirty_grant);
}
/**
}
if (tmp->oe_srvlock != ext->oe_srvlock ||
- !tmp->oe_grants != !ext->oe_grants)
+ !tmp->oe_grants != !ext->oe_grants ||
+ tmp->oe_no_merge || ext->oe_no_merge)
RETURN(0);
/* remove break for strict check */
oap->oap_obj_off = offset;
LASSERT(!(offset & ~PAGE_MASK));
- if (!client_is_remote(exp) && cfs_capable(CFS_CAP_SYS_RESOURCE))
+ if (cfs_capable(CFS_CAP_SYS_RESOURCE))
oap->oap_brw_flags = OBD_BRW_NOQUOTA;
INIT_LIST_HEAD(&oap->oap_pending_item);
/* Set the OBD_BRW_SRVLOCK before the page is queued. */
brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0;
- if (!client_is_remote(osc_export(osc)) &&
- cfs_capable(CFS_CAP_SYS_RESOURCE)) {
+ if (cfs_capable(CFS_CAP_SYS_RESOURCE)) {
brw_flags |= OBD_BRW_NOQUOTA;
cmd |= OBD_BRW_NOQUOTA;
}
if (!(cmd & OBD_BRW_NOQUOTA)) {
struct cl_object *obj;
struct cl_attr *attr;
- unsigned int qid[MAXQUOTAS];
+ unsigned int qid[LL_MAXQUOTAS];
obj = cl_object_top(&osc->oo_cl);
attr = &osc_env_info(env)->oti_attr;
if (ext != NULL && ext->oe_start <= index && ext->oe_max_end >= index) {
/* one chunk plus extent overhead must be enough to write this
* page */
- grants = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ grants = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax;
if (ext->oe_end >= index)
grants = 0;
}
if (ext == NULL) {
- tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+ tmp = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax;
/* try to find new extent to cover this page */
LASSERT(oio->oi_active == NULL);
struct osc_async_page *oap;
int page_count = 0;
int mppr = cli->cl_max_pages_per_rpc;
+ bool can_merge = true;
pgoff_t start = CL_PAGE_EOF;
pgoff_t end = 0;
ENTRY;
list_for_each_entry(oap, list, oap_pending_item) {
- pgoff_t index = osc_index(oap2osc(oap));
+ struct osc_page *opg = oap2osc_page(oap);
+ pgoff_t index = osc_index(opg);
+
if (index > end)
end = index;
if (index < start)
start = index;
++page_count;
mppr <<= (page_count > mppr);
+
+ if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE))
+ can_merge = false;
}
ext = osc_extent_alloc(obj);
ext->oe_rw = !!(cmd & OBD_BRW_READ);
ext->oe_sync = 1;
+ ext->oe_no_merge = !can_merge;
ext->oe_urgent = 1;
ext->oe_start = start;
ext->oe_end = ext->oe_max_end = end;
* a page already having been flushed by write_page().
* We have to wait for this extent because we can't
* truncate that page. */
- LASSERT(!ext->oe_hp);
OSC_EXTENT_DUMP(D_CACHE, ext,
"waiting for busy extent\n");
waiting = osc_extent_get(ext);