X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosc%2Fosc_cache.c;h=27a034efd154bbf0bc963a514d0b41f41b5174bf;hp=0b01b36a6c352d5320af62a51735ed6c3324903e;hb=13834f5aeef42d3c358574ac59475c0758dce300;hpb=576f1994ae796a21fd4e4646102cdee64fdf5f83;ds=sidebyside diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 0b01b36..27a034e 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -27,7 +27,7 @@ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2014, Intel Corporation. + * Copyright (c) 2012, 2015, Intel Corporation. * */ /* @@ -58,13 +58,16 @@ static int osc_refresh_count(const struct lu_env *env, static int osc_io_unplug_async(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc); static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, - unsigned int lost_grant); + unsigned int lost_grant, unsigned int dirty_grant); static void osc_extent_tree_dump0(int level, struct osc_object *obj, const char *func, int line); #define osc_extent_tree_dump(lvl, obj) \ osc_extent_tree_dump0(lvl, obj, __func__, __LINE__) +static void osc_unreserve_grant(struct client_obd *cli, unsigned int reserved, + unsigned int unused); + /** \addtogroup osc * @{ */ @@ -127,9 +130,9 @@ static const char *oes_strings[] = { /* ----- part 4 ----- */ \ ## __VA_ARGS__); \ if (lvl == D_ERROR && __ext->oe_dlmlock != NULL) \ - LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext); \ + LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \ else \ - LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext); \ + LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \ } while (0) #undef EASSERTF @@ -319,12 +322,13 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj) { struct osc_extent *ext; - OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_IOFS); + OBD_SLAB_ALLOC_PTR_GFP(ext, osc_extent_kmem, GFP_NOFS); if (ext == NULL) return NULL; RB_CLEAR_NODE(&ext->oe_node); ext->oe_obj = obj; + cl_object_get(osc2cl(obj)); atomic_set(&ext->oe_refc, 1); atomic_set(&ext->oe_users, 0); INIT_LIST_HEAD(&ext->oe_link); @@ -363,6 +367,7 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext) LDLM_LOCK_PUT(ext->oe_dlmlock); ext->oe_dlmlock = NULL; } + cl_object_put(env, osc2cl(ext->oe_obj)); osc_extent_free(ext); } } @@ -495,15 +500,16 @@ static void osc_extent_remove(struct osc_extent *ext) /** * This function is used to merge extents to get better performance. It checks - * if @cur and @victim are contiguous at chunk level. + * if @cur and @victim are contiguous at block level. 
*/ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, struct osc_extent *victim) { - struct osc_object *obj = cur->oe_obj; - pgoff_t chunk_start; - pgoff_t chunk_end; - int ppc_bits; + struct osc_object *obj = cur->oe_obj; + struct client_obd *cli = osc_cli(obj); + pgoff_t chunk_start; + pgoff_t chunk_end; + int ppc_bits; LASSERT(cur->oe_state == OES_CACHE); LASSERT(osc_object_is_locked(obj)); @@ -524,11 +530,18 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, chunk_end + 1 != victim->oe_start >> ppc_bits) return -ERANGE; + /* overall extent size should not exceed the max supported limit + * reported by the server */ + if (cur->oe_end - cur->oe_start + 1 + + victim->oe_end - victim->oe_start + 1 > cli->cl_max_extent_pages) + return -ERANGE; + OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur); cur->oe_start = min(cur->oe_start, victim->oe_start); cur->oe_end = max(cur->oe_end, victim->oe_end); - cur->oe_grants += victim->oe_grants; + /* per-extent tax should be accounted only once for the whole extent */ + cur->oe_grants += victim->oe_grants - cli->cl_grant_extent_tax; cur->oe_nr_pages += victim->oe_nr_pages; /* only the following bits are needed to merge */ cur->oe_urgent |= victim->oe_urgent; @@ -551,6 +564,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) { struct osc_object *obj = ext->oe_obj; + struct client_obd *cli = osc_cli(obj); int rc = 0; ENTRY; @@ -567,13 +581,19 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) osc_extent_state_set(ext, OES_TRUNC); ext->oe_trunc_pending = 0; } else { + int grant = 0; + osc_extent_state_set(ext, OES_CACHE); osc_update_pending(obj, OBD_BRW_WRITE, ext->oe_nr_pages); /* try to merge the previous and next extent. */ - osc_extent_merge(env, ext, prev_extent(ext)); - osc_extent_merge(env, ext, next_extent(ext)); + if (osc_extent_merge(env, ext, prev_extent(ext)) == 0) + grant += cli->cl_grant_extent_tax; + if (osc_extent_merge(env, ext, next_extent(ext)) == 0) + grant += cli->cl_grant_extent_tax; + if (grant > 0) + osc_unreserve_grant(cli, 0, grant); if (ext->oe_urgent) list_move_tail(&ext->oe_link, @@ -581,7 +601,7 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) } osc_object_unlock(obj); - osc_io_unplug_async(env, osc_cli(obj), obj); + osc_io_unplug_async(env, cli, obj); } osc_extent_put(env, ext); RETURN(rc); @@ -627,15 +647,20 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, descr = &olck->ols_cl.cls_lock->cll_descr; LASSERT(descr->cld_mode >= CLM_WRITE); - LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT); - ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT; + LASSERTF(cli->cl_chunkbits >= PAGE_SHIFT, + "chunkbits: %u\n", cli->cl_chunkbits); + ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; chunk_mask = ~((1 << ppc_bits) - 1); chunksize = 1 << cli->cl_chunkbits; chunk = index >> ppc_bits; - /* align end to rpc edge, rpc size may not be a power 2 integer. */ + /* align end to RPC edge. 
*/ max_pages = cli->cl_max_pages_per_rpc; - LASSERT((max_pages & ~chunk_mask) == 0); + if ((max_pages & ~chunk_mask) != 0) { + CERROR("max_pages: %#x chunkbits: %u chunk_mask: %#lx\n", + max_pages, cli->cl_chunkbits, chunk_mask); + RETURN(ERR_PTR(-EINVAL)); + } max_end = index - (index % max_pages) + max_pages - 1; max_end = min_t(pgoff_t, max_end, descr->cld_end); @@ -656,8 +681,8 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, } /* grants has been allocated by caller */ - LASSERTF(*grants >= chunksize + cli->cl_extent_tax, - "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax); + LASSERTF(*grants >= chunksize + cli->cl_grant_extent_tax, + "%u/%u/%u.\n", *grants, chunksize, cli->cl_grant_extent_tax); LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR"\n", EXTPARA(cur)); @@ -730,6 +755,13 @@ restart: continue; } + /* check whether maximum extent size will be hit */ + if ((ext_chk_end - ext_chk_start + 1 + 1) << ppc_bits > + cli->cl_max_extent_pages) { + ext = next_extent(ext); + continue; + } + /* it's required that an extent must be contiguous at chunk * level so that we know the whole extent is covered by grant * (the pages in the extent are NOT required to be contiguous). @@ -757,7 +789,7 @@ restart: * in a gap */ if (osc_extent_merge(env, ext, next_extent(ext)) == 0) /* we can save extent tax from next extent */ - *grants += cli->cl_extent_tax; + *grants += cli->cl_grant_extent_tax; found = osc_extent_hold(ext); } @@ -778,7 +810,7 @@ restart: } else if (conflict == NULL) { /* create a new extent */ EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur); - cur->oe_grants = chunksize + cli->cl_extent_tax; + cur->oe_grants = chunksize + cli->cl_grant_extent_tax; *grants -= cur->oe_grants; LASSERT(*grants >= 0); @@ -863,7 +895,7 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, lost_grant = PAGE_CACHE_SIZE - count; } if (ext->oe_grants > 0) - osc_free_grant(cli, nr_pages, lost_grant); + osc_free_grant(cli, nr_pages, lost_grant, ext->oe_grants); osc_extent_remove(ext); /* put the refcount for RPC */ @@ -934,7 +966,6 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, bool partial) { - struct cl_env_nest nest; struct lu_env *env; struct cl_io *io; struct osc_object *obj = ext->oe_obj; @@ -948,6 +979,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, int grants = 0; int nr_pages = 0; int rc = 0; + __u16 refcheck; ENTRY; LASSERT(sanity_check(ext) == 0); @@ -957,9 +989,10 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, /* Request new lu_env. * We can't use that env from osc_cache_truncate_start() because * it's from lov_io_sub and not fully initialized. 
*/ - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); io = &osc_env_info(env)->oti_io; io->ci_obj = cl_object_top(osc2cl(obj)); + io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); if (rc < 0) GOTO(out, rc); @@ -1038,11 +1071,11 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, osc_object_unlock(obj); if (grants > 0 || nr_pages > 0) - osc_free_grant(cli, nr_pages, grants); + osc_free_grant(cli, nr_pages, grants, grants); out: cl_io_fini(env, io); - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); RETURN(rc); } @@ -1156,9 +1189,14 @@ static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, GOTO(out, rc = 0); LASSERT(end_chunk + 1 == chunk); + /* try to expand this extent to cover @index */ end_index = min(ext->oe_max_end, ((chunk + 1) << ppc_bits) - 1); + /* don't go over the maximum extent size reported by server */ + if (end_index - ext->oe_start + 1 > cli->cl_max_extent_pages) + GOTO(out, rc = -ERANGE); + next = next_extent(ext); if (next != NULL && next->oe_start <= end_index) /* complex mode - overlapped with the next extent, @@ -1325,13 +1363,15 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, #define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \ struct client_obd *__tmp = (cli); \ - CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \ - "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \ - "lru {in list: %ld, left: %ld, waiters: %d }"fmt"\n", \ + CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu " \ + "dropped: %ld avail: %ld, dirty_grant: %ld, " \ + "reserved: %ld, flight: %d } lru {in list: %ld, " \ + "left: %ld, waiters: %d }" fmt "\n", \ cli_name(__tmp), \ __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \ atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ + __tmp->cl_dirty_grant, \ __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \ atomic_long_read(&__tmp->cl_lru_in_list), \ atomic_long_read(&__tmp->cl_lru_busy), \ @@ -1405,8 +1445,10 @@ static void __osc_unreserve_grant(struct client_obd *cli, if (unused > reserved) { cli->cl_avail_grant += reserved; cli->cl_lost_grant += unused - reserved; + cli->cl_dirty_grant -= unused - reserved; } else { cli->cl_avail_grant += unused; + cli->cl_dirty_grant += reserved - unused; } } @@ -1434,14 +1476,17 @@ static void osc_unreserve_grant(struct client_obd *cli, * See filter_grant_check() for details. 
*/ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, - unsigned int lost_grant) + unsigned int lost_grant, unsigned int dirty_grant) { - unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; + unsigned long grant; + + grant = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax; spin_lock(&cli->cl_loi_list_lock); atomic_long_sub(nr_pages, &obd_dirty_pages); cli->cl_dirty_pages -= nr_pages; cli->cl_lost_grant += lost_grant; + cli->cl_dirty_grant -= dirty_grant; if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) { /* borrow some grant from truncate to avoid the case that * truncate uses up all avail grant */ @@ -1450,9 +1495,10 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, } osc_wake_cache_waiters(cli); spin_unlock(&cli->cl_loi_list_lock); - CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n", + CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu/%lu\n", lost_grant, cli->cl_lost_grant, - cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT); + cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT, + cli->cl_dirty_grant); } /** @@ -1878,7 +1924,8 @@ static int try_to_add_extent_for_io(struct client_obd *cli, } if (tmp->oe_srvlock != ext->oe_srvlock || - !tmp->oe_grants != !ext->oe_grants) + !tmp->oe_grants != !ext->oe_grants || + tmp->oe_no_merge || ext->oe_no_merge) RETURN(0); /* remove break for strict check */ @@ -2240,7 +2287,7 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, oap->oap_obj_off = offset; LASSERT(!(offset & ~PAGE_MASK)); - if (!client_is_remote(exp) && cfs_capable(CFS_CAP_SYS_RESOURCE)) + if (cfs_capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags = OBD_BRW_NOQUOTA; INIT_LIST_HEAD(&oap->oap_pending_item); @@ -2281,8 +2328,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, /* Set the OBD_BRW_SRVLOCK before the page is queued. */ brw_flags |= ops->ops_srvlock ? 
OBD_BRW_SRVLOCK : 0; - if (!client_is_remote(osc_export(osc)) && - cfs_capable(CFS_CAP_SYS_RESOURCE)) { + if (cfs_capable(CFS_CAP_SYS_RESOURCE)) { brw_flags |= OBD_BRW_NOQUOTA; cmd |= OBD_BRW_NOQUOTA; } @@ -2291,7 +2337,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, if (!(cmd & OBD_BRW_NOQUOTA)) { struct cl_object *obj; struct cl_attr *attr; - unsigned int qid[MAXQUOTAS]; + unsigned int qid[LL_MAXQUOTAS]; obj = cl_object_top(&osc->oo_cl); attr = &osc_env_info(env)->oti_attr; @@ -2331,7 +2377,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, if (ext != NULL && ext->oe_start <= index && ext->oe_max_end >= index) { /* one chunk plus extent overhead must be enough to write this * page */ - grants = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; + grants = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax; if (ext->oe_end >= index) grants = 0; @@ -2368,7 +2414,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, } if (ext == NULL) { - tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; + tmp = (1 << cli->cl_chunkbits) + cli->cl_grant_extent_tax; /* try to find new extent to cover this page */ LASSERT(oio->oi_active == NULL); @@ -2608,18 +2654,24 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, struct osc_async_page *oap; int page_count = 0; int mppr = cli->cl_max_pages_per_rpc; + bool can_merge = true; pgoff_t start = CL_PAGE_EOF; pgoff_t end = 0; ENTRY; list_for_each_entry(oap, list, oap_pending_item) { - pgoff_t index = osc_index(oap2osc(oap)); + struct osc_page *opg = oap2osc_page(oap); + pgoff_t index = osc_index(opg); + if (index > end) end = index; if (index < start) start = index; ++page_count; mppr <<= (page_count > mppr); + + if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE)) + can_merge = false; } ext = osc_extent_alloc(obj); @@ -2635,6 +2687,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, ext->oe_rw = !!(cmd & OBD_BRW_READ); ext->oe_sync = 1; + ext->oe_no_merge = !can_merge; ext->oe_urgent = 1; ext->oe_start = start; ext->oe_end = ext->oe_max_end = end; @@ -2694,7 +2747,6 @@ again: * a page already having been flushed by write_page(). * We have to wait for this extent because we can't * truncate that page. */ - LASSERT(!ext->oe_hp); OSC_EXTENT_DUMP(D_CACHE, ext, "waiting for busy extent\n"); waiting = osc_extent_get(ext);
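
As a rough illustration of the grant arithmetic this patch changes in osc_extent_merge() and osc_extent_release(): each cached extent reserves its chunks plus one per-extent cl_grant_extent_tax, so when two extents merge only one tax is still needed and the spare one can be handed back via osc_unreserve_grant(). The standalone sketch below (not Lustre code) models that accounting; toy_cli, toy_extent and toy_merge() are hypothetical stand-ins, and only the field names cl_grant_extent_tax, cl_avail_grant and oe_grants are taken from the patch.

/*
 * Standalone sketch: how per-extent grant is combined when two cached
 * extents merge.  Structs are simplified stand-ins, not the real
 * client_obd/osc_extent.
 */
#include <stdio.h>

struct toy_cli {
	unsigned int  cl_grant_extent_tax;	/* per-extent overhead, bytes */
	unsigned long cl_avail_grant;		/* grant available for reuse */
};

struct toy_extent {
	unsigned int  oe_grants;		/* grant reserved for this extent */
};

/* Merge @victim into @cur: the merged extent needs only one extent tax,
 * so the duplicate tax carried by @victim is dropped here and can later
 * be returned to cl_avail_grant, as osc_extent_release() does through
 * osc_unreserve_grant() in the patch. */
static unsigned int toy_merge(struct toy_cli *cli, struct toy_extent *cur,
			      struct toy_extent *victim)
{
	cur->oe_grants += victim->oe_grants - cli->cl_grant_extent_tax;
	return cli->cl_grant_extent_tax;	/* tax freed by the merge */
}

int main(void)
{
	struct toy_cli cli = { .cl_grant_extent_tax = 4096,
			       .cl_avail_grant = 0 };
	/* each extent was created with one 64K chunk plus one tax */
	struct toy_extent cur    = { .oe_grants = 65536 + 4096 };
	struct toy_extent victim = { .oe_grants = 65536 + 4096 };
	unsigned int freed;

	freed = toy_merge(&cli, &cur, &victim);
	cli.cl_avail_grant += freed;		/* "unreserve" the spare tax */

	/* merged extent: 2 chunks + 1 tax = 135168 bytes; 4096 returned */
	printf("merged oe_grants=%u, returned grant=%lu\n",
	       cur.oe_grants, cli.cl_avail_grant);
	return 0;
}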
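
The patch also caps extent growth at cl_max_extent_pages, the maximum extent size in pages reported by the server, in osc_extent_merge(), osc_extent_find() and osc_extent_expand(). The sketch below (again not Lustre code) shows only that size guard; the structs and the toy_merge_size_ok() helper are hypothetical, with just cl_max_extent_pages, oe_start and oe_end borrowed from the patch.

/*
 * Standalone sketch: refuse a merge when the combined extent would
 * exceed the server-reported maximum extent size (in pages).
 */
#include <stdio.h>
#include <errno.h>

typedef unsigned long pgoff_t;

struct toy_cli {
	unsigned int cl_max_extent_pages;	/* server-reported limit */
};

struct toy_extent {
	pgoff_t oe_start;	/* first page index covered */
	pgoff_t oe_end;		/* last page index covered */
};

static int toy_merge_size_ok(const struct toy_cli *cli,
			     const struct toy_extent *cur,
			     const struct toy_extent *victim)
{
	pgoff_t pages = (cur->oe_end - cur->oe_start + 1) +
			(victim->oe_end - victim->oe_start + 1);

	return pages <= cli->cl_max_extent_pages ? 0 : -ERANGE;
}

int main(void)
{
	struct toy_cli cli = { .cl_max_extent_pages = 4096 }; /* e.g. 16MB of 4K pages */
	struct toy_extent cur    = { .oe_start = 0,    .oe_end = 2047 };
	struct toy_extent victim = { .oe_start = 2048, .oe_end = 4095 };
	struct toy_extent big    = { .oe_start = 4096, .oe_end = 8191 };

	printf("merge cur+victim: %d\n", toy_merge_size_ok(&cli, &cur, &victim)); /* 0 */
	printf("merge cur+big:    %d\n", toy_merge_size_ok(&cli, &cur, &big));    /* -ERANGE */
	return 0;
}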