X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosc%2Fosc_cache.c;h=2481e51d460d976ce7638e5b6b2ccab72a477287;hb=6bce536725efd166d2772f13fe954f271f9c53b8;hp=ea721ba5a245abdff85b45441abda2af4decd859;hpb=9c028e74c2202a8a481557c4cb22225734aaf19f;p=fs%2Flustre-release.git diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index ea721ba..2481e51 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -220,6 +220,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, GOTO(out, rc = 60); if (ext->oe_fsync_wait && !ext->oe_urgent && !ext->oe_hp) GOTO(out, rc = 65); + /* fallthrough */ default: if (atomic_read(&ext->oe_users) > 0) GOTO(out, rc = 70); @@ -584,6 +585,7 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) * osc_cache_truncate_start(). */ osc_extent_state_set(ext, OES_TRUNC); ext->oe_trunc_pending = 0; + osc_object_unlock(obj); } else { int grant = 0; @@ -596,8 +598,6 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) grant += cli->cl_grant_extent_tax; if (osc_extent_merge(env, ext, next_extent(ext)) == 0) grant += cli->cl_grant_extent_tax; - if (grant > 0) - osc_unreserve_grant(cli, 0, grant); if (ext->oe_urgent) list_move_tail(&ext->oe_link, @@ -606,8 +606,10 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) list_move_tail(&ext->oe_link, &obj->oo_full_exts); } + osc_object_unlock(obj); + if (grant > 0) + osc_unreserve_grant(cli, 0, grant); } - osc_object_unlock(obj); osc_io_unplug_async(env, cli, obj); } @@ -615,7 +617,8 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) RETURN(rc); } -static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2) +static inline bool +overlapped(const struct osc_extent *ex1, const struct osc_extent *ex2) { return !(ex1->oe_end < ex2->oe_start || ex2->oe_end < ex1->oe_start); } @@ -930,8 +933,6 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, enum osc_extent_state state) { struct osc_object *obj = ext->oe_obj; - struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL, - LWI_ON_SIGNAL_NOOP, NULL); int rc = 0; ENTRY; @@ -953,18 +954,19 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, osc_extent_release(env, ext); /* wait for the extent until its state becomes @state */ - rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi); - if (rc == -ETIMEDOUT) { + rc = wait_event_idle_timeout(ext->oe_waitq, extent_wait_cb(ext, state), + cfs_time_seconds(600)); + if (rc == 0) { OSC_EXTENT_DUMP(D_ERROR, ext, "%s: wait ext to %u timedout, recovery in progress?\n", cli_name(osc_cli(obj)), state); - lwi = LWI_INTR(NULL, NULL); - rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), - &lwi); + wait_event_idle(ext->oe_waitq, extent_wait_cb(ext, state)); } - if (rc == 0 && ext->oe_rc < 0) + if (ext->oe_rc < 0) rc = ext->oe_rc; + else + rc = 0; RETURN(rc); } @@ -1326,7 +1328,7 @@ static int osc_refresh_count(const struct lu_env *env, return 0; else if (cl_offset(obj, index + 1) > kms) /* catch sub-page write at end of file */ - return kms % PAGE_SIZE; + return kms & ~PAGE_MASK; else return PAGE_SIZE; } @@ -1426,11 +1428,6 @@ static void osc_release_write_grant(struct client_obd *cli, pga->flag &= ~OBD_BRW_FROM_GRANT; atomic_long_dec(&obd_dirty_pages); cli->cl_dirty_pages--; - if (pga->flag & OBD_BRW_NOCACHE) { - pga->flag &= ~OBD_BRW_NOCACHE; - atomic_long_dec(&obd_dirty_transit_pages); - cli->cl_dirty_transit--; - } EXIT; } @@ -1470,13 +1467,20 @@ static void __osc_unreserve_grant(struct client_obd *cli, } } -static void osc_unreserve_grant(struct client_obd *cli, - unsigned int reserved, unsigned int unused) +static void osc_unreserve_grant_nolock(struct client_obd *cli, + unsigned int reserved, + unsigned int unused) { - spin_lock(&cli->cl_loi_list_lock); __osc_unreserve_grant(cli, reserved, unused); if (unused > 0) osc_wake_cache_waiters(cli); +} + +static void osc_unreserve_grant(struct client_obd *cli, + unsigned int reserved, unsigned int unused) +{ + spin_lock(&cli->cl_loi_list_lock); + osc_unreserve_grant_nolock(cli, reserved, unused); spin_unlock(&cli->cl_loi_list_lock); } @@ -1536,7 +1540,7 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) */ static int osc_enter_cache_try(struct client_obd *cli, struct osc_async_page *oap, - int bytes, int transient) + int bytes) { int rc; @@ -1550,11 +1554,6 @@ static int osc_enter_cache_try(struct client_obd *cli, if (atomic_long_add_return(1, &obd_dirty_pages) <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); - if (transient) { - cli->cl_dirty_transit++; - atomic_long_inc(&obd_dirty_transit_pages); - oap->oap_brw_flags |= OBD_BRW_NOCACHE; - } rc = 1; goto out; } else @@ -1588,13 +1587,9 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc = oap->oap_obj; struct lov_oinfo *loi = osc->oo_oinfo; struct osc_cache_waiter ocw; - struct l_wait_info lwi; int rc = -EDQUOT; ENTRY; - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(AT_OFF ? obd_timeout : at_max), - NULL, LWI_ON_SIGNAL_NOOP, NULL); - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes); spin_lock(&cli->cl_loi_list_lock); @@ -1610,7 +1605,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, /* Hopefully normal case - cache space and write credits available */ if (list_empty(&cli->cl_cache_waiters) && - osc_enter_cache_try(cli, oap, bytes, 0)) { + osc_enter_cache_try(cli, oap, bytes)) { OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n"); GOTO(out, rc = 0); } @@ -1634,13 +1629,19 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n", cli_name(cli), &ocw, oap); - rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); + rc = wait_event_idle_timeout(ocw.ocw_waitq, + ocw_granted(cli, &ocw), + cfs_time_seconds(AT_OFF ? + obd_timeout : + at_max)); spin_lock(&cli->cl_loi_list_lock); - if (rc < 0) { + if (rc <= 0) { /* l_wait_event is interrupted by signal or timed out */ list_del_init(&ocw.ocw_entry); + if (rc == 0) + rc = -ETIMEDOUT; break; } LASSERT(list_empty(&ocw.ocw_entry)); @@ -1648,7 +1649,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, if (rc != -EDQUOT) break; - if (osc_enter_cache_try(cli, oap, bytes, 0)) { + if (osc_enter_cache_try(cli, oap, bytes)) { rc = 0; break; } @@ -1698,7 +1699,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw->ocw_rc = -EDQUOT; - if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0)) + if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant)) ocw->ocw_rc = 0; if (ocw->ocw_rc == 0 || @@ -1918,6 +1919,31 @@ static inline unsigned osc_extent_chunks(const struct osc_extent *ext) return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1; } +static inline bool +can_merge(const struct osc_extent *ext, const struct osc_extent *in_rpc) +{ + if (ext->oe_no_merge || in_rpc->oe_no_merge) + return false; + + if (ext->oe_srvlock != in_rpc->oe_srvlock) + return false; + + if (ext->oe_ndelay != in_rpc->oe_ndelay) + return false; + + if (!ext->oe_grants != !in_rpc->oe_grants) + return false; + + if (ext->oe_dio != in_rpc->oe_dio) + return false; + + /* It's possible to have overlap on DIO */ + if (in_rpc->oe_dio && overlapped(ext, in_rpc)) + return false; + + return true; +} + /** * Try to add extent to one RPC. We need to think about the following things: * - # of pages must not be over max_pages_per_rpc @@ -1929,9 +1955,6 @@ static int try_to_add_extent_for_io(struct client_obd *cli, { struct osc_extent *tmp; unsigned int chunk_count; - struct osc_async_page *oap = list_first_entry(&ext->oe_pages, - struct osc_async_page, - oap_pending_item); ENTRY; EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), @@ -1960,26 +1983,9 @@ static int try_to_add_extent_for_io(struct client_obd *cli, RETURN(0); list_for_each_entry(tmp, data->erd_rpc_list, oe_link) { - struct osc_async_page *oap2; - oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page, - oap_pending_item); EASSERT(tmp->oe_owner == current, tmp); -#if 0 - if (overlapped(tmp, ext)) { - OSC_EXTENT_DUMP(D_ERROR, tmp, "overlapped %p.\n", ext); - EASSERT(0, ext); - } -#endif - if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) { - CDEBUG(D_CACHE, "Do not permit different types of IO " - "in one RPC\n"); - RETURN(0); - } - if (tmp->oe_srvlock != ext->oe_srvlock || - !tmp->oe_grants != !ext->oe_grants || - tmp->oe_ndelay != ext->oe_ndelay || - tmp->oe_no_merge || ext->oe_no_merge) + if (!can_merge(ext, tmp)) RETURN(0); /* remove break for strict check */ @@ -2077,7 +2083,7 @@ osc_send_write_rpc(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc) __must_hold(osc) { - struct list_head rpclist = LIST_HEAD_INIT(rpclist); + LIST_HEAD(rpclist); struct osc_extent *ext; struct osc_extent *tmp; struct osc_extent *first = NULL; @@ -2153,7 +2159,7 @@ __must_hold(osc) { struct osc_extent *ext; struct osc_extent *next; - struct list_head rpclist = LIST_HEAD_INIT(rpclist); + LIST_HEAD(rpclist); struct extent_rpc_data data = { .erd_rpc_list = &rpclist, .erd_page_count = 0, @@ -2347,13 +2353,14 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, EXPORT_SYMBOL(osc_prep_async_page); int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops) + struct osc_page *ops, cl_commit_cbt cb) { struct osc_io *oio = osc_env_io(env); struct osc_extent *ext = NULL; struct osc_async_page *oap = &ops->ops_oap; struct client_obd *cli = oap->oap_cli; struct osc_object *osc = oap->oap_obj; + struct pagevec *pvec = &osc_env_info(env)->oti_pagevec; pgoff_t index; unsigned int tmp; unsigned int grants = 0; @@ -2431,8 +2438,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, /* it doesn't need any grant to dirty this page */ spin_lock(&cli->cl_loi_list_lock); - rc = osc_enter_cache_try(cli, oap, grants, 0); - spin_unlock(&cli->cl_loi_list_lock); + rc = osc_enter_cache_try(cli, oap, grants); if (rc == 0) { /* try failed */ grants = 0; need_release = 1; @@ -2446,10 +2452,11 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, } else { OSC_EXTENT_DUMP(D_CACHE, ext, "expanded for %lu.\n", index); - osc_unreserve_grant(cli, grants, tmp); + osc_unreserve_grant_nolock(cli, grants, tmp); grants = 0; } } + spin_unlock(&cli->cl_loi_list_lock); rc = 0; } else if (ext != NULL) { /* index is located outside of active extent */ @@ -2472,7 +2479,14 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, rc = 0; if (grants == 0) { - /* we haven't allocated grant for this page. */ + /* We haven't allocated grant for this page, and we + * must not hold a page lock while we do enter_cache, + * so we must mark dirty & unlock any pages in the + * write commit pagevec. */ + if (pagevec_count(pvec)) { + cb(env, io, pvec); + pagevec_reinit(pvec); + } rc = osc_enter_cache(env, cli, oap, tmp); if (rc == 0) grants = tmp; @@ -2747,6 +2761,7 @@ int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io, ext->oe_obj = obj; ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK); ext->oe_ndelay = !!(brw_flags & OBD_BRW_NDELAY); + ext->oe_dio = !!(brw_flags & OBD_BRW_NOCACHE); ext->oe_nr_pages = page_count; ext->oe_mppr = mppr; list_splice_init(list, &ext->oe_pages); @@ -2778,7 +2793,7 @@ int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, struct osc_extent *ext; struct osc_extent *waiting = NULL; pgoff_t index; - struct list_head list = LIST_HEAD_INIT(list); + LIST_HEAD(list); int result = 0; bool partial; ENTRY; @@ -2996,7 +3011,7 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end, int hp, int discard) { struct osc_extent *ext; - struct list_head discard_list = LIST_HEAD_INIT(discard_list); + LIST_HEAD(discard_list); bool unplug = false; int result = 0; ENTRY; @@ -3029,10 +3044,25 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, list_move_tail(&ext->oe_link, list); unplug = true; } else { + struct client_obd *cli = osc_cli(obj); + int pcc_bits = cli->cl_chunkbits - PAGE_SHIFT; + pgoff_t align_by = (1 << pcc_bits); + pgoff_t a_start = round_down(start, align_by); + pgoff_t a_end = round_up(end, align_by); + + /* overflow case */ + if (end && !a_end) + a_end = CL_PAGE_EOF; /* the only discarder is lock cancelling, so - * [start, end] must contain this extent */ - EASSERT(ext->oe_start >= start && - ext->oe_end <= end, ext); + * [start, end], aligned by chunk size, must + * contain this extent */ + LASSERTF(ext->oe_start >= a_start && + ext->oe_end <= a_end, + "ext [%lu, %lu] reg [%lu, %lu] " + "orig [%lu %lu] align %lu bits " + "%d\n", ext->oe_start, ext->oe_end, + a_start, a_end, start, end, + align_by, pcc_bits); osc_extent_state_set(ext, OES_LOCKING); ext->oe_owner = current; list_move_tail(&ext->oe_link,