diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c
index 126256f..affd0d2 100644
--- a/lustre/osc/osc_page.c
+++ b/lustre/osc/osc_page.c
@@ -27,7 +27,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -63,23 +63,23 @@ static int osc_page_is_dlocked(const struct lu_env *env,
                                const struct osc_page *opg,
                                enum cl_lock_mode mode, int pending, int unref)
 {
-        struct cl_page         *page;
-        struct osc_object      *obj;
-        struct osc_thread_info *info;
-        struct ldlm_res_id     *resname;
-        struct lustre_handle   *lockh;
-        ldlm_policy_data_t     *policy;
-        ldlm_mode_t             dlmmode;
-        int                     flags;
-
-        cfs_might_sleep();
-
-        info = osc_env_info(env);
-        resname = &info->oti_resname;
-        policy = &info->oti_policy;
-        lockh = &info->oti_handle;
-        page = opg->ops_cl.cpl_page;
-        obj = cl2osc(opg->ops_cl.cpl_obj);
+	struct cl_page         *page;
+	struct osc_object      *obj;
+	struct osc_thread_info *info;
+	struct ldlm_res_id     *resname;
+	struct lustre_handle   *lockh;
+	ldlm_policy_data_t     *policy;
+	ldlm_mode_t             dlmmode;
+	__u64                   flags;
+
+	might_sleep();
+
+	info = osc_env_info(env);
+	resname = &info->oti_resname;
+	policy = &info->oti_policy;
+	lockh = &info->oti_handle;
+	page = opg->ops_cl.cpl_page;
+	obj = cl2osc(opg->ops_cl.cpl_obj);
 
 	flags = LDLM_FL_TEST_LOCK | LDLM_FL_BLOCK_GRANTED;
 	if (pending)
@@ -126,7 +126,7 @@ static int osc_page_protected(const struct lu_env *env,
 		descr->cld_mode = mode;
 		descr->cld_start = page->cp_index;
 		descr->cld_end = page->cp_index;
-		cfs_spin_lock(&hdr->coh_lock_guard);
+		spin_lock(&hdr->coh_lock_guard);
 		cfs_list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
 			/*
 			 * Lock-less sub-lock has to be either in HELD state
@@ -144,7 +144,7 @@ static int osc_page_protected(const struct lu_env *env,
 				break;
 			}
 		}
-		cfs_spin_unlock(&hdr->coh_lock_guard);
+		spin_unlock(&hdr->coh_lock_guard);
 	}
 	return result;
 }
@@ -168,7 +168,6 @@ static void osc_page_fini(const struct lu_env *env,
 	struct osc_page *opg = cl2osc_page(slice);
 	CDEBUG(D_TRACE, "%p\n", opg);
 	LASSERT(opg->ops_lock == NULL);
-	OBD_SLAB_FREE_PTR(opg, osc_page_kmem);
 }
 
 static void osc_page_transfer_get(struct osc_page *opg, const char *label)
@@ -207,10 +206,10 @@ static void osc_page_transfer_add(const struct lu_env *env,
 	 * first and then use it as inflight. 
*/ osc_lru_del(osc_cli(obj), opg, false); - cfs_spin_lock(&obj->oo_seatbelt); - cfs_list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); - opg->ops_submitter = cfs_current(); - cfs_spin_unlock(&obj->oo_seatbelt); + spin_lock(&obj->oo_seatbelt); + cfs_list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); + opg->ops_submitter = current; + spin_unlock(&obj->oo_seatbelt); } static int osc_page_cache_add(const struct lu_env *env, @@ -432,13 +431,13 @@ static void osc_page_delete(const struct lu_env *env, LASSERT(0); } - cfs_spin_lock(&obj->oo_seatbelt); + spin_lock(&obj->oo_seatbelt); if (opg->ops_submitter != NULL) { LASSERT(!cfs_list_empty(&opg->ops_inflight)); cfs_list_del_init(&opg->ops_inflight); opg->ops_submitter = NULL; } - cfs_spin_unlock(&obj->oo_seatbelt); + spin_unlock(&obj->oo_seatbelt); osc_lru_del(osc_cli(obj), opg, true); EXIT; @@ -454,9 +453,9 @@ void osc_page_clip(const struct lu_env *env, const struct cl_page_slice *slice, opg->ops_from = from; opg->ops_to = to; - cfs_spin_lock(&oap->oap_lock); - oap->oap_async_flags |= ASYNC_COUNT_STABLE; - cfs_spin_unlock(&oap->oap_lock); + spin_lock(&oap->oap_lock); + oap->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&oap->oap_lock); } static int osc_page_cancel(const struct lu_env *env, @@ -508,46 +507,65 @@ static const struct cl_page_operations osc_page_ops = { .cpo_flush = osc_page_flush }; -struct cl_page *osc_page_init(const struct lu_env *env, - struct cl_object *obj, - struct cl_page *page, cfs_page_t *vmpage) +int osc_page_init(const struct lu_env *env, struct cl_object *obj, + struct cl_page *page, struct page *vmpage) { - struct osc_object *osc = cl2osc(obj); - struct osc_page *opg; - int result; + struct osc_object *osc = cl2osc(obj); + struct osc_page *opg = cl_object_page_slice(obj, page); + int result; - OBD_SLAB_ALLOC_PTR_GFP(opg, osc_page_kmem, CFS_ALLOC_IO); - if (opg != NULL) { - opg->ops_from = 0; - opg->ops_to = CFS_PAGE_SIZE; - - result = osc_prep_async_page(osc, opg, vmpage, - cl_offset(obj, page->cp_index)); - if (result == 0) { - struct osc_io *oio = osc_env_io(env); - opg->ops_srvlock = osc_io_srvlock(oio); - cl_page_slice_add(page, &opg->ops_cl, obj, - &osc_page_ops); - } - /* - * Cannot assert osc_page_protected() here as read-ahead - * creates temporary pages outside of a lock. - */ -#ifdef INVARIANT_CHECK - opg->ops_temp = !osc_page_protected(env, opg, CLM_READ, 1); + opg->ops_from = 0; + opg->ops_to = PAGE_CACHE_SIZE; + + result = osc_prep_async_page(osc, opg, vmpage, + cl_offset(obj, page->cp_index)); + if (result == 0) { + struct osc_io *oio = osc_env_io(env); + opg->ops_srvlock = osc_io_srvlock(oio); + cl_page_slice_add(page, &opg->ops_cl, obj, + &osc_page_ops); + } + /* + * Cannot assert osc_page_protected() here as read-ahead + * creates temporary pages outside of a lock. 
+ */ +#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK + opg->ops_temp = !osc_page_protected(env, opg, CLM_READ, 1); #endif - /* ops_inflight and ops_lru are the same field, but it doesn't - * hurt to initialize it twice :-) */ - CFS_INIT_LIST_HEAD(&opg->ops_inflight); - CFS_INIT_LIST_HEAD(&opg->ops_lru); - } else - result = -ENOMEM; + /* ops_inflight and ops_lru are the same field, but it doesn't + * hurt to initialize it twice :-) */ + CFS_INIT_LIST_HEAD(&opg->ops_inflight); + CFS_INIT_LIST_HEAD(&opg->ops_lru); /* reserve an LRU space for this page */ if (page->cp_type == CPT_CACHEABLE && result == 0) result = osc_lru_reserve(env, osc, opg); - return ERR_PTR(result); + return result; +} + +int osc_over_unstable_soft_limit(struct client_obd *cli) +{ + long obd_upages, obd_dpages, osc_upages; + + /* Can't check cli->cl_unstable_count, therefore, no soft limit */ + if (cli == NULL) + return 0; + + obd_upages = cfs_atomic_read(&obd_unstable_pages); + obd_dpages = cfs_atomic_read(&obd_dirty_pages); + + osc_upages = cfs_atomic_read(&cli->cl_unstable_count); + + /* obd_max_dirty_pages is the max number of (dirty + unstable) + * pages allowed at any given time. To simulate an unstable page + * only limit, we subtract the current number of dirty pages + * from this max. This difference is roughly the amount of pages + * currently available for unstable pages. Thus, the soft limit + * is half of that difference. Check osc_upages to ensure we don't + * set SOFT_SYNC for OSCs without any outstanding unstable pages. */ + return osc_upages != 0 && + obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2; } /** @@ -573,6 +591,9 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, oap->oap_count = opg->ops_to - opg->ops_from; oap->oap_brw_flags = OBD_BRW_SYNC | brw_flags; + if (osc_over_unstable_soft_limit(oap->oap_cli)) + oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; + if (!client_is_remote(osc_export(obj)) && cfs_capable(CFS_CAP_SYS_RESOURCE)) { oap->oap_brw_flags |= OBD_BRW_NOQUOTA; @@ -599,9 +620,9 @@ static CFS_DECL_WAITQ(osc_lru_waitq); static cfs_atomic_t osc_lru_waiters = CFS_ATOMIC_INIT(0); /* LRU pages are freed in batch mode. OSC should at least free this * number of pages to avoid running out of LRU budget, and.. */ -static const int lru_shrink_min = 2 << (20 - CFS_PAGE_SHIFT); /* 2M */ +static const int lru_shrink_min = 2 << (20 - PAGE_CACHE_SHIFT); /* 2M */ /* free this number at most otherwise it will take too long time to finsih. */ -static const int lru_shrink_max = 32 << (20 - CFS_PAGE_SHIFT); /* 32M */ +static const int lru_shrink_max = 32 << (20 - PAGE_CACHE_SHIFT); /* 32M */ /* Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. In this way, slots are freed in a steady @@ -647,7 +668,7 @@ static int discard_pagevec(const struct lu_env *env, struct cl_io *io, * This check is necessary to avoid freeing the pages * having already been removed from LRU and pinned * for IO. 
*/ - if (cfs_atomic_read(&page->cp_ref) == 1) { + if (!cl_page_in_use(page)) { cl_page_unmap(env, io, page); cl_page_discard(env, io, page); ++count; @@ -700,8 +721,7 @@ int osc_lru_shrink(struct client_obd *cli, int target) opg = cfs_list_entry(cli->cl_lru_list.next, struct osc_page, ops_lru); page = cl_page_top(opg->ops_cl.cpl_page); - if (page->cp_state == CPS_FREEING || - cfs_atomic_read(&page->cp_ref) > 0) { + if (cl_page_in_use_noref(page)) { cfs_list_move_tail(&opg->ops_lru, &cli->cl_lru_list); continue; } @@ -724,12 +744,15 @@ int osc_lru_shrink(struct client_obd *cli, int target) clobj = tmp; io->ci_obj = clobj; + io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, clobj); + + client_obd_list_lock(&cli->cl_lru_list_lock); + if (rc != 0) break; ++maxscan; - client_obd_list_lock(&cli->cl_lru_list_lock); continue; } @@ -783,8 +806,10 @@ static void osc_lru_add(struct client_obd *cli, struct osc_page *opg) } client_obd_list_unlock(&cli->cl_lru_list_lock); - if (wakeup) - cfs_waitq_broadcast(&osc_lru_waitq); + if (wakeup) { + osc_lru_shrink(cli, osc_cache_too_much(cli)); + wake_up_all(&osc_lru_waitq); + } } /* delete page from LRUlist. The page can be deleted from LRUlist for two @@ -811,27 +836,32 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del) * stealing one of them. * cl_lru_shrinkers is to avoid recursive call in case * we're already in the context of osc_lru_shrink(). */ - if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0) + if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0 && + !memory_pressure_get()) osc_lru_shrink(cli, osc_cache_too_much(cli)); - cfs_waitq_signal(&osc_lru_waitq); + wake_up(&osc_lru_waitq); } } else { LASSERT(cfs_list_empty(&opg->ops_lru)); } } +static inline int max_to_shrink(struct client_obd *cli) +{ + return min(cfs_atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max); +} + static int osc_lru_reclaim(struct client_obd *cli) { struct cl_client_cache *cache = cli->cl_cache; - struct client_obd *victim; - struct client_obd *tmp; + int max_scans; int rc; LASSERT(cache != NULL); LASSERT(!cfs_list_empty(&cache->ccc_lru)); rc = osc_lru_shrink(cli, lru_shrink_min); - if (rc > 0) { + if (rc != 0) { CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n", cli->cl_import->imp_obd->obd_name, rc, cli); return rc; @@ -844,36 +874,34 @@ static int osc_lru_reclaim(struct client_obd *cli) /* Reclaim LRU slots from other client_obd as it can't free enough * from its own. This should rarely happen. 
*/ - cfs_spin_lock(&cache->ccc_lru_lock); + spin_lock(&cache->ccc_lru_lock); cache->ccc_lru_shrinkers++; cfs_list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - cfs_list_for_each_entry_safe(victim, tmp, &cache->ccc_lru, cl_lru_osc) { - if (victim == cli) - break; - CDEBUG(D_CACHE, "%s: cli %p LRU pages: %d, busy: %d.\n", - victim->cl_import->imp_obd->obd_name, victim, - cfs_atomic_read(&victim->cl_lru_in_list), - cfs_atomic_read(&victim->cl_lru_busy)); + max_scans = cfs_atomic_read(&cache->ccc_users); + while (--max_scans > 0 && !cfs_list_empty(&cache->ccc_lru)) { + cli = cfs_list_entry(cache->ccc_lru.next, struct client_obd, + cl_lru_osc); - cfs_list_move_tail(&victim->cl_lru_osc, &cache->ccc_lru); - if (cfs_atomic_read(&victim->cl_lru_in_list) > 0) - break; - } - cfs_spin_unlock(&cache->ccc_lru_lock); - if (victim == cli) { - CDEBUG(D_CACHE, "%s: can't get any free LRU slots.\n", - cli->cl_import->imp_obd->obd_name); - return 0; - } + CDEBUG(D_CACHE, "%s: cli %p LRU pages: %d, busy: %d.\n", + cli->cl_import->imp_obd->obd_name, cli, + cfs_atomic_read(&cli->cl_lru_in_list), + cfs_atomic_read(&cli->cl_lru_busy)); - rc = osc_lru_shrink(victim, - min(cfs_atomic_read(&victim->cl_lru_in_list) >> 1, - lru_shrink_max)); + cfs_list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); + if (cfs_atomic_read(&cli->cl_lru_in_list) > 0) { + spin_unlock(&cache->ccc_lru_lock); - CDEBUG(D_CACHE, "%s: Free %d pages from other cli: %p.\n", - cli->cl_import->imp_obd->obd_name, rc, victim); + rc = osc_lru_shrink(cli, max_to_shrink(cli)); + spin_lock(&cache->ccc_lru_lock); + if (rc != 0) + break; + } + } + spin_unlock(&cache->ccc_lru_lock); + CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n", + cli->cl_import->imp_obd->obd_name, cli, rc); return rc; } @@ -899,7 +927,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, if (rc > 0) continue; - cfs_cond_resched(); + cond_resched(); /* slowest case, all of caching pages are busy, notifying * other OSCs that we're lack of LRU slots. */
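
Note on the osc_page_is_dlocked() hunk: the local variable "flags" is
widened from int to __u64, which matches the LDLM lock flags having
outgrown 32 bits; an int would silently truncate any flag above bit 31.
A standalone sketch of the failure mode, where DEMO_FL_HIGH is a made-up
flag value rather than a real LDLM_FL_* constant:

#include <stdio.h>
#include <stdint.h>

#define DEMO_FL_HIGH (1ULL << 40)       /* hypothetical flag above bit 31 */

int main(void)
{
        int narrow = (int)DEMO_FL_HIGH;  /* truncated; 0 on typical ABIs */
        uint64_t wide = DEMO_FL_HIGH;    /* preserved */

        printf("narrow=%d wide=0x%llx\n", narrow,
               (unsigned long long)wide);
        return 0;
}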
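Note on the osc_page_init() hunk: the function now returns int rather
than a struct cl_page pointer and takes its slice from
cl_object_page_slice(), while osc_page_fini() loses its
OBD_SLAB_FREE_PTR() call. This is consistent with the cl_page and all of
its per-layer slices being carved out of a single allocation, so a layer
no longer allocates or frees its own slice. A minimal userspace model of
that layout; the names page_hdr and slice are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

struct slice {
        const char *sl_layer;
};

/* one buffer holds the generic page header and every layer's slice */
struct page_hdr {
        int          ph_ref;
        struct slice ph_slices[];       /* per-layer slices follow */
};

int main(void)
{
        int nr_layers = 2;
        struct page_hdr *p;

        /* a single allocation covers the header and both slices... */
        p = calloc(1, sizeof(*p) + nr_layers * sizeof(struct slice));
        if (p == NULL)
                return 1;

        p->ph_slices[0].sl_layer = "lov";
        p->ph_slices[1].sl_layer = "osc";
        printf("%s %s\n", p->ph_slices[0].sl_layer,
               p->ph_slices[1].sl_layer);

        /* ...and a single free releases them all; a per-layer fini such
         * as osc_page_fini() only tears down layer state */
        free(p);
        return 0;
}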
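Note on osc_over_unstable_soft_limit(): the comment above its return
statement describes the heuristic; restated with made-up page counts, a
1000-page (dirty + unstable) budget with 400 dirty pages cached leaves
600 pages of headroom, so the soft limit is 300 unstable pages. A
runnable restatement, where over_soft_limit() is an illustrative
stand-in, not the kernel function:

#include <stdio.h>

/* mirrors the return expression of osc_over_unstable_soft_limit() */
static int over_soft_limit(long obd_upages, long obd_dpages,
                           long osc_upages, long max_dirty)
{
        return osc_upages != 0 &&
               obd_upages >= (max_dirty - obd_dpages) / 2;
}

int main(void)
{
        printf("%d\n", over_soft_limit(300, 400, 10, 1000)); /* 1: at limit */
        printf("%d\n", over_soft_limit(299, 400, 10, 1000)); /* 0: below it */
        printf("%d\n", over_soft_limit(300, 400,  0, 1000)); /* 0: none here */
        return 0;
}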
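Note on the osc_lru_reclaim() rework: rather than searching for a single
victim and giving up when the scan wraps back to the caller, the loop now
always takes the head of ccc_lru, rotates it to the tail, and caps the
walk at ccc_users, since the list is reordered while ccc_lru_lock is
dropped for the actual shrink. A userspace sketch of that bounded
round-robin; the lock choreography is reduced to comments and all names
are invented:

#include <stdio.h>

#define NR_CLI 4

/* stands in for each client_obd's cl_lru_in_list counter */
static long lru_pages[NR_CLI] = { 0, 0, 57, 12 };

static long shrink(int cli)
{
        long freed = lru_pages[cli] / 2;        /* max_to_shrink(): half */

        lru_pages[cli] -= freed;
        return freed;
}

int main(void)
{
        int head = 0;                   /* current list head */
        int max_scans = NR_CLI;         /* ccc_users bounds the walk */
        long rc = 0;

        /* take ccc_lru_lock */
        while (max_scans-- > 0) {
                int cli = head;

                head = (head + 1) % NR_CLI;     /* list_move_tail() */
                if (lru_pages[cli] > 0) {
                        /* drop the lock, shrink, then retake it */
                        rc = shrink(cli);
                        if (rc != 0)
                                break;
                }
        }
        /* drop ccc_lru_lock */
        printf("freed %ld pages\n", rc);        /* freed 28 pages */
        return 0;
}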