X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fobdclass%2Flu_object.c;h=b633d26f8c2a13dca6c33b6151714c5c3ef57e17;hp=f6f5bb35f6288c6ffda4976bb1828cbc8f9ca725;hb=2aa5147cd05fa8342493333e92a0c0fc7f87bbde;hpb=d96a9248708d4da02728c9976a9a90ba29bd2bc0

diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c
index f6f5bb3..b633d26 100644
--- a/lustre/obdclass/lu_object.c
+++ b/lustre/obdclass/lu_object.c
@@ -15,11 +15,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2015, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -44,8 +40,9 @@
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include
 #include
+#include
+#include
 #include  /* hash_long() */
 #include
 #include
@@ -53,14 +50,13 @@
 #include
 #include
 #include
-#include
 
 enum {
         LU_CACHE_PERCENT_MAX     = 50,
         LU_CACHE_PERCENT_DEFAULT = 20
 };
 
-#define LU_CACHE_NR_MAX_ADJUST          128
+#define LU_CACHE_NR_MAX_ADJUST          512
 #define LU_CACHE_NR_UNLIMITED           -1
 #define LU_CACHE_NR_DEFAULT             LU_CACHE_NR_UNLIMITED
 #define LU_CACHE_NR_LDISKFS_LIMIT       LU_CACHE_NR_UNLIMITED
@@ -156,7 +152,7 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
                         LASSERT(list_empty(&top->loh_lru));
                         list_add_tail(&top->loh_lru, &bkt->lsb_lru);
                         bkt->lsb_lru_len++;
-                        lprocfs_counter_incr(site->ls_stats, LU_SS_LRU_LEN);
+                        percpu_counter_inc(&site->ls_lru_len_counter);
                         CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, "
                                "lru_len: %ld\n",
                                o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
@@ -219,7 +215,7 @@ void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
                 list_del_init(&top->loh_lru);
                 bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
                 bkt->lsb_lru_len--;
-                lprocfs_counter_decr(site->ls_stats, LU_SS_LRU_LEN);
+                percpu_counter_dec(&site->ls_lru_len_counter);
         }
         cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
         cfs_hash_bd_unlock(obj_hash, &bd, 1);
@@ -348,8 +344,11 @@ static void lu_object_free(const struct lu_env *env, struct lu_object *o)
 
 /**
  * Free \a nr objects from the cold end of the site LRU list.
+ * if canblock is 0, then don't block waiting for another
+ * instance of lu_site_purge() to complete
  */
-int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
+int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s,
+                          int nr, int canblock)
 {
         struct lu_object_header *h;
         struct lu_object_header *temp;
@@ -379,7 +378,11 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
          * It doesn't make any sense to make purge threads parallel, that can
          * only bring troubles to us. See LU-5331.
          */
-        mutex_lock(&s->ls_purge_mutex);
+        if (canblock != 0)
+                mutex_lock(&s->ls_purge_mutex);
+        else if (mutex_trylock(&s->ls_purge_mutex) == 0)
+                goto out;
+
         did_sth = 0;
         cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
                 if (i < start)
@@ -398,7 +401,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
                                                &bd2, &h->loh_hash);
                         list_move(&h->loh_lru, &dispose);
                         bkt->lsb_lru_len--;
-                        lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
+                        percpu_counter_dec(&s->ls_lru_len_counter);
 
                         if (did_sth == 0)
                                 did_sth = 1;
@@ -435,9 +438,10 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
         /* race on s->ls_purge_start, but nobody cares */
         s->ls_purge_start = i % CFS_HASH_NBKT(s->ls_obj_hash);
+out:
         return nr;
 }
-EXPORT_SYMBOL(lu_site_purge);
+EXPORT_SYMBOL(lu_site_purge_objects);
 
 /*
  * Object printing.
  */
@@ -614,7 +618,7 @@ static struct lu_object *htable_lookup(struct lu_site *s,
                 if (!list_empty(&h->loh_lru)) {
                         list_del_init(&h->loh_lru);
                         bkt->lsb_lru_len--;
-                        lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
+                        percpu_counter_dec(&s->ls_lru_len_counter);
                 }
                 return lu_object_top(h);
         }
@@ -665,11 +669,11 @@ static void lu_object_limit(const struct lu_env *env,
 
         size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
         nr = (__u64)lu_cache_nr;
-        if (size > nr)
-                lu_site_purge(env, dev->ld_site,
-                              MIN(size - nr, LU_CACHE_NR_MAX_ADJUST));
+        if (size <= nr)
+                return;
 
-        return;
+        lu_site_purge_objects(env, dev->ld_site,
+                              MIN(size - nr, LU_CACHE_NR_MAX_ADJUST), 0);
 }
 
 static struct lu_object *lu_object_new(const struct lu_env *env,
@@ -865,7 +869,7 @@ EXPORT_SYMBOL(lu_device_type_fini);
  * Global list of all sites on this node
  */
 static struct list_head lu_sites;
-static DEFINE_MUTEX(lu_sites_guard);
+static struct rw_semaphore lu_sites_guard;
 /**
  * Global environment used by site shrinker.
  */
@@ -950,8 +954,8 @@ static unsigned long lu_htable_order(struct lu_device *top)
 
 #if BITS_PER_LONG == 32
         /* limit hashtable size for lowmem systems to low RAM */
-        if (cache_size > 1 << (30 - PAGE_CACHE_SHIFT))
-                cache_size = 1 << (30 - PAGE_CACHE_SHIFT) * 3 / 4;
+        if (cache_size > 1 << (30 - PAGE_SHIFT))
+                cache_size = 1 << (30 - PAGE_SHIFT) * 3 / 4;
 #endif
 
         /* clear off unreasonable cache setting. */
@@ -964,7 +968,7 @@ static unsigned long lu_htable_order(struct lu_device *top)
                 lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
         }
         cache_size = cache_size / 100 * lu_cache_percent *
-                (PAGE_CACHE_SIZE / 1024);
+                (PAGE_SIZE / 1024);
 
         for (bits = 1; (1 << bits) < cache_size; ++bits) {
                 ;
@@ -1062,10 +1066,20 @@ int lu_site_init(struct lu_site *s, struct lu_device *top)
         char name[16];
         unsigned long bits;
         unsigned int i;
+        int rc;
         ENTRY;
 
         memset(s, 0, sizeof *s);
         mutex_init(&s->ls_purge_mutex);
+
+#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
+        rc = percpu_counter_init(&s->ls_lru_len_counter, 0, GFP_NOFS);
+#else
+        rc = percpu_counter_init(&s->ls_lru_len_counter, 0);
+#endif
+        if (rc)
+                return -ENOMEM;
+
         snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name);
         for (bits = lu_htable_order(top);
              bits >= LU_SITE_BITS_MIN; bits--) {
@@ -1112,12 +1126,6 @@ int lu_site_init(struct lu_site *s, struct lu_device *top)
                          0, "cache_death_race", "cache_death_race");
         lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED,
                          0, "lru_purged", "lru_purged");
-        /*
-         * Unlike other counters, lru_len can be decremented so
-         * need lc_sum instead of just lc_count
-         */
-        lprocfs_counter_init(s->ls_stats, LU_SS_LRU_LEN,
-                        LPROCFS_CNTR_AVGMINMAX, "lru_len", "lru_len");
 
         INIT_LIST_HEAD(&s->ls_linkage);
         s->ls_top_dev = top;
@@ -1139,9 +1147,11 @@ EXPORT_SYMBOL(lu_site_init);
  */
 void lu_site_fini(struct lu_site *s)
 {
-        mutex_lock(&lu_sites_guard);
+        down_write(&lu_sites_guard);
         list_del_init(&s->ls_linkage);
-        mutex_unlock(&lu_sites_guard);
+        up_write(&lu_sites_guard);
+
+        percpu_counter_destroy(&s->ls_lru_len_counter);
 
         if (s->ls_obj_hash != NULL) {
                 cfs_hash_putref(s->ls_obj_hash);
@@ -1166,11 +1176,11 @@ EXPORT_SYMBOL(lu_site_fini);
 int lu_site_init_finish(struct lu_site *s)
 {
         int result;
-        mutex_lock(&lu_sites_guard);
+        down_write(&lu_sites_guard);
         result = lu_context_refill(&lu_shrink_env.le_ctx);
         if (result == 0)
                 list_add(&s->ls_linkage, &lu_sites);
-        mutex_unlock(&lu_sites_guard);
+        up_write(&lu_sites_guard);
         return result;
 }
 EXPORT_SYMBOL(lu_site_init_finish);
@@ -1593,14 +1603,9 @@ static struct list_head lu_context_remembered;
  */
 void lu_context_key_quiesce(struct lu_context_key *key)
 {
-        struct lu_context *ctx;
-        extern unsigned cl_env_cache_purge(unsigned nr);
+        struct lu_context *ctx;
 
-        if (!(key->lct_tags & LCT_QUIESCENT)) {
-                /*
-                 * XXX layering violation.
-                 */
-                cl_env_cache_purge(~0);
+        if (!(key->lct_tags & LCT_QUIESCENT)) {
                 /*
                  * XXX memory barrier has to go here.
                  */
@@ -1790,10 +1795,11 @@ void lu_context_exit(struct lu_context *ctx)
         LINVRNT(ctx->lc_state == LCS_ENTERED);
         ctx->lc_state = LCS_LEFT;
         if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value != NULL) {
+                /* could race with key quiescency */
+                if (ctx->lc_tags & LCT_REMEMBER)
+                        read_lock(&lu_keys_guard);
+
                 for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
-                        /* could race with key quiescency */
-                        if (ctx->lc_tags & LCT_REMEMBER)
-                                read_lock(&lu_keys_guard);
                         if (ctx->lc_value[i] != NULL) {
                                 struct lu_context_key *key;
 
@@ -1803,9 +1809,10 @@ void lu_context_exit(struct lu_context *ctx)
                                         key->lct_exit(ctx,
                                                       key, ctx->lc_value[i]);
                         }
-                        if (ctx->lc_tags & LCT_REMEMBER)
-                                read_unlock(&lu_keys_guard);
                 }
+
+                if (ctx->lc_tags & LCT_REMEMBER)
+                        read_unlock(&lu_keys_guard);
         }
 }
 EXPORT_SYMBOL(lu_context_exit);
@@ -1965,12 +1972,15 @@ static void lu_site_stats_get(struct cfs_hash *hs,
 
 /*
- * lu_cache_shrink_count returns the number of cached objects that are
- * candidates to be freed by shrink_slab(). A counter, which tracks
- * the number of items in the site's lru, is maintained in the per cpu
- * stats of each site. The counter is incremented when an object is added
- * to a site's lru and decremented when one is removed. The number of
- * free-able objects is the sum of all per cpu counters for all sites.
+ * lu_cache_shrink_count() returns an approximate number of cached objects
+ * that can be freed by shrink_slab(). A counter, which tracks the
+ * number of items in the site's lru, is maintained in a percpu_counter
+ * for each site. The percpu values are incremented and decremented as
+ * objects are added or removed from the lru. The percpu values are summed
+ * and saved whenever a percpu value exceeds a threshold. Thus the saved,
+ * summed value at any given time may not accurately reflect the current
+ * lru length. But this value is sufficiently accurate for the needs of
+ * a shrinker.
  *
  * Using a per cpu counter is a compromise solution to concurrent access:
  * lu_object_put() can update the counter without locking the site and
@@ -1987,11 +1997,10 @@ static unsigned long lu_cache_shrink_count(struct shrinker *sk,
         if (!(sc->gfp_mask & __GFP_FS))
                 return 0;
 
-        mutex_lock(&lu_sites_guard);
-        list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
-                cached += ls_stats_read(s->ls_stats, LU_SS_LRU_LEN);
-        }
-        mutex_unlock(&lu_sites_guard);
+        down_read(&lu_sites_guard);
+        list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage)
+                cached += percpu_counter_read_positive(&s->ls_lru_len_counter);
+        up_read(&lu_sites_guard);
 
         cached = (cached / 100) * sysctl_vfs_cache_pressure;
         CDEBUG(D_INODE, "%ld objects cached, cache pressure %d\n",
@@ -2022,7 +2031,7 @@ static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
                  */
                 return SHRINK_STOP;
 
-        mutex_lock(&lu_sites_guard);
+        down_write(&lu_sites_guard);
         list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
                 remain = lu_site_purge(&lu_shrink_env, s, remain);
                 /*
@@ -2032,7 +2041,7 @@ static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
                 list_move_tail(&s->ls_linkage, &splice);
         }
         list_splice(&splice, lu_sites.prev);
-        mutex_unlock(&lu_sites_guard);
+        up_write(&lu_sites_guard);
 
         return sc->nr_to_scan - remain;
 }
@@ -2142,6 +2151,7 @@ int lu_global_init(void)
         INIT_LIST_HEAD(&lu_device_types);
         INIT_LIST_HEAD(&lu_context_remembered);
         INIT_LIST_HEAD(&lu_sites);
+        init_rwsem(&lu_sites_guard);
 
         result = lu_ref_global_init();
         if (result != 0)
@@ -2157,9 +2167,9 @@ int lu_global_init(void)
          * conservatively. This should not be too bad, because this
          * environment is global.
          */
-        mutex_lock(&lu_sites_guard);
+        down_write(&lu_sites_guard);
         result = lu_env_init(&lu_shrink_env, LCT_SHRINKER);
-        mutex_unlock(&lu_sites_guard);
+        up_write(&lu_sites_guard);
         if (result != 0)
                 return result;
 
@@ -2191,9 +2201,9 @@ void lu_global_fini(void)
          * Tear shrinker environment down _after_ de-registering
          * lu_global_key, because the latter has a value in the former.
          */
-        mutex_lock(&lu_sites_guard);
+        down_write(&lu_sites_guard);
         lu_env_fini(&lu_shrink_env);
-        mutex_unlock(&lu_sites_guard);
+        up_write(&lu_sites_guard);
 
         lu_ref_global_fini();
 }
@@ -2204,14 +2214,7 @@ static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx)
         struct lprocfs_counter ret;
 
         lprocfs_stats_collect(stats, idx, &ret);
-        if (idx == LU_SS_LRU_LEN)
-                /*
-                 * protect against counter on cpu A being decremented
-                 * before counter is incremented on cpu B; unlikely
-                 */
-                return (__u32)((ret.lc_sum > 0) ? ret.lc_sum : 0);
-        else
-                return (__u32)ret.lc_count;
+        return (__u32)ret.lc_count;
 #else
         return 0;
 #endif
@@ -2228,7 +2231,7 @@ int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m)
         memset(&stats, 0, sizeof(stats));
         lu_site_stats_get(s->ls_obj_hash, &stats, 1);
 
-        seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d %d\n",
+        seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d\n",
                    stats.lss_busy,
                    stats.lss_total,
                    stats.lss_populated,
@@ -2239,8 +2242,7 @@ int lu_site_stats_seq_print(const struct lu_site *s, struct seq_file *m)
                    ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS),
                    ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE),
                    ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE),
-                   ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED),
-                   ls_stats_read(s->ls_stats, LU_SS_LRU_LEN));
+                   ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED));
         return 0;
 }
 EXPORT_SYMBOL(lu_site_stats_seq_print);
@@ -2292,19 +2294,24 @@ void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
 {
         struct lu_site *s = o->lo_dev->ld_site;
         struct lu_fid *old = &o->lo_header->loh_fid;
-        struct lu_object *shadow;
-        wait_queue_t waiter;
         struct cfs_hash *hs;
         struct cfs_hash_bd bd;
-        __u64 version = 0;
 
         LASSERT(fid_is_zero(old));
 
+        /* supposed to be unique */
         hs = s->ls_obj_hash;
         cfs_hash_bd_get_and_lock(hs, (void *)fid, &bd, 1);
-        shadow = htable_lookup(s, &bd, fid, &waiter, &version);
-        /* supposed to be unique */
-        LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT);
+#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
+        {
+                __u64 version = 0;
+                wait_queue_t waiter;
+                struct lu_object *shadow;
+                shadow = htable_lookup(s, &bd, fid, &waiter, &version);
+                /* supposed to be unique */
+                LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT);
+        }
+#endif
         *old = *fid;
         cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
         cfs_hash_bd_unlock(hs, &bd, 1);
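
Note on the accounting pattern used by this patch: the lprocfs "lru_len" statistic is replaced with a kernel percpu_counter per lu_site, incremented and decremented as objects move on and off the LRU and read approximately in the shrinker's count callback. The sketch below is illustrative only and is not part of lu_object.c; the demo_site structure and demo_* function names are invented for illustration. The HAVE_PERCPU_COUNTER_INIT_GFP_FLAG guard mirrors the patch's handling of the gfp_t argument that percpu_counter_init() gained in Linux 3.18.

/* Illustrative sketch only -- hypothetical demo_* names, not Lustre code. */
#include <linux/percpu_counter.h>
#include <linux/gfp.h>

struct demo_site {
	struct percpu_counter ds_lru_len;	/* approximate LRU length */
};

static int demo_site_init(struct demo_site *ds)
{
#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
	/* newer kernels: percpu_counter_init() takes a gfp_t argument */
	return percpu_counter_init(&ds->ds_lru_len, 0, GFP_NOFS);
#else
	return percpu_counter_init(&ds->ds_lru_len, 0);
#endif
}

static void demo_site_lru_add(struct demo_site *ds)
{
	percpu_counter_inc(&ds->ds_lru_len);	/* cheap, no site-wide lock */
}

static void demo_site_lru_del(struct demo_site *ds)
{
	percpu_counter_dec(&ds->ds_lru_len);
}

/* shrinker "count" path: an approximate, never-negative snapshot is enough */
static s64 demo_site_lru_len(struct demo_site *ds)
{
	return percpu_counter_read_positive(&ds->ds_lru_len);
}

static void demo_site_fini(struct demo_site *ds)
{
	percpu_counter_destroy(&ds->ds_lru_len);
}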
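
Note on the lu_sites_guard change: converting the guard from a mutex to a rw_semaphore lets lu_cache_shrink_count() walk the site list under a shared (read) lock, so concurrent shrinker count calls no longer serialize, while site registration, teardown and the purge ("scan") path still take the lock exclusively. The self-contained sketch below illustrates that read/write split with hypothetical demo_* names; note that the patch itself initializes the semaphore at runtime via init_rwsem() in lu_global_init(), whereas the sketch uses the static DECLARE_RWSEM() initializer for brevity.

/* Illustrative sketch only -- hypothetical demo_* names, not Lustre code. */
#include <linux/rwsem.h>
#include <linux/list.h>
#include <linux/percpu_counter.h>

struct demo_site {
	struct list_head	ds_linkage;
	struct percpu_counter	ds_lru_len;
};

static LIST_HEAD(demo_sites);
static DECLARE_RWSEM(demo_sites_guard);	/* was: static DEFINE_MUTEX(...) */

/* read side: many concurrent shrinker "count" callers may walk the list */
static unsigned long demo_count_cached(void)
{
	struct demo_site *s;
	unsigned long cached = 0;

	down_read(&demo_sites_guard);
	list_for_each_entry(s, &demo_sites, ds_linkage)
		cached += percpu_counter_read_positive(&s->ds_lru_len);
	up_read(&demo_sites_guard);

	return cached;
}

/* write side: registration/unregistration and the purge ("scan") path */
static void demo_register_site(struct demo_site *s)
{
	down_write(&demo_sites_guard);
	list_add(&s->ds_linkage, &demo_sites);
	up_write(&demo_sites_guard);
}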
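
Note on lu_site_purge(): existing callers such as lu_cache_shrink_scan() above still use the three-argument lu_site_purge(), so the renamed lu_site_purge_objects() is presumably paired with a thin blocking wrapper in a header that this diff does not show. A minimal sketch of such a wrapper, under that assumption:

/* Hypothetical wrapper -- the real declaration would live in a Lustre
 * header not shown in this diff; canblock = 1 preserves the historical
 * blocking behaviour of lu_site_purge(). */
static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s,
				int nr)
{
	return lu_site_purge_objects(env, s, nr, 1);
}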