* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
cfs_hash_bd_get(site->ls_obj_hash, &top->loh_fid, &bd);
bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
- if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
- if (lu_object_is_dying(top)) {
+ if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
+ if (lu_object_is_dying(top)) {
- /*
- * somebody may be waiting for this, currently only
- * used for cl_object, see cl_object_put_last().
- */
- cfs_waitq_broadcast(&bkt->lsb_marche_funebre);
- }
- return;
- }
+ /*
+ * Somebody may be waiting for this; currently this is
+ * only used by cl_object, see cl_object_put_last().
+ */
+ wake_up_all(&bkt->lsb_marche_funebre);
+ }
+ return;
+ }
LASSERT(bkt->lsb_busy > 0);
bkt->lsb_busy--;
* struct lu_device_operations definition.
*/
static struct lu_object *lu_object_alloc(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
-{
- struct lu_object *scan;
- struct lu_object *top;
- cfs_list_t *layers;
- int clean;
- int result;
- ENTRY;
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_object *scan;
+ struct lu_object *top;
+ cfs_list_t *layers;
+ unsigned int init_mask = 0;
+ unsigned int init_flag;
+ int clean;
+ int result;
+ ENTRY;
/*
* Create top-level object slice. This will also create
*/
top->lo_header->loh_fid = *f;
layers = &top->lo_header->loh_layers;
- do {
- /*
- * Call ->loo_object_init() repeatedly, until no more new
- * object slices are created.
- */
- clean = 1;
- cfs_list_for_each_entry(scan, layers, lo_linkage) {
- if (scan->lo_flags & LU_OBJECT_ALLOCATED)
- continue;
- clean = 0;
- scan->lo_header = top->lo_header;
- result = scan->lo_ops->loo_object_init(env, scan, conf);
- if (result != 0) {
- lu_object_free(env, top);
- RETURN(ERR_PTR(result));
- }
- scan->lo_flags |= LU_OBJECT_ALLOCATED;
- }
- } while (!clean);
+
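+ /*
+ * Each layer owns one bit in init_mask: bit N stands for the N-th
+ * slice on the loh_layers list. A set bit means that slice's
+ * ->loo_object_init() has already run, so later passes skip it.
+ */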
+ do {
+ /*
+ * Call ->loo_object_init() repeatedly, until no more new
+ * object slices are created.
+ */
+ clean = 1;
+ init_flag = 1;
+ cfs_list_for_each_entry(scan, layers, lo_linkage) {
+ if (init_mask & init_flag)
+ goto next;
+ clean = 0;
+ scan->lo_header = top->lo_header;
+ result = scan->lo_ops->loo_object_init(env, scan, conf);
+ if (result != 0) {
+ lu_object_free(env, top);
+ RETURN(ERR_PTR(result));
+ }
+ init_mask |= init_flag;
+next:
+ init_flag <<= 1;
+ }
+ } while (!clean);
cfs_list_for_each_entry_reverse(scan, layers, lo_linkage) {
if (scan->lo_ops->loo_object_start != NULL) {
*/
CFS_INIT_LIST_HEAD(&splice);
cfs_list_splice_init(layers, &splice);
- while (!cfs_list_empty(&splice)) {
- /*
- * Free layers in bottom-to-top order, so that object header
- * lives as long as possible and ->loo_object_free() methods
- * can look at its contents.
- */
- o = container_of0(splice.prev, struct lu_object, lo_linkage);
- cfs_list_del_init(&o->lo_linkage);
- LASSERT(o->lo_ops->loo_object_free != NULL);
- o->lo_ops->loo_object_free(env, o);
- }
+ while (!cfs_list_empty(&splice)) {
+ /*
+ * Free layers in bottom-to-top order, so that object header
+ * lives as long as possible and ->loo_object_free() methods
+ * can look at its contents.
+ */
+ o = container_of0(splice.prev, struct lu_object, lo_linkage);
+ cfs_list_del_init(&o->lo_linkage);
+ LASSERT(o->lo_ops->loo_object_free != NULL);
+ o->lo_ops->loo_object_free(env, o);
+ }
- if (cfs_waitq_active(&bkt->lsb_marche_funebre))
- cfs_waitq_broadcast(&bkt->lsb_marche_funebre);
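+ /* wake up any thread sleeping on this bucket's wait queue, e.g. in
+ * the htable_lookup() death-race path, now that the object is gone */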
+ if (waitqueue_active(&bkt->lsb_marche_funebre))
+ wake_up_all(&bkt->lsb_marche_funebre);
}
/**
if (count > 0 && --count == 0)
break;
- }
- cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
- cfs_cond_resched();
- /*
- * Free everything on the dispose list. This is safe against
- * races due to the reasons described in lu_object_put().
- */
+ }
+ cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
+ cond_resched();
+ /*
+ * Free everything on the dispose list. This is safe against
+ * races for the reasons described in lu_object_put().
+ */
while (!cfs_list_empty(&dispose)) {
h = container_of0(dispose.next,
struct lu_object_header, loh_lru);
* lu_global_init().
*/
struct lu_context_key lu_global_key = {
- .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD |
- LCT_MG_THREAD | LCT_CL_THREAD,
- .lct_init = lu_global_key_init,
- .lct_fini = lu_global_key_fini
+ .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD |
+ LCT_MG_THREAD | LCT_CL_THREAD | LCT_LOCAL,
+ .lct_init = lu_global_key_init,
+ .lct_fini = lu_global_key_fini
};
/**
* Print human readable representation of the \a o to the \a printer.
*/
void lu_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t printer, const struct lu_object *o)
+ lu_printer_t printer, const struct lu_object *o)
{
- static const char ruler[] = "........................................";
- struct lu_object_header *top;
- int depth;
+ static const char ruler[] = "........................................";
+ struct lu_object_header *top;
+ int depth = 4;
- top = o->lo_header;
- lu_object_header_print(env, cookie, printer, top);
- (*printer)(env, cookie, "{ \n");
- cfs_list_for_each_entry(o, &top->loh_layers, lo_linkage) {
- depth = o->lo_depth + 4;
+ top = o->lo_header;
+ lu_object_header_print(env, cookie, printer, top);
+ (*printer)(env, cookie, "{\n");
- /*
- * print `.' \a depth times followed by type name and address
- */
- (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler,
- o->lo_dev->ld_type->ldt_name, o);
- if (o->lo_ops->loo_object_print != NULL)
- o->lo_ops->loo_object_print(env, cookie, printer, o);
- (*printer)(env, cookie, "\n");
- }
- (*printer)(env, cookie, "} header@%p\n", top);
+ cfs_list_for_each_entry(o, &top->loh_layers, lo_linkage) {
+ /*
+ * print `.' \a depth times followed by type name and address
+ */
+ (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler,
+ o->lo_dev->ld_type->ldt_name, o);
+
+ if (o->lo_ops->loo_object_print != NULL)
+ (*o->lo_ops->loo_object_print)(env, cookie, printer, o);
+
+ (*printer)(env, cookie, "\n");
+ }
+
+ (*printer)(env, cookie, "} header@%p\n", top);
}
EXPORT_SYMBOL(lu_object_print);
EXPORT_SYMBOL(lu_object_invariant);
static struct lu_object *htable_lookup(struct lu_site *s,
- cfs_hash_bd_t *bd,
- const struct lu_fid *f,
- cfs_waitlink_t *waiter,
- __u64 *version)
+ cfs_hash_bd_t *bd,
+ const struct lu_fid *f,
+ wait_queue_t *waiter,
+ __u64 *version)
{
struct lu_site_bkt_data *bkt;
struct lu_object_header *h;
__u64 ver = cfs_hash_bd_version_get(bd);
if (*version == ver)
- return NULL;
+ return ERR_PTR(-ENOENT);
*version = ver;
bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
if (hnode == NULL) {
lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
- return NULL;
+ return ERR_PTR(-ENOENT);
}
h = container_of0(hnode, struct lu_object_header, loh_hash);
* drained), and moreover, lookup has to wait until object is freed.
*/
- cfs_waitlink_init(waiter);
- cfs_waitq_add(&bkt->lsb_marche_funebre, waiter);
- cfs_set_current_state(CFS_TASK_UNINT);
- lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
- return ERR_PTR(-EAGAIN);
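+ /*
+ * Put the caller on the bucket's wait queue in uninterruptible
+ * state; it must sleep via waitq_wait() and remove itself again,
+ * as lu_object_find_at() does.
+ */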
+ init_waitqueue_entry_current(waiter);
+ add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
+ return ERR_PTR(-EAGAIN);
+}
+
+static struct lu_object *htable_lookup_nowait(struct lu_site *s,
+ cfs_hash_bd_t *bd,
+ const struct lu_fid *f)
+{
+ cfs_hlist_node_t *hnode;
+ struct lu_object_header *h;
+
+ /* cfs_hash_bd_peek_locked() is an "internal" cfs_hash function:
+ * it returns the node without taking a reference on the object. */
+ hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
+ if (hnode == NULL) {
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
+ return ERR_PTR(-ENOENT);
+ }
+
+ h = container_of0(hnode, struct lu_object_header, loh_hash);
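+ /* unlike htable_lookup(), do not wait for a dying object to be
+ * freed; report it as a cache miss instead */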
+ if (unlikely(lu_object_is_dying(h)))
+ return ERR_PTR(-ENOENT);
+
+ cfs_hash_get(s->ls_obj_hash, hnode);
+ lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
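+ /* a referenced object must not stay on the bucket's LRU list */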
+ cfs_list_del_init(&h->loh_lru);
+ return lu_object_top(h);
}
/**
* Core logic of lu_object_find*() functions.
*/
static struct lu_object *lu_object_find_try(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf,
- cfs_waitlink_t *waiter)
-{
- struct lu_object *o;
- struct lu_object *shadow;
- struct lu_site *s;
- cfs_hash_t *hs;
- cfs_hash_bd_t bd;
- __u64 version = 0;
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf,
+ wait_queue_t *waiter)
+{
+ struct lu_object *o;
+ struct lu_object *shadow;
+ struct lu_site *s;
+ cfs_hash_t *hs;
+ cfs_hash_bd_t bd;
+ __u64 version = 0;
/*
* This uses standard index maintenance protocol:
cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
o = htable_lookup(s, &bd, f, waiter, &version);
cfs_hash_bd_unlock(hs, &bd, 1);
- if (o != NULL)
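+ /*
+ * Only -ENOENT (a plain cache miss) falls through to allocation;
+ * a live object or ERR_PTR(-EAGAIN) from a death race is returned
+ * to the caller as-is.
+ */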
+ if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
return o;
/*
cfs_hash_bd_lock(hs, &bd, 1);
shadow = htable_lookup(s, &bd, f, waiter, &version);
- if (likely(shadow == NULL)) {
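+ /* -ENOENT again under lock: no concurrent thread inserted the
+ * same fid, so our freshly allocated object can go in */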
+ if (likely(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT)) {
struct lu_site_bkt_data *bkt;
bkt = cfs_hash_bd_extra_get(hs, &bd);
* objects of different "stacking" to be created within the same site.
*/
struct lu_object *lu_object_find_at(const struct lu_env *env,
- struct lu_device *dev,
- const struct lu_fid *f,
- const struct lu_object_conf *conf)
+ struct lu_device *dev,
+ const struct lu_fid *f,
+ const struct lu_object_conf *conf)
+{
+ struct lu_site_bkt_data *bkt;
+ struct lu_object *obj;
+ wait_queue_t wait;
+
+ while (1) {
+ obj = lu_object_find_try(env, dev, f, conf, &wait);
+ if (obj != ERR_PTR(-EAGAIN))
+ return obj;
+ /*
+ * lu_object_find_try() already added the waiter to the
+ * wait queue.
+ */
+ waitq_wait(&wait, TASK_UNINTERRUPTIBLE);
+ bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
+ remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
+ }
+}
+EXPORT_SYMBOL(lu_object_find_at);
+
+/**
+ * Try to find the object in the cache without waiting for a dying
+ * object to be released, and without allocating a new object if no
+ * cached one is found.
+ *
+ * A found object is marked LU_OBJECT_HEARD_BANSHEE so that it gets purged.
+ */
+void lu_object_purge(const struct lu_env *env, struct lu_device *dev,
+ const struct lu_fid *f)
{
- struct lu_site_bkt_data *bkt;
- struct lu_object *obj;
- cfs_waitlink_t wait;
+ struct lu_site *s = dev->ld_site;
+ cfs_hash_t *hs = s->ls_obj_hash;
+ cfs_hash_bd_t bd;
+ struct lu_object *o;
- while (1) {
- obj = lu_object_find_try(env, dev, f, conf, &wait);
- if (obj != ERR_PTR(-EAGAIN))
- return obj;
- /*
- * lu_object_find_try() already added waiter into the
- * wait queue.
- */
- cfs_waitq_wait(&wait, CFS_TASK_UNINT);
- bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
- cfs_waitq_del(&bkt->lsb_marche_funebre, &wait);
- }
+ cfs_hash_bd_get_and_lock(hs, f, &bd, 1);
+ o = htable_lookup_nowait(s, &bd, f);
+ cfs_hash_bd_unlock(hs, &bd, 1);
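+ /*
+ * Mark the cached object as dying and drop the reference taken by
+ * htable_lookup_nowait(); the object is freed once the last
+ * reference goes away.
+ */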
+ if (!IS_ERR(o)) {
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
+ lu_object_put(env, o);
+ }
}
-EXPORT_SYMBOL(lu_object_find_at);
+EXPORT_SYMBOL(lu_object_purge);
/**
* Find object with given fid, and return its slice belonging to given device.
*
* Size of lu_object is (arbitrary) taken as 1K (together with inode).
*/
- cache_size = cfs_num_physpages;
+ cache_size = totalram_pages;
#if BITS_PER_LONG == 32
/* limit hashtable size for lowmem systems to low RAM */
- if (cache_size > 1 << (30 - CFS_PAGE_SHIFT))
- cache_size = 1 << (30 - CFS_PAGE_SHIFT) * 3 / 4;
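+ /*
+ * 1 << (30 - PAGE_CACHE_SHIFT) is the number of pages in 1 GiB;
+ * cap the estimate at 3/4 of that.
+ */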
+ if (cache_size > 1 << (30 - PAGE_CACHE_SHIFT))
+ cache_size = (1 << (30 - PAGE_CACHE_SHIFT)) * 3 / 4;
#endif
/* clear off unreasonable cache setting. */
lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
}
cache_size = cache_size / 100 * lu_cache_percent *
- (CFS_PAGE_SIZE / 1024);
+ (PAGE_CACHE_SIZE / 1024);
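+ /*
+ * e.g. 4 GiB of 4 KiB pages (2^20 pages) at the default 20%:
+ * 2^20 / 100 * 20 * 4 is about 839k entries, i.e. a 20-bit table.
+ */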
for (bits = 1; (1 << bits) < cache_size; ++bits) {
;
return -ENOMEM;
}
- cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
- bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
- CFS_INIT_LIST_HEAD(&bkt->lsb_lru);
- cfs_waitq_init(&bkt->lsb_marche_funebre);
- }
+ cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
+ bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
+ CFS_INIT_LIST_HEAD(&bkt->lsb_lru);
+ init_waitqueue_head(&bkt->lsb_marche_funebre);
+ }
s->ls_stats = lprocfs_alloc_stats(LU_SS_LAST_STAT, 0);
if (s->ls_stats == NULL) {
* Initialize object \a o that is part of compound object \a h and was created
* by device \a d.
*/
-int lu_object_init(struct lu_object *o,
- struct lu_object_header *h, struct lu_device *d)
+int lu_object_init(struct lu_object *o, struct lu_object_header *h,
+ struct lu_device *d)
{
- memset(o, 0, sizeof *o);
- o->lo_header = h;
- o->lo_dev = d;
- lu_device_get(d);
- o->lo_dev_ref = lu_ref_add(&d->ld_reference, "lu_object", o);
- CFS_INIT_LIST_HEAD(&o->lo_linkage);
- return 0;
+ memset(o, 0, sizeof(*o));
+ o->lo_header = h;
+ o->lo_dev = d;
+ lu_device_get(d);
+ lu_ref_add_at(&d->ld_reference, &o->lo_dev_ref, "lu_object", o);
+ CFS_INIT_LIST_HEAD(&o->lo_linkage);
+
+ return 0;
}
EXPORT_SYMBOL(lu_object_init);
*/
void lu_object_fini(struct lu_object *o)
{
- struct lu_device *dev = o->lo_dev;
+ struct lu_device *dev = o->lo_dev;
- LASSERT(cfs_list_empty(&o->lo_linkage));
+ LASSERT(cfs_list_empty(&o->lo_linkage));
- if (dev != NULL) {
- lu_ref_del_at(&dev->ld_reference,
- o->lo_dev_ref , "lu_object", o);
- lu_device_put(dev);
- o->lo_dev = NULL;
- }
+ if (dev != NULL) {
+ lu_ref_del_at(&dev->ld_reference, &o->lo_dev_ref,
+ "lu_object", o);
+ lu_device_put(dev);
+ o->lo_dev = NULL;
+ }
}
EXPORT_SYMBOL(lu_object_fini);
LASSERT(key->lct_owner != NULL);
if ((ctx->lc_tags & LCT_NOREF) == 0) {
- LINVRNT(cfs_module_refcount(key->lct_owner) > 0);
- cfs_module_put(key->lct_owner);
+ LINVRNT(module_refcount(key->lct_owner) > 0);
+ module_put(key->lct_owner);
}
ctx->lc_value[index] = NULL;
}
void lu_context_key_quiesce(struct lu_context_key *key)
{
struct lu_context *ctx;
+ extern unsigned cl_env_cache_purge(unsigned nr);
if (!(key->lct_tags & LCT_QUIESCENT)) {
/*
* XXX layering violation.
*/
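+ /* drop all cached cl_env instances so that no cached context
+ * still holds a value registered under this key */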
+ cl_env_cache_purge(~0);
key->lct_tags |= LCT_QUIESCENT;
/*
* XXX memory barrier has to go here.
if (unlikely(IS_ERR(value)))
return PTR_ERR(value);
- LASSERT(key->lct_owner != NULL);
- if (!(ctx->lc_tags & LCT_NOREF))
- cfs_try_module_get(key->lct_owner);
- lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
- cfs_atomic_inc(&key->lct_used);
+ LASSERT(key->lct_owner != NULL);
+ if (!(ctx->lc_tags & LCT_NOREF))
+ try_module_get(key->lct_owner);
+ lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
+ cfs_atomic_inc(&key->lct_used);
/*
* This is the only place in the code, where an
* element of ctx->lc_value[] array is set to non-NULL
}
EXPORT_SYMBOL(lu_env_refill_by_tags);
-static struct cfs_shrinker *lu_site_shrinker = NULL;
+static struct shrinker *lu_site_shrinker;
typedef struct lu_site_stats{
unsigned lss_populated;
* inode, one for ea. Unfortunately setting this high value results in
* lu_object/inode cache consuming all the memory.
*/
- lu_site_shrinker = cfs_set_shrinker(CFS_DEFAULT_SEEKS, lu_cache_shrink);
+ lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink);
if (lu_site_shrinker == NULL)
return -ENOMEM;
void lu_global_fini(void)
{
if (lu_site_shrinker != NULL) {
- cfs_remove_shrinker(lu_site_shrinker);
+ remove_shrinker(lu_site_shrinker);
lu_site_shrinker = NULL;
}
struct lu_kmem_descr *iter = caches;
for (result = 0; iter->ckd_cache != NULL; ++iter) {
- *iter->ckd_cache = cfs_mem_cache_create(iter->ckd_name,
- iter->ckd_size,
- 0, 0);
+ *iter->ckd_cache = kmem_cache_create(iter->ckd_name,
+ iter->ckd_size,
+ 0, 0, NULL);
if (*iter->ckd_cache == NULL) {
result = -ENOMEM;
/* free all previously allocated caches */
*/
void lu_kmem_fini(struct lu_kmem_descr *caches)
{
- int rc;
-
for (; caches->ckd_cache != NULL; ++caches) {
if (*caches->ckd_cache != NULL) {
- rc = cfs_mem_cache_destroy(*caches->ckd_cache);
- LASSERTF(rc == 0, "couldn't destroy %s slab\n",
- caches->ckd_name);
+ kmem_cache_destroy(*caches->ckd_cache);
*caches->ckd_cache = NULL;
}
}
struct lu_fid *old = &o->lo_header->loh_fid;
struct lu_site_bkt_data *bkt;
struct lu_object *shadow;
- cfs_waitlink_t waiter;
+ wait_queue_t waiter;
cfs_hash_t *hs;
cfs_hash_bd_t bd;
__u64 version = 0;
cfs_hash_bd_get_and_lock(hs, (void *)fid, &bd, 1);
shadow = htable_lookup(s, &bd, fid, &waiter, &version);
/* supposed to be unique */
- LASSERT(shadow == NULL);
+ LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT);
*old = *fid;
bkt = cfs_hash_bd_extra_get(hs, &bd);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);