extern spinlock_t obd_types_lock;
+enum {
+ LU_CACHE_PERCENT_MAX = 50,
+ LU_CACHE_PERCENT_DEFAULT = 20
+};
+
+#define LU_CACHE_NR_MAX_ADJUST 128
+#define LU_CACHE_NR_UNLIMITED -1
+#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_ZFS_LIMIT 256
+
+#define LU_SITE_BITS_MIN 12
+#define LU_SITE_BITS_MAX 24
+/**
+ * total 256 buckets, we don't want too many buckets because:
+ * - consume too much memory
+ * - avoid unbalanced LRU list
+ */
+#define LU_SITE_BKT_BITS 8
+
+
+static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+CFS_MODULE_PARM(lu_cache_percent, "i", int, 0644,
+ "Percentage of memory to be used as lu_object cache");
+
+static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
+CFS_MODULE_PARM(lu_cache_nr, "l", long, 0644,
+ "Maximum number of objects in lu_object cache");
+
static void lu_object_free(const struct lu_env *env, struct lu_object *o);
/**
}
EXPORT_SYMBOL(lu_object_find);
+/*
+ * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because
+ * the calculation for the number of objects to reclaim is not covered by
+ * a lock the maximum number of objects is capped by LU_CACHE_MAX_ADJUST.
+ * This ensures that many concurrent threads will not accidentally purge
+ * the entire cache.
+ */
+static void lu_object_limit(const struct lu_env *env,
+ struct lu_device *dev)
+{
+ __u64 size, nr;
+
+ if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+ return;
+
+ size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
+ nr = (__u64)lu_cache_nr;
+ if (size > nr)
+ lu_site_purge(env, dev->ld_site,
+ MIN(size - nr, LU_CACHE_NR_MAX_ADJUST));
+
+ return;
+}
+
static struct lu_object *lu_object_new(const struct lu_env *env,
struct lu_device *dev,
const struct lu_fid *f,
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
bkt->lsb_busy++;
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
bkt->lsb_busy++;
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
}
EXPORT_SYMBOL(lu_site_print);
-enum {
- LU_CACHE_PERCENT_MAX = 50,
- LU_CACHE_PERCENT_DEFAULT = 20
-};
-
-static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
-CFS_MODULE_PARM(lu_cache_percent, "i", int, 0644,
- "Percentage of memory to be used as lu_object cache");
-
/**
* Return desired hash table order.
*/
-static int lu_htable_order(void)
+static int lu_htable_order(struct lu_device *top)
{
unsigned long cache_size;
int bits;
+ /*
+ * For ZFS based OSDs the cache should be disabled by default. This
+ * allows the ZFS ARC maximum flexibility in determining what buffers
+ * to cache. If Lustre has objects or buffer which it wants to ensure
+ * always stay cached it must maintain a hold on them.
+ */
+ if (strcmp(top->ld_type->ldt_name, LUSTRE_OSD_ZFS_NAME) == 0) {
+ lu_cache_percent = 1;
+ lu_cache_nr = LU_CACHE_NR_ZFS_LIMIT;
+ return LU_SITE_BITS_MIN;
+ }
+
/*
* Calculate hash table size, assuming that we want reasonable
* performance when 20% of total memory is occupied by cache of
EXPORT_SYMBOL(lu_dev_del_linkage);
/**
- * Initialize site \a s, with \a d as the top level device.
- */
-#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 24
-/**
- * total 256 buckets, we don't want too many buckets because:
- * - consume too much memory
- * - avoid unbalanced LRU list
- */
-#define LU_SITE_BKT_BITS 8
-
+ * Initialize site \a s, with \a d as the top level device.
+ */
int lu_site_init(struct lu_site *s, struct lu_device *top)
{
- struct lu_site_bkt_data *bkt;
- cfs_hash_bd_t bd;
- char name[16];
- int bits;
- int i;
- ENTRY;
-
- memset(s, 0, sizeof *s);
- bits = lu_htable_order();
- snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
- for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
- bits >= LU_SITE_BITS_MIN; bits--) {
- s->ls_obj_hash = cfs_hash_create(name, bits, bits,
- bits - LU_SITE_BKT_BITS,
- sizeof(*bkt), 0, 0,
- &lu_site_hash_ops,
- CFS_HASH_SPIN_BKTLOCK |
- CFS_HASH_NO_ITEMREF |
- CFS_HASH_DEPTH |
- CFS_HASH_ASSERT_EMPTY);
- if (s->ls_obj_hash != NULL)
- break;
- }
+ struct lu_site_bkt_data *bkt;
+ cfs_hash_bd_t bd;
+ char name[16];
+ int bits;
+ int i;
+ ENTRY;
- if (s->ls_obj_hash == NULL) {
- CERROR("failed to create lu_site hash with bits: %d\n", bits);
- return -ENOMEM;
- }
+ memset(s, 0, sizeof *s);
+ bits = lu_htable_order(top);
+ snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
+ for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
+ bits >= LU_SITE_BITS_MIN; bits--) {
+ s->ls_obj_hash = cfs_hash_create(name, bits, bits,
+ bits - LU_SITE_BKT_BITS,
+ sizeof(*bkt), 0, 0,
+ &lu_site_hash_ops,
+ CFS_HASH_SPIN_BKTLOCK |
+ CFS_HASH_NO_ITEMREF |
+ CFS_HASH_DEPTH |
+ CFS_HASH_ASSERT_EMPTY |
+ CFS_HASH_COUNTER);
+ if (s->ls_obj_hash != NULL)
+ break;
+ }
+
+ if (s->ls_obj_hash == NULL) {
+ CERROR("failed to create lu_site hash with bits: %d\n", bits);
+ return -ENOMEM;
+ }
cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);