# LC_FUNC_REGISTER_CACHE
#
# if register_cache() is defined by kernel
+#
+# There are two ways to shrink a customized cache in Linux kernels. Kernels
+# prior to 2.6.5(?) use register_cache(), while later kernels use
+# set_shrinker() instead.
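+#
+# Roughly (as wired up later in this patch): register_cache() takes a
+# struct cache_definition with a shrink callback, while
+# set_shrinker(DEFAULT_SEEKS, shrinker_fn) returns a struct shrinker *
+# that is later torn down with remove_shrinker().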
#
AC_DEFUN([LC_FUNC_REGISTER_CACHE],
-[AC_MSG_CHECKING([if kernel defines register_cache()])
+[AC_MSG_CHECKING([if kernel defines cache pressure hook])
LB_LINUX_TRY_COMPILE([
- #include <linux/list.h>
- #include <linux/cache_def.h>
+ #include <linux/mm.h>
],[
- struct cache_definition cache;
+ shrinker_t shrinker;
+
+ set_shrinker(1, shrinker);
],[
- AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
- AC_MSG_CHECKING([if kernel expects return from cache shrink function])
- HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
- if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
- AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
- AC_MSG_RESULT(yes)
- else
- AC_MSG_RESULT(no)
- fi
+ AC_MSG_RESULT([set_shrinker])
+ AC_DEFINE(HAVE_SHRINKER_CACHE, 1, [shrinker_cache found])
+ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [shrinkers should return int])
],[
- AC_MSG_RESULT([no])
+ LB_LINUX_TRY_COMPILE([
+ #include <linux/list.h>
+ #include <linux/cache_def.h>
+ ],[
+ struct cache_definition cache;
+ ],[
+ AC_MSG_RESULT([register_cache])
+ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
+        AC_MSG_CHECKING([if kernel expects return from cache shrink function])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+ LB_LINUX_TRY_COMPILE([
+ #include <linux/list.h>
+ #include <linux/cache_def.h>
+ ],[
+ struct cache_definition c;
+ c.shrinker = (int (*)(int, unsigned int))1;
+ ],[
+ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1,
+ [kernel expects return from shrink_cache])
+ AC_MSG_RESULT(yes)
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+ ],[
+ AC_MSG_RESULT([no])
+ ])
])
])
LC_QUOTA_READ
LC_COOKIE_FOLLOW_LINK
LC_FUNC_RCU
+ LC_PERCPU_COUNTER
# does the kernel have VFS intent patches?
LC_VFS_INTENT_PATCHES
])
])
+AC_DEFUN([LC_PERCPU_COUNTER],
+[AC_MSG_CHECKING([if struct percpu_counter is defined])
+LB_LINUX_TRY_COMPILE([
+ #include <linux/percpu_counter.h>
+],[],[
+ AC_DEFINE(HAVE_PERCPU_COUNTER, 1, [percpu_counter found])
+ AC_MSG_RESULT([yes])
+
+        AC_MSG_CHECKING([if percpu_counter_init takes two arguments])
+ LB_LINUX_TRY_COMPILE([
+ #include <linux/percpu_counter.h>
+ ],[
+ struct percpu_counter c;
+ percpu_counter_init(&c, 0);
+ ],[
+                AC_DEFINE(HAVE_PERCPU_2ND_ARG, 1, [percpu_counter_init has two arguments])
+ AC_MSG_RESULT([yes])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+],[
+ AC_MSG_RESULT([no])
+])
+])
+
#
# LC_CONFIGURE
#
vfs_rename(old,old_dir,new,new_dir)
#endif
+#ifndef get_cpu
+#ifdef CONFIG_PREEMPT
+#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
+#define put_cpu() preempt_enable()
+#else
+#define get_cpu() smp_processor_id()
+#define put_cpu()
+#endif
+#endif /* get_cpu & put_cpu */
+
+#ifndef for_each_possible_cpu
+#define for_each_possible_cpu(i) for_each_cpu(i)
+#endif
+
+#ifndef cpu_to_node
+#define cpu_to_node(cpu) 0
+#endif
+
#endif /* __KERNEL__ */
#endif /* _COMPAT25_H */
#include <linux/lustre_compat25.h>
#include <linux/pagemap.h>
+#ifdef HAVE_PERCPU_COUNTER
+#include <linux/percpu_counter.h>
+
+typedef struct percpu_counter lcounter_t;
+
+#define lcounter_read(counter) (int)percpu_counter_read(counter)
+#define lcounter_inc(counter) percpu_counter_inc(counter)
+#define lcounter_dec(counter) percpu_counter_dec(counter)
+
+#ifdef HAVE_PERCPU_2ND_ARG
+# define lcounter_init(counter) percpu_counter_init(counter, 0)
+#else
+# define lcounter_init(counter) percpu_counter_init(counter)
+#endif
+
+#define lcounter_destroy(counter) percpu_counter_destroy(counter)
+
+#else
+typedef struct { atomic_t count; } lcounter_t;
+
+#define lcounter_read(counter) atomic_read(&counter->count)
+#define lcounter_inc(counter) atomic_inc(&counter->count)
+#define lcounter_dec(counter) atomic_dec(&counter->count)
+#define lcounter_init(counter) atomic_set(&counter->count, 0)
+#define lcounter_destroy(counter)
+
+#endif /* if defined HAVE_PERCPU_COUNTER */
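+
+/* Typical usage (see ll_init_sbi()/ll_free_sbi() below):
+ *   lcounter_init(&sbi->ll_async_page_count);
+ *   lcounter_inc(&sbi->ll_async_page_count);
+ *   lcounter_read(&sbi->ll_async_page_count);
+ *   lcounter_destroy(&sbi->ll_async_page_count);
+ */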
+
/* lprocfs.c */
enum {
LPROC_LL_DIRTY_HITS = 0,
_NR_RA_STAT,
};
+#define LL_RA_STAT _NR_RA_STAT
+#define LL_RA_STAT_STRINGS { \
+ [RA_STAT_HIT] = "hits", \
+ [RA_STAT_MISS] = "misses", \
+ [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive", \
+ [RA_STAT_MISS_IN_WINDOW] = "miss inside window", \
+ [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page", \
+ [RA_STAT_FAILED_MATCH] = "failed lock match", \
+ [RA_STAT_DISCARDED] = "read but discarded", \
+ [RA_STAT_ZERO_LEN] = "zero length file", \
+ [RA_STAT_ZERO_WINDOW] = "zero size window", \
+ [RA_STAT_EOF] = "read-ahead to EOF", \
+ [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", \
+ [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",\
+}
+
struct ll_ra_info {
- unsigned long ra_cur_pages;
+ atomic_t ra_cur_pages;
unsigned long ra_max_pages;
unsigned long ra_max_read_ahead_whole_pages;
- unsigned long ra_stats[_NR_RA_STAT];
};
/* LL_HIST_MAX=32 causes an overflow */
struct list_head et_entries[EE_HASHES];
};
+/* percpu data structure for lustre lru page list */
+struct ll_pglist_data {
+ spinlock_t llpd_lock; /* lock to protect llpg_list */
+ struct list_head llpd_list; /* all pages (llap_pglist_item) */
+ unsigned long llpd_gen; /* generation # of this list */
+ unsigned long llpd_count; /* How many pages in this list */
+ atomic_t llpd_sample_count;
+ unsigned long llpd_reblnc_count;
+ /* the pages in this list shouldn't be over this number */
+ unsigned long llpd_budget;
+ int llpd_cpu;
+ /* which page the pglist data is in */
+ struct page *llpd_page;
+
+ /* stats */
+ unsigned long llpd_hit;
+ unsigned long llpd_miss;
+ unsigned long llpd_cross;
+};
+
struct ll_sb_info {
struct list_head ll_list;
- /* this protects pglist and ra_info. It isn't safe to
- * grab from interrupt contexts */
+        /* this protects pglist (only ll_async_page_max) and ra_info.
+ * It isn't safe to grab from interrupt contexts. */
spinlock_t ll_lock;
spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
+ /* reblnc lock protects llpd_budget */
+ spinlock_t ll_async_page_reblnc_lock;
+ unsigned long ll_async_page_reblnc_count;
+ unsigned long ll_async_page_sample_max;
+ /* I defined this array here rather than in ll_pglist_data
+ * because it is always accessed by only one cpu. -jay */
+ unsigned long *ll_async_page_sample;
unsigned long ll_async_page_max;
- unsigned long ll_async_page_count;
- unsigned long ll_pglist_gen;
- struct list_head ll_pglist; /* all pages (llap_pglist_item) */
+ unsigned long ll_async_page_clock_hand;
+ lcounter_t ll_async_page_count;
+ struct ll_pglist_data **ll_pglist;
+
+ struct lprocfs_stats *ll_ra_stats;
unsigned ll_contention_time; /* seconds */
unsigned ll_lockless_truncate_enable; /* true/false */
struct eacl_table ll_et;
};
-#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
+
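+/* Per-cpu page list accessors. LL_PGLIST_DATA() uses smp_processor_id(), so
+ * callers must have preemption disabled (e.g. via get_cpu()). */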
+#define LL_PGLIST_DATA_CPU(sbi, cpu) ((sbi)->ll_pglist[cpu])
+#define LL_PGLIST_DATA(sbi) LL_PGLIST_DATA_CPU(sbi, smp_processor_id())
+
+static inline struct ll_pglist_data *ll_pglist_cpu_lock(
+ struct ll_sb_info *sbi,
+ int cpu)
+{
+ spin_lock(&sbi->ll_pglist[cpu]->llpd_lock);
+ return LL_PGLIST_DATA_CPU(sbi, cpu);
+}
+
+static inline void ll_pglist_cpu_unlock(struct ll_sb_info *sbi, int cpu)
+{
+ spin_unlock(&sbi->ll_pglist[cpu]->llpd_lock);
+}
+
+static inline struct ll_pglist_data *ll_pglist_double_lock(
+ struct ll_sb_info *sbi,
+ int cpu, struct ll_pglist_data **pd_cpu)
+{
+ int current_cpu = get_cpu();
+
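+        /* Always take the lower-numbered cpu's lock first so concurrent
+         * double-locks agree on ordering and cannot deadlock. */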
+ if (cpu == current_cpu) {
+ ll_pglist_cpu_lock(sbi, cpu);
+ } else if (current_cpu < cpu) {
+ ll_pglist_cpu_lock(sbi, current_cpu);
+ ll_pglist_cpu_lock(sbi, cpu);
+ } else {
+ ll_pglist_cpu_lock(sbi, cpu);
+ ll_pglist_cpu_lock(sbi, current_cpu);
+ }
+
+ if (pd_cpu)
+ *pd_cpu = LL_PGLIST_DATA_CPU(sbi, cpu);
+
+ return LL_PGLIST_DATA(sbi);
+}
+
+static inline void ll_pglist_double_unlock(struct ll_sb_info *sbi, int cpu)
+{
+ int current_cpu = smp_processor_id();
+ if (cpu == current_cpu) {
+ ll_pglist_cpu_unlock(sbi, cpu);
+ } else {
+ ll_pglist_cpu_unlock(sbi, cpu);
+ ll_pglist_cpu_unlock(sbi, current_cpu);
+ }
+ put_cpu();
+}
+
+static inline struct ll_pglist_data *ll_pglist_lock(struct ll_sb_info *sbi)
+{
+ ll_pglist_cpu_lock(sbi, get_cpu());
+ return LL_PGLIST_DATA(sbi);
+}
+
+static inline void ll_pglist_unlock(struct ll_sb_info *sbi)
+{
+ ll_pglist_cpu_unlock(sbi, smp_processor_id());
+ put_cpu();
+}
struct ll_ra_read {
pgoff_t lrr_start;
llap_ra_used:1,
llap_ignore_quota:1,
llap_nocache:1,
- llap_lockless_io_page:1;
+ llap_lockless_io_page:1,
+ llap_reserved:7;
+ unsigned int llap_pglist_cpu:16;
void *llap_cookie;
struct page *llap_page;
struct list_head llap_pending_write;
extern char *llap_origins[];
#ifdef HAVE_REGISTER_CACHE
+#include <linux/cache_def.h>
#define ll_register_cache(cache) register_cache(cache)
#define ll_unregister_cache(cache) unregister_cache(cache)
+#elif defined(HAVE_SHRINKER_CACHE)
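+/* Emulate the old register_cache() interface on top of
+ * set_shrinker()/remove_shrinker() for kernels that only provide the
+ * shrinker API. */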
+struct cache_definition {
+ const char *name;
+ shrinker_t shrink;
+ struct shrinker *shrinker;
+};
+
+#define ll_register_cache(cache) do { \
+ struct cache_definition *c = (cache); \
+ c->shrinker = set_shrinker(DEFAULT_SEEKS, c->shrink); \
+} while(0)
+
+#define ll_unregister_cache(cache) do { \
+ remove_shrinker((cache)->shrinker); \
+ (cache)->shrinker = NULL; \
+} while(0)
#else
#define ll_register_cache(cache) do {} while (0)
#define ll_unregister_cache(cache) do {} while (0)
struct super_block *);
void lustre_dump_dentry(struct dentry *, int recur);
void lustre_dump_inode(struct inode *);
-struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
+struct ll_async_page *llite_pglist_next_llap(struct list_head *head,
struct list_head *list);
int ll_obd_statfs(struct inode *inode, void *arg);
int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
#define log2(n) ffz(~(n))
#endif
+static inline void ll_pglist_fini(struct ll_sb_info *sbi)
+{
+ struct page *page;
+ int i;
+
+ if (sbi->ll_pglist == NULL)
+ return;
+
+ for_each_possible_cpu(i) {
+ page = sbi->ll_pglist[i]->llpd_page;
+ if (page) {
+ sbi->ll_pglist[i] = NULL;
+ __free_page(page);
+ }
+ }
+
+ OBD_FREE(sbi->ll_pglist, sizeof(void *)*num_possible_cpus());
+ sbi->ll_pglist = NULL;
+}
+
+static inline int ll_pglist_init(struct ll_sb_info *sbi)
+{
+ struct ll_pglist_data *pd;
+ unsigned long budget;
+ int i, color = 0;
+ ENTRY;
+
+ OBD_ALLOC(sbi->ll_pglist, sizeof(void *) * num_possible_cpus());
+ if (sbi->ll_pglist == NULL)
+ RETURN(-ENOMEM);
+
+ budget = sbi->ll_async_page_max / num_online_cpus();
+ for_each_possible_cpu(i) {
+ struct page *page = alloc_pages_node(cpu_to_node(i),
+ GFP_KERNEL, 0);
+ if (page == NULL) {
+ ll_pglist_fini(sbi);
+ RETURN(-ENOMEM);
+ }
+
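+                /* Cache coloring: stagger each per-cpu structure's offset
+                 * within its page so the hot fields of different cpus are
+                 * less likely to map to the same cache set. */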
+ if (color + L1_CACHE_ALIGN(sizeof(*pd)) > PAGE_SIZE)
+ color = 0;
+
+ pd = (struct ll_pglist_data *)(page_address(page) + color);
+ memset(pd, 0, sizeof(*pd));
+ spin_lock_init(&pd->llpd_lock);
+ INIT_LIST_HEAD(&pd->llpd_list);
+ if (cpu_online(i))
+ pd->llpd_budget = budget;
+ pd->llpd_cpu = i;
+ pd->llpd_page = page;
+ atomic_set(&pd->llpd_sample_count, 0);
+ sbi->ll_pglist[i] = pd;
+ color += L1_CACHE_ALIGN(sizeof(*pd));
+ }
+
+ RETURN(0);
+}
static struct ll_sb_info *ll_init_sbi(void)
{
if (!sbi)
RETURN(NULL);
+ OBD_ALLOC(sbi->ll_async_page_sample, sizeof(long)*num_possible_cpus());
+ if (sbi->ll_async_page_sample == NULL)
+ GOTO(out, 0);
+
spin_lock_init(&sbi->ll_lock);
spin_lock_init(&sbi->ll_lco.lco_lock);
spin_lock_init(&sbi->ll_pp_extent_lock);
spin_lock_init(&sbi->ll_process_lock);
sbi->ll_rw_stats_on = 0;
- INIT_LIST_HEAD(&sbi->ll_pglist);
si_meminfo(&si);
pages = si.totalram - si.totalhigh;
sbi->ll_async_page_max = pages / 2;
else
sbi->ll_async_page_max = (pages / 4) * 3;
+
+ lcounter_init(&sbi->ll_async_page_count);
+ spin_lock_init(&sbi->ll_async_page_reblnc_lock);
+ sbi->ll_async_page_sample_max = 64 * num_online_cpus();
+ sbi->ll_async_page_reblnc_count = 0;
+ sbi->ll_async_page_clock_hand = 0;
+ if (ll_pglist_init(sbi))
+ GOTO(out, 0);
+
sbi->ll_ra_info.ra_max_pages = min(pages / 32,
SBI_DEFAULT_READAHEAD_MAX);
sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
sbi->ll_sa_max = LL_SA_RPC_DEF;
RETURN(sbi);
+
+out:
+ if (sbi->ll_async_page_sample)
+ OBD_FREE(sbi->ll_async_page_sample,
+ sizeof(long) * num_possible_cpus());
+ ll_pglist_fini(sbi);
+ OBD_FREE(sbi, sizeof(*sbi));
+ RETURN(NULL);
}
void ll_free_sbi(struct super_block *sb)
ENTRY;
if (sbi != NULL) {
+ ll_pglist_fini(sbi);
spin_lock(&ll_sb_lock);
list_del(&sbi->ll_list);
spin_unlock(&ll_sb_lock);
+ lcounter_destroy(&sbi->ll_async_page_count);
+ OBD_FREE(sbi->ll_async_page_sample,
+ sizeof(long) * num_possible_cpus());
OBD_FREE(sbi, sizeof(*sbi));
}
EXIT;
EXIT;
} /* client_put_super */
-#ifdef HAVE_REGISTER_CACHE
-#include <linux/cache_def.h>
-#ifdef HAVE_CACHE_RETURN_INT
+#if defined(HAVE_REGISTER_CACHE) || defined(HAVE_SHRINKER_CACHE)
+
+#if defined(HAVE_CACHE_RETURN_INT)
static int
#else
static void
list_for_each_entry(sbi, &ll_super_blocks, ll_list)
count += llap_shrink_cache(sbi, priority);
-#ifdef HAVE_CACHE_RETURN_INT
+#if defined(HAVE_CACHE_RETURN_INT)
return count;
#endif
}
.name = "llap_cache",
.shrink = ll_shrink_cache
};
-#endif /* HAVE_REGISTER_CACHE */
+#endif /* HAVE_REGISTER_CACHE || HAVE_SHRINKER_CACHE */
struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
{
[LLAP_ORIGIN_LOCKLESS_IO] = "ls"
};
-struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
+struct ll_async_page *llite_pglist_next_llap(struct list_head *head,
struct list_head *list)
{
struct ll_async_page *llap;
struct list_head *pos;
list_for_each(pos, list) {
- if (pos == &sbi->ll_pglist)
+ if (pos == head)
return NULL;
llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
if (llap->llap_page == NULL)
#ifdef LPROCFS
/* /proc/lustre/llite mount point registration */
struct file_operations llite_dump_pgcache_fops;
-struct file_operations ll_ra_stats_fops;
struct file_operations ll_rw_extents_stats_fops;
struct file_operations ll_rw_extents_stats_pp_fops;
struct file_operations ll_rw_offset_stats_fops;
{
struct super_block *sb = data;
struct ll_sb_info *sbi = ll_s2sbi(sb);
- int mult, rc, pages_number;
+ unsigned long budget;
+ int mult, rc, pages_number, cpu;
mult = 1 << (20 - CFS_PAGE_SHIFT);
rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
/* Not set up yet, don't call llap_shrink_cache */
return count;
- if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
- llap_shrink_cache(sbi, 0);
+ spin_lock(&sbi->ll_async_page_reblnc_lock);
+ budget = sbi->ll_async_page_max / num_online_cpus();
+ for_each_online_cpu(cpu)
+ LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget = budget;
+ spin_unlock(&sbi->ll_async_page_reblnc_lock);
+
+ if (lcounter_read(&sbi->ll_async_page_count) >= sbi->ll_async_page_max)
+ llap_shrink_cache(sbi, -1);
return count;
}
+static int ll_rd_pgcache_bnlc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct super_block *sb = data;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct ll_pglist_data *pd;
+ unsigned long total_budget = 0;
+ int n = 0, cpu;
+
+        n += snprintf(page + n, count - n,
+ "cpu\tpage count\tbudget\t\treblnc count\tgen\thit\tmiss\tcross\n");
+ for_each_online_cpu(cpu) {
+ pd = LL_PGLIST_DATA_CPU(sbi, cpu);
+ n += snprintf(page + n, count - n,
+ "%d\t%-8lu\t%-8lu\t%-8lu\t%lu\t%lu\t%lu\t%lu\n",
+ cpu, pd->llpd_count, pd->llpd_budget,
+ pd->llpd_reblnc_count, pd->llpd_gen,
+ pd->llpd_hit, pd->llpd_miss, pd->llpd_cross);
+ total_budget += pd->llpd_budget;
+ }
+ n += snprintf(page + n, count - n,
+ "Total budget: %lu, page max: %lu, rebalance cnt: %lu\n",
+ total_budget, sbi->ll_async_page_max,
+ sbi->ll_async_page_reblnc_count);
+ *eof = 1;
+ return n;
+}
+
static int ll_rd_checksum(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
{ "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb,
ll_wr_max_read_ahead_whole_mb, 0 },
{ "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 },
+        { "pgcache_balance",    ll_rd_pgcache_bnlc, 0, 0 },
{ "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 },
{ "max_rw_chunk", ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 },
{ "stats_track_pid", ll_rd_track_pid, ll_wr_track_pid, 0 },
struct obd_device *obd;
char name[MAX_STRING_SIZE + 1], *ptr;
int err, id, len, rc;
+ static const char *ra_stats_string[] = LL_RA_STAT_STRINGS;
ENTRY;
memset(lvars, 0, sizeof(lvars));
if (rc)
CWARN("Error adding the dump_page_cache file\n");
- rc = lprocfs_seq_create(sbi->ll_proc_root, "read_ahead_stats", 0644,
- &ll_ra_stats_fops, sbi);
- if (rc)
- CWARN("Error adding the read_ahead_stats file\n");
-
rc = lprocfs_seq_create(sbi->ll_proc_root, "extents_stats", 0644,
&ll_rw_extents_stats_fops, sbi);
if (rc)
if (err)
GOTO(out, err);
+ sbi->ll_ra_stats = lprocfs_alloc_stats(LL_RA_STAT,
+ LPROCFS_STATS_FLAG_PERCPU);
+ if (sbi->ll_ra_stats == NULL)
+ GOTO(out, err = -ENOMEM);
+
+ for (id = 0; id < LL_RA_STAT; id++)
+ lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
+ ra_stats_string[id], "pages");
+ err = lprocfs_register_stats(sbi->ll_proc_root, "read_ahead_stats",
+ sbi->ll_ra_stats);
+ if (err)
+ GOTO(out, err);
+
err = lprocfs_add_vars(sbi->ll_proc_root, lprocfs_llite_obd_vars, sb);
if (err)
GOTO(out, err);
out:
if (err) {
lprocfs_remove(&sbi->ll_proc_root);
+ lprocfs_free_stats(&sbi->ll_ra_stats);
lprocfs_free_stats(&sbi->ll_stats);
}
RETURN(err);
{
if (sbi->ll_proc_root) {
lprocfs_remove(&sbi->ll_proc_root);
+ lprocfs_free_stats(&sbi->ll_ra_stats);
lprocfs_free_stats(&sbi->ll_stats);
}
}
{
struct ll_async_page *llap, *dummy_llap = seq->private;
struct ll_sb_info *sbi = dummy_llap->llap_cookie;
+ struct ll_pglist_data *pd;
+ int cpu = dummy_llap->llap_pglist_cpu;
/* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
* it in our own state */
return 0;
}
- spin_lock(&sbi->ll_lock);
-
- llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
+ pd = ll_pglist_cpu_lock(sbi, cpu);
+ llap = llite_pglist_next_llap(&pd->llpd_list,
+ &dummy_llap->llap_pglist_item);
if (llap != NULL) {
- int has_flags = 0;
+ int has_flags = 0, i;
struct page *page = llap->llap_page;
+ unsigned long gen = 0UL;
LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
llap->llap_origin);
+ for_each_online_cpu(i)
+ gen += LL_PGLIST_DATA_CPU(sbi, i)->llpd_gen;
+
seq_printf(seq," %5lu | %p %p %s %s %s %s | %p %lu/%u(%p) "
"%lu %u [",
- sbi->ll_pglist_gen,
+ gen,
llap, llap->llap_cookie,
llap_origins[llap->llap_origin],
llap->llap_write_queued ? "wq" : "- ",
else
seq_puts(seq, "]\n");
}
-
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
return 0;
}
{
struct ll_async_page *llap, *dummy_llap = seq->private;
struct ll_sb_info *sbi = dummy_llap->llap_cookie;
+ struct ll_pglist_data *pd, *next;
+ int cpu = dummy_llap->llap_pglist_cpu;
/* bail if we just displayed the banner */
if (dummy_llap->llap_magic == 0) {
/* we've just displayed the llap that is after us in the list.
* we advance to a position beyond it, returning null if there
* isn't another llap in the list beyond that new position. */
- spin_lock(&sbi->ll_lock);
- llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
+ pd = ll_pglist_cpu_lock(sbi, cpu);
+ llap = llite_pglist_next_llap(&pd->llpd_list,
+ &dummy_llap->llap_pglist_item);
list_del_init(&dummy_llap->llap_pglist_item);
if (llap) {
list_add(&dummy_llap->llap_pglist_item,&llap->llap_pglist_item);
- llap =llite_pglist_next_llap(sbi,&dummy_llap->llap_pglist_item);
+ llap = llite_pglist_next_llap(&pd->llpd_list,
+ &dummy_llap->llap_pglist_item);
}
- spin_unlock(&sbi->ll_lock);
+ if (llap == NULL) {
+ int i = cpu + 1;
+ for (next = NULL; i < num_possible_cpus(); i++, next = NULL) {
+ next = ll_pglist_cpu_lock(sbi, i);
+ if (!list_empty(&next->llpd_list))
+ break;
+ ll_pglist_cpu_unlock(sbi, i);
+ }
+ if (next != NULL) {
+ list_move(&dummy_llap->llap_pglist_item,
+ &next->llpd_list);
+ dummy_llap->llap_pglist_cpu = i;
+ ll_pglist_cpu_unlock(sbi, cpu);
+ llap = llite_pglist_next_llap(&next->llpd_list,
+ &dummy_llap->llap_pglist_item);
+ LASSERT(llap);
+ cpu = i;
+ }
+ }
+ ll_pglist_cpu_unlock(sbi, cpu);
++*pos;
if (llap == NULL) {
struct ll_async_page *dummy_llap;
struct seq_file *seq;
struct ll_sb_info *sbi = dp->data;
+ struct ll_pglist_data *pd;
int rc = -ENOMEM;
LPROCFS_ENTRY_AND_CHECK(dp);
dummy_llap->llap_page = NULL;
dummy_llap->llap_cookie = sbi;
dummy_llap->llap_magic = 0;
+ dummy_llap->llap_pglist_cpu = 0;
rc = seq_open(file, &llite_dump_pgcache_seq_sops);
if (rc) {
seq = file->private_data;
seq->private = dummy_llap;
- spin_lock(&sbi->ll_lock);
- list_add(&dummy_llap->llap_pglist_item, &sbi->ll_pglist);
- spin_unlock(&sbi->ll_lock);
+ pd = ll_pglist_cpu_lock(sbi, 0);
+ list_add(&dummy_llap->llap_pglist_item, &pd->llpd_list);
+ ll_pglist_cpu_unlock(sbi, 0);
out:
if (rc)
struct seq_file *seq = file->private_data;
struct ll_async_page *dummy_llap = seq->private;
struct ll_sb_info *sbi = dummy_llap->llap_cookie;
+ int cpu = dummy_llap->llap_pglist_cpu;
- spin_lock(&sbi->ll_lock);
+ ll_pglist_cpu_lock(sbi, cpu);
if (!list_empty(&dummy_llap->llap_pglist_item))
list_del_init(&dummy_llap->llap_pglist_item);
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
OBD_FREE(dummy_llap, sizeof(*dummy_llap));
return lprocfs_seq_release(inode, file);
.release = llite_dump_pgcache_seq_release,
};
-static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timeval now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- int i;
- static char *ra_stat_strings[] = {
- [RA_STAT_HIT] = "hits",
- [RA_STAT_MISS] = "misses",
- [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
- [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
- [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
- [RA_STAT_FAILED_MATCH] = "failed lock match",
- [RA_STAT_DISCARDED] = "read but discarded",
- [RA_STAT_ZERO_LEN] = "zero length file",
- [RA_STAT_ZERO_WINDOW] = "zero size window",
- [RA_STAT_EOF] = "read-ahead to EOF",
- [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
- [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
- };
-
- do_gettimeofday(&now);
-
- spin_lock(&sbi->ll_lock);
-
- seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
- now.tv_sec, now.tv_usec);
- seq_printf(seq, "pending issued pages: %lu\n",
- ra->ra_cur_pages);
-
- for(i = 0; i < _NR_RA_STAT; i++)
- seq_printf(seq, "%-25s %lu\n", ra_stat_strings[i],
- ra->ra_stats[i]);
-
- spin_unlock(&sbi->ll_lock);
-
- return 0;
-}
-
-static ssize_t ll_ra_stats_seq_write(struct file *file, const char *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_ra_info *ra = &sbi->ll_ra_info;
-
- spin_lock(&sbi->ll_lock);
- memset(ra->ra_stats, 0, sizeof(ra->ra_stats));
- spin_unlock(&sbi->ll_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_ra_stats);
-
#define pct(a,b) (b ? a * 100 / b : 0)
static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
return llap;
}
-/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
+/* Try to reap @target pages in the specific @cpu's async page list.
*
* There is an llap attached onto every page in lustre, linked off @sbi.
* We add an llap to the list so we don't lose our place during list walking.
* If llaps in the list are being moved they will only move to the end
* of the LRU, and we aren't terribly interested in those pages here (we
- * start at the beginning of the list where the least-used llaps are.
- */
-int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
+ * start at the beginning of the list where the least-used llaps are). */
+static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
+ int cpu, int target)
{
struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
- unsigned long total, want, count = 0;
-
- total = sbi->ll_async_page_count;
-
- /* There can be a large number of llaps (600k or more in a large
- * memory machine) so the VM 1/6 shrink ratio is likely too much.
- * Since we are freeing pages also, we don't necessarily want to
- * shrink so much. Limit to 40MB of pages + llaps per call. */
- if (shrink_fraction == 0)
- want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
- else
- want = (total + shrink_fraction - 1) / shrink_fraction;
-
- if (want > 40 << (20 - CFS_PAGE_SHIFT))
- want = 40 << (20 - CFS_PAGE_SHIFT);
-
- CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
- want, total, shrink_fraction);
-
- spin_lock(&sbi->ll_lock);
- list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
-
- while (--total >= 0 && count < want) {
+ struct ll_pglist_data *pd;
+ struct list_head *head;
+ int count = 0;
+
+ pd = ll_pglist_cpu_lock(sbi, cpu);
+ head = &pd->llpd_list;
+ list_add(&dummy_llap.llap_pglist_item, head);
+ while (count < target) {
struct page *page;
int keep;
if (unlikely(need_resched())) {
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
cond_resched();
- spin_lock(&sbi->ll_lock);
+ ll_pglist_cpu_lock(sbi, cpu);
}
- llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
+ llap = llite_pglist_next_llap(head,
+ &dummy_llap.llap_pglist_item);
list_del_init(&dummy_llap.llap_pglist_item);
if (llap == NULL)
break;
}
page_cache_get(page);
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
if (page->mapping != NULL) {
ll_teardown_mmaps(page->mapping,
unlock_page(page);
page_cache_release(page);
- spin_lock(&sbi->ll_lock);
+ ll_pglist_cpu_lock(sbi, cpu);
}
list_del(&dummy_llap.llap_pglist_item);
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
+
+        CDEBUG(D_CACHE, "shrank %d pages, target was %d\n", count, target);
+ return count;
+}
+
+
+/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
+ *
+ * This code first calculates the total number of pages wanted according to
+ * @shrink_fraction, then works out how many pages to reap from each cpu in
+ * proportion to its own page count (llpd_count).
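+ *
+ * For example, with total = 1200 cached pages and want = 120 to reap, a cpu
+ * holding llpd_count = 400 is asked for 120 / (1200/401 + 1) = 40 pages,
+ * i.e. roughly its one-third share of the work.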
+ */
+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
+{
+ unsigned long total, want, percpu_want, count = 0;
+ int cpu, nr_cpus;
+
+ total = lcounter_read(&sbi->ll_async_page_count);
+ if (total == 0)
+ return 0;
+
+#ifdef HAVE_SHRINKER_CACHE
+ want = shrink_fraction;
+ if (want == 0)
+ return total;
+#else
+ /* There can be a large number of llaps (600k or more in a large
+ * memory machine) so the VM 1/6 shrink ratio is likely too much.
+ * Since we are freeing pages also, we don't necessarily want to
+ * shrink so much. Limit to 40MB of pages + llaps per call. */
+ if (shrink_fraction <= 0)
+ want = total - sbi->ll_async_page_max + 32*num_online_cpus();
+ else
+ want = (total + shrink_fraction - 1) / shrink_fraction;
+#endif
+
+ if (want > 40 << (20 - CFS_PAGE_SHIFT))
+ want = 40 << (20 - CFS_PAGE_SHIFT);
+
+ CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
+ want, total, shrink_fraction);
+
+ nr_cpus = num_possible_cpus();
+ cpu = sbi->ll_async_page_clock_hand;
+        /* sweep cpus round-robin from the clock hand; do at most one full round */
+ do {
+ int c;
+
+ cpu = (cpu + 1) % nr_cpus;
+ c = LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_count;
+ if (!cpu_online(cpu))
+ percpu_want = c;
+ else
+ percpu_want = want / ((total / (c + 1)) + 1);
+ if (percpu_want == 0)
+ continue;
+
+ count += llap_shrink_cache_internal(sbi, cpu, percpu_want);
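+                /* Once enough pages were reaped, record where we stopped;
+                 * this also terminates the enclosing do/while, whose
+                 * condition compares cpu against ll_async_page_clock_hand. */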
+ if (count >= want)
+ sbi->ll_async_page_clock_hand = cpu;
+ } while (cpu != sbi->ll_async_page_clock_hand);
CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
count, want, total);
+#ifdef HAVE_SHRINKER_CACHE
+ return lcounter_read(&sbi->ll_async_page_count);
+#else
return count;
+#endif
+}
+
+/* Rebalance the async page queue length for each cpu. We expect a cpu that
+ * does more IO to get a relatively longer queue.
+ * This function should be called with preemption disabled.
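+ *
+ * Each cpu's new budget is a weighted blend (w1:w2 = 7:3 below) of its old
+ * budget and the share suggested by its recent sample count, which damps
+ * oscillation between successive rebalances.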
+ */
+static inline int llap_async_cache_rebalance(struct ll_sb_info *sbi)
+{
+ unsigned long sample = 0, *cpu_sample, bias, slice;
+ struct ll_pglist_data *pd;
+ cpumask_t mask;
+ int cpu, surplus;
+ int w1 = 7, w2 = 3, base = (w1 + w2); /* weight value */
+ atomic_t *pcnt;
+
+ if (!spin_trylock(&sbi->ll_async_page_reblnc_lock)) {
+ /* someone else is doing the job */
+ return 1;
+ }
+
+ pcnt = &LL_PGLIST_DATA(sbi)->llpd_sample_count;
+ if (!atomic_read(pcnt)) {
+ /* rare case, somebody else has gotten this job done */
+ spin_unlock(&sbi->ll_async_page_reblnc_lock);
+ return 1;
+ }
+
+ sbi->ll_async_page_reblnc_count++;
+ cpu_sample = sbi->ll_async_page_sample;
+ memset(cpu_sample, 0, num_possible_cpus() * sizeof(unsigned long));
+ for_each_online_cpu(cpu) {
+ pcnt = &LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_sample_count;
+ cpu_sample[cpu] = atomic_read(pcnt);
+ atomic_set(pcnt, 0);
+ sample += cpu_sample[cpu];
+ }
+
+ cpus_clear(mask);
+ surplus = sbi->ll_async_page_max;
+ slice = surplus / sample + 1;
+ sample /= num_online_cpus();
+ bias = sample >> 4;
+ for_each_online_cpu(cpu) {
+ pd = LL_PGLIST_DATA_CPU(sbi, cpu);
+ if (labs((long int)sample - cpu_sample[cpu]) > bias) {
+ unsigned long budget = pd->llpd_budget;
+ /* weighted original queue length and expected queue
+ * length to avoid thrashing. */
+ pd->llpd_budget = (budget * w1) / base +
+ (slice * cpu_sample[cpu]) * w2 / base;
+ cpu_set(cpu, mask);
+ }
+ surplus -= pd->llpd_budget;
+ }
+ surplus /= cpus_weight(mask) ?: 1;
+ for_each_cpu_mask(cpu, mask)
+ LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget += surplus;
+ spin_unlock(&sbi->ll_async_page_reblnc_lock);
+
+        /* TODO: do we really need to call llap_shrink_cache_internal
+         * for every cpu whose page count exceeds its budget?
+ * for_each_cpu_mask(cpu, mask)
+ * ll_shrink_cache_internal(...)
+ */
+
+ return 0;
}
static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
struct obd_export *exp;
struct inode *inode = page->mapping->host;
struct ll_sb_info *sbi;
- int rc;
+ struct ll_pglist_data *pd;
+ int rc, cpu, target;
ENTRY;
if (!inode) {
/* move to end of LRU list, except when page is just about to
* die */
if (origin != LLAP_ORIGIN_REMOVEPAGE) {
- spin_lock(&sbi->ll_lock);
- sbi->ll_pglist_gen++;
- list_del_init(&llap->llap_pglist_item);
- list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
- spin_unlock(&sbi->ll_lock);
+ int old_cpu = llap->llap_pglist_cpu;
+ struct ll_pglist_data *old_pd;
+
+ pd = ll_pglist_double_lock(sbi, old_cpu, &old_pd);
+ pd->llpd_hit++;
+ while (old_cpu != llap->llap_pglist_cpu) {
+                        /* rare case: someone else is touching this
+ * page too. */
+ ll_pglist_double_unlock(sbi, old_cpu);
+ old_cpu = llap->llap_pglist_cpu;
+                        pd = ll_pglist_double_lock(sbi, old_cpu, &old_pd);
+ }
+
+ list_move(&llap->llap_pglist_item,
+ &pd->llpd_list);
+ old_pd->llpd_gen++;
+ if (pd->llpd_cpu != old_cpu) {
+ pd->llpd_count++;
+ old_pd->llpd_count--;
+ old_pd->llpd_gen++;
+ llap->llap_pglist_cpu = pd->llpd_cpu;
+ pd->llpd_cross++;
+ }
+ ll_pglist_double_unlock(sbi, old_cpu);
}
GOTO(out, llap);
}
RETURN(ERR_PTR(-EINVAL));
/* limit the number of lustre-cached pages */
- if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
- llap_shrink_cache(sbi, 0);
+ cpu = get_cpu();
+ pd = LL_PGLIST_DATA(sbi);
+ target = pd->llpd_count - pd->llpd_budget;
+ if (target > 0) {
+ rc = 0;
+ atomic_inc(&pd->llpd_sample_count);
+ if (atomic_read(&pd->llpd_sample_count) >
+ sbi->ll_async_page_sample_max) {
+ pd->llpd_reblnc_count++;
+ rc = llap_async_cache_rebalance(sbi);
+ if (rc == 0)
+ target = pd->llpd_count - pd->llpd_budget;
+ }
+                /* If rc equals 1, another cpu is doing the rebalance job and
+                 * our budget may be modified while we read it. Moreover, the
+                 * budget is most likely being increased because we have
+                 * already reached the rebalance threshold, so skip shrinking
+                 * the cache here. */
+ if ((rc == 0) && target > 0)
+ llap_shrink_cache_internal(sbi, cpu, target + 32);
+ }
+ put_cpu();
OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
ll_async_page_slab_size);
/* also zeroing the PRIVBITS low order bitflags */
__set_page_ll_data(page, llap);
llap->llap_page = page;
- spin_lock(&sbi->ll_lock);
- sbi->ll_pglist_gen++;
- sbi->ll_async_page_count++;
- list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
+
+ lcounter_inc(&sbi->ll_async_page_count);
+ pd = ll_pglist_lock(sbi);
+ list_add_tail(&llap->llap_pglist_item, &pd->llpd_list);
INIT_LIST_HEAD(&llap->llap_pending_write);
- spin_unlock(&sbi->ll_lock);
+ pd->llpd_count++;
+ pd->llpd_gen++;
+ pd->llpd_miss++;
+ llap->llap_pglist_cpu = pd->llpd_cpu;
+ ll_pglist_unlock(sbi);
out:
if (unlikely(sbi->ll_flags & LL_SBI_CHECKSUM)) {
RETURN(rc);
}
+static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
+
+/* WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
+ * It should work well when ra_max_pages is much greater than any single
+ * file's read-ahead window.
+ *
+ * TODO: There may be a `global sync problem' in this implementation.
+ * Suppose the global ra window is 100M and each file's ra window is 10M;
+ * if more than 10 files try to get their ra budget and reach
+ * ll_ra_count_get at exactly the same time, all of them will get a zero
+ * ra window even though the global window is 100M. -jay
+ */
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
unsigned long ret;
ENTRY;
- spin_lock(&sbi->ll_lock);
- ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
- ra->ra_cur_pages += ret;
- spin_unlock(&sbi->ll_lock);
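+        /* Optimistically reserve the pages with an atomic add, and roll the
+         * reservation back if a racing reader pushed us past ra_max_pages. */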
+ ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), len);
+ if ((int)ret < 0)
+ GOTO(out, ret = 0);
+ if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
+ atomic_sub(ret, &ra->ra_cur_pages);
+ ret = 0;
+ }
+out:
RETURN(ret);
}
static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
- spin_lock(&sbi->ll_lock);
- LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
- ra->ra_cur_pages, len);
- ra->ra_cur_pages -= len;
- spin_unlock(&sbi->ll_lock);
+ atomic_sub(len, &ra->ra_cur_pages);
}
/* called for each page in a completed rpc.*/
struct obd_export *exp;
struct ll_async_page *llap;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc;
+ struct ll_pglist_data *pd;
+ int rc, cpu;
ENTRY;
exp = ll_i2dtexp(inode);
* is providing exclusivity to memory pressure/truncate/writeback..*/
__clear_page_ll_data(page);
- spin_lock(&sbi->ll_lock);
+ lcounter_dec(&sbi->ll_async_page_count);
+ cpu = llap->llap_pglist_cpu;
+ pd = ll_pglist_cpu_lock(sbi, cpu);
+ pd->llpd_gen++;
+ pd->llpd_count--;
if (!list_empty(&llap->llap_pglist_item))
list_del_init(&llap->llap_pglist_item);
- sbi->ll_pglist_gen++;
- sbi->ll_async_page_count--;
- spin_unlock(&sbi->ll_lock);
+ ll_pglist_cpu_unlock(sbi, cpu);
OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
EXIT;
}
RETURN(rc);
}
-static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
+static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
{
LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
- ra->ra_stats[which]++;
+ lprocfs_counter_incr(sbi->ll_ra_stats, which);
}
static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
{
struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
- struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
-
- spin_lock(&sbi->ll_lock);
- ll_ra_stats_inc_unlocked(ra, which);
- spin_unlock(&sbi->ll_lock);
+ ll_ra_stats_inc_sbi(sbi, which);
}
void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
int zero = 0, stride_zero = 0, stride_detect = 0, ra_miss = 0;
ENTRY;
- spin_lock(&sbi->ll_lock);
spin_lock(&ras->ras_lock);
- ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);
+ ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
/* reset the read-ahead window in two cases. First when the app seeks
* or reads to some other part of the file. Secondly if we get a
* reclaiming it before we get to it. */
if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
zero = 1;
- ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
+ ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
/* check whether it is in stride I/O mode*/
if (!index_in_stride_window(index, ras, inode))
stride_zero = 1;
* stride I/O mode to avoid complication */
if (!stride_io_mode(ras))
stride_zero = 1;
- ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
+ ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
}
/* On the second access to a file smaller than the tunable
RAS_CDEBUG(ras);
ras->ras_request_index++;
spin_unlock(&ras->ras_lock);
- spin_unlock(&sbi->ll_lock);
return;
}