From: jxiong
Date: Thu, 31 Jul 2008 04:23:28 +0000 (+0000)
Subject: Revert the patch for 11817
X-Git-Tag: v1_8_0_110~387
X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=3bea95fc4fe7da549b35a165862fde54230ab408;p=fs%2Flustre-release.git

Revert the patch for 11817
---

diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4
index 3d95e03..2792f61 100644
--- a/lustre/autoconf/lustre-core.m4
+++ b/lustre/autoconf/lustre-core.m4
@@ -237,52 +237,27 @@ LB_LINUX_TRY_COMPILE([
 # LC_FUNC_REGISTER_CACHE
 #
 # if register_cache() is defined by kernel
-#
-# There are two ways to shrink a custom cache in linux kernels.  For
-# kernels prior to 2.6.5(?), register_cache() is used, and for later
-# kernels, set_shrinker() is used instead.
 #
 AC_DEFUN([LC_FUNC_REGISTER_CACHE],
-[AC_MSG_CHECKING([if kernel defines cache pressure hook])
+[AC_MSG_CHECKING([if kernel defines register_cache()])
 LB_LINUX_TRY_COMPILE([
-        #include <linux/mm.h>
+        #include <linux/list.h>
+        #include <linux/cache_def.h>
 ],[
-        shrinker_t shrinker;
-
-        set_shrinker(1, shrinker);
+        struct cache_definition cache;
 ],[
-        AC_MSG_RESULT([set_shrinker])
-        AC_DEFINE(HAVE_SHRINKER_CACHE, 1, [shrinker_cache found])
-        AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [shrinkers should return int])
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
+        AC_MSG_CHECKING([if kernel expects return from cache shrink function])
+        HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
+        if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
+                AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
+                AC_MSG_RESULT(yes)
+        else
+                AC_MSG_RESULT(no)
+        fi
 ],[
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/list.h>
-                #include <linux/cache_def.h>
-        ],[
-                struct cache_definition cache;
-        ],[
-                AC_MSG_RESULT([register_cache])
-                AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
-                AC_MSG_CHECKING([if kernel expects return from cache shrink ])
-                tmp_flags="$EXTRA_KCFLAGS"
-                EXTRA_KCFLAGS="-Werror"
-                LB_LINUX_TRY_COMPILE([
-                        #include <linux/list.h>
-                        #include <linux/cache_def.h>
-                ],[
-                        struct cache_definition c;
-                        c.shrinker = (int (*)(int, unsigned int))1;
-                ],[
-                        AC_DEFINE(HAVE_CACHE_RETURN_INT, 1,
-                                  [kernel expects return from shrink_cache])
-                        AC_MSG_RESULT(yes)
-                ],[
-                        AC_MSG_RESULT(no)
-                ])
-                EXTRA_KCFLAGS="$tmp_flags"
-        ],[
-                AC_MSG_RESULT([no])
-        ])
+        AC_MSG_RESULT([no])
 ])
 ])

@@ -1332,7 +1307,6 @@ AC_DEFUN([LC_PROG_LINUX],
         LC_QUOTA_READ
         LC_COOKIE_FOLLOW_LINK
         LC_FUNC_RCU
-        LC_PERCPU_COUNTER
         LC_QUOTA64

         # does the kernel have VFS intent patches?
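For context, the hunk above distinguishes the two cache-pressure interfaces named in the removed comment: 2.6.5+ kernels register a shrinker with set_shrinker() from <linux/mm.h>, while older kernels use register_cache() with a struct cache_definition from <linux/cache_def.h>. A minimal sketch of the set_shrinker() style follows; it is illustrative only (my_shrink, my_shrinker, and the my_* helpers are made-up names) and mirrors the signatures probed by the configure test being removed.

    /* Illustrative sketch, not part of the patch. */
    #include <linux/mm.h>        /* set_shrinker(), remove_shrinker() */

    /* Old 2.6-era shrinker callback: scan up to nr_to_scan objects and
     * return how many cacheable objects remain. */
    static int my_shrink(int nr_to_scan, unsigned int gfp_mask)
    {
            return 0;
    }

    static struct shrinker *my_shrinker;

    static void my_register(void)
    {
            /* DEFAULT_SEEKS weights this cache like the dentry cache */
            my_shrinker = set_shrinker(DEFAULT_SEEKS, my_shrink);
    }

    static void my_unregister(void)
    {
            remove_shrinker(my_shrinker);
    }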
@@ -1583,32 +1557,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])

-AC_DEFUN([LC_PERCPU_COUNTER],
-[AC_MSG_CHECKING([if have struct percpu_counter defined])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/percpu_counter.h>
-],[],[
-        AC_DEFINE(HAVE_PERCPU_COUNTER, 1, [percpu_counter found])
-        AC_MSG_RESULT([yes])
-
-        AC_MSG_CHECKING([if percpu_counter_inc takes the 2nd argument])
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/percpu_counter.h>
-        ],[
-                struct percpu_counter c;
-                percpu_counter_init(&c, 0);
-        ],[
-                AC_DEFINE(HAVE_PERCPU_2ND_ARG, 1, [percpu_counter_init has two
-                          arguments])
-                AC_MSG_RESULT([yes])
-        ],[
-                AC_MSG_RESULT([no])
-        ])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
 #
 # LC_CONFIGURE
 #
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h
index 19c3da7..d9e4499 100644
--- a/lustre/include/linux/lustre_compat25.h
+++ b/lustre/include/linux/lustre_compat25.h
@@ -545,23 +545,5 @@ int ll_unregister_blkdev(unsigned int dev, const char *name)
 #define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
 #endif

-#ifndef get_cpu
-#ifdef CONFIG_PREEMPT
-#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
-#define put_cpu() preempt_enable()
-#else
-#define get_cpu() smp_processor_id()
-#define put_cpu()
-#endif
-#endif /* get_cpu & put_cpu */
-
-#ifndef for_each_possible_cpu
-#define for_each_possible_cpu(i) for_each_cpu(i)
-#endif
-
-#ifndef cpu_to_node
-#define cpu_to_node(cpu) 0
-#endif
-
 #endif /* __KERNEL__ */
 #endif /* _COMPAT25_H */
diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h
index 9bee596..c531c05 100644
--- a/lustre/include/linux/lustre_lite.h
+++ b/lustre/include/linux/lustre_lite.h
@@ -30,34 +30,6 @@
 #include
 #include

-#ifdef HAVE_PERCPU_COUNTER
-#include <linux/percpu_counter.h>
-
-typedef struct percpu_counter lcounter_t;
-
-#define lcounter_read(counter)    (int)percpu_counter_read(counter)
-#define lcounter_inc(counter)     percpu_counter_inc(counter)
-#define lcounter_dec(counter)     percpu_counter_dec(counter)
-
-#ifdef HAVE_PERCPU_2ND_ARG
-# define lcounter_init(counter)   percpu_counter_init(counter, 0)
-#else
-# define lcounter_init(counter)   percpu_counter_init(counter)
-#endif
-
-#define lcounter_destroy(counter) percpu_counter_destroy(counter)
-
-#else
-typedef struct { atomic_t count; } lcounter_t;
-
-#define lcounter_read(counter)    atomic_read(&counter->count)
-#define lcounter_inc(counter)     atomic_inc(&counter->count)
-#define lcounter_dec(counter)     atomic_dec(&counter->count)
-#define lcounter_init(counter)    atomic_set(&counter->count, 0)
-#define lcounter_destroy(counter)
-
-#endif /* if defined HAVE_PERCPU_COUNTER */
-
 /* lprocfs.c */
 enum {
         LPROC_LL_DIRTY_HITS = 0,
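The lustre_lite.h hunk above removes the lcounter_t wrapper, which compiles to a struct percpu_counter when HAVE_PERCPU_COUNTER is detected and falls back to a plain atomic_t otherwise. A hedged sketch of the usage pattern the wrapper supported (n_pages, limit, and shrink_the_cache are made-up names):

    /* Illustrative sketch, not part of the patch. */
    lcounter_t n_pages;

    lcounter_init(&n_pages);          /* percpu_counter_init() or atomic_set() */
    lcounter_inc(&n_pages);           /* called once per cached page */
    if (lcounter_read(&n_pages) > limit)
            shrink_the_cache();       /* made-up stand-in */
    lcounter_dec(&n_pages);
    lcounter_destroy(&n_pages);       /* no-op in the atomic_t fallback */

The percpu variant trades exact reads for cheap, mostly contention-free increments, which is why the removed code only ever compares lcounter_read() against a large threshold.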
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 8e564e2..0eab8b3 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -193,26 +193,11 @@ enum ra_stat {
         _NR_RA_STAT,
 };

-#define LL_RA_STAT      _NR_RA_STAT
-#define LL_RA_STAT_STRINGS {                                            \
-        [RA_STAT_HIT]              = "hits",                            \
-        [RA_STAT_MISS]             = "misses",                          \
-        [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",        \
-        [RA_STAT_MISS_IN_WINDOW]   = "miss inside window",              \
-        [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",          \
-        [RA_STAT_FAILED_MATCH]     = "failed lock match",               \
-        [RA_STAT_DISCARDED]        = "read but discarded",              \
-        [RA_STAT_ZERO_LEN]         = "zero length file",                \
-        [RA_STAT_ZERO_WINDOW]      = "zero size window",                \
-        [RA_STAT_EOF]              = "read-ahead to EOF",               \
-        [RA_STAT_MAX_IN_FLIGHT]    = "hit max r-a issue",               \
-        [RA_STAT_WRONG_GRAB_PAGE]  = "wrong page from grab_cache_page",\
-}
-
 struct ll_ra_info {
-        atomic_t      ra_cur_pages;
+        unsigned long ra_cur_pages;
         unsigned long ra_max_pages;
         unsigned long ra_max_read_ahead_whole_pages;
+        unsigned long ra_stats[_NR_RA_STAT];
 };

 /* LL_HIST_MAX=32 causes an overflow */
@@ -268,30 +253,10 @@ enum stats_track_type {
 /* default value for lockless_truncate_enable */
 #define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1

-/* percpu data structure for lustre lru page list */
-struct ll_pglist_data {
-        spinlock_t       llpd_lock; /* lock to protect llpg_list */
-        struct list_head llpd_list; /* all pages (llap_pglist_item) */
-        unsigned long    llpd_gen;  /* generation # of this list */
-        unsigned long    llpd_count; /* How many pages in this list */
-        atomic_t         llpd_sample_count;
-        unsigned long    llpd_reblnc_count;
-        /* the pages in this list shouldn't be over this number */
-        unsigned long    llpd_budget;
-        int              llpd_cpu;
-        /* which page the pglist data is in */
-        struct page     *llpd_page;
-
-        /* stats */
-        unsigned long    llpd_hit;
-        unsigned long    llpd_miss;
-        unsigned long    llpd_cross;
-};
-
 struct ll_sb_info {
         struct list_head          ll_list;
-        /* this protects pglist(only ll_async_page_max) and ra_info.
-         * It isn't safe to grab from interrupt contexts. */
+        /* this protects pglist and ra_info.  It isn't safe to
+         * grab from interrupt contexts */
         spinlock_t                ll_lock;
         spinlock_t                ll_pp_extent_lock; /* Lock for pp_extent entries */
         spinlock_t                ll_process_lock; /* Lock for ll_rw_process_info */
@@ -310,19 +275,10 @@ struct ll_sb_info {

         struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */

-        /* reblnc lock protects llpd_budget */
-        spinlock_t                ll_async_page_reblnc_lock;
-        unsigned long             ll_async_page_reblnc_count;
-        unsigned long             ll_async_page_sample_max;
-        /* I defined this array here rather than in ll_pglist_data
-         * because it is always accessed by only one cpu. -jay */
-        unsigned long            *ll_async_page_sample;
         unsigned long             ll_async_page_max;
-        unsigned long             ll_async_page_clock_hand;
-        lcounter_t                ll_async_page_count;
-        struct ll_pglist_data   **ll_pglist;
-
-        struct lprocfs_stats     *ll_ra_stats;
+        unsigned long             ll_async_page_count;
+        unsigned long             ll_pglist_gen;
+        struct list_head          ll_pglist; /* all pages (llap_pglist_item) */

         unsigned                  ll_contention_time; /* seconds */
         unsigned                  ll_lockless_truncate_enable; /* true/false */
@@ -365,69 +321,7 @@ struct ll_sb_info {
         unsigned long long        ll_sa_miss;     /* miss count */
 };

-#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
-
-#define LL_PGLIST_DATA_CPU(sbi, cpu) ((sbi)->ll_pglist[cpu])
-#define LL_PGLIST_DATA(sbi)          LL_PGLIST_DATA_CPU(sbi, smp_processor_id())
-
-static inline struct ll_pglist_data *ll_pglist_cpu_lock(
-                struct ll_sb_info *sbi,
-                int cpu)
-{
-        spin_lock(&sbi->ll_pglist[cpu]->llpd_lock);
-        return LL_PGLIST_DATA_CPU(sbi, cpu);
-}
-
-static inline void ll_pglist_cpu_unlock(struct ll_sb_info *sbi, int cpu)
-{
-        spin_unlock(&sbi->ll_pglist[cpu]->llpd_lock);
-}
-
-static inline struct ll_pglist_data *ll_pglist_double_lock(
-                struct ll_sb_info *sbi,
-                int cpu, struct ll_pglist_data **pd_cpu)
-{
-        int current_cpu = get_cpu();
-
-        if (cpu == current_cpu) {
-                ll_pglist_cpu_lock(sbi, cpu);
-        } else if (current_cpu < cpu) {
-                ll_pglist_cpu_lock(sbi, current_cpu);
-                ll_pglist_cpu_lock(sbi, cpu);
-        } else {
-                ll_pglist_cpu_lock(sbi, cpu);
-                ll_pglist_cpu_lock(sbi, current_cpu);
-        }
-
-        if (pd_cpu)
-                *pd_cpu = LL_PGLIST_DATA_CPU(sbi, cpu);
-
-        return LL_PGLIST_DATA(sbi);
-}
-
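ll_pglist_double_lock() above encodes the classic deadlock-avoidance rule for taking two peer locks: always acquire the lower-numbered CPU's lock first. A standalone sketch of the same idea (lock_pair and locks are hypothetical names):

    /* Illustrative sketch, not part of the patch. */
    static void lock_pair(spinlock_t *locks, int a, int b)
    {
            if (a == b) {
                    spin_lock(&locks[a]);
            } else if (a < b) {
                    spin_lock(&locks[a]);   /* lower index first */
                    spin_lock(&locks[b]);
            } else {
                    spin_lock(&locks[b]);
                    spin_lock(&locks[a]);
            }
            /* Any two threads locking the same pair take the same lock
             * first, so neither can hold one half while waiting for the
             * other. */
    }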
-static inline void ll_pglist_double_unlock(struct ll_sb_info *sbi, int cpu)
-{
-        int current_cpu = smp_processor_id();
-        if (cpu == current_cpu) {
-                ll_pglist_cpu_unlock(sbi, cpu);
-        } else {
-                ll_pglist_cpu_unlock(sbi, cpu);
-                ll_pglist_cpu_unlock(sbi, current_cpu);
-        }
-        put_cpu();
-}
-
-static inline struct ll_pglist_data *ll_pglist_lock(struct ll_sb_info *sbi)
-{
-        ll_pglist_cpu_lock(sbi, get_cpu());
-        return LL_PGLIST_DATA(sbi);
-}
-
-static inline void ll_pglist_unlock(struct ll_sb_info *sbi)
-{
-        ll_pglist_cpu_unlock(sbi, smp_processor_id());
-        put_cpu();
-}
+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)

 struct ll_ra_read {
         pgoff_t             lrr_start;
@@ -568,9 +462,7 @@ struct ll_async_page {
                          llap_ra_used:1,
                          llap_ignore_quota:1,
                          llap_nocache:1,
-                         llap_lockless_io_page:1,
-                         llap_reserved:7;
-        unsigned int     llap_pglist_cpu:16;
+                         llap_lockless_io_page:1;
         void            *llap_cookie;
         struct page     *llap_page;
         struct list_head llap_pending_write;
@@ -596,25 +488,8 @@ enum {
 extern char *llap_origins[];

 #ifdef HAVE_REGISTER_CACHE
-#include <linux/cache_def.h>
 #define ll_register_cache(cache) register_cache(cache)
 #define ll_unregister_cache(cache) unregister_cache(cache)
-#elif defined(HAVE_SHRINKER_CACHE)
-struct cache_definition {
-        const char *name;
-        shrinker_t shrink;
-        struct shrinker *shrinker;
-};
-
-#define ll_register_cache(cache) do {                                   \
-        struct cache_definition *c = (cache);                           \
-        c->shrinker = set_shrinker(DEFAULT_SEEKS, c->shrink);           \
-} while(0)
-
-#define ll_unregister_cache(cache) do {                                 \
-        remove_shrinker((cache)->shrinker);                             \
-        (cache)->shrinker = NULL;                                       \
-} while(0)
 #else
 #define ll_register_cache(cache) do {} while (0)
 #define ll_unregister_cache(cache) do {} while (0)
@@ -820,7 +695,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode,
                   struct ptlrpc_request *req, int offset, struct super_block *);
 void lustre_dump_dentry(struct dentry *, int recur);
 void lustre_dump_inode(struct inode *);
-struct ll_async_page *llite_pglist_next_llap(struct list_head *head,
+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
                                              struct list_head *list);
 int ll_obd_statfs(struct inode *inode, void *arg);
 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index d8462bc..8fe8acd 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -50,64 +50,6 @@ extern struct address_space_operations ll_dir_aops;
 #define log2(n) ffz(~(n))
 #endif

-static inline void ll_pglist_fini(struct ll_sb_info *sbi)
-{
-        struct page *page;
-        int i;
-
-        if (sbi->ll_pglist == NULL)
-                return;
-
-        for_each_possible_cpu(i) {
-                page = sbi->ll_pglist[i]->llpd_page;
-                if (page) {
-                        sbi->ll_pglist[i] = NULL;
-                        __free_page(page);
-                }
-        }
-
-        OBD_FREE(sbi->ll_pglist, sizeof(void *)*num_possible_cpus());
-        sbi->ll_pglist = NULL;
-}
-
-static inline int ll_pglist_init(struct ll_sb_info *sbi)
-{
-        struct ll_pglist_data *pd;
-        unsigned long budget;
-        int i, color = 0;
-        ENTRY;
-
-        OBD_ALLOC(sbi->ll_pglist, sizeof(void *) * num_possible_cpus());
-        if (sbi->ll_pglist == NULL)
-                RETURN(-ENOMEM);
-
-        budget = sbi->ll_async_page_max / num_online_cpus();
-        for_each_possible_cpu(i) {
-                struct page *page = alloc_pages_node(cpu_to_node(i),
-                                                     GFP_KERNEL, 0);
-                if (page == NULL) {
-                        ll_pglist_fini(sbi);
-                        RETURN(-ENOMEM);
-                }
-
-                if (color + L1_CACHE_ALIGN(sizeof(*pd)) > PAGE_SIZE)
-                        color = 0;
-
-                pd = (struct ll_pglist_data *)(page_address(page) + color);
-                memset(pd, 0, sizeof(*pd));
-                spin_lock_init(&pd->llpd_lock);
-                INIT_LIST_HEAD(&pd->llpd_list);
-                if (cpu_online(i))
-                        pd->llpd_budget = budget;
-                pd->llpd_cpu = i;
-                pd->llpd_page = page;
-                atomic_set(&pd->llpd_sample_count, 0);
-                sbi->ll_pglist[i] = pd;
-                color += L1_CACHE_ALIGN(sizeof(*pd));
-        }
-
-        RETURN(0);
-}

 static
 struct ll_sb_info *ll_init_sbi(void)
 {
@@ -122,15 +64,12 @@ static struct ll_sb_info *ll_init_sbi(void)
         if (!sbi)
                 RETURN(NULL);

-        OBD_ALLOC(sbi->ll_async_page_sample, sizeof(long)*num_possible_cpus());
-        if (sbi->ll_async_page_sample == NULL)
-                GOTO(out, 0);
-
         spin_lock_init(&sbi->ll_lock);
         spin_lock_init(&sbi->ll_lco.lco_lock);
         spin_lock_init(&sbi->ll_pp_extent_lock);
         spin_lock_init(&sbi->ll_process_lock);
         sbi->ll_rw_stats_on = 0;
+        INIT_LIST_HEAD(&sbi->ll_pglist);

         si_meminfo(&si);
         pages = si.totalram - si.totalhigh;
@@ -138,15 +77,6 @@ static struct ll_sb_info *ll_init_sbi(void)
                 sbi->ll_async_page_max = pages / 2;
         else
                 sbi->ll_async_page_max = (pages / 4) * 3;
-
-        lcounter_init(&sbi->ll_async_page_count);
-        spin_lock_init(&sbi->ll_async_page_reblnc_lock);
-        sbi->ll_async_page_sample_max = 64 * num_online_cpus();
-        sbi->ll_async_page_reblnc_count = 0;
-        sbi->ll_async_page_clock_hand = 0;
-        if (ll_pglist_init(sbi))
-                GOTO(out, 0);
-
         sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8,
                                            SBI_DEFAULT_READAHEAD_MAX);
         sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
@@ -188,14 +118,6 @@ static struct ll_sb_info *ll_init_sbi(void)
         sbi->ll_sa_max = LL_SA_RPC_DEF;

         RETURN(sbi);
-
-out:
-        if (sbi->ll_async_page_sample)
-                OBD_FREE(sbi->ll_async_page_sample,
-                         sizeof(long) * num_possible_cpus());
-        ll_pglist_fini(sbi);
-        OBD_FREE(sbi, sizeof(*sbi));
-        RETURN(NULL);
 }

 void ll_free_sbi(struct super_block *sb)
@@ -204,13 +126,9 @@ void ll_free_sbi(struct super_block *sb)
         ENTRY;

         if (sbi != NULL) {
-                ll_pglist_fini(sbi);
                 spin_lock(&ll_sb_lock);
                 list_del(&sbi->ll_list);
                 spin_unlock(&ll_sb_lock);
-                lcounter_destroy(&sbi->ll_async_page_count);
-                OBD_FREE(sbi->ll_async_page_sample,
-                         sizeof(long) * num_possible_cpus());
                 OBD_FREE(sbi, sizeof(*sbi));
         }
         EXIT;
@@ -1249,9 +1167,9 @@ void ll_put_super(struct super_block *sb)
         EXIT;
 } /* client_put_super */

-#if defined(HAVE_REGISTER_CACHE) || defined(HAVE_SHRINKER_CACHE)
-
-#if defined(HAVE_CACHE_RETURN_INT)
+#ifdef HAVE_REGISTER_CACHE
+#include <linux/cache_def.h>
+#ifdef HAVE_CACHE_RETURN_INT
 static int
 #else
 static void
@@ -1264,7 +1182,7 @@ ll_shrink_cache(int priority, unsigned int gfp_mask)
         list_for_each_entry(sbi, &ll_super_blocks, ll_list)
                 count += llap_shrink_cache(sbi, priority);

-#if defined(HAVE_CACHE_RETURN_INT)
+#ifdef HAVE_CACHE_RETURN_INT
         return count;
 #endif
 }
@@ -1273,7 +1191,7 @@ struct cache_definition ll_cache_definition = {
         .name = "llap_cache",
         .shrink = ll_shrink_cache
 };
-#endif /* HAVE_REGISTER_CACHE || HAVE_SHRINKER_CACHE */
+#endif /* HAVE_REGISTER_CACHE */

 struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
 {
@@ -2175,14 +2093,14 @@ char *llap_origins[] = {
         [LLAP_ORIGIN_LOCKLESS_IO] = "ls"
 };

-struct ll_async_page *llite_pglist_next_llap(struct list_head *head,
+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
                                              struct list_head *list)
 {
         struct ll_async_page *llap;
         struct list_head *pos;

         list_for_each(pos, list) {
-                if (pos == head)
+                if (pos == &sbi->ll_pglist)
                         return NULL;
                 llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
                 if (llap->llap_page == NULL)
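llite_pglist_next_llap() above depends on a sentinel cursor this patch uses throughout: a dummy ll_async_page with llap_page == NULL stays linked into the LRU to mark a walker's position across lock drops, and walkers skip any entry whose page pointer is NULL. A reduced sketch (struct item and next_real are made-up names):

    /* Illustrative sketch, not part of the patch. */
    struct item {
            struct list_head link;
            void            *payload;    /* NULL marks another cursor */
    };

    /* Return the next real item after 'cursor', or NULL once the walk
     * wraps around to the real list head. */
    static struct item *next_real(struct list_head *head,
                                  struct list_head *cursor)
    {
            struct list_head *pos;
            struct item *it;

            list_for_each(pos, cursor) {
                    if (pos == head)
                            return NULL;
                    it = list_entry(pos, struct item, link);
                    if (it->payload != NULL)
                            return it;
            }
            return NULL;
    }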
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c
index b2af553..9a735c8 100644
--- a/lustre/llite/lproc_llite.c
+++ b/lustre/llite/lproc_llite.c
@@ -34,6 +34,7 @@ struct proc_dir_entry *proc_lustre_fs_root;
 #ifdef LPROCFS
 /* /proc/lustre/llite mount point registration */
 struct file_operations llite_dump_pgcache_fops;
+struct file_operations ll_ra_stats_fops;
 struct file_operations ll_rw_extents_stats_fops;
 struct file_operations ll_rw_extents_stats_pp_fops;
 struct file_operations ll_rw_offset_stats_fops;
@@ -287,8 +288,7 @@ static int ll_wr_max_cached_mb(struct file *file, const char *buffer,
 {
         struct super_block *sb = data;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
-        unsigned long budget;
-        int mult, rc, pages_number, cpu;
+        int mult, rc, pages_number;

         mult = 1 << (20 - CFS_PAGE_SHIFT);
         rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
@@ -309,46 +309,12 @@ static int ll_wr_max_cached_mb(struct file *file, const char *buffer,
                 /* Not set up yet, don't call llap_shrink_cache */
                 return count;

-        spin_lock(&sbi->ll_async_page_reblnc_lock);
-        budget = sbi->ll_async_page_max / num_online_cpus();
-        for_each_online_cpu(cpu)
-                LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget = budget;
-        spin_unlock(&sbi->ll_async_page_reblnc_lock);
-
-        if (lcounter_read(&sbi->ll_async_page_count) >= sbi->ll_async_page_max)
-                llap_shrink_cache(sbi, -1);
+        if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
+                llap_shrink_cache(sbi, 0);

         return count;
 }

-static int ll_rd_pgcache_bnlc(char *page, char **start, off_t off,
-                              int count, int *eof, void *data)
-{
-        struct super_block *sb = data;
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-        struct ll_pglist_data *pd;
-        unsigned long total_budget = 0;
-        int n = 0, cpu;
-
-        n += snprintf(page +n, count - n,
-                "cpu\tpage count\tbudget\t\treblnc count\tgen\thit\tmiss\tcross\n");
-        for_each_online_cpu(cpu) {
-                pd = LL_PGLIST_DATA_CPU(sbi, cpu);
-                n += snprintf(page + n, count - n,
-                        "%d\t%-8lu\t%-8lu\t%-8lu\t%lu\t%u\t%u\t%u\n",
-                        cpu, pd->llpd_count, pd->llpd_budget,
-                        pd->llpd_reblnc_count, pd->llpd_gen,
-                        pd->llpd_hit, pd->llpd_miss, pd->llpd_cross);
-                total_budget += pd->llpd_budget;
-        }
-        n += snprintf(page + n, count - n,
-                "Total budget: %lu, page max: %lu, rebalance cnt: %lu\n",
-                total_budget, sbi->ll_async_page_max,
-                sbi->ll_async_page_reblnc_count);
-        *eof = 1;
-        return n;
-}
-
 static int ll_rd_checksum(char *page, char **start, off_t off,
                           int count, int *eof, void *data)
 {
@@ -584,7 +550,6 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
         { "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb,
           ll_wr_max_read_ahead_whole_mb, 0 },
         { "max_cached_mb",    ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 },
-        { "pgcache_balance",ll_rd_pgcache_bnlc, 0, 0 },
         { "checksum_pages",   ll_rd_checksum, ll_wr_checksum, 0 },
         { "max_rw_chunk",     ll_rd_max_rw_chunk, ll_wr_max_rw_chunk, 0 },
         { "stats_track_pid",  ll_rd_track_pid, ll_wr_track_pid, 0 },
@@ -689,7 +654,6 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         char name[MAX_STRING_SIZE + 1], *ptr;
         int err, id, len;
         struct proc_dir_entry *entry;
-        static const char *ra_stats_string[] = LL_RA_STAT_STRINGS;
         ENTRY;

         memset(lvars, 0, sizeof(lvars));
@@ -724,13 +688,11 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         entry->proc_fops = &llite_dump_pgcache_fops;
         entry->data = sbi;

-        sbi->ll_ra_stats = lprocfs_alloc_stats(LL_RA_STAT,
-                                               LPROCFS_STATS_FLAG_PERCPU);
-        for (id = 0; id < LL_RA_STAT; id++)
-                lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
-                                     ra_stats_string[id], "pages");
-        lprocfs_register_stats(sbi->ll_proc_root, "read_ahead_stats",
-                               sbi->ll_ra_stats);
+        entry = create_proc_entry("read_ahead_stats", 0644, sbi->ll_proc_root);
+        if (entry == NULL)
+                GOTO(out, err = -ENOMEM);
+        entry->proc_fops = &ll_ra_stats_fops;
+        entry->data = sbi;

         entry = create_proc_entry("extents_stats", 0644, sbi->ll_proc_root);
         if (entry == NULL)
@@ -828,7 +790,6 @@ void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi)
 {
         if (sbi->ll_proc_root) {
                 lprocfs_remove(&sbi->ll_proc_root);
-                lprocfs_free_stats(&sbi->ll_ra_stats);
                 lprocfs_free_stats(&sbi->ll_stats);
         }
 }
@@ -858,8 +819,6 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
 {
         struct ll_async_page *llap, *dummy_llap = seq->private;
         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
-        struct ll_pglist_data *pd;
-        int cpu = dummy_llap->llap_pglist_cpu;

         /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
          * it in our own state */
@@ -869,23 +828,19 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
                 return 0;
         }

-        pd = ll_pglist_cpu_lock(sbi, cpu);
-        llap = llite_pglist_next_llap(&pd->llpd_list,
-                                      &dummy_llap->llap_pglist_item);
+        spin_lock(&sbi->ll_lock);
+
+        llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
         if (llap != NULL) {
-                int has_flags = 0, i;
+                int has_flags = 0;
                 struct page *page = llap->llap_page;
-                unsigned long gen = 0UL;

                 LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
                          llap->llap_origin);

-                for_each_online_cpu(i)
-                        gen += LL_PGLIST_DATA_CPU(sbi, i)->llpd_gen;
-
                 seq_printf(seq," %5lu | %p %p %s %s %s %s | %p %lu/%u(%p) "
                            "%lu %u [",
-                           gen,
+                           sbi->ll_pglist_gen,
                            llap, llap->llap_cookie,
                            llap_origins[llap->llap_origin],
                            llap->llap_write_queued ? "wq" : "- ",
@@ -909,7 +864,8 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
                 else
                         seq_puts(seq, "]\n");
         }
-        ll_pglist_cpu_unlock(sbi, cpu);
+
+        spin_unlock(&sbi->ll_lock);

         return 0;
 }
@@ -919,8 +875,6 @@ static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
 {
         struct ll_async_page *llap, *dummy_llap = seq->private;
         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
-        struct ll_pglist_data *pd, *next;
-        int cpu = dummy_llap->llap_pglist_cpu;

         /* bail if we just displayed the banner */
         if (dummy_llap->llap_magic == 0) {
@@ -931,35 +885,14 @@ static void *llite_dump_pgcache_seq_next(struct seq_file *seq, void *v,
         /* we've just displayed the llap that is after us in the list.
          * we advance to a position beyond it, returning null if there
          * isn't another llap in the list beyond that new position. */
-        pd = ll_pglist_cpu_lock(sbi, cpu);
-        llap = llite_pglist_next_llap(&pd->llpd_list,
-                                      &dummy_llap->llap_pglist_item);
+        spin_lock(&sbi->ll_lock);
+        llap = llite_pglist_next_llap(sbi, &dummy_llap->llap_pglist_item);
         list_del_init(&dummy_llap->llap_pglist_item);
         if (llap) {
                 list_add(&dummy_llap->llap_pglist_item,&llap->llap_pglist_item);
-                llap = llite_pglist_next_llap(&pd->llpd_list,
-                                              &dummy_llap->llap_pglist_item);
-        }
-        if (llap == NULL) {
-                int i = cpu + 1;
-                for (next = NULL; i < num_possible_cpus(); i++, next = NULL) {
-                        next = ll_pglist_cpu_lock(sbi, i);
-                        if (!list_empty(&next->llpd_list))
-                                break;
-                        ll_pglist_cpu_unlock(sbi, i);
-                }
-                if (next != NULL) {
-                        list_move(&dummy_llap->llap_pglist_item,
-                                  &next->llpd_list);
-                        dummy_llap->llap_pglist_cpu = i;
-                        ll_pglist_cpu_unlock(sbi, cpu);
-                        llap = llite_pglist_next_llap(&next->llpd_list,
-                                                      &dummy_llap->llap_pglist_item);
-                        LASSERT(llap);
-                        cpu = i;
-                }
+                llap =llite_pglist_next_llap(sbi,&dummy_llap->llap_pglist_item);
         }
-        ll_pglist_cpu_unlock(sbi, cpu);
+        spin_unlock(&sbi->ll_lock);

         ++*pos;
         if (llap == NULL) {
@@ -997,7 +930,6 @@ static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
         struct ll_async_page *dummy_llap;
         struct seq_file *seq;
         struct ll_sb_info *sbi = dp->data;
-        struct ll_pglist_data *pd;
         int rc = -ENOMEM;

         LPROCFS_ENTRY_AND_CHECK(dp);
@@ -1009,7 +941,6 @@ static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
         dummy_llap->llap_page = NULL;
         dummy_llap->llap_cookie = sbi;
         dummy_llap->llap_magic = 0;
-        dummy_llap->llap_pglist_cpu = 0;

         rc = seq_open(file, &llite_dump_pgcache_seq_sops);
         if (rc) {
@@ -1019,9 +950,9 @@ static int llite_dump_pgcache_seq_open(struct inode *inode, struct file *file)
         seq = file->private_data;
         seq->private = dummy_llap;

-        pd = ll_pglist_cpu_lock(sbi, 0);
-        list_add(&dummy_llap->llap_pglist_item, &pd->llpd_list);
-        ll_pglist_cpu_unlock(sbi, 0);
+        spin_lock(&sbi->ll_lock);
+        list_add(&dummy_llap->llap_pglist_item, &sbi->ll_pglist);
+        spin_unlock(&sbi->ll_lock);

 out:
         if (rc)
@@ -1035,12 +966,11 @@ static int llite_dump_pgcache_seq_release(struct inode *inode,
         struct seq_file *seq = file->private_data;
         struct ll_async_page *dummy_llap = seq->private;
         struct ll_sb_info *sbi = dummy_llap->llap_cookie;
-        int cpu = dummy_llap->llap_pglist_cpu;

-        ll_pglist_cpu_lock(sbi, cpu);
+        spin_lock(&sbi->ll_lock);
         if (!list_empty(&dummy_llap->llap_pglist_item))
                 list_del_init(&dummy_llap->llap_pglist_item);
-        ll_pglist_cpu_unlock(sbi, cpu);
+        spin_unlock(&sbi->ll_lock);
         OBD_FREE(dummy_llap, sizeof(*dummy_llap));

         return lprocfs_seq_release(inode, file);
@@ -1053,6 +983,61 @@ struct file_operations llite_dump_pgcache_fops = {
         .release = llite_dump_pgcache_seq_release,
 };

+static int ll_ra_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct timeval now;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_ra_info *ra = &sbi->ll_ra_info;
+        int i;
+        static char *ra_stat_strings[] = {
+                [RA_STAT_HIT] = "hits",
+                [RA_STAT_MISS] = "misses",
+                [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
+                [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
+                [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
+                [RA_STAT_FAILED_MATCH] = "failed lock match",
+                [RA_STAT_DISCARDED] = "read but discarded",
+                [RA_STAT_ZERO_LEN] = "zero length file",
+                [RA_STAT_ZERO_WINDOW] = "zero size window",
+                [RA_STAT_EOF] = "read-ahead to EOF",
+                [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
+                [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+        };
+
+        do_gettimeofday(&now);
+
+        spin_lock(&sbi->ll_lock);
+
+        seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+                   now.tv_sec, now.tv_usec);
+        seq_printf(seq, "pending issued pages: %lu\n",
+                   ra->ra_cur_pages);
+
+        for(i = 0; i < _NR_RA_STAT; i++)
+                seq_printf(seq, "%-25s %lu\n", ra_stat_strings[i],
+                           ra->ra_stats[i]);
+
+        spin_unlock(&sbi->ll_lock);
+
+        return 0;
+}
+
+static ssize_t ll_ra_stats_seq_write(struct file *file, const char *buf,
+                                     size_t len, loff_t *off)
+{
+        struct seq_file *seq = file->private_data;
+        struct ll_sb_info *sbi = seq->private;
+        struct ll_ra_info *ra = &sbi->ll_ra_info;
+
+        spin_lock(&sbi->ll_lock);
+        memset(ra->ra_stats, 0, sizeof(ra->ra_stats));
+        spin_unlock(&sbi->ll_lock);
+
+        return len;
+}
+
+LPROC_SEQ_FOPS(ll_ra_stats);
+
 #define pct(a,b) (b ? a * 100 / b : 0)

 static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
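The rw.c diff below restores the single-list llap_shrink_cache() with its flat 40 MB per-call cap. A worked check of that cap, assuming CFS_PAGE_SHIFT is 12 (4 KiB pages):

    /* 40 << (20 - CFS_PAGE_SHIFT) == 40 << 8 == 10240 pages == 40 MiB.
     * With shrink_fraction == 6 and ~600k cached llaps, the 1/6 ratio
     * alone would ask for ~100k pages, so in practice the cap is what
     * bounds a single shrink pass. */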
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c
index 0a3044c..0e7466b 100644
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -473,36 +473,50 @@ struct ll_async_page *llap_cast_private(struct page *page)
         return llap;
 }

-/* Try to reap @target pages in the specific @cpu's async page list.
+/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
  *
  * There is an llap attached onto every page in lustre, linked off @sbi.
  * We add an llap to the list so we don't lose our place during list walking.
  * If llaps in the list are being moved they will only move to the end
  * of the LRU, and we aren't terribly interested in those pages here (we
- * start at the beginning of the list where the least-used llaps are. */
-static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
-                                             int cpu, int target)
+ * start at the beginning of the list where the least-used llaps are.
+ */
+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
 {
         struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
-        struct ll_pglist_data *pd;
-        struct list_head *head;
-        int count = 0;
-
-        pd = ll_pglist_cpu_lock(sbi, cpu);
-        head = &pd->llpd_list;
-        list_add(&dummy_llap.llap_pglist_item, head);
-        while (count < target) {
+        unsigned long total, want, count = 0;
+
+        total = sbi->ll_async_page_count;
+
+        /* There can be a large number of llaps (600k or more in a large
+         * memory machine) so the VM 1/6 shrink ratio is likely too much.
+         * Since we are freeing pages also, we don't necessarily want to
+         * shrink so much.  Limit to 40MB of pages + llaps per call. */
+        if (shrink_fraction == 0)
+                want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
+        else
+                want = (total + shrink_fraction - 1) / shrink_fraction;
+
+        if (want > 40 << (20 - CFS_PAGE_SHIFT))
+                want = 40 << (20 - CFS_PAGE_SHIFT);
+
+        CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
+               want, total, shrink_fraction);
+
+        spin_lock(&sbi->ll_lock);
+        list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
+
+        while (--total >= 0 && count < want) {
                 struct page *page;
                 int keep;

                 if (unlikely(need_resched())) {
-                        ll_pglist_cpu_unlock(sbi, cpu);
+                        spin_unlock(&sbi->ll_lock);
                         cond_resched();
-                        ll_pglist_cpu_lock(sbi, cpu);
+                        spin_lock(&sbi->ll_lock);
                 }

-                llap = llite_pglist_next_llap(head,
-                                              &dummy_llap.llap_pglist_item);
+                llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
                 list_del_init(&dummy_llap.llap_pglist_item);
                 if (llap == NULL)
                         break;
@@ -538,7 +552,7 @@ static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
                 }

                 page_cache_get(page);
-                ll_pglist_cpu_unlock(sbi, cpu);
+                spin_unlock(&sbi->ll_lock);

                 if (page->mapping != NULL) {
                         ll_teardown_mmaps(page->mapping,
@@ -560,146 +574,15 @@ static inline int llap_shrink_cache_internal(struct ll_sb_info *sbi,
                 unlock_page(page);
                 page_cache_release(page);

-                ll_pglist_cpu_lock(sbi, cpu);
+                spin_lock(&sbi->ll_lock);
         }
         list_del(&dummy_llap.llap_pglist_item);
-        ll_pglist_cpu_unlock(sbi, cpu);
-
-        CDEBUG(D_CACHE, "shrank %d, expected %d however. \n", count, target);
-        return count;
-}
-
-
-/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
- *
- * At first, this code calculates the total pages wanted by @shrink_fraction,
- * then it deduces how many pages should be reaped from each cpu in proportion
- * to each cpu's own page count (llpd_count).
- */
-int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
-{
-        unsigned long total, want, percpu_want, count = 0;
-        int cpu, nr_cpus;
-
-        total = lcounter_read(&sbi->ll_async_page_count);
-        if (total == 0)
-                return 0;
-
-#ifdef HAVE_SHRINKER_CACHE
-        want = shrink_fraction;
-        if (want == 0)
-                return total;
-#else
-        /* There can be a large number of llaps (600k or more in a large
-         * memory machine) so the VM 1/6 shrink ratio is likely too much.
-         * Since we are freeing pages also, we don't necessarily want to
-         * shrink so much.  Limit to 40MB of pages + llaps per call. */
-        if (shrink_fraction <= 0)
-                want = total - sbi->ll_async_page_max + 32*num_online_cpus();
-        else
-                want = (total + shrink_fraction - 1) / shrink_fraction;
-#endif
-
-        if (want > 40 << (20 - CFS_PAGE_SHIFT))
-                want = 40 << (20 - CFS_PAGE_SHIFT);
-
-        CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
-               want, total, shrink_fraction);
-
-        nr_cpus = num_possible_cpus();
-        cpu = sbi->ll_async_page_clock_hand;
-        /* we at most do one round */
-        do {
-                int c;
-
-                cpu = (cpu + 1) % nr_cpus;
-                c = LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_count;
-                if (!cpu_online(cpu))
-                        percpu_want = c;
-                else
-                        percpu_want = want / ((total / (c + 1)) + 1);
-                if (percpu_want == 0)
-                        continue;
-
-                count += llap_shrink_cache_internal(sbi, cpu, percpu_want);
-                if (count >= want)
-                        sbi->ll_async_page_clock_hand = cpu;
-        } while (cpu != sbi->ll_async_page_clock_hand);
+        spin_unlock(&sbi->ll_lock);

         CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
                count, want, total);

-#ifdef HAVE_SHRINKER_CACHE
-        return lcounter_read(&sbi->ll_async_page_count);
-#else
         return count;
-#endif
 }
-
-/* Rebalance the async page queue len for each cpu, so that a cpu doing
- * more IO gets a relatively longer queue.
- * This function should be called with preempt disabled.
- */
-static inline int llap_async_cache_rebalance(struct ll_sb_info *sbi)
-{
-        unsigned long sample = 0, *cpu_sample, bias, slice;
-        struct ll_pglist_data *pd;
-        cpumask_t mask;
-        int cpu, surplus;
-        int w1 = 7, w2 = 3, base = (w1 + w2);  /* weight value */
-        atomic_t *pcnt;
-
-        if (!spin_trylock(&sbi->ll_async_page_reblnc_lock)) {
-                /* someone else is doing the job */
-                return 1;
-        }
-
-        pcnt = &LL_PGLIST_DATA(sbi)->llpd_sample_count;
-        if (!atomic_read(pcnt)) {
-                /* rare case, somebody else has gotten this job done */
-                spin_unlock(&sbi->ll_async_page_reblnc_lock);
-                return 1;
-        }
-
-        sbi->ll_async_page_reblnc_count++;
-        cpu_sample = sbi->ll_async_page_sample;
-        memset(cpu_sample, 0, num_possible_cpus() * sizeof(unsigned long));
-        for_each_online_cpu(cpu) {
-                pcnt = &LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_sample_count;
-                cpu_sample[cpu] = atomic_read(pcnt);
-                atomic_set(pcnt, 0);
-                sample += cpu_sample[cpu];
-        }
-
-        cpus_clear(mask);
-        surplus = sbi->ll_async_page_max;
-        slice = surplus / sample + 1;
-        sample /= num_online_cpus();
-        bias = sample >> 4;
-        for_each_online_cpu(cpu) {
-                pd = LL_PGLIST_DATA_CPU(sbi, cpu);
-                if (labs((long int)sample - cpu_sample[cpu]) > bias) {
-                        unsigned long budget = pd->llpd_budget;
-                        /* weighted original queue length and expected queue
-                         * length to avoid thrashing. */
-                        pd->llpd_budget = (budget * w1) / base +
-                                (slice * cpu_sample[cpu]) * w2 / base;
-                        cpu_set(cpu, mask);
-                }
-                surplus -= pd->llpd_budget;
-        }
-        surplus /= cpus_weight(mask) ?: 1;
-        for_each_cpu_mask(cpu, mask)
-                LL_PGLIST_DATA_CPU(sbi, cpu)->llpd_budget += surplus;
-        spin_unlock(&sbi->ll_async_page_reblnc_lock);
-
-        /* TODO: do we really need to call llap_shrink_cache_internal
-         * for every cpu with a page count greater than its budget?
-         * for_each_cpu_mask(cpu, mask)
-         *      ll_shrink_cache_internal(...)
-         */
-
-        return 0;
-}
-
 static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
@@ -710,8 +593,7 @@ static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
         struct obd_export *exp;
         struct inode *inode = page->mapping->host;
         struct ll_sb_info *sbi;
-        struct ll_pglist_data *pd;
-        int rc, cpu, target;
+        int rc;
         ENTRY;

         if (!inode) {
@@ -734,30 +616,11 @@ static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
                 /* move to end of LRU list, except when page is just about to
                  * die */
                 if (origin != LLAP_ORIGIN_REMOVEPAGE) {
-                        int old_cpu = llap->llap_pglist_cpu;
-                        struct ll_pglist_data *old_pd;
-
-                        pd = ll_pglist_double_lock(sbi, old_cpu, &old_pd);
-                        pd->llpd_hit++;
-                        while (old_cpu != llap->llap_pglist_cpu) {
-                                /* rare case, someone else is touching this
-                                 * page too. */
-                                ll_pglist_double_unlock(sbi, old_cpu);
-                                old_cpu = llap->llap_pglist_cpu;
-                                pd=ll_pglist_double_lock(sbi, old_cpu, &old_pd);
-                        }
-
-                        list_move(&llap->llap_pglist_item,
-                                  &pd->llpd_list);
-                        old_pd->llpd_gen++;
-                        if (pd->llpd_cpu != old_cpu) {
-                                pd->llpd_count++;
-                                old_pd->llpd_count--;
-                                old_pd->llpd_gen++;
-                                llap->llap_pglist_cpu = pd->llpd_cpu;
-                                pd->llpd_cross++;
-                        }
-                        ll_pglist_double_unlock(sbi, old_cpu);
+                        spin_lock(&sbi->ll_lock);
+                        sbi->ll_pglist_gen++;
+                        list_del_init(&llap->llap_pglist_item);
+                        list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
+                        spin_unlock(&sbi->ll_lock);
                 }
                 GOTO(out, llap);
         }
@@ -767,28 +630,8 @@ static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
                 RETURN(ERR_PTR(-EINVAL));

         /* limit the number of lustre-cached pages */
-        cpu = get_cpu();
-        pd = LL_PGLIST_DATA(sbi);
-        target = pd->llpd_count - pd->llpd_budget;
-        if (target > 0) {
-                rc = 0;
-                atomic_inc(&pd->llpd_sample_count);
-                if (atomic_read(&pd->llpd_sample_count) >
-                    sbi->ll_async_page_sample_max) {
-                        pd->llpd_reblnc_count++;
-                        rc = llap_async_cache_rebalance(sbi);
-                        if (rc == 0)
-                                target = pd->llpd_count - pd->llpd_budget;
-                }
-                /* if rc equals 1, it means another cpu is doing the rebalance
-                 * job, and our budget # would be modified when we read it.
-                 * Furthermore, it is quite likely being increased because
-                 * we have already reached the rebalance threshold.  In this
-                 * case, we skip shrinking the cache here. */
-                if ((rc == 0) && target > 0)
-                        llap_shrink_cache_internal(sbi, cpu, target + 32);
-        }
-        put_cpu();
+        if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
+                llap_shrink_cache(sbi, 0);

         OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
                        ll_async_page_slab_size);
@@ -817,14 +660,11 @@ static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
         __set_page_ll_data(page, llap);
         llap->llap_page = page;

-        lcounter_inc(&sbi->ll_async_page_count);
-        pd = ll_pglist_lock(sbi);
-        list_add_tail(&llap->llap_pglist_item, &pd->llpd_list);
-        pd->llpd_count++;
-        pd->llpd_gen++;
-        pd->llpd_miss++;
-        llap->llap_pglist_cpu = pd->llpd_cpu;
-        ll_pglist_unlock(sbi);
+        spin_lock(&sbi->ll_lock);
+        sbi->ll_pglist_gen++;
+        sbi->ll_async_page_count++;
+        list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
+        spin_unlock(&sbi->ll_lock);

 out:
         if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
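The next hunk reverts ll_ra_count_get() to a reservation serialized by sbi->ll_lock; the code being removed reserved read-ahead pages lock-free by adding optimistically and backing out on overshoot. A standalone sketch of that removed pattern (ra_reserve is a made-up name):

    /* Illustrative sketch, not part of the patch. */
    static unsigned long ra_reserve(atomic_t *cur, long max, unsigned long len)
    {
            long ret = min_t(long, max - atomic_read(cur), len);

            if (ret < 0)
                    return 0;
            /* Optimistically take the pages; if a concurrent caller
             * pushed the running total past 'max', give them back. */
            if (atomic_add_return(ret, cur) > max) {
                    atomic_sub(ret, cur);
                    return 0;
            }
            return ret;
    }

As the removed WARNING comment notes, many concurrent callers can each see a nearly full window and all back out, so a zero reservation is possible even when budget remains.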
@@ -1028,40 +868,28 @@ out:
         RETURN(rc);
 }

-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
-
-/* WARNING: This algorithm is used to reduce the contention on
- * sbi->ll_lock. It should work well if the ra_max_pages is much
- * greater than the single file's read-ahead window.
- *
- * TODO: There may exist a `global sync problem' in this implementation.
- * Considering the global ra window is 100M, and each file's ra window is 10M,
- * there are over 10 files trying to get their ra budget and reach
- * ll_ra_count_get at exactly the same time. All of them will get a zero ra
- * window, although the global window is 100M. -jay
- */
 static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
 {
         struct ll_ra_info *ra = &sbi->ll_ra_info;
         unsigned long ret;
         ENTRY;

-        ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), len);
-        if ((int)ret < 0)
-                GOTO(out, ret = 0);
+        spin_lock(&sbi->ll_lock);
+        ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
+        ra->ra_cur_pages += ret;
+        spin_unlock(&sbi->ll_lock);

-        if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
-                atomic_sub(ret, &ra->ra_cur_pages);
-                ret = 0;
-        }
-out:
         RETURN(ret);
 }

 static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
 {
         struct ll_ra_info *ra = &sbi->ll_ra_info;

-        atomic_sub(len, &ra->ra_cur_pages);
+        spin_lock(&sbi->ll_lock);
+        LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
+                 ra->ra_cur_pages, len);
+        ra->ra_cur_pages -= len;
+        spin_unlock(&sbi->ll_lock);
 }

 /* called for each page in a completed rpc.*/
@@ -1131,8 +959,7 @@ static void __ll_put_llap(struct page *page)
         struct obd_export *exp;
         struct ll_async_page *llap;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct ll_pglist_data *pd;
-        int rc, cpu;
+        int rc;
         ENTRY;

         exp = ll_i2obdexp(inode);
@@ -1160,14 +987,12 @@ static void __ll_put_llap(struct page *page)
          * is providing exclusivity to memory pressure/truncate/writeback..*/
         __clear_page_ll_data(page);

-        lcounter_dec(&sbi->ll_async_page_count);
-        cpu = llap->llap_pglist_cpu;
-        pd = ll_pglist_cpu_lock(sbi, cpu);
-        pd->llpd_gen++;
-        pd->llpd_count--;
+        spin_lock(&sbi->ll_lock);
         if (!list_empty(&llap->llap_pglist_item))
                 list_del_init(&llap->llap_pglist_item);
-        ll_pglist_cpu_unlock(sbi, cpu);
+        sbi->ll_pglist_gen++;
+        sbi->ll_async_page_count--;
+        spin_unlock(&sbi->ll_lock);

         OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
         EXIT;
@@ -1221,16 +1046,20 @@ static int ll_issue_page_read(struct obd_export *exp,
         RETURN(rc);
 }

-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
+static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
 {
         LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
-        lprocfs_counter_incr(sbi->ll_ra_stats, which);
+        ra->ra_stats[which]++;
 }

 static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
 {
         struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
-        ll_ra_stats_inc_sbi(sbi, which);
+        struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
+
+        spin_lock(&sbi->ll_lock);
+        ll_ra_stats_inc_unlocked(ra, which);
+        spin_unlock(&sbi->ll_lock);
 }

 void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
@@ -1766,9 +1595,10 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
         int zero = 0, stride_zero = 0, stride_detect = 0, ra_miss = 0;
         ENTRY;

+        spin_lock(&sbi->ll_lock);
         spin_lock(&ras->ras_lock);

-        ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
+        ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);

         /* reset the read-ahead window in two cases.  First when the app seeks
          * or reads to some other part of the file.  Secondly if we get a
          * read-ahead miss that we think we've previously issued.  This can
          * imply the vm is
@@ -1777,7 +1607,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
          * reclaiming it before we get to it. */
         if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
                 zero = 1;
-                ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
+                ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
                 /* check whether it is in stride I/O mode*/
                 if (!index_in_stride_window(index, ras, inode))
                         stride_zero = 1;
@@ -1792,7 +1622,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                  * stride I/O mode to avoid complication */
                 if (!stride_io_mode(ras))
                         stride_zero = 1;
-                ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
+                ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
         }

         /* On the second access to a file smaller than the tunable
@@ -1889,6 +1719,7 @@ out_unlock:
         RAS_CDEBUG(ras);
         ras->ras_request_index++;
         spin_unlock(&ras->ras_lock);
+        spin_unlock(&sbi->ll_lock);
         return;
 }