From: yury Date: Sat, 16 Feb 2008 00:49:20 +0000 (+0000) Subject: b=13766 X-Git-Tag: v1_7_0_51~239 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=710793eab04e08c5ce671cb99ed1b2db4baa333f;hp=4ab9da2f75d10022f6f65626f1cbef6bd611dae8 b=13766 r=vitaly,shadow - fixes many aspects of LRU resize, with the main goals of working more smoothly under different workloads, avoiding large drops in SLV (which cause more cached locks to be dropped), and improving memory pressure handling. See bug 13766 for more details; - cleanups in many places, removed obsolete code. --- diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index b79c0d6..7789517 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -224,12 +224,6 @@ struct ldlm_pool_ops { int (*po_setup)(struct ldlm_pool *pl, int limit); }; -enum { - LDLM_POOL_CTL_RECALC = 1 << 0, /* Pool recalc is enabled */ - LDLM_POOL_CTL_SHRINK = 1 << 1, /* Pool shrink is enabled */ - LDLM_POOL_CTL_FULL = (LDLM_POOL_CTL_RECALC | LDLM_POOL_CTL_SHRINK) -}; - /* One second for pools thread check interval. */ #define LDLM_POOLS_THREAD_PERIOD (1) @@ -262,26 +256,14 @@ struct ldlm_pool { * server was obtained. */ struct ldlm_pool_ops *pl_ops; /* Recalc and shrink ops. */ - int pl_control; /* Pool features mask */ - - atomic_t pl_grant_plan; /* Planned number of granted + int pl_grant_plan; /* Planned number of granted * locks for next T. */ - atomic_t pl_grant_step; /* Grant plan step for next + int pl_grant_step; /* Grant plan step for next * T. */ struct lprocfs_stats *pl_stats; /* Pool statistics. 
*/ }; -static inline int pool_recalc_enabled(struct ldlm_pool *pl) -{ - return pl->pl_control & LDLM_POOL_CTL_RECALC; -} - -static inline int pool_shrink_enabled(struct ldlm_pool *pl) -{ - return pl->pl_control & LDLM_POOL_CTL_SHRINK; -} - typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, void *req_cookie, ldlm_mode_t mode, int flags, void *data); diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index c87468b..7e4c8b9 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -134,7 +134,8 @@ struct obd_export { static inline int exp_connect_cancelset(struct obd_export *exp) { - return exp ? !!(exp->exp_connect_flags & OBD_CONNECT_CANCELSET) : 0; + LASSERT(exp != NULL); + return !!(exp->exp_connect_flags & OBD_CONNECT_CANCELSET); } static inline int exp_connect_lru_resize(struct obd_export *exp) diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 741190aa..294ba72 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -47,6 +47,8 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync, int flags); int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int cancel_flags, int flags); +int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns, int count, int max, + int flags); /* ldlm_resource.c */ int ldlm_resource_putref_locked(struct ldlm_resource *res); @@ -138,3 +140,46 @@ struct ldlm_state { int ldlm_init(void); void ldlm_exit(void); +enum ldlm_policy_res { + LDLM_POLICY_CANCEL_LOCK, + LDLM_POLICY_KEEP_LOCK +}; + +typedef enum ldlm_policy_res ldlm_policy_res_t; + +#define LDLM_POOL_PROC_READER(var, type) \ + static int lprocfs_rd_##var(char *page, char **start, off_t off, \ + int count, int *eof, void *data) \ + { \ + struct ldlm_pool *pl = data; \ + type tmp; \ + \ + spin_lock(&pl->pl_lock); \ + tmp = pl->pl_##var; \ + spin_unlock(&pl->pl_lock); \ + \ + return 
lprocfs_rd_uint(page, start, off, count, eof, &tmp); \ + } \ + struct __##var##__dummy_read {;} /* semicolon catcher */ + +#define LDLM_POOL_PROC_WRITER(var, type) \ + int lprocfs_wr_##var(struct file *file, const char *buffer, \ + unsigned long count, void *data) \ + { \ + struct ldlm_pool *pl = data; \ + type tmp; \ + int rc; \ + \ + rc = lprocfs_wr_uint(file, buffer, count, &tmp); \ + if (rc) { \ + CERROR("Can't parse user input, rc = %d\n", rc); \ + return rc; \ + } \ + \ + spin_lock(&pl->pl_lock); \ + pl->pl_##var = tmp; \ + spin_unlock(&pl->pl_lock); \ + \ + return rc; \ + } \ + struct __##var##__dummy_write {;} /* semicolon catcher */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index d31b78c..fe890ca 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1824,8 +1824,10 @@ int target_pack_pool_reply(struct ptlrpc_request *req) { struct ldlm_pool *pl; ENTRY; - - if (!req->rq_export || !exp_connect_lru_resize(req->rq_export)) { + + if (!req->rq_export || !req->rq_export->exp_obd || + !req->rq_export->exp_obd->obd_namespace || + !exp_connect_lru_resize(req->rq_export)) { lustre_msg_set_slv(req->rq_repmsg, 0); lustre_msg_set_limit(req->rq_repmsg, 0); RETURN(0); diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index 7aca642..541afe1 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -102,10 +102,10 @@ #define LDLM_POOL_HOST_L ((num_physpages >> (20 - PAGE_SHIFT)) * 50) /* Default step in % for grant plan. */ -#define LDLM_POOL_GSP (5) +#define LDLM_POOL_GSP (10) /* LDLM_POOL_GSP% of all locks is default GP. */ -#define LDLM_POOL_GP(L) ((L) * LDLM_POOL_GSP / 100) +#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_GSP) / 100) /* Max age for locks on clients. 
*/ #define LDLM_POOL_MAX_AGE (36000) @@ -149,6 +149,7 @@ enum { LDLM_POOL_SHRINK_REQTD_STAT, LDLM_POOL_SHRINK_FREED_STAT, LDLM_POOL_RECALC_STAT, + LDLM_POOL_TIMING_STAT, LDLM_POOL_LAST_STAT }; @@ -160,33 +161,43 @@ static inline struct ldlm_namespace *ldlm_pl2ns(struct ldlm_pool *pl) /* Should be called under ->pl_lock taken */ static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl) { - int grant_plan, granted; - __u32 limit; + int granted, grant_step, limit; limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); - grant_plan = granted + ((limit - granted) * - atomic_read(&pl->pl_grant_step)) / 100; - atomic_set(&pl->pl_grant_plan, grant_plan); + grant_step = ((limit - granted) * pl->pl_grant_step) / 100; + pl->pl_grant_plan = granted + grant_step; } /* Should be called under ->pl_lock taken */ static inline void ldlm_pool_recalc_slv(struct ldlm_pool *pl) { - int slv_factor, granted, grant_plan; + int grant_usage, granted, grant_plan; + __u64 slv, slv_factor; __u32 limit; - __u64 slv; slv = ldlm_pool_get_slv(pl); + grant_plan = pl->pl_grant_plan; limit = ldlm_pool_get_limit(pl); granted = atomic_read(&pl->pl_granted); - grant_plan = atomic_read(&pl->pl_grant_plan); - - if ((slv_factor = limit - (granted - grant_plan)) <= 0) - slv_factor = 1; - slv = (slv * ((slv_factor * 100) / limit)); + grant_usage = limit - (granted - grant_plan); + if (grant_usage <= 0) + grant_usage = 1; + + /* Find out SLV change factor which is the ratio of grant usage + * from limit. SLV changes as fast as the ratio of grant plan + * consumtion. The more locks from grant plan are not consumed + * by clients in last interval (idle time), the faster grows + * SLV. And the opposite, the more grant plan is over-consumed + * (load time) the faster drops SLV. 
*/ + slv_factor = (grant_usage * 100) / limit; + if (2 * abs(granted - limit) > limit) { + slv_factor *= slv_factor; + slv_factor = dru(slv_factor, 100); + } + slv = slv * slv_factor; slv = dru(slv, 100); if (slv > ldlm_pool_slv_max(limit)) { @@ -201,10 +212,10 @@ static inline void ldlm_pool_recalc_slv(struct ldlm_pool *pl) static inline void ldlm_pool_recalc_stats(struct ldlm_pool *pl) { __u64 slv = ldlm_pool_get_slv(pl); - __u32 granted = atomic_read(&pl->pl_granted); - __u32 grant_rate = atomic_read(&pl->pl_grant_rate); - __u32 grant_plan = atomic_read(&pl->pl_grant_plan); - __u32 cancel_rate = atomic_read(&pl->pl_cancel_rate); + int grant_plan = pl->pl_grant_plan; + int granted = atomic_read(&pl->pl_granted); + int grant_rate = atomic_read(&pl->pl_grant_rate); + int cancel_rate = atomic_read(&pl->pl_cancel_rate); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT, slv); @@ -241,6 +252,8 @@ static int ldlm_srv_pool_recalc(struct ldlm_pool *pl) atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, + recalc_interval_sec); } spin_unlock(&pl->pl_lock); RETURN(0); @@ -312,6 +325,8 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); pl->pl_recalc_time = cfs_time_current_sec(); + lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, + recalc_interval_sec); } spin_unlock(&pl->pl_lock); @@ -339,8 +354,8 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, /* Find out how many locks may be released according to shrink * policy. 
*/ if (nr == 0) - RETURN(ldlm_cancel_lru_local(ldlm_pl2ns(pl), NULL, 0, - 0, 0, LDLM_CANCEL_SHRINK)); + RETURN(ldlm_cancel_lru_estimate(ldlm_pl2ns(pl), 0, 0, + LDLM_CANCEL_SHRINK)); /* Cancel @nr locks accoding to shrink policy */ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC, @@ -362,7 +377,7 @@ int ldlm_pool_recalc(struct ldlm_pool *pl) { int count; - if (pl->pl_ops->po_recalc != NULL && pool_recalc_enabled(pl)) { + if (pl->pl_ops->po_recalc != NULL) { count = pl->pl_ops->po_recalc(pl); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, count); @@ -377,7 +392,7 @@ int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, { int cancel = 0; - if (pl->pl_ops->po_shrink != NULL && pool_shrink_enabled(pl)) { + if (pl->pl_ops->po_shrink != NULL) { cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask); if (nr > 0) { lprocfs_counter_add(pl->pl_stats, @@ -409,7 +424,7 @@ EXPORT_SYMBOL(ldlm_pool_setup); static int lprocfs_rd_pool_state(char *page, char **start, off_t off, int count, int *eof, void *data) { - __u32 granted, grant_rate, cancel_rate, grant_step; + int granted, grant_rate, cancel_rate, grant_step; int nr = 0, grant_speed, grant_plan; struct ldlm_pool *pl = data; __u32 limit; @@ -418,10 +433,10 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, spin_lock(&pl->pl_lock); slv = ldlm_pool_get_slv(pl); limit = ldlm_pool_get_limit(pl); + grant_plan = pl->pl_grant_plan; + grant_step = pl->pl_grant_step; granted = atomic_read(&pl->pl_granted); grant_rate = atomic_read(&pl->pl_grant_rate); - grant_plan = atomic_read(&pl->pl_grant_plan); - grant_step = atomic_read(&pl->pl_grant_step); grant_speed = atomic_read(&pl->pl_grant_speed); cancel_rate = atomic_read(&pl->pl_cancel_rate); spin_unlock(&pl->pl_lock); @@ -450,6 +465,10 @@ static int lprocfs_rd_pool_state(char *page, char **start, off_t off, return nr; } +LDLM_POOL_PROC_READER(grant_plan, int); +LDLM_POOL_PROC_READER(grant_step, int); +LDLM_POOL_PROC_WRITER(grant_step, int); + static int 
ldlm_pool_proc_init(struct ldlm_pool *pl) { struct ldlm_namespace *ns = ldlm_pl2ns(pl); @@ -497,12 +516,6 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) pool_vars[0].read_fptr = lprocfs_rd_atomic; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - snprintf(var_name, MAX_STRING_SIZE, "control"); - pool_vars[0].data = &pl->pl_control; - pool_vars[0].read_fptr = lprocfs_rd_uint; - pool_vars[0].write_fptr = lprocfs_wr_uint; - lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); - snprintf(var_name, MAX_STRING_SIZE, "grant_speed"); pool_vars[0].data = &pl->pl_grant_speed; pool_vars[0].read_fptr = lprocfs_rd_atomic; @@ -519,21 +532,21 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "grant_plan"); - pool_vars[0].data = &pl->pl_grant_plan; - pool_vars[0].read_fptr = lprocfs_rd_atomic; + pool_vars[0].data = pl; + pool_vars[0].read_fptr = lprocfs_rd_grant_plan; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "grant_step"); - pool_vars[0].data = &pl->pl_grant_step; - pool_vars[0].read_fptr = lprocfs_rd_atomic; + pool_vars[0].data = pl; + pool_vars[0].read_fptr = lprocfs_rd_grant_step; if (ns_is_server(ns)) - pool_vars[0].write_fptr = lprocfs_wr_atomic; + pool_vars[0].write_fptr = lprocfs_wr_grant_step; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor"); pool_vars[0].data = &pl->pl_lock_volume_factor; - pool_vars[0].read_fptr = lprocfs_rd_uint; - pool_vars[0].write_fptr = lprocfs_wr_uint; + pool_vars[0].read_fptr = lprocfs_rd_atomic; + pool_vars[0].write_fptr = lprocfs_wr_atomic; lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0); snprintf(var_name, MAX_STRING_SIZE, "state"); @@ -549,9 +562,11 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "granted", "locks"); - 
lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, 0, + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "grant", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, 0, + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "cancel", "locks"); lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, @@ -574,6 +589,9 @@ static int ldlm_pool_proc_init(struct ldlm_pool *pl) lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT, LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, "recalc_freed", "locks"); + lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT, + LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, + "recalc_timing", "sec"); lprocfs_register_stats(pl->pl_proc_dir, "stats", pl->pl_stats); EXIT; @@ -612,9 +630,8 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, atomic_set(&pl->pl_grant_rate, 0); atomic_set(&pl->pl_cancel_rate, 0); atomic_set(&pl->pl_grant_speed, 0); - pl->pl_control = LDLM_POOL_CTL_FULL; - atomic_set(&pl->pl_grant_step, LDLM_POOL_GSP); - atomic_set(&pl->pl_grant_plan, LDLM_POOL_GP(LDLM_POOL_HOST_L)); + pl->pl_grant_step = LDLM_POOL_GSP; + pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L); snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d", ns->ns_name, idx); @@ -1024,7 +1041,7 @@ EXPORT_SYMBOL(ldlm_pools_fini); #endif /* __KERNEL__ */ #else /* !HAVE_LRU_RESIZE_SUPPORT */ -int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit) +int ldlm_pool_setup(struct ldlm_pool *pl, int limit) { return 0; } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 7110846..9ccf69c 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1003,30 +1003,43 @@ static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp) int ldlm_cli_update_pool(struct ptlrpc_request *req) { + __u64 old_slv, new_slv; struct 
ldlm_pool *pl; + __u32 new_limit; ENTRY; if (!imp_connect_lru_resize(req->rq_import)) RETURN(0); - if (lustre_msg_get_slv(req->rq_repmsg) == 0 || - lustre_msg_get_limit(req->rq_repmsg) == 0) + /* In some cases RPC may contain slv and limit zeroed out. This is + * the case when server does not support lru resize feature. This is + * also possible in some recovery cases when server side reqs have no + * ref to obd export and thus access to server side namespace is no + * possible. */ + if (lustre_msg_get_slv(req->rq_repmsg) == 0 || + lustre_msg_get_limit(req->rq_repmsg) == 0) { + DEBUG_REQ(D_HA, req, "zero SLV or Limit found " + "(SLV: "LPU64", Limit: %u)", + lustre_msg_get_slv(req->rq_repmsg), + lustre_msg_get_limit(req->rq_repmsg)); RETURN(0); + } + new_limit = lustre_msg_get_limit(req->rq_repmsg); + new_slv = lustre_msg_get_slv(req->rq_repmsg); pl = ldlm_imp2pl(req->rq_import); spin_lock(&pl->pl_lock); + old_slv = ldlm_pool_get_slv(pl); + ldlm_pool_set_slv(pl, new_slv); + ldlm_pool_set_limit(pl, new_limit); /* Check if we need to wakeup pools thread for fast SLV change. * This is only done when threads period is noticably long like * 10s or more. */ #if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10) { - __u64 old_slv, new_slv, fast_change; - - old_slv = ldlm_pool_get_slv(pl); - new_slv = lustre_msg_get_slv(req->rq_repmsg); - fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; + __u64 fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE; do_div(fast_change, 100); /* Wake up pools thread only if SLV has changed more than @@ -1037,23 +1050,7 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req) ldlm_pools_wakeup(); } #endif - /* In some cases RPC may contain slv and limit zeroed out. This is - * the case when server does not support lru resize feature. This is - * also possible in some recovery cases when server side reqs have no - * ref to obd export and thus access to server side namespace is no - * possible. 
*/ - if (lustre_msg_get_slv(req->rq_repmsg) != 0 && - lustre_msg_get_limit(req->rq_repmsg) != 0) { - ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg)); - ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg)); - } else { - DEBUG_REQ(D_HA, req, "zero SLV or Limit found " - "(SLV: "LPU64", Limit: %u)", - lustre_msg_get_slv(req->rq_repmsg), - lustre_msg_get_limit(req->rq_repmsg)); - } spin_unlock(&pl->pl_lock); - RETURN(0); } EXPORT_SYMBOL(ldlm_cli_update_pool); @@ -1139,16 +1136,21 @@ static int ldlm_cancel_list(struct list_head *cancels, int count, int flags) RETURN(count); } -/* Return 1 if @lock should be canceled according to shrinker policy. - * Return zero otherwise. */ -static int ldlm_cancel_shrink_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int asked) +/* Return 1 to stop lru processing and keep current lock cached. Return zero + * otherwise. */ +static ldlm_policy_res_t ldlm_cancel_shrink_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { int lock_cost; __u64 page_nr; + /* Stop lru processing when we reached passed @count or checked all + * locks in lru. */ + if (count && added >= count) + return LDLM_POLICY_KEEP_LOCK; + if (lock->l_resource->lr_type == LDLM_EXTENT) { struct ldlm_extent *l_extent; @@ -1178,21 +1180,27 @@ static int ldlm_cancel_shrink_policy(struct ldlm_namespace *ns, /* Keep all expensive locks in lru for the memory pressure time * cancel policy. They anyways may be canceled by lru resize * pplicy if they have not small enough CLV. */ - return (lock_cost <= ns->ns_shrink_thumb); + return lock_cost > ns->ns_shrink_thumb ? + LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -/* Return 1 if @lock should be canceled according to lru resize policy. - * Return zero otherwise. 
*/ -static int ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int asked) +/* Return 1 to stop lru processing and keep current lock cached. Return zero + * otherwise. */ +static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { cfs_time_t cur = cfs_time_current(); struct ldlm_pool *pl = &ns->ns_pool; __u64 slv, lvf, lv; cfs_time_t la; + /* Stop lru processing when we reached passed @count or checked all + * locks in lru. */ + if (count && added >= count) + return LDLM_POLICY_KEEP_LOCK; + spin_lock(&pl->pl_lock); slv = ldlm_pool_get_slv(pl); lvf = atomic_read(&pl->pl_lock_volume_factor); @@ -1204,40 +1212,55 @@ static int ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, /* Stop when slv is not yet come from server or * lv is smaller than it is. */ lv = lvf * la * unused; - return (slv > 1 && lv >= slv); + return (slv == 1 || lv < slv) ? + LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; +} + +/* Return 1 to stop lru processing and keep current lock cached. Return zero + * otherwise. */ +static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) +{ + /* Stop lru processing when we reached passed @count or checked all + * locks in lru. */ + return (added >= count) ? + LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -/* Return 1 if @lock should be canceled according to passed policy. - * Return zero otherwise. */ -static int ldlm_cancel_passed_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int asked) +/* Return 1 to stop lru processing and keep current lock cached. Return zero + * otherwise. 
*/ +static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { - /* Do nothing here, we allow canceling all locks which - * are passed here from upper layer logic. So that locks - * number to be canceled will be limited by @count and - * @max in ldlm_cancel_lru_local(). */ - return 1; + /* Stop lru processing if young lock is found and we reached passed + * @count. */ + return ((added >= count) && + cfs_time_before(cfs_time_current(), + cfs_time_add(lock->l_last_used, + ns->ns_max_age))) ? + LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -/* Return 1 if @lock should be canceled according to aged policy. - * Return zero otherwise. */ -static int ldlm_cancel_aged_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int asked) +/* Return 1 to stop lru processing and keep current lock cached. Return zero + * otherwise. */ +static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { - /* Cancel old locks if reached asked limit. */ - return !((added >= asked) && - cfs_time_before_64(cfs_time_current(), - cfs_time_add(lock->l_last_used, - ns->ns_max_age))); + /* Stop lru processing when we reached passed @count or checked all + * locks in lru. */ + return (added >= count) ? 
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -typedef int (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, - struct ldlm_lock *, int, - int, int); +typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, + struct ldlm_lock *, int, + int, int); static ldlm_cancel_lru_policy_t ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) @@ -1253,7 +1276,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) if (flags & LDLM_CANCEL_AGED) return ldlm_cancel_aged_policy; } - return NULL; + + return ldlm_cancel_default_policy; } /* - Free space in lru for @count new locks, @@ -1278,14 +1302,16 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) * the beginning of lru list); * * flags & LDLM_CANCEL_SHRINK - cancel not more than @count locks according to - * memory pressre policy function. + * memory pressre policy function; + * + * flags & LDLM_CANCEL_AGED - cancel alocks according to "aged policy". */ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, int count, int max, int cancel_flags, int flags) { - ldlm_cancel_lru_policy_t cancel_lru_policy_func; - int added = 0, unused, cancel; - struct ldlm_lock *lock, *next; + ldlm_cancel_lru_policy_t pf; + struct ldlm_lock *lock; + int added = 0, unused; ENTRY; spin_lock(&ns->ns_unused_lock); @@ -1294,90 +1320,130 @@ int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels, if (!ns_connect_lru_resize(ns)) count += unused - ns->ns_max_unused; - cancel_lru_policy_func = ldlm_cancel_lru_policy(ns, flags); - - list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { - /* Make sure that we skip locks being already in cancel. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (lock->l_flags & LDLM_FL_BL_AST)) - continue; - - /* For any flags, stop scanning if @max or passed @count is - * reached. 
*/ - if ((max && added >= max) || (count && added >= count)) + pf = ldlm_cancel_lru_policy(ns, flags); + LASSERT(pf != NULL); + + while (!list_empty(&ns->ns_unused_list)) { + /* For any flags, stop scanning if @max is reached. */ + if (max && added >= max) break; - /* Pass the lock through the policy filter and see if it - * should stay in lru. */ - if (cancel_lru_policy_func != NULL) { - cancel = cancel_lru_policy_func(ns, lock, unused, - added, count); - - /* Take next lock for shrink policy, we need to check - * whole list. Stop scanning for other policies. */ - if ((flags & LDLM_CANCEL_SHRINK) && !cancel) - continue; - else if (!cancel) + list_for_each_entry(lock, &ns->ns_unused_list, l_lru) { + /* Somebody is already doing CANCEL or there is a + * blocking request will send cancel. */ + if (!(lock->l_flags & LDLM_FL_CANCELING) && + !(lock->l_flags & LDLM_FL_BL_AST)) break; } + if (&lock->l_lru == &ns->ns_unused_list) + break; - if (cancels != NULL) { - LDLM_LOCK_GET(lock); /* dropped by bl thread */ - spin_unlock(&ns->ns_unused_lock); - - lock_res_and_lock(lock); - /* Check flags again under the lock. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (lock->l_flags & LDLM_FL_BL_AST) || - (ldlm_lock_remove_from_lru(lock) == 0)) { - /* other thread is removing lock from lru or - * somebody is already doing CANCEL or - * there is a blocking request which will send - * cancel by itseft. */ - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - spin_lock(&ns->ns_unused_lock); - continue; - } - LASSERT(!lock->l_readers && !lock->l_writers); - - /* If we have chosen to cancel this lock voluntarily, we - * better send cancel notification to server, so that it - * frees appropriate state. This might lead to a race - * where while we are doing cancel here, server is also - * silently cancelling this lock. 
*/ - lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; - - /* Setting the CBPENDING flag is a little misleading, - * but prevents an important race; namely, once - * CBPENDING is set, the lock can accumulate no more - * readers/writers. Since readers and writers are - * already zero here, ldlm_lock_decref() won't see - * this flag and call l_blocking_ast */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; - - /* We can't re-add to l_lru as it confuses the - * refcounting in ldlm_lock_remove_from_lru() if an AST - * arrives after we drop ns_lock below. We use l_bl_ast - * and can't use l_pending_chain as it is used both on - * server and client nevertheless bug 5666 says it is - * used only on server */ - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); + /* Pass the lock through the policy filter and see if it + * should stay in lru. + * + * Even for shrinker policy we stop scanning if + * we find a lock that should stay in the cache. + * We should take into account lock age anyway + * as new lock even if it is small of weight is + * valuable resource. + * + * That is, for shrinker policy we drop only + * old locks, but additionally chose them by + * their weight. Big extent locks will stay in + * the cache. */ + if (pf(ns, lock, unused, added, count) == LDLM_POLICY_KEEP_LOCK) + break; + + LDLM_LOCK_GET(lock); /* dropped by bl thread */ + spin_unlock(&ns->ns_unused_lock); + + lock_res_and_lock(lock); + /* Check flags again under the lock. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST) || + (ldlm_lock_remove_from_lru(lock) == 0)) { + /* other thread is removing lock from lru or + * somebody is already doing CANCEL or + * there is a blocking request which will send + * cancel by itseft. 
*/ unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); spin_lock(&ns->ns_unused_lock); + continue; } + LASSERT(!lock->l_readers && !lock->l_writers); + + /* If we have chosen to cancel this lock voluntarily, we + * better send cancel notification to server, so that it + * frees appropriate state. This might lead to a race + * where while we are doing cancel here, server is also + * silently cancelling this lock. */ + lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + + /* Setting the CBPENDING flag is a little misleading, + * but prevents an important race; namely, once + * CBPENDING is set, the lock can accumulate no more + * readers/writers. Since readers and writers are + * already zero here, ldlm_lock_decref() won't see + * this flag and call l_blocking_ast */ + lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; + + /* We can't re-add to l_lru as it confuses the + * refcounting in ldlm_lock_remove_from_lru() if an AST + * arrives after we drop ns_lock below. We use l_bl_ast + * and can't use l_pending_chain as it is used both on + * server and client nevertheless bug 5666 says it is + * used only on server */ + LASSERT(list_empty(&lock->l_bl_ast)); + list_add(&lock->l_bl_ast, cancels); + unlock_res_and_lock(lock); + spin_lock(&ns->ns_unused_lock); added++; unused--; } spin_unlock(&ns->ns_unused_lock); - - if (cancels == NULL) - RETURN(added); - RETURN(ldlm_cancel_list(cancels, added, cancel_flags)); } +/* Returns number of locks which could be canceled next time when + * ldlm_cancel_lru() is called. Used from locks pool shrinker. */ +int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns, + int count, int max, int flags) +{ + ldlm_cancel_lru_policy_t pf; + struct ldlm_lock *lock; + int added = 0, unused; + ENTRY; + + pf = ldlm_cancel_lru_policy(ns, flags); + LASSERT(pf != NULL); + spin_lock(&ns->ns_unused_lock); + unused = ns->ns_nr_unused; + + list_for_each_entry(lock, &ns->ns_unused_list, l_lru) { + /* For any flags, stop scanning if @max is reached. 
*/ + if (max && added >= max) + break; + + /* Somebody is already doing CANCEL or there is a + * blocking request will send cancel. Let's not count + * this lock. */ + if ((lock->l_flags & LDLM_FL_CANCELING) || + (lock->l_flags & LDLM_FL_BL_AST)) + continue; + + /* Pass the lock through the policy filter and see if it + * should stay in lru. */ + if (pf(ns, lock, unused, added, count) == LDLM_POLICY_KEEP_LOCK) + break; + + added++; + unused--; + } + spin_unlock(&ns->ns_unused_lock); + RETURN(added); +} + /* when called with LDLM_ASYNC the blocking callback will be handled * in a thread and this function will return after the thread has been * asked to call the callback. when called with LDLM_SYNC the blocking diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 805f379..57bbc17 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4513,79 +4513,11 @@ test_119c() # bug 13099 } run_test 119c "Testing for direct read hitting hole" -LDLM_POOL_CTL_RECALC=1 -LDLM_POOL_CTL_SHRINK=2 - -disable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - 
-disable_pool() { - disable_pool_shrink $1 - disable_pool_recalc $1 -} - -enable_pool() { - enable_pool_shrink $1 - enable_pool_recalc $1 -} - -lru_resize_enable() -{ - enable_pool osc - enable_pool "filter-$FSNAME" - enable_pool mdc - enable_pool "mds-$FSNAME" -} - -lru_resize_disable() -{ - disable_pool osc - disable_pool "filter-$FSNAME" - disable_pool mdc - disable_pool "mds-$FSNAME" -} - test_120a() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` @@ -4595,14 +4527,16 @@ test_120a() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120a "Early Lock Cancel: mkdir test" test_120b() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` @@ -4612,14 +4546,16 @@ test_120b() { can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120b "Early Lock Cancel: create test" test_120c() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 touch $DIR/$tdir/d1/f1 cancel_lru_locks mdc @@ -4631,14 +4567,16 @@ test_120c() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120c "Early Lock Cancel: link test" test_120d() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc touch $DIR/$tdir cancel_lru_locks mdc stat $DIR/$tdir > /dev/null @@ -4649,14 +4587,16 @@ test_120d() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120d "Early Lock Cancel: setattr test" test_120e() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc dd if=/dev/zero of=$DIR/$tdir/f1 count=1 cancel_lru_locks mdc cancel_lru_locks osc @@ -4669,14 +4609,16 @@ test_120e() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120e "Early Lock Cancel: unlink test" test_120f() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 dd if=/dev/zero of=$DIR/$tdir/d1/f1 count=1 dd if=/dev/zero of=$DIR/$tdir/d2/f2 count=1 @@ -4692,14 +4634,16 @@ test_120f() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120f "Early Lock Cancel: rename test" test_120g() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc count=10000 echo create $count files mkdir -p $DIR/$tdir @@ -4725,7 +4669,8 @@ test_120g() { echo total: $((can2-can1)) cancels, $((blk2-blk1)) blockings sleep 2 # wait for commitment of removal - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120g "Early Lock Cancel: performance test" @@ -4741,53 +4686,32 @@ test_121() { #bug #10589 } run_test 121 "read cancel race =========" -cmd_cancel_lru_locks() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - cancel_lru_locks $d - fi - done -} - test_124a() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 - cmd_cancel_lru_locks "mdc" - lru_resize_enable - - # we want to test main pool functionality, that is cancel based on SLV - # this is why shrinkers are disabled - disable_pool_shrink "mds-$FSNAME" - disable_pool_shrink mdc - NR=2000 mkdir -p $DIR/$tdir || error 
"failed to create $DIR/$tdir" # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" - for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done + createmany -o $DIR/$tdir/f $NR || + error "failed to create $NR files in $DIR/$tdir" + cancel_lru_locks mdc + ls -l $DIR/$tdir > /dev/null + NSDIR="" LRU_SIZE=0 - for d in `find $LPROC/ldlm/namespaces | grep mdc-`; do - if test -f $d/lru_size; then - LRU_SIZE=`cat $d/lru_size` - if test $LRU_SIZE -gt 0; then - log "using $d namespace" - NSDIR=$d - break - fi + for F in $LPROC/ldlm/namespaces/*mdc-*/lru_size; do + LRU_SIZE=$(cat $F) + if [ $LRU_SIZE -gt $(default_lru_size) ]; then + NSDIR=$(dirname $F) + log "using $(basename $NSDIR) namespace" + break fi done - if test -z $NSDIR; then - skip "No cached locks created!" - return 0 - fi - - if test $LRU_SIZE -lt 100; then + if [ -z "$NSDIR" -o $LRU_SIZE -lt $(default_lru_size) ]; then skip "Not enough cached locks created!" return 0 fi @@ -4810,7 +4734,7 @@ test_124a() { # in the case of CMD, LRU_SIZE_B != $NR in most of cases LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) LRU_SIZE_B=$LRU_SIZE - log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)" + log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE lock(s)" OLD_LVF=`cat $NSDIR/pool/lock_volume_factor` echo "$LVF" > $NSDIR/pool/lock_volume_factor log "sleep for $((SLEEP+SLEEP_ADD))s" @@ -4820,102 +4744,84 @@ test_124a() { [ $LRU_SIZE_B -gt $LRU_SIZE_A ] || { error "No locks dropped in "$((SLEEP+SLEEP_ADD))"s. 
LRU size: $LRU_SIZE_A" - lru_resize_enable unlinkmany $DIR/$tdir/f $NR return } log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in "$((SLEEP+SLEEP_ADD))"s" - lru_resize_enable log "unlink $NR files at $DIR/$tdir" unlinkmany $DIR/$tdir/f $NR } run_test 124a "lru resize =======================================" -set_lru_size() { - NS=$1 - SIZE=$2 - test "x$NS" = "x" && NS="mdc" - test "x$SIZE" = "x" && SIZE="0" - test $SIZE -lt 0 && SIZE="0" - test $SIZE -gt 0 && ACTION="disabled" || ACTION="enabled" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru resize $ACTION" - log " lru_size=$SIZE" - echo $SIZE > $d/lru_size - fi - done -} - -get_lru_size() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru_size=$(cat $d/lru_size)" - fi - done -} - test_124b() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 + # even for cmd no matter what metadata namespace to use for getting + # the limit, we use appropriate. 
NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` LIMIT=`cat $NSDIR/pool/limit` - NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l) - # 100 locks here is default value for non-shrinkable lru as well - # as the order to switch to static lru managing policy - # define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) - LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU)) - - NR=$((LIMIT-(LIMIT/3))) - log "starting lru resize disable cycle" - set_lru_size "mdc-" $LDLM_DEFAULT_LRU_SIZE - + NR=$(($(default_lru_size)*20)) + if [ $NR -gt $LIMIT ]; then + NR=$LIMIT + fi + lru_resize_disable mdc mkdir -p $DIR/$tdir/disable_lru_resize || - error "failed to create $DIR/$tdir/disable_lru_resize" + error "failed to create $DIR/$tdir/disable_lru_resize" createmany -o $DIR/$tdir/disable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" 
+ wait $PID etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" - get_lru_size "mdc-" - - log "starting lru resize enable cycle" - mkdir -p $DIR/$tdir/enable_lru_resize || - error "failed to create $DIR/$tdir/enable_lru_resize" + log "lru_size = $(cat $NSDIR/lru_size)" + unlinkmany $DIR/$tdir/disable_lru_resize/f $NR - # 0 locks means here flush lru and switch to lru resize policy - set_lru_size "mdc-" 0 + lru_resize_enable mdc + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" createmany -o $DIR/$tdir/enable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" 
+ wait $PID etime=`date +%s` lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" - get_lru_size "mdc-" + log "lru_size = $(cat $NSDIR/lru_size)" - if test $lruresize_delta -gt $nolruresize_delta; then + if [ $lruresize_delta -gt $nolruresize_delta ]; then log "ls -la is $(((lruresize_delta - $nolruresize_delta) * 100 / $nolruresize_delta))% slower with lru resize enabled" - elif test $nolruresize_delta -gt $lruresize_delta; then + elif [ $nolruresize_delta -gt $lruresize_delta ]; then log "ls -la is $(((nolruresize_delta - $lruresize_delta) * 100 / $nolruresize_delta))% faster with lru resize enabled" else log "lru resize performs the same with no lru resize" fi + unlinkmany $DIR/$tdir/enable_lru_resize/f $NR } run_test 124b "lru resize (performance test) =======================" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index c3d8dfe..0fa6898 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1142,6 +1142,35 @@ cancel_lru_locks() { $LCTL mark "cancel_lru_locks $1 stop" } +default_lru_size() +{ + NR_CPU=$(grep -c "processor" /proc/cpuinfo) + DEFAULT_LRU_SIZE=$((100 * NR_CPU)) + echo "$DEFAULT_LRU_SIZE" +} + +lru_resize_enable() +{ + NS=$1 + test "x$NS" = "x" && NS="mdc" + for F in $LPROC/ldlm/namespaces/*$NS*/lru_size; do + D=$(dirname $F) + log "Enable lru resize for $(basename $D)" + echo "0" > $F + done +} + +lru_resize_disable() +{ + NS=$1 + test "x$NS" = "x" && NS="mdc" + for F in $LPROC/ldlm/namespaces/*$NS*/lru_size; do + D=$(dirname $F) + log "Disable lru resize for $(basename $D)" + DEFAULT_LRU_SIZE=$(default_lru_size) + echo "$DEFAULT_LRU_SIZE" > $F + done +} pgcache_empty() { for a in /proc/fs/lustre/llite/*/dump_page_cache; do