From 3bae39f0a5b98a279fb5f7b8d00211ac0d09366f Mon Sep 17 00:00:00 2001 From: Rahul Deshmkuh Date: Wed, 13 Jul 2016 23:02:45 -0700 Subject: [PATCH] LU-7853 lod: fixes bitfield in lod qos code Updating bitfields in struct lod_qos struct is protected by lq_rw_sem in most places but an update can be lost due unprotected bitfield access from lod_qos_thresholdrr_seq_write() and qos_prio_free_store(). This patch fixes it by replacing bitfields with named bits and atomic bitops. Cray-bug-id: LUS-4651 Signed-off-by: Rahul Deshmukh Signed-off-by: Alexander Zarochentsev Change-Id: I28299ce4960e91be551d7f6e43a3b598daf4d7a2 Reviewed-on: https://review.whamcloud.com/18812 Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Zarochentsev Reviewed-by: Oleg Drokin --- lustre/include/lu_object.h | 13 ++++++++++++- lustre/lmv/lmv_obd.c | 2 +- lustre/lmv/lproc_lmv.c | 6 +++--- lustre/lod/lod_pool.c | 4 ++-- lustre/lod/lod_qos.c | 27 +++++++++++++-------------- lustre/lod/lproc_lod.c | 6 +++--- lustre/obdclass/lu_tgt_descs.c | 38 ++++++++++++++++++++------------------ 7 files changed, 54 insertions(+), 42 deletions(-) diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 2dc95af..0448b79 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1519,6 +1519,14 @@ static inline bool lu_object_is_cl(const struct lu_object *o) return lu_device_is_cl(o->lo_dev); } +/* bitflags used in rr / qos allocation */ +enum lq_flag { + LQ_DIRTY = 0, /* recalc qos data */ + LQ_SAME_SPACE, /* the OSTs all have approx. + * the same space avail */ + LQ_RESET, /* zero current penalties */ +}; + /* round-robin QoS data for LOD/LMV */ struct lu_qos_rr { spinlock_t lqr_alloc; /* protect allocation index */ @@ -1526,7 +1534,7 @@ struct lu_qos_rr { __u32 lqr_offset_idx;/* aliasing for start_idx */ int lqr_start_count;/* reseed counter */ struct lu_tgt_pool lqr_pool; /* round-robin optimized list */ - unsigned long lqr_dirty:1; /* recalc round-robin list */ + unsigned long lqr_flags; }; /* QoS data per MDS/OSS */ @@ -1594,10 +1602,13 @@ struct lu_qos { unsigned int lq_prio_free; /* priority for free space */ unsigned int lq_threshold_rr;/* priority for rr */ struct lu_qos_rr lq_rr; /* round robin qos data */ + unsigned long lq_flags; +#if 0 unsigned long lq_dirty:1, /* recalc qos data */ lq_same_space:1,/* the servers all have approx. * the same space avail */ lq_reset:1; /* zero current penalties */ +#endif }; struct lu_tgt_descs { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index a8ee803..b639d7f 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1311,7 +1311,7 @@ static int lmv_statfs_update(void *cookie, int rc) tgt->ltd_statfs = *osfs; tgt->ltd_statfs_age = ktime_get_seconds(); spin_unlock(&lmv->lmv_lock); - lmv->lmv_qos.lq_dirty = 1; + set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); } return rc; diff --git a/lustre/lmv/lproc_lmv.c b/lustre/lmv/lproc_lmv.c index 6e0e7cc..d313145 100644 --- a/lustre/lmv/lproc_lmv.c +++ b/lustre/lmv/lproc_lmv.c @@ -132,8 +132,8 @@ static ssize_t qos_prio_free_store(struct kobject *kobj, return -EINVAL; lmv->lmv_qos.lq_prio_free = (val << 8) / 100; - lmv->lmv_qos.lq_dirty = 1; - lmv->lmv_qos.lq_reset = 1; + set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); + set_bit(LQ_RESET, &lmv->lmv_qos.lq_flags); return count; } @@ -169,7 +169,7 @@ static ssize_t qos_threshold_rr_store(struct kobject *kobj, return -EINVAL; lmv->lmv_qos.lq_threshold_rr = (val << 8) / 100; - lmv->lmv_qos.lq_dirty = 1; + set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); return count; } diff --git a/lustre/lod/lod_pool.c b/lustre/lod/lod_pool.c index b59b0df..cb5a8bb 100644 --- a/lustre/lod/lod_pool.c +++ b/lustre/lod/lod_pool.c @@ -565,7 +565,7 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname) if (rc) GOTO(out, rc); - pool->pool_rr.lqr_dirty = 1; + set_bit(LQ_DIRTY, &pool->pool_rr.lqr_flags); CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n", ostname, poolname, pool_tgt_count(pool)); @@ -625,7 +625,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname) GOTO(out, rc); tgt_pool_remove(&pool->pool_obds, ost->ltd_index); - pool->pool_rr.lqr_dirty = 1; + set_bit(LQ_DIRTY, &pool->pool_rr.lqr_flags); CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname, poolname); diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 7075732..e1c590c 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -137,8 +137,8 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, LASSERT(desc->ld_active_tgt_count > 0); desc->ld_active_tgt_count--; - ltd->ltd_qos.lq_dirty = 1; - ltd->ltd_qos.lq_rr.lqr_dirty = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); + set_bit(LQ_DIRTY, <d->ltd_qos.lq_rr.lqr_flags); CDEBUG(D_CONFIG, "%s: turns inactive\n", tgt->ltd_exp->exp_obd->obd_name); } @@ -153,8 +153,8 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, tgt->ltd_active = 1; tgt->ltd_connecting = 0; desc->ld_active_tgt_count++; - ltd->ltd_qos.lq_dirty = 1; - ltd->ltd_qos.lq_rr.lqr_dirty = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); + set_bit(LQ_DIRTY, <d->ltd_qos.lq_rr.lqr_flags); CDEBUG(D_CONFIG, "%s: turns active\n", tgt->ltd_exp->exp_obd->obd_name); } @@ -223,7 +223,7 @@ void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod, if (tgt->ltd_statfs.os_bavail != avail) /* recalculate weigths */ - ltd->ltd_qos.lq_dirty = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); } obd->obd_osfs_age = ktime_get_seconds(); @@ -262,7 +262,7 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd, int rc; ENTRY; - if (!lqr->lqr_dirty) { + if (!test_bit(LQ_DIRTY, &lqr->lqr_flags)) { LASSERT(lqr->lqr_pool.op_size); RETURN(0); } @@ -274,7 +274,7 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd, * Check again. While we were sleeping on @lq_rw_sem something could * change. */ - if (!lqr->lqr_dirty) { + if (!test_bit(LQ_DIRTY, &lqr->lqr_flags)) { LASSERT(lqr->lqr_pool.op_size); up_write(<d->ltd_qos.lq_rw_sem); RETURN(0); @@ -323,7 +323,7 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd, } } - lqr->lqr_dirty = 0; + clear_bit(LQ_DIRTY, &lqr->lqr_flags); up_write(<d->ltd_qos.lq_rw_sem); if (placed != real_count) { @@ -335,7 +335,7 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct lu_tgt_descs *ltd, LCONSOLE(D_WARNING, "rr #%d tgt idx=%d\n", i, lqr->lqr_pool.op_array[i]); } - lqr->lqr_dirty = 1; + set_bit(LQ_DIRTY, &lqr->lqr_flags); RETURN(-EAGAIN); } @@ -1603,9 +1603,8 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo, } /* makes sense to rebalance next time */ - lod->lod_ost_descs.ltd_qos.lq_dirty = 1; - lod->lod_ost_descs.ltd_qos.lq_same_space = 0; - + set_bit(LQ_DIRTY, &lod->lod_ost_descs.ltd_qos.lq_flags); + clear_bit(LQ_SAME_SPACE, &lod->lod_ost_descs.ltd_qos.lq_flags); rc = -EAGAIN; } @@ -1824,8 +1823,8 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo, } /* makes sense to rebalance next time */ - ltd->ltd_qos.lq_dirty = 1; - ltd->ltd_qos.lq_same_space = 0; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); + clear_bit(LQ_SAME_SPACE, <d->ltd_qos.lq_flags); rc = -EAGAIN; } else { diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c index 1ca3f5c..4b9a591 100644 --- a/lustre/lod/lproc_lod.c +++ b/lustre/lod/lproc_lod.c @@ -564,8 +564,8 @@ static ssize_t __qos_prio_free_store(struct kobject *kobj, if (val > 100) return -EINVAL; ltd->ltd_qos.lq_prio_free = (val << 8) / 100; - ltd->ltd_qos.lq_dirty = 1; - ltd->ltd_qos.lq_reset = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); + set_bit(LQ_RESET, <d->ltd_qos.lq_flags); return count; } @@ -655,7 +655,7 @@ static ssize_t __qos_threshold_rr_store(struct kobject *kobj, if (val > 100) return -EINVAL; ltd->ltd_qos.lq_threshold_rr = (val << 8) / 100; - ltd->ltd_qos.lq_dirty = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); return count; } diff --git a/lustre/obdclass/lu_tgt_descs.c b/lustre/obdclass/lu_tgt_descs.c index f15e18b..cecc6ea 100644 --- a/lustre/obdclass/lu_tgt_descs.c +++ b/lustre/obdclass/lu_tgt_descs.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(lu_prandom_u64_max); void lu_qos_rr_init(struct lu_qos_rr *lqr) { spin_lock_init(&lqr->lqr_alloc); - lqr->lqr_dirty = 1; + set_bit(LQ_DIRTY, &lqr->lqr_flags); } EXPORT_SYMBOL(lu_qos_rr_init); @@ -160,9 +160,8 @@ int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *tgt) */ list_add_tail(&svr->lsq_svr_list, &tempsvr->lsq_svr_list); - qos->lq_dirty = 1; - qos->lq_rr.lqr_dirty = 1; - + set_bit(LQ_DIRTY, &qos->lq_flags); + set_bit(LQ_DIRTY, &qos->lq_rr.lqr_flags); out: up_write(&qos->lq_rw_sem); RETURN(rc); @@ -202,8 +201,8 @@ static int lu_qos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd) OBD_FREE_PTR(svr); } - qos->lq_dirty = 1; - qos->lq_rr.lqr_dirty = 1; + set_bit(LQ_DIRTY, &qos->lq_flags); + set_bit(LQ_DIRTY, &qos->lq_rr.lqr_flags); out: up_write(&qos->lq_rw_sem); RETURN(rc); @@ -275,8 +274,8 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt) /* Set up allocation policy (QoS and RR) */ INIT_LIST_HEAD(<d->ltd_qos.lq_svr_list); init_rwsem(<d->ltd_qos.lq_rw_sem); - ltd->ltd_qos.lq_dirty = 1; - ltd->ltd_qos.lq_reset = 1; + set_bit(LQ_DIRTY, <d->ltd_qos.lq_flags); + set_bit(LQ_RESET, <d->ltd_qos.lq_flags); /* Default priority is toward free space balance */ ltd->ltd_qos.lq_prio_free = 232; /* Default threshold for rr (roughly 17%) */ @@ -421,7 +420,8 @@ EXPORT_SYMBOL(ltd_del_tgt); */ bool ltd_qos_is_usable(struct lu_tgt_descs *ltd) { - if (!ltd->ltd_qos.lq_dirty && ltd->ltd_qos.lq_same_space) + if (!test_bit(LQ_DIRTY, <d->ltd_qos.lq_flags) && + test_bit(LQ_SAME_SPACE, <d->ltd_qos.lq_flags)) return false; if (ltd->ltd_lov_desc.ld_active_tgt_count < 2) @@ -463,7 +463,7 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) ENTRY; - if (!qos->lq_dirty) + if (!test_bit(LQ_DIRTY, &qos->lq_flags)) GOTO(out, rc = 0); num_active = desc->ld_active_tgt_count - 1; @@ -534,7 +534,8 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) tgt->ltd_qos.ltq_penalty_per_obj >>= 1; age = (now - tgt->ltd_qos.ltq_used) >> 3; - if (qos->lq_reset || age > 32 * desc->ld_qos_maxage) + if (test_bit(LQ_RESET, &qos->lq_flags) || + age > 32 * desc->ld_qos_maxage) tgt->ltd_qos.ltq_penalty = 0; else if (age > desc->ld_qos_maxage) /* Decay tgt penalty. */ @@ -569,31 +570,32 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) svr->lsq_penalty_per_obj >>= 1; age = (now - svr->lsq_used) >> 3; - if (qos->lq_reset || age > 32 * desc->ld_qos_maxage) + if (test_bit(LQ_RESET, &qos->lq_flags) || + age > 32 * desc->ld_qos_maxage) svr->lsq_penalty = 0; else if (age > desc->ld_qos_maxage) /* Decay server penalty. */ svr->lsq_penalty >>= age / desc->ld_qos_maxage; } - qos->lq_dirty = 0; - qos->lq_reset = 0; + clear_bit(LQ_DIRTY, &qos->lq_flags); + clear_bit(LQ_RESET, &qos->lq_flags); /* * If each tgt has almost same free space, do rr allocation for better * creation performance */ - qos->lq_same_space = 0; + clear_bit(LQ_SAME_SPACE, &qos->lq_flags); if ((ba_max * (256 - qos->lq_threshold_rr)) >> 8 < ba_min && (ia_max * (256 - qos->lq_threshold_rr)) >> 8 < ia_min) { - qos->lq_same_space = 1; + set_bit(LQ_SAME_SPACE, &qos->lq_flags); /* Reset weights for the next time we enter qos mode */ - qos->lq_reset = 1; + set_bit(LQ_RESET, &qos->lq_flags); } rc = 0; out: - if (!rc && qos->lq_same_space) + if (!rc && test_bit(LQ_SAME_SPACE, &qos->lq_flags)) RETURN(-EAGAIN); RETURN(rc); -- 1.8.3.1