Keep "space balanced" subdirectories on the parent MDT if it is less
full than average, since it doesn't make sense to select another MDT
which may occasionally be *more* full. This also reduces random
"MDT jumping" and needless remote directories.
Reduce the QOS threshold for space-balanced LMV layouts, so that the
MDTs do not become too imbalanced before QOS balancing starts to
correct the problem.
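Both defaults are stored internally in 1/256 fixed-point units, so the
new percentage-based defines work out as follows (arithmetic only,
using the constants added in this patch):

	/* lq_threshold_rr, in units of 1/256:
	 *   LOV: 17 * 256 / 100 = 43	(~17%, the old hardcoded value)
	 *   LMV:  5 * 256 / 100 = 12	(~5%)
	 * lq_prio_free, in units of 1/256:
	 *   both: 90 * 256 / 100 = 230	(~90%, was hardcoded as 232)
	 */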
Change the LUSTRE_OPC_MKDIR opcode to be 1 instead of 0, so it can be
verified that a valid opcode has been stored into the structure.
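As a hypothetical illustration (the structure and field names below
are invented for the example, not taken from this patch): with
LUSTRE_OPC_MKDIR moved to 1, zero-filled memory can no longer be
mistaken for a valid mkdir request:

	struct op_example { __u32 opc; } op = { 0 };	/* zeroed memory */

	/* opcode 0 is no longer valid, so "never stored" is detectable */
	bool opc_valid = (op.opc != 0);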
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iab34c7eade03d761aa16b08f409f7e5d69cd70bd
Reviewed-on: https://review.whamcloud.com/43445
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
- LUSTRE_OPC_MKDIR = 0,
- LUSTRE_OPC_SYMLINK = 1,
- LUSTRE_OPC_MKNOD = 2,
- LUSTRE_OPC_CREATE = 3,
- LUSTRE_OPC_ANY = 5,
+ LUSTRE_OPC_MKDIR = 1,
+ LUSTRE_OPC_SYMLINK,
+ LUSTRE_OPC_MKNOD,
+ LUSTRE_OPC_CREATE,
+ LUSTRE_OPC_ANY,
#define LOV_OFFSET_DEFAULT ((__u16)-1)
#define LMV_OFFSET_DEFAULT ((__u32)-1)
+#define LOV_QOS_DEF_THRESHOLD_RR_PCT 17
+#define LMV_QOS_DEF_THRESHOLD_RR_PCT 5
+
+#define LOV_QOS_DEF_PRIO_FREE 90
+#define LMV_QOS_DEF_PRIO_FREE 90
+
static inline bool lov_pattern_supported(__u32 pattern)
{
return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
{
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *tgt, *cur = NULL;
__u64 total_weight = 0;
__u64 cur_weight = 0;
+ int total_usable = 0;
GOTO(unlock, tgt = ERR_PTR(rc));
lmv_foreach_tgt(lmv, tgt) {
- tgt->ltd_qos.ltq_usable = 0;
- if (!tgt->ltd_exp || !tgt->ltd_active)
+ if (!tgt->ltd_exp || !tgt->ltd_active) {
+ tgt->ltd_qos.ltq_usable = 0;
continue;
+ }
tgt->ltd_qos.ltq_usable = 1;
lu_tgt_qos_weight_calc(tgt);
+ if (tgt->ltd_index == *mdt) {
+ cur = tgt;
+ cur_weight = tgt->ltd_qos.ltq_weight;
+ }
total_weight += tgt->ltd_qos.ltq_weight;
+ total_usable++;
+ }
+
+ /* if current MDT has higher-than-average space, stay on same MDT */
+ rand = total_weight / total_usable;
+ if (cur && cur_weight >= rand) {
+ tgt = cur;
+ GOTO(unlock, rc = 0);
+ }
+
rand = lu_prandom_u64_max(total_weight);
lmv_foreach_connected_tgt(lmv, tgt) {
init_rwsem(&ltd->ltd_qos.lq_rw_sem);
set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
- /* Default priority is toward free space balance */
- ltd->ltd_qos.lq_prio_free = 232;
- /* Default threshold for rr (roughly 17%) */
- ltd->ltd_qos.lq_threshold_rr = 43;
ltd->ltd_is_mdt = is_mdt;
+ /* MDT imbalance threshold is low to balance across MDTs
+ * relatively quickly, because each directory may result
+ * in a large number of files/subdirs created therein.
+ */
+ if (is_mdt) {
ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
+ ltd->ltd_qos.lq_prio_free = LMV_QOS_DEF_PRIO_FREE * 256 / 100;
+ ltd->ltd_qos.lq_threshold_rr =
+ LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+ } else {
+ ltd->ltd_qos.lq_prio_free = LOV_QOS_DEF_PRIO_FREE * 256 / 100;
+ ltd->ltd_qos.lq_threshold_rr =
+ LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+ }