unsigned int pool_spill_threshold_pct;
atomic_t pool_spill_hit;
char pool_spill_target[LOV_MAXPOOLNAME + 1];
+ bool pool_same_space; /* targets in pool balanced*/
+ time64_t pool_same_space_expire; /*uses ld_qos_maxage*/
};
struct lod_device;
RETURN(-ENAMETOOLONG);
/* OBD_ALLOC_* doesn't work with direct kfree_rcu use */
- new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
+ /* __GFP_ZERO is only a modifier; it must be OR-ed into GFP_KERNEL */
+ new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL | __GFP_ZERO);
if (new_pool == NULL)
RETURN(-ENOMEM);
strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
- new_pool->pool_spill_expire = 0;
- new_pool->pool_spill_is_active = false;
- new_pool->pool_spill_threshold_pct = 0;
new_pool->pool_spill_target[0] = '\0';
atomic_set(&new_pool->pool_spill_hit, 0);
new_pool->pool_lobd = obd;
}
/**
+ * Calculate per-OST penalties for the OSTs of a pool.
+ *
+ * The algorithm is similar to ltd_qos_penalties_calc(), but much simpler:
+ * only the available space of each OST in this pool is considered.
+ *
+ * While the pool is flagged as having balanced space and that flag has not
+ * expired yet, the scan is skipped and -EAGAIN is returned right away.
+ * Otherwise every usable OST of the pool gets its per-object penalty
+ * recomputed and its accumulated penalty aged, and the balanced flag is
+ * re-evaluated from the smallest and largest available space seen.
+ *
+ * \param[in] lod	lod_device
+ * \param[in] pool	pool_desc
+ *
+ * \retval 0		on success
+ * \retval -EAGAIN	the number of OSTs isn't enough or all tgt spaces are
+ *			almost the same
+ */
+static int lod_pool_qos_penalties_calc(struct lod_device *lod,
+				       struct pool_desc *pool)
+{
+	struct lu_tgt_descs *ltd = &lod->lod_ost_descs;
+	struct lu_qos *qos = &ltd->ltd_qos;
+	struct lov_desc *desc = &ltd->ltd_lov_desc;
+	struct lu_tgt_pool *osts = &pool->pool_obds;
+	struct lod_tgt_desc *ost;
+	__u64 ba_max, ba_min, ba;
+	__u32 num_active;
+	int prio_wide;
+	time64_t now, age;
+	int i, rc;
+
+	ENTRY;
+
+	now = ktime_get_real_seconds();
+
+	/* Cached "balanced" verdict is still valid: nothing to recompute. */
+	if (pool->pool_same_space && now < pool->pool_same_space_expire)
+		GOTO(out, rc = 0);
+
+	/*
+	 * Test the count before subtracting: num_active is unsigned, so an
+	 * empty pool would wrap around instead of being rejected here.
+	 */
+	if (osts->op_count < 2)
+		GOTO(out, rc = -EAGAIN);
+	num_active = osts->op_count - 1;
+
+	prio_wide = 256 - qos->lq_prio_free;
+
+	ba_min = (__u64)(-1);
+	ba_max = 0;
+
+	/* Calculate penalty per OST */
+	for (i = 0; i < osts->op_count; i++) {
+		if (!test_bit(osts->op_array[i], lod->lod_ost_bitmap))
+			continue;
+
+		ost = OST_TGT(lod, osts->op_array[i]);
+		if (!ost->ltd_active)
+			continue;
+
+		/* available bytes, scaled down by 256 */
+		ba = ost->ltd_statfs.os_bavail * ost->ltd_statfs.os_bsize;
+		ba >>= 8;
+		if (!ba)
+			continue;
+
+		ba_min = min(ba, ba_min);
+		ba_max = max(ba, ba_max);
+		/*
+		 * NOTE(review): lsq_bavail is only ever added to in this
+		 * function; confirm it is reset somewhere between calls.
+		 */
+		ost->ltd_qos.ltq_svr->lsq_bavail += ba;
+
+		/*
+		 * per-ost penalty is
+		 * prio * bavail / 256 / (num_tgt - 1)
+		 * where bavail is the scaled value computed above
+		 */
+		ost->ltd_qos.ltq_penalty_per_obj = (prio_wide * ba) >> 8;
+		do_div(ost->ltd_qos.ltq_penalty_per_obj, num_active);
+
+		age = (now - ost->ltd_qos.ltq_used) >> 3;
+		if (age > 32 * desc->ld_qos_maxage)
+			ost->ltd_qos.ltq_penalty = 0;
+		else if (age > desc->ld_qos_maxage)
+			/* Decay ost penalty. */
+			ost->ltd_qos.ltq_penalty >>= age / desc->ld_qos_maxage;
+	}
+
+	/*
+	 * If each ost has almost same free space, do rr allocation for better
+	 * creation performance. This also holds when no OST was usable above,
+	 * since ba_max is then still 0 and ba_min still at its maximum.
+	 */
+	if ((ba_max * (256 - qos->lq_threshold_rr)) >> 8 < ba_min) {
+		pool->pool_same_space = true;
+		pool->pool_same_space_expire = now + desc->ld_qos_maxage;
+	} else {
+		pool->pool_same_space = false;
+	}
+	rc = 0;
+
+out:
+	/* A balanced pool is reported as -EAGAIN, cached or freshly found. */
+	if (!rc && pool->pool_same_space)
+		rc = -EAGAIN;
+
+	RETURN(rc);
+}
+
+/**
* Allocate a striping using an algorithm with weights.
*
* The function allocates OST objects to create a striping. The algorithm
if (!ltd_qos_is_usable(&lod->lod_ost_descs))
GOTO(out, rc = -EAGAIN);
- rc = ltd_qos_penalties_calc(&lod->lod_ost_descs);
+ if (pool != NULL)
+ rc = lod_pool_qos_penalties_calc(lod, pool);
+ else
+ rc = ltd_qos_penalties_calc(&lod->lod_ost_descs);
if (rc)
GOTO(out, rc);
}
run_test 132 "hsm_actions processed after failover"
+# This test verifies we do RR allocation within a pool even if there is a
+# significant imbalance vs an OST outside the pool
+#
+# Steps: reformat so all OSTs start balanced, put every OST except the last
+# one into a pool, fill the pool OSTs to ~90%, then check that
+#  - files created in the pool are spread evenly over the pool OSTs, and
+#  - files created without a pool are NOT spread evenly over all OSTs.
+test_133() {
+	[[ $OSTCOUNT -lt 4 ]] && skip_env "needs >= 4 OSTs"
+	# This is the easiest way to ensure OSTs start out balanced
+	reformat_and_config
+	setupall
+
+	check_set_fallocate_or_skip
+
+	local testfile=$DIR/$tdir/$tfile
+	local pool="testpool"
+	local ostrange=$((OSTCOUNT - 1))
+	# Select all but the last OST to add to the pool
+	local poolostrange=$((OSTCOUNT - 2))
+	local filenum=20
+	local filecount
+	local stripecount
+	local i
+
+	declare -a AVAIL
+	free_min_max
+
+	[ $MINV -eq 0 ] && error "no free space in OST$MINI"
+	[ $MAXV -gt $((2 * $MINV)) ] &&
+		error "OSTs badly unbalanced after reformat"
+
+	create_pool $FSNAME.$pool || error "failed to create a pool"
+	do_facet mgs $LCTL pool_add $FSNAME.$pool OST[0-$poolostrange] ||
+		error "failed to add OST[0-$poolostrange] to the pool"
+
+	test_mkdir -p $DIR/$tdir || error "failed to mkdir $DIR/$tdir"
+	# Consume space on the OSTs in the pool so they are unbalanced with the
+	# OST outside of the pool
+	# fill each OST 90% with fallocate so they are widely
+	# imbalanced
+	local size=$(((MINV * 9 / 10) * 1024))
+	for ((i = 0; i <= poolostrange; i++)); do
+		$LFS setstripe -c 1 -i $i $testfile$i ||
+			error "failed to setstripe $testfile$i"
+		fallocate -l $size $testfile$i || error "fallocate failed"
+	done
+	ls -la $DIR/$tdir
+	sleep_maxage
+	$LFS df
+
+	# Create files in the pool now that there is an imbalance
+	filecount=$(((OSTCOUNT - 1) * filenum))
+	for ((i = 0; i < filecount; i++)); do
+		$LFS setstripe -p $pool $testfile-$i ||
+			error "failed to setstripe -p $pool $testfile-$i"
+	done
+	$LFS getstripe -i $testfile-* > /tmp/$tfile.log
+	# Count the number of files with a stripe on each OST to verify the
+	# pool allocated with round-robin
+	for ((i = 0; i <= poolostrange; i++)); do
+		# -w: match the whole index, so OST 1 does not count 10, 11, ...
+		stripecount=$(grep -cw $i /tmp/$tfile.log)
+		# Allow a little leeway
+		if (( stripecount < filenum - 1 ||
+		      stripecount > filenum + 1 )); then
+			cat /tmp/$tfile.log
+			error "$stripecount != $filenum files on OST$i"
+		fi
+	done
+
+	# Create files across the system now that there is an imbalance
+	filecount=$((OSTCOUNT * filenum))
+	# Start at 0 so exactly $filecount files exist; with one file short
+	# the "!= $filenum" check below would succeed even with pure RR.
+	for ((i = 0; i < filecount; i++)); do
+		$LFS setstripe $testfile-$i.2 ||
+			error "failed to setstripe $testfile-$i.2"
+	done
+	$LFS getstripe -i $testfile-*.2 > /tmp/$tfile.log
+	local qos_used=""
+	# Count the number of files with a stripe on each OST to verify the
+	# files are *NOT* allocated with round-robin
+	for ((i = 0; i <= ostrange; i++)); do
+		stripecount=$(grep -cw $i /tmp/$tfile.log)
+		if [[ $stripecount -ne $filenum ]]; then
+			qos_used="true"
+			echo "QOS: $stripecount != $filenum files on OST$i"
+		fi
+	done
+	if [ -z "$qos_used" ]; then
+		error "QOS not used on imbalanced OSTs!"
+	fi
+
+	rm -rf /tmp/$tfile.log $DIR/$tdir
+	do_facet mgs $LCTL pool_remove $FSNAME.$pool OST[0-$poolostrange] ||
+		error "failed to remove OST[0-$poolostrange] from the pool"
+	do_facet mgs $LCTL pool_destroy $FSNAME.$pool ||
+		error "failed to destroy pool"
+}
+run_test 133 "stripe QOS: free space balance in a pool"
+
if ! combined_mgs_mds ; then
stop mgs
fi
}
run_test 115 "verify dynamic thread creation===================="
-free_min_max () {
- wait_delete_completed
- AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail))
- echo "OST kbytes available: ${AVAIL[*]}"
- MAXV=${AVAIL[0]}
- MAXI=0
- MINV=${AVAIL[0]}
- MINI=0
- for ((i = 0; i < ${#AVAIL[@]}; i++)); do
- #echo OST $i: ${AVAIL[i]}kb
- if [[ ${AVAIL[i]} -gt $MAXV ]]; then
- MAXV=${AVAIL[i]}
- MAXI=$i
- fi
- if [[ ${AVAIL[i]} -lt $MINV ]]; then
- MINV=${AVAIL[i]}
- MINI=$i
- fi
- done
- echo "Min free space: OST $MINI: $MINV"
- echo "Max free space: OST $MAXI: $MAXV"
-}
-
test_116a() { # was previously test_116()
[ $PARALLEL == "yes" ] && skip "skip parallel run"
[[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs"
$LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
}
+# Record the available space of every OST and locate the extremes.
+#
+# Sets the globals AVAIL (kbytesavail of each OSC, in lctl output order),
+# MINV/MINI (smallest value and its index) and MAXV/MAXI (largest value
+# and its index), then prints a short summary.
+free_min_max () {
+	wait_delete_completed
+	AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail))
+	echo "OST kbytes available: ${AVAIL[*]}"
+
+	# seed both extremes with the first entry; the scan below refines them
+	MINI=0
+	MINV=${AVAIL[0]}
+	MAXI=0
+	MAXV=${AVAIL[0]}
+
+	for ((i = 0; i < ${#AVAIL[@]}; i++)); do
+		if (( AVAIL[i] > MAXV )); then
+			MAXI=$i
+			MAXV=${AVAIL[i]}
+		fi
+		if (( AVAIL[i] < MINV )); then
+			MINI=$i
+			MINV=${AVAIL[i]}
+		fi
+	done
+
+	echo "Min free space: OST $MINI: $MINV"
+	echo "Max free space: OST $MAXI: $MAXV"
+}
+
# save_lustre_params(comma separated facet list, parameter_mask)
# generate a stream of formatted strings (<facet> <param name>=<param value>)
save_lustre_params() {