tgd_blockbits holds the recordsize bits recorded at mkfs
time and does not change afterwards. However, 'zfs set'
can be used to change the OST blocksize at runtime.
Instead of using the cached value of 'tgd_blockbits',
always calculate the blocksize bits from the current
blocksize, which may have changed.
Test-case: sanity/104c added.
Signed-off-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Change-Id: Icc100cca0d5ae492c41d60f0bf97512450f796bc
Reviewed-on: https://review.whamcloud.com/43154
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
struct mdt_body *reqbody = NULL;
struct mdt_statfs_cache *msf;
ktime_t kstart = ktime_get();
+ int current_blockbits;
int rc;
ENTRY;
spin_unlock(&mdt->mdt_lock);
}
+ /* tgd_blockbits holds the recordsize bits recorded at mkfs
+ * time and does not change afterwards. However, 'zfs set'
+ * can be used to change the MDT blocksize at runtime.
+ * Instead of using the cached value of 'tgd_blockbits',
+ * always calculate the blocksize bits from the current
+ * os_bsize, which may have changed.
+ */
+ current_blockbits = fls64(osfs->os_bsize) - 1;
+
/* at least try to account for cached pages. its still racy and
* might be under-reporting if clients haven't announced their
* caches with brw recently */
" pending %llu free %llu avail %llu\n",
tgd->tgd_tot_dirty, tgd->tgd_tot_granted,
tgd->tgd_tot_pending,
- osfs->os_bfree << tgd->tgd_blockbits,
- osfs->os_bavail << tgd->tgd_blockbits);
+ osfs->os_bfree << current_blockbits,
+ osfs->os_bavail << current_blockbits);
osfs->os_bavail -= min_t(u64, osfs->os_bavail,
((tgd->tgd_tot_dirty + tgd->tgd_tot_pending +
- osfs->os_bsize - 1) >> tgd->tgd_blockbits));
+ osfs->os_bsize - 1) >> current_blockbits));
tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__);
CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; "
osfs->os_files, osfs->os_ffree, osfs->os_state);
if (!exp_grant_param_supp(tsi->tsi_exp) &&
- tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) {
+ current_blockbits > COMPAT_BSIZE_SHIFT) {
/* clients which don't support OBD_CONNECT_GRANT_PARAM
* should not see a block size > page size, otherwise
* cl_lost_grant goes mad. Therefore, we emulate a 4KB (=2^12)
* block size which is the biggest block size known to work
* with all client's page size. */
- osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_blocks <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bfree <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bavail <<= current_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT;
}
if (rc == 0)
struct obd_device *obd = class_exp2obd(exp);
struct ofd_device *ofd = ofd_exp(exp);
struct tg_grants_data *tgd = &ofd->ofd_lut.lut_tgd;
+ int current_blockbits;
int rc;
ENTRY;
if (unlikely(rc))
GOTO(out, rc);
+ /* tgd_blockbits holds the recordsize bits recorded at mkfs
+ * time and does not change afterwards. However, 'zfs set'
+ * can be used to change the OST blocksize at runtime.
+ * Instead of using the cached value of 'tgd_blockbits',
+ * always calculate the blocksize bits from the current
+ * os_bsize, which may have changed.
+ */
+ current_blockbits = fls64(osfs->os_bsize) - 1;
+
/*
* at least try to account for cached pages. its still racy and
* might be under-reporting if clients haven't announced their
* caches with brw recently
*/
-
CDEBUG(D_SUPER | D_CACHE,
"blocks cached %llu granted %llu pending %llu free %llu avail %llu\n",
tgd->tgd_tot_dirty, tgd->tgd_tot_granted,
tgd->tgd_tot_pending,
- osfs->os_bfree << tgd->tgd_blockbits,
- osfs->os_bavail << tgd->tgd_blockbits);
+ osfs->os_bfree << current_blockbits,
+ osfs->os_bavail << current_blockbits);
osfs->os_bavail -= min_t(u64, osfs->os_bavail,
((tgd->tgd_tot_dirty + tgd->tgd_tot_pending +
- osfs->os_bsize - 1) >> tgd->tgd_blockbits));
+ osfs->os_bsize - 1) >> current_blockbits));
/*
* The QoS code on the MDS does not care about space reserved for
ted = &obd->obd_self_export->exp_target_data;
osfs->os_granted = min_t(u64, osfs->os_bavail,
- ted->ted_grant >> tgd->tgd_blockbits);
+ ted->ted_grant >> current_blockbits);
osfs->os_bavail -= osfs->os_granted;
}
osfs->os_state |= OS_STATFS_NOPRECREATE;
if (obd->obd_self_export != exp && !exp_grant_param_supp(exp) &&
- tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) {
+ current_blockbits > COMPAT_BSIZE_SHIFT) {
/*
* clients which don't support OBD_CONNECT_GRANT_PARAM
* should not see a block size > page size, otherwise
* block size which is the biggest block size known to work
* with all client's page size.
*/
- osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_granted <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_blocks <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bfree <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bavail <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_granted <<= current_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT;
}
}
run_test 104b "$RUNAS lfs check servers test ===================="
+#
+# Verify that $1 is within 2% of $2.
+#
+# Upper-case unit suffixes (M, G, T, ...) are stripped from both arguments
+# before comparing. What remains must be an integer, because the bounds are
+# computed with bash integer arithmetic.
+#
+# Succeeds when  $2 * 98 / 100  <=  $1  <=  $2 * 102 / 100, fails otherwise.
+#
+value_in_range() {
+	# Strip all units (M, G, T). The working variables are local so they
+	# do not leak into, or clobber, the caller's namespace.
+	local actual=$(echo "$1" | tr -d A-Z)
+	local expect=$(echo "$2" | tr -d A-Z)
+
+	local expect_lo=$(($expect * 98 / 100)) # 2% below
+	local expect_hi=$(($expect * 102 / 100)) # 2% above
+
+	# permit 2% drift above and below
+	(( $actual >= $expect_lo && $actual <= $expect_hi ))
+}
+
+# test_104c: check that the space figures reported by 'lfs df -h' and
+# 'df -h' stay within 2% of their previous values after the ZFS recordsize
+# of the OST and MDT datasets is changed with 'zfs set'.  The original
+# recordsize values are restored at the end of the test.
+test_104c() {
+	[ "$PARALLEL" == "yes" ] && skip "skip parallel run"
+	[ "$ost1_FSTYPE" == "zfs" ] || skip "zfs only test"
+	# NOTE(review): only the OST backend type is checked here, yet the
+	# test also reads osd-zfs.*MDT0000 parameters - confirm whether an
+	# equivalent MDT check is needed.
+
+	# No trailing dot: the parameter name is appended below as
+	# "$ost_param.blocksize", which would otherwise contain "..".
+	local ost_param="osd-zfs.$FSNAME-OST0000"
+	local mdt_param="osd-zfs.$FSNAME-MDT0000"
+	local ofacets=$(get_facets OST)
+	local mfacets=$(get_facets MDS)
+	local saved_ost_blocks=
+	local saved_mdt_blocks=
+	local lfs_df df lfs_df_after df_after
+	local facet ost mdt
+
+	echo "Before recordsize change"
+	lfs_df=($($LFS df -h | grep "filesystem_summary:"))
+	# NOTE(review): the mount point is hard-coded; presumably it should
+	# follow the framework's configured client mount point - confirm.
+	df=($(df -h | grep "/mnt/lustre"$))
+
+	# For checking.
+	echo "lfs output : ${lfs_df[*]}"
+	echo "df output : ${df[*]}"
+
+	# NOTE(review): every OST facet is queried with the OST0000 parameter
+	# name; verify this is intended when there is more than one OST.
+	for facet in ${ofacets//,/ }; do
+		# Remember the blocksize of the first facet only; it is the
+		# value used for the restore at the end.
+		if [ -z "$saved_ost_blocks" ]; then
+			saved_ost_blocks=$(do_facet $facet \
+				lctl get_param -n $ost_param.blocksize)
+			echo "OST Blocksize: $saved_ost_blocks"
+		fi
+		ost=$(do_facet $facet lctl get_param -n $ost_param.mntdev)
+		do_facet $facet zfs set recordsize=32768 $ost
+	done
+
+	# BS too small. Sufficient for functional testing.
+	for facet in ${mfacets//,/ }; do
+		if [ -z "$saved_mdt_blocks" ]; then
+			saved_mdt_blocks=$(do_facet $facet \
+				lctl get_param -n $mdt_param.blocksize)
+			echo "MDT Blocksize: $saved_mdt_blocks"
+		fi
+		mdt=$(do_facet $facet lctl get_param -n $mdt_param.mntdev)
+		do_facet $facet zfs set recordsize=32768 $mdt
+	done
+
+	# Give new values chance to reflect change
+	sleep 2
+
+	echo "After recordsize change"
+	lfs_df_after=($($LFS df -h | grep "filesystem_summary:"))
+	df_after=($(df -h | grep "/mnt/lustre"$))
+
+	# For checking.
+	echo "lfs output : ${lfs_df_after[*]}"
+	echo "df output : ${df_after[*]}"
+
+	# NOTE(review): if any check below calls error, the restore loops at
+	# the end are presumably never reached and recordsize stays at
+	# 32768 - confirm and consider registering the restore as a cleanup.
+
+	# Verify lfs df. "%.*" drops everything from the last '.' onwards, so
+	# only the integer part of each human-readable value is compared.
+	value_in_range ${lfs_df_after[1]%.*} ${lfs_df[1]%.*} ||
+		error "lfs_df bytes: ${lfs_df_after[1]%.*} != ${lfs_df[1]%.*}"
+	value_in_range ${lfs_df_after[2]%.*} ${lfs_df[2]%.*} ||
+		error "lfs_df used: ${lfs_df_after[2]%.*} != ${lfs_df[2]%.*}"
+	value_in_range ${lfs_df_after[3]%.*} ${lfs_df[3]%.*} ||
+		error "lfs_df avail: ${lfs_df_after[3]%.*} != ${lfs_df[3]%.*}"
+
+	# Verify df
+	value_in_range ${df_after[1]%.*} ${df[1]%.*} ||
+		error "df bytes: ${df_after[1]%.*} != ${df[1]%.*}"
+	value_in_range ${df_after[2]%.*} ${df[2]%.*} ||
+		error "df used: ${df_after[2]%.*} != ${df[2]%.*}"
+	value_in_range ${df_after[3]%.*} ${df[3]%.*} ||
+		error "df avail: ${df_after[3]%.*} != ${df[3]%.*}"
+
+	# Restore MDT recordsize back to original
+	for facet in ${mfacets//,/ }; do
+		mdt=$(do_facet $facet lctl get_param -n $mdt_param.mntdev)
+		do_facet $facet zfs set recordsize=$saved_mdt_blocks $mdt
+	done
+
+	# Restore OST recordsize back to original
+	for facet in ${ofacets//,/ }; do
+		ost=$(do_facet $facet lctl get_param -n $ost_param.mntdev)
+		do_facet $facet zfs set recordsize=$saved_ost_blocks $ost
+	done
+
+	return 0
+}
+run_test 104c "Verify df vs lfs_df stays same after recordsize change"
+
test_105a() {
# doesn't work on 2.4 kernels
touch $DIR/$tfile