From: Vladimir Saveliev Date: Mon, 29 Jun 2020 11:26:57 +0000 (+0300) Subject: LU-12687 osc: consume grants for direct I/O X-Git-Tag: 2.13.55~32 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=05f326a7988a7a0d6954d1b0d318315526209ae6;ds=sidebyside LU-12687 osc: consume grants for direct I/O The new IO engine implementation stopped consuming grants for direct I/O writes. That led to a premature out-of-space condition during direct I/O. The example below illustrates the problem: # OSTSIZE=100000 sh llmount.sh # dd if=/dev/zero of=/mnt/lustre/file bs=4k count=100 oflag=direct dd: error writing ‘/mnt/lustre/file’: No space left on device Consume grants for direct I/O. Try to consume grants in osc_queue_sync_pages() when it is called for pages which are being written in direct I/O. Tests are added to verify grant consumption in buffered and direct I/O and to verify direct I/O overwrite when the OST is full. The overwrite test is for ldiskfs only, as ZFS is unable to overwrite when it is full. 
Fixes: 9fe4b52ad2 ("LU-1030 osc: new IO engine implementation") Signed-off-by: Vladimir Saveliev Change-Id: I9a199452c564e8e8ad02f79231e8481166f3666e Cray-bug-id: LUS-7036 Reviewed-on: https://review.whamcloud.com/35896 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Wang Shilong Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 52960d8..8fe1f5d 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -487,6 +487,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721 #define OBD_FAIL_TGT_REPLY_DATA_RACE 0x722 #define OBD_FAIL_TGT_RECOVERY_CONNECT 0x724 +#define OBD_FAIL_TGT_NO_GRANT 0x725 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 795040d..2888530 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -2610,6 +2610,28 @@ int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io, ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK); ext->oe_ndelay = !!(brw_flags & OBD_BRW_NDELAY); ext->oe_dio = !!(brw_flags & OBD_BRW_NOCACHE); + if (ext->oe_dio && !ext->oe_rw) { /* direct io write */ + int grants; + int ppc; + + ppc = 1 << (cli->cl_chunkbits - PAGE_SHIFT); + grants = cli->cl_grant_extent_tax; + grants += (1 << cli->cl_chunkbits) * + ((page_count + ppc - 1) / ppc); + + spin_lock(&cli->cl_loi_list_lock); + if (osc_reserve_grant(cli, grants) == 0) { + list_for_each_entry(oap, list, oap_pending_item) { + osc_consume_write_grant(cli, + &oap->oap_brw_page); + atomic_long_inc(&obd_dirty_pages); + } + osc_unreserve_grant_nolock(cli, grants, 0); + ext->oe_grants = grants; + } + spin_unlock(&cli->cl_loi_list_lock); + } + ext->oe_is_rdma_only = !!(brw_flags & OBD_BRW_RDMA_ONLY); ext->oe_nr_pages = page_count; ext->oe_mppr = mppr; diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 
3bcda7d..72416e0 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -902,6 +902,9 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, ENTRY; + if (OBD_FAIL_CHECK(OBD_FAIL_TGT_NO_GRANT)) + RETURN(0); + /* When tgd_grant_compat_disable is set, we don't grant any space to * clients not supporting OBD_CONNECT_GRANT_PARAM. * Otherwise, space granted to such a client is inflated since it diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 57b97c7..5821ef5 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -140,6 +140,9 @@ start_mds() { for num in $(seq $MDSCOUNT); do start_mdt $num $@ || return 94 done + for num in $(seq $MDSCOUNT); do + wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds${num} FULL + done } start_mgsmds() { @@ -165,6 +168,7 @@ stop_mgs() { start_ost() { echo "start ost1 service on `facet_active_host ost1`" start ost1 $(ostdevname 1) $OST_MOUNT_OPTS $@ || return 95 + wait_clients_import_state ${CLIENTS:-$HOSTNAME} ost1 FULL } stop_ost() { @@ -176,6 +180,7 @@ stop_ost() { start_ost2() { echo "start ost2 service on `facet_active_host ost2`" start ost2 $(ostdevname 2) $OST_MOUNT_OPTS $@ || return 92 + wait_clients_import_state ${CLIENTS:-$HOSTNAME} ost2 FULL } stop_ost2() { @@ -9053,6 +9058,35 @@ test_126() { } run_test 126 "mount in parallel shouldn't cause a crash" +test_127() { + [[ "$ost1_FSTYPE" == ldiskfs ]] || skip "ldiskfs only test" + + cleanup + setup + zconf_umount_clients $RCLIENTS $MOUNT + + wait_osp_active ost ${FSNAME}-OST0000 0 1 + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + local avail1=($($LCTL get_param -n osc.${osc_tgt}.kbytesavail)) + + $LFS setstripe -i 0 $DIR/$tfile || error "failed creating $DIR/$tfile" + dd if=/dev/zero of=$DIR/$tfile bs=1M oflag=direct || true + + local avail2=($($LCTL get_param -n osc.${osc_tgt}.kbytesavail)) + + if ((avail2 * 100 / avail1 > 1)); then + lfs df $DIR + ls -l $DIR/$tfile + error "more than 1% 
space left: before=$avail1 after=$avail2" + fi + + local mbs=$(($(stat -c %s $DIR/$tfile) / (1024 * 1024))) + + dd if=/dev/zero of=$DIR/$tfile bs=1M count=$mbs conv=notrunc \ + oflag=direct || error "overwrite failed" +} +run_test 127 "direct io overwrite on full ost" + if ! combined_mgs_mds ; then stop mgs fi diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 31b36c7..eff0c18 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7839,18 +7839,25 @@ test_64c() { } run_test 64c "verify grant shrink" +import_param() { + local tgt=$1 + local param=$2 + + $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }" +} + # this does exactly what osc_request.c:osc_announce_cached() does in # order to calculate max amount of grants to ask from server want_grant() { local tgt=$1 - local nrpages=$($LCTL get_param -n osc.${tgt}.max_pages_per_rpc) - local rpc_in_flight=$($LCTL get_param -n osc.${tgt}.max_rpcs_in_flight) + local nrpages=$($LCTL get_param -n osc.$tgt.max_pages_per_rpc) + local rpc_in_flight=$($LCTL get_param -n osc.$tgt.max_rpcs_in_flight) - ((rpc_in_flight ++)); + ((rpc_in_flight++)); nrpages=$((nrpages * rpc_in_flight)) - local dirty_max_pages=$($LCTL get_param -n osc.${tgt}.max_dirty_mb) + local dirty_max_pages=$($LCTL get_param -n osc.$tgt.max_dirty_mb) dirty_max_pages=$((dirty_max_pages * 1024 * 1024 / PAGE_SIZE)) @@ -7858,13 +7865,11 @@ want_grant() { local undirty=$((nrpages * PAGE_SIZE)) local max_extent_pages - max_extent_pages=$($LCTL get_param osc.${tgt}.import | - grep grant_max_extent_size | awk '{print $2}') + max_extent_pages=$(import_param $tgt grant_max_extent_size) max_extent_pages=$((max_extent_pages / PAGE_SIZE)) local nrextents=$(((nrpages + max_extent_pages - 1) / max_extent_pages)) local grant_extent_tax - grant_extent_tax=$($LCTL get_param osc.${tgt}.import | - grep grant_extent_tax | awk '{print $2}') + grant_extent_tax=$(import_param $tgt grant_extent_tax) undirty=$((undirty + nrextents * 
grant_extent_tax)) @@ -7878,56 +7883,171 @@ grant_chunk() { local max_brw_size local grant_extent_tax - max_brw_size=$($LCTL get_param osc.${tgt}.import | - grep max_brw_size | awk '{print $2}') + max_brw_size=$(import_param $tgt max_brw_size) - grant_extent_tax=$($LCTL get_param osc.${tgt}.import | - grep grant_extent_tax | awk '{print $2}') + grant_extent_tax=$(import_param $tgt grant_extent_tax) echo $(((max_brw_size + grant_extent_tax) * 2)) } test_64d() { - [ $OST1_VERSION -lt $(version_code 2.10.56) ] && + [ $OST1_VERSION -ge $(version_code 2.10.56) ] || skip "OST < 2.10.55 doesn't limit grants enough" - local tgt=$($LCTL dl | grep "0000-osc-[^mM]" | awk '{print $4}') - local file=$DIR/$tfile + local tgt=$($LCTL dl | awk '/OST0000-osc-[^mM]/ { print $4 }') - [[ $($LCTL get_param osc.${tgt}.import | - grep "connect_flags:.*grant_param") ]] || + [[ "$($LCTL get_param osc.${tgt}.import)" =~ "grant_param" ]] || skip "no grant_param connect flag" - local olddebug=$($LCTL get_param -n debug 2> /dev/null) + local olddebug="$($LCTL get_param -n debug 2> /dev/null)" + + $LCTL set_param -n -n debug="$OLDDEBUG" || true + stack_trap "$LCTL set_param -n debug='$olddebug'" EXIT - $LCTL set_param debug="$OLDDEBUG" 2> /dev/null || true local max_cur_granted=$(($(want_grant $tgt) + $(grant_chunk $tgt))) - stack_trap "rm -f $file" EXIT + stack_trap "rm -f $DIR/$tfile && wait_delete_completed" EXIT - $LFS setstripe $file -i 0 -c 1 - dd if=/dev/zero of=$file bs=1M count=1000 & + $LFS setstripe $DIR/$tfile -i 0 -c 1 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1000 & ddpid=$! - while true - do - local cur_grant=$($LCTL get_param -n osc.${tgt}.cur_grant_bytes) - if [[ $cur_grant -gt $max_cur_granted ]] - then + while kill -0 $ddpid; do + local cur_grant=$($LCTL get_param -n osc.$tgt.cur_grant_bytes) + + if [[ $cur_grant -gt $max_cur_granted ]]; then kill $ddpid error "cur_grant $cur_grant > $max_cur_granted" fi - kill -0 $ddpid - [[ $? 
-ne 0 ]] && break; - sleep 2 - done - rm -f $DIR/$tfile - wait_delete_completed - $LCTL set_param debug="$olddebug" 2> /dev/null || true + sleep 1 + done } run_test 64d "check grant limit exceed" +check_grants() { + local tgt=$1 + local expected=$2 + local msg=$3 + local cur_grants=$($LCTL get_param -n osc.$tgt.cur_grant_bytes) + + ((cur_grants == expected)) || + error "$msg: grants mismatch: $cur_grants, expected $expected" +} + +round_up_p2() { + echo $((($1 + $2 - 1) & ~($2 - 1))) +} + +test_64e() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ $OST1_VERSION -ge $(version_code 2.11.56) ] || + skip "Need OSS version at least 2.11.56" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + local init_grants=$(import_param $osc_tgt initial_grant) + + check_grants $osc_tgt $init_grants "init grants" + + local extent_tax=$(import_param $osc_tgt grant_extent_tax) + local max_brw_size=$(import_param $osc_tgt max_brw_size) + local gbs=$(import_param $osc_tgt grant_block_size) + + # write random number of bytes from max_brw_size / 4 to max_brw_size + local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1) + # align for direct io + write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE) + # round to grant consumption unit + local wb_round_up=$(round_up_p2 $write_bytes gbs) + + local grants=$((wb_round_up + extent_tax)) + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + # define OBD_FAIL_TGT_NO_GRANT 0x725 + # make the server not grant more back + do_facet ost1 $LCTL set_param fail_loc=0x725 + dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct + + do_facet ost1 $LCTL set_param fail_loc=0 + + check_grants $osc_tgt $((init_grants - grants)) "dio w/o grant alloc" + + rm -f $DIR/$tfile || error "rm failed" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + 
osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + # define OBD_FAIL_TGT_NO_GRANT 0x725 + # make the server not grant more back + do_facet ost1 $LCTL set_param fail_loc=0x725 + $MULTIOP $DIR/$tfile "oO_WRONLY:w${write_bytes}yc" + do_facet ost1 $LCTL set_param fail_loc=0 + + check_grants $osc_tgt $((init_grants - grants)) "buf io w/o grant alloc" +} +run_test 64e "check grant consumption (no grant allocation)" + +test_64f() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + local init_grants=$(import_param $osc_tgt initial_grant) + local extent_tax=$(import_param $osc_tgt grant_extent_tax) + local max_brw_size=$(import_param $osc_tgt max_brw_size) + local gbs=$(import_param $osc_tgt grant_block_size) + local chunk=$(grant_chunk $osc_tgt) + + # write random number of bytes from max_brw_size / 4 to max_brw_size + local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1) + # align for direct io + write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE) + # round to grant consumption unit + local wb_round_up=$(round_up_p2 $write_bytes gbs) + + local grants=$((wb_round_up + extent_tax)) + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct || + error "error writing to $DIR/$tfile" + + check_grants $osc_tgt $((init_grants - grants + chunk)) \ + "direct io with grant allocation" + + rm -f $DIR/$tfile || error "rm failed" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + local cmd="oO_WRONLY:w${write_bytes}_yc" + + $MULTIOP $DIR/$tfile $cmd & + MULTIPID=$! 
+ sleep 1 + + check_grants $osc_tgt $((init_grants - grants)) \ + "buffered io, not write rpc" + + kill -USR1 $MULTIPID + wait + + check_grants $osc_tgt $((init_grants - grants + chunk)) \ + "buffered io, one RPC" +} +run_test 64f "check grant consumption (with grant allocation)" + # bug 1414 - set/get directories' stripe info test_65a() { [ $PARALLEL == "yes" ] && skip "skip parallel run"