From: Shaun Tancheff Date: Fri, 7 Feb 2025 13:21:53 +0000 (+0700) Subject: LU-18691 quota: quota interop check for 64k page clients X-Git-Tag: 2.16.53~34 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=262957eaef3daeeb4a1bf4f587d4d864fa8aaab1;p=fs%2Flustre-release.git LU-18691 quota: quota interop check for 64k page clients When the end of available quota is reached, a race condition can occur that allows a 64k-unaligned I/O to be submitted, which causes the node to hang indefinitely. This happens when a partial write hits quota limits and a subsequent write is not aligned on a 64k page boundary, triggering a hang due to 64k vs 4k page-aligned transfers. HPE-bug-id: LUS-12724 Test-Parameters: testlist=sanity-quota clientarch=ppc64le clientdistro=el8.9 serverdistro=el9.4 env=ONLY=88,ONLY_REPEAT=10 Test-Parameters: testlist=sanity-quota clientarch=ppc64le clientdistro=el8.9 serverdistro=el8.9 serverversion=2.15.4 env=ONLY=88,ONLY_REPEAT=10 Test-Parameters: testlist=sanity-quota clientarch=aarch64 clientdistro=el9.3 serverdistro=el8.10 env=ONLY=88,ONLY_REPEAT=10 Signed-off-by: Shaun Tancheff Change-Id: I0f8638062f8b0e57207695c45e1fccbd7492c32d Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57961 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: xinliang Reviewed-by: Sergey Cheremencev Reviewed-by: Oleg Drokin --- diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index fcc64cf..456008a 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1553,19 +1553,15 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa, bool enable_checksum = true; struct cl_page *clpage; u64 foffset = 0; + u32 iop_pages = 0; ENTRY; if (pga[0]->bp_page) { clpage = oap2cl_page(brw_page2oap(pga[0])); inode = clpage->cp_inode; - if (clpage->cp_type == CPT_TRANSIENT) { + foffset = pga[0]->bp_off; + if (clpage->cp_type == CPT_TRANSIENT) directio = true; - /* When page size interop logic is not 
supported by the - * remote server use the old logic. - */ - if (imp_connect_unaligned_dio(cli->cl_import)) - foffset = pga[0]->bp_off; - } } if (CFS_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ)) RETURN(-ENOMEM); /* Recoverable */ @@ -1789,6 +1785,12 @@ retry_encrypt: goto no_bulk; } + if (foffset) + iop_pages = interop_pages(foffset, page_count, pga); + /* need interop but server does not support, return failure */ + if (iop_pages && !imp_connect_unaligned_dio(cli->cl_import)) + GOTO(out, rc = -EINVAL); /* -EDQUOT? */ + desc = ptlrpc_prep_bulk_imp(req, page_count, cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS, (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE : @@ -1800,8 +1802,8 @@ retry_encrypt: GOTO(out, rc = -ENOMEM); /* NB request now owns desc and will free it when it gets freed */ desc->bd_is_rdma = gpu; - if (directio && foffset) - desc->bd_md_offset = interop_pages(foffset, page_count, pga); + if (iop_pages) + desc->bd_md_offset = iop_pages; no_bulk: body = req_capsule_client_get(pill, &RMF_OST_BODY); diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index c7dcc29..96c3ba8 100755 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -6592,6 +6592,38 @@ test_87() } run_test 87 "lfs quota -a should print default quota setting" +# interop quota +test_88() +{ + (($PAGE_SIZE > 4096)) || skip "require client with >4k pages" + setup_quota_test || error "setup quota failed with $?" + + set_ost_qtype $QTYPE || error "enable ost quota failed" + + $LFS setquota -u $TSTUSR -B 100M -i 0 $MOUNT || + error "enable quota -B 100M failed." 
+ + local tfile + local result + + local repeat=$(seq 10) + local arr=(1075761 1075770 1075800 1076000 1080000 1093000 2010000 \ + 2080000 2095000 4096000) + [[ "$SLOW" = "no" ]] && repeat=1 + + for r in $repeat; do + for bs in ${arr[@]}; do + tfile=$DIR/$tdir/dd_largefile.${bs} + ${RUNAS} dd if=/dev/urandom of=${tfile} bs=${bs} \ + count=100 status=progress + rm -f ${tfile} + done + done + + return 0 +} +run_test 88 "Writing over quota should not hang" + check_quota_no_mount() { local opts="$1"