From df2b5d99adfc6117a327f8e9102ba30076253213 Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Wed, 12 Mar 2025 13:13:43 +0300 Subject: [PATCH] LU-17933 target: do not break grants on RPC failure When write RPC fails it may leave grant accounting inaccurate. Grants are allocated in: tgt_brw_write obd_preprw ofd_preprw ofd_preprw_write tgt_grant_prepare_write oa->o_grant = tgt_grant_alloc() tgd->tgd_tot_granted += grant; ted->ted_grant += grant; return grant; If further it happens to return error, oa->o_grant is zeroed: ofd_preprw_write .. out: tgt_grant_prepare_read oa->o_grant = 0; This results in a mismatch between grants owned by a client and grants the server thinks the client owns. Clients used to skip grant accounting update in case of write RPC failure: ptlrpc_req_interpret brw_interpret osc_brw_fini_request if (rc < 0 && rc != -EDQUOT) return rc osc_update_grant __osc_update_grant(cli, body->oa.o_grant) Have server to take care of correct grant accounting in case of RPC failure. Test to illustrate the issue is added. This test needs zero lost grants. Sync write on start is to get rid of those. Signed-off-by: Vladimir Saveliev Change-Id: Ic835eb0b7f1894207637ec541b1d39c59bde286d Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56547 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/include/lu_target.h | 1 + lustre/include/obd_support.h | 1 + lustre/mdt/mdt_io.c | 5 +++++ lustre/ofd/ofd_io.c | 15 ++++++++++++--- lustre/target/tgt_grant.c | 34 +++++++++++++++++++++++++++++++--- lustre/tests/sanity.sh | 14 +++++++++++++- 6 files changed, 63 insertions(+), 7 deletions(-) diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index fe97160..6f51f6b 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -543,6 +543,7 @@ static inline int exp_grant_param_supp(struct obd_export *exp) #define COMPAT_BSIZE_SHIFT 12 void tgt_grant_sanity_check(struct obd_device *obd, const char *func); +void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa); void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp, struct obd_connect_data *data, bool new_conn); void tgt_grant_discard(struct obd_export *exp); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 9e7a4bd..91d6710 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -355,6 +355,7 @@ extern bool obd_enable_fname_encoding; #define OBD_FAIL_OST_OPCODE 0x253 #define OBD_FAIL_OST_DELORPHAN_DELAY 0x254 #define OBD_FAIL_OST_ENOSPC_VALID 0x255 +#define OBD_FAIL_OST_GRANT_PREPARE 0x256 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 065935b..690b110 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -458,6 +458,8 @@ unlock: up_read(&mo->mot_dom_sem); /* tgt_grant_prepare_write() was called, so we must commit */ tgt_grant_commit(exp, oa->o_grant_used, rc); + /* dealloc grants, client won't receive them */ + tgt_grant_dealloc(exp, oa); /* let's still process incoming grant information packed in the oa, * but without enforcing grant since we won't proceed with the write. * Just like a read request actually. */ @@ -658,6 +660,9 @@ out: up_read(&mo->mot_dom_sem); if (granted > 0) tgt_grant_commit(exp, granted, old_rc); + if (rc) + /* dealloc grants, client won't receive them */ + tgt_grant_dealloc(exp, oa); RETURN(rc); } diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index c8b78da..34d4ae7 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -764,9 +764,12 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, * transactions to complete. */ tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt); + if (CFS_FAIL_CHECK(OBD_FAIL_OST_GRANT_PREPARE)) + GOTO(err_commit, rc = -EIO); + fo = ofd_object_find(env, ofd, fid); if (IS_ERR(fo)) - GOTO(out, rc = PTR_ERR(fo)); + GOTO(err_commit, rc = PTR_ERR(fo)); LASSERT(fo != NULL); ofd_info(env)->fti_obj = fo; @@ -774,8 +777,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, if (!ofd_object_exists(fo)) { CERROR("%s: BRW to missing obj "DOSTID"\n", exp->exp_obd->obd_name, POSTID(&obj->ioo_oid)); - ofd_object_put(env, fo); - GOTO(out, rc = -ENOENT); + GOTO(err_put, rc = -ENOENT); } if (ptlrpc_connection_is_local(exp->exp_connection)) @@ -854,9 +856,13 @@ err: ofd_read_unlock(env, fo); err_nolock: dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local); +err_put: ofd_object_put(env, fo); +err_commit: /* tgt_grant_prepare_write() was called, so we must commit */ tgt_grant_commit(exp, oa->o_grant_used, rc); + /* dealloc grants, client won't receive them */ + tgt_grant_dealloc(exp, oa); out: /* let's still process incoming grant information packed in the oa, * but without enforcing grant since we won't proceed with the write. @@ -1399,6 +1405,9 @@ out: ofd_object_put(env, fo); if (granted > 0) tgt_grant_commit(exp, granted, old_rc); + if (rc) + /* dealloc grants, client won't receive them */ + tgt_grant_dealloc(exp, oa); RETURN(rc); } diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 16e7762..0254c0a1 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -954,7 +954,6 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, tgd->tgd_tot_granted += grant; ted->ted_grant += grant; - if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) { CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, @@ -979,6 +978,35 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, } /** + * Deallocate space granted to a client + * + * This is used when write RPC fails. Server needs to update grant + * counters as a client won't get addiitional grants. + * + * \param[in] exp export of the client which sent the request + * \param[in] granted grants allocated via tgt_grant_prepare_write + */ +void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa) +{ + struct tg_grants_data *tgd = &obd2obt(exp->exp_obd)->obt_lut->lut_tgd; + struct tg_export_data *ted = &exp->exp_target_data; + int disconnected; + + if (!(oa->o_valid & OBD_MD_FLGRANT)) + return; + spin_lock(&tgd->tgd_grant_lock); + spin_lock(&exp->exp_lock); + disconnected = exp->exp_disconnected; + spin_unlock(&exp->exp_lock); + if (!disconnected) { + tgd->tgd_tot_granted -= oa->o_grant; + ted->ted_grant -= oa->o_grant; + } + spin_unlock(&tgd->tgd_grant_lock); +} +EXPORT_SYMBOL(tgt_grant_dealloc); + +/** * Handle grant space allocation on client connection & reconnection. * * A new non-readonly connection gets an initial grant allocation equals to @@ -1098,11 +1126,11 @@ void tgt_grant_discard(struct obd_export *exp) ttd += e->exp_target_data.ted_dirty; } if (tgd->tgd_tot_granted < ted->ted_grant) - CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu", + CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, tgd->tgd_tot_granted, ted->ted_grant, ttg); if (tgd->tgd_tot_dirty < ted->ted_dirty) - CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu", + CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, tgd->tgd_tot_dirty, ted->ted_dirty, ttd); tgd->tgd_tot_granted = ttg; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 60deb38..78ed1f5 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -9,7 +9,7 @@ set -e ONLY=${ONLY:-"$*"} # Check Grants after these tests -GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d" +GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d 64j" OSC=${OSC:-"osc"} @@ -10862,6 +10862,18 @@ test_64i() { } run_test 64i "shrink on reconnect" +test_64j() { + $LFS setstripe -c 1 -i 0 $DIR/$tfile + + # get rid of lost grants which could be formed on previous test + $MULTIOP $DIR/$tfile oO_RDWR:O_SYNC:w4096c +#define OBD_FAIL_OST_GRANT_PREPARE 0x256 + do_facet ost1 "$LCTL set_param fail_loc=0x80000256" + + $MULTIOP $DIR/$tfile oO_RDWR:O_DIRECT:w4096c +} +run_test 64j "check grants on re-done rpc" + # bug 1414 - set/get directories' stripe info test_65a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" -- 1.8.3.1