When write RPC fails it may leave grant accounting inaccurate.
Grants are allocated in:
tgt_brw_write
obd_preprw
ofd_preprw
ofd_preprw_write
tgt_grant_prepare_write
oa->o_grant = tgt_grant_alloc()
tgd->tgd_tot_granted += grant;
ted->ted_grant += grant;
return grant;
If further it happens to return error, oa->o_grant is zeroed:
ofd_preprw_write
..
out:
tgt_grant_prepare_read
oa->o_grant = 0;
This results in a mismatch between grants owned by a client and grants
the server thinks the client owns.
Clients used to skip grant accounting update in case of write RPC
failure:
ptlrpc_req_interpret
brw_interpret
osc_brw_fini_request
if (rc < 0 && rc != -EDQUOT)
return rc
osc_update_grant
__osc_update_grant(cli, body->oa.o_grant)
Have server to take care of correct grant accounting in case of RPC
failure.
Test to illustrate the issue is added. This test needs zero lost
grants. Sync write on start is to get rid of those.
Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Change-Id: Ic835eb0b7f1894207637ec541b1d39c59bde286d
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56547
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
#define COMPAT_BSIZE_SHIFT 12
void tgt_grant_sanity_check(struct obd_device *obd, const char *func);
+void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa);
void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp,
struct obd_connect_data *data, bool new_conn);
void tgt_grant_discard(struct obd_export *exp);
#define OBD_FAIL_OST_OPCODE 0x253
#define OBD_FAIL_OST_DELORPHAN_DELAY 0x254
#define OBD_FAIL_OST_ENOSPC_VALID 0x255
+#define OBD_FAIL_OST_GRANT_PREPARE 0x256
#define OBD_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
up_read(&mo->mot_dom_sem);
/* tgt_grant_prepare_write() was called, so we must commit */
tgt_grant_commit(exp, oa->o_grant_used, rc);
+ /* dealloc grants, client won't receive them */
+ tgt_grant_dealloc(exp, oa);
/* let's still process incoming grant information packed in the oa,
* but without enforcing grant since we won't proceed with the write.
* Just like a read request actually. */
up_read(&mo->mot_dom_sem);
if (granted > 0)
tgt_grant_commit(exp, granted, old_rc);
+ if (rc)
+ /* dealloc grants, client won't receive them */
+ tgt_grant_dealloc(exp, oa);
RETURN(rc);
}
* transactions to complete. */
tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
+ if (CFS_FAIL_CHECK(OBD_FAIL_OST_GRANT_PREPARE))
+ GOTO(err_commit, rc = -EIO);
+
fo = ofd_object_find(env, ofd, fid);
if (IS_ERR(fo))
- GOTO(out, rc = PTR_ERR(fo));
+ GOTO(err_commit, rc = PTR_ERR(fo));
LASSERT(fo != NULL);
ofd_info(env)->fti_obj = fo;
if (!ofd_object_exists(fo)) {
CERROR("%s: BRW to missing obj "DOSTID"\n",
exp->exp_obd->obd_name, POSTID(&obj->ioo_oid));
- ofd_object_put(env, fo);
- GOTO(out, rc = -ENOENT);
+ GOTO(err_put, rc = -ENOENT);
}
if (ptlrpc_connection_is_local(exp->exp_connection))
ofd_read_unlock(env, fo);
err_nolock:
dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
+err_put:
ofd_object_put(env, fo);
+err_commit:
/* tgt_grant_prepare_write() was called, so we must commit */
tgt_grant_commit(exp, oa->o_grant_used, rc);
+ /* dealloc grants, client won't receive them */
+ tgt_grant_dealloc(exp, oa);
out:
/* let's still process incoming grant information packed in the oa,
* but without enforcing grant since we won't proceed with the write.
ofd_object_put(env, fo);
if (granted > 0)
tgt_grant_commit(exp, granted, old_rc);
+ if (rc)
+ /* dealloc grants, client won't receive them */
+ tgt_grant_dealloc(exp, oa);
RETURN(rc);
}
tgd->tgd_tot_granted += grant;
ted->ted_grant += grant;
-
if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) {
CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
}
/**
+ * Deallocate space granted to a client
+ *
+ * This is used when write RPC fails. Server needs to update grant
+ * counters as a client won't get addiitional grants.
+ *
+ * \param[in] exp export of the client which sent the request
+ * \param[in] granted grants allocated via tgt_grant_prepare_write
+ */
+void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa)
+{
+ struct tg_grants_data *tgd = &obd2obt(exp->exp_obd)->obt_lut->lut_tgd;
+ struct tg_export_data *ted = &exp->exp_target_data;
+ int disconnected;
+
+ if (!(oa->o_valid & OBD_MD_FLGRANT))
+ return;
+ spin_lock(&tgd->tgd_grant_lock);
+ spin_lock(&exp->exp_lock);
+ disconnected = exp->exp_disconnected;
+ spin_unlock(&exp->exp_lock);
+ if (!disconnected) {
+ tgd->tgd_tot_granted -= oa->o_grant;
+ ted->ted_grant -= oa->o_grant;
+ }
+ spin_unlock(&tgd->tgd_grant_lock);
+}
+EXPORT_SYMBOL(tgt_grant_dealloc);
+
+/**
* Handle grant space allocation on client connection & reconnection.
*
* A new non-readonly connection gets an initial grant allocation equals to
ttd += e->exp_target_data.ted_dirty;
}
if (tgd->tgd_tot_granted < ted->ted_grant)
- CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu",
+ CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
tgd->tgd_tot_granted, ted->ted_grant, ttg);
if (tgd->tgd_tot_dirty < ted->ted_dirty)
- CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu",
+ CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
tgd->tgd_tot_dirty, ted->ted_dirty, ttd);
tgd->tgd_tot_granted = ttg;
ONLY=${ONLY:-"$*"}
# Check Grants after these tests
-GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d"
+GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d 64j"
OSC=${OSC:-"osc"}
}
run_test 64i "shrink on reconnect"
+test_64j() {
+ $LFS setstripe -c 1 -i 0 $DIR/$tfile
+
+ # get rid of lost grants which could be formed on previous test
+ $MULTIOP $DIR/$tfile oO_RDWR:O_SYNC:w4096c
+#define OBD_FAIL_OST_GRANT_PREPARE 0x256
+ do_facet ost1 "$LCTL set_param fail_loc=0x80000256"
+
+ $MULTIOP $DIR/$tfile oO_RDWR:O_DIRECT:w4096c
+}
+run_test 64j "check grants on re-done rpc"
+
# bug 1414 - set/get directories' stripe info
test_65a() {
[ $PARALLEL == "yes" ] && skip "skip parallel run"