Whamcloud - gitweb
LU-17933 target: do not break grants on RPC failure 47/56547/5
authorVladimir Saveliev <vladimir.saveliev@hpe.com>
Wed, 12 Mar 2025 10:13:43 +0000 (13:13 +0300)
committerOleg Drokin <green@whamcloud.com>
Fri, 2 May 2025 02:17:42 +0000 (02:17 +0000)
When write RPC fails it may leave grant accounting inaccurate.
Grants are allocated in:
  tgt_brw_write
    obd_preprw
      ofd_preprw
        ofd_preprw_write
          tgt_grant_prepare_write
            oa->o_grant = tgt_grant_alloc()
              tgd->tgd_tot_granted += grant;
              ted->ted_grant += grant;
              return grant;
If further it happens to return error, oa->o_grant is zeroed:
  ofd_preprw_write
    ..
  out:
    tgt_grant_prepare_read
      oa->o_grant = 0;

This results in a mismatch between grants owned by a client and grants
the server thinks the client owns.

Clients used to skip grant accounting update in case of write RPC
failure:
ptlrpc_req_interpret
  brw_interpret
    osc_brw_fini_request
      if (rc < 0 && rc != -EDQUOT)
        return rc
     osc_update_grant
       __osc_update_grant(cli, body->oa.o_grant)

Have server to take care of correct grant accounting in case of RPC
failure.

Test to illustrate the issue is added. This test needs zero lost
grants. Sync write on start is to get rid of those.

Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Change-Id: Ic835eb0b7f1894207637ec541b1d39c59bde286d
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56547
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/lu_target.h
lustre/include/obd_support.h
lustre/mdt/mdt_io.c
lustre/ofd/ofd_io.c
lustre/target/tgt_grant.c
lustre/tests/sanity.sh

index fe97160..6f51f6b 100644 (file)
@@ -543,6 +543,7 @@ static inline int exp_grant_param_supp(struct obd_export *exp)
 #define COMPAT_BSIZE_SHIFT 12
 
 void tgt_grant_sanity_check(struct obd_device *obd, const char *func);
+void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa);
 void tgt_grant_connect(const struct lu_env *env, struct obd_export *exp,
                       struct obd_connect_data *data, bool new_conn);
 void tgt_grant_discard(struct obd_export *exp);
index 9e7a4bd..91d6710 100644 (file)
@@ -355,6 +355,7 @@ extern bool obd_enable_fname_encoding;
 #define OBD_FAIL_OST_OPCODE             0x253
 #define OBD_FAIL_OST_DELORPHAN_DELAY    0x254
 #define OBD_FAIL_OST_ENOSPC_VALID       0x255
+#define OBD_FAIL_OST_GRANT_PREPARE      0x256
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 065935b..690b110 100644 (file)
@@ -458,6 +458,8 @@ unlock:
        up_read(&mo->mot_dom_sem);
        /* tgt_grant_prepare_write() was called, so we must commit */
        tgt_grant_commit(exp, oa->o_grant_used, rc);
+       /* dealloc grants, client won't receive them */
+       tgt_grant_dealloc(exp, oa);
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
         * Just like a read request actually. */
@@ -658,6 +660,9 @@ out:
        up_read(&mo->mot_dom_sem);
        if (granted > 0)
                tgt_grant_commit(exp, granted, old_rc);
+       if (rc)
+               /* dealloc grants, client won't receive them */
+               tgt_grant_dealloc(exp, oa);
        RETURN(rc);
 }
 
index c8b78da..34d4ae7 100644 (file)
@@ -764,9 +764,12 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
         * transactions to complete. */
        tgt_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
+       if (CFS_FAIL_CHECK(OBD_FAIL_OST_GRANT_PREPARE))
+               GOTO(err_commit, rc = -EIO);
+
        fo = ofd_object_find(env, ofd, fid);
        if (IS_ERR(fo))
-               GOTO(out, rc = PTR_ERR(fo));
+               GOTO(err_commit, rc = PTR_ERR(fo));
        LASSERT(fo != NULL);
 
        ofd_info(env)->fti_obj = fo;
@@ -774,8 +777,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        if (!ofd_object_exists(fo)) {
                CERROR("%s: BRW to missing obj "DOSTID"\n",
                       exp->exp_obd->obd_name, POSTID(&obj->ioo_oid));
-               ofd_object_put(env, fo);
-               GOTO(out, rc = -ENOENT);
+               GOTO(err_put, rc = -ENOENT);
        }
 
        if (ptlrpc_connection_is_local(exp->exp_connection))
@@ -854,9 +856,13 @@ err:
        ofd_read_unlock(env, fo);
 err_nolock:
        dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
+err_put:
        ofd_object_put(env, fo);
+err_commit:
        /* tgt_grant_prepare_write() was called, so we must commit */
        tgt_grant_commit(exp, oa->o_grant_used, rc);
+       /* dealloc grants, client won't receive them */
+       tgt_grant_dealloc(exp, oa);
 out:
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
@@ -1399,6 +1405,9 @@ out:
        ofd_object_put(env, fo);
        if (granted > 0)
                tgt_grant_commit(exp, granted, old_rc);
+       if (rc)
+               /* dealloc grants, client won't receive them */
+               tgt_grant_dealloc(exp, oa);
        RETURN(rc);
 }
 
index 16e7762..0254c0a 100644 (file)
@@ -954,7 +954,6 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
 
        tgd->tgd_tot_granted += grant;
        ted->ted_grant += grant;
-
        if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) {
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
@@ -979,6 +978,35 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
 }
 
 /**
+ * Deallocate space granted to a client
+ *
+ * This is used when write RPC fails. Server needs to update grant
+ * counters as a client won't get addiitional grants.
+ *
+ * \param[in] exp              export of the client which sent the request
+ * \param[in] granted          grants allocated via tgt_grant_prepare_write
+ */
+void tgt_grant_dealloc(struct obd_export *exp, struct obdo *oa)
+{
+       struct tg_grants_data *tgd = &obd2obt(exp->exp_obd)->obt_lut->lut_tgd;
+       struct tg_export_data *ted = &exp->exp_target_data;
+       int disconnected;
+
+       if (!(oa->o_valid & OBD_MD_FLGRANT))
+               return;
+       spin_lock(&tgd->tgd_grant_lock);
+       spin_lock(&exp->exp_lock);
+       disconnected = exp->exp_disconnected;
+       spin_unlock(&exp->exp_lock);
+       if (!disconnected) {
+               tgd->tgd_tot_granted -= oa->o_grant;
+               ted->ted_grant -= oa->o_grant;
+       }
+       spin_unlock(&tgd->tgd_grant_lock);
+}
+EXPORT_SYMBOL(tgt_grant_dealloc);
+
+/**
  * Handle grant space allocation on client connection & reconnection.
  *
  * A new non-readonly connection gets an initial grant allocation equals to
@@ -1098,11 +1126,11 @@ void tgt_grant_discard(struct obd_export *exp)
                        ttd += e->exp_target_data.ted_dirty;
                }
                if (tgd->tgd_tot_granted < ted->ted_grant)
-                       CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu",
+                       CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu\n",
                               obd->obd_name,  exp->exp_client_uuid.uuid, exp,
                               tgd->tgd_tot_granted, ted->ted_grant, ttg);
                if (tgd->tgd_tot_dirty < ted->ted_dirty)
-                       CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu",
+                       CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu\n",
                               obd->obd_name, exp->exp_client_uuid.uuid, exp,
                               tgd->tgd_tot_dirty, ted->ted_dirty, ttd);
                tgd->tgd_tot_granted = ttg;
index 60deb38..78ed1f5 100755 (executable)
@@ -9,7 +9,7 @@ set -e
 ONLY=${ONLY:-"$*"}
 
 # Check Grants after these tests
-GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d"
+GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c 64d 64j"
 
 OSC=${OSC:-"osc"}
 
@@ -10862,6 +10862,18 @@ test_64i() {
 }
 run_test 64i "shrink on reconnect"
 
+test_64j() {
+       $LFS setstripe -c 1 -i 0 $DIR/$tfile
+
+       # get rid of lost grants which could be formed on previous test
+       $MULTIOP $DIR/$tfile oO_RDWR:O_SYNC:w4096c
+#define OBD_FAIL_OST_GRANT_PREPARE      0x256
+       do_facet ost1 "$LCTL set_param fail_loc=0x80000256"
+
+       $MULTIOP $DIR/$tfile oO_RDWR:O_DIRECT:w4096c
+}
+run_test 64j "check grants on re-done rpc"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"