Whamcloud - gitweb
LU-15935 target: keep track of multirpc slots in last_rcvd 99/49399/2
authorEtienne AUJAMES <eaujames@ddn.com>
Wed, 14 Dec 2022 01:46:19 +0000 (17:46 -0800)
committerOleg Drokin <green@whamcloud.com>
Wed, 19 Apr 2023 03:32:24 +0000 (03:32 +0000)
OBD_INCOMPAT_MULTI_RPCS is cleared by tgt_boot_epoch_update() if the
recovery is aborted. This assumes that all the clients are evicted,
but that is not always true: some clients may have successfully finished
their recovery. In that case, those clients keep their last_rcvd
slot.

This patch modifies lut_num_clients to keep track of multirpc
slots in last_rcvd.
For now the counter is used only by tgt_fini() to clear
OBD_INCOMPAT_MULTI_RPCS, so this use can be extended to
tgt_boot_epoch_update().

Add replay-dual test_33.

Lustre-change: https://review.whamcloud.com/48082
Lustre-commit: 1a79d395dd61ea2e21598bfaa5b39375e64ec22c

Test-Parameters: testlist=replay-dual env=ONLY=33,ONLY_REPEAT=30
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I70791c9dcb7cc77f018b9e5c95568598d54f0322
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49399
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lu_target.h
lustre/target/tgt_lastrcvd.c
lustre/tests/replay-dual.sh

index 2a361ac..d061244 100644 (file)
@@ -192,7 +192,8 @@ struct lu_target {
        /** Bitmap of known clients */
        unsigned long           *lut_client_bitmap;
        /* Number of clients supporting multiple modify RPCs
-        * recorded in the bitmap */
+        * recorded in the last_rcvd file
+        */
        atomic_t                 lut_num_clients;
        /* Client generation to identify client slot reuse */
        atomic_t                 lut_client_generation;
index 7022ceb..4341d75 100644 (file)
@@ -384,6 +384,13 @@ static inline struct lu_buf *tti_buf_lcd(struct tgt_thread_info *tti)
        return &tti->tti_buf;
 }
 
+/**
+ * Tell whether a last_rcvd client record counts as a multiple modify RPCs
+ * record.
+ *
+ * \param[in] tgt      target whose lut_lsd incompat flags are tested
+ * \param[in] lcd      client record whose lcd_generation is tested
+ *
+ * \retval true        OBD_INCOMPAT_MULTI_RPCS is set in the target incompat
+ *                     flags and the record has a non-zero generation
+ * \retval false       otherwise
+ */
+static inline bool tgt_is_multimodrpcs_record(struct lu_target *tgt,
+                                             struct lsd_client_data *lcd)
+{
+       /* without the incompat flag the record is not looked at */
+       if (!(tgt->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS))
+               return false;
+
+       return lcd->lcd_generation != 0;
+}
+
 /**
  * Allocate in-memory data for client slot related to export.
  */
@@ -453,9 +460,6 @@ void tgt_client_free(struct obd_export *exp)
                       exp->exp_obd->obd_name, ted->ted_lr_idx);
                LBUG();
        }
-
-       if (tgt_is_multimodrpcs_client(exp) && !exp->exp_obd->obd_stopping)
-               atomic_dec(&lut->lut_num_clients);
 }
 EXPORT_SYMBOL(tgt_client_free);
 
@@ -837,13 +841,11 @@ void tgt_boot_epoch_update(struct lu_target *tgt)
        list_splice_init(&client_list, &tgt->lut_obd->obd_final_req_queue);
        spin_unlock(&tgt->lut_obd->obd_recovery_task_lock);
 
-       /** Clear MULTI RPCS incompatibility flag if
-        * - target is MDT and
-        * - there is no client to recover or the recovery was aborted
+       /**
+        * Clear MULTI RPCS incompatibility flag if there is no multi-rpcs
+        * client in last_rcvd file
         */
-       if (!strncmp(tgt->lut_obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) &&
-           (atomic_read(&tgt->lut_obd->obd_max_recoverable_clients) == 0 ||
-           tgt->lut_obd->obd_abort_recovery))
+       if (atomic_read(&tgt->lut_num_clients) == 0)
                tgt->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS;
 
        /** update server epoch */
@@ -1041,7 +1043,6 @@ repeat:
        if (tgt_is_multimodrpcs_client(exp)) {
                /* Set MULTI RPCS incompatibility flag to prevent previous
                 * Lustre versions to mount a target with reply_data file */
-               atomic_inc(&tgt->lut_num_clients);
                if (!(tgt->lut_lsd.lsd_feature_incompat &
                      OBD_INCOMPAT_MULTI_RPCS)) {
                        tgt->lut_lsd.lsd_feature_incompat |=
@@ -1071,11 +1072,16 @@ repeat:
                RETURN(-ENOSPC);
 
        rc = tgt_client_data_update(env, exp);
-       if (rc)
+       if (rc) {
                CERROR("%s: Failed to write client lcd at idx %d, rc %d\n",
                       tgt->lut_obd->obd_name, idx, rc);
+               RETURN(rc);
+       }
 
-       RETURN(rc);
+       if (tgt_is_multimodrpcs_client(exp))
+               atomic_inc(&tgt->lut_num_clients);
+
+       RETURN(0);
 }
 EXPORT_SYMBOL(tgt_client_new);
 
@@ -1105,7 +1111,6 @@ int tgt_client_add(const struct lu_env *env,  struct obd_export *exp, int idx)
                       tgt->lut_obd->obd_name,  idx);
                LBUG();
        }
-       atomic_inc(&tgt->lut_num_clients);
 
        CDEBUG(D_INFO, "%s: client at idx %d with UUID '%s' added, "
               "generation %d\n",
@@ -1184,9 +1189,21 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp)
                RETURN(rc);
        }
 
+       /* Race between an eviction and a disconnection? */
+       mutex_lock(&ted->ted_lcd_lock);
+       if (ted->ted_lcd->lcd_uuid[0] == '\0') {
+               mutex_unlock(&ted->ted_lcd_lock);
+               RETURN(rc);
+       }
+
        memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
+       mutex_unlock(&ted->ted_lcd_lock);
+
        rc = tgt_client_data_update(env, exp);
 
+       if (!rc && tgt_is_multimodrpcs_record(tgt, ted->ted_lcd))
+               atomic_dec(&tgt->lut_num_clients);
+
        CDEBUG(rc == 0 ? D_INFO : D_ERROR,
               "%s: zeroing out client %s at idx %u (%llu), rc %d\n",
               tgt->lut_obd->obd_name, ted->ted_lcd->lcd_uuid,
@@ -1658,9 +1675,9 @@ static int tgt_clients_data_init(const struct lu_env *env,
                spin_unlock(&exp->exp_lock);
                atomic_inc(&obd->obd_max_recoverable_clients);
 
-               if (tgt->lut_lsd.lsd_feature_incompat &
-                   OBD_INCOMPAT_MULTI_RPCS &&
-                   lcd->lcd_generation != 0) {
+               if (tgt_is_multimodrpcs_record(tgt, lcd)) {
+                       atomic_inc(&tgt->lut_num_clients);
+
                        /* compute the highest valid client generation */
                        generation = max(generation, lcd->lcd_generation);
                        /* fill client_generation <-> export hash table */
index 6547a39..44e5979 100755 (executable)
@@ -4,6 +4,7 @@ set -e
 
 PTLDEBUG=${PTLDEBUG:--1}
 MOUNT_2=${MOUNT_2:-"yes"}
+LR_READER=${LR_READER:-"$LUSTRE/utils/lr_reader"}
 
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 . $LUSTRE/tests/test-framework.sh
@@ -1177,6 +1178,49 @@ test_32() {
 }
 run_test 32 "gap in update llog shouldn't break recovery"
 
+last_rcvd_check_incompat_flag() {
+       local facet="$1"
+       local flag2check="$2"
+       local dev=$(facet_device $facet)
+       local incompat
+
+       incompat=$(do_facet $facet $LR_READER $dev |
+                       awk '/feature_incompat:/ {print $2}')
+       echo "last_rcvd in $dev: incompat = $incompat"
+
+       return $(( (incompat & flag2check) != flag2check ))
+}
+
+
+test_33() { # LU-15935
+       [[ "$mds1_FSTYPE" == "ldiskfs" ]] || skip "ldiskfs only test"
+
+       clients_up
+       stop mds1
+
+       # check for OBD_INCOMPAT_MULTI_RPCS (0x400) in last_rcvd
+       last_rcvd_check_incompat_flag mds1 0x400 ||
+               error "1st failover: OBD_INCOMPAT_MULTI_RPCS is not set on MDT0000 last_rcvd"
+
+       # lose 1 client while the MDT failover
+       umount -f $MOUNT2
+
+       mount_facet mds1
+       wait_clients_import_state "$HOSTNAME" mds1 "\(REPLAY_WAIT\|REPLAY_LOCKS\)"
+
+       do_facet mds1 $LCTL --device $(convert_facet2label mds1) abort_recovery
+       wait_clients_import_state "$HOSTNAME" mds1 "FULL"
+       stop mds1
+
+       last_rcvd_check_incompat_flag mds1 0x400 ||
+               error "2sd failover: OBD_INCOMPAT_MULTI_RPCS is not set on MDT0000 last_rcvd"
+
+       mount_facet mds1
+       zconf_mount $HOSTNAME $MOUNT2
+       wait_clients_import_state "$HOSTNAME" mds1 "FULL"
+}
+run_test 33 "Check for OBD_INCOMPAT_MULTI_RPCS in last_rcvd after abort_recovery"
+
 complete $SECONDS
 SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP