Whamcloud - gitweb
LU-15935 target: keep track of multirpc slots in last_rcvd 82/48082/11
authorEtienne AUJAMES <etienne.aujames@cea.fr>
Fri, 29 Jul 2022 12:35:33 +0000 (14:35 +0200)
committerOleg Drokin <green@whamcloud.com>
Wed, 2 Nov 2022 07:09:15 +0000 (07:09 +0000)
OBD_INCOMPAT_MULTI_RPCS is cleared by tgt_boot_epoch_update() if the
recovery is aborted. This assumes that all the clients are evicted,
but that is not always true: some clients could have successfully
finished their recovery. In that case, those clients keep their
last_rcvd slot.

This patch modifies lut_num_clients to keep track of multirpc
slots in last_rcvd.
For now the counter is used only by tgt_fini() to clear
OBD_INCOMPAT_MULTI_RPCS, so we can extend this use case to
tgt_boot_epoch_update().

Add replay-dual test_33.

Test-Parameters: testlist=replay-dual env=ONLY=33,ONLY_REPEAT=30
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I70791c9dcb7cc77f018b9e5c95568598d54f0322
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48082
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/lu_target.h
lustre/target/tgt_lastrcvd.c
lustre/tests/replay-dual.sh

index dd08892..f2286c5 100644 (file)
@@ -192,7 +192,8 @@ struct lu_target {
        /** Bitmap of known clients */
        unsigned long           *lut_client_bitmap;
        /* Number of clients supporting multiple modify RPCs
-        * recorded in the bitmap */
+        * recorded in the last_rcvd file
+        */
        atomic_t                 lut_num_clients;
        /* Client generation to identify client slot reuse */
        atomic_t                 lut_client_generation;
index b21b1f4..035d35d 100644 (file)
@@ -384,6 +384,13 @@ static inline struct lu_buf *tti_buf_lcd(struct tgt_thread_info *tti)
        return &tti->tti_buf;
 }
 
+/**
+ * Tell whether a last_rcvd client record is a multiple modify RPCs slot:
+ * the target has OBD_INCOMPAT_MULTI_RPCS set in lsd_feature_incompat and
+ * the record has a non-zero lcd_generation.
+ */
+static inline bool tgt_is_multimodrpcs_record(struct lu_target *tgt,
+                                             struct lsd_client_data *lcd)
+{
+       if (!(tgt->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS))
+               return false;
+
+       return lcd->lcd_generation != 0;
+}
+
 /**
  * Allocate in-memory data for client slot related to export.
  */
@@ -453,9 +460,6 @@ void tgt_client_free(struct obd_export *exp)
                       exp->exp_obd->obd_name, ted->ted_lr_idx);
                LBUG();
        }
-
-       if (tgt_is_multimodrpcs_client(exp) && !exp->exp_obd->obd_stopping)
-               atomic_dec(&lut->lut_num_clients);
 }
 EXPORT_SYMBOL(tgt_client_free);
 
@@ -837,13 +841,11 @@ void tgt_boot_epoch_update(struct lu_target *tgt)
        list_splice_init(&client_list, &tgt->lut_obd->obd_final_req_queue);
        spin_unlock(&tgt->lut_obd->obd_recovery_task_lock);
 
-       /** Clear MULTI RPCS incompatibility flag if
-        * - target is MDT and
-        * - there is no client to recover or the recovery was aborted
+       /**
+        * Clear MULTI RPCS incompatibility flag if there is no multi-rpcs
+        * client in last_rcvd file
         */
-       if (!strncmp(tgt->lut_obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) &&
-           (atomic_read(&tgt->lut_obd->obd_max_recoverable_clients) == 0 ||
-           tgt->lut_obd->obd_abort_recovery))
+       if (atomic_read(&tgt->lut_num_clients) == 0)
                tgt->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS;
 
        /** update server epoch */
@@ -1028,7 +1030,6 @@ repeat:
        if (tgt_is_multimodrpcs_client(exp)) {
                /* Set MULTI RPCS incompatibility flag to prevent previous
                 * Lustre versions to mount a target with reply_data file */
-               atomic_inc(&tgt->lut_num_clients);
                if (!(tgt->lut_lsd.lsd_feature_incompat &
                      OBD_INCOMPAT_MULTI_RPCS)) {
                        tgt->lut_lsd.lsd_feature_incompat |=
@@ -1058,11 +1059,16 @@ repeat:
                RETURN(-ENOSPC);
 
        rc = tgt_client_data_update(env, exp);
-       if (rc)
+       if (rc) {
                CERROR("%s: Failed to write client lcd at idx %d, rc %d\n",
                       tgt->lut_obd->obd_name, idx, rc);
+               RETURN(rc);
+       }
 
-       RETURN(rc);
+       if (tgt_is_multimodrpcs_client(exp))
+               atomic_inc(&tgt->lut_num_clients);
+
+       RETURN(0);
 }
 EXPORT_SYMBOL(tgt_client_new);
 
@@ -1092,7 +1098,6 @@ int tgt_client_add(const struct lu_env *env,  struct obd_export *exp, int idx)
                       tgt->lut_obd->obd_name,  idx);
                LBUG();
        }
-       atomic_inc(&tgt->lut_num_clients);
 
        CDEBUG(D_INFO, "%s: client at idx %d with UUID '%s' added, "
               "generation %d\n",
@@ -1171,9 +1176,21 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp)
                RETURN(rc);
        }
 
+       /* Race between an eviction and a disconnection ?*/
+       mutex_lock(&ted->ted_lcd_lock);
+       if (ted->ted_lcd->lcd_uuid[0] == '\0') {
+               mutex_unlock(&ted->ted_lcd_lock);
+               RETURN(rc);
+       }
+
        memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
+       mutex_unlock(&ted->ted_lcd_lock);
+
        rc = tgt_client_data_update(env, exp);
 
+       if (!rc && tgt_is_multimodrpcs_record(tgt, ted->ted_lcd))
+               atomic_dec(&tgt->lut_num_clients);
+
        CDEBUG(rc == 0 ? D_INFO : D_ERROR,
               "%s: zeroing out client %s at idx %u (%llu), rc %d\n",
               tgt->lut_obd->obd_name, ted->ted_lcd->lcd_uuid,
@@ -1645,9 +1662,9 @@ static int tgt_clients_data_init(const struct lu_env *env,
                spin_unlock(&exp->exp_lock);
                atomic_inc(&obd->obd_max_recoverable_clients);
 
-               if (tgt->lut_lsd.lsd_feature_incompat &
-                   OBD_INCOMPAT_MULTI_RPCS &&
-                   lcd->lcd_generation != 0) {
+               if (tgt_is_multimodrpcs_record(tgt, lcd)) {
+                       atomic_inc(&tgt->lut_num_clients);
+
                        /* compute the highest valid client generation */
                        generation = max(generation, lcd->lcd_generation);
                        /* fill client_generation <-> export hash table */
index 551296e..8e24394 100755 (executable)
@@ -4,6 +4,7 @@ set -e
 
 PTLDEBUG=${PTLDEBUG:--1}
 MOUNT_2=${MOUNT_2:-"yes"}
+LR_READER=${LR_READER:-"$LUSTRE/utils/lr_reader"}
 
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 . $LUSTRE/tests/test-framework.sh
@@ -1178,6 +1179,49 @@ test_32() {
 }
 run_test 32 "gap in update llog shouldn't break recovery"
 
+last_rcvd_check_incompat_flag() {
+       local facet="$1"
+       local flag2check="$2"
+       local dev=$(facet_device $facet)
+       local incompat
+
+       incompat=$(do_facet $facet $LR_READER $dev |
+                       awk '/feature_incompat:/ {print $2}')
+       echo "last_rcvd in $dev: incompat = $incompat"
+
+       return $(( (incompat & flag2check) != flag2check ))
+}
+
+
+test_33() { # LU-15935
+       [[ "$mds1_FSTYPE" == "ldiskfs" ]] || skip "ldiskfs only test"
+
+       clients_up
+       stop mds1
+
+       # check for OBD_INCOMPAT_MULTI_RPCS (0x400) in last_rcvd
+       last_rcvd_check_incompat_flag mds1 0x400 ||
+               error "1st failover: OBD_INCOMPAT_MULTI_RPCS is not set on MDT0000 last_rcvd"
+
+       # lose 1 client while the MDT failover
+       umount -f $MOUNT2
+
+       mount_facet mds1
+       wait_clients_import_state "$HOSTNAME" mds1 "\(REPLAY_WAIT\|REPLAY_LOCKS\)"
+
+       do_facet mds1 $LCTL --device $(convert_facet2label mds1) abort_recovery
+       wait_clients_import_state "$HOSTNAME" mds1 "FULL"
+       stop mds1
+
+       last_rcvd_check_incompat_flag mds1 0x400 ||
+               error "2sd failover: OBD_INCOMPAT_MULTI_RPCS is not set on MDT0000 last_rcvd"
+
+       mount_facet mds1
+       zconf_mount $HOSTNAME $MOUNT2
+       wait_clients_import_state "$HOSTNAME" mds1 "FULL"
+}
+run_test 33 "Check for OBD_INCOMPAT_MULTI_RPCS in last_rcvd after abort_recovery"
+
 complete $SECONDS
 SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP