Whamcloud - gitweb
LU-12691 ldlm: obd_max_recoverable_clients is not atomic 14/35914/3
author: Tatsushi Takamura <takamr.tatsushi@jp.fujitsu.com>
Mon, 26 Aug 2019 00:12:37 +0000 (09:12 +0900)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 9 Oct 2019 22:35:25 +0000 (22:35 +0000)
Originally, obd_max_recoverable_clients was never incremented
by more than one process at the same time. But since LU-3540,
it can be incremented concurrently by multiple processes.

Change the type of obd_max_recoverable_clients to atomic_t
and access it only through atomic operations.

Signed-off-by: Tatsushi Takamura <takamr.tatsushi@jp.fujitsu.com>
Change-Id: I9a67bbbfacab2e05858243f649e3a4e0d4b5d7f7
Reviewed-on: https://review.whamcloud.com/35914
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/obdclass/lprocfs_status_server.c
lustre/target/tgt_lastrcvd.c

index c43cafb..fcd78b1 100644 (file)
@@ -664,7 +664,7 @@ struct obd_device {
        struct list_head        obd_exports_timed;
        time64_t                obd_eviction_timer;     /* for ping evictor */
 
-       int                     obd_max_recoverable_clients;
+       atomic_t                obd_max_recoverable_clients;
        atomic_t                obd_connected_clients;
        int                     obd_stale_clients;
         /* this lock protects all recovery list_heads, timer and
index 4581df0..50ce61c 100644 (file)
@@ -845,7 +845,7 @@ static int target_handle_reconnect(struct lustre_handle *conn,
                              target->obd_name,
                              obd_uuid2str(&exp->exp_client_uuid),
                              obd_export_nid2str(exp),
-                             target->obd_max_recoverable_clients,
+                             atomic_read(&target->obd_max_recoverable_clients),
                              timeout / 60, timeout % 60);
        } else {
                struct target_distribute_txn_data *tdtd;
@@ -1324,7 +1324,8 @@ no_export:
 
                        connected = atomic_read(&target->obd_connected_clients);
                        in_progress = atomic_read(&target->obd_lock_replay_clients);
-                       known = target->obd_max_recoverable_clients;
+                       known =
+                          atomic_read(&target->obd_max_recoverable_clients);
                        stale = target->obd_stale_clients;
                        remaining = hrtimer_expires_remaining(timer);
                        left = ktime_divns(remaining, NSEC_PER_SEC);
@@ -1480,9 +1481,10 @@ dont_check_exports:
                 * condition.
                 */
                if (new_mds_mds_conn)
-                       target->obd_max_recoverable_clients++;
+                       atomic_inc(&target->obd_max_recoverable_clients);
+
                if (atomic_inc_return(&target->obd_connected_clients) ==
-                   target->obd_max_recoverable_clients)
+                   atomic_read(&target->obd_max_recoverable_clients))
                        wake_up(&target->obd_next_transno_waitq);
        }
 
@@ -1643,7 +1645,7 @@ static void target_finish_recovery(struct lu_target *lut)
                LCONSOLE_INFO("%s: Recovery over after %lld:%.02lld, of %d clients %d recovered and %d %s evicted.\n",
                              obd->obd_name, (s64)elapsed_time / 60,
                              (s64)elapsed_time % 60,
-                             obd->obd_max_recoverable_clients,
+                             atomic_read(&obd->obd_max_recoverable_clients),
                              atomic_read(&obd->obd_connected_clients),
                              obd->obd_stale_clients,
                              obd->obd_stale_clients == 1 ? "was" : "were");
@@ -1805,9 +1807,11 @@ static void target_start_recovery_timer(struct obd_device *obd)
                      obd->obd_name,
                      obd->obd_recovery_timeout / 60,
                      obd->obd_recovery_timeout % 60,
-                     obd->obd_max_recoverable_clients,
-                     (obd->obd_max_recoverable_clients == 1) ? "" : "s",
-                     (obd->obd_max_recoverable_clients == 1) ? "s" : "");
+                     atomic_read(&obd->obd_max_recoverable_clients),
+                     (atomic_read(&obd->obd_max_recoverable_clients) == 1) ?
+                     "" : "s",
+                     (atomic_read(&obd->obd_max_recoverable_clients) == 1) ?
+                     "s" : "");
 }
 
 /**
@@ -1993,7 +1997,8 @@ static int check_for_next_transno(struct lu_target *lut)
 
        CDEBUG(D_HA,
               "max: %d, connected: %d, completed: %d, queue_len: %d, req_transno: %llu, next_transno: %llu\n",
-              obd->obd_max_recoverable_clients, connected, completed,
+              atomic_read(&obd->obd_max_recoverable_clients),
+              connected, completed,
               queue_len, req_transno, next_transno);
 
        if (obd->obd_abort_recovery) {
@@ -2307,13 +2312,15 @@ static int check_for_recovery_ready(struct lu_target *lut)
 
        CDEBUG(D_HA,
               "connected %d stale %d max_recoverable_clients %d abort %d expired %d\n",
-              clnts, obd->obd_stale_clients, obd->obd_max_recoverable_clients,
+              clnts, obd->obd_stale_clients,
+              atomic_read(&obd->obd_max_recoverable_clients),
               obd->obd_abort_recovery, obd->obd_recovery_expired);
 
        if (!obd->obd_abort_recovery && !obd->obd_recovery_expired) {
-               LASSERT(clnts <= obd->obd_max_recoverable_clients);
+               LASSERT(clnts <=
+                       atomic_read(&obd->obd_max_recoverable_clients));
                if (clnts + obd->obd_stale_clients <
-                   obd->obd_max_recoverable_clients)
+                   atomic_read(&obd->obd_max_recoverable_clients))
                        return 0;
        }
 
@@ -2765,7 +2772,7 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
        if (lut->lut_bottom->dd_rdonly)
                return;
 
-       if (obd->obd_max_recoverable_clients == 0) {
+       if (atomic_read(&obd->obd_max_recoverable_clients) == 0) {
                /** Update server last boot epoch */
                tgt_boot_epoch_update(lut);
                return;
@@ -2773,7 +2780,8 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
 
        CDEBUG(D_HA, "RECOVERY: service %s, %d recoverable clients, "
               "last_transno %llu\n", obd->obd_name,
-              obd->obd_max_recoverable_clients, obd->obd_last_committed);
+              atomic_read(&obd->obd_max_recoverable_clients),
+              obd->obd_last_committed);
        LASSERT(obd->obd_stopping == 0);
        obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
        obd->obd_recovery_start = 0;
index 87c8e60..30e551f 100644 (file)
@@ -671,7 +671,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        LASSERT(obd != NULL);
 
        seq_printf(m, "status: ");
-       if (obd->obd_max_recoverable_clients == 0) {
+       if (atomic_read(&obd->obd_max_recoverable_clients) == 0) {
                seq_printf(m, "INACTIVE\n");
                goto out;
        }
@@ -687,9 +687,9 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                           ktime_get_real_seconds() - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
-                          obd->obd_max_recoverable_clients -
+                          atomic_read(&obd->obd_max_recoverable_clients) -
                           obd->obd_stale_clients,
-                          obd->obd_max_recoverable_clients);
+                          atomic_read(&obd->obd_max_recoverable_clients));
                seq_printf(m, "replayed_requests: %d\n",
                           obd->obd_replayed_requests);
                seq_printf(m, "last_transno: %lld\n",
@@ -745,7 +745,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                         ktime_get_real_seconds()));
        seq_printf(m, "connected_clients: %d/%d\n",
                   atomic_read(&obd->obd_connected_clients),
-                  obd->obd_max_recoverable_clients);
+                  atomic_read(&obd->obd_max_recoverable_clients));
        /* Number of clients that have completed recovery */
        seq_printf(m, "req_replay_clients: %d\n",
                   atomic_read(&obd->obd_req_replay_clients));
index b36908b..f31a6a3 100644 (file)
@@ -844,7 +844,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt)
         * - there is no client to recover or the recovery was aborted
         */
        if (!strncmp(tgt->lut_obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) &&
-           (tgt->lut_obd->obd_max_recoverable_clients == 0 ||
+           (atomic_read(&tgt->lut_obd->obd_max_recoverable_clients) == 0 ||
            tgt->lut_obd->obd_abort_recovery))
                tgt->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS;
 
@@ -1565,7 +1565,7 @@ static int tgt_clients_data_init(const struct lu_env *env,
                exp->exp_connecting = 0;
                exp->exp_in_recovery = 0;
                spin_unlock(&exp->exp_lock);
-               obd->obd_max_recoverable_clients++;
+               atomic_inc(&obd->obd_max_recoverable_clients);
 
                if (tgt->lut_lsd.lsd_feature_incompat &
                    OBD_INCOMPAT_MULTI_RPCS &&