Whamcloud - gitweb
LU-8753 osp: add rpc generation 64/24364/6
author Di Wang <di.wang@intel.com>
Wed, 14 Dec 2016 22:13:30 +0000 (17:13 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 24 Jan 2017 05:19:48 +0000 (05:19 +0000)
Add an RPC generation to make sure the current update
request will not be sent until the remote llog
object has been refreshed.

Signed-off-by: Di Wang <di.wang@intel.com>
Change-Id: Iae678686b522d545b69510444805a1e411acfcfe
Reviewed-on: https://review.whamcloud.com/24364
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/dt_object.h
lustre/obdclass/llog.c
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_trans.c

index cbd3b92..588d0c3 100644 (file)
@@ -1859,9 +1859,11 @@ struct thandle {
        unsigned int            th_sync:1,
        /* local transation, no need to inform other layers */
                                th_local:1,
-       /* Whether we need wait the transaction to be submitted */
+       /* Whether we need to wait for the transaction to be submitted
+        * (sent to the remote target) */
                                th_wait_submit:1,
-       /* complex transaction which will track updates on all targets */
+       /* complex transaction which will track updates on all targets,
+        * including OSTs */
                                th_complex:1;
 };
 
index 80e4f12..7251144 100644 (file)
@@ -538,9 +538,13 @@ repeat:
                        }
 
                        if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) {
-                               CWARN("invalid length %d in llog record for "
-                                     "index %d/%d\n", rec->lrh_len,
-                                     rec->lrh_index, index);
+                               CWARN("%s: invalid length %d in llog "DOSTID
+                                     "record for index %d/%d\n",
+                                      loghandle->lgh_ctxt->loc_obd->obd_name,
+                                      rec->lrh_len,
+                                      POSTID(&loghandle->lgh_id.lgl_oi),
+                                      rec->lrh_index, index);
+
                                GOTO(out, rc = -EINVAL);
                        }
 
@@ -551,9 +555,10 @@ repeat:
                        }
 
                        if (rec->lrh_index != index) {
-                               CERROR("%s: Invalid record: index %u but "
-                                      "expected %u\n",
+                               CERROR("%s: "DOSTID" Invalid record: index %u"
+                                      " but expected %u\n",
                                       loghandle->lgh_ctxt->loc_obd->obd_name,
+                                      POSTID(&loghandle->lgh_id.lgl_oi),
                                       rec->lrh_index, index);
                                GOTO(out, rc = -ERANGE);
                        }
index 7b5d36f..138bb43 100644 (file)
@@ -510,6 +510,7 @@ static int osp_update_init(struct osp_device *osp)
        INIT_LIST_HEAD(&osp->opd_update->ou_list);
        osp->opd_update->ou_rpc_version = 1;
        osp->opd_update->ou_version = 1;
+       osp->opd_update->ou_generation = 0;
 
        /* start thread handling sending updates to the remote MDT */
        task = kthread_run(osp_send_update_thread, osp,
index f61b2f5..481932d 100644 (file)
@@ -115,6 +115,7 @@ struct osp_update_request {
        struct osp_thandle              *our_th;
 
        __u64                           our_version;
+       __u64                           our_generation;
        /* protect our_list and flag */
        spinlock_t                      our_list_lock;
        /* linked to the list(ou_list) in osp_updates */
@@ -128,9 +129,22 @@ struct osp_updates {
        struct list_head        ou_list;
        spinlock_t              ou_lock;
        wait_queue_head_t       ou_waitq;
-       /* wait for next updates */
+
+       /* The next rpc version which is supposed to be sent in
+        * osp_send_update_thread(). */
        __u64                   ou_rpc_version;
+
+       /* The rpc version assigned to the osp thandle during (osp_md_write()),
+        * which will be sent in this order. Note: the osp_thandle has to be
+        * sent in this order to make sure the remote update log will follow
+        * the llog format rule. XXX: these probably should be removed once we
+        * invent a new llog format */
        __u64                   ou_version;
+
+       /* The generation of the current osp update RPC, which is used to make
+        * sure stale RPCs (with an older generation) will not be sent,
+        * otherwise they will cause update llog corruption */
+       __u64                   ou_generation;
 };
 
 struct osp_device {
index 545fefd..ebecd8a 100644 (file)
@@ -1258,7 +1258,7 @@ struct thandle *osp_get_storage_thandle(const struct lu_env *env,
  *
  * Set the version for the transaction and add the request to
  * the sending list, then after transaction stop, the request
- * will be picked in the order of version, by sending thread.
+ * will be sent in the order of version by the sending thread.
  *
  * \param [in] oth     osp thandle to be set version.
  *
@@ -1288,6 +1288,7 @@ int osp_check_and_set_rpc_version(struct osp_thandle *oth,
        /* Assign the version and add it to the sending list */
        osp_thandle_get(oth);
        oth->ot_our->our_version = ou->ou_version++;
+       oth->ot_our->our_generation = ou->ou_generation;
        list_add_tail(&oth->ot_our->our_list,
                      &osp->opd_update->ou_list);
        oth->ot_our->our_req_ready = 0;
@@ -1295,8 +1296,8 @@ int osp_check_and_set_rpc_version(struct osp_thandle *oth,
        spin_unlock(&ou->ou_lock);
 
        LASSERT(oth->ot_super.th_wait_submit == 1);
-       CDEBUG(D_INFO, "%s: version %llu oth:version %p:%llu\n",
-              osp->opd_obd->obd_name, ou->ou_version, oth,
+       CDEBUG(D_INFO, "%s: version %llu gen %llu oth:version %p:%llu\n",
+              osp->opd_obd->obd_name, ou->ou_version, ou->ou_generation, oth,
               oth->ot_our->our_version);
 
        return 0;
@@ -1372,6 +1373,11 @@ void osp_invalidate_request(struct osp_device *osp)
        if (rc < 0) {
                CERROR("%s: init env error: rc = %d\n", osp->opd_obd->obd_name,
                       rc);
+
+               spin_lock(&ou->ou_lock);
+               ou->ou_generation++;
+               spin_unlock(&ou->ou_lock);
+
                return;
        }
 
@@ -1397,6 +1403,9 @@ void osp_invalidate_request(struct osp_device *osp)
                       our);
        }
 
+       /* Increase the generation, then the update request with old generation
+        * will fail with -EIO. */
+       ou->ou_generation++;
        spin_unlock(&ou->ou_lock);
 
        /* invalidate all of request in the sending list */
@@ -1464,7 +1473,8 @@ int osp_send_update_thread(void *arg)
                        osp_trans_callback(&env, our->our_th,
                                our->our_th->ot_super.th_result);
                        rc = our->our_th->ot_super.th_result;
-               } else if (OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) {
+               } else if (ou->ou_generation != our->our_generation ||
+                          OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) {
                        rc = -EIO;
                        osp_trans_callback(&env, our->our_th, rc);
                } else {