Whamcloud - gitweb
LU-9817 lnet: safe access to msg 08/28308/2
authorAmir Shehata <amir.shehata@intel.com>
Tue, 1 Aug 2017 21:24:57 +0000 (14:24 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 9 Aug 2017 04:18:18 +0000 (04:18 +0000)
When tx credits are returned, if there are pending messages, they
need to be sent. Messages could have different tx_cpts, so the
correct one needs to be locked. After lnet_post_send_locked(),
if we locked a different CPT then we need to relock the correct one.
However, as part of lnet_post_send_locked(), lnet_finalize() can
be called, which can free the message. Therefore, the CPT of the
message being passed must be cached in order to prevent access to
freed memory.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Change-Id: I959fdc30daf87b5575d8371da20d5cf6f64e7d3c
Reviewed-on: https://review.whamcloud.com/28308
Tested-by: Jenkins
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Sonia Sharma <sonia.sharma@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/lnet/lib-move.c

index 9e75ca3..5863836 100644 (file)
@@ -1039,6 +1039,8 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
 
                txpeer->lpni_txcredits++;
                if (txpeer->lpni_txcredits <= 0) {
+                       int msg2_cpt;
+
                        msg2 = list_entry(txpeer->lpni_txq.next,
                                              struct lnet_msg, msg_list);
                        list_del(&msg2->msg_list);
@@ -1047,13 +1049,26 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)
                        LASSERT(msg2->msg_txpeer == txpeer);
                        LASSERT(msg2->msg_tx_delayed);
 
-                       if (msg2->msg_tx_cpt != msg->msg_tx_cpt) {
+                       msg2_cpt = msg2->msg_tx_cpt;
+
+                       /*
+                        * The msg_cpt can be different from the msg2_cpt
+                        * so we need to make sure we lock the correct cpt
+                        * for msg2.
+                        * Once we call lnet_post_send_locked() it is no
+                        * longer safe to access msg2, since it could've
+                        * been freed by lnet_finalize(), but we still
+                        * need to relock the correct cpt, so we cache the
+                        * msg2_cpt for the purpose of the check that
+                        * follows the call to lnet_post_send_locked().
+                        */
+                       if (msg2_cpt != msg->msg_tx_cpt) {
                                lnet_net_unlock(msg->msg_tx_cpt);
-                               lnet_net_lock(msg2->msg_tx_cpt);
+                               lnet_net_lock(msg2_cpt);
                        }
                         (void) lnet_post_send_locked(msg2, 1);
-                       if (msg2->msg_tx_cpt != msg->msg_tx_cpt) {
-                               lnet_net_unlock(msg2->msg_tx_cpt);
+                       if (msg2_cpt != msg->msg_tx_cpt) {
+                               lnet_net_unlock(msg2_cpt);
                                lnet_net_lock(msg->msg_tx_cpt);
                        }
                 } else {