Whamcloud - gitweb
LU-7593 target: umount vs tgt_last_rcvd_update deadlock 04/17704/11
author: Andriy Skulysh <andriy.skulysh@seagate.com>
Tue, 12 Jul 2016 15:13:48 +0000 (18:13 +0300)
committer: Oleg Drokin <oleg.drokin@intel.com>
Fri, 2 Sep 2016 02:23:55 +0000 (02:23 +0000)
tgt_client_del() and
ofd_commitrw_write()->tgt_last_rcvd_update()
take the transaction and ted->ted_lcd_lock
in opposite order, which can deadlock:

thread1:
    osd_trans_start
    tgt_client_data_update
    tgt_client_del       <<< mutex_lock(&ted->ted_lcd_lock);
    ofd_obd_disconnect
    class_disconnect_export_list
    class_disconnect_exports
    class_cleanup
    ...
    sys_umount

thread2:
    __mutex_lock_slowpath
    mutex_lock          <<< mutex_lock(&ted->ted_lcd_lock);
    tgt_last_rcvd_update
    tgt_txn_stop_cb
    dt_txn_hook_stop
    osd_trans_stop
    ofd_trans_stop
    ofd_commitrw_write
    ...
    tgt_brw_write

Fix this by taking ted->ted_lcd_lock only around
tgt_client_data_write() inside tgt_client_data_update().

Change-Id: Id3f60636be2abb3b70a99ee44b735aab7dfb7657
Seagate-bug-id: MRP-3109
Signed-off-by: Andriy Skulysh <andriy.skulysh@seagate.com>
Reviewed-on: http://review.whamcloud.com/17704
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/target/tgt_lastrcvd.c

index 1c907a6..55e5995 100644 (file)
@@ -517,6 +517,7 @@ static int tgt_client_data_update(const struct lu_env *env,
                RETURN(PTR_ERR(th));
 
        tti_buf_lcd(tti);
                RETURN(PTR_ERR(th));
 
        tti_buf_lcd(tti);
+       mutex_lock(&ted->ted_lcd_lock);
        rc = dt_declare_record_write(env, tgt->lut_last_rcvd,
                                     &tti->tti_buf,
                                     ted->ted_lr_off, th);
        rc = dt_declare_record_write(env, tgt->lut_last_rcvd,
                                     &tti->tti_buf,
                                     ted->ted_lr_off, th);
@@ -546,6 +547,7 @@ static int tgt_client_data_update(const struct lu_env *env,
        rc = tgt_client_data_write(env, tgt, ted->ted_lcd, &tti->tti_off, th);
        EXIT;
 out:
        rc = tgt_client_data_write(env, tgt, ted->ted_lcd, &tti->tti_off, th);
        EXIT;
 out:
+       mutex_unlock(&ted->ted_lcd_lock);
        dt_trans_stop(env, tgt->lut_bottom, th);
        CDEBUG(D_INFO, "%s: update last_rcvd client data for UUID = %s, "
               "last_transno = "LPU64": rc = %d\n", tgt->lut_obd->obd_name,
        dt_trans_stop(env, tgt->lut_bottom, th);
        CDEBUG(D_INFO, "%s: update last_rcvd client data for UUID = %s, "
               "last_transno = "LPU64": rc = %d\n", tgt->lut_obd->obd_name,
@@ -1069,10 +1071,8 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp)
                RETURN(rc);
        }
 
                RETURN(rc);
        }
 
-       mutex_lock(&ted->ted_lcd_lock);
        memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
        rc = tgt_client_data_update(env, exp);
        memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
        rc = tgt_client_data_update(env, exp);
-       mutex_unlock(&ted->ted_lcd_lock);
 
        CDEBUG(rc == 0 ? D_INFO : D_ERROR,
               "%s: zeroing out client %s at idx %u (%llu), rc %d\n",
 
        CDEBUG(rc == 0 ? D_INFO : D_ERROR,
               "%s: zeroing out client %s at idx %u (%llu), rc %d\n",