Whamcloud - gitweb
LU-2591 lov: race between ptlrpc_rcv and mount/umount thread
authorHiroya Nozaki <nozaki.hiroya@jp.fujitsu.com>
Wed, 9 Jan 2013 08:33:53 +0000 (17:33 +0900)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 21 Feb 2013 04:35:21 +0000 (23:35 -0500)
The race referred to here happens in the following scenario:

1) mount runs but fails to communicate with some OSTs. Then the
   import objects which represent the OSTs are registered to
   a pinger list.
2) pinger succeeds in communicating with at least ONE OST. Then
   ptlrpc_rcv calls lov_set_osc_active to activate the OST and
   holds lov_refcount.
4) For some reason ... possibly mount finally fails or umount runs,
   ll_put_super is called
5) ll_put_super tries to disconnect all OSTs with lov_disconnect
   and this func calls lov_del_target to set all OSC's
   target->ltd_reap flags in order for lov_putref to handle all
   of them.
6) ptlrpc_rcv thread puts lov_refcount and if lov_refcount becomes
   0 here, the thread has to disconnect all the OSCs whose
   ltd->reap has been set by __lov_del_obd.
7) Some OSCs' imports are still in LUSTRE_IMP_CONNECTING state
   because of (2), so ptlrpc_rcv thread has to wait for these
   import states to be changed to non-recovery states, such as FULL,
   CLOSED or DISCON, at ptlrpc_disconnect_import.

Now the ptlrpc_rcv thread is waiting for the import states to be
changed to non-recovery states, but ptlrpc_rcv is the one that is
supposed to change a recovery state to a non-recovery state, so
ptlrpc_rcv must hang. The mount/umount thread which has called
ll_put_super also has to wait for the state change at
ptlrpc_disconnect_import, so umount must hang too.

Signed-off-by: Hiroya Nozaki <nozaki.hiroya@jp.fujitsu.com>
Change-Id: Idcf6831d1ee6b72332c943dfde5316fddba6c13f
Reviewed-on: http://review.whamcloud.com/4979
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd.h
lustre/lov/lov_obd.c

index 723d68b..7a91186 100644 (file)
@@ -705,6 +705,8 @@ struct lov_obd {
 
        /* Cached LRU pages from upper layer */
        void                   *lov_cache;
 
        /* Cached LRU pages from upper layer */
        void                   *lov_cache;
+
+       struct rw_semaphore     lov_notify_lock;
 };
 
 struct lmv_tgt_desc {
 };
 
 struct lmv_tgt_desc {
index 9d78609..534f40d 100644 (file)
@@ -462,8 +462,15 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                       enum obd_notify_event ev, void *data)
 {
         int rc = 0;
                       enum obd_notify_event ev, void *data)
 {
         int rc = 0;
+       struct lov_obd *lov = &obd->u.lov;
         ENTRY;
 
         ENTRY;
 
+       down_read(&lov->lov_notify_lock);
+       if (!lov->lov_connects) {
+               up_read(&lov->lov_notify_lock);
+               RETURN(rc);
+       }
+
         if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE ||
             ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) {
                 struct obd_uuid *uuid;
         if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE ||
             ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) {
                 struct obd_uuid *uuid;
@@ -471,6 +478,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                 LASSERT(watched);
 
                 if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
                 LASSERT(watched);
 
                 if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+                       up_read(&lov->lov_notify_lock);
                         CERROR("unexpected notification of %s %s!\n",
                                watched->obd_type->typ_name,
                                watched->obd_name);
                         CERROR("unexpected notification of %s %s!\n",
                                watched->obd_type->typ_name,
                                watched->obd_name);
@@ -483,6 +491,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                  */
                 rc = lov_set_osc_active(obd, uuid, ev);
                 if (rc < 0) {
                  */
                 rc = lov_set_osc_active(obd, uuid, ev);
                 if (rc < 0) {
+                       up_read(&lov->lov_notify_lock);
                         CERROR("event(%d) of %s failed: %d\n", ev,
                                obd_uuid2str(uuid), rc);
                         RETURN(rc);
                         CERROR("event(%d) of %s failed: %d\n", ev,
                                obd_uuid2str(uuid), rc);
                         RETURN(rc);
@@ -527,6 +536,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
                 obd_putref(obd);
         }
 
                 obd_putref(obd);
         }
 
+       up_read(&lov->lov_notify_lock);
         RETURN(rc);
 }
 
         RETURN(rc);
 }
 
@@ -679,6 +689,8 @@ int lov_del_target(struct obd_device *obd, __u32 index,
                 RETURN(-EINVAL);
         }
 
                 RETURN(-EINVAL);
         }
 
+       /* to make sure there's no ongoing lov_notify() now */
+       down_write(&lov->lov_notify_lock);
         obd_getref(obd);
 
         if (!lov->lov_tgts[index]) {
         obd_getref(obd);
 
         if (!lov->lov_tgts[index]) {
@@ -703,6 +715,7 @@ int lov_del_target(struct obd_device *obd, __u32 index,
         /* we really delete it from obd_putref */
 out:
         obd_putref(obd);
         /* we really delete it from obd_putref */
 out:
         obd_putref(obd);
+       up_write(&lov->lov_notify_lock);
 
         RETURN(rc);
 }
 
         RETURN(rc);
 }
@@ -819,6 +832,8 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         cfs_atomic_set(&lov->lov_refcount, 0);
         lov->lov_sp_me = LUSTRE_SP_CLI;
 
         cfs_atomic_set(&lov->lov_refcount, 0);
         lov->lov_sp_me = LUSTRE_SP_CLI;
 
+       init_rwsem(&lov->lov_notify_lock);
+
         lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
                                                    HASH_POOLS_MAX_BITS,
                                                    HASH_POOLS_BKT_BITS, 0,
         lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
                                                    HASH_POOLS_MAX_BITS,
                                                    HASH_POOLS_BKT_BITS, 0,