Whamcloud - gitweb
LU-15724 osp: wakeup all precreate threads 48/48548/2
author: Alexander Boyko <alexander.boyko@hpe.com>
Wed, 14 Sep 2022 19:59:41 +0000 (12:59 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Mon, 26 Sep 2022 16:58:07 +0000 (16:58 +0000)
A number of threads can sleep in osp_precreate_reserve() while
waiting for objects from the OST. When the MDT stops, Lustre should
wake up all of these threads. While opd_pre_recovering is set, any
wakeup of opd_pre_user_waitq is useless. MDT failover does not produce
a disconnect event, only an inactive event, so
osp_precreate_cleanup_orphans() cannot be awakened.

LustreError: 0-0: Forced cleanup waiting for mdt-kjcf05-MDT0001_UUID
namespace with 46 resources in use, (rc=-110)

 schedule_timeout at ffffffff8e551cd3
 osp_precreate_reserve at ffffffffc17d2d83 [osp]
 osp_declare_create at ffffffffc17c7eb9 [osp]
 lod_sub_declare_create at ffffffffc156415b [lod]
 lod_qos_declare_object_on at ffffffffc155bf42 [lod]
 lod_ost_alloc_rr.constprop.23 at ffffffffc155db2f [lod]
 lod_qos_prep_create at ffffffffc15630a6 [lod]
 lod_declare_instantiate_components at ffffffffc154b237 [lod]

Lustre-change: https://review.whamcloud.com/47005
Lustre-commit: e55fc043679cdfadfff6874ef78e2e0128ec37ac

HPE-bug-id: LUS-10750
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: If0164cfbecb1e358d9857421cb234559dc8cecbc
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-on: https://review.whamcloud.com/48548
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osp/osp_precreate.c
lustre/ptlrpc/import.c

index 124ef8d..5585ce1 100644 (file)
@@ -101,12 +101,13 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs);
 static void osp_pre_update_status_msfs(struct osp_device *d,
                                       struct obd_statfs *msfs, int rc)
 {
+       CDEBUG(D_INFO, "%s: Updating status = %d\n", d->opd_obd->obd_name, rc);
        if (rc)
                d->opd_pre_status = rc;
        else
                osp_pre_update_msfs(d, msfs);
 
-       wake_up(&d->opd_pre_user_waitq);
+       wake_up_all(&d->opd_pre_user_waitq);
 }
 
 /* Pass in the old statfs data in case the limits have changed */
@@ -722,7 +723,6 @@ ready:
 out_req:
        /* now we can wakeup all users awaiting for objects */
        osp_pre_update_status(d, rc);
-       wake_up(&d->opd_pre_user_waitq);
 
        /* pause to let osp_precreate_reserve to go first */
        CFS_FAIL_TIMEOUT(OBD_FAIL_OSP_PRECREATE_PAUSE, 2);
@@ -955,6 +955,7 @@ out:
        if (req)
                ptlrpc_req_finished(req);
 
+
        /*
         * If rc is zero, the pre-creation window should have been emptied.
         * Since waking up the herd would be useless without pre-created
@@ -971,7 +972,7 @@ out:
                         * this OSP isn't quite functional yet */
                        osp_pre_update_status(d, rc);
                } else {
-                       wake_up(&d->opd_pre_user_waitq);
+                       wake_up_all(&d->opd_pre_user_waitq);
                }
        } else {
                spin_lock(&d->opd_pre_lock);
@@ -1364,6 +1365,19 @@ static int osp_precreate_thread(void *_args)
 static int osp_precreate_ready_condition(const struct lu_env *env,
                                         struct osp_device *d)
 {
+       /* Bail out I/O fails to OST */
+       if (d->opd_pre_status != 0 &&
+           d->opd_pre_status != -EAGAIN &&
+           d->opd_pre_status != -ENODEV &&
+           d->opd_pre_status != -ENOTCONN &&
+           d->opd_pre_status != -ENOSPC) {
+               /* DEBUG LU-3230 */
+               if (d->opd_pre_status != -EIO)
+                       CERROR("%s: precreate failed opd_pre_status %d\n",
+                              d->opd_obd->obd_name, d->opd_pre_status);
+               return 1;
+       }
+
        if (d->opd_pre_recovering)
                return 0;
 
@@ -1378,19 +1392,6 @@ static int osp_precreate_ready_condition(const struct lu_env *env,
            d->opd_pre_status == -ENOSPC)
                return 1;
 
-       /* Bail out I/O fails to OST */
-       if (d->opd_pre_status != 0 &&
-           d->opd_pre_status != -EAGAIN &&
-           d->opd_pre_status != -ENODEV &&
-           d->opd_pre_status != -ENOTCONN &&
-           d->opd_pre_status != -ENOSPC) {
-               /* DEBUG LU-3230 */
-               if (d->opd_pre_status != -EIO)
-                       CERROR("%s: precreate failed opd_pre_status %d\n",
-                              d->opd_obd->obd_name, d->opd_pre_status);
-               return 1;
-       }
-
        return 0;
 }
 
@@ -1513,6 +1514,8 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d,
                        break;
                }
 
+               CDEBUG(D_INFO, "%s: Sleeping on objects\n",
+                      d->opd_obd->obd_name);
                if (wait_event_idle_timeout(
                            d->opd_pre_user_waitq,
                            osp_precreate_ready_condition(env, d),
@@ -1527,6 +1530,9 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d,
                               atomic_read(&d->opd_sync_changes),
                               atomic_read(&d->opd_sync_rpcs_in_progress),
                               d->opd_pre_status);
+               } else {
+                       CDEBUG(D_INFO, "%s: Waked up, status=%d\n",
+                              d->opd_obd->obd_name, d->opd_pre_status);
                }
        }
 
index 1e0dd2d..565ba18 100644 (file)
@@ -1769,6 +1769,10 @@ out:
        memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
        spin_unlock(&imp->imp_lock);
 
+       obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+       if (!noclose)
+               obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+
        if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
                rc = 0;
        RETURN(rc);