From e55fc043679cdfadfff6874ef78e2e0128ec37ac Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Wed, 6 Apr 2022 05:06:46 -0400 Subject: [PATCH] LU-15724 osp: wakeup all precreate threads A number of threads could sleep in osp_precreate_reserve() waiting for objects from the OST. When the MDT stops, Lustre should wake up all of these threads. When opd_pre_recovering is set, any wakeup of opd_pre_user_waitq is useless. Failover of the MDT does not produce a disconnect event, only an inactive event, so osp_precreate_cleanup_orphans() cannot be awakened. LustreError: 0-0: Forced cleanup waiting for mdt-kjcf05-MDT0001_UUID namespace with 46 resources in use, (rc=-110) schedule_timeout at ffffffff8e551cd3 osp_precreate_reserve at ffffffffc17d2d83 [osp] osp_declare_create at ffffffffc17c7eb9 [osp] lod_sub_declare_create at ffffffffc156415b [lod] lod_qos_declare_object_on at ffffffffc155bf42 [lod] lod_ost_alloc_rr.constprop.23 at ffffffffc155db2f [lod] lod_qos_prep_create at ffffffffc15630a6 [lod] lod_declare_instantiate_components at ffffffffc154b237 [lod] HPE-bug-id: LUS-10750 Signed-off-by: Alexander Boyko Change-Id: If0164cfbecb1e358d9857421cb234559dc8cecbc Reviewed-on: https://review.whamcloud.com/47005 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Sergey Cheremencev Reviewed-by: Oleg Drokin --- lustre/osp/osp_precreate.c | 38 ++++++++++++++++++++++---------------- lustre/ptlrpc/import.c | 4 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 124ef8d..5585ce1 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -101,12 +101,13 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs); static void osp_pre_update_status_msfs(struct osp_device *d, struct obd_statfs *msfs, int rc) { + CDEBUG(D_INFO, "%s: Updating status = %d\n", d->opd_obd->obd_name, rc); if (rc) d->opd_pre_status = rc; else osp_pre_update_msfs(d, msfs); - 
wake_up(&d->opd_pre_user_waitq); + wake_up_all(&d->opd_pre_user_waitq); } /* Pass in the old statfs data in case the limits have changed */ @@ -722,7 +723,6 @@ ready: out_req: /* now we can wakeup all users awaiting for objects */ osp_pre_update_status(d, rc); - wake_up(&d->opd_pre_user_waitq); /* pause to let osp_precreate_reserve to go first */ CFS_FAIL_TIMEOUT(OBD_FAIL_OSP_PRECREATE_PAUSE, 2); @@ -955,6 +955,7 @@ out: if (req) ptlrpc_req_finished(req); + /* * If rc is zero, the pre-creation window should have been emptied. * Since waking up the herd would be useless without pre-created @@ -971,7 +972,7 @@ out: * this OSP isn't quite functional yet */ osp_pre_update_status(d, rc); } else { - wake_up(&d->opd_pre_user_waitq); + wake_up_all(&d->opd_pre_user_waitq); } } else { spin_lock(&d->opd_pre_lock); @@ -1364,6 +1365,19 @@ static int osp_precreate_thread(void *_args) static int osp_precreate_ready_condition(const struct lu_env *env, struct osp_device *d) { + /* Bail out I/O fails to OST */ + if (d->opd_pre_status != 0 && + d->opd_pre_status != -EAGAIN && + d->opd_pre_status != -ENODEV && + d->opd_pre_status != -ENOTCONN && + d->opd_pre_status != -ENOSPC) { + /* DEBUG LU-3230 */ + if (d->opd_pre_status != -EIO) + CERROR("%s: precreate failed opd_pre_status %d\n", + d->opd_obd->obd_name, d->opd_pre_status); + return 1; + } + if (d->opd_pre_recovering) return 0; @@ -1378,19 +1392,6 @@ static int osp_precreate_ready_condition(const struct lu_env *env, d->opd_pre_status == -ENOSPC) return 1; - /* Bail out I/O fails to OST */ - if (d->opd_pre_status != 0 && - d->opd_pre_status != -EAGAIN && - d->opd_pre_status != -ENODEV && - d->opd_pre_status != -ENOTCONN && - d->opd_pre_status != -ENOSPC) { - /* DEBUG LU-3230 */ - if (d->opd_pre_status != -EIO) - CERROR("%s: precreate failed opd_pre_status %d\n", - d->opd_obd->obd_name, d->opd_pre_status); - return 1; - } - return 0; } @@ -1513,6 +1514,8 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d, 
break; } + CDEBUG(D_INFO, "%s: Sleeping on objects\n", + d->opd_obd->obd_name); if (wait_event_idle_timeout( d->opd_pre_user_waitq, osp_precreate_ready_condition(env, d), @@ -1527,6 +1530,9 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d, atomic_read(&d->opd_sync_changes), atomic_read(&d->opd_sync_rpcs_in_progress), d->opd_pre_status); + } else { + CDEBUG(D_INFO, "%s: Waked up, status=%d\n", + d->opd_obd->obd_name, d->opd_pre_status); } } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 77d9da0..64412cc 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1767,6 +1767,10 @@ out: memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); spin_unlock(&imp->imp_lock); + obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); + if (!noclose) + obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE); + if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN) rc = 0; RETURN(rc); -- 1.8.3.1