From f667cc6a477e0e17b5263669b4592668fbf005bb Mon Sep 17 00:00:00 2001 From: Jeya ganesh babu Jegatheesan Date: Thu, 31 Aug 2023 15:29:07 -0700 Subject: [PATCH] LU-17089 mdd: fix for bi_writers ref counter in case of error If mdd_child_ops() called from mdd_trans_create() returns an error, we don't call barrier_exit() to decrement bi_writers. Call barrier_exit() when mdd_child_ops() returns an error. This patch also adds more CDEBUG messages in lustre/target/barrier.c. Signed-off-by: Jeya ganesh babu J Signed-off-by: Timothy Day Change-Id: Ia430df404b700167cb9207eb13ac938575a2030a Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52275 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Mikhail Pershin Reviewed-by: Oleg Drokin --- lustre/mdd/mdd_trans.c | 3 +++ lustre/target/barrier.c | 25 ++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/lustre/mdd/mdd_trans.c b/lustre/mdd/mdd_trans.c index c2581ad..02e5e4a 100644 --- a/lustre/mdd/mdd_trans.c +++ b/lustre/mdd/mdd_trans.c @@ -63,6 +63,9 @@ struct thandle *mdd_trans_create(const struct lu_env *env, if (!IS_ERR(th) && uc) th->th_ignore_quota = !!cap_raised(uc->uc_cap, CAP_SYS_RESOURCE); + if (IS_ERR(th)) + barrier_exit(mdd->mdd_bottom); + return th; } diff --git a/lustre/target/barrier.c b/lustre/target/barrier.c index 6b764ee..c49c4f8 100644 --- a/lustre/target/barrier.c +++ b/lustre/target/barrier.c @@ -191,30 +191,45 @@ static int barrier_freeze(const struct lu_env *env, write_unlock(&barrier->bi_rwlock); rc = dt_sync(env, barrier->bi_next); - if (rc) + if (rc) { + CDEBUG(D_SNAPSHOT, "failed with dt_sync: %d\n", rc); RETURN(rc); + } LASSERT(barrier->bi_deadline != 0); left = barrier->bi_deadline - ktime_get_real_seconds(); - if (left <= 0) + if (left <= 0) { + CDEBUG(D_SNAPSHOT, "timed out after dt_sync: %lld, %lld\n", + left, barrier->bi_deadline); RETURN(1); + } if (phase1 && inflight != 0) { rc = 
wait_event_idle_timeout( barrier->bi_waitq, percpu_counter_sum(&barrier->bi_writers) == 0, cfs_time_seconds(left)); - if (rc <= 0) + + if (rc <= 0) { + CDEBUG(D_SNAPSHOT, "timed out waiting for bi_writers: %lld %d\n", + percpu_counter_sum(&barrier->bi_writers), + rc); RETURN(1); + } /* sync again after all inflight modifications done. */ rc = dt_sync(env, barrier->bi_next); - if (rc) + if (rc) { + CDEBUG(D_SNAPSHOT, "failed with dt_sync: %d\n", rc); RETURN(rc); + } - if (ktime_get_real_seconds() > barrier->bi_deadline) + if (ktime_get_real_seconds() > barrier->bi_deadline) { + CDEBUG(D_SNAPSHOT, "timed out after dt_sync: %lld, %lld\n", + left, barrier->bi_deadline); RETURN(1); + } } CDEBUG(D_SNAPSHOT, "%s: barrier freezing %s done.\n", -- 1.8.3.1