Whamcloud - gitweb
LU-17089 mdd: fix for bi_writers ref counter in case of error 75/52275/4
author: Jeya ganesh babu Jegatheesan <jeyaga@amazon.com>
Thu, 31 Aug 2023 22:29:07 +0000 (15:29 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Thu, 28 Sep 2023 08:00:55 +0000 (08:00 +0000)
If the mdd_child_ops() call made from mdd_trans_create() returns an error,
we don't call barrier_exit() to decrement bi_writers. Call
barrier_exit() when mdd_child_ops() returns an error.

This patch also adds more CDEBUG messages in lustre/target/barrier.c.

Signed-off-by: Jeya ganesh babu J <jeyaga@amazon.com>
Signed-off-by: Timothy Day <timday@amazon.com>
Change-Id: Ia430df404b700167cb9207eb13ac938575a2030a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52275
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdd/mdd_trans.c
lustre/target/barrier.c

index c2581ad..02e5e4a 100644 (file)
@@ -63,6 +63,9 @@ struct thandle *mdd_trans_create(const struct lu_env *env,
        if (!IS_ERR(th) && uc)
                th->th_ignore_quota = !!cap_raised(uc->uc_cap, CAP_SYS_RESOURCE);
 
+       if (IS_ERR(th))
+               barrier_exit(mdd->mdd_bottom);
+
        return th;
 }
 
index 6b764ee..c49c4f8 100644 (file)
@@ -191,30 +191,45 @@ static int barrier_freeze(const struct lu_env *env,
        write_unlock(&barrier->bi_rwlock);
 
        rc = dt_sync(env, barrier->bi_next);
-       if (rc)
+       if (rc) {
+               CDEBUG(D_SNAPSHOT, "failed with dt_sync: %d\n", rc);
                RETURN(rc);
+       }
 
        LASSERT(barrier->bi_deadline != 0);
 
        left = barrier->bi_deadline - ktime_get_real_seconds();
-       if (left <= 0)
+       if (left <= 0) {
+               CDEBUG(D_SNAPSHOT, "timed out after dt_sync: %lld, %lld\n",
+                      left, barrier->bi_deadline);
                RETURN(1);
+       }
 
        if (phase1 && inflight != 0) {
                rc = wait_event_idle_timeout(
                        barrier->bi_waitq,
                        percpu_counter_sum(&barrier->bi_writers) == 0,
                        cfs_time_seconds(left));
-               if (rc <= 0)
+
+               if (rc <= 0) {
+                       CDEBUG(D_SNAPSHOT, "timed out waiting for bi_writers: %lld %d\n",
+                              percpu_counter_sum(&barrier->bi_writers),
+                              rc);
                        RETURN(1);
+               }
 
                /* sync again after all inflight modifications done. */
                rc = dt_sync(env, barrier->bi_next);
-               if (rc)
+               if (rc) {
+                       CDEBUG(D_SNAPSHOT, "failed with dt_sync: %d\n", rc);
                        RETURN(rc);
+               }
 
-               if (ktime_get_real_seconds() > barrier->bi_deadline)
+               if (ktime_get_real_seconds() > barrier->bi_deadline) {
+                       CDEBUG(D_SNAPSHOT, "timed out after dt_sync: %lld, %lld\n",
+                              left, barrier->bi_deadline);
                        RETURN(1);
+               }
        }
 
        CDEBUG(D_SNAPSHOT, "%s: barrier freezing %s done.\n",