Whamcloud - gitweb
LU-753 obdfilter: improper LASSERT in filter_commitrw_write()
author Niu Yawei <niu@whamcloud.com>
Sat, 22 Oct 2011 08:16:55 +0000 (16:16 +0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 26 Oct 2011 14:38:42 +0000 (10:38 -0400)
In rare cases fsfilt_commit_wait() wakes up and returns after the
transaction has finished its work and updated j_commit_sequence, but
before the commit callbacks have been run. This improperly triggers
LASSERT(oti->oti_transno <= obd->obd_last_committed).

We should just wait for the commit callbacks to finish instead of
keeping an improper LASSERT here.

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: Ibd5add8d352d2e7598be49b0bf8fa37d40ce6e1f
Reviewed-on: http://review.whamcloud.com/1583
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jay@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdfilter/filter_io_26.c

index a15884a..cc0d72c 100644 (file)
@@ -785,10 +785,28 @@ retry:
                         rc = err;
         }
 
-        if (obd->obd_replayable && !rc && wait_handle)
-                LASSERTF(oti->oti_transno <= obd->obd_last_committed,
-                         "oti_transno "LPU64" last_committed "LPU64"\n",
-                         oti->oti_transno, obd->obd_last_committed);
+        /* In rare cases fsfilt_commit_wait() will wake up and return after
+         * the transaction has finished its work and updated j_commit_sequence
+         * but the commit callbacks have not been run yet.  Wait here until
+         * that is finished so that clients requesting sync IO don't see the
+         * reply transno > last_committed.  LU-753 */
+        if (unlikely(obd->obd_replayable && !rc && wait_handle &&
+                     oti->oti_transno > obd->obd_last_committed)) {
+                cfs_waitq_t wq;
+                struct l_wait_info lwi =
+                        LWI_TIMEOUT_INTERVAL(cfs_time_seconds(5),
+                                             (cfs_duration_t)((HZ + 4)/5),
+                                             NULL, NULL);
+                cfs_waitq_init(&wq);
+                l_wait_event(wq,
+                             oti->oti_transno <= obd->obd_last_committed,
+                             &lwi);
+
+                /* commit callback isn't done after waiting for 5 secs ? */
+                if (unlikely(oti->oti_transno > obd->obd_last_committed))
+                        CERROR("transno:"LPU64" > last_committed:"LPU64"\n",
+                               oti->oti_transno, obd->obd_last_committed);
+        }
 
         fsfilt_check_slow(obd, now, "commitrw commit");