Whamcloud - gitweb
Branch HEAD
authorzam <zam>
Tue, 13 Oct 2009 11:33:00 +0000 (11:33 +0000)
committerzam <zam>
Tue, 13 Oct 2009 11:33:00 +0000 (11:33 +0000)
b=20642
i=zhen.liang
i=alexander.zarochentsev

fix for already freed rs object access in commit_replies:
1 . Introduce rs->rs_committed flag to indicate that commit_replies() has done
  all access to the rs object.
2. check rs->rs_committed instead of list_empty_careful() to sure that
   commit_replies does not access the rs object in parallel with
   ptlrpc_handle_rs and it is safe not to use exp_uncommitted_replies_lock.

lustre/include/lustre_net.h
lustre/ptlrpc/service.c

index b739289..6d46b00 100644 (file)
@@ -270,7 +270,9 @@ struct ptlrpc_reply_state {
         unsigned long          rs_handled:1;  /* been handled yet? */
         unsigned long          rs_on_net:1;   /* reply_out_callback pending? */
         unsigned long          rs_prealloc:1; /* rs from prealloc list */
-
+        unsigned long          rs_committed:1;/* the transaction was committed
+                                                 and the rs was dispatched
+                                                 by ptlrpc_commit_replies */
         int                    rs_size;
         __u32                  rs_opc;
         __u64                  rs_transno;
index 2276196..5eb8cef 100644 (file)
@@ -280,6 +280,7 @@ static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
                 rs->rs_scheduled = 1;
                 b->rsb_n_replies++;
         }
+        rs->rs_committed = 1;
         spin_unlock(&rs->rs_lock);
 }
 
@@ -1664,9 +1665,29 @@ ptlrpc_handle_rs (struct ptlrpc_reply_state *rs)
         list_del_init (&rs->rs_exp_list);
         spin_unlock (&exp->exp_lock);
 
-        /* Avoid exp_uncommitted_replies_lock contention if we 100% sure that
-         * rs has been removed from the list already */
-        if (!list_empty_careful(&rs->rs_obd_list)) {
+        /* The disk commit callback holds exp_uncommitted_replies_lock while it
+         * iterates over newly committed replies, removing them from
+         * exp_uncommitted_replies.  It then drops this lock and schedules the
+         * replies it found for handling here.
+         *
+         * We can avoid contention for exp_uncommitted_replies_lock between the
+         * HRT threads and further commit callbacks by checking rs_committed
+         * which is set in the commit callback while it holds both
+         * rs_lock and exp_uncommitted_reples.
+         * 
+         * If we see rs_committed clear, the commit callback _may_ not have
+         * handled this reply yet and we race with it to grab
+         * exp_uncommitted_replies_lock before removing the reply from
+         * exp_uncommitted_replies.  Note that if we lose the race and the
+         * reply has already been removed, list_del_init() is a noop.
+         *
+         * If we see rs_committed set, we know the commit callback is handling,
+         * or has handled this reply since store reordering might allow us to
+         * see rs_committed set out of sequence.  But since this is done
+         * holding rs_lock, we can be sure it has all completed once we hold
+         * rs_lock, which we do right next.
+         */
+        if (!rs->rs_committed) {
                 spin_lock(&exp->exp_uncommitted_replies_lock);
                 list_del_init(&rs->rs_obd_list);
                 spin_unlock(&exp->exp_uncommitted_replies_lock);