From 760f027f7271aa47fe962e1f01b7a753e3b0b1d0 Mon Sep 17 00:00:00 2001 From: zam Date: Tue, 13 Oct 2009 11:33:00 +0000 Subject: [PATCH] Branch HEAD b=20642 i=zhen.liang i=alexander.zarochentsev fix for already freed rs object access in commit_replies: 1 . Introduce rs->rs_committed flag to indicate that commit_replies() has done all access to the rs object. 2. check rs->rs_committed instead of list_empty_careful() to sure that commit_replies does not access the rs object in parallel with ptlrpc_handle_rs and it is safe not to use exp_uncommitted_replies_lock. --- lustre/include/lustre_net.h | 4 +++- lustre/ptlrpc/service.c | 27 ++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index b739289..6d46b00 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -270,7 +270,9 @@ struct ptlrpc_reply_state { unsigned long rs_handled:1; /* been handled yet? */ unsigned long rs_on_net:1; /* reply_out_callback pending? */ unsigned long rs_prealloc:1; /* rs from prealloc list */ - + unsigned long rs_committed:1;/* the transaction was committed + and the rs was dispatched + by ptlrpc_commit_replies */ int rs_size; __u32 rs_opc; __u64 rs_transno; diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 2276196..5eb8cef 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -280,6 +280,7 @@ static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs) rs->rs_scheduled = 1; b->rsb_n_replies++; } + rs->rs_committed = 1; spin_unlock(&rs->rs_lock); } @@ -1664,9 +1665,29 @@ ptlrpc_handle_rs (struct ptlrpc_reply_state *rs) list_del_init (&rs->rs_exp_list); spin_unlock (&exp->exp_lock); - /* Avoid exp_uncommitted_replies_lock contention if we 100% sure that - * rs has been removed from the list already */ - if (!list_empty_careful(&rs->rs_obd_list)) { + /* The disk commit callback holds exp_uncommitted_replies_lock while it + * iterates over newly committed replies, removing them from + * exp_uncommitted_replies. It then drops this lock and schedules the + * replies it found for handling here. + * + * We can avoid contention for exp_uncommitted_replies_lock between the + * HRT threads and further commit callbacks by checking rs_committed + * which is set in the commit callback while it holds both + * rs_lock and exp_uncommitted_reples. + * + * If we see rs_committed clear, the commit callback _may_ not have + * handled this reply yet and we race with it to grab + * exp_uncommitted_replies_lock before removing the reply from + * exp_uncommitted_replies. Note that if we lose the race and the + * reply has already been removed, list_del_init() is a noop. + * + * If we see rs_committed set, we know the commit callback is handling, + * or has handled this reply since store reordering might allow us to + * see rs_committed set out of sequence. But since this is done + * holding rs_lock, we can be sure it has all completed once we hold + * rs_lock, which we do right next. + */ + if (!rs->rs_committed) { spin_lock(&exp->exp_uncommitted_replies_lock); list_del_init(&rs->rs_obd_list); spin_unlock(&exp->exp_uncommitted_replies_lock); -- 1.8.3.1