From a10c0beeb8e1ee76f9cbdb1dea9bce4ad61fa768 Mon Sep 17 00:00:00 2001 From: zhanghc Date: Thu, 16 Jul 2009 05:22:21 +0000 Subject: [PATCH] b=19870 fix a bug of processing export->exp_rpc_count during recovery i=tappro@sun.com i=alexey.lyashkov@sun.com --- lustre/include/lustre_net.h | 6 +++++- lustre/ldlm/ldlm_lib.c | 15 ++++++++++++++- lustre/ptlrpc/service.c | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 5fce5c9..8cc7483 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -324,7 +324,11 @@ struct ptlrpc_request { rq_sent_final:1, /* stop sending early replies */ rq_hp:1, /* high priority RPC */ rq_at_linked:1, /* link into service's srv_at_array */ - rq_fake:1; /* fake request - just for timeout only */ + rq_fake:1, /* fake request - just for timeout only */ + /* a copy of the request is queued to replay during recovery */ + rq_copy_queued:1, + /* whether the request is a copy of another replay request */ + rq_copy:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ atomic_t rq_refcount; /* client-side refcount for SENT race, diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index f6061c9..0b305b9 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1197,6 +1197,9 @@ static void abort_recovery_queue(struct obd_device *obd) else DEBUG_REQ(D_ERROR, req, "packing failed for abort-reply; skipping"); + + LASSERT(req->rq_copy); + class_export_rpc_put(req->rq_export); target_release_saved_req(req); } } @@ -1243,6 +1246,9 @@ void target_cleanup_recovery(struct obd_device *obd) req = list_entry(tmp, struct ptlrpc_request, rq_list); target_exp_dequeue_req_replay(req); list_del_init(&req->rq_list); + + LASSERT(req->rq_copy); + class_export_rpc_put(req->rq_export); target_release_saved_req(req); } EXIT; @@ -1501,6 +1507,10 @@ static void process_recovery_queue(struct 
obd_device *obd) obd->obd_next_recovery_transno++; spin_unlock_bh(&obd->obd_processing_task_lock); target_exp_dequeue_req_replay(req); + + LASSERT(req->rq_copy); + class_export_rpc_put(req->rq_export); + class_export_put(req->rq_export); ptlrpc_req_drop_rs(req); OBD_FREE(req->rq_reqmsg, req->rq_reqlen); @@ -1524,7 +1534,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, struct list_head *tmp; int inserted = 0; __u64 transno = lustre_msg_get_transno(req->rq_reqmsg); - struct ptlrpc_request *saved_req; + struct ptlrpc_request *saved_req, *orig_req; struct lustre_msg *reqmsg; int rc = 0; @@ -1574,6 +1584,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, memcpy(saved_req, req, sizeof *req); memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); + orig_req = req; req = saved_req; req->rq_reqmsg = reqmsg; class_export_get(req->rq_export); @@ -1612,6 +1623,8 @@ int target_queue_recovery_request(struct ptlrpc_request *req, } obd->obd_requests_queued_for_recovery++; + orig_req->rq_copy_queued = 1; + req->rq_copy = 1; if (obd->obd_processing_task != 0) { /* Someone else is processing this queue, we'll leave it to diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 293ab95..0fd0b42 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1357,7 +1357,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, lustre_msg_get_opc(request->rq_reqmsg)); put_rpc_export: - if (export != NULL) + if (export != NULL && !request->rq_copy_queued) class_export_rpc_put(export); put_conn: -- 1.8.3.1