From 36915bd9b0838c3344ab6dd7db2c8b74c0707484 Mon Sep 17 00:00:00 2001 From: pschwan Date: Thu, 18 Apr 2002 03:00:36 +0000 Subject: [PATCH] - fixed some warnings in the DLM - fixed the connection reference leak when bulk is used - fixed, I hope, a race condition with the HA lists that was corrupting memory --- lustre/include/linux/lustre_net.h | 9 +++++---- lustre/ldlm/ldlm_request.c | 16 ++++++++++------ lustre/mdc/mdc_request.c | 3 +-- lustre/mds/handler.c | 2 +- lustre/osc/osc_request.c | 8 ++++---- lustre/ptlrpc/client.c | 21 ++++++++++++++++++--- lustre/ptlrpc/events.c | 2 ++ lustre/ptlrpc/niobuf.c | 7 +++++-- lustre/ptlrpc/service.c | 11 ++++++++++- 9 files changed, 56 insertions(+), 23 deletions(-) diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 1dbc2ca..9a249bd 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -87,10 +87,12 @@ struct ptlrpc_client { __u32 cli_request_portal; __u32 cli_reply_portal; - struct semaphore cli_rpc_sem; - struct list_head cli_sending_head; + struct semaphore cli_rpc_sem; /* limits outstanding requests */ + + struct list_head cli_sending_head; /* lists protected by ha_mgr lock */ struct list_head cli_sent_head; struct list_head cli_ha_item; + struct connmgr_obd *cli_ha_mgr; }; @@ -206,8 +208,6 @@ void ptlrpc_cleanup_connection(void); /* rpc/niobuf.c */ int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *); int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *, int portal); -int ptl_send_buf(struct ptlrpc_request *, struct ptlrpc_connection *, - int portal); int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *); int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk); int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req); @@ -223,6 +223,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req); struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, struct ptlrpc_connection *u, int opcode, int count, int *lengths, char **bufs); +void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); void ptlrpc_free_req(struct ptlrpc_request *request); struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *); int ptlrpc_check_status(struct ptlrpc_request *req, int err); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index f1293aa..ac23beb 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -202,9 +202,9 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh, struct ldlm_request *body; struct ldlm_reply *reply; struct ldlm_lock *lock; - struct ptlrpc_request *req; - int rc, size[2] = {sizeof(*body), lock->l_data_len}; - char *bufs[2] = {NULL, lock->l_data}; + struct ptlrpc_request *req = NULL; + int rc, size[2] = {sizeof(*body), 0}; + char *bufs[2] = {NULL, NULL}; ENTRY; lock = ldlm_handle2object(lockh); @@ -212,6 +212,8 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh, if (is_local_conn(lock->l_connection)) GOTO(local, 0); + size[1] = lock->l_data_len; + bufs[1] = lock->l_data; req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CONVERT, 2, size, bufs); if (!req) @@ -247,9 +249,9 @@ int ldlm_cli_cancel(struct ptlrpc_client *cl, struct ldlm_handle *lockh, { struct ldlm_request *body; struct ldlm_lock *lock; - struct ptlrpc_request *req; - int rc, size[2] = {sizeof(*body), lock->l_data_len}; - char *bufs[2] = {NULL, lock->l_data}; + struct ptlrpc_request *req = NULL; + int rc, size[2] = {sizeof(*body), 0}; + char *bufs[2] = {NULL, NULL}; ENTRY; lock = ldlm_handle2object(lockh); @@ -257,6 +259,8 @@ int ldlm_cli_cancel(struct ptlrpc_client *cl, struct ldlm_handle *lockh, if (is_local_conn(lock->l_connection)) GOTO(local, 0); + size[1] = lock->l_data_len; + bufs[1] = lock->l_data; req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CANCEL, 2, size, bufs); if (!req) diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index f4e7b50..17a8ee1 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -215,8 +215,7 @@ int mdc_readpage(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, out: *request = req; - if (bulk != NULL) - OBD_FREE(bulk, sizeof(*bulk)); + ptlrpc_free_bulk(bulk); return rc; } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 2cb85ff..8b5792c 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -87,7 +87,7 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file, cleanup_buf: OBD_FREE(buf, PAGE_SIZE); cleanup_bulk: - OBD_FREE(bulk, sizeof(*bulk)); + ptlrpc_free_bulk(bulk); out: return rc; } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index d89ce93..93923de 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -367,6 +367,7 @@ int osc_sendpage(struct obd_conn *conn, struct ptlrpc_request *req, rc = ptlrpc_send_bulk(bulk, OSC_BULK_PORTAL); if (rc != 0) { CERROR("send_bulk failed: %d\n", rc); + ptlrpc_free_bulk(bulk); LBUG(); return rc; } @@ -374,12 +375,11 @@ int osc_sendpage(struct obd_conn *conn, struct ptlrpc_request *req, ptlrpc_check_bulk_sent(bulk)); if (bulk->b_flags == PTL_RPC_INTR) { - EXIT; - /* FIXME: hey hey, we leak here. */ - return -EINTR; + ptlrpc_free_bulk(bulk); + RETURN(-EINTR); } - OBD_FREE(bulk, sizeof(*bulk)); + ptlrpc_free_bulk(bulk); } return 0; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2be7a3c..2daebfa 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -73,6 +73,16 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn) return bulk; } +void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk) +{ + if (bulk == NULL) + return; + + ptlrpc_put_connection(bulk->b_connection); + + OBD_FREE(bulk, sizeof(*bulk)); +} + struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, int opcode, int count, int *lengths, @@ -103,6 +113,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, request->rq_reqmsg->token = conn->c_remote_token; request->rq_reqmsg->opc = HTON__u32(opcode); request->rq_reqmsg->type = HTON__u32(request->rq_type); + INIT_LIST_HEAD(&request->rq_list); spin_lock(&conn->c_lock); request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out); @@ -121,6 +132,10 @@ void ptlrpc_free_req(struct ptlrpc_request *request) if (request->rq_repmsg != NULL) OBD_FREE(request->rq_repmsg, request->rq_replen); + spin_lock(&request->rq_client->cli_ha_mgr->mgr_lock); + list_del(&request->rq_list); + spin_unlock(&request->rq_client->cli_ha_mgr->mgr_lock); + ptlrpc_put_connection(request->rq_connection); OBD_FREE(request, sizeof(*request)); @@ -136,10 +151,10 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) GOTO(out, rc = 1); } - if (CURRENT_TIME - req->rq_time >= 3) { - CERROR("-- REQ TIMEOUT --\n"); + if (CURRENT_TIME - req->rq_time >= 3) { + CERROR("-- REQ TIMEOUT --\n"); if (req->rq_client && req->rq_client->cli_ha_mgr) - connmgr_cli_fail(req->rq_client); + connmgr_cli_fail(req->rq_client); return 0; } diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index bf275b8..16ddff1 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -46,8 +46,10 @@ static int request_out_callback(ptl_event_t *ev, void *data) ENTRY; if (ev->type == PTL_EVENT_SENT) { + spin_lock(&req->rq_client->cli_ha_mgr->mgr_lock); list_del(&req->rq_list); list_add(&req->rq_list, &cl->cli_sent_head); + spin_unlock(&req->rq_client->cli_ha_mgr->mgr_lock); } else { // XXX make sure we understand all events, including ACK's CERROR("Unknown event %d\n", ev->type); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 5a0786f..fde43a2 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -51,8 +51,8 @@ int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *bulk) RETURN(0); } -int ptl_send_buf(struct ptlrpc_request *request, struct ptlrpc_connection *conn, - int portal) +static int ptl_send_buf(struct ptlrpc_request *request, + struct ptlrpc_connection *conn, int portal) { int rc; ptl_process_id_t remote_id; @@ -295,7 +295,10 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_replen, request->rq_reqmsg->xid, request->rq_client->cli_request_portal); + spin_lock(&request->rq_client->cli_ha_mgr->mgr_lock); list_add(&request->rq_list, &request->rq_client->cli_sending_head); + spin_unlock(&request->rq_client->cli_ha_mgr->mgr_lock); + rc = ptl_send_buf(request, request->rq_connection, request->rq_client->cli_request_portal); RETURN(rc); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 5a23889..d81767a 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -168,8 +168,17 @@ static int handle_incoming_request(struct obd_device *obddev, if (request.rq_reqmsg->conn) { request.rq_connection = (void *)(unsigned long)request.rq_reqmsg->conn; - if (request.rq_reqmsg->token != request.rq_connection->c_token) + if (request.rq_reqmsg->token != + request.rq_connection->c_token) { + struct ptlrpc_connection *tmp; + tmp = ptlrpc_get_connection(&peer); + CERROR("rq_reqmsg->conn: %p\n", request.rq_connection); + CERROR("real connection: %p\n", tmp); + CERROR("rq_reqmsg->token: %Lu\n", + request.rq_reqmsg->token); + CERROR("real token : %Lu\n", tmp->c_token); LBUG(); + } ptlrpc_connection_addref(request.rq_connection); } else { request.rq_connection = ptlrpc_get_connection(&peer); -- 1.8.3.1