Whamcloud - gitweb
- fixed some warnings in the DLM
authorpschwan <pschwan>
Thu, 18 Apr 2002 03:00:36 +0000 (03:00 +0000)
committerpschwan <pschwan>
Thu, 18 Apr 2002 03:00:36 +0000 (03:00 +0000)
- fixed the connection reference leak when bulk is used
- fixed, I hope, a race condition with the HA lists that was corrupting memory

lustre/include/linux/lustre_net.h
lustre/ldlm/ldlm_request.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/service.c

index 1dbc2ca..9a249bd 100644 (file)
@@ -87,10 +87,12 @@ struct ptlrpc_client {
         __u32 cli_request_portal;
         __u32 cli_reply_portal;
 
-        struct semaphore cli_rpc_sem;
-        struct list_head cli_sending_head;
+        struct semaphore cli_rpc_sem; /* limits outstanding requests */
+
+        struct list_head cli_sending_head; /* lists protected by ha_mgr lock */
         struct list_head cli_sent_head;
         struct list_head cli_ha_item; 
+
         struct connmgr_obd *cli_ha_mgr;
 };
 
@@ -206,8 +208,6 @@ void ptlrpc_cleanup_connection(void);
 /* rpc/niobuf.c */
 int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *);
 int ptlrpc_send_bulk(struct ptlrpc_bulk_desc *, int portal);
-int ptl_send_buf(struct ptlrpc_request *, struct ptlrpc_connection *,
-                 int portal);
 int ptlrpc_register_bulk(struct ptlrpc_bulk_desc *);
 int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk);
 int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req);
@@ -223,6 +223,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req);
 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
                                        struct ptlrpc_connection *u, int opcode,
                                        int count, int *lengths, char **bufs);
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
 void ptlrpc_free_req(struct ptlrpc_request *request);
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *);
 int ptlrpc_check_status(struct ptlrpc_request *req, int err);
index f1293aa..ac23beb 100644 (file)
@@ -202,9 +202,9 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh,
         struct ldlm_request *body;
         struct ldlm_reply *reply;
         struct ldlm_lock *lock;
-        struct ptlrpc_request *req;
-        int rc, size[2] = {sizeof(*body), lock->l_data_len};
-        char *bufs[2] = {NULL, lock->l_data};
+        struct ptlrpc_request *req = NULL;
+        int rc, size[2] = {sizeof(*body), 0};
+        char *bufs[2] = {NULL, NULL};
         ENTRY;
 
         lock = ldlm_handle2object(lockh);
@@ -212,6 +212,8 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct ldlm_handle *lockh,
         if (is_local_conn(lock->l_connection))
                 GOTO(local, 0);
 
+        size[1] = lock->l_data_len;
+        bufs[1] = lock->l_data;
         req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CONVERT, 2, size,
                               bufs);
         if (!req)
@@ -247,9 +249,9 @@ int ldlm_cli_cancel(struct ptlrpc_client *cl, struct ldlm_handle *lockh,
 {
         struct ldlm_request *body;
         struct ldlm_lock *lock;
-        struct ptlrpc_request *req;
-        int rc, size[2] = {sizeof(*body), lock->l_data_len};
-        char *bufs[2] = {NULL, lock->l_data};
+        struct ptlrpc_request *req = NULL;
+        int rc, size[2] = {sizeof(*body), 0};
+        char *bufs[2] = {NULL, NULL};
         ENTRY;
 
         lock = ldlm_handle2object(lockh);
@@ -257,6 +259,8 @@ int ldlm_cli_cancel(struct ptlrpc_client *cl, struct ldlm_handle *lockh,
         if (is_local_conn(lock->l_connection))
                 GOTO(local, 0);
 
+        size[1] = lock->l_data_len;
+        bufs[1] = lock->l_data;
         req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CANCEL, 2, size,
                               bufs);
         if (!req)
index f4e7b50..17a8ee1 100644 (file)
@@ -215,8 +215,7 @@ int mdc_readpage(struct ptlrpc_client *cl, struct ptlrpc_connection *conn,
 
  out:
         *request = req;
-        if (bulk != NULL)
-                OBD_FREE(bulk, sizeof(*bulk));
+        ptlrpc_free_bulk(bulk);
         return rc;
 }
 
index 2cb85ff..8b5792c 100644 (file)
@@ -87,7 +87,7 @@ int mds_sendpage(struct ptlrpc_request *req, struct file *file,
  cleanup_buf:
         OBD_FREE(buf, PAGE_SIZE);
  cleanup_bulk:
-        OBD_FREE(bulk, sizeof(*bulk));
+        ptlrpc_free_bulk(bulk);
  out:
         return rc;
 }
index d89ce93..93923de 100644 (file)
@@ -367,6 +367,7 @@ int osc_sendpage(struct obd_conn *conn, struct ptlrpc_request *req,
                 rc = ptlrpc_send_bulk(bulk, OSC_BULK_PORTAL);
                 if (rc != 0) {
                         CERROR("send_bulk failed: %d\n", rc);
+                        ptlrpc_free_bulk(bulk);
                         LBUG();
                         return rc;
                 }
@@ -374,12 +375,11 @@ int osc_sendpage(struct obd_conn *conn, struct ptlrpc_request *req,
                                          ptlrpc_check_bulk_sent(bulk));
 
                 if (bulk->b_flags == PTL_RPC_INTR) {
-                        EXIT;
-                        /* FIXME: hey hey, we leak here. */
-                        return -EINTR;
+                        ptlrpc_free_bulk(bulk);
+                        RETURN(-EINTR);
                 }
 
-                OBD_FREE(bulk, sizeof(*bulk));
+                ptlrpc_free_bulk(bulk);
         }
 
         return 0;
index 2be7a3c..2daebfa 100644 (file)
@@ -73,6 +73,16 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
         return bulk;
 }
 
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk)
+{
+        if (bulk == NULL)
+                return;
+
+        ptlrpc_put_connection(bulk->b_connection);
+
+        OBD_FREE(bulk, sizeof(*bulk));
+}
+
 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
                                        struct ptlrpc_connection *conn,
                                        int opcode, int count, int *lengths,
@@ -103,6 +113,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
         request->rq_reqmsg->token = conn->c_remote_token;
         request->rq_reqmsg->opc = HTON__u32(opcode);
         request->rq_reqmsg->type = HTON__u32(request->rq_type);
+        INIT_LIST_HEAD(&request->rq_list);
 
         spin_lock(&conn->c_lock);
         request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out);
@@ -121,6 +132,10 @@ void ptlrpc_free_req(struct ptlrpc_request *request)
         if (request->rq_repmsg != NULL)
                 OBD_FREE(request->rq_repmsg, request->rq_replen);
 
+        spin_lock(&request->rq_client->cli_ha_mgr->mgr_lock);
+        list_del(&request->rq_list);
+        spin_unlock(&request->rq_client->cli_ha_mgr->mgr_lock);
+
         ptlrpc_put_connection(request->rq_connection);
 
         OBD_FREE(request, sizeof(*request));
@@ -136,10 +151,10 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req)
                 GOTO(out, rc = 1);
         }
 
-        if (CURRENT_TIME - req->rq_time >= 3) { 
-                CERROR("-- REQ TIMEOUT --\n"); 
+        if (CURRENT_TIME - req->rq_time >= 3) {
+                CERROR("-- REQ TIMEOUT --\n");
                 if (req->rq_client && req->rq_client->cli_ha_mgr)
-                        connmgr_cli_fail(req->rq_client); 
+                        connmgr_cli_fail(req->rq_client);
                 return 0;
         }
 
index bf275b8..16ddff1 100644 (file)
@@ -46,8 +46,10 @@ static int request_out_callback(ptl_event_t *ev, void *data)
         ENTRY;
 
         if (ev->type == PTL_EVENT_SENT) {
+                spin_lock(&req->rq_client->cli_ha_mgr->mgr_lock);
                 list_del(&req->rq_list);
                 list_add(&req->rq_list, &cl->cli_sent_head);
+                spin_unlock(&req->rq_client->cli_ha_mgr->mgr_lock);
         } else { 
                 // XXX make sure we understand all events, including ACK's
                 CERROR("Unknown event %d\n", ev->type); 
index 5a0786f..fde43a2 100644 (file)
@@ -51,8 +51,8 @@ int ptlrpc_check_bulk_sent(struct ptlrpc_bulk_desc *bulk)
         RETURN(0);
 }
 
-int ptl_send_buf(struct ptlrpc_request *request, struct ptlrpc_connection *conn,
-                 int portal)
+static int ptl_send_buf(struct ptlrpc_request *request,
+                        struct ptlrpc_connection *conn, int portal)
 {
         int rc;
         ptl_process_id_t remote_id;
@@ -295,7 +295,10 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                request->rq_replen, request->rq_reqmsg->xid,
                request->rq_client->cli_request_portal);
 
+        spin_lock(&request->rq_client->cli_ha_mgr->mgr_lock);
         list_add(&request->rq_list, &request->rq_client->cli_sending_head);
+        spin_unlock(&request->rq_client->cli_ha_mgr->mgr_lock);
+
         rc = ptl_send_buf(request, request->rq_connection,
                           request->rq_client->cli_request_portal);
         RETURN(rc);
index 5a23889..d81767a 100644 (file)
@@ -168,8 +168,17 @@ static int handle_incoming_request(struct obd_device *obddev,
         if (request.rq_reqmsg->conn) {
                 request.rq_connection =
                         (void *)(unsigned long)request.rq_reqmsg->conn;
-                if (request.rq_reqmsg->token != request.rq_connection->c_token)
+                if (request.rq_reqmsg->token !=
+                    request.rq_connection->c_token) {
+                        struct ptlrpc_connection *tmp;
+                        tmp = ptlrpc_get_connection(&peer);
+                        CERROR("rq_reqmsg->conn: %p\n", request.rq_connection);
+                        CERROR("real connection: %p\n", tmp);
+                        CERROR("rq_reqmsg->token: %Lu\n",
+                               request.rq_reqmsg->token);
+                        CERROR("real token      : %Lu\n", tmp->c_token);
                         LBUG();
+                }
                 ptlrpc_connection_addref(request.rq_connection);
         } else {
                 request.rq_connection = ptlrpc_get_connection(&peer);