Whamcloud - gitweb
LU-7760 ptlrpc: remove incorrect pid printing
[fs/lustre-release.git] / lustre / ptlrpc / client.c
index e31010c..bac3301 100644 (file)
@@ -763,11 +763,11 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
                                *fail2_t = ktime_get_real_seconds() +
                                           LONG_UNLINK;
 
-                       /* The RPC is infected, let the test to change the
-                        * fail_loc */
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(2));
-                       set_current_state(TASK_RUNNING);
+                       /*
+                        * The RPC is infected, let the test change the
+                        * fail_loc
+                        */
+                       msleep(4 * MSEC_PER_SEC);
                }
        }
 
@@ -1557,8 +1557,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
                req->rq_waiting = 1;
                spin_unlock(&req->rq_lock);
 
-               DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: "
-                         "(%s != %s)", lustre_msg_get_status(req->rq_reqmsg),
+               DEBUG_REQ(D_HA, req, "req waiting for recovery: (%s != %s)",
                          ptlrpc_import_state_name(req->rq_send_state),
                          ptlrpc_import_state_name(imp->imp_state));
                LASSERT(list_empty(&req->rq_list));
@@ -2506,29 +2505,54 @@ void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
  * Drops one reference count for request \a request.
  * \a locked set indicates that caller holds import imp_lock.
  * Frees the request whe reference count reaches zero.
+ *
+ * \retval 1   the request is freed
+ * \retval 0   some others still hold references on the request
  */
 static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
 {
-        ENTRY;
-        if (request == NULL)
-                RETURN(1);
+       int count;
+       ENTRY;
 
-        if (request == LP_POISON ||
-            request->rq_reqmsg == LP_POISON) {
-                CERROR("dereferencing freed request (bug 575)\n");
-                LBUG();
-                RETURN(1);
-        }
+       if (!request)
+               RETURN(1);
+
+       LASSERT(request != LP_POISON);
+       LASSERT(request->rq_reqmsg != LP_POISON);
 
-        DEBUG_REQ(D_INFO, request, "refcount now %u",
+       DEBUG_REQ(D_INFO, request, "refcount now %u",
                  atomic_read(&request->rq_refcount) - 1);
 
-       if (atomic_dec_and_test(&request->rq_refcount)) {
-                __ptlrpc_free_req(request, locked);
-                RETURN(1);
-        }
+       spin_lock(&request->rq_lock);
+       count = atomic_dec_return(&request->rq_refcount);
+       LASSERTF(count >= 0, "Invalid ref count %d\n", count);
 
-        RETURN(0);
+       /* For an open RPC, the client does not know the EA size (LOV, ACL,
+        * and so on) before the reply arrives, so it has to reserve a very
+        * large reply buffer. That buffer is not released until the RPC is
+        * freed. Since the open RPC is replayable, we need to keep it in the
+        * replay list until close. If a lot of files are opened concurrently,
+        * the client may run out of memory.
+        *
+        * In fact, it is unnecessary to keep the reply buffer for open replay:
+        * the related EAs have already been saved via mdc_save_lovea() before
+        * coming here. So it is safe to free the reply buffer early, before
+        * releasing the RPC, to avoid client OOM. LU-9514 */
+       if (count == 1 && request->rq_early_free_repbuf && request->rq_repbuf) {
+               spin_lock(&request->rq_early_free_lock);
+               sptlrpc_cli_free_repbuf(request);
+               request->rq_repbuf = NULL;
+               request->rq_repbuf_len = 0;
+               request->rq_repdata = NULL;
+               request->rq_reqdata_len = 0;
+               spin_unlock(&request->rq_early_free_lock);
+       }
+       spin_unlock(&request->rq_lock);
+
+       if (!count)
+               __ptlrpc_free_req(request, locked);
+
+       RETURN(!count);
 }
 
 /**
@@ -3082,6 +3106,9 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         DEBUG_REQ(D_HA, req, "REPLAY");
 
        atomic_inc(&req->rq_import->imp_replay_inflight);
+       spin_lock(&req->rq_lock);
+       req->rq_early_free_repbuf = 0;
+       spin_unlock(&req->rq_lock);
        ptlrpc_request_addref(req);     /* ptlrpcd needs a ref */
 
        ptlrpcd_add_req(req);