LU-9683 ptlrpc: fix argument misorder

[fs/lustre-release.git] / lustre / ptlrpc / client.c
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 8d2e01b..4409de6 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -23,7 +23,7 @@
   * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
   *
- * Copyright (c) 2011, 2015, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -79,31 +79,33 @@ EXPORT_SYMBOL(ptlrpc_init_client);
  /**
   * Return PortalRPC connection for remore uud \a uuid
   */
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid,
+                                                   lnet_nid_t nid4refnet)
  {
-        struct ptlrpc_connection *c;
-        lnet_nid_t                self;
-        lnet_process_id_t         peer;
-        int                       err;
+       struct ptlrpc_connection *c;
+       lnet_nid_t                self;
+       struct lnet_process_id peer;
+       int                       err;
  
         /* ptlrpc_uuid_to_peer() initializes its 2nd parameter
          * before accessing its values. */
         /* coverity[uninit_use_in_call] */
-        err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
-        if (err != 0) {
-                CNETERR("cannot find peer %s!\n", uuid->uuid);
-                return NULL;
-        }
+       peer.nid = nid4refnet;
+       err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
+       if (err != 0) {
+               CNETERR("cannot find peer %s!\n", uuid->uuid);
+               return NULL;
+       }
  
-        c = ptlrpc_connection_get(peer, self, uuid);
-        if (c) {
-                memcpy(c->c_remote_uuid.uuid,
-                       uuid->uuid, sizeof(c->c_remote_uuid.uuid));
-        }
+       c = ptlrpc_connection_get(peer, self, uuid);
+       if (c) {
+               memcpy(c->c_remote_uuid.uuid,
+                      uuid->uuid, sizeof(c->c_remote_uuid.uuid));
+       }
  
-        CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
+       CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
  
-        return c;
+       return c;
  }
  
  /**
@@ -152,7 +154,7 @@ struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned nfrags, unsigned max_brw,
         /* PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this
          * node. Negotiated ocd_brw_size will always be <= this number. */
         for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++)
-               LNetInvalidateHandle(&desc->bd_mds[i]);
+               LNetInvalidateMDHandle(&desc->bd_mds[i]);
  
         return desc;
  out:
@@ -355,7 +357,7 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
  {
          unsigned int nl, oldnl;
          struct imp_at *at;
-        time_t now = cfs_time_current_sec();
+       time64_t now = ktime_get_real_seconds();
  
          LASSERT(req->rq_import);
  
@@ -369,9 +371,8 @@ void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
                  */
                 CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ?
                        D_ADAPTTO : D_WARNING,
-                      "Reported service time %u > total measured time "
-                      CFS_DURATION_T"\n", service_time,
-                      cfs_time_sub(now, req->rq_sent));
+                      "Reported service time %u > total measured time %lld\n",
+                      service_time, now - req->rq_sent);
                 return;
         }
  
@@ -417,11 +418,11 @@ static int unpack_reply(struct ptlrpc_request *req)
  static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
  __must_hold(&req->rq_lock)
  {
-        struct ptlrpc_request *early_req;
-        time_t                 olddl;
-        int                    rc;
-        ENTRY;
+       struct ptlrpc_request *early_req;
+       time64_t olddl;
+       int rc;
  
+       ENTRY;
          req->rq_early = 0;
         spin_unlock(&req->rq_lock);
  
@@ -462,10 +463,10 @@ __must_hold(&req->rq_lock)
                            ptlrpc_at_get_net_latency(req);
  
         DEBUG_REQ(D_ADAPTTO, req,
-                 "Early reply #%d, new deadline in "CFS_DURATION_T"s "
-                 "("CFS_DURATION_T"s)", req->rq_early_count,
-                 cfs_time_sub(req->rq_deadline, cfs_time_current_sec()),
-                 cfs_time_sub(req->rq_deadline, olddl));
+                 "Early reply #%d, new deadline in %llds (%llds)",
+                 req->rq_early_count,
+                 req->rq_deadline - ktime_get_real_seconds(),
+                 req->rq_deadline - olddl);
  
         RETURN(rc);
  }
@@ -742,7 +743,7 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
  
         /* Let's setup deadline for req/reply/bulk unlink for opcode. */
         if (cfs_fail_val == opcode) {
-               time_t *fail_t = NULL, *fail2_t = NULL;
+               time64_t *fail_t = NULL, *fail2_t = NULL;
  
                 if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
                         fail_t = &request->rq_bulk_deadline;
@@ -756,16 +757,17 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
                 }
  
                 if (fail_t) {
-                       *fail_t = cfs_time_current_sec() + LONG_UNLINK;
+                       *fail_t = ktime_get_real_seconds() + LONG_UNLINK;
  
                         if (fail2_t)
-                               *fail2_t = cfs_time_current_sec() + LONG_UNLINK;
+                               *fail2_t = ktime_get_real_seconds() +
+                                          LONG_UNLINK;
  
-                       /* The RPC is infected, let the test to change the
-                        * fail_loc */
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(2));
-                       set_current_state(TASK_RUNNING);
+                       /*
+                        * The RPC is infected; sleep to give the test time to
+                        * change the fail_loc.
+                        */
+                       msleep(4 * MSEC_PER_SEC);
                 }
         }
  
@@ -1325,14 +1327,14 @@ __u64 ptlrpc_known_replied_xid(struct obd_import *imp)
   */
  static int after_reply(struct ptlrpc_request *req)
  {
-        struct obd_import *imp = req->rq_import;
-        struct obd_device *obd = req->rq_import->imp_obd;
-        int rc;
-        struct timeval work_start;
-       __u64 committed;
-        long timediff;
-        ENTRY;
+       struct obd_import *imp = req->rq_import;
+       struct obd_device *obd = req->rq_import->imp_obd;
+       ktime_t work_start;
+       u64 committed;
+       s64 timediff;
+       int rc;
  
+       ENTRY;
          LASSERT(obd != NULL);
          /* repbuf must be unlinked */
         LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked);
@@ -1358,8 +1360,8 @@ static int after_reply(struct ptlrpc_request *req)
                  RETURN(0);
          }
  
-       do_gettimeofday(&work_start);
-       timediff = cfs_timeval_sub(&work_start, &req->rq_sent_tv, NULL);
+       work_start = ktime_get_real();
+       timediff = ktime_us_delta(work_start, req->rq_sent_ns);
  
          /*
           * NB Until this point, the whole of the incoming message,
@@ -1384,7 +1386,7 @@ static int after_reply(struct ptlrpc_request *req)
         /* retry indefinitely on EINPROGRESS */
         if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS &&
             ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) {
-               time_t  now = cfs_time_current_sec();
+               time64_t now = ktime_get_real_seconds();
  
                 DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS");
                 spin_lock(&req->rq_lock);
@@ -1535,7 +1537,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
                     pool_is_at_full_capacity())
                         RETURN(-ENOMEM);
  
-        if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) &&
+       if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) &&
              (!req->rq_generation_set ||
               req->rq_import_generation == imp->imp_generation))
                  RETURN (0);
@@ -1555,8 +1557,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
                 req->rq_waiting = 1;
                 spin_unlock(&req->rq_lock);
  
-               DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: "
-                         "(%s != %s)", lustre_msg_get_status(req->rq_reqmsg),
+               DEBUG_REQ(D_HA, req, "req waiting for recovery: (%s != %s)",
                           ptlrpc_import_state_name(req->rq_send_state),
                           ptlrpc_import_state_name(imp->imp_state));
                 LASSERT(list_empty(&req->rq_list));
@@ -1727,7 +1728,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
  
                 /* delayed resend - skip */
                 if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend &&
-                   req->rq_sent > cfs_time_current_sec())
+                   req->rq_sent > ktime_get_real_seconds())
                         continue;
  
                 if (!(req->rq_phase == RQ_PHASE_RPC ||
@@ -2032,13 +2033,15 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                 }
                 ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
  
-               CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
-                       "Completed RPC pname:cluuid:pid:xid:nid:"
-                       "opc %s:%s:%d:%llu:%s:%d\n",
-                       current_comm(), imp->imp_obd->obd_uuid.uuid,
-                       lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
-                       libcfs_nid2str(imp->imp_connection->c_peer.nid),
-                       lustre_msg_get_opc(req->rq_reqmsg));
+               if (req->rq_reqmsg != NULL)
+                       CDEBUG(D_RPCTRACE,
+                              "Completed RPC pname:cluuid:pid:xid:nid:"
+                              "opc %s:%s:%d:%llu:%s:%d\n", current_comm(),
+                              imp->imp_obd->obd_uuid.uuid,
+                              lustre_msg_get_status(req->rq_reqmsg),
+                              req->rq_xid,
+                              libcfs_nid2str(imp->imp_connection->c_peer.nid),
+                              lustre_msg_get_opc(req->rq_reqmsg));
  
                 spin_lock(&imp->imp_lock);
                 /* Request already may be not on sending or delaying list. This
@@ -2101,14 +2104,13 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
         req->rq_timedout = 1;
         spin_unlock(&req->rq_lock);
  
-       DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent "CFS_DURATION_T
-                 "/real "CFS_DURATION_T"]",
+       DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent %lld/real %lld]",
                    req->rq_net_err ? "failed due to network error" :
                       ((req->rq_real_sent == 0 ||
-                       cfs_time_before(req->rq_real_sent, req->rq_sent) ||
-                       cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+                      req->rq_real_sent < req->rq_sent ||
+                      req->rq_real_sent >= req->rq_deadline) ?
                        "timed out for sent delay" : "timed out for slow reply"),
-                  req->rq_sent, req->rq_real_sent);
+                 (s64)req->rq_sent, (s64)req->rq_real_sent);
  
         if (imp != NULL && obd_debug_peer_on_timeout)
                 LNetDebugPeer(imp->imp_connection->c_peer);
@@ -2164,11 +2166,11 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
   */
  int ptlrpc_expired_set(void *data)
  {
-       struct ptlrpc_request_set       *set = data;
-       struct list_head                *tmp;
-       time_t                          now = cfs_time_current_sec();
-       ENTRY;
+       struct ptlrpc_request_set *set = data;
+       struct list_head *tmp;
+       time64_t now = ktime_get_real_seconds();
  
+       ENTRY;
         LASSERT(set != NULL);
  
         /*
@@ -2250,13 +2252,13 @@ static void ptlrpc_interrupted_set(void *data)
   */
  int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
  {
-       struct list_head        *tmp;
-       time_t                   now = cfs_time_current_sec();
-       int                      timeout = 0;
-       struct ptlrpc_request   *req;
-       int                      deadline;
-       ENTRY;
+       struct list_head *tmp;
+       time64_t now = ktime_get_real_seconds();
+       int timeout = 0;
+       struct ptlrpc_request *req;
+       time64_t deadline;
  
+       ENTRY;
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
  
@@ -2503,29 +2505,54 @@ void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
   * Drops one reference count for request \a request.
   * \a locked set indicates that caller holds import imp_lock.
   * Frees the request whe reference count reaches zero.
+ *
+ * \retval 1   the request is freed
+ * \retval 0   other holders still have references to the request
   */
  static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
  {
-        ENTRY;
-        if (request == NULL)
-                RETURN(1);
+       int count;
+       ENTRY;
  
-        if (request == LP_POISON ||
-            request->rq_reqmsg == LP_POISON) {
-                CERROR("dereferencing freed request (bug 575)\n");
-                LBUG();
-                RETURN(1);
-        }
+       if (!request)
+               RETURN(1);
  
-        DEBUG_REQ(D_INFO, request, "refcount now %u",
+       LASSERT(request != LP_POISON);
+       LASSERT(request->rq_reqmsg != LP_POISON);
+
+       DEBUG_REQ(D_INFO, request, "refcount now %u",
                   atomic_read(&request->rq_refcount) - 1);
  
-       if (atomic_dec_and_test(&request->rq_refcount)) {
-                __ptlrpc_free_req(request, locked);
-                RETURN(1);
-        }
+       spin_lock(&request->rq_lock);
+       count = atomic_dec_return(&request->rq_refcount);
+       LASSERTF(count >= 0, "Invalid ref count %d\n", count);
  
-        RETURN(0);
+       /* For open RPC, the client does not know the EA size (LOV, ACL, and
+        * so on) before the reply arrives, so the client has to reserve a very
+        * large reply buffer. Such a buffer is not released until the RPC is
+        * freed. Since the open RPC is replayable, we need to keep it in the
+        * replay list until close. If a lot of files are opened concurrently,
+        * the client may run out of memory.
+        *
+        * In fact, it is unnecessary to keep the reply buffer for open replay:
+        * the related EAs have already been saved via mdc_save_lovea() before
+        * coming here. So it is safe to free the reply buffer somewhat earlier,
+        * before releasing the RPC, to avoid client OOM. LU-9514 */
+       if (count == 1 && request->rq_early_free_repbuf && request->rq_repbuf) {
+               spin_lock(&request->rq_early_free_lock);
+               sptlrpc_cli_free_repbuf(request);
+               request->rq_repbuf = NULL;
+               request->rq_repbuf_len = 0;
+               request->rq_repdata = NULL;
+               request->rq_reqdata_len = 0;
+               spin_unlock(&request->rq_early_free_lock);
+       }
+       spin_unlock(&request->rq_lock);
+
+       if (!count)
+               __ptlrpc_free_req(request, locked);
+
+       RETURN(!count);
  }
  
  /**
@@ -2566,8 +2593,8 @@ static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
         /* Let's setup deadline for reply unlink. */
         if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
             async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
-               request->rq_reply_deadline =
-                       cfs_time_current_sec() + LONG_UNLINK;
+               request->rq_reply_deadline = ktime_get_real_seconds() +
+                                            LONG_UNLINK;
  
          /*
           * Nothing left to do.
@@ -2729,13 +2756,18 @@ free_req:
                 GOTO(out, 0);
  
         list_for_each_entry_safe(req, saved, &imp->imp_committed_list,
-                                    rq_replay_list) {
+                                rq_replay_list) {
                 LASSERT(req->rq_transno != 0);
-               if (req->rq_import_generation < imp->imp_generation) {
-                       DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
-                       ptlrpc_free_request(req);
-               } else if (!req->rq_replay) {
-                       DEBUG_REQ(D_RPCTRACE, req, "free closed open request");
+               if (req->rq_import_generation < imp->imp_generation ||
+                   !req->rq_replay) {
+                       DEBUG_REQ(D_RPCTRACE, req, "free %s open request",
+                                 req->rq_import_generation <
+                                 imp->imp_generation ? "stale" : "closed");
+
+                       if (imp->imp_replay_cursor == &req->rq_replay_list)
+                               imp->imp_replay_cursor =
+                                       req->rq_replay_list.next;
+
                         ptlrpc_free_request(req);
                 }
         }
@@ -3074,6 +3106,9 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
          DEBUG_REQ(D_HA, req, "REPLAY");
  
         atomic_inc(&req->rq_import->imp_replay_inflight);
+       spin_lock(&req->rq_lock);
+       req->rq_early_free_repbuf = 0;
+       spin_unlock(&req->rq_lock);
         ptlrpc_request_addref(req);     /* ptlrpcd needs a ref */
  
         ptlrpcd_add_req(req);
@@ -3187,7 +3222,7 @@ static spinlock_t ptlrpc_last_xid_lock;
  #define YEAR_2004 (1ULL << 30)
  void ptlrpc_init_xid(void)
  {
-       time_t now = cfs_time_current_sec();
+       time64_t now = ktime_get_real_seconds();
  
         spin_lock_init(&ptlrpc_last_xid_lock);
         if (now < YEAR_2004) {
@@ -3335,7 +3370,7 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
  {
         /* re-initialize the req */
         req->rq_timeout         = obd_timeout;
-       req->rq_sent            = cfs_time_current_sec();
+       req->rq_sent            = ktime_get_real_seconds();
         req->rq_deadline        = req->rq_sent + req->rq_timeout;
         req->rq_phase           = RQ_PHASE_INTERPRET;
         req->rq_next_phase      = RQ_PHASE_COMPLETE;