Whamcloud - gitweb
- Fix the mdc_replay_open fixup so that it updates the handle in the Lustre
author: shaver <shaver>
Wed, 16 Oct 2002 06:21:34 +0000 (06:21 +0000)
committer: shaver <shaver>
Wed, 16 Oct 2002 06:21:34 +0000 (06:21 +0000)
  fh, not just in the request.
- Diagnostic for close-releasing-open req balancing.
- Don't allocate new transnos for replayed requests, or all hell _does_ break
  loose with the committing of "old" requests as things get renumbered.
- Set request level to LUSTRE_CONN_RECOVD during replay.
- Only call the replay callback if the replay succeeded (which it had darned
  well better do, but still...)
- Unlink old MEs when we free old reply buffers -- very important, sigh.
- Bullet-proof some resend diagnostics.

lustre/include/linux/lustre_net.h
lustre/llite/file.c
lustre/mdc/mdc_request.c
lustre/mds/mds_reint.c
lustre/ptlrpc/client.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/recover.c

index b35b999..41e1b16 100644 (file)
@@ -166,8 +166,8 @@ struct ptlrpc_request {
         struct obd_import *rq_import;
         struct ptlrpc_service *rq_svc;
 
-        void (*rq_replay_cb)(struct ptlrpc_request *, struct lustre_handle *);
-        struct lustre_handle rq_replay_cb_handle;
+        void (*rq_replay_cb)(struct ptlrpc_request *, void *);
+        void *rq_replay_cb_data;
 };
 
 struct ptlrpc_bulk_page {
index ef95464..a0c4c61 100644 (file)
@@ -319,7 +319,11 @@ out_mdc:
                         rc = -abs(rc2);
                 GOTO(out_fd, rc);
         }
-        /* XXX Mike, we have also done this in ll_file_open? */
+        CDEBUG(D_HA, "matched req %p xid "LPD64" transno "LPD64" op %d->%s:%d\n",
+               fd->fd_req, fd->fd_req->rq_xid, fd->fd_req->rq_repmsg->transno,
+               fd->fd_req->rq_reqmsg->opc,
+               fd->fd_req->rq_import->imp_connection->c_remote_uuid,
+               fd->fd_req->rq_import->imp_client->cli_request_portal);
         ptlrpc_req_finished(fd->fd_req);
 
         rc = obd_cancel_unused(ll_i2obdconn(inode), lsm, 0);
index 5bfba9e..9268606 100644 (file)
@@ -367,15 +367,15 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
         RETURN(0);
 }
 
-static void mdc_replay_open(struct ptlrpc_request *req,
-                            struct lustre_handle *data)
+static void mdc_replay_open(struct ptlrpc_request *req, void *data)
 {
+        struct lustre_handle *fh = data;
         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
 
         mds_unpack_body(body);
         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
-               data->addr, data->cookie, body->handle.addr, body->handle.cookie);
-        memcpy(data, &body->handle, sizeof(*data));
+               fh->addr, fh->cookie, body->handle.addr, body->handle.cookie);
+        memcpy(fh, &body->handle, sizeof(*fh));
 }
 
 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
@@ -420,7 +420,7 @@ int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
 
         /* If open is replayed, we need to fix up the fh. */
         req->rq_replay_cb = mdc_replay_open;
-        memcpy(&req->rq_replay_cb_handle, fh, sizeof(fh));
+        req->rq_replay_cb_data = fh;
 
         EXIT;
  out:
index 18ba01c..381a67a 100644 (file)
@@ -49,6 +49,10 @@ int mds_update_last_rcvd(struct mds_obd *mds, void *handle,
         loff_t off;
         int rc;
 
+        /* we don't allocate new transnos for replayed requests */
+        if (req->rq_level == LUSTRE_CONN_RECOVD)
+                RETURN(0);
+
         off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
 
         spin_lock(&mds->mds_last_lock);
index a67a50e..2459ad3 100644 (file)
@@ -579,11 +579,10 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
         if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
             PTL_RPC_FL_RESEND) {
                 req->rq_flags &= ~PTL_RPC_FL_RESEND;
-                CDEBUG(D_HA, "req xid "LPD64" op %d to %s:%d\n",
+                CDEBUG(D_HA, "resending req xid "LPD64" op %d to %s:%d\n",
                        (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
                        req->rq_connection->c_remote_uuid,
                        req->rq_import->imp_client->cli_request_portal);
-                /* we'll get sent again, so balance 2nd request_out_callback */
                 goto resend;
         }
 
@@ -635,7 +634,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 
 int ptlrpc_replay_req(struct ptlrpc_request *req)
 {
-        int rc = 0;
+        int rc = 0, old_level;
         // struct ptlrpc_client *cli = req->rq_import->imp_client;
         struct l_wait_info lwi;
         ENTRY;
@@ -649,12 +648,15 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
         req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
 
+        /* temporarily set request to RECOVD level (reset at out:) */
+        old_level = req->rq_level;
+        req->rq_level = LUSTRE_CONN_RECOVD;
         rc = ptl_send_rpc(req);
         if (rc) {
                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
                 ptlrpc_cleanup_request_buf(req);
                 // up(&cli->cli_rpc_sem);
-                RETURN(-rc);
+                GOTO(out, rc = -rc);
         }
 
         CDEBUG(D_OTHER, "-- sleeping\n");
@@ -678,10 +680,13 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
         }
 
         CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid);
-        if (req->rq_repmsg->status == 0)
+        if (req->rq_repmsg->status == 0) {
                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
                        req->rq_replen, req->rq_repmsg->status);
-        else {
+                if (req->rq_replay_cb)
+                        req->rq_replay_cb(req, req->rq_replay_cb_data);
+
+        } else {
                 CERROR("recovery failed: "); 
                 CERROR("req "LPD64" opc %d level %d, conn level %d\n", 
                        req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
@@ -689,9 +694,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
                 LBUG();
         }
 
-        if (req->rq_replay_cb)
-                req->rq_replay_cb(req, &req->rq_replay_cb_handle);
-
  out:
+        req->rq_level = old_level;
         RETURN(rc);
 }
index 82c2547..13b32d7 100644 (file)
@@ -340,6 +340,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                 /* request->rq_repmsg is set only when the reply comes in, in
                  * client_packet_callback() */
                 if (request->rq_reply_md.start) {
+                        PtlMEUnlink(request->rq_reply_me_h);
                         OBD_FREE(request->rq_reply_md.start,
                                  request->rq_replen);
                         /* If we're resending, rq_repmsg needs to be NULLed out
index 2260a5f..0b3a1b8 100644 (file)
@@ -197,8 +197,10 @@ int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn)
                 /* service has not seen req, no reply: resend */
                 if ( !(req->rq_flags & PTL_RPC_FL_REPLIED)  &&
                      req->rq_xid > conn->c_last_xid) {
-                        CDEBUG(D_HA, "RESEND: xid "LPD64" transno "LPD64" op %d @ %d\n",
-                               req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
+                        CDEBUG(D_HA, "RESEND: xid "LPD64" transno "LPD64
+                               " op %d @ %d\n", req->rq_xid,
+                               req->rq_repmsg ? req->rq_repmsg->transno : 0,
+                               req->rq_reqmsg->opc,
                                req->rq_import->imp_client->cli_request_portal);
                         ptlrpc_resend_req(req);
                 }