Whamcloud - gitweb
- Cancel any and all outstanding locks when an export is disconnected.
author shaver <shaver>
Wed, 25 Sep 2002 02:25:26 +0000 (02:25 +0000)
committer shaver <shaver>
Wed, 25 Sep 2002 02:25:26 +0000 (02:25 +0000)
- Remove icky and unused ldlm_destroy_export and mds_destroy_export hooks.
- Trigger recovery from timed-out lock callbacks.
- Support for replyless requests:
 - add 1 to initial request refcount, balanced in request_out_callback
 - don't set up reply portal buffer if replen is 0
- Ignore replies to DLM blocking/completion ASTs.  (Note: we still very much care
  about cancellation in response to blocking ASTs.)
- Server-side recovery now "simply" forces a disconnect of every export using
  the failed connection.
- Handle (better, not perfectly) the case where we signal failure on a connection
  that is already undergoing recovery.  We need to do more here, but this will
  keep us from going too deeply insane for now.

12 files changed:
lustre/include/linux/lustre_dlm.h
lustre/include/linux/obd_class.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/lib/l_net.c
lustre/mds/handler.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/recovd.c

index 9f6c747..8fb5e3e 100644 (file)
@@ -301,6 +301,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock *lock, void *cookie,
 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
                                         int *flags);
 void ldlm_lock_cancel(struct ldlm_lock *lock);
+void ldlm_cancel_locks_for_export(struct obd_export *export);
 void ldlm_run_ast_work(struct list_head *rpc_list);
 void ldlm_reprocess_all(struct ldlm_resource *res);
 void ldlm_lock_dump(struct ldlm_lock *lock);
index ec53d6b..524e9fd 100644 (file)
@@ -716,11 +716,6 @@ int class_multi_cleanup(struct obd_device *obddev);
 
 extern void (*class_signal_connection_failure)(struct ptlrpc_connection *);
 
-/* == mds_client_free if MDS running here */
-extern int (*mds_destroy_export)(struct obd_export *exp);
-/* == ldlm_client_free if(?) DLM running here */
-extern int (*ldlm_destroy_export)(struct obd_export *exp);
-
 static inline struct ptlrpc_connection *class_rd2conn(struct recovd_data *rd)
 {
         /* reuse list_entry's member-pointer offset stuff */
index d3e2f47..27c6c6f 100644 (file)
@@ -813,6 +813,23 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
         EXIT;
 }
 
+void ldlm_cancel_locks_for_export(struct obd_export *exp)
+{
+        struct list_head *iter, *n; /* MUST BE CALLED "n"! */
+
+        list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) {
+                struct ldlm_lock *lock;
+                struct ldlm_resource *res;
+                lock = list_entry(iter, struct ldlm_lock, l_export_chain);
+                res = ldlm_resource_getref(lock->l_resource);
+                CDEBUG(D_INFO, "Cancelling lock:");
+                ldlm_lock_dump(lock);
+                ldlm_lock_cancel(lock);
+                ldlm_reprocess_all(res);
+                ldlm_resource_put(res);
+        }
+}
+
 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
                                         int *flags)
 {
index a370571..4166887 100644 (file)
@@ -41,19 +41,32 @@ inline unsigned long round_timeout(unsigned long timeout)
         return ((timeout / HZ) + 1) * HZ;
 }
 
+static struct list_head waiting_locks_list;
+static spinlock_t waiting_locks_spinlock;
+static struct timer_list waiting_locks_timer;
+
 static void waiting_locks_callback(unsigned long unused)
 {
-        CERROR("lock(s) expired! need to start recovery!\n");
+        struct list_head *liter, *n;
+        
+        spin_lock_bh(&waiting_locks_spinlock);
+        list_for_each_safe(liter, n, &waiting_locks_list) {
+                struct ldlm_lock *l = list_entry(liter, struct ldlm_lock,
+                                                 l_pending_chain);
+                if (l->l_callback_timeout > jiffies)
+                        break;
+                LDLM_DEBUG(l, "timer expired, recovering conn %p\n",
+                           l->l_export->exp_connection);
+                recovd_conn_fail(l->l_export->exp_connection);
+        }
+        spin_unlock_bh(&waiting_locks_spinlock);
 }
 
-static struct list_head waiting_locks_list;
-static spinlock_t waiting_locks_spinlock;
-static struct timer_list waiting_locks_timer;
 /*
  * Indicate that we're waiting for a client to call us back cancelling a given
  * lock.  We add it to the pending-callback chain, and schedule the lock-timeout
  * timer to fire appropriately.  (We round up to the next second, to avoid
- * floods of timer firings during periods of high lock contention and traffic.
+ * floods of timer firings during periods of high lock contention and traffic).
  */
 static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
 {
@@ -138,12 +151,14 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         memcpy(&body->lock_desc, desc, sizeof(*desc));
 
         LDLM_DEBUG(lock, "server preparing blocking AST");
-        req->rq_replen = lustre_msg_size(0, NULL);
+        req->rq_replen = 0; /* no reply needed */
 
         ldlm_add_waiting_lock(lock);
-        rc = ptlrpc_queue_wait(req);
-        rc = ptlrpc_check_status(req, rc);
-        ptlrpc_free_req(req);
+        (void)ptl_send_rpc(req);
+
+        /* no commit, and no waiting for reply, so 2x decref now */
+        ptlrpc_req_finished(req);
+        ptlrpc_req_finished(req);
 
         RETURN(rc);
 }
@@ -172,11 +187,13 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags)
         ldlm_lock2desc(lock, &body->lock_desc);
 
         LDLM_DEBUG(lock, "server preparing completion AST");
-        req->rq_replen = lustre_msg_size(0, NULL);
+        req->rq_replen = 0; /* no reply needed */
+
+        (void)ptl_send_rpc(req);
+        /* no commit, and no waiting for reply, so 2x decref now */
+        ptlrpc_req_finished(req);
+        ptlrpc_req_finished(req);
 
-        rc = ptlrpc_queue_wait(req);
-        rc = ptlrpc_check_status(req, rc);
-        ptlrpc_free_req(req);
         RETURN(rc);
 }
 
@@ -689,6 +706,7 @@ EXPORT_SYMBOL(ldlm_regression_stop);
 EXPORT_SYMBOL(ldlm_lock_dump);
 EXPORT_SYMBOL(ldlm_namespace_new);
 EXPORT_SYMBOL(ldlm_namespace_free);
+EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
 EXPORT_SYMBOL(l_lock);
 EXPORT_SYMBOL(l_unlock);
 
index c4965db..8a83095 100644 (file)
@@ -315,19 +315,27 @@ int target_handle_disconnect(struct ptlrpc_request *req)
                 RETURN(rc);
 
         req->rq_status = obd_disconnect(conn);
+
         RETURN(0);
 }
 
-static int target_revoke_client_resources(struct ptlrpc_connection *conn)
+static int target_disconnect_client(struct ptlrpc_connection *conn)
 {
-        struct list_head *tmp, *pos;
-
+        struct list_head *expiter, *n;
+        struct lustre_handle hdl;
+        struct obd_export *exp;
+        int rc;
         ENTRY;
 
-        /* Cancel outstanding locks. */
-        list_for_each_safe(tmp, pos, &conn->c_exports) {
-        }
+        list_for_each_safe(expiter, n, &conn->c_exports) {
+                exp = list_entry(expiter, struct obd_export, exp_conn_chain);
 
+                hdl.addr = (__u64)(unsigned long)exp;
+                hdl.cookie = exp->exp_cookie;
+                rc = obd_disconnect(&hdl);
+                if (rc)
+                        CERROR("disconnecting export %p failed: %d\n", exp, rc);
+        }
         RETURN(0);
 }
 
@@ -336,6 +344,7 @@ static int target_fence_failed_connection(struct ptlrpc_connection *conn)
         ENTRY;
 
         conn->c_level = LUSTRE_CONN_RECOVD;
+        conn->c_recovd_data.rd_phase = RECOVD_PREPARED;
 
         RETURN(0);
 }
@@ -351,7 +360,7 @@ int target_revoke_connection(struct recovd_data *rd, int phase)
             case PTLRPC_RECOVD_PHASE_PREPARE:
                 RETURN(target_fence_failed_connection(conn));
             case PTLRPC_RECOVD_PHASE_RECOVER:
-                RETURN(target_revoke_client_resources(conn));
+                RETURN(target_disconnect_client(conn));
             case PTLRPC_RECOVD_PHASE_FAILURE:
                 LBUG();
                 RETURN(0);
index 83a544c..147f7a3 100644 (file)
@@ -333,6 +333,10 @@ out_dec:
 static int mds_disconnect(struct lustre_handle *conn)
 {
         int rc;
+        struct obd_export *export = class_conn2export(conn);
+
+        ldlm_cancel_locks_for_export(export);
+        mds_client_free(export);
 
         rc = class_disconnect(conn);
         if (!rc)
@@ -1133,8 +1137,6 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
         if (rc)
                 GOTO(err_thread, rc);
 
-        mds_destroy_export = mds_client_free;
-
         ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL,
                            "mds_ldlm_client", &obddev->obd_ldlm_client);
 
index ff1341b..f7811b3 100644 (file)
@@ -935,8 +935,6 @@ static struct miscdevice obd_psdev = {
 };
 
 void (*class_signal_connection_failure)(struct ptlrpc_connection *);
-int (*mds_destroy_export)(struct obd_export *exp);
-int (*ldlm_destroy_export)(struct obd_export *exp);
 
 EXPORT_SYMBOL(obd_dev);
 EXPORT_SYMBOL(obdo_cachep);
@@ -965,8 +963,6 @@ EXPORT_SYMBOL(class_uuid_unparse);
 //EXPORT_SYMBOL(class_multi_cleanup);
 
 EXPORT_SYMBOL(class_signal_connection_failure);
-EXPORT_SYMBOL(mds_destroy_export);
-EXPORT_SYMBOL(ldlm_destroy_export);
 EXPORT_SYMBOL(ll_sync_io_cb);
 EXPORT_SYMBOL(ll_init_cb);
 
index acf1bd7..aaa0402 100644 (file)
@@ -35,11 +35,6 @@ static int sync_io_timeout(void *data)
         desc->bd_connection->c_level = LUSTRE_CONN_RECOVD;
         desc->bd_flags |= PTL_RPC_FL_TIMEOUT;
         if (desc->bd_connection && class_signal_connection_failure) {
-
-                /* XXXshaver Do we need a resend strategy, or do we just
-                 * XXXshaver return -ERESTARTSYS and punt it?
-                 */
-                CERROR("signalling failure of conn %p\n", desc->bd_connection);
                 class_signal_connection_failure(desc->bd_connection);
 
                 /* We go back to sleep, until we're resumed or interrupted. */
@@ -389,19 +384,6 @@ void class_destroy_export(struct obd_export *exp)
         list_del(&exp->exp_conn_chain);
         if (exp->exp_connection) spin_unlock(&exp->exp_connection->c_lock);
 
-        /* XXXshaver these bits want to be hung off the export, instead of
-         * XXXshaver hard-coded here.
-         */
-        if (mds_destroy_export) {
-                rc = mds_destroy_export(exp);
-                if (rc)
-                        CERROR("error freeing mds client data: rc = %d\n", rc);
-        }
-        if (ldlm_destroy_export) {
-                rc = ldlm_destroy_export(exp);
-                if (rc)
-                        CERROR("error freeing dlm client data: rc = %d\n", rc);
-        }
         kmem_cache_free(export_cachep, exp);
 
         EXIT;
index 79e4174..a57dafb 100644 (file)
@@ -173,8 +173,16 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
 
         INIT_LIST_HEAD(&request->rq_list);
         INIT_LIST_HEAD(&request->rq_multi);
-        /* this will be dec()d once in req_finished, once in free_committed */
-        atomic_set(&request->rq_refcount, 2);
+        /*
+         * This will be reduced once when the sender is finished (waiting for
+         * reply, f.e.), once when the request has been committed and is
+         * removed from the to-be-committed list, and once when portals is
+         * finished with it and has called request_out_callback.
+         *
+         * (Except in the DLM server case, where it will be dropped twice
+         * by the sender, and then the last time by request_out_callback.)
+         */
+        atomic_set(&request->rq_refcount, 3);
 
         spin_lock(&conn->c_lock);
         request->rq_xid = HTON__u32(++conn->c_xid_out);
index 4e9b29c..e6e0a06 100644 (file)
@@ -35,6 +35,7 @@ static const ptl_handle_ni_t *socknal_nip = NULL, *qswnal_nip = NULL, *gmnal_nip
  */
 static int request_out_callback(ptl_event_t *ev)
 {
+        struct ptlrpc_request *req = ev->mem_desc.user_ptr;
         ENTRY;
 
         LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
@@ -45,6 +46,7 @@ static int request_out_callback(ptl_event_t *ev)
                 LBUG();
         }
 
+        ptlrpc_req_finished(req);
         RETURN(1);
 }
 
index d7884f9..0668c2b 100644 (file)
@@ -327,56 +327,54 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                 LBUG();
                 RETURN(EINVAL);
         }
-        if (request->rq_replen == 0) {
-                CERROR("request->rq_replen is 0!\n");
-                RETURN(EINVAL);
-        }
-
-        /* request->rq_repmsg is set only when the reply comes in, in
-         * client_packet_callback() */
-        if (request->rq_reply_md.start)
-                OBD_FREE(request->rq_reply_md.start, request->rq_replen);
-
-        OBD_ALLOC(repbuf, request->rq_replen);
-        if (!repbuf) {
-                LBUG();
-                RETURN(ENOMEM);
-        }
-
-        // down(&request->rq_client->cli_rpc_sem);
 
         source_id.nid = request->rq_connection->c_peer.peer_nid;
         source_id.pid = PTL_PID_ANY;
 
-        rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
-                         request->rq_import->imp_client->cli_reply_portal,
-                         source_id, request->rq_xid, 0, PTL_UNLINK,
-                         PTL_INS_AFTER, &request->rq_reply_me_h);
-        if (rc != PTL_OK) {
-                CERROR("PtlMEAttach failed: %d\n", rc);
-                LBUG();
-                GOTO(cleanup, rc);
-        }
-
-        request->rq_reply_md.start = repbuf;
-        request->rq_reply_md.length = request->rq_replen;
-        request->rq_reply_md.threshold = 1;
-        request->rq_reply_md.options = PTL_MD_OP_PUT;
-        request->rq_reply_md.user_ptr = request;
-        request->rq_reply_md.eventq = reply_in_eq;
-
-        rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md,
-                         PTL_UNLINK, &request->rq_reply_md_h);
-        if (rc != PTL_OK) {
-                CERROR("PtlMDAttach failed: %d\n", rc);
-                LBUG();
-                GOTO(cleanup2, rc);
+        if (request->rq_replen != 0) {
+
+                /* request->rq_repmsg is set only when the reply comes in, in
+                 * client_packet_callback() */
+                if (request->rq_reply_md.start)
+                        OBD_FREE(request->rq_reply_md.start, request->rq_replen);
+                
+                OBD_ALLOC(repbuf, request->rq_replen);
+                if (!repbuf) {
+                        LBUG();
+                        RETURN(ENOMEM);
+                }
+
+                rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
+                               request->rq_import->imp_client->cli_reply_portal,
+                                 source_id, request->rq_xid, 0, PTL_UNLINK,
+                                 PTL_INS_AFTER, &request->rq_reply_me_h);
+                if (rc != PTL_OK) {
+                        CERROR("PtlMEAttach failed: %d\n", rc);
+                        LBUG();
+                        GOTO(cleanup, rc);
+                }
+
+                request->rq_reply_md.start = repbuf;
+                request->rq_reply_md.length = request->rq_replen;
+                request->rq_reply_md.threshold = 1;
+                request->rq_reply_md.options = PTL_MD_OP_PUT;
+                request->rq_reply_md.user_ptr = request;
+                request->rq_reply_md.eventq = reply_in_eq;
+                
+                rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md,
+                                 PTL_UNLINK, &request->rq_reply_md_h);
+                if (rc != PTL_OK) {
+                        CERROR("PtlMDAttach failed: %d\n", rc);
+                        LBUG();
+                        GOTO(cleanup2, rc);
+                }
+                
+                CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
+                       ", portal %u\n",
+                       request->rq_replen, request->rq_xid,
+                       request->rq_import->imp_client->cli_reply_portal);
         }
 
-        CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64", portal %u\n",
-               request->rq_replen, request->rq_xid,
-               request->rq_import->imp_client->cli_reply_portal);
-
         rc = ptl_send_buf(request, request->rq_connection,
                           request->rq_import->imp_client->cli_request_portal);
         RETURN(rc);
index 7561ba0..f7787d3 100644 (file)
@@ -44,13 +44,24 @@ void recovd_conn_fail(struct ptlrpc_connection *conn)
 
         if (!recovd) {
                 CERROR("no recovd for connection %p\n", conn);
+                EXIT;
                 return;
         }
 
-        CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
+
         spin_lock(&recovd->recovd_lock);
+        if (rd->rd_phase != RECOVD_IDLE || rd->rd_next_phase != RECOVD_IDLE) {
+                CDEBUG(D_INFO, "connection %p to %s already in recovery\n",
+                       conn, conn->c_remote_uuid);
+                spin_unlock(&recovd->recovd_lock);
+                EXIT;
+                return;
+        }
+                
+        CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
         list_del(&rd->rd_managed_chain);
         list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
+        rd->rd_next_phase = RECOVD_PREPARING;
         spin_unlock(&recovd->recovd_lock);
 
         wake_up(&recovd->recovd_waitq);
@@ -89,6 +100,8 @@ static int recovd_check_event(struct recovd_obd *recovd)
                                                     rd_managed_chain);
 
                 if (rd->rd_phase == rd->rd_next_phase ||
+                    (rd->rd_phase == RECOVD_IDLE && 
+                     rd->rd_next_phase == RECOVD_PREPARING) ||
                     rd->rd_phase == RECOVD_FAILED)
                         GOTO(out, rc = 1);
         }
@@ -131,7 +144,10 @@ static int recovd_handle_event(struct recovd_obd *recovd)
                 struct recovd_data *rd = list_entry(tmp, struct recovd_data,
                                                     rd_managed_chain);
 
+                /* XXXshaver This is very ugly -- add a RECOVD_TROUBLED state! */
                 if (rd->rd_phase != RECOVD_FAILED &&
+                    !(rd->rd_phase == RECOVD_IDLE &&
+                      rd->rd_next_phase == RECOVD_PREPARING) &&
                     rd->rd_phase != rd->rd_next_phase)
                         continue;
 
@@ -236,8 +252,6 @@ static int recovd_main(void *arg)
 int recovd_setup(struct recovd_obd *recovd)
 {
         int rc;
-        extern void (*class_signal_connection_failure)
-                (struct ptlrpc_connection *);
 
         ENTRY;
 
@@ -258,8 +272,6 @@ int recovd_setup(struct recovd_obd *recovd)
         wait_event(recovd->recovd_ctl_waitq,
                    recovd->recovd_state == RECOVD_READY);
 
-        /* exported and called by obdclass timeout handlers */
-        class_signal_connection_failure = recovd_conn_fail;
         ptlrpc_recovd = recovd;
 
         RETURN(0);