- Remove icky and unused ldlm_destroy_export and mds_destroy_export hooks.
- Trigger recovery from timed-out lock callbacks.
- Support for replyless requests:
- add 1 to initial request refcount, balanced in request_out_callback
- don't set up reply portal buffer if replen is 0
- Ignore replies to DLM blocking/completion ASTs. (Note: we still very much care
  about cancellation in response to blocking ASTs.)
- Server-side recovery now "simply" forces a disconnect of every export using
the failed connection.
- Handle (better, not perfectly) the case where we signal failure on a connection
that is already undergoing recovery. We need to do more here, but this will
keep us from going too deeply insane for now.
struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
int *flags);
void ldlm_lock_cancel(struct ldlm_lock *lock);
+void ldlm_cancel_locks_for_export(struct obd_export *export);
void ldlm_run_ast_work(struct list_head *rpc_list);
void ldlm_reprocess_all(struct ldlm_resource *res);
void ldlm_lock_dump(struct ldlm_lock *lock);
extern void (*class_signal_connection_failure)(struct ptlrpc_connection *);
-/* == mds_client_free if MDS running here */
-extern int (*mds_destroy_export)(struct obd_export *exp);
-/* == ldlm_client_free if(?) DLM running here */
-extern int (*ldlm_destroy_export)(struct obd_export *exp);
-
static inline struct ptlrpc_connection *class_rd2conn(struct recovd_data *rd)
{
/* reuse list_entry's member-pointer offset stuff */
EXIT;
}
+void ldlm_cancel_locks_for_export(struct obd_export *exp)
+{
+ struct list_head *iter, *n; /* MUST BE CALLED "n"! */
+
+ list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) {
+ struct ldlm_lock *lock;
+ struct ldlm_resource *res;
+ lock = list_entry(iter, struct ldlm_lock, l_export_chain);
+ res = ldlm_resource_getref(lock->l_resource);
+ CDEBUG(D_INFO, "Cancelling lock:");
+ ldlm_lock_dump(lock);
+ ldlm_lock_cancel(lock);
+ ldlm_reprocess_all(res);
+ ldlm_resource_put(res);
+ }
+}
+
struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
int *flags)
{
return ((timeout / HZ) + 1) * HZ;
}
+static struct list_head waiting_locks_list;
+static spinlock_t waiting_locks_spinlock;
+static struct timer_list waiting_locks_timer;
+
static void waiting_locks_callback(unsigned long unused)
{
- CERROR("lock(s) expired! need to start recovery!\n");
+ struct list_head *liter, *n;
+
+ spin_lock_bh(&waiting_locks_spinlock);
+ list_for_each_safe(liter, n, &waiting_locks_list) {
+ struct ldlm_lock *l = list_entry(liter, struct ldlm_lock,
+ l_pending_chain);
+ if (l->l_callback_timeout > jiffies)
+ break;
+ LDLM_DEBUG(l, "timer expired, recovering conn %p\n",
+ l->l_export->exp_connection);
+ recovd_conn_fail(l->l_export->exp_connection);
+ }
+ spin_unlock_bh(&waiting_locks_spinlock);
}
-static struct list_head waiting_locks_list;
-static spinlock_t waiting_locks_spinlock;
-static struct timer_list waiting_locks_timer;
/*
* Indicate that we're waiting for a client to call us back cancelling a given
* lock. We add it to the pending-callback chain, and schedule the lock-timeout
* timer to fire appropriately. (We round up to the next second, to avoid
- * floods of timer firings during periods of high lock contention and traffic.
+ * floods of timer firings during periods of high lock contention and traffic).
*/
static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
memcpy(&body->lock_desc, desc, sizeof(*desc));
LDLM_DEBUG(lock, "server preparing blocking AST");
- req->rq_replen = lustre_msg_size(0, NULL);
+ req->rq_replen = 0; /* no reply needed */
ldlm_add_waiting_lock(lock);
- rc = ptlrpc_queue_wait(req);
- rc = ptlrpc_check_status(req, rc);
- ptlrpc_free_req(req);
+ /* Capture the send result: the retained RETURN(rc) below would
+ * otherwise return a stale/uninitialized rc now that the
+ * queue_wait/check_status assignments are gone. */
+ rc = ptl_send_rpc(req);
+
+ /* no commit, and no waiting for reply, so 2x decref now */
+ ptlrpc_req_finished(req);
+ ptlrpc_req_finished(req);
RETURN(rc);
}
ldlm_lock2desc(lock, &body->lock_desc);
LDLM_DEBUG(lock, "server preparing completion AST");
- req->rq_replen = lustre_msg_size(0, NULL);
+ req->rq_replen = 0; /* no reply needed */
+
+ /* Capture the send result: the retained RETURN(rc) below would
+ * otherwise return a stale/uninitialized rc now that the
+ * queue_wait/check_status assignments are gone. */
+ rc = ptl_send_rpc(req);
+ /* no commit, and no waiting for reply, so 2x decref now */
+ ptlrpc_req_finished(req);
+ ptlrpc_req_finished(req);
- rc = ptlrpc_queue_wait(req);
- rc = ptlrpc_check_status(req, rc);
- ptlrpc_free_req(req);
RETURN(rc);
}
EXPORT_SYMBOL(ldlm_lock_dump);
EXPORT_SYMBOL(ldlm_namespace_new);
EXPORT_SYMBOL(ldlm_namespace_free);
+EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
EXPORT_SYMBOL(l_lock);
EXPORT_SYMBOL(l_unlock);
RETURN(rc);
req->rq_status = obd_disconnect(conn);
+
RETURN(0);
}
-static int target_revoke_client_resources(struct ptlrpc_connection *conn)
+static int target_disconnect_client(struct ptlrpc_connection *conn)
{
- struct list_head *tmp, *pos;
-
+ struct list_head *expiter, *n;
+ struct lustre_handle hdl;
+ struct obd_export *exp;
+ int rc;
ENTRY;
- /* Cancel outstanding locks. */
- list_for_each_safe(tmp, pos, &conn->c_exports) {
- }
+ list_for_each_safe(expiter, n, &conn->c_exports) {
+ exp = list_entry(expiter, struct obd_export, exp_conn_chain);
+ hdl.addr = (__u64)(unsigned long)exp;
+ hdl.cookie = exp->exp_cookie;
+ rc = obd_disconnect(&hdl);
+ if (rc)
+ CERROR("disconnecting export %p failed: %d\n", exp, rc);
+ }
RETURN(0);
}
ENTRY;
conn->c_level = LUSTRE_CONN_RECOVD;
+ conn->c_recovd_data.rd_phase = RECOVD_PREPARED;
RETURN(0);
}
case PTLRPC_RECOVD_PHASE_PREPARE:
RETURN(target_fence_failed_connection(conn));
case PTLRPC_RECOVD_PHASE_RECOVER:
- RETURN(target_revoke_client_resources(conn));
+ RETURN(target_disconnect_client(conn));
case PTLRPC_RECOVD_PHASE_FAILURE:
LBUG();
RETURN(0);
static int mds_disconnect(struct lustre_handle *conn)
{
int rc;
+ struct obd_export *export = class_conn2export(conn);
+
+ ldlm_cancel_locks_for_export(export);
+ mds_client_free(export);
rc = class_disconnect(conn);
if (!rc)
if (rc)
GOTO(err_thread, rc);
- mds_destroy_export = mds_client_free;
-
ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL,
"mds_ldlm_client", &obddev->obd_ldlm_client);
};
void (*class_signal_connection_failure)(struct ptlrpc_connection *);
-int (*mds_destroy_export)(struct obd_export *exp);
-int (*ldlm_destroy_export)(struct obd_export *exp);
EXPORT_SYMBOL(obd_dev);
EXPORT_SYMBOL(obdo_cachep);
//EXPORT_SYMBOL(class_multi_cleanup);
EXPORT_SYMBOL(class_signal_connection_failure);
-EXPORT_SYMBOL(mds_destroy_export);
-EXPORT_SYMBOL(ldlm_destroy_export);
EXPORT_SYMBOL(ll_sync_io_cb);
EXPORT_SYMBOL(ll_init_cb);
desc->bd_connection->c_level = LUSTRE_CONN_RECOVD;
desc->bd_flags |= PTL_RPC_FL_TIMEOUT;
if (desc->bd_connection && class_signal_connection_failure) {
-
- /* XXXshaver Do we need a resend strategy, or do we just
- * XXXshaver return -ERESTARTSYS and punt it?
- */
- CERROR("signalling failure of conn %p\n", desc->bd_connection);
class_signal_connection_failure(desc->bd_connection);
/* We go back to sleep, until we're resumed or interrupted. */
list_del(&exp->exp_conn_chain);
if (exp->exp_connection) spin_unlock(&exp->exp_connection->c_lock);
- /* XXXshaver these bits want to be hung off the export, instead of
- * XXXshaver hard-coded here.
- */
- if (mds_destroy_export) {
- rc = mds_destroy_export(exp);
- if (rc)
- CERROR("error freeing mds client data: rc = %d\n", rc);
- }
- if (ldlm_destroy_export) {
- rc = ldlm_destroy_export(exp);
- if (rc)
- CERROR("error freeing dlm client data: rc = %d\n", rc);
- }
kmem_cache_free(export_cachep, exp);
EXIT;
INIT_LIST_HEAD(&request->rq_list);
INIT_LIST_HEAD(&request->rq_multi);
- /* this will be dec()d once in req_finished, once in free_committed */
- atomic_set(&request->rq_refcount, 2);
+ /*
+ * This will be reduced once when the sender is finished (waiting for
+ * reply, f.e.), once when the request has been committed and is
+ * removed from the to-be-committed list, and once when portals is
+ * finished with it and has called request_out_callback.
+ *
+ * (Except in the DLM server case, where it will be dropped twice
+ * by the sender, and then the last time by request_out_callback.)
+ */
+ atomic_set(&request->rq_refcount, 3);
spin_lock(&conn->c_lock);
request->rq_xid = HTON__u32(++conn->c_xid_out);
*/
static int request_out_callback(ptl_event_t *ev)
{
+ struct ptlrpc_request *req = ev->mem_desc.user_ptr;
ENTRY;
LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */
LBUG();
}
+ ptlrpc_req_finished(req);
RETURN(1);
}
LBUG();
RETURN(EINVAL);
}
- if (request->rq_replen == 0) {
- CERROR("request->rq_replen is 0!\n");
- RETURN(EINVAL);
- }
-
- /* request->rq_repmsg is set only when the reply comes in, in
- * client_packet_callback() */
- if (request->rq_reply_md.start)
- OBD_FREE(request->rq_reply_md.start, request->rq_replen);
-
- OBD_ALLOC(repbuf, request->rq_replen);
- if (!repbuf) {
- LBUG();
- RETURN(ENOMEM);
- }
-
- // down(&request->rq_client->cli_rpc_sem);
source_id.nid = request->rq_connection->c_peer.peer_nid;
source_id.pid = PTL_PID_ANY;
- rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
- request->rq_import->imp_client->cli_reply_portal,
- source_id, request->rq_xid, 0, PTL_UNLINK,
- PTL_INS_AFTER, &request->rq_reply_me_h);
- if (rc != PTL_OK) {
- CERROR("PtlMEAttach failed: %d\n", rc);
- LBUG();
- GOTO(cleanup, rc);
- }
-
- request->rq_reply_md.start = repbuf;
- request->rq_reply_md.length = request->rq_replen;
- request->rq_reply_md.threshold = 1;
- request->rq_reply_md.options = PTL_MD_OP_PUT;
- request->rq_reply_md.user_ptr = request;
- request->rq_reply_md.eventq = reply_in_eq;
-
- rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md,
- PTL_UNLINK, &request->rq_reply_md_h);
- if (rc != PTL_OK) {
- CERROR("PtlMDAttach failed: %d\n", rc);
- LBUG();
- GOTO(cleanup2, rc);
+ if (request->rq_replen != 0) {
+
+ /* request->rq_repmsg is set only when the reply comes in, in
+ * client_packet_callback() */
+ if (request->rq_reply_md.start)
+ OBD_FREE(request->rq_reply_md.start, request->rq_replen);
+
+ OBD_ALLOC(repbuf, request->rq_replen);
+ if (!repbuf) {
+ LBUG();
+ RETURN(ENOMEM);
+ }
+
+ rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
+ request->rq_import->imp_client->cli_reply_portal,
+ source_id, request->rq_xid, 0, PTL_UNLINK,
+ PTL_INS_AFTER, &request->rq_reply_me_h);
+ if (rc != PTL_OK) {
+ CERROR("PtlMEAttach failed: %d\n", rc);
+ LBUG();
+ GOTO(cleanup, rc);
+ }
+
+ request->rq_reply_md.start = repbuf;
+ request->rq_reply_md.length = request->rq_replen;
+ request->rq_reply_md.threshold = 1;
+ request->rq_reply_md.options = PTL_MD_OP_PUT;
+ request->rq_reply_md.user_ptr = request;
+ request->rq_reply_md.eventq = reply_in_eq;
+
+ rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md,
+ PTL_UNLINK, &request->rq_reply_md_h);
+ if (rc != PTL_OK) {
+ CERROR("PtlMDAttach failed: %d\n", rc);
+ LBUG();
+ GOTO(cleanup2, rc);
+ }
+
+ CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
+ ", portal %u\n",
+ request->rq_replen, request->rq_xid,
+ request->rq_import->imp_client->cli_reply_portal);
}
- CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64", portal %u\n",
- request->rq_replen, request->rq_xid,
- request->rq_import->imp_client->cli_reply_portal);
-
rc = ptl_send_buf(request, request->rq_connection,
request->rq_import->imp_client->cli_request_portal);
RETURN(rc);
if (!recovd) {
CERROR("no recovd for connection %p\n", conn);
+ EXIT;
return;
}
- CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
+
spin_lock(&recovd->recovd_lock);
+ if (rd->rd_phase != RECOVD_IDLE || rd->rd_next_phase != RECOVD_IDLE) {
+ CDEBUG(D_INFO, "connection %p to %s already in recovery\n",
+ conn, conn->c_remote_uuid);
+ spin_unlock(&recovd->recovd_lock);
+ EXIT;
+ return;
+ }
+
+ CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid);
list_del(&rd->rd_managed_chain);
list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items);
+ rd->rd_next_phase = RECOVD_PREPARING;
spin_unlock(&recovd->recovd_lock);
wake_up(&recovd->recovd_waitq);
rd_managed_chain);
if (rd->rd_phase == rd->rd_next_phase ||
+ (rd->rd_phase == RECOVD_IDLE &&
+ rd->rd_next_phase == RECOVD_PREPARING) ||
rd->rd_phase == RECOVD_FAILED)
GOTO(out, rc = 1);
}
struct recovd_data *rd = list_entry(tmp, struct recovd_data,
rd_managed_chain);
+ /* XXXshaver This is very ugly -- add a RECOVD_TROUBLED state! */
if (rd->rd_phase != RECOVD_FAILED &&
+ !(rd->rd_phase == RECOVD_IDLE &&
+ rd->rd_next_phase == RECOVD_PREPARING) &&
rd->rd_phase != rd->rd_next_phase)
continue;
int recovd_setup(struct recovd_obd *recovd)
{
int rc;
- extern void (*class_signal_connection_failure)
- (struct ptlrpc_connection *);
ENTRY;
wait_event(recovd->recovd_ctl_waitq,
recovd->recovd_state == RECOVD_READY);
- /* exported and called by obdclass timeout handlers */
- class_signal_connection_failure = recovd_conn_fail;
ptlrpc_recovd = recovd;
RETURN(0);