From: shaver
Date: Wed, 25 Sep 2002 02:25:26 +0000 (+0000)
Subject: - Cancel any and all outstanding locks when an export is disconnected.
X-Git-Tag: v1_7_100~4681
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=56b50ee1acba93ae55ebd3a2f5ec21e3fc7a8977;p=fs%2Flustre-release.git

- Cancel any and all outstanding locks when an export is disconnected.
- Remove icky and unused ldlm_destroy_export and mds_destroy_export hooks.
- Trigger recovery from timed-out lock callbacks.
- Support for replyless requests:
  - add 1 to initial request refcount, balanced in request_out_callback
  - don't set up reply portal buffer if replen is 0
- Ignore reply to DLM blocking/completed ASTs. (Note: we still very much
  care about cancellation in response to blocking ASTs.)
- Server-side recovery now "simply" forces a disconnect of every export using
  the failed connection.
- Handle (better, not perfectly) the case where we signal failure on a
  connection that is already undergoing recovery. We need to do more here,
  but this will keep us from going too deeply insane for now.
--- diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 9f6c747..8fb5e3e 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -301,6 +301,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock *lock, void *cookie, struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, int *flags); void ldlm_lock_cancel(struct ldlm_lock *lock); +void ldlm_cancel_locks_for_export(struct obd_export *export); void ldlm_run_ast_work(struct list_head *rpc_list); void ldlm_reprocess_all(struct ldlm_resource *res); void ldlm_lock_dump(struct ldlm_lock *lock); diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index ec53d6b..524e9fd 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -716,11 +716,6 @@ int class_multi_cleanup(struct obd_device *obddev); extern void (*class_signal_connection_failure)(struct ptlrpc_connection *); -/* == mds_client_free if MDS running here */ -extern int (*mds_destroy_export)(struct obd_export *exp); -/* == ldlm_client_free if(?) DLM running here */ -extern int (*ldlm_destroy_export)(struct obd_export *exp); - static inline struct ptlrpc_connection *class_rd2conn(struct recovd_data *rd) { /* reuse list_entry's member-pointer offset stuff */ diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index d3e2f47..27c6c6f 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -813,6 +813,23 @@ void ldlm_lock_cancel(struct ldlm_lock *lock) EXIT; } +void ldlm_cancel_locks_for_export(struct obd_export *exp) +{ + struct list_head *iter, *n; /* MUST BE CALLED "n"! 
*/ + + list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) { + struct ldlm_lock *lock; + struct ldlm_resource *res; + lock = list_entry(iter, struct ldlm_lock, l_export_chain); + res = ldlm_resource_getref(lock->l_resource); + CDEBUG(D_INFO, "Cancelling lock:"); + ldlm_lock_dump(lock); + ldlm_lock_cancel(lock); + ldlm_reprocess_all(res); + ldlm_resource_put(res); + } +} + struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, int *flags) { diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index a370571..4166887 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -41,19 +41,32 @@ inline unsigned long round_timeout(unsigned long timeout) return ((timeout / HZ) + 1) * HZ; } +static struct list_head waiting_locks_list; +static spinlock_t waiting_locks_spinlock; +static struct timer_list waiting_locks_timer; + static void waiting_locks_callback(unsigned long unused) { - CERROR("lock(s) expired! need to start recovery!\n"); + struct list_head *liter, *n; + + spin_lock_bh(&waiting_locks_spinlock); + list_for_each_safe(liter, n, &waiting_locks_list) { + struct ldlm_lock *l = list_entry(liter, struct ldlm_lock, + l_pending_chain); + if (l->l_callback_timeout > jiffies) + break; + LDLM_DEBUG(l, "timer expired, recovering conn %p\n", + l->l_export->exp_connection); + recovd_conn_fail(l->l_export->exp_connection); + } + spin_unlock_bh(&waiting_locks_spinlock); } -static struct list_head waiting_locks_list; -static spinlock_t waiting_locks_spinlock; -static struct timer_list waiting_locks_timer; /* * Indicate that we're waiting for a client to call us back cancelling a given * lock. We add it to the pending-callback chain, and schedule the lock-timeout * timer to fire appropriately. (We round up to the next second, to avoid - * floods of timer firings during periods of high lock contention and traffic. + * floods of timer firings during periods of high lock contention and traffic). 
*/ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) { @@ -138,12 +151,14 @@ static int ldlm_server_blocking_ast(struct ldlm_lock *lock, memcpy(&body->lock_desc, desc, sizeof(*desc)); LDLM_DEBUG(lock, "server preparing blocking AST"); - req->rq_replen = lustre_msg_size(0, NULL); + req->rq_replen = 0; /* no reply needed */ ldlm_add_waiting_lock(lock); - rc = ptlrpc_queue_wait(req); - rc = ptlrpc_check_status(req, rc); - ptlrpc_free_req(req); + (void)ptl_send_rpc(req); + + /* no commit, and no waiting for reply, so 2x decref now */ + ptlrpc_req_finished(req); + ptlrpc_req_finished(req); RETURN(rc); } @@ -172,11 +187,13 @@ static int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags) ldlm_lock2desc(lock, &body->lock_desc); LDLM_DEBUG(lock, "server preparing completion AST"); - req->rq_replen = lustre_msg_size(0, NULL); + req->rq_replen = 0; /* no reply needed */ + + (void)ptl_send_rpc(req); + /* no commit, and no waiting for reply, so 2x decref now */ + ptlrpc_req_finished(req); + ptlrpc_req_finished(req); - rc = ptlrpc_queue_wait(req); - rc = ptlrpc_check_status(req, rc); - ptlrpc_free_req(req); RETURN(rc); } @@ -689,6 +706,7 @@ EXPORT_SYMBOL(ldlm_regression_stop); EXPORT_SYMBOL(ldlm_lock_dump); EXPORT_SYMBOL(ldlm_namespace_new); EXPORT_SYMBOL(ldlm_namespace_free); +EXPORT_SYMBOL(ldlm_cancel_locks_for_export); EXPORT_SYMBOL(l_lock); EXPORT_SYMBOL(l_unlock); diff --git a/lustre/lib/l_net.c b/lustre/lib/l_net.c index c4965db..8a83095 100644 --- a/lustre/lib/l_net.c +++ b/lustre/lib/l_net.c @@ -315,19 +315,27 @@ int target_handle_disconnect(struct ptlrpc_request *req) RETURN(rc); req->rq_status = obd_disconnect(conn); + RETURN(0); } -static int target_revoke_client_resources(struct ptlrpc_connection *conn) +static int target_disconnect_client(struct ptlrpc_connection *conn) { - struct list_head *tmp, *pos; - + struct list_head *expiter, *n; + struct lustre_handle hdl; + struct obd_export *exp; + int rc; ENTRY; - /* Cancel outstanding locks. 
*/ - list_for_each_safe(tmp, pos, &conn->c_exports) { - } + list_for_each_safe(expiter, n, &conn->c_exports) { + exp = list_entry(expiter, struct obd_export, exp_conn_chain); + hdl.addr = (__u64)(unsigned long)exp; + hdl.cookie = exp->exp_cookie; + rc = obd_disconnect(&hdl); + if (rc) + CERROR("disconnecting export %p failed: %d\n", exp, rc); + } RETURN(0); } @@ -336,6 +344,7 @@ static int target_fence_failed_connection(struct ptlrpc_connection *conn) ENTRY; conn->c_level = LUSTRE_CONN_RECOVD; + conn->c_recovd_data.rd_phase = RECOVD_PREPARED; RETURN(0); } @@ -351,7 +360,7 @@ int target_revoke_connection(struct recovd_data *rd, int phase) case PTLRPC_RECOVD_PHASE_PREPARE: RETURN(target_fence_failed_connection(conn)); case PTLRPC_RECOVD_PHASE_RECOVER: - RETURN(target_revoke_client_resources(conn)); + RETURN(target_disconnect_client(conn)); case PTLRPC_RECOVD_PHASE_FAILURE: LBUG(); RETURN(0); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 83a544c..147f7a36 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -333,6 +333,10 @@ out_dec: static int mds_disconnect(struct lustre_handle *conn) { int rc; + struct obd_export *export = class_conn2export(conn); + + ldlm_cancel_locks_for_export(export); + mds_client_free(export); rc = class_disconnect(conn); if (!rc) @@ -1133,8 +1137,6 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) if (rc) GOTO(err_thread, rc); - mds_destroy_export = mds_client_free; - ptlrpc_init_client(LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, "mds_ldlm_client", &obddev->obd_ldlm_client); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index ff1341b..f7811b3 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -935,8 +935,6 @@ static struct miscdevice obd_psdev = { }; void (*class_signal_connection_failure)(struct ptlrpc_connection *); -int (*mds_destroy_export)(struct obd_export *exp); -int (*ldlm_destroy_export)(struct obd_export *exp); 
EXPORT_SYMBOL(obd_dev); EXPORT_SYMBOL(obdo_cachep); @@ -965,8 +963,6 @@ EXPORT_SYMBOL(class_uuid_unparse); //EXPORT_SYMBOL(class_multi_cleanup); EXPORT_SYMBOL(class_signal_connection_failure); -EXPORT_SYMBOL(mds_destroy_export); -EXPORT_SYMBOL(ldlm_destroy_export); EXPORT_SYMBOL(ll_sync_io_cb); EXPORT_SYMBOL(ll_init_cb); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index acf1bd7..aaa0402 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -35,11 +35,6 @@ static int sync_io_timeout(void *data) desc->bd_connection->c_level = LUSTRE_CONN_RECOVD; desc->bd_flags |= PTL_RPC_FL_TIMEOUT; if (desc->bd_connection && class_signal_connection_failure) { - - /* XXXshaver Do we need a resend strategy, or do we just - * XXXshaver return -ERESTARTSYS and punt it? - */ - CERROR("signalling failure of conn %p\n", desc->bd_connection); class_signal_connection_failure(desc->bd_connection); /* We go back to sleep, until we're resumed or interrupted. */ @@ -389,19 +384,6 @@ void class_destroy_export(struct obd_export *exp) list_del(&exp->exp_conn_chain); if (exp->exp_connection) spin_unlock(&exp->exp_connection->c_lock); - /* XXXshaver these bits want to be hung off the export, instead of - * XXXshaver hard-coded here. 
- */ - if (mds_destroy_export) { - rc = mds_destroy_export(exp); - if (rc) - CERROR("error freeing mds client data: rc = %d\n", rc); - } - if (ldlm_destroy_export) { - rc = ldlm_destroy_export(exp); - if (rc) - CERROR("error freeing dlm client data: rc = %d\n", rc); - } kmem_cache_free(export_cachep, exp); EXIT; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 79e4174..a57dafb 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -173,8 +173,16 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, INIT_LIST_HEAD(&request->rq_list); INIT_LIST_HEAD(&request->rq_multi); - /* this will be dec()d once in req_finished, once in free_committed */ - atomic_set(&request->rq_refcount, 2); + /* + * This will be reduced once when the sender is finished (waiting for + * reply, f.e.), once when the request has been committed and is + * removed from the to-be-committed list, and once when portals is + * finished with it and has called request_out_callback. + * + * (Except in the DLM server case, where it will be dropped twice + * by the sender, and then the last time by request_out_callback.) 
+ */ + atomic_set(&request->rq_refcount, 3); spin_lock(&conn->c_lock); request->rq_xid = HTON__u32(++conn->c_xid_out); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 4e9b29c..e6e0a06 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -35,6 +35,7 @@ static const ptl_handle_ni_t *socknal_nip = NULL, *qswnal_nip = NULL, *gmnal_nip */ static int request_out_callback(ptl_event_t *ev) { + struct ptlrpc_request *req = ev->mem_desc.user_ptr; ENTRY; LASSERT ((ev->mem_desc.options & PTL_MD_IOV) == 0); /* requests always contiguous */ @@ -45,6 +46,7 @@ static int request_out_callback(ptl_event_t *ev) LBUG(); } + ptlrpc_req_finished(req); RETURN(1); } diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index d7884f9..0668c2b 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -327,56 +327,54 @@ int ptl_send_rpc(struct ptlrpc_request *request) LBUG(); RETURN(EINVAL); } - if (request->rq_replen == 0) { - CERROR("request->rq_replen is 0!\n"); - RETURN(EINVAL); - } - - /* request->rq_repmsg is set only when the reply comes in, in - * client_packet_callback() */ - if (request->rq_reply_md.start) - OBD_FREE(request->rq_reply_md.start, request->rq_replen); - - OBD_ALLOC(repbuf, request->rq_replen); - if (!repbuf) { - LBUG(); - RETURN(ENOMEM); - } - - // down(&request->rq_client->cli_rpc_sem); source_id.nid = request->rq_connection->c_peer.peer_nid; source_id.pid = PTL_PID_ANY; - rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni, - request->rq_import->imp_client->cli_reply_portal, - source_id, request->rq_xid, 0, PTL_UNLINK, - PTL_INS_AFTER, &request->rq_reply_me_h); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - LBUG(); - GOTO(cleanup, rc); - } - - request->rq_reply_md.start = repbuf; - request->rq_reply_md.length = request->rq_replen; - request->rq_reply_md.threshold = 1; - request->rq_reply_md.options = PTL_MD_OP_PUT; - request->rq_reply_md.user_ptr = request; - request->rq_reply_md.eventq = 
reply_in_eq; - - rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md, - PTL_UNLINK, &request->rq_reply_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LBUG(); - GOTO(cleanup2, rc); + if (request->rq_replen != 0) { + + /* request->rq_repmsg is set only when the reply comes in, in + * client_packet_callback() */ + if (request->rq_reply_md.start) + OBD_FREE(request->rq_reply_md.start, request->rq_replen); + + OBD_ALLOC(repbuf, request->rq_replen); + if (!repbuf) { + LBUG(); + RETURN(ENOMEM); + } + + rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni, + request->rq_import->imp_client->cli_reply_portal, + source_id, request->rq_xid, 0, PTL_UNLINK, + PTL_INS_AFTER, &request->rq_reply_me_h); + if (rc != PTL_OK) { + CERROR("PtlMEAttach failed: %d\n", rc); + LBUG(); + GOTO(cleanup, rc); + } + + request->rq_reply_md.start = repbuf; + request->rq_reply_md.length = request->rq_replen; + request->rq_reply_md.threshold = 1; + request->rq_reply_md.options = PTL_MD_OP_PUT; + request->rq_reply_md.user_ptr = request; + request->rq_reply_md.eventq = reply_in_eq; + + rc = PtlMDAttach(request->rq_reply_me_h, request->rq_reply_md, + PTL_UNLINK, &request->rq_reply_md_h); + if (rc != PTL_OK) { + CERROR("PtlMDAttach failed: %d\n", rc); + LBUG(); + GOTO(cleanup2, rc); + } + + CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 + ", portal %u\n", + request->rq_replen, request->rq_xid, + request->rq_import->imp_client->cli_reply_portal); } - CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64", portal %u\n", - request->rq_replen, request->rq_xid, - request->rq_import->imp_client->cli_reply_portal); - rc = ptl_send_buf(request, request->rq_connection, request->rq_import->imp_client->cli_request_portal); RETURN(rc); diff --git a/lustre/ptlrpc/recovd.c b/lustre/ptlrpc/recovd.c index 7561ba0..f7787d3 100644 --- a/lustre/ptlrpc/recovd.c +++ b/lustre/ptlrpc/recovd.c @@ -44,13 +44,24 @@ void recovd_conn_fail(struct ptlrpc_connection *conn) if (!recovd) 
{ CERROR("no recovd for connection %p\n", conn); + EXIT; return; } - CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid); + spin_lock(&recovd->recovd_lock); + if (rd->rd_phase != RECOVD_IDLE || rd->rd_next_phase != RECOVD_IDLE) { + CDEBUG(D_INFO, "connection %p to %s already in recovery\n", + conn, conn->c_remote_uuid); + spin_unlock(&recovd->recovd_lock); + EXIT; + return; + } + + CERROR("connection %p to %s failed\n", conn, conn->c_remote_uuid); list_del(&rd->rd_managed_chain); list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items); + rd->rd_next_phase = RECOVD_PREPARING; spin_unlock(&recovd->recovd_lock); wake_up(&recovd->recovd_waitq); @@ -89,6 +100,8 @@ static int recovd_check_event(struct recovd_obd *recovd) rd_managed_chain); if (rd->rd_phase == rd->rd_next_phase || + (rd->rd_phase == RECOVD_IDLE && + rd->rd_next_phase == RECOVD_PREPARING) || rd->rd_phase == RECOVD_FAILED) GOTO(out, rc = 1); } @@ -131,7 +144,10 @@ static int recovd_handle_event(struct recovd_obd *recovd) struct recovd_data *rd = list_entry(tmp, struct recovd_data, rd_managed_chain); + /* XXXshaver This is very ugly -- add a RECOVD_TROUBLED state! */ if (rd->rd_phase != RECOVD_FAILED && + !(rd->rd_phase == RECOVD_IDLE && + rd->rd_next_phase == RECOVD_PREPARING) && rd->rd_phase != rd->rd_next_phase) continue; @@ -236,8 +252,6 @@ static int recovd_main(void *arg) int recovd_setup(struct recovd_obd *recovd) { int rc; - extern void (*class_signal_connection_failure) - (struct ptlrpc_connection *); ENTRY; @@ -258,8 +272,6 @@ int recovd_setup(struct recovd_obd *recovd) wait_event(recovd->recovd_ctl_waitq, recovd->recovd_state == RECOVD_READY); - /* exported and called by obdclass timeout handlers */ - class_signal_connection_failure = recovd_conn_fail; ptlrpc_recovd = recovd; RETURN(0);