From: pschwan Date: Fri, 12 Jul 2002 16:48:13 +0000 (+0000) Subject: - Added some temporary LDLM_LOCK_PUT/LDLM_LOCK_GET macros to aid in debugging X-Git-Tag: 0.4.5~46 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=dbc11cfaa7ff9dc05fa316805b37b7eeef6c3e25;p=fs%2Flustre-release.git - Added some temporary LDLM_LOCK_PUT/LDLM_LOCK_GET macros to aid in debugging - Fixed a couple more places where we dereference 'lock' after ldlm_lock_put() - Fixed a showstopper in ldlm_handle2lock that was causing serious problems. - The DLM in UML with 3 mountpoints now survives pretty much any load that create.pl can throw at it --- diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 898e182..cb446e2 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -112,7 +112,6 @@ struct ldlm_lock { struct list_head l_children; struct list_head l_childof; struct list_head l_res_link; /*position in one of three res lists*/ - atomic_t l_refcount; ldlm_mode_t l_req_mode; ldlm_mode_t l_granted_mode; @@ -196,17 +195,27 @@ extern char *ldlm_typename[]; #define LDLM_DEBUG(lock, format, a...) \ do { \ - CDEBUG(D_DLMTRACE, "### " format \ - " (%s: lock %p(rc=%d/%d,%d) mode %s/%s on res %Lu" \ - "(rc=%d) type %s remote %Lx)\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_refc, lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name[0], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_remote_handle.addr); \ + if (lock->l_resource == NULL) \ + CDEBUG(D_DLMTRACE, "### " format \ + " (UNKNOWN: lock %p(rc=%d/%d,%d) mode %s/%s on " \ + "res \?\? (rc=\?\?) type \?\?\? remote %Lx)\n" , \ + ## a, lock, lock->l_refc, lock->l_readers, \ + lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_remote_handle.addr); \ + else \ + CDEBUG(D_DLMTRACE, "### " format \ + " (%s: lock %p(rc=%d/%d,%d) mode %s/%s on res " \ + "%Lu (rc=%d) type %s remote %Lx)\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, lock, \ + lock->l_refc, lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name[0], \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_remote_handle.addr); \ } while (0) #define LDLM_DEBUG_NOLOCK(format, a...) \ @@ -220,6 +229,20 @@ int ldlm_extent_policy(struct ldlm_lock *, void *, ldlm_mode_t, void *); void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle); void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); + +#define LDLM_LOCK_PUT(lock) \ +do { \ + LDLM_DEBUG(lock, "put"); \ + ldlm_lock_put(lock); \ +} while (0) + +#define LDLM_LOCK_GET(lock) \ +({ \ + ldlm_lock_get(lock); \ + LDLM_DEBUG(lock, "get"); \ + lock; \ +}) + void ldlm_lock_put(struct ldlm_lock *lock); void ldlm_lock_destroy(struct ldlm_lock *lock); void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 3c40a49..dbb7b71 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -83,15 +83,17 @@ void ldlm_lock_put(struct ldlm_lock *lock) l_lock(nslock); lock->l_refc--; - //LDLM_DEBUG(lock, "after refc--"); + LDLM_DEBUG(lock, "after refc--"); if (lock->l_refc < 0) LBUG(); ldlm_resource_put(lock->l_resource); if (lock->l_parent) - ldlm_lock_put(lock->l_parent); + LDLM_LOCK_PUT(lock->l_parent); if (lock->l_refc == 0 && (lock->l_flags & LDLM_FL_DESTROYED)) { + lock->l_resource = NULL; + LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing"); if (lock->l_connection) ptlrpc_put_connection(lock->l_connection); CDEBUG(D_MALLOC, "kfreed 'lock': %d at %p (tot 1).\n", @@ -132,7 +134,7 @@ void ldlm_lock_destroy(struct ldlm_lock *lock) lock->l_flags = LDLM_FL_DESTROYED; l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); EXIT; } @@ -176,7 +178,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, } /* this is the extra refcount, to prevent the lock evaporating */ - ldlm_lock_get(lock); + LDLM_LOCK_GET(lock); RETURN(lock); } @@ -223,7 +225,7 @@ void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh) struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle) { - struct ldlm_lock *lock = NULL; + struct ldlm_lock *lock = NULL, *retval = NULL; ENTRY; if (!handle || !handle->addr) @@ -235,16 +237,16 @@ struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *handle) l_lock(&lock->l_resource->lr_namespace->ns_lock); if (lock->l_random != handle->cookie) - GOTO(out, handle = NULL); + GOTO(out, NULL); if (lock->l_flags & LDLM_FL_DESTROYED) - GOTO(out, handle = NULL); + GOTO(out, NULL); - ldlm_lock_get(lock); + retval = LDLM_LOCK_GET(lock); EXIT; out: l_unlock(&lock->l_resource->lr_namespace->ns_lock); - return lock; + return retval; } @@ -431,7 +433,7 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock, OBD_ALLOC(w, sizeof(*w)); if (!w) { LBUG(); - return; + GOTO(out, 0); } if (new) { @@ -440,7 +442,7 @@ static void ldlm_add_ast_work_item(struct ldlm_lock *lock, ldlm_lock2desc(new, &w->w_desc); } - w->w_lock = ldlm_lock_get(lock); + w->w_lock = LDLM_LOCK_GET(lock); list_add(&w->w_list, lock->l_resource->lr_tmp); out: l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -453,7 +455,7 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode) lock = ldlm_handle2lock(lockh); ldlm_lock_addref_internal(lock, mode); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } /* only called for local locks */ @@ -465,7 +467,7 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) else lock->l_writers++; l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_get(lock); + LDLM_LOCK_GET(lock); LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); } @@ -506,8 +508,8 @@ void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode) } else l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_put(lock); /* matches the ldlm_lock_get in addref */ - ldlm_lock_put(lock); /* matches the handle2lock above */ + LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */ + LDLM_LOCK_PUT(lock); /* matches the handle2lock above */ EXIT; } @@ -819,7 +821,7 @@ void ldlm_run_ast_work(struct list_head *rpc_list) if (rc) CERROR("Failed AST - should clean & disconnect " "client\n"); - ldlm_lock_put(w->w_lock); + LDLM_LOCK_PUT(w->w_lock); list_del(&w->w_list); OBD_FREE(w, sizeof(*w)); } @@ -852,7 +854,6 @@ void ldlm_reprocess_all(struct ldlm_resource *res) EXIT; } -/* Must be called with lock and lock->l_resource unlocked */ void ldlm_lock_cancel(struct ldlm_lock *lock) { struct ldlm_resource *res; @@ -870,9 +871,9 @@ void ldlm_lock_cancel(struct ldlm_lock *lock) ldlm_resource_unlink_lock(lock); ldlm_lock_destroy(lock); l_unlock(&ns->ns_lock); + EXIT; } -/* Must be called with lock and lock->l_resource unlocked */ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, int *flags) { diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 9617ab0..d69128c 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -86,18 +86,19 @@ static int ldlm_handle_enqueue(struct ptlrpc_request *req) lock->l_connection = ptlrpc_connection_addref(req->rq_connection); EXIT; out: - if (lock) { - LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"); - ldlm_lock_put(lock); - } + if (lock) + LDLM_DEBUG(lock, "server-side enqueue handler, sending reply" + "(err=%d)", err); req->rq_status = err; - CDEBUG(D_INFO, "err = %d\n", err); if (ptlrpc_reply(req->rq_svc, req)) LBUG(); - if (!err) - ldlm_reprocess_all(lock->l_resource); + if (lock) { + if (!err) + ldlm_reprocess_all(lock->l_resource); + LDLM_LOCK_PUT(lock); + } LDLM_DEBUG_NOLOCK("server-side enqueue handler END (lock %p)", lock); return 0; @@ -134,8 +135,8 @@ static int ldlm_handle_convert(struct ptlrpc_request *req) if (lock) { ldlm_reprocess_all(lock->l_resource); - ldlm_lock_put(lock); LDLM_DEBUG(lock, "server-side convert handler END"); + LDLM_LOCK_PUT(lock); } else LDLM_DEBUG_NOLOCK("server-side convert handler END"); @@ -147,7 +148,6 @@ static int ldlm_handle_cancel(struct ptlrpc_request *req) struct ldlm_request *dlm_req; struct ldlm_lock *lock; int rc; - char *ns_name; ENTRY; rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); @@ -162,7 +162,6 @@ static int ldlm_handle_cancel(struct ptlrpc_request *req) req->rq_status = ESTALE; } else { LDLM_DEBUG(lock, "server-side cancel handler START"); - ns_name = lock->l_resource->lr_namespace->ns_name; ldlm_lock_cancel(lock); req->rq_status = 0; } @@ -172,9 +171,8 @@ static int ldlm_handle_cancel(struct ptlrpc_request *req) if (lock) { ldlm_reprocess_all(lock->l_resource); - ldlm_lock_put(lock); - LDLM_DEBUG_NOLOCK("server-side cancel handler END (%s: lock " - "%p)", ns_name, lock); + LDLM_DEBUG(lock, "server-side cancel handler END"); + LDLM_LOCK_PUT(lock); } else LDLM_DEBUG_NOLOCK("server-side cancel handler END (lock %p)", lock); @@ -240,9 +238,9 @@ static int ldlm_handle_callback(struct ptlrpc_request *req) } } else { LDLM_DEBUG(lock, "Lock still has references, will be" - " cancelled later"); + " cancelled later"); } - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } else { struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); @@ -264,7 +262,7 @@ static int ldlm_handle_callback(struct ptlrpc_request *req) wake_up(&lock->l_waitq); lock->l_resource->lr_tmp = NULL; l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); ldlm_run_ast_work(&rpc_list); } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index f71b122..7e47486 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -83,7 +83,7 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, if (rc != ELDLM_OK) { LDLM_DEBUG(lock, "client-side enqueue END (%s)", rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED"); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); ldlm_lock_decref(lockh, mode); /* FIXME: if we've already received a completion AST, this will * LBUG! */ @@ -125,7 +125,7 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, ptlrpc_free_req(req); rc = ldlm_lock_enqueue(lock, cookie, cookielen, flags, callback, - callback); + callback); if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV)) { @@ -139,7 +139,7 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, LDLM_DEBUG(lock, "client-side enqueue waking up: granted"); } LDLM_DEBUG(lock, "client-side enqueue END"); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); EXIT; out: return rc; @@ -156,8 +156,10 @@ int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *desc, ENTRY; lock = ldlm_handle2lock(lockh); - if (lock == NULL) + if (lock == NULL) { LBUG(); + RETURN(-EINVAL); + } cl = &lock->l_resource->lr_namespace->ns_rpc_client; req = ptlrpc_prep_req(cl, lock->l_connection, LDLM_CALLBACK, 1, &size, NULL); @@ -187,7 +189,7 @@ int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *desc, EXIT; out: - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); return rc; } @@ -204,8 +206,10 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct lustre_handle *lockh, ENTRY; lock = ldlm_handle2lock(lockh); - if (!lock) - LBUG(); + if (!lock) { + LBUG(); + RETURN(-EINVAL); + } *flags = 0; LDLM_DEBUG(lock, "client-side convert"); @@ -243,7 +247,7 @@ int ldlm_cli_convert(struct ptlrpc_client *cl, struct lustre_handle *lockh, lock->l_req_mode == lock->l_granted_mode); CDEBUG(D_NET, "waking up, the lock must be granted.\n"); } - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); EXIT; out: ptlrpc_free_req(req); @@ -289,7 +293,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh, GOTO(out, rc); ldlm_lock_cancel(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); EXIT; out: return 0; diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 26d8de9..374b10c 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -73,7 +73,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q) list_for_each_safe(tmp, pos, q) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_get(lock); + LDLM_LOCK_GET(lock); if (client) { struct lustre_handle lockh; @@ -90,7 +90,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q) ldlm_resource_unlink_lock(lock); ldlm_lock_destroy(lock); } - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } return; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 26eb727..201b4d8 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -215,6 +215,10 @@ int ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc) int ptlrpc_reply(struct ptlrpc_service *svc, struct ptlrpc_request *req) { + if (req->rq_repmsg == NULL) + CERROR("bad: someone called ptlrpc_reply when they meant " + "ptlrpc_error\n"); + /* FIXME: we need to increment the count of handled events */ req->rq_type = PTL_RPC_TYPE_REPLY; //req->rq_repmsg->conn = req->rq_connection->c_remote_conn;