From: pschwan Date: Mon, 1 Jul 2002 06:35:51 +0000 (+0000) Subject: - updated LDLM_DEBUG to give more refcount info X-Git-Tag: v1_7_100~5397 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=15c78e9c6026bbb802835ecb731488b4b4d22e87;p=fs%2Flustre-release.git - updated LDLM_DEBUG to give more refcount info - made fixme a macro, so that it shows us where it's called from - fixed a DLM deadlock (unbalanced l_lock) - fixed the refcount bug in ldlm_lock_decref - fixed the refcount bug in ldlm_cli_enqueue in the failed/aborted case - the lock slab cleans up now! - fixed the ``connection foo has refcount -61'' bug - found, but have not yet fixed, a subtle ctrl-c-during-aborted-ldlm-enqueue bug that can be triggered if you abort the hanging Perl test at _just_ the right time. - fixed request leaks in osc_connect and mdc_connect - we create an import in osc_connect but never use or free it -- I didn't remove this code, assuming it was going to be used soon? --- diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 572ed16..2aa3ff5 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -194,18 +194,18 @@ extern struct obd_ops ldlm_obd_ops; extern char *ldlm_lockname[]; extern char *ldlm_typename[]; -#define LDLM_DEBUG(lock, format, a...) \ -do { \ - CDEBUG(D_DLMTRACE, "### " format \ - " (%s: lock %p mode %s/%s on res %Lu (rc %d) " \ - " type %s remote %Lx)\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name[0], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_remote_handle.addr); \ +#define LDLM_DEBUG(lock, format, a...) \ +do { \ + CDEBUG(D_DLMTRACE, "### " format \ + " (%s: lock %p(rc=%d) mode %s/%s on res %Lu(rc=%d) " \ + " type %s remote %Lx)\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, lock, \ + lock->l_refc, ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name[0], \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_remote_handle.addr); \ } while (0) #define LDLM_DEBUG_NOLOCK(format, a...) \ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 758ad57..4b9bb0c 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -82,6 +82,8 @@ static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb) static inline struct mdc_obd *sbi2mdc(struct ll_sb_info *sbi) { struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); + if (obd == NULL) + LBUG(); return &obd->u.mdc; } diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 1ad9902..bd10e2b 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -110,10 +110,7 @@ do { \ #include #include -static inline void fixme(void) -{ - CERROR("FIXME\n"); -} +#define fixme() CERROR("FIXME\n"); static inline void OBD_FAIL_WRITE(int id, kdev_t dev) { diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 63a301f..25791c1 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -99,7 +99,6 @@ void ldlm_lock_put(struct ldlm_lock *lock) } l_unlock(nslock); EXIT; - return; } void ldlm_lock_destroy(struct ldlm_lock *lock) @@ -123,6 +122,7 @@ void ldlm_lock_destroy(struct ldlm_lock *lock) LBUG(); if (lock->l_flags & LDLM_FL_DESTROYED) { + l_unlock(&lock->l_resource->lr_namespace->ns_lock); EXIT; return; } @@ -131,8 +131,8 @@ void ldlm_lock_destroy(struct ldlm_lock *lock) l_unlock(&lock->l_resource->lr_namespace->ns_lock); ldlm_lock_put(lock); EXIT; - return; } + /* usage: pass in a resource on which you have done get pass in a parent lock on which you have done a get @@ -494,7 +494,8 @@ void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode) } else l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_put(lock); + ldlm_lock_put(lock); /* matches the ldlm_lock_get in addref */ + ldlm_lock_put(lock); /* matches the handle2lock above */ EXIT; } @@ -648,7 +649,6 @@ int ldlm_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type, } /* Returns a referenced, lock */ - struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, struct lustre_handle *parent_lock_handle, __u64 *res_id, __u32 type, diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 9c92d64..268eb2f 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -73,7 +73,7 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, s req->rq_replen = lustre_msg_size(1, &size); } - lock->l_connection = conn; + lock->l_connection = ptlrpc_connection_addref(conn); lock->l_client = cl; rc = ptlrpc_queue_wait(req); @@ -83,7 +83,10 @@ int ldlm_cli_enqueue(struct ptlrpc_client *cl, struct ptlrpc_connection *conn, s if (rc != ELDLM_OK) { LDLM_DEBUG(lock, "client-side enqueue END (%s)", rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED"); + ldlm_lock_put(lock); ldlm_lock_decref(lockh, mode); + /* FIXME: if we've already received a completion AST, this will + * LBUG! */ ldlm_lock_destroy(lock); GOTO(out, rc); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index d555a2d..c13119a 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -762,13 +762,13 @@ static int mdc_connect(struct lustre_handle *conn, struct obd_device *obd) mdc->mdc_connh.cookie = request->rq_repmsg->cookie; EXIT; - return 0; out: ptlrpc_free_req(request); out_disco: - class_disconnect(conn); - if (rc) + if (rc) { + class_disconnect(conn); MOD_DEC_USE_COUNT; + } return rc; } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index baa3185..07dc96f 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -25,14 +25,6 @@ #include #include -static void osc_obd2cl(struct obd_device *obd, struct ptlrpc_client **cl, - struct ptlrpc_connection **connection) -{ - struct osc_obd *osc = &obd->u.osc; - *cl = osc->osc_client; - *connection = osc->osc_conn; -} - static void osc_con2cl(struct lustre_handle *conn, struct ptlrpc_client **cl, struct ptlrpc_connection **connection) { @@ -54,8 +46,6 @@ static int osc_connect(struct lustre_handle *conn, struct obd_device *obd) struct osc_obd *osc = &obd->u.osc; struct obd_import *import; struct ptlrpc_request *request; - struct ptlrpc_client *cl; - struct ptlrpc_connection *connection; char *tmp = osc->osc_target_uuid; int rc, size = sizeof(osc->osc_target_uuid); ENTRY; @@ -69,11 +59,10 @@ static int osc_connect(struct lustre_handle *conn, struct obd_device *obd) if (rc) RETURN(rc); - osc_obd2cl(obd, &cl, &connection); request = ptlrpc_prep_req(osc->osc_client, osc->osc_conn, OST_CONNECT, 1, &size, &tmp); if (!request) - GOTO(out_disco, -ENOMEM); + GOTO(out_disco, rc = -ENOMEM); request->rq_replen = lustre_msg_size(0, NULL); @@ -89,13 +78,13 @@ static int osc_connect(struct lustre_handle *conn, struct obd_device *obd) osc->osc_connh.cookie = request->rq_repmsg->cookie; EXIT; - return 0; out: ptlrpc_free_req(request); out_disco: - class_disconnect(conn); - if (rc) + if (rc) { + class_disconnect(conn); MOD_DEC_USE_COUNT; + } return rc; } diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index f7e8559..abb2bea 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -154,22 +154,22 @@ static int handle_incoming_request(struct obd_device *obddev, if (request.rq_reqlen < sizeof(struct lustre_msg)) { CERROR("incomplete request: ptl %d from %Lx xid %Ld\n", - svc->srv_req_portal, event->initiator.nid, - request.rq_xid); + svc->srv_req_portal, event->initiator.nid, + request.rq_xid); return -EINVAL; } - - if (request.rq_reqmsg->magic != PTLRPC_MSG_MAGIC) { + + if (request.rq_reqmsg->magic != PTLRPC_MSG_MAGIC) { CERROR("wrong lustre_msg magic: ptl %d from %Lx xid %Ld\n", - svc->srv_req_portal, event->initiator.nid, - request.rq_xid); + svc->srv_req_portal, event->initiator.nid, + request.rq_xid); return -EINVAL; } - if (request.rq_reqmsg->version != PTLRPC_MSG_VERSION) { + if (request.rq_reqmsg->version != PTLRPC_MSG_VERSION) { CERROR("wrong lustre_msg version: ptl %d from %Lx xid %Ld\n", - svc->srv_req_portal, event->initiator.nid, - request.rq_xid); + svc->srv_req_portal, event->initiator.nid, + request.rq_xid); return -EINVAL; } @@ -180,23 +180,23 @@ static int handle_incoming_request(struct obd_device *obddev, * We don't know how to find that from here. */ peer.peer_ni = svc->srv_self.peer_ni; - request.rq_export = class_conn2export((struct lustre_handle *) request.rq_reqmsg); + request.rq_export = class_conn2export((struct lustre_handle *) request.rq_reqmsg); if (request.rq_export) { request.rq_connection = request.rq_export->export_connection; - ptlrpc_connection_addref(request.rq_connection); - } else { + ptlrpc_connection_addref(request.rq_connection); + } else { request.rq_connection = ptlrpc_get_connection(&peer); } spin_unlock(&svc->srv_lock); rc = svc->srv_handler(&request); - ptlrpc_put_connection(request.rq_connection); + ptlrpc_put_connection(request.rq_connection); ptl_handled_rpc(svc, start); return rc; } -void ptlrpc_rotate_reqbufs(struct ptlrpc_service *service, +void ptlrpc_rotate_reqbufs(struct ptlrpc_service *service, ptl_event_t *ev) { int index; @@ -204,7 +204,7 @@ void ptlrpc_rotate_reqbufs(struct ptlrpc_service *service, for (index = 0; index < service->srv_ring_length; index++) if (service->srv_buf[index] == ev->mem_desc.start) break; - + if (index == service->srv_ring_length) LBUG(); @@ -274,10 +274,10 @@ static int ptlrpc_main(void *arg) EXIT; break; } - - if (thread->t_flags & SVC_EVENT) { + + if (thread->t_flags & SVC_EVENT) { thread->t_flags &= ~SVC_EVENT; - ptlrpc_rotate_reqbufs(svc, &event); + ptlrpc_rotate_reqbufs(svc, &event); rc = handle_incoming_request(obddev, svc, &event); thread->t_flags &= ~SVC_EVENT;