From a34c37a94de1188da1f033f589ea01b08f19f722 Mon Sep 17 00:00:00 2001 From: braam Date: Sun, 17 Mar 2002 05:39:22 +0000 Subject: [PATCH] - temporarily change client.c which bums out on killing a client (PHIL?) - lock the service structure whenever it mucks with state - add full debugging to llecho.sh --- lustre/doc/Makefile.am | 2 +- lustre/ost/ost_handler.c | 1 + lustre/ptlrpc/client.c | 5 ++++- lustre/ptlrpc/events.c | 3 +++ lustre/ptlrpc/niobuf.c | 14 +++++++++++++- lustre/ptlrpc/service.c | 42 ++++++++++++++++++++++++++++++------------ lustre/tests/llecho.sh | 2 +- 7 files changed, 53 insertions(+), 16 deletions(-) diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 8ffdfb5..851ad81 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -4,7 +4,7 @@ # See the file COPYING in this distribution DOCS = OBD-HOWTO.sgml OLVM.txt figs notes.txt obdtrace_demo.txt -doc_DATA = $(DOCS) OBD-HOWTO.html OBD-HOWTO.txt +# doc_DATA = $(DOCS) OBD-HOWTO.html OBD-HOWTO.txt CLEANFILES = OBD-HOWTO.html OBD-HOWTO.txt EXTRA_DIST = $(DOCS) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index f10e86e..befaf88 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -585,6 +585,7 @@ static int ost_handle(struct obd_device *obddev, if (NTOH__u32(hdr->type) != OST_TYPE_REQ) { CERROR("lustre_ost: wrong packet type sent %d\n", NTOH__u32(hdr->type)); + BUG(); rc = -EINVAL; goto out; } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 6118366..4755dda 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -174,6 +174,7 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) } if (sigismember(&(current->pending.signal), SIGKILL) || + sigismember(&(current->pending.signal), SIGSTOP) || sigismember(&(current->pending.signal), SIGINT)) { req->rq_flags = PTL_RPC_INTR; EXIT; @@ -267,7 +268,9 @@ int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req) if (req->rq_flags != PTL_RPC_REPLY) 
{ CERROR("Unknown reason for wakeup\n"); - BUG(); + /* XXX Phil - I end up here when I kill obdctl */ + ptlrpc_abort(req); + //BUG(); EXIT; return -EINTR; } diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index f977307..0edfe1a 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -98,6 +98,7 @@ int server_request_callback(ptl_event_t *ev, void *data) * kmalloc()'ed memory and inserted at the ring tail. */ + spin_lock(&service->srv_lock); service->srv_ref_count[service->srv_md_active]++; CDEBUG(D_INODE, "event offset %d buf size %d\n", @@ -111,6 +112,7 @@ int server_request_callback(ptl_event_t *ev, void *data) if (rc != PTL_OK) { CERROR("PtlMEUnlink failed - DROPPING soon: %d\n", rc); BUG(); + spin_unlock(&service->srv_lock); return rc; } @@ -122,6 +124,7 @@ int server_request_callback(ptl_event_t *ev, void *data) service->srv_ring_length); } + spin_unlock(&service->srv_lock); if (ev->type == PTL_EVENT_PUT) { wake_up(&service->srv_waitq); } else { diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index cb95b2e..e734aac 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -264,11 +264,18 @@ int ptlrpc_error(struct obd_device *obddev, struct ptlrpc_service *svc, int ptl_send_rpc(struct ptlrpc_request *request, struct lustre_peer *peer) { ptl_process_id_t local_id; + struct ptlreq_hdr *hdr; int rc; char *repbuf; ENTRY; + hdr = (struct ptlreq_hdr *)request->rq_reqbuf; + if (NTOH__u32(hdr->type) != OST_TYPE_REQ) { + CERROR("lustre_ost: wrong packet type sent %d\n", + NTOH__u32(hdr->type)); + BUG(); + } if (request->rq_replen == 0) { CERROR("request->rq_replen is 0!\n"); EXIT; @@ -332,9 +339,11 @@ int ptl_send_rpc(struct ptlrpc_request *request, struct lustre_peer *peer) * it finishes processing an event. This ensures the ref count is * decremented and that the rpc ring buffer cycles properly. 
*/ -int ptl_received_rpc(struct ptlrpc_service *service) { +int ptl_received_rpc(struct ptlrpc_service *service) +{ int rc, index; + spin_lock(&service->srv_lock); index = service->srv_md_active; CDEBUG(D_INFO, "MD index=%d Ref Count=%d\n", index, service->srv_ref_count[index]); @@ -354,6 +363,7 @@ int ptl_received_rpc(struct ptlrpc_service *service) { if (rc != PTL_OK) { CERROR("PtlMEInsert failed: %d\n", rc); BUG(); + spin_unlock(&service->srv_lock); return rc; } @@ -373,6 +383,7 @@ int ptl_received_rpc(struct ptlrpc_service *service) { /* XXX cleanup */ CERROR("PtlMDAttach failed: %d\n", rc); BUG(); + spin_unlock(&service->srv_lock); return rc; } @@ -380,5 +391,6 @@ int ptl_received_rpc(struct ptlrpc_service *service) { NEXT_INDEX(index, service->srv_ring_length); } + spin_unlock(&service->srv_lock); return 0; } diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 2efdf64..f59b267 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -36,48 +36,60 @@ extern int server_request_callback(ptl_event_t *ev, void *data); static int ptlrpc_check_event(struct ptlrpc_service *svc) { + int rc = 0; + + spin_lock(&svc->srv_lock); if (sigismember(&(current->pending.signal), SIGKILL) || + sigismember(&(current->pending.signal), SIGSTOP) || + sigismember(&(current->pending.signal), SIGCONT) || sigismember(&(current->pending.signal), SIGINT)) { svc->srv_flags |= SVC_KILLED; EXIT; - return 1; + rc = 1; + goto out; } if ( svc->srv_flags & SVC_STOPPING ) { EXIT; - return 1; + rc = 1; + goto out; } if (svc->srv_flags & SVC_EVENT) BUG(); if ( svc->srv_eq_h ) { - int rc; - rc = PtlEQGet(svc->srv_eq_h, &svc->srv_ev); + int err; + err = PtlEQGet(svc->srv_eq_h, &svc->srv_ev); - if (rc == PTL_OK) { + if (err == PTL_OK) { svc->srv_flags |= SVC_EVENT; EXIT; - return 1; + rc = 1; + goto out; } - if (rc != PTL_EQ_EMPTY) { + if (err != PTL_EQ_EMPTY) { - CDEBUG(D_NET, "BUG: PtlEQGet returned %d\n", rc); + CDEBUG(D_NET, "BUG: PtlEQGet returned %d\n", err); BUG(); } EXIT; - return 0; + rc = 0; + goto out; } if
(!list_empty(&svc->srv_reqs)) { svc->srv_flags |= SVC_LIST; EXIT; - return 1; + rc = 1; + goto out; } EXIT; - return 0; + out: + spin_unlock(&svc->srv_lock); + return rc; } struct ptlrpc_service * @@ -147,13 +159,16 @@ static int ptlrpc_main(void *arg) while (1) { wait_event(svc->srv_waitq, ptlrpc_check_event(svc)); + spin_lock(&svc->srv_lock); if (svc->srv_flags & SVC_SIGNAL) { EXIT; + spin_unlock(&svc->srv_lock); break; } if (svc->srv_flags & SVC_STOPPING) { EXIT; + spin_unlock(&svc->srv_lock); break; } @@ -176,9 +191,11 @@ static int ptlrpc_main(void *arg) /* FIXME: this NI should be the incoming NI. * We don't know how to find that from here. */ request.rq_peer.peer_ni = svc->srv_self.peer_ni; + svc->srv_flags &= ~SVC_EVENT; + + spin_unlock(&svc->srv_lock); rc = svc->srv_handler(obddev, svc, &request); ptl_received_rpc(svc); - svc->srv_flags &= ~SVC_EVENT; continue; } @@ -186,7 +203,6 @@ static int ptlrpc_main(void *arg) struct ptlrpc_request *request; svc->srv_flags = SVC_RUNNING; - spin_lock(&svc->srv_lock); request = list_entry(svc->srv_reqs.next, struct ptlrpc_request, rq_list); @@ -196,6 +212,7 @@ static int ptlrpc_main(void *arg) continue; } CERROR("unknown break in service"); + spin_unlock(&svc->srv_lock); break; } @@ -290,6 +307,7 @@ int rpc_register_service(struct ptlrpc_service *service, char *uuid) return -ENOMEM; } + /* Insert additional ME's to the ring */ if (i > 0) { rc = PtlMEInsert(service->srv_me_h[i-1], diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh index ea65259d..c89169f 100644 --- a/lustre/tests/llecho.sh +++ b/lustre/tests/llecho.sh @@ -14,7 +14,7 @@ add_uuid ost quit EOF -echo 8191 > /proc/sys/portals/debug +echo 0xffffffff > /proc/sys/portals/debug $OBDCTL <