From 573d8bd7ae980423d435f5e761d753731dbdfad3 Mon Sep 17 00:00:00 2001 From: braam Date: Mon, 29 Apr 2002 20:40:12 +0000 Subject: [PATCH 1/1] - and here are the new files with the previous commit --- lustre/llite/recover.c | 44 ++++++++ lustre/ptlrpc/connmgr.c | 155 ++++++++++++++++++++++++++ lustre/tests/runfailure-client-mds-recover.sh | 21 ++++ 3 files changed, 220 insertions(+) create mode 100644 lustre/llite/recover.c create mode 100644 lustre/ptlrpc/connmgr.c create mode 100755 lustre/tests/runfailure-client-mds-recover.sh diff --git a/lustre/llite/recover.c b/lustre/llite/recover.c new file mode 100644 index 0000000..115e229 --- /dev/null +++ b/lustre/llite/recover.c @@ -0,0 +1,44 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copryright (C) 1996 Peter J. Braam + * Copryright (C) 1999 Stelias Computing Inc. + * Copryright (C) 1999 Seagate Technology Inc. + * Copryright (C) 2001 Mountain View Data, Inc. + * Copryright (C) 2002 Cluster File Systems, Inc. + * + */ + +#include +#include + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include + +void ll_recover(struct ptlrpc_client *cli) +{ + struct ptlrpc_request *req; + struct list_head *tmp, *pos; + ENTRY; + + spin_lock(&cli->cli_lock); + /* first shot at this: resend the request */ + list_for_each_safe(tmp, pos, &cli->cli_sent_head) { + req = list_entry(tmp, struct ptlrpc_request, rq_list); + CDEBUG(D_INODE, "replaying request %p\n", req); + list_del(&req->rq_list); + ptlrpc_resend_req(req); + } + + recovd_cli_fixed(cli); + spin_unlock(&cli->cli_lock); + + EXIT; +} diff --git a/lustre/ptlrpc/connmgr.c b/lustre/ptlrpc/connmgr.c new file mode 100644 index 0000000..c93c17d --- /dev/null +++ b/lustre/ptlrpc/connmgr.c @@ -0,0 +1,155 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * obd/rpc/recovd.c + * + * Lustre High Availability Daemon + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * by Peter Braam + * + */ + +#define EXPORT_SYMTAB +#define DEBUG_SUBSYSTEM S_RPC + +#include +#include +#include + +static int connmgr_unpack_body(struct ptlrpc_request *req) +{ + struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0); + if (b == NULL) { + LBUG(); + RETURN(-EINVAL); + } + + b->generation = NTOH__u32(b->generation); + + return 0; +} + +int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn) +{ + struct ptlrpc_request *req; + struct ptlrpc_client *cl; + struct connmgr_body *body; + int rc, size = sizeof(*body); + ENTRY; + + if (!recovd) { + CERROR("no manager\n"); + LBUG(); + } + cl = recovd->recovd_client; + + req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0); + body->generation = HTON__u32(conn->c_generation); + body->conn = (__u64)(unsigned long)conn; + body->conn_token = conn->c_token; + strncpy(body->conn_uuid, conn->c_local_uuid, sizeof(body->conn_uuid)); + + req->rq_replen = lustre_msg_size(1, &size); + + rc = ptlrpc_queue_wait(req); + rc = ptlrpc_check_status(req, rc); + if (!rc) { + rc = connmgr_unpack_body(req); + if (rc) + GOTO(out_free, rc); + body = lustre_msg_buf(req->rq_repmsg, 0); + CDEBUG(D_NET, "remote generation: %o\n", body->generation); + conn->c_level = LUSTRE_CONN_CON; + conn->c_remote_conn = body->conn; + conn->c_remote_token = body->conn_token; + strncpy(conn->c_remote_uuid, body->conn_uuid, + sizeof(conn->c_remote_uuid)); + } + + EXIT; + out_free: + ptlrpc_free_req(req); + out: + return rc; +} + +static int connmgr_handle_connect(struct ptlrpc_request *req) +{ + struct connmgr_body *body; + int rc, size = sizeof(*body); + ENTRY; + + rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc) { + CERROR("connmgr: out of memory\n"); + req->rq_status = -ENOMEM; + RETURN(0); + } + + body = lustre_msg_buf(req->rq_reqmsg, 0); + connmgr_unpack_body(req); + + req->rq_connection->c_remote_conn = body->conn; + req->rq_connection->c_remote_token = body->conn_token; + strncpy(req->rq_connection->c_remote_uuid, body->conn_uuid, + sizeof(req->rq_connection->c_remote_uuid)); + + CERROR("incoming generation %d\n", body->generation); + body = lustre_msg_buf(req->rq_repmsg, 0); + body->generation = 4711; + body->conn = (__u64)(unsigned long)req->rq_connection; + body->conn_token = req->rq_connection->c_token; + + req->rq_connection->c_level = LUSTRE_CONN_CON; + RETURN(0); +} + +int connmgr_handle(struct obd_device *dev, struct ptlrpc_service *svc, + struct ptlrpc_request *req) +{ + int rc; + ENTRY; + + rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen); + if (rc) { + CERROR("Invalid request\n"); + GOTO(out, rc); + } + + if (req->rq_reqmsg->type != NTOH__u32(PTL_RPC_MSG_REQUEST)) { + CERROR("wrong packet type sent %d\n", + req->rq_reqmsg->type); + GOTO(out, rc = -EINVAL); + } + + switch (req->rq_reqmsg->opc) { + case CONNMGR_CONNECT: + CDEBUG(D_INODE, "connmgr connect\n"); + rc = connmgr_handle_connect(req); + break; + + default: + rc = ptlrpc_error(svc, req); + RETURN(rc); + } + + EXIT; +out: + if (rc) { + ptlrpc_error(svc, req); + } else { + CDEBUG(D_NET, "sending reply\n"); + ptlrpc_reply(svc, req); + } + + return 0; +} diff --git a/lustre/tests/runfailure-client-mds-recover.sh b/lustre/tests/runfailure-client-mds-recover.sh new file mode 100755 index 0000000..a4b46a5 --- /dev/null +++ b/lustre/tests/runfailure-client-mds-recover.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +echo `date` creating /mnt/lustre/foo +echo 0x80000107 > /proc/sys/lustre/fail_loc +touch /mnt/lustre/foo & +ps axww | grep touch +echo "touch program suspended and hanging -- sleeping 5 secs" +sleep 5 +ls -l /mnt/lustre/foo + +echo 0x0000107 > /proc/sys/lustre/fail_loc +touch /mnt/lustre/bar & +ps axww | grep touch +echo "touch program will have repeated failures sleeping 10" + +sleep 10 + +echo 0 > /proc/sys/lustre/fail_loc +echo "failure cleared" +sleep 5 +ls -l /mnt/lustre/bar -- 1.8.3.1