*/
#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_RPC
-#include <linux/config.h>
#include <linux/module.h>
-#include <linux/kernel.h>
-
#include <linux/obd_support.h>
-#include <linux/lustre_net.h>
-
-static ptl_handle_eq_t req_eq, bulk_source_eq, bulk_sink_eq;
-
-/* This callback performs two functions:
- *
- * 1. Free the request buffer after it has gone out on the wire
- * 2. Wake up the thread waiting for the reply once it comes in.
- */
-static int request_callback(ptl_event_t *ev, void *data)
-{
- struct ptlrpc_request *rpc = ev->mem_desc.user_ptr;
-
- ENTRY;
-
- if (ev->type == PTL_EVENT_SENT) {
- kfree(ev->mem_desc.start);
- } else if (ev->type == PTL_EVENT_PUT) {
- rpc->rq_repbuf = ev->mem_desc.start + ev->offset;
- wake_up_interruptible(&rpc->rq_wait_for_rep);
- }
-
- EXIT;
- return 1;
-}
-
-static int incoming_callback(ptl_event_t *ev, void *data)
-{
- struct ptlrpc_service *service = data;
-
- ENTRY;
-
- if (ev->type == PTL_EVENT_PUT) {
- wake_up(service->srv_wait_queue);
- } else {
- printk("Unexpected event type: %d\n", ev->type);
- }
+#include <linux/obd_class.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
+#include <linux/init.h>
- EXIT;
- return 0;
-}
+extern int ptlrpc_init_portals(void);
+extern void ptlrpc_exit_portals(void);
-static int bulk_source_callback(ptl_event_t *ev, void *data)
+/* o_setup method of the recovd obd type (see recovd_obd_ops).
+ *
+ * Takes a module use count, zeroes the recovd state embedded in @obddev and
+ * passes it to recovd_setup().  If recovd_setup() fails, the use count is
+ * released again and its error is returned; otherwise returns 0.
+ * @len and @buf are not used. */
+int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf)
{
- struct ptlrpc_request *rpc = ev->mem_desc.user_ptr;
-
+        struct recovd_obd *rd = &obddev->u.recovd;
+        int rc;
ENTRY;
- if (ev->type == PTL_EVENT_SENT) {
- ;
- } else if (ev->type == PTL_EVENT_ACK) {
- wake_up_interruptible(&rpc->rq_wait_for_bulk);
- } else {
- printk("Unexpected event type in " __FUNCTION__ "!\n");
- }
-
- EXIT;
- return 1;
-}
+        MOD_INC_USE_COUNT;
+        memset(rd, 0, sizeof(*rd));
-static int bulk_sink_callback(ptl_event_t *ev, void *data)
-{
- struct ptlrpc_request *rpc = ev->mem_desc.user_ptr;
-
- ENTRY;
-
- if (ev->type == PTL_EVENT_PUT) {
- if (rpc->rq_bulkbuf != ev->mem_desc.start + ev->offset)
- printk(__FUNCTION__ ": bulkbuf != mem_desc -- why?\n");
- wake_up_interruptible(&rpc->rq_wait_for_bulk);
- } else {
- printk("Unexpected event type in " __FUNCTION__ "!\n");
+        rc = recovd_setup(rd);
+        if (rc != 0) {
+                /* undo the MOD_INC_USE_COUNT above */
+                MOD_DEC_USE_COUNT;
+                RETURN(rc);
}
- EXIT;
- return 1;
+        RETURN(0);
}
-int ptl_send_buf(struct ptlrpc_request *request, struct lustre_peer *peer,
- int portal, int is_request)
+/* o_cleanup method of the recovd obd type (see recovd_obd_ops).
+ *
+ * Calls recovd_cleanup() on the recovd state embedded in @dev and treats a
+ * non-zero result as a fatal bug (LBUG).  Afterwards it releases one module
+ * use count -- connmgr_setup() takes one -- and returns 0. */
+int connmgr_cleanup(struct obd_device *dev)
{
- int rc;
- ptl_process_id_t remote_id;
- ptl_handle_md_t md_h;
-
- /* FIXME: This is bad. */
- if (request->rq_bulklen) {
- request->rq_req_md.start = request->rq_bulkbuf;
- request->rq_req_md.length = request->rq_bulklen;
- request->rq_req_md.eventq = bulk_source_eq;
- } else if (is_request) {
- request->rq_req_md.start = request->rq_reqbuf;
- request->rq_req_md.length = request->rq_reqlen;
- request->rq_req_md.eventq = req_eq;
- } else {
- request->rq_req_md.start = request->rq_repbuf;
- request->rq_req_md.length = request->rq_replen;
- request->rq_req_md.eventq = req_eq;
- }
- request->rq_req_md.threshold = 1;
- request->rq_req_md.options = PTL_MD_OP_PUT;
- request->rq_req_md.user_ptr = request;
-
- rc = PtlMDBind(peer->peer_ni, request->rq_req_md, &md_h);
- if (rc != 0) {
- printk(__FUNCTION__ ": PtlMDBind failed: %d\n", rc);
- return rc;
- }
+        struct recovd_obd *recovd = &dev->u.recovd;
+        int err;
+        /* pair the RETURN() below with an ENTRY, as the other connmgr_*
+         * methods in this file do */
+        ENTRY;
- remote_id.addr_kind = PTL_ADDR_NID;
- remote_id.nid = peer->peer_nid;
- remote_id.pid = 0;
-
- if (request->rq_bulklen) {
- rc = PtlPut(md_h, PTL_ACK_REQ, remote_id, portal, 0,
- request->rq_xid, 0, 0);
- } else {
- rc = PtlPut(md_h, PTL_NOACK_REQ, remote_id, portal, 0,
- request->rq_xid, 0, 0);
- }
- if (rc != PTL_OK) {
- printk(__FUNCTION__ ": PtlPut failed: %d\n", rc);
- /* FIXME: tear down md */
- }
+        err = recovd_cleanup(recovd);
+        if (err)
+                LBUG();
- return rc;
+        MOD_DEC_USE_COUNT;
+        RETURN(0);
}
-int ptl_send_rpc(struct ptlrpc_request *request, struct lustre_peer *peer)
+/* o_iocontrol method of the recovd obd type (see recovd_obd_ops).
+ *
+ * Handles two commands, both of which name a connection by the remote UUID
+ * passed in ioc_inlbuf1 of the obd_ioctl_data at @karg:
+ *
+ *  OBD_IOC_RECOVD_FAILCONN - the connection is looked up on the troubled
+ *      list and then on the managed list; recovd_conn_fail() is called on
+ *      it (with recovd_lock dropped) and its phase is set to RD_FAILED.
+ *  OBD_IOC_RECOVD_NEWCONN  - the connection is looked up on the troubled
+ *      list only and must be in phase RD_PREPARING.  If ioc_inllen2 is
+ *      non-zero, ioc_inlbuf2 replaces the connection's remote UUID.  The
+ *      connection is re-addressed, moved to RD_PREPARED, and recovd_waitq
+ *      is woken.
+ *
+ * @len and @uarg are not used.
+ *
+ * Returns 0 on success, -EINVAL for an unknown command, a missing handle or
+ * lookup UUID, an unknown connection or an oversized replacement UUID, and
+ * -EALREADY if NEWCONN finds the connection in any phase but RD_PREPARING. */
+int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
+                      void *uarg)
{
- ptl_handle_me_t me_h, bulk_me_h;
- ptl_process_id_t local_id;
- int rc;
+        struct ptlrpc_connection *conn = NULL;
+        struct obd_device *obd = class_conn2obd(hdl);
+        struct recovd_obd *recovd;
+        struct obd_ioctl_data *data = karg;
+        struct list_head *tmp;
+        int rc = 0;
ENTRY;
- if (request->rq_replen == 0) {
- printk(__FUNCTION__ ": request->rq_replen is 0!\n");
- EXIT;
- return -EINVAL;
- }
-
- request->rq_repbuf = kmalloc(request->rq_replen, GFP_KERNEL);
- if (!request->rq_repbuf) {
- EXIT;
- return -ENOMEM;
- }
-
- local_id.addr_kind = PTL_ADDR_GID;
- local_id.gid = PTL_ID_ANY;
- local_id.rid = PTL_ID_ANY;
-
- rc = PtlMEAttach(peer->peer_ni, request->rq_reply_portal, local_id,
- request->rq_xid, 0, PTL_UNLINK, &me_h);
- if (rc != PTL_OK) {
- EXIT;
- /* FIXME: tear down EQ, free reqbuf */
- return rc;
- }
-
- request->rq_reply_md.start = request->rq_repbuf;
- request->rq_reply_md.length = request->rq_replen;
- request->rq_reply_md.threshold = 1;
- request->rq_reply_md.options = PTL_MD_OP_PUT;
- request->rq_reply_md.user_ptr = request;
- request->rq_reply_md.eventq = req_eq;
-
- rc = PtlMDAttach(me_h, request->rq_reply_md, PTL_UNLINK,
- &request->rq_reply_md_h);
- if (rc != PTL_OK) {
- EXIT;
- return rc;
- }
-
- if (request->rq_bulklen != 0) {
- rc = PtlMEAttach(peer->peer_ni, request->rq_bulk_portal,
- local_id, request->rq_xid, 0, PTL_UNLINK,
- &bulk_me_h);
- if (rc != PTL_OK) {
- EXIT;
- return rc;
+        if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
+                RETURN(-EINVAL); /* XXX ENOSYS? */
+
+        /* Nothing can be looked up without a device and a UUID; bail out
+         * before either pointer is dereferenced.  (Guards against
+         * class_conn2obd() failing to resolve the handle and against an
+         * ioctl that carries no first inline buffer.) */
+        if (!obd || !data || !data->ioc_inlbuf1)
+                RETURN(-EINVAL);
+        recovd = &obd->u.recovd;
+
+        /* Find the connection that's been rebuilt or has failed. */
+        spin_lock(&recovd->recovd_lock);
+        list_for_each(tmp, &recovd->recovd_troubled_items) {
+                conn = list_entry(tmp, struct ptlrpc_connection,
+                                  c_recovd_data.rd_managed_chain);
+
+                LASSERT(conn->c_recovd_data.rd_recovd == recovd); /* sanity */
+
+                if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+                        break;
+                conn = NULL;
+        }
+
+        if (!conn) {
+                if (cmd == OBD_IOC_RECOVD_NEWCONN)
+                        GOTO(out, rc = -EINVAL);
+                /* XXX macroize/inline and share with loop above */
+                list_for_each(tmp, &recovd->recovd_managed_items) {
+                        conn = list_entry(tmp, struct ptlrpc_connection,
+                                          c_recovd_data.rd_managed_chain);
+
+                        LASSERT(conn->c_recovd_data.rd_recovd == recovd);
+
+                        if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+                                break;
+                        conn = NULL;
}
-
- request->rq_bulk_md.start = request->rq_bulkbuf;
- request->rq_bulk_md.length = request->rq_bulklen;
- request->rq_bulk_md.threshold = 1;
- request->rq_bulk_md.options = PTL_MD_OP_PUT;
- request->rq_bulk_md.user_ptr = request;
- request->rq_bulk_md.eventq = bulk_sink_eq;
-
- rc = PtlMDAttach(bulk_me_h, request->rq_bulk_md, PTL_UNLINK,
- &request->rq_bulk_md_h);
- if (rc != PTL_OK) {
- EXIT;
- return rc;
- }
- }
-
- return ptl_send_buf(request, peer, request->rq_req_portal, 1);
-}
-
-int rpc_register_service(struct ptlrpc_service *service, char *uuid)
-{
- struct lustre_peer peer;
- int rc;
-
- rc = kportal_uuid_to_peer(uuid, &peer);
- if (rc != 0) {
- printk("Invalid uuid \"%s\"\n", uuid);
- return -EINVAL;
- }
-
- service->srv_buf = kmalloc(service->srv_buf_size, GFP_KERNEL);
- if (service->srv_buf == NULL) {
- printk(__FUNCTION__ ": no memory\n");
- return -ENOMEM;
+                if (!conn)
+                        GOTO(out, rc = -EINVAL);
}
- service->srv_id.addr_kind = PTL_ADDR_GID;
- service->srv_id.gid = PTL_ID_ANY;
- service->srv_id.rid = PTL_ID_ANY;
+        if (cmd == OBD_IOC_RECOVD_FAILCONN) {
+                /* recovd_conn_fail() is called without recovd_lock held */
+                spin_unlock(&recovd->recovd_lock);
+                recovd_conn_fail(conn);
+                spin_lock(&recovd->recovd_lock);
- rc = PtlMEAttach(peer.peer_ni, service->srv_portal, service->srv_id,
- 0, ~0, PTL_RETAIN, &service->srv_me_h);
- if (rc != PTL_OK) {
- printk("PtlMEAttach failed: %d\n", rc);
- return rc;
+                /* Jump straight to the "failed" phase of recovery. */
+                conn->c_recovd_data.rd_phase = RD_FAILED;
+                goto out;
}
- rc = PtlEQAlloc(peer.peer_ni, 128, incoming_callback, service,
- &service->srv_eq_h);
- if (rc != PTL_OK) {
- printk("PtlEQAlloc failed: %d\n", rc);
- return rc;
- }
-
- /* FIXME: Build an auto-unlinking MD and build a ring. */
- /* FIXME: Make sure that these are reachable by DMA on well-known
- * addresses. */
- service->srv_md.start = service->srv_buf;
- service->srv_md.length = service->srv_buf_size;
- service->srv_md.threshold = PTL_MD_THRESH_INF;
- service->srv_md.options = PTL_MD_OP_PUT;
- service->srv_md.user_ptr = service;
- service->srv_md.eventq = service->srv_eq_h;
-
- rc = PtlMDAttach(service->srv_me_h, service->srv_md,
- PTL_RETAIN, &service->srv_md_h);
- if (rc != PTL_OK) {
- printk("PtlMDAttach failed: %d\n", rc);
- /* FIXME: wow, we need to clean up. */
- return rc;
- }
+        /* else (NEWCONN) */
+        if (conn->c_recovd_data.rd_phase != RD_PREPARING)
+                GOTO(out, rc = -EALREADY);
- return 0;
+        /* The replacement UUID comes from the ioctl caller: make sure it is
+         * present and fits, terminator included, before it is strcpy()'d.
+         * NOTE(review): assumes c_remote_uuid is a fixed-size array member
+         * of struct ptlrpc_connection -- confirm against its declaration. */
+        if (data->ioc_inllen2 &&
+            (!data->ioc_inlbuf2 ||
+             strlen(data->ioc_inlbuf2) >= sizeof(conn->c_remote_uuid)))
+                GOTO(out, rc = -EINVAL);
+
+        spin_lock(&conn->c_lock);
+        if (data->ioc_inllen2) {
+                CERROR("conn %p UUID change %s -> %s\n",
+                       conn, conn->c_remote_uuid, data->ioc_inlbuf2);
+                /* length was validated above */
+                strcpy(conn->c_remote_uuid, data->ioc_inlbuf2);
+        } else {
+                CERROR("conn %p UUID %s reconnected\n", conn,
+                       conn->c_remote_uuid);
+        }
+        ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
+        spin_unlock(&conn->c_lock);
+
+        conn->c_recovd_data.rd_phase = RD_PREPARED;
+        wake_up(&recovd->recovd_waitq);
+ out:
+        spin_unlock(&recovd->recovd_lock);
+        RETURN(rc);
}
-int rpc_unregister_service(struct ptlrpc_service *service)
-{
- int rc;
-
- rc = PtlMDUnlink(service->srv_md_h);
- if (rc)
- printk(__FUNCTION__ ": PtlMDUnlink failed: %d\n", rc);
- rc = PtlEQFree(service->srv_eq_h);
- if (rc)
- printk(__FUNCTION__ ": PtlEQFree failed: %d\n", rc);
- rc = PtlMEUnlink(service->srv_me_h);
- if (rc)
- printk(__FUNCTION__ ": PtlMEUnlink failed: %d\n", rc);
-
- kfree(service->srv_buf);
- return 0;
-}
-static int req_init_portals(void)
-{
- int rc;
- const ptl_handle_ni_t *nip;
- ptl_handle_ni_t ni;
-
- nip = inter_module_get_request(LUSTRE_NAL "_ni", LUSTRE_NAL);
- if (nip == NULL) {
- printk("get_ni failed: is the NAL module loaded?\n");
- return -EIO;
- }
- ni = *nip;
-
- rc = PtlEQAlloc(ni, 128, request_callback, NULL, &req_eq);
- if (rc != PTL_OK)
- printk("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 128, bulk_source_callback, NULL, &bulk_source_eq);
- if (rc != PTL_OK)
- printk("PtlEQAlloc failed: %d\n", rc);
-
- rc = PtlEQAlloc(ni, 128, bulk_sink_callback, NULL, &bulk_sink_eq);
- if (rc != PTL_OK)
- printk("PtlEQAlloc failed: %d\n", rc);
-
- return rc;
-}
+/* obd method table for the recovd (connection manager) device type.  It is
+ * registered under LUSTRE_HA_NAME by ptlrpc_init(), which is how the
+ * management entry points above are offered through the obd layer.
+ * Connect and disconnect use the generic class_connect() and
+ * class_disconnect(). */
+static struct obd_ops recovd_obd_ops = {
+        .o_setup      = connmgr_setup,
+        .o_cleanup    = connmgr_cleanup,
+        .o_iocontrol  = connmgr_iocontrol,
+        .o_connect    = class_connect,
+        .o_disconnect = class_disconnect
+};
+/* Module entry point.  Initialises portals, then the connection layer, then
+ * registers the recovd obd type under LUSTRE_HA_NAME.
+ *
+ * Returns 0 on success.  If ptlrpc_init_portals() fails its error is
+ * returned and nothing else is initialised; if type registration fails,
+ * the two earlier steps are undone and the registration error is returned. */
static int __init ptlrpc_init(void)
{
- return req_init_portals();
+        int rc;
+        ENTRY;
+
+        rc = ptlrpc_init_portals();
+        if (rc)
+                RETURN(rc);
+        ptlrpc_init_connection();
+
+        /* NOTE(review): assumes class_register_type() returns 0 or an
+         * error code -- confirm against its prototype. */
+        rc = class_register_type(&recovd_obd_ops, LUSTRE_HA_NAME);
+        if (rc) {
+                /* registration failed: unwind what was initialised above,
+                 * in the same order ptlrpc_exit() uses */
+                ptlrpc_exit_portals();
+                ptlrpc_cleanup_connection();
+        }
+        RETURN(rc);
}
static void __exit ptlrpc_exit(void)
{
- PtlEQFree(req_eq);
-
- inter_module_put(LUSTRE_NAL "_ni");
-
- return;
+ class_unregister_type(LUSTRE_HA_NAME); /* module unload: first drop the obd type that ptlrpc_init() registered under this name */
+ ptlrpc_exit_portals(); /* counterpart of the ptlrpc_init_portals() call in ptlrpc_init() */
+ ptlrpc_cleanup_connection(); /* counterpart of the ptlrpc_init_connection() call in ptlrpc_init() */
}
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+/* Symbols this module exports for use by other modules.  Each group below
+ * is headed by a source file name, presumably the file that defines the
+ * symbols in that group.
+ *
+ * recovd.c */
+EXPORT_SYMBOL(ptlrpc_recovd);
+EXPORT_SYMBOL(recovd_conn_fail);
+EXPORT_SYMBOL(recovd_conn_manage);
+EXPORT_SYMBOL(recovd_conn_fixed);
+EXPORT_SYMBOL(recovd_setup);
+EXPORT_SYMBOL(recovd_cleanup);
+
+/* connection.c */
+EXPORT_SYMBOL(ptlrpc_readdress_connection);
+EXPORT_SYMBOL(ptlrpc_get_connection);
+EXPORT_SYMBOL(ptlrpc_put_connection);
+EXPORT_SYMBOL(ptlrpc_connection_addref);
+EXPORT_SYMBOL(ptlrpc_init_connection);
+EXPORT_SYMBOL(ptlrpc_cleanup_connection);
+
+/* niobuf.c */
+EXPORT_SYMBOL(ptlrpc_send_bulk);
+EXPORT_SYMBOL(ptlrpc_register_bulk);
+EXPORT_SYMBOL(ptlrpc_abort_bulk);
+EXPORT_SYMBOL(ptlrpc_reply);
+EXPORT_SYMBOL(ptlrpc_error);
+EXPORT_SYMBOL(ptlrpc_resend_req);
+EXPORT_SYMBOL(ptl_send_rpc);
+EXPORT_SYMBOL(ptlrpc_link_svc_me);
+
+/* client.c */
+EXPORT_SYMBOL(ptlrpc_init_client);
+EXPORT_SYMBOL(ptlrpc_cleanup_client);
+EXPORT_SYMBOL(ptlrpc_req_to_uuid);
+EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
+EXPORT_SYMBOL(ptlrpc_queue_wait);
+EXPORT_SYMBOL(ptlrpc_continue_req);
+EXPORT_SYMBOL(ptlrpc_replay_req);
+EXPORT_SYMBOL(ptlrpc_restart_req);
+EXPORT_SYMBOL(ptlrpc_prep_req);
+EXPORT_SYMBOL(ptlrpc_free_req);
+EXPORT_SYMBOL(ptlrpc_req_finished);
+EXPORT_SYMBOL(ptlrpc_prep_bulk);
+EXPORT_SYMBOL(ptlrpc_free_bulk);
+EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
+EXPORT_SYMBOL(ptlrpc_free_bulk_page);
+EXPORT_SYMBOL(ptlrpc_check_status);
+
+/* service.c */
+EXPORT_SYMBOL(ptlrpc_init_svc);
+EXPORT_SYMBOL(ptlrpc_stop_all_threads);
+EXPORT_SYMBOL(ptlrpc_start_thread);
+EXPORT_SYMBOL(ptlrpc_unregister_service);
+
+/* pack_generic.c */
+EXPORT_SYMBOL(lustre_pack_msg);
+EXPORT_SYMBOL(lustre_msg_size);
+EXPORT_SYMBOL(lustre_unpack_msg);
+EXPORT_SYMBOL(lustre_msg_buf);
+
+EXPORT_SYMBOL(ll_recover); /* NOTE(review): no source-file heading for this export -- confirm where it is defined */
+
+
+MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Request Processor v1.0");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL");
module_init(ptlrpc_init);
module_exit(ptlrpc_exit);
-