Whamcloud - gitweb
Remove no-longer-needed inode operations (they previously had extN EA VFS
[fs/lustre-release.git] / lustre / ost / ost_handler.c
index 10a595b..67ca61a 100644 (file)
  */
 
 #define EXPORT_SYMTAB
-
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/locks.h>
-#include <linux/ext2_fs.h>
-#include <linux/quotaops.h>
-#include <asm/unistd.h>
-
 #define DEBUG_SUBSYSTEM S_OST
 
+#include <linux/module.h>
 #include <linux/obd_ost.h>
 #include <linux/lustre_net.h>
 
 static int ost_destroy(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body;  /* OST_DESTROY handler: request payload, read from message buffer 0 */
+        int rc, size = sizeof(*body);  /* size: length of the single reply buffer */
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;  /* connection id is taken from the request body */
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);  /* sets up rq_repmsg/rq_replen for one buffer of `size` bytes */
+        if (rc)
                 RETURN(rc);
-        }
-
-        req->rq_rep.ost->result = obd_destroy(&conn, &req->rq_req.ost->oa);
 
+        req->rq_status = obd_destroy(&conn, &body->oa);  /* outcome goes into rq_status; the handler itself still returns 0 */
         RETURN(0);
 }
 
 static int ost_getattr(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_GETATTR handler: body = request payload, repbody = reply payload */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);  /* reply carries one ost_body-sized buffer */
+        if (rc)
                 RETURN(rc);
-        }
-        req->rq_rep.ost->oa.o_id = req->rq_req.ost->oa.o_id;
-        req->rq_rep.ost->oa.o_valid = req->rq_req.ost->oa.o_valid;
-
-        req->rq_rep.ost->result =  obd_getattr(&conn, &req->rq_rep.ost->oa);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));  /* whole oa is copied now; the removed code copied only o_id and o_valid */
+        req->rq_status = obd_getattr(&conn, &repbody->oa);  /* runs against the reply's copy; result is reported via rq_status */
         RETURN(0);
 }
 
 static int ost_open(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_OPEN handler: request and reply payloads */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);  /* request payload is buffer 0 of the request message */
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 RETURN(rc);
-        }
-        req->rq_rep.ost->oa.o_id = req->rq_req.ost->oa.o_id;
-        req->rq_rep.ost->oa.o_valid = req->rq_req.ost->oa.o_valid;
-
-        req->rq_rep.ost->result =  obd_open(&conn, &req->rq_rep.ost->oa);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));  /* reply starts from a full copy of the request's oa */
+        req->rq_status = obd_open(&conn, &repbody->oa);  /* open result travels in rq_status; return value stays 0 */
         RETURN(0);
 }
 
 static int ost_close(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_CLOSE handler: request and reply payloads */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;  /* identifies the connection the close applies to */
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);  /* a packing failure is the only error returned directly */
+        if (rc)
                 RETURN(rc);
-        }
-        req->rq_rep.ost->oa.o_id = req->rq_req.ost->oa.o_id;
-        req->rq_rep.ost->oa.o_valid = req->rq_req.ost->oa.o_valid;
-
-        req->rq_rep.ost->result =  obd_close(&conn, &req->rq_rep.ost->oa);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        req->rq_status = obd_close(&conn, &repbody->oa);  /* close result is stored in rq_status */
         RETURN(0);
 }
 
-
 static int ost_create(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_CREATE handler: request and reply payloads */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 RETURN(rc);
-        }
-
-        memcpy(&req->rq_rep.ost->oa, &req->rq_req.ost->oa,
-               sizeof(req->rq_req.ost->oa));
-
-        req->rq_rep.ost->result =obd_create(&conn, &req->rq_rep.ost->oa);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));  /* creation is driven from the reply's copy of the oa */
+        req->rq_status = obd_create(&conn, &repbody->oa);  /* whatever obd_create leaves in repbody->oa is what gets sent back */
         RETURN(0);
 }
 
 static int ost_punch(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_PUNCH handler: request and reply payloads */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 RETURN(rc);
-        }
-
-        memcpy(&req->rq_rep.ost->oa, &req->rq_req.ost->oa,
-               sizeof(req->rq_req.ost->oa));
-
-        req->rq_rep.ost->result = obd_punch(&conn, &req->rq_rep.ost->oa,
-                                            req->rq_rep.ost->oa.o_size,
-                                            req->rq_rep.ost->oa.o_blocks);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        req->rq_status = obd_punch(&conn, &repbody->oa,  /* the two extra arguments are read from the copied oa */
+                                   repbody->oa.o_size, repbody->oa.o_blocks);  /* NOTE(review): o_size/o_blocks presumably encode the punch range - confirm against obd_punch */
         RETURN(0);
 }
 
-
 static int ost_setattr(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body, *repbody;  /* OST_SETATTR handler: request and reply payloads */
+        int rc, size = sizeof(*body);
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);  /* reply: one buffer the size of struct ost_body */
+        if (rc)
                 RETURN(rc);
-        }
-
-        memcpy(&req->rq_rep.ost->oa, &req->rq_req.ost->oa,
-               sizeof(req->rq_req.ost->oa));
-
-        req->rq_rep.ost->result = obd_setattr(&conn, &req->rq_rep.ost->oa);
 
+        repbody = lustre_msg_buf(req->rq_repmsg, 0);
+        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));  /* attributes to apply are the ones sent in the request */
+        req->rq_status = obd_setattr(&conn, &repbody->oa);  /* setattr result is reported through rq_status */
         RETURN(0);
 }
 
 static int ost_connect(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body;  /* OST_CONNECT handler: here body points at the REPLY payload, not the request */
+        int rc, size = sizeof(*body);
         ENTRY;
 
         conn.oc_dev = ost->ost_tgt;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 RETURN(rc);
-        }
 
-        req->rq_rep.ost->result = obd_connect(&conn);
+        req->rq_status = obd_connect(&conn);  /* presumably fills in conn.oc_id on success - confirm */
 
-        CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repbuf, conn.oc_id);
-        req->rq_rep.ost->connid = conn.oc_id;
+        CDEBUG(D_IOCTL, "rep buffer %p, id %d\n", req->rq_repmsg, conn.oc_id);
+        body = lustre_msg_buf(req->rq_repmsg, 0);
+        body->connid = conn.oc_id;  /* NOTE(review): oc_id is never assigned in this function; if obd_connect() fails without setting it, an indeterminate id is logged and sent */
         RETURN(0);
 }
 
 static int ost_disconnect(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-
+        struct ost_body *body;  /* OST_DISCONNECT handler: request payload */
+        int rc, size = sizeof(*body);
         ENTRY;
 
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;  /* the connection to drop is named by the request */
         conn.oc_dev = ost->ost_tgt;
-        conn.oc_id = req->rq_req.ost->connid;
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0, &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);  /* reply is packed before the disconnect is attempted */
+        if (rc)
                 RETURN(rc);
-        }
-        CDEBUG(D_IOCTL, "Disconnecting %d\n", conn.oc_id);
-        req->rq_rep.ost->result = obd_disconnect(&conn);
 
+        CDEBUG(D_IOCTL, "Disconnecting %d\n", conn.oc_id);
+        req->rq_status = obd_disconnect(&conn);  /* disconnect result is reported through rq_status */
         RETURN(0);
 }
 
 static int ost_get_info(struct ost_obd *ost, struct ptlrpc_request *req)
 {
         struct obd_conn conn;
-        int rc;
-        int vallen;
-        void *val;
-        char *ptr;
-
+        struct ost_body *body;
+        int rc, size[2] = {sizeof(*body)};
+        char *bufs[2] = {NULL, NULL}, *ptr;
         ENTRY;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        conn.oc_id = body->connid;
         conn.oc_dev = ost->ost_tgt;
 
-        ptr = ost_req_buf1(req->rq_req.ost);
-        req->rq_rep.ost->result = obd_get_info(&conn, req->rq_req.ost->buflen1,
-                                               ptr, &vallen, &val);
+        ptr = lustre_msg_buf(req->rq_reqmsg, 1);
+        if (!ptr)
+                RETURN(-EINVAL);
 
-        rc = ost_pack_rep(val, vallen, NULL, 0, &req->rq_rephdr,
-                          &req->rq_rep, &req->rq_replen, &req->rq_repbuf);
+        req->rq_status = obd_get_info(&conn, req->rq_reqmsg->buflens[1], ptr,
+                                      &(size[1]), (void **)&(bufs[1]));
+
+        rc = lustre_pack_msg(2, size, bufs, &req->rq_replen, &req->rq_repmsg);
         if (rc)
                 CERROR("cannot pack reply\n");
 
@@ -291,126 +252,96 @@ static int ost_get_info(struct ost_obd *ost, struct ptlrpc_request *req)
 
 static int ost_brw_read(struct ost_obd *obddev, struct ptlrpc_request *req)
 {
-        struct ptlrpc_bulk_desc **bulk_vec = NULL;
-        struct ptlrpc_bulk_desc *bulk = NULL;
+        struct ptlrpc_bulk_desc *desc;
         struct obd_conn conn;
-        int rc;
-        int i, j;
-        int objcount, niocount;
-        char *tmp1, *tmp2, *end2;
-        char *res = NULL;
-        int cmd;
-        struct niobuf *nb, *src, *dst;
+        void *tmp1, *tmp2, *end2;
+        struct niobuf_remote *remote_nb;
+        struct niobuf_local *local_nb = NULL;
         struct obd_ioobj *ioo;
-        struct ost_req *r = req->rq_req.ost;
-
+        struct ost_body *body;
+        int rc, cmd, i, j, objcount, niocount, size = sizeof(*body);
         ENTRY;
 
-        tmp1 = ost_req_buf1(r);
-        tmp2 = ost_req_buf2(r);
-        end2 = tmp2 + req->rq_req.ost->buflen2;
-        objcount = r->buflen1 / sizeof(*ioo);
-        niocount = r->buflen2 / sizeof(*nb);
-        cmd = r->cmd;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+        end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
+        objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
+        niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
+        cmd = body->data;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        conn.oc_id = body->connid;
         conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
 
         for (i = 0; i < objcount; i++) {
-                ost_unpack_ioo((void *)&tmp1, &ioo);
+                ost_unpack_ioo(&tmp1, &ioo);
                 if (tmp2 + ioo->ioo_bufcnt > end2) {
-                        BUG();
-                        rc = -EFAULT;
-                        break;
+                        LBUG();
+                        GOTO(out, rc = -EFAULT);
                 }
                 for (j = 0; j < ioo->ioo_bufcnt; j++)
-                        ost_unpack_niobuf((void *)&tmp2, &nb);
+                        ost_unpack_niobuf(&tmp2, &remote_nb);
         }
 
-        rc = ost_pack_rep(NULL, 0, NULL, 0,
-                          &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
                 RETURN(rc);
-        }
-        OBD_ALLOC(res, sizeof(struct niobuf) * niocount);
-        if (res == NULL)
+        OBD_ALLOC(local_nb, sizeof(*local_nb) * niocount);
+        if (local_nb == NULL)
                 RETURN(-ENOMEM);
 
         /* The unpackers move tmp1 and tmp2, so reset them before using */
-        tmp1 = ost_req_buf1(r);
-        tmp2 = ost_req_buf2(r);
-        req->rq_rep.ost->result = obd_preprw
-                (cmd, &conn, objcount, (struct obd_ioobj *)tmp1,
-                 niocount, (struct niobuf *)tmp2, (struct niobuf *)res);
+        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+        req->rq_status = obd_preprw(cmd, &conn, objcount,
+                                    tmp1, niocount, tmp2, local_nb);
 
-        if (req->rq_rep.ost->result)
-                GOTO(out, 0);
+        if (req->rq_status)
+                GOTO(out_local, 0);
 
-        for (i = 0; i < niocount; i++) {
-                bulk = ptlrpc_prep_bulk(&req->rq_peer);
-                if (bulk == NULL) {
-                        CERROR("cannot alloc bulk desc\n");
-                        rc = -ENOMEM;
-                        GOTO(out, rc);
-                }
+        desc = ptlrpc_prep_bulk(req->rq_connection);
+        if (desc == NULL)
+                GOTO(out_local, rc = -ENOMEM);
+        desc->b_portal = OST_BULK_PORTAL;
 
-                src = &((struct niobuf *)res)[i];
-                dst = &((struct niobuf *)tmp2)[i];
-                bulk->b_xid = dst->xid;
-                bulk->b_buf = (void *)(unsigned long)src->addr;
+        for (i = 0; i < niocount; i++) {
+                struct ptlrpc_bulk_page *bulk;
+                bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_bulk, rc = -ENOMEM);
+                remote_nb = &(((struct niobuf_remote *)tmp2)[i]);
+                bulk->b_xid = remote_nb->xid;
+                bulk->b_buf = (void *)(unsigned long)local_nb[i].addr;
                 bulk->b_buflen = PAGE_SIZE;
-                rc = ptlrpc_send_bulk(bulk, OST_BULK_PORTAL);
-                if (rc)
-                        GOTO(out, rc);
-                wait_event_interruptible(bulk->b_waitq,
-                                         ptlrpc_check_bulk_sent(bulk));
-
-                if (bulk->b_flags == PTL_RPC_INTR)
-                        GOTO(out, 0);
-
-                OBD_FREE(bulk, sizeof(*bulk));
-                bulk = NULL;
         }
 
-#if 0
-        /* Local delivery */
-        dst = &((struct niobuf *)tmp2)[i];
-        memcpy((void *)(unsigned long)dst->addr,
-               (void *)(unsigned long)src->addr, PAGE_SIZE);
-#endif
-        barrier();
+        rc = ptlrpc_send_bulk(desc);
+        if (rc)
+                GOTO(out_bulk, rc);
+
+        ptlrpc_free_bulk(desc);
 
         /* The unpackers move tmp1 and tmp2, so reset them before using */
-        tmp1 = ost_req_buf1(r);
-        tmp2 = ost_req_buf2(r);
-        req->rq_rep.ost->result = obd_commitrw
-                (cmd, &conn, objcount, (struct obd_ioobj *)tmp1,
-                 niocount, (struct niobuf *)res);
+        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+        req->rq_status = obd_commitrw(cmd, &conn, objcount,
+                                      tmp1, niocount, local_nb);
 
-        EXIT;
- out:
-        if (res != NULL)
-                OBD_FREE(res, sizeof(struct niobuf) * niocount);
-        if (bulk != NULL)
-                OBD_FREE(bulk, sizeof(*bulk));
-        if (bulk_vec != NULL) {
-                for (i = 0; i < niocount; i++) {
-                        if (bulk_vec[i] != NULL)
-                                OBD_FREE(bulk_vec[i], sizeof(*bulk));
-                }
-                OBD_FREE(bulk_vec,
-                         niocount * sizeof(struct ptlrpc_bulk_desc *));
-        }
+        RETURN(rc);
 
+ out_bulk:
+        ptlrpc_free_bulk(desc);
+ out_local:
+        if (local_nb != NULL)
+                OBD_FREE(local_nb, sizeof(*local_nb) * niocount);
+ out:
         return 0;
 }
 
 static int ost_commit_page(struct obd_conn *conn, struct page *page)
 {
         struct obd_ioobj obj;
-        struct niobuf buf;
+        struct niobuf_local buf;
         int rc;
         ENTRY;
 
@@ -424,42 +355,58 @@ static int ost_commit_page(struct obd_conn *conn, struct page *page)
         RETURN(rc);
 }
 
-static int ost_brw_write_cb(struct ptlrpc_bulk_desc *bulk, void *data)
+static int ost_brw_write_cb(struct ptlrpc_bulk_page *bulk)
 {
+        void *journal_save;  /* this thread's own journal_info, put back after the commit */
         int rc;
-
         ENTRY;
 
-        rc = ost_commit_page(&bulk->b_conn, bulk->b_page);
+        /* Restore the filesystem journal context when we do the commit.
+         * This is needed for ext3 and reiserfs, but can't really hurt
+         * other filesystems.
+         */
+        journal_save = current->journal_info;
+        current->journal_info = bulk->b_desc->b_journal_info;  /* context that ost_brw_write stored on the descriptor */
+        CDEBUG(D_BUFFS, "journal_info: saved %p->%p, restored %p\n", current,
+               journal_save, bulk->b_desc->b_journal_info);
+        rc = ost_commit_page(&bulk->b_desc->b_conn, bulk->b_page);  /* connection now lives on the parent descriptor, not on each page */
+        current->journal_info = journal_save;  /* undo the swap whether or not the commit succeeded */
+        CDEBUG(D_BUFFS, "journal_info: restored %p->%p\n", current,
+               journal_save);
         if (rc)
                 CERROR("ost_commit_page failed: %d\n", rc);
 
         RETURN(rc);
 }
 
-int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
+static int ost_brw_write_finished_cb(struct ptlrpc_bulk_desc *desc)
 {
+        ptlrpc_free_bulk(desc);  /* installed as desc->b_cb by ost_brw_write; frees that descriptor (presumably once the bulk write has finished - confirm) */
+
+        return 0;  /* always reports success */
+}
+
+static int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
+{
+        struct ptlrpc_bulk_desc *desc;
         struct obd_conn conn;
-        int rc;
-        int i, j;
-        int objcount, niocount;
-        char *tmp1, *tmp2, *end2;
-        char *res;
-        int cmd;
-        struct niobuf *nb, *dst;
+        struct niobuf_remote *remote_nb;
+        struct niobuf_local *local_nb, *lnb;
         struct obd_ioobj *ioo;
-        struct ost_req *r = req->rq_req.ost;
-
+        struct ost_body *body;
+        int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
+        void *tmp1, *tmp2, *end2;
         ENTRY;
 
-        tmp1 = ost_req_buf1(r);
-        tmp2 = ost_req_buf2(r);
-        end2 = tmp2 + req->rq_req.ost->buflen2;
-        objcount = r->buflen1 / sizeof(*ioo);
-        niocount = r->buflen2 / sizeof(*nb);
-        cmd = r->cmd;
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+        end2 = (char *)tmp2 + req->rq_reqmsg->buflens[2];
+        objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
+        niocount = req->rq_reqmsg->buflens[2] / sizeof(*remote_nb);
+        cmd = body->data;
 
-        conn.oc_id = req->rq_req.ost->connid;
+        conn.oc_id = body->connid;
         conn.oc_dev = req->rq_obd->u.ost.ost_tgt;
 
         for (i = 0; i < objcount; i++) {
@@ -469,77 +416,85 @@ int ost_brw_write(struct ost_obd *obddev, struct ptlrpc_request *req)
                         break;
                 }
                 for (j = 0; j < ioo->ioo_bufcnt; j++)
-                        ost_unpack_niobuf((void *)&tmp2, &nb);
-        }
-
-        rc = ost_pack_rep(NULL, 0, NULL, niocount * sizeof(*nb),
-                          &req->rq_rephdr, &req->rq_rep,
-                          &req->rq_replen, &req->rq_repbuf);
-        if (rc) {
-                CERROR("cannot pack reply\n");
-                RETURN(rc);
+                        ost_unpack_niobuf((void *)&tmp2, &remote_nb);
         }
-        res = ost_rep_buf2(req->rq_rep.ost);
 
-        /* The unpackers move tmp1 and tmp2, so reset them before using */
-        tmp1 = ost_req_buf1(r);
-        tmp2 = ost_req_buf2(r);
-        req->rq_rep.ost->result = obd_preprw
-                (cmd, &conn, objcount, (struct obd_ioobj *)tmp1,
-                 niocount, (struct niobuf *)tmp2, (struct niobuf *)res);
+        size[1] = niocount * sizeof(*remote_nb);
+        rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                GOTO(fail, rc);
+        remote_nb = lustre_msg_buf(req->rq_repmsg, 1);
 
-        if (req->rq_rep.ost->result)
-                GOTO(out, 0);
+        OBD_ALLOC(local_nb, niocount * sizeof(*local_nb));
+        if (local_nb == NULL)
+                GOTO(fail, rc = -ENOMEM);
 
-        for (i = 0; i < niocount; i++) {
-                struct ptlrpc_bulk_desc *bulk;
+        /* The unpackers move tmp1 and tmp2, so reset them before using */
+        tmp1 = lustre_msg_buf(req->rq_reqmsg, 1);
+        tmp2 = lustre_msg_buf(req->rq_reqmsg, 2);
+        req->rq_status = obd_preprw(cmd, &conn, objcount,
+                                    tmp1, niocount, tmp2, local_nb);
+        if (req->rq_status)
+                GOTO(success, 0);
+
+        desc = ptlrpc_prep_bulk(req->rq_connection);
+        if (desc == NULL)
+                GOTO(fail_preprw, rc = -ENOMEM);
+        desc->b_cb = ost_brw_write_finished_cb;
+        desc->b_portal = OSC_BULK_PORTAL;
+        memcpy(&(desc->b_conn), &conn, sizeof(conn));
+
+        /* Save journal context for commit callbacks */
+        CDEBUG(D_BUFFS, "journal_info: saved %p->%p\n", current,
+               current->journal_info);
+        desc->b_journal_info = current->journal_info;
+
+        for (i = 0, lnb = local_nb; i < niocount; i++, lnb++) {
                 struct ptlrpc_service *srv = req->rq_obd->u.ost.ost_service;
+                struct ptlrpc_bulk_page *bulk;
 
-                bulk = ptlrpc_prep_bulk(&req->rq_peer);
-                if (bulk == NULL) {
-                        CERROR("cannot alloc bulk desc\n");
-                        rc = -ENOMEM;
-                        GOTO(out, rc);
-                }
+                bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(fail_bulk, rc = -ENOMEM);
 
                 spin_lock(&srv->srv_lock);
                 bulk->b_xid = srv->srv_xid++;
                 spin_unlock(&srv->srv_lock);
 
-                dst = &((struct niobuf *)res)[i];
-                dst->xid = HTON__u32(bulk->b_xid);
-
-                bulk->b_buf = (void *)(unsigned long)dst->addr;
-                bulk->b_cb = ost_brw_write_cb;
-                bulk->b_page = dst->page;
-                memcpy(&(bulk->b_conn), &conn, sizeof(conn));
+                bulk->b_buf = (void *)(unsigned long)lnb->addr;
+                bulk->b_page = lnb->page;
                 bulk->b_buflen = PAGE_SIZE;
-                bulk->b_portal = OSC_BULK_PORTAL;
-                rc = ptlrpc_register_bulk(bulk);
-                if (rc)
-                        GOTO(out, rc);
-
-#if 0
-                /* Local delivery */
-                src = &((struct niobuf *)tmp2)[i];
-                memcpy((void *)(unsigned long)dst->addr,
-                       (void *)(unsigned long)src->addr, src->len);
-#endif
+                bulk->b_cb = ost_brw_write_cb;
+
+                /* this advances remote_nb */
+                ost_pack_niobuf((void **)&remote_nb, lnb->offset, lnb->len, 0,
+                                bulk->b_xid);
         }
-        barrier();
+
+        rc = ptlrpc_register_bulk(desc);
+        current->journal_info = NULL; /* kind of scary */
+        if (rc)
+                GOTO(fail_bulk, rc);
 
         EXIT;
out:
-        /* FIXME: should we return 'rc' here? */
success:
+        OBD_FREE(local_nb, niocount * sizeof(*local_nb));
         return 0;
+
+ fail_bulk:
+        ptlrpc_free_bulk(desc);
+ fail_preprw:
+        OBD_FREE(local_nb, niocount * sizeof(*local_nb));
+        /* FIXME: how do we undo the preprw? */
+ fail:
+        return rc;
 }
 
-int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
+static int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
 {
-        struct ost_req *r = req->rq_req.ost;
-        int cmd = r->cmd;
+        struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0);
 
-        if (cmd == OBD_BRW_READ)
+        if (body->data == OBD_BRW_READ)
                 return ost_brw_read(obddev, req);
         else
                 return ost_brw_write(obddev, req);
@@ -548,110 +503,100 @@ int ost_brw(struct ost_obd *obddev, struct ptlrpc_request *req)
 static int ost_handle(struct obd_device *obddev, struct ptlrpc_service *svc,
                       struct ptlrpc_request *req)
 {
-       int rc;
-       struct ost_obd *ost = &obddev->u.ost;
-       struct ptlreq_hdr *hdr;
-
-       ENTRY;
-
-       hdr = (struct ptlreq_hdr *)req->rq_reqbuf;
-       if (NTOH__u32(hdr->type) != OST_TYPE_REQ) {
-               CERROR("lustre_ost: wrong packet type sent %d\n",
-                      NTOH__u32(hdr->type));
-                BUG();
-               rc = -EINVAL;
-                GOTO(out, rc);
-       }
+        int rc;
+        struct ost_obd *ost = &obddev->u.ost;
+        ENTRY;
 
-        rc = ost_unpack_req(req->rq_reqbuf, req->rq_reqlen,
-                            &req->rq_reqhdr, &req->rq_req);
-        if (rc) {
-                CERROR("lustre_ost: Invalid request\n");
+        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
+        if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_HANDLE_UNPACK)) {
+                CERROR("lustre_mds: Invalid request\n");
                 GOTO(out, rc);
         }
 
-        switch (req->rq_reqhdr->opc) {
+        if (req->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
+                CERROR("lustre_mds: wrong packet type sent %d\n",
+                       req->rq_reqmsg->type);
+                GOTO(out, rc = -EINVAL);
+        }
 
+        switch (req->rq_reqmsg->opc) {
         case OST_CONNECT:
                 CDEBUG(D_INODE, "connect\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_CONNECT);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
                 rc = ost_connect(ost, req);
                 break;
         case OST_DISCONNECT:
                 CDEBUG(D_INODE, "disconnect\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_DISCONNECT);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
                 rc = ost_disconnect(ost, req);
                 break;
         case OST_GET_INFO:
                 CDEBUG(D_INODE, "get_info\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_GET_INFO);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_GET_INFO_NET, 0);
                 rc = ost_get_info(ost, req);
                 break;
         case OST_CREATE:
                 CDEBUG(D_INODE, "create\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_CREATE);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
                 rc = ost_create(ost, req);
                 break;
         case OST_DESTROY:
                 CDEBUG(D_INODE, "destroy\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_DESTROY);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
                 rc = ost_destroy(ost, req);
                 break;
         case OST_GETATTR:
                 CDEBUG(D_INODE, "getattr\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_GETATTR);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
                 rc = ost_getattr(ost, req);
                 break;
         case OST_SETATTR:
                 CDEBUG(D_INODE, "setattr\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_SETATTR);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
                 rc = ost_setattr(ost, req);
                 break;
         case OST_OPEN:
                 CDEBUG(D_INODE, "setattr\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_OPEN);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0);
                 rc = ost_open(ost, req);
                 break;
         case OST_CLOSE:
                 CDEBUG(D_INODE, "setattr\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_CLOSE);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0);
                 rc = ost_close(ost, req);
                 break;
         case OST_BRW:
                 CDEBUG(D_INODE, "brw\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_BRW);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
                 rc = ost_brw(ost, req);
                 break;
         case OST_PUNCH:
                 CDEBUG(D_INODE, "punch\n");
-                OBD_CHECK_DROP_PACKET(req, OBD_INST_OST_PUNCH);
+                OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
                 rc = ost_punch(ost, req);
                 break;
         default:
                 req->rq_status = -ENOTSUPP;
-                rc = ptlrpc_error(obddev, svc, req);
+                rc = ptlrpc_error(svc, req);
                 RETURN(rc);
         }
 
         EXIT;
 out:
-        req->rq_status = rc;
+        //req->rq_status = rc;
         if (rc) {
                 CERROR("ost: processing error %d\n", rc);
-                ptlrpc_error(obddev, svc, req);
+                ptlrpc_error(svc, req);
         } else {
                 CDEBUG(D_INODE, "sending reply\n");
-                ptlrpc_reply(obddev, svc, req);
+                ptlrpc_reply(svc, req);
         }
 
         return 0;
 }
 
-
 /* mount the file system (secretly) */
-static int ost_setup(struct obd_device *obddev, obd_count len,
-                        void *buf)
-
+static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct obd_ioctl_data* data = buf;
         struct ost_obd *ost = &obddev->u.ost;
@@ -659,47 +604,48 @@ static int ost_setup(struct obd_device *obddev, obd_count len,
         int err;
         ENTRY;
 
-        if (data->ioc_dev  < 0 || data->ioc_dev > MAX_OBD_DEVICES)
+        if (data->ioc_dev < 0 || data->ioc_dev > MAX_OBD_DEVICES)
                 RETURN(-ENODEV);
 
+        MOD_INC_USE_COUNT;
         tgt = &obd_dev[data->ioc_dev];
         ost->ost_tgt = tgt;
-        if ( ! (tgt->obd_flags & OBD_ATTACHED) ||
-             ! (tgt->obd_flags & OBD_SET_UP) ){
+        if (!(tgt->obd_flags & OBD_ATTACHED) ||
+            !(tgt->obd_flags & OBD_SET_UP)) {
                 CERROR("device not attached or not set up (%d)\n",
                        data->ioc_dev);
-                RETURN(-EINVAL);
+                GOTO(error_dec, err = -EINVAL);
         }
 
         ost->ost_conn.oc_dev = tgt;
         err = obd_connect(&ost->ost_conn);
         if (err) {
                 CERROR("fail to connect to device %d\n", data->ioc_dev);
-                RETURN(-EINVAL);
+                GOTO(error_dec, err = -EINVAL);
         }
 
-        ost->ost_service = ptlrpc_init_svc( 2 * 1024, 
-                                            OST_REQUEST_PORTAL,
-                                            OSC_REPLY_PORTAL,
-                                            "self",
-                                            ost_unpack_req,
-                                            ost_pack_rep,
-                                            ost_handle);
+        ost->ost_service = ptlrpc_init_svc(128 * 1024,
+                                           OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+                                           "self", ost_handle);
         if (!ost->ost_service) {
-                obd_disconnect(&ost->ost_conn);
-                RETURN(-EINVAL);
+                CERROR("failed to start service\n");
+                GOTO(error_disc, err = -EINVAL);
         }
 
-        rpc_register_service(ost->ost_service, "self");
-
         err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
-        if (err) {
-                obd_disconnect(&ost->ost_conn);
-                RETURN(-EINVAL);
-        }
+        if (err)
+                GOTO(error_disc, err = -EINVAL);
+        err = ptlrpc_start_thread(obddev, ost->ost_service, "lustre_ost");
+        if (err)
+                GOTO(error_disc, err = -EINVAL);
 
-        MOD_INC_USE_COUNT;
         RETURN(0);
+
+error_disc:
+        obd_disconnect(&ost->ost_conn);
+error_dec:
+        MOD_DEC_USE_COUNT;
+        RETURN(err);
 }
 
 static int ost_cleanup(struct obd_device * obddev)
@@ -714,7 +660,7 @@ static int ost_cleanup(struct obd_device * obddev)
                 RETURN(-EBUSY);
         }
 
-        ptlrpc_stop_thread(ost->ost_service);
+        ptlrpc_stop_all_threads(ost->ost_service);
         rpc_unregister_service(ost->ost_service);
 
         if (!list_empty(&ost->ost_service->srv_reqs)) {