Whamcloud - gitweb
LU-12811 ptlrpc: pass buflen to lustre_swab_object_update_*()
[fs/lustre-release.git] / lustre / target / out_handler.c
index 9ee9e73..e91c338 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2013, 2017, Intel Corporation.
  *
  * lustre/target/out_handler.c
  *
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <obd_class.h>
+#include <llog_swab.h>
+#include <lustre_obdo.h>
+#include <lustre_swab.h>
+#include <lustre_update.h>
 #include <md_object.h>
+#include <obd_class.h>
 #include "tgt_internal.h"
-#include <lustre_update.h>
+
+/*
+ * Convert an out_read_reply from CPU byte order to little-endian.
+ *
+ * Every field is read from orr_src and stored into orr_dst, so the two
+ * pointers may refer to the same structure (in-place conversion) or to
+ * two different structures.
+ *
+ * orr_dst: reply that receives the little-endian values
+ * orr_src: reply holding the CPU-order values
+ */
+static inline void orr_cpu_to_le(struct out_read_reply *orr_dst,
+                                const struct out_read_reply *orr_src)
+{
+       orr_dst->orr_size = cpu_to_le32(orr_src->orr_size);
+       orr_dst->orr_padding = cpu_to_le32(orr_src->orr_padding);
+       /* Read from the source like the other fields, not from the
+        * (possibly uninitialized) destination. */
+       orr_dst->orr_offset = cpu_to_le64(orr_src->orr_offset);
+}
 
 static void out_reconstruct(const struct lu_env *env, struct dt_device *dt,
                            struct dt_object *obj,
@@ -45,7 +56,6 @@ static void out_reconstruct(const struct lu_env *env, struct dt_device *dt,
               dt_obd_name(dt), reply, index, 0);
 
        object_update_result_insert(reply, NULL, 0, index, 0);
-       return;
 }
 
 typedef void (*out_reconstruct_t)(const struct lu_env *env,
@@ -79,13 +89,12 @@ static inline int out_check_resent(const struct lu_env *env,
                lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
                lustre_msg_set_status(req->rq_repmsg, req->rq_status);
 
-               DEBUG_REQ(D_RPCTRACE, req, "restoring transno "LPD64"status %d",
-                         req->rq_transno, req->rq_status);
+               DEBUG_REQ(D_RPCTRACE, req, "restoring resent RPC");
 
                reconstruct(env, dt, obj, reply, index);
                return 1;
        }
-       DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+       DEBUG_REQ(D_HA, req, "no reply for RESENT req (have %lld)",
                 req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
        return 0;
 }
@@ -106,10 +115,10 @@ static int out_create(struct tgt_session_info *tsi)
        ENTRY;
 
        wobdo = object_update_param_get(update, 0, &size);
-       if (wobdo == NULL || IS_ERR(wobdo) || size != sizeof(*wobdo)) {
-               CERROR("%s: obdo is NULL, invalid RPC: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(wobdo) || size != sizeof(*wobdo)) {
+               CERROR("%s: obdo is NULL, invalid RPC: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(wobdo));
+               RETURN(PTR_ERR(wobdo));
        }
 
        if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
@@ -120,17 +129,17 @@ static int out_create(struct tgt_session_info *tsi)
        dof->dof_type = dt_mode_to_dft(attr->la_mode);
        if (update->ou_params_count > 1) {
                fid = object_update_param_get(update, 1, &size);
-               if (fid == NULL || IS_ERR(fid) || size != sizeof(*fid)) {
-                       CERROR("%s: invalid fid: rc = %d\n",
-                              tgt_name(tsi->tsi_tgt), -EPROTO);
-                       RETURN(err_serious(-EPROTO));
+               if (IS_ERR(fid) || size != sizeof(*fid)) {
+                       CERROR("%s: invalid fid: rc = %ld\n",
+                              tgt_name(tsi->tsi_tgt), PTR_ERR(fid));
+                       RETURN(PTR_ERR(fid));
                }
                if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
                        lustre_swab_lu_fid(fid);
                if (!fid_is_sane(fid)) {
                        CERROR("%s: invalid fid "DFID": rc = %d\n",
                               tgt_name(tsi->tsi_tgt), PFID(fid), -EPROTO);
-                       RETURN(err_serious(-EPROTO));
+                       RETURN(-EPROTO);
                }
        }
 
@@ -159,10 +168,10 @@ static int out_attr_set(struct tgt_session_info *tsi)
        ENTRY;
 
        wobdo = object_update_param_get(update, 0, &size);
-       if (wobdo == NULL || IS_ERR(wobdo) || size != sizeof(*wobdo)) {
-               CERROR("%s: empty obdo in the update: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(wobdo) || size != sizeof(*wobdo)) {
+               CERROR("%s: empty obdo in the update: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(wobdo));
+               RETURN(PTR_ERR(wobdo));
        }
 
        attr->la_valid = 0;
@@ -185,6 +194,7 @@ static int out_attr_get(struct tgt_session_info *tsi)
 {
        const struct lu_env     *env = tsi->tsi_env;
        struct tgt_thread_info  *tti = tgt_th_info(env);
+       struct object_update    *update = tti->tti_u.update.tti_update;
        struct obdo             *obdo = &tti->tti_u.update.tti_obdo;
        struct lu_attr          *la = &tti->tti_attr;
        struct dt_object        *obj = tti->tti_u.update.tti_dt_object;
@@ -193,6 +203,9 @@ static int out_attr_get(struct tgt_session_info *tsi)
 
        ENTRY;
 
+       if (unlikely(update->ou_result_size < sizeof(*obdo)))
+               return -EPROTO;
+
        if (!lu_object_exists(&obj->do_lu)) {
                /* Usually, this will be called when the master MDT try
                 * to init a remote object(see osp_object_init), so if
@@ -203,14 +216,13 @@ static int out_attr_get(struct tgt_session_info *tsi)
                RETURN(-ENOENT);
        }
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        rc = dt_attr_get(env, obj, la);
        if (rc)
                GOTO(out_unlock, rc);
 
        obdo->o_valid = 0;
        obdo_from_la(obdo, la, la->la_valid);
-       lustre_set_wire_obdo(NULL, obdo, obdo);
 
 out_unlock:
        dt_read_unlock(env, obj);
@@ -247,41 +259,97 @@ static int out_xattr_get(struct tgt_session_info *tsi)
        }
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for xattr get: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for xattr get: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
-       update_result = object_update_result_get(reply, 0, NULL);
+       update_result = object_update_result_get(reply, idx, NULL);
        if (update_result == NULL) {
                CERROR("%s: empty name for xattr get: rc = %d\n",
                       tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+               RETURN(-EPROTO);
        }
 
-       lbuf->lb_buf = update_result->our_data;
-       lbuf->lb_len = OUT_UPDATE_REPLY_SIZE -
-                      cfs_size_round((unsigned long)update_result->our_data -
-                                     (unsigned long)update_result);
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       lbuf->lb_len = (int)tti->tti_u.update.tti_update->ou_result_size;
+       if (lbuf->lb_len == 0)
+               lbuf->lb_buf = NULL;
+       else
+               lbuf->lb_buf = update_result->our_data;
+
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        rc = dt_xattr_get(env, obj, lbuf, name);
        dt_read_unlock(env, obj);
-       if (rc < 0) {
+       if (rc <= 0) {
                lbuf->lb_len = 0;
-               GOTO(out, rc);
+               if (unlikely(!rc))
+                       rc = -ENODATA;
+       } else if (lbuf->lb_buf) {
+               lbuf->lb_len = rc;
        }
-       lbuf->lb_len = rc;
-       rc = 0;
        CDEBUG(D_INFO, "%s: "DFID" get xattr %s len %d\n",
               tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)),
-              name, (int)lbuf->lb_len);
+              name, rc);
 
        GOTO(out, rc);
 
 out:
        object_update_result_insert(reply, lbuf->lb_buf, lbuf->lb_len, idx, rc);
-       RETURN(rc);
+       RETURN(0);
+}
+
+/*
+ * Handle an xattr list update: list the extended attributes of the
+ * target object straight into this update's reply slot.
+ *
+ * The listing is written into the our_data area of the result at the
+ * current reply index, bounded by the ou_result_size of the update.
+ * When that size is zero a NULL buffer is passed to dt_xattr_list().
+ * A zero return from dt_xattr_list() is reported as -ENODATA.
+ *
+ * Returns -ENOENT if the object does not exist, -EPROTO if the reply has
+ * no result slot at the current index, and 0 otherwise; in the latter
+ * case the status of the listing itself is stored in the reply slot.
+ */
+static int out_xattr_list(struct tgt_session_info *tsi)
+{
+       const struct lu_env *env = tsi->tsi_env;
+       struct tgt_thread_info *tti = tgt_th_info(env);
+       struct lu_buf *lbuf = &tti->tti_buf;
+       struct object_update_reply *reply = tti->tti_u.update.tti_update_reply;
+       struct dt_object *obj = tti->tti_u.update.tti_dt_object;
+       struct object_update_result *update_result;
+       int idx = tti->tti_u.update.tti_update_reply_index;
+       int rc;
+
+       ENTRY;
+
+       if (!lu_object_exists(&obj->do_lu)) {
+               set_bit(LU_OBJECT_HEARD_BANSHEE,
+                       &obj->do_lu.lo_header->loh_flags);
+               RETURN(-ENOENT);
+       }
+
+       /* The data is read in place, so the buffer must belong to the
+        * same slot (idx) that object_update_result_insert() fills in
+        * below. */
+       update_result = object_update_result_get(reply, idx, NULL);
+       if (!update_result) {
+               rc = -EPROTO;
+               CERROR("%s: empty buf for xattr list: rc = %d\n",
+                      tgt_name(tsi->tsi_tgt), rc);
+               RETURN(rc);
+       }
+
+       lbuf->lb_len = (int)tti->tti_u.update.tti_update->ou_result_size;
+       if (lbuf->lb_len == 0)
+               lbuf->lb_buf = NULL;
+       else
+               lbuf->lb_buf = update_result->our_data;
+
+       dt_read_lock(env, obj, DT_TGT_CHILD);
+       rc = dt_xattr_list(env, obj, lbuf);
+       dt_read_unlock(env, obj);
+       if (rc <= 0) {
+               lbuf->lb_len = 0;
+               if (unlikely(!rc))
+                       rc = -ENODATA;
+       } else if (lbuf->lb_buf) {
+               lbuf->lb_len = rc;
+       }
+
+       CDEBUG(D_INFO, "%s: "DFID" list xattr len %d\n",
+              tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)), rc);
+
+       /* Since we directly use update_result->our_data as the lbuf->lb_buf,
+        * then use NULL for result_insert to avoid unnecessary memory copy. */
+       object_update_result_insert(reply, NULL, lbuf->lb_len, idx, rc);
+
+       RETURN(0);
 }
 
 static int out_index_lookup(struct tgt_session_info *tsi)
@@ -295,17 +363,20 @@ static int out_index_lookup(struct tgt_session_info *tsi)
 
        ENTRY;
 
+       if (unlikely(update->ou_result_size < sizeof(tti->tti_fid1)))
+               return -EPROTO;
+
        if (!lu_object_exists(&obj->do_lu))
                RETURN(-ENOENT);
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for lookup: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for lookup: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        if (!dt_try_as_dir(env, obj))
                GOTO(out_unlock, rc = -ENOTDIR);
 
@@ -351,24 +422,25 @@ static int out_xattr_set(struct tgt_session_info *tsi)
        ENTRY;
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for xattr set: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for xattr set: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
+       /* If buffer == NULL (-ENODATA), then it might mean delete xattr */
        buf = object_update_param_get(update, 1, &buf_len);
-       if (IS_ERR(buf))
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(buf) && PTR_ERR(buf) != -ENODATA)
+               RETURN(PTR_ERR(buf));
 
        lbuf->lb_buf = buf;
        lbuf->lb_len = buf_len;
 
        tmp = object_update_param_get(update, 2, &size);
-       if (tmp == NULL || IS_ERR(tmp) || size != sizeof(*tmp)) {
-               CERROR("%s: emptry or wrong size %zu flag: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), size, -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(tmp) || size != sizeof(*tmp)) {
+               CERROR("%s: emptry or wrong size %zu flag: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), size, PTR_ERR(tmp));
+               RETURN(PTR_ERR(tmp));
        }
 
        if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
@@ -392,10 +464,10 @@ static int out_xattr_del(struct tgt_session_info *tsi)
        ENTRY;
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for xattr set: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for xattr set: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
        rc = out_tx_xattr_del(tsi->tsi_env, obj, name, &tti->tti_tea,
@@ -455,17 +527,17 @@ static int out_index_insert(struct tgt_session_info *tsi)
        ENTRY;
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for index insert: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for index insert: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
        fid = object_update_param_get(update, 1, &size);
-       if (fid == NULL || IS_ERR(fid) || size != sizeof(*fid)) {
-               CERROR("%s: invalid fid: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(fid) || size != sizeof(*fid)) {
+               CERROR("%s: invalid fid: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(fid));
+               RETURN(PTR_ERR(fid));
        }
 
        if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
@@ -474,14 +546,14 @@ static int out_index_insert(struct tgt_session_info *tsi)
        if (!fid_is_sane(fid)) {
                CERROR("%s: invalid FID "DFID": rc = %d\n",
                       tgt_name(tsi->tsi_tgt), PFID(fid), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+               RETURN(-EPROTO);
        }
 
        ptype = object_update_param_get(update, 2, &size);
-       if (ptype == NULL || IS_ERR(ptype) || size != sizeof(*ptype)) {
-               CERROR("%s: invalid type for index insert: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(ptype) || size != sizeof(*ptype)) {
+               CERROR("%s: invalid type for index insert: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(ptype));
+               RETURN(PTR_ERR(ptype));
        }
 
        if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
@@ -495,6 +567,11 @@ static int out_index_insert(struct tgt_session_info *tsi)
                                 tti->tti_tea.ta_handle,
                                 tti->tti_u.update.tti_update_reply,
                                 tti->tti_u.update.tti_update_reply_index);
+
+       CDEBUG(D_INFO, "%s: "DFID" index insert %s: rc = %d\n",
+              tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)),
+              name, rc);
+
        RETURN(rc);
 }
 
@@ -510,10 +587,10 @@ static int out_index_delete(struct tgt_session_info *tsi)
                RETURN(-ENOENT);
 
        name = object_update_param_get(update, 0, NULL);
-       if (name == NULL || IS_ERR(name)) {
-               CERROR("%s: empty name for index delete: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(name)) {
+               CERROR("%s: empty name for index delete: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(name));
+               RETURN(PTR_ERR(name));
        }
 
        rc = out_tx_index_delete(tsi->tsi_env, obj, (const struct dt_key *)name,
@@ -536,7 +613,7 @@ static int out_destroy(struct tgt_session_info *tsi)
        if (!fid_is_sane(fid)) {
                CERROR("%s: invalid FID "DFID": rc = %d\n",
                       tgt_name(tsi->tsi_tgt), PFID(fid), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+               RETURN(-EPROTO);
        }
 
        if (!lu_object_exists(&obj->do_lu))
@@ -565,19 +642,19 @@ static int out_write(struct tgt_session_info *tsi)
        ENTRY;
 
        buf = object_update_param_get(update, 0, &buf_len);
-       if (buf == NULL || IS_ERR(buf) || buf_len == 0) {
-               CERROR("%s: empty buf for xattr set: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(buf) || buf_len == 0) {
+               CERROR("%s: empty buf for xattr set: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(buf));
+               RETURN(PTR_ERR(buf));
        }
        lbuf->lb_buf = buf;
        lbuf->lb_len = buf_len;
 
        tmp = object_update_param_get(update, 1, &size);
-       if (tmp == NULL || IS_ERR(tmp) || size != sizeof(*tmp)) {
-               CERROR("%s: empty or wrong size %zu pos: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), size, -EPROTO);
-               RETURN(err_serious(-EPROTO));
+       if (IS_ERR(tmp) || size != sizeof(*tmp)) {
+               CERROR("%s: empty or wrong size %zu pos: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), size, PTR_ERR(tmp));
+               RETURN(PTR_ERR(tmp));
        }
 
        if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
@@ -598,14 +675,17 @@ static int out_read(struct tgt_session_info *tsi)
        struct object_update    *update = tti->tti_u.update.tti_update;
        struct dt_object        *obj = tti->tti_u.update.tti_dt_object;
        struct object_update_reply *reply = tti->tti_u.update.tti_update_reply;
-       int             index = tti->tti_u.update.tti_update_reply_index;
+       int index = tti->tti_u.update.tti_update_reply_index;
+       struct lu_rdbuf *rdbuf;
        struct object_update_result *update_result;
-       struct lu_buf           *lbuf = &tti->tti_buf;
        struct out_read_reply   *orr;
-       void                    *tmp;
-       size_t                  size;
-       __u64                   pos;
-       int                      rc;
+       void *tmp;
+       size_t size;
+       size_t total_size = 0;
+       __u64 pos;
+       unsigned int i;
+       unsigned int nbufs;
+       int rc = 0;
        ENTRY;
 
        update_result = object_update_result_get(reply, index, NULL);
@@ -616,47 +696,72 @@ static int out_read(struct tgt_session_info *tsi)
                GOTO(out, rc = -ENOENT);
 
        tmp = object_update_param_get(update, 0, NULL);
-       if (tmp == NULL || IS_ERR(tmp)) {
-               CERROR("%s: empty size for read: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               GOTO(out, rc = err_serious(-EPROTO));
+       if (IS_ERR(tmp)) {
+               CERROR("%s: empty size for read: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(tmp));
+               GOTO(out, rc = PTR_ERR(tmp));
        }
        size = le64_to_cpu(*(size_t *)(tmp));
 
        tmp = object_update_param_get(update, 1, NULL);
-       if (tmp == NULL || IS_ERR(tmp)) {
-               CERROR("%s: empty pos for read: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), -EPROTO);
-               GOTO(out, rc = err_serious(-EPROTO));
+       if (IS_ERR(tmp)) {
+               CERROR("%s: empty pos for read: rc = %ld\n",
+                      tgt_name(tsi->tsi_tgt), PTR_ERR(tmp));
+               GOTO(out, rc = PTR_ERR(tmp));
        }
        pos = le64_to_cpu(*(__u64 *)(tmp));
 
-       /* Check if the read buffer can hold the read_size */
-       if (size > OUT_UPDATE_REPLY_SIZE -
-                  cfs_size_round(offsetof(struct object_update_reply,
-                                          ourp_lens[1])) -
-                  cfs_size_round(sizeof(*update_result)) -
-                  cfs_size_round(sizeof(*orr))) {
-               CERROR("%s: get %zu the biggest read size is %d: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), size, OUT_UPDATE_REPLY_SIZE,
-                      -EPROTO);
-               GOTO(out, rc = err_serious(-EPROTO));
-       }
-
        /* Put the offset into the begining of the buffer in reply */
        orr = (struct out_read_reply *)update_result->our_data;
 
-       lbuf->lb_buf = orr->orr_data;
-       lbuf->lb_len = size;
+       nbufs = (size + OUT_BULK_BUFFER_SIZE - 1) / OUT_BULK_BUFFER_SIZE;
+       OBD_ALLOC(rdbuf, sizeof(*rdbuf) + nbufs * sizeof(rdbuf->rb_bufs[0]));
+       if (rdbuf == NULL)
+               GOTO(out, rc = -ENOMEM);
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
-       rc = dt_read(env, obj, lbuf, &pos);
-       dt_read_unlock(env, obj);
-       orr->orr_size = rc < 0 ? 0 : rc;
+       rdbuf->rb_nbufs = 0;
+       total_size = 0;
+       for (i = 0; i < nbufs; i++) {
+               __u32 read_size;
+
+               read_size = size > OUT_BULK_BUFFER_SIZE ?
+                           OUT_BULK_BUFFER_SIZE : size;
+               OBD_ALLOC(rdbuf->rb_bufs[i].lb_buf, read_size);
+               if (rdbuf->rb_bufs[i].lb_buf == NULL)
+                       GOTO(out_free, rc = -ENOMEM);
+
+               rdbuf->rb_bufs[i].lb_len = read_size;
+               dt_read_lock(env, obj, DT_TGT_CHILD);
+               rc = dt_read(env, obj, &rdbuf->rb_bufs[i], &pos);
+               dt_read_unlock(env, obj);
+
+               total_size += rc < 0 ? 0 : rc;
+               if (rc <= 0)
+                       break;
+
+               rdbuf->rb_nbufs++;
+               size -= read_size;
+       }
+
+       /* send pages to client */
+       rc = tgt_send_buffer(tsi, rdbuf);
+       if (rc < 0)
+               GOTO(out_free, rc);
+
+       orr->orr_size = total_size;
        orr->orr_offset = pos;
 
        orr_cpu_to_le(orr, orr);
        update_result->our_datalen += orr->orr_size;
+out_free:
+       for (i = 0; i < nbufs; i++) {
+               if (rdbuf->rb_bufs[i].lb_buf != NULL) {
+                       OBD_FREE(rdbuf->rb_bufs[i].lb_buf,
+                                rdbuf->rb_bufs[i].lb_len);
+               }
+       }
+       OBD_FREE(rdbuf, sizeof(*rdbuf) +
+                       nbufs * sizeof(rdbuf->rb_bufs[0]));
 out:
        /* Insert read buffer */
        update_result->our_rc = ptlrpc_status_hton(rc);
@@ -682,33 +787,35 @@ static int out_noop(struct tgt_session_info *tsi)
 }
 
 static struct tgt_handler out_update_ops[] = {
-       DEF_OUT_HNDL(OUT_CREATE, "out_create", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_CREATE, "out_create", IS_MUTABLE | HAS_REPLY,
                     out_create),
-       DEF_OUT_HNDL(OUT_DESTROY, "out_create", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_DESTROY, "out_create", IS_MUTABLE | HAS_REPLY,
                     out_destroy),
-       DEF_OUT_HNDL(OUT_REF_ADD, "out_ref_add", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_REF_ADD, "out_ref_add", IS_MUTABLE | HAS_REPLY,
                     out_ref_add),
-       DEF_OUT_HNDL(OUT_REF_DEL, "out_ref_del", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_REF_DEL, "out_ref_del", IS_MUTABLE | HAS_REPLY,
                     out_ref_del),
-       DEF_OUT_HNDL(OUT_ATTR_SET, "out_attr_set",  MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_ATTR_SET, "out_attr_set",  IS_MUTABLE | HAS_REPLY,
                     out_attr_set),
-       DEF_OUT_HNDL(OUT_ATTR_GET, "out_attr_get",  HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_ATTR_GET, "out_attr_get",  HAS_REPLY,
                     out_attr_get),
-       DEF_OUT_HNDL(OUT_XATTR_SET, "out_xattr_set", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_SET, "out_xattr_set", IS_MUTABLE | HAS_REPLY,
                     out_xattr_set),
-       DEF_OUT_HNDL(OUT_XATTR_DEL, "out_xattr_del", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_DEL, "out_xattr_del", IS_MUTABLE | HAS_REPLY,
                     out_xattr_del),
-       DEF_OUT_HNDL(OUT_XATTR_GET, "out_xattr_get", HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_GET, "out_xattr_get", HAS_REPLY,
                     out_xattr_get),
-       DEF_OUT_HNDL(OUT_INDEX_LOOKUP, "out_index_lookup", HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_INDEX_LOOKUP, "out_index_lookup", HAS_REPLY,
                     out_index_lookup),
        DEF_OUT_HNDL(OUT_INDEX_INSERT, "out_index_insert",
-                    MUTABOR | HABEO_REFERO, out_index_insert),
+                    IS_MUTABLE | HAS_REPLY, out_index_insert),
        DEF_OUT_HNDL(OUT_INDEX_DELETE, "out_index_delete",
-                    MUTABOR | HABEO_REFERO, out_index_delete),
-       DEF_OUT_HNDL(OUT_WRITE, "out_write", MUTABOR | HABEO_REFERO, out_write),
-       DEF_OUT_HNDL(OUT_READ, "out_read", HABEO_REFERO, out_read),
-       DEF_OUT_HNDL(OUT_NOOP, "out_noop", HABEO_REFERO, out_noop),
+                    IS_MUTABLE | HAS_REPLY, out_index_delete),
+       DEF_OUT_HNDL(OUT_WRITE, "out_write", IS_MUTABLE | HAS_REPLY, out_write),
+       DEF_OUT_HNDL(OUT_READ, "out_read", HAS_REPLY, out_read),
+       DEF_OUT_HNDL(OUT_NOOP, "out_noop", HAS_REPLY, out_noop),
+       DEF_OUT_HNDL(OUT_XATTR_LIST, "out_xattr_list", HAS_REPLY,
+                    out_xattr_list),
 };
 
 static struct tgt_handler *out_handler_find(__u32 opc)
@@ -762,17 +869,7 @@ static int out_trans_stop(const struct lu_env *env,
        rc = dt_trans_stop(env, ta->ta_handle->th_dev, ta->ta_handle);
        for (i = 0; i < ta->ta_argno; i++) {
                if (ta->ta_args[i]->object != NULL) {
-                       struct dt_object *obj = ta->ta_args[i]->object;
-
-                       /* If the object is being created during this
-                        * transaction, we need to remove them from the
-                        * cache immediately, because a few layers are
-                        * missing in OUT handler, i.e. the object might
-                        * not be initialized in all layers */
-                       if (ta->ta_args[i]->exec_fn == out_tx_create_exec)
-                               set_bit(LU_OBJECT_HEARD_BANSHEE,
-                                       &obj->do_lu.lo_header->loh_flags);
-                       lu_object_put(env, &ta->ta_args[i]->object->do_lu);
+                       dt_object_put(env, ta->ta_args[i]->object);
                        ta->ta_args[i]->object = NULL;
                }
        }
@@ -863,11 +960,10 @@ int out_handle(struct tgt_session_info *tsi)
        struct req_capsule              *pill = tsi->tsi_pill;
        struct dt_device                *dt = tsi->tsi_tgt->lut_bottom;
        struct out_update_header        *ouh;
-       struct out_update_buffer        *oub;
+       struct out_update_buffer        *oub = NULL;
        struct object_update            *update;
        struct object_update_reply      *reply;
-       struct ptlrpc_bulk_desc         *desc;
-       struct l_wait_info              lwi;
+       struct ptlrpc_bulk_desc         *desc = NULL;
        void                            **update_bufs;
        int                             current_batchid = -1;
        __u32                           update_buf_count;
@@ -875,16 +971,19 @@ int out_handle(struct tgt_session_info *tsi)
        unsigned int                    reply_index = 0;
        int                             rc = 0;
        int                             rc1 = 0;
-
+       int                             ouh_size, reply_size;
+       int                             updates;
        ENTRY;
 
        req_capsule_set(pill, &RQF_OUT_UPDATE);
+       ouh_size = req_capsule_get_size(pill, &RMF_OUT_UPDATE_HEADER,
+                                       RCL_CLIENT);
+       if (ouh_size <= 0)
+               RETURN(err_serious(-EPROTO));
+
        ouh = req_capsule_client_get(pill, &RMF_OUT_UPDATE_HEADER);
-       if (ouh == NULL) {
-               CERROR("%s: No buf!: rc = %d\n", tgt_name(tsi->tsi_tgt),
-                      -EPROTO);
+       if (ouh == NULL)
                RETURN(err_serious(-EPROTO));
-       }
 
        if (ouh->ouh_magic != OUT_UPDATE_HEADER_MAGIC) {
                CERROR("%s: invalid update buffer magic %x expect %x: "
@@ -894,82 +993,128 @@ int out_handle(struct tgt_session_info *tsi)
        }
 
        update_buf_count = ouh->ouh_count;
-       if (update_buf_count == 0) {
-               CERROR("%s: empty update: rc = %d\n", tgt_name(tsi->tsi_tgt),
-                      -EPROTO);
+       if (update_buf_count == 0)
                RETURN(err_serious(-EPROTO));
-       }
-
-       req_capsule_set_size(pill, &RMF_OUT_UPDATE_REPLY, RCL_SERVER,
-                            OUT_UPDATE_REPLY_SIZE);
-       rc = req_capsule_server_pack(pill);
-       if (rc != 0) {
-               CERROR("%s: Can't pack response: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), rc);
-               RETURN(rc);
-       }
 
        OBD_ALLOC(update_bufs, sizeof(*update_bufs) * update_buf_count);
        if (update_bufs == NULL)
-               RETURN(-ENOMEM);
-
-       oub = req_capsule_client_get(pill, &RMF_OUT_UPDATE_BUF);
-       desc = ptlrpc_prep_bulk_exp(pill->rc_req, update_buf_count,
-                                   PTLRPC_BULK_OPS_COUNT,
-                                   PTLRPC_BULK_GET_SINK |
-                                   PTLRPC_BULK_BUF_KVEC,
-                                   MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops);
-       if (desc == NULL)
-               GOTO(out_free, rc = -ENOMEM);
-
-       /* NB Having prepped, we must commit... */
-       for (i = 0; i < update_buf_count; i++, oub++) {
-               OBD_ALLOC(update_bufs[i], oub->oub_size);
-               if (update_bufs[i] == NULL)
-                       GOTO(out_free, rc = -ENOMEM);
+               RETURN(err_serious(-ENOMEM));
 
-               desc->bd_frag_ops->add_iov_frag(desc, update_bufs[i],
-                                               oub->oub_size);
-       }
-
-       pill->rc_req->rq_bulk_write = 1;
-       rc = sptlrpc_svc_prep_bulk(pill->rc_req, desc);
-       if (rc != 0)
-               GOTO(out_free, rc);
-
-       rc = target_bulk_io(pill->rc_req->rq_export, desc, &lwi);
-       if (rc < 0)
-               GOTO(out_free, rc);
+       if (ouh->ouh_inline_length > 0) {
+               update_bufs[0] = ouh->ouh_inline_data;
+       } else {
+               struct out_update_buffer *tmp;
+               int page_count = 0;
+
+               oub = req_capsule_client_get(pill, &RMF_OUT_UPDATE_BUF);
+               if (oub == NULL)
+                       GOTO(out_free, rc = err_serious(-EPROTO));
+
+               for (i = 0; i < update_buf_count; i++)
+                       /* First *and* last might be partial pages, hence +1 */
+                       page_count += DIV_ROUND_UP(oub[i].oub_size,
+                                                  PAGE_SIZE) + 1;
+
+               desc = ptlrpc_prep_bulk_exp(pill->rc_req, page_count,
+                                           PTLRPC_BULK_OPS_COUNT,
+                                           PTLRPC_BULK_GET_SINK,
+                                           MDS_BULK_PORTAL,
+                                           &ptlrpc_bulk_kiov_nopin_ops);
+               if (desc == NULL)
+                       GOTO(out_free, rc = err_serious(-ENOMEM));
+
+               tmp = oub;
+               for (i = 0; i < update_buf_count; i++, tmp++) {
+                       if (tmp->oub_size >= OUT_MAXREQSIZE)
+                               GOTO(out_free, rc = err_serious(-EPROTO));
+
+                       OBD_ALLOC_LARGE(update_bufs[i], tmp->oub_size);
+                       if (update_bufs[i] == NULL)
+                               GOTO(out_free, rc = err_serious(-ENOMEM));
+
+                       desc->bd_frag_ops->add_iov_frag(desc, update_bufs[i],
+                                                       tmp->oub_size);
+               }
 
-       /* Prepare the update reply buffer */
-       reply = req_capsule_server_get(pill, &RMF_OUT_UPDATE_REPLY);
-       if (reply == NULL)
-               GOTO(out_free, rc = err_serious(-EPROTO));
-       reply->ourp_magic = UPDATE_REPLY_MAGIC;
-       tti->tti_u.update.tti_update_reply = reply;
-       tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
+               pill->rc_req->rq_bulk_write = 1;
+               rc = sptlrpc_svc_prep_bulk(pill->rc_req, desc);
+               if (rc != 0)
+                       GOTO(out_free, rc = err_serious(rc));
 
+               rc = target_bulk_io(pill->rc_req->rq_export, desc);
+               if (rc < 0)
+                       GOTO(out_free, rc = err_serious(rc));
+       }
        /* validate the request and calculate the total update count and
         * set it to reply */
+       reply_size = 0;
+       updates = 0;
        for (i = 0; i < update_buf_count; i++) {
-               struct object_update_request *our;
-               int                     update_count;
+               struct object_update_request    *our;
+               int                              j;
 
                our = update_bufs[i];
                if (ptlrpc_req_need_swab(pill->rc_req))
-                       lustre_swab_object_update_request(our);
+                       lustre_swab_object_update_request(our, 0);
 
                if (our->ourq_magic != UPDATE_REQUEST_MAGIC) {
                        CERROR("%s: invalid update buffer magic %x"
                               " expect %x: rc = %d\n",
                               tgt_name(tsi->tsi_tgt), our->ourq_magic,
                               UPDATE_REQUEST_MAGIC, -EPROTO);
-                       GOTO(out_free, rc = -EPROTO);
+                       GOTO(out_free, rc = err_serious(-EPROTO));
+               }
+               updates += our->ourq_count;
+
+               /* need to calculate reply size */
+               for (j = 0; j < our->ourq_count; j++) {
+                       update = object_update_request_get(our, j, NULL);
+                       if (update == NULL)
+                               GOTO(out, rc = err_serious(-EPROTO));
+                       if (ptlrpc_req_need_swab(pill->rc_req))
+                               lustre_swab_object_update(update);
+
+                       if (!fid_is_sane(&update->ou_fid)) {
+                               CERROR("%s: invalid FID "DFID": rc = %d\n",
+                                      tgt_name(tsi->tsi_tgt),
+                                      PFID(&update->ou_fid), -EPROTO);
+                               GOTO(out, rc = err_serious(-EPROTO));
+                       }
+
+                       /* XXX: what ou_result_size can be considered safe? */
+
+                       reply_size += sizeof(reply->ourp_lens[0]);
+                       reply_size += sizeof(struct object_update_result);
+                       reply_size += update->ou_result_size;
                }
-               update_count = our->ourq_count;
-               reply->ourp_count += update_count;
        }
+       reply_size += sizeof(*reply);
+
+       if (unlikely(reply_size > ouh->ouh_reply_size)) {
+               CERROR("%s: too small reply buf %u for %u, need %u at least\n",
+                      tgt_name(tsi->tsi_tgt), ouh->ouh_reply_size,
+                      updates, reply_size);
+               GOTO(out_free, rc = err_serious(-EPROTO));
+       }
+
+       req_capsule_set_size(pill, &RMF_OUT_UPDATE_REPLY, RCL_SERVER,
+                            ouh->ouh_reply_size);
+       rc = req_capsule_server_pack(pill);
+       if (rc != 0) {
+               CERROR("%s: Can't pack response: rc = %d\n",
+                      tgt_name(tsi->tsi_tgt), rc);
+               GOTO(out_free, rc = err_serious(-EPROTO));
+       }
+
+       /* Prepare the update reply buffer */
+       reply = req_capsule_server_get(pill, &RMF_OUT_UPDATE_REPLY);
+       if (reply == NULL)
+               GOTO(out_free, rc = -EPROTO);
+       reply->ourp_magic = UPDATE_REPLY_MAGIC;
+       reply->ourp_count = updates;
+       tti->tti_u.update.tti_update_reply = reply;
+       tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
+
        /* Walk through updates in the request to execute them */
        for (i = 0; i < update_buf_count; i++) {
                struct tgt_handler      *h;
@@ -981,31 +1126,27 @@ int out_handle(struct tgt_session_info *tsi)
                our = update_bufs[i];
                update_count = our->ourq_count;
                for (j = 0; j < update_count; j++) {
-                       update = object_update_request_get(our, j, NULL);
-                       if (update == NULL)
-                               GOTO(out, rc = -EPROTO);
-
-                       if (ptlrpc_req_need_swab(pill->rc_req))
-                               lustre_swab_object_update(update);
+                       struct lu_object_conf conf;
 
-                       if (!fid_is_sane(&update->ou_fid)) {
-                               CERROR("%s: invalid FID "DFID": rc = %d\n",
-                                      tgt_name(tsi->tsi_tgt),
-                                      PFID(&update->ou_fid), -EPROTO);
-                               GOTO(out, rc = err_serious(-EPROTO));
-                       }
+                       update = object_update_request_get(our, j, NULL);
+                       if (update->ou_type == OUT_CREATE)
+                               conf.loc_flags = LOC_F_NEW;
+                       else
+                               conf.loc_flags = 0;
 
-                       dt_obj = dt_locate(env, dt, &update->ou_fid);
+                       dt_obj = dt_locate_at(env, dt, &update->ou_fid,
+                               dt->dd_lu_dev.ld_site->ls_top_dev, &conf);
                        if (IS_ERR(dt_obj))
                                GOTO(out, rc = PTR_ERR(dt_obj));
 
                        if (dt->dd_record_fid_accessed) {
-                               lfsck_pack_rfa(&tti->tti_lr,
+                               struct lfsck_req_local *lrl = &tti->tti_lrl;
+
+                               lfsck_pack_rfa(lrl,
                                               lu_object_fid(&dt_obj->do_lu),
-                                              LE_FID_ACCESSED,
+                                              LEL_FID_ACCESSED,
                                               LFSCK_TYPE_LAYOUT);
-                               tgt_lfsck_in_notify(env, dt, &tti->tti_lr,
-                                                   NULL);
+                               tgt_lfsck_in_notify_local(env, dt, lrl, NULL);
                        }
 
                        tti->tti_u.update.tti_dt_object = dt_obj;
@@ -1020,17 +1161,20 @@ int out_handle(struct tgt_session_info *tsi)
                        }
 
                        /* Check resend case only for modifying RPC */
-                       if (h->th_flags & MUTABOR) {
+                       if (h->th_flags & IS_MUTABLE) {
                                struct ptlrpc_request *req = tgt_ses_req(tsi);
 
                                if (out_check_resent(env, dt, dt_obj, req,
                                                     out_reconstruct, reply,
                                                     reply_index))
                                        GOTO(next, rc = 0);
+
+                               if (dt->dd_rdonly)
+                                       GOTO(next, rc = -EROFS);
                        }
 
                        /* start transaction for modification RPC only */
-                       if (h->th_flags & MUTABOR && current_batchid == -1) {
+                       if (h->th_flags & IS_MUTABLE && current_batchid == -1) {
                                current_batchid = update->ou_batchid;
                                rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
                                if (rc != 0)
@@ -1043,7 +1187,7 @@ int out_handle(struct tgt_session_info *tsi)
                        /* Stop the current update transaction, if the update
                         * has different batchid, or read-only update */
                        if (((current_batchid != update->ou_batchid) ||
-                            !(h->th_flags & MUTABOR)) &&
+                            !(h->th_flags & IS_MUTABLE)) &&
                             ta->ta_handle != NULL) {
                                rc = out_tx_end(env, ta, rc);
                                current_batchid = -1;
@@ -1051,7 +1195,7 @@ int out_handle(struct tgt_session_info *tsi)
                                        GOTO(next, rc);
 
                                /* start a new transaction if needed */
-                               if (h->th_flags & MUTABOR) {
+                               if (h->th_flags & IS_MUTABLE) {
                                        rc = out_tx_start(env, dt, ta,
                                                          tsi->tsi_exp);
                                        if (rc != 0)
@@ -1065,7 +1209,7 @@ int out_handle(struct tgt_session_info *tsi)
                        rc = h->th_act(tsi);
 next:
                        reply_index++;
-                       lu_object_put(env, &dt_obj->do_lu);
+                       dt_object_put(env, dt_obj);
                        if (rc < 0)
                                GOTO(out, rc);
                }
@@ -1078,14 +1222,16 @@ out:
        }
 
 out_free:
-       oub = req_capsule_client_get(pill, &RMF_OUT_UPDATE_BUF);
        if (update_bufs != NULL) {
-               for (i = 0; i < update_buf_count; i++, oub++) {
-                       if (update_bufs[i] != NULL)
-                               OBD_FREE(update_bufs[i], oub->oub_size);
+               if (oub != NULL) {
+                       for (i = 0; i < update_buf_count; i++, oub++) {
+                               if (update_bufs[i] != NULL)
+                                       OBD_FREE_LARGE(update_bufs[i],
+                                                      oub->oub_size);
+                       }
                }
-               OBD_FREE(update_bufs, sizeof(update_bufs[0]) *
-                                       update_buf_count);
+
+               OBD_FREE(update_bufs, sizeof(*update_bufs) * update_buf_count);
        }
 
        if (desc != NULL)
@@ -1095,7 +1241,7 @@ out_free:
 }
 
 struct tgt_handler tgt_out_handlers[] = {
-TGT_UPDATE_HDL(MUTABOR,        OUT_UPDATE,     out_handle),
+TGT_UPDATE_HDL(IS_MUTABLE,     OUT_UPDATE,     out_handle),
 };
 EXPORT_SYMBOL(tgt_out_handlers);