Whamcloud - gitweb
LU-14124 target: set OBD_MD_FLGRANT in read's reply
[fs/lustre-release.git] / lustre / target / tgt_handler.c
index e41c59c..1a903db 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/user_namespace.h>
-#ifdef HAVE_UIDGID_HEADER
-# include <linux/uidgid.h>
-#endif
+#include <linux/delay.h>
+#include <linux/uidgid.h>
 
+#include <libcfs/linux/linux-mem.h>
 #include <obd.h>
 #include <obd_class.h>
 #include <obd_cksum.h>
@@ -265,6 +265,9 @@ static int tgt_ost_body_unpack(struct tgt_session_info *tsi, __u32 flags)
        body->oa.o_gid = nodemap_map_id(nodemap, NODEMAP_GID,
                                        NODEMAP_CLIENT_TO_FS,
                                        body->oa.o_gid);
+       body->oa.o_projid = nodemap_map_id(nodemap, NODEMAP_PROJID,
+                                          NODEMAP_CLIENT_TO_FS,
+                                          body->oa.o_projid);
        nodemap_putref(nodemap);
 
        tsi->tsi_ost_body = body;
@@ -320,11 +323,15 @@ static int tgt_request_preprocess(struct tgt_session_info *tsi,
                     h->th_fmt != NULL));
        if (h->th_fmt != NULL) {
                req_capsule_set(pill, h->th_fmt);
-               if (req_capsule_has_field(pill, &RMF_MDT_BODY, RCL_CLIENT)) {
+               if (req_capsule_has_field(pill, &RMF_MDT_BODY, RCL_CLIENT) &&
+                   req_capsule_field_present(pill, &RMF_MDT_BODY,
+                                             RCL_CLIENT)) {
                        rc = tgt_mdt_body_unpack(tsi, flags);
                        if (rc < 0)
                                RETURN(rc);
                } else if (req_capsule_has_field(pill, &RMF_OST_BODY,
+                                                RCL_CLIENT) &&
+                          req_capsule_field_present(pill, &RMF_OST_BODY,
                                                 RCL_CLIENT)) {
                        rc = tgt_ost_body_unpack(tsi, flags);
                        if (rc < 0)
@@ -415,6 +422,11 @@ static int tgt_handle_request0(struct tgt_session_info *tsi,
                     OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_MULTI_NET)))
                RETURN(0);
 
+       /* drop OUT_UPDATE rpc */
+       if (unlikely(lustre_msg_get_opc(req->rq_reqmsg) == OUT_UPDATE &&
+                    OBD_FAIL_CHECK(OBD_FAIL_OUT_UPDATE_DROP)))
+               RETURN(0);
+
        rc = tgt_request_preprocess(tsi, h, req);
        /* pack reply if reply format is fixed */
        if (rc == 0 && h->th_flags & HAS_REPLY) {
@@ -442,7 +454,8 @@ static int tgt_handle_request0(struct tgt_session_info *tsi,
 
                        req_capsule_set_size(tsi->tsi_pill, &RMF_SHORT_IO,
                                         RCL_SERVER,
-                                        (body->oa.o_flags & OBD_FL_SHORT_IO) ?
+                                        (body->oa.o_valid & OBD_MD_FLFLAGS &&
+                                         body->oa.o_flags & OBD_FL_SHORT_IO) ?
                                         remote_nb[0].rnb_len : 0);
                }
 
@@ -517,6 +530,7 @@ static int tgt_filter_recovery_request(struct ptlrpc_request *req,
        case MDS_HSM_PROGRESS:
        case MDS_HSM_STATE_SET:
        case MDS_HSM_REQUEST:
+       case OST_FALLOCATE:
                *process = target_queue_recovery_request(req, obd);
                RETURN(0);
 
@@ -611,8 +625,14 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req)
 
        /* opcode was not found in slice */
        if (unlikely(s->tos_hs == NULL)) {
-               CERROR("%s: no handlers for opcode 0x%x\n", tgt_name(tgt),
-                      opc);
+               static bool printed;
+
+               /* don't spew error messages for unhandled RPCs */
+               if (!printed) {
+                       CERROR("%s: no handler for opcode 0x%x from %s\n",
+                              tgt_name(tgt), opc, libcfs_id2str(req->rq_peer));
+                       printed = true;
+               }
                RETURN(ERR_PTR(-ENOTSUPP));
        }
 
@@ -678,8 +698,15 @@ static int process_req_last_xid(struct ptlrpc_request *req)
         * replay request will have the larger xid than "exp_last_xid"
         */
        if (req->rq_export->exp_conn_cnt >
-           lustre_msg_get_conn_cnt(req->rq_reqmsg))
+           lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
+               CDEBUG(D_RPCTRACE,
+                      "Dropping request %llu from an old epoch %u/%u\n",
+                      req->rq_xid,
+                      lustre_msg_get_conn_cnt(req->rq_reqmsg),
+                      req->rq_export->exp_conn_cnt);
+               req->rq_no_reply = 1;
                GOTO(out, rc = -ESTALE);
+       }
 
        /* try to release in-memory reply data */
        if (tgt_is_multimodrpcs_client(exp)) {
@@ -712,18 +739,12 @@ int tgt_request_handle(struct ptlrpc_request *req)
                if (cfs_fail_val == 0 &&
                    lustre_msg_get_opc(msg) != OBD_PING &&
                    lustre_msg_get_flags(msg) & MSG_REQ_REPLAY_DONE) {
-                       struct l_wait_info lwi =  { 0 };
-
                        cfs_fail_val = 1;
                        cfs_race_state = 0;
-                       l_wait_event(cfs_race_waitq, (cfs_race_state == 1),
-                                    &lwi);
+                       wait_event_idle(cfs_race_waitq, (cfs_race_state == 1));
                }
        }
 
-       /* Refill the context, to make sure all thread keys are allocated */
-       lu_env_refill(req->rq_svc_thread->t_env);
-
        req_capsule_init(&req->rq_pill, req, RCL_SERVER);
        tsi->tsi_pill = &req->rq_pill;
        tsi->tsi_env = req->rq_svc_thread->t_env;
@@ -785,7 +806,7 @@ int tgt_request_handle(struct ptlrpc_request *req)
                /* reset the exp_last_xid on each connection. */
                req->rq_export->exp_last_xid = 0;
        } else if (obd->obd_recovery_data.trd_processing_task !=
-                  current_pid()) {
+                  current->pid) {
                rc = process_req_last_xid(req);
                if (rc) {
                        req->rq_status = rc;
@@ -888,9 +909,9 @@ EXPORT_SYMBOL(tgt_counter_incr);
 
 int tgt_connect_check_sptlrpc(struct ptlrpc_request *req, struct obd_export *exp)
 {
-       struct lu_target        *tgt = class_exp2tgt(exp);
-       struct sptlrpc_flavor    flvr;
-       int                      rc = 0;
+       struct lu_target *tgt = class_exp2tgt(exp);
+       struct sptlrpc_flavor flvr;
+       int rc = 0;
 
        LASSERT(tgt);
        LASSERT(tgt->lut_obd);
@@ -915,13 +936,13 @@ int tgt_connect_check_sptlrpc(struct ptlrpc_request *req, struct obd_export *exp
                exp->exp_sp_peer = req->rq_sp_from;
                exp->exp_flvr = flvr;
 
-               /* when on mgs, if no restriction is set, or if client
-                * is loopback, allow any flavor */
+               /* when on mgs, if no restriction is set, or if the client
+                * NID is on the local node, allow any flavor
+                */
                if ((strcmp(exp->exp_obd->obd_type->typ_name,
                           LUSTRE_MGS_NAME) == 0) &&
                     (exp->exp_flvr.sf_rpc == SPTLRPC_FLVR_NULL ||
-                     LNET_NETTYP(LNET_NIDNET(exp->exp_connection->c_peer.nid))
-                     == LOLND))
+                     LNetIsPeerLocal(exp->exp_connection->c_peer.nid)))
                        exp->exp_flvr.sf_rpc = SPTLRPC_FLVR_ANY;
 
                if (exp->exp_flvr.sf_rpc != SPTLRPC_FLVR_ANY &&
@@ -955,7 +976,7 @@ int tgt_adapt_sptlrpc_conf(struct lu_target *tgt)
        int                      rc;
 
        if (unlikely(tgt == NULL)) {
-               CERROR("No target passed");
+               CERROR("No target passed\n");
                return -EINVAL;
        }
 
@@ -1012,6 +1033,27 @@ int tgt_connect(struct tgt_session_info *tsi)
                rc = req_check_sepol(tsi->tsi_pill);
                if (rc)
                        GOTO(out, rc);
+
+               if (reply->ocd_connect_flags & OBD_CONNECT_FLAGS2 &&
+                   reply->ocd_connect_flags2 & OBD_CONNECT2_ENCRYPT &&
+                   tsi->tsi_pill->rc_req->rq_export) {
+                       bool forbid_encrypt = true;
+                       struct lu_nodemap *nm =
+                        nodemap_get_from_exp(tsi->tsi_pill->rc_req->rq_export);
+
+                       if (!nm) {
+                               /* nodemap_get_from_exp returns NULL in case
+                                * nodemap is not active, so we do not forbid
+                                */
+                               forbid_encrypt = false;
+                       } else if (!IS_ERR(nm)) {
+                               forbid_encrypt = nm->nmf_forbid_encryption;
+                               nodemap_putref(nm);
+                       }
+
+                       if (forbid_encrypt)
+                               GOTO(out, rc = -EACCES);
+               }
        }
 
        RETURN(0);
@@ -1075,12 +1117,22 @@ int tgt_send_buffer(struct tgt_session_info *tsi, struct lu_rdbuf *rdbuf)
        struct ptlrpc_bulk_desc *desc;
        int                      i;
        int                      rc;
+       int                      pages = 0;
 
        ENTRY;
 
-       desc = ptlrpc_prep_bulk_exp(req, rdbuf->rb_nbufs, 1,
-                                 PTLRPC_BULK_PUT_SOURCE | PTLRPC_BULK_BUF_KVEC,
-                                   MDS_BULK_PORTAL, &ptlrpc_bulk_kvec_ops);
+       for (i = 0; i < rdbuf->rb_nbufs; i++) {
+               unsigned int offset;
+
+               offset = (unsigned long)rdbuf->rb_bufs[i].lb_buf & ~PAGE_MASK;
+               pages += DIV_ROUND_UP(rdbuf->rb_bufs[i].lb_len + offset,
+                                     PAGE_SIZE);
+       }
+
+       desc = ptlrpc_prep_bulk_exp(req, pages, 1,
+                                 PTLRPC_BULK_PUT_SOURCE,
+                                   MDS_BULK_PORTAL,
+                                   &ptlrpc_bulk_kiov_nopin_ops);
        if (desc == NULL)
                RETURN(-ENOMEM);
 
@@ -1108,8 +1160,7 @@ int tgt_sendpage(struct tgt_session_info *tsi, struct lu_rdpg *rdpg, int nob)
        ENTRY;
 
        desc = ptlrpc_prep_bulk_exp(req, rdpg->rp_npages, 1,
-                                   PTLRPC_BULK_PUT_SOURCE |
-                                       PTLRPC_BULK_BUF_KIOV,
+                                   PTLRPC_BULK_PUT_SOURCE,
                                    MDS_BULK_PORTAL,
                                    &ptlrpc_bulk_kiov_pin_ops);
        if (desc == NULL)
@@ -1186,7 +1237,7 @@ static int tgt_obd_idx_read(struct tgt_session_info *tsi)
        rdpg->rp_npages = (rdpg->rp_count + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
        /* allocate pages to store the containers */
-       OBD_ALLOC(rdpg->rp_pages, rdpg->rp_npages * sizeof(rdpg->rp_pages[0]));
+       OBD_ALLOC_PTR_ARRAY(rdpg->rp_pages, rdpg->rp_npages);
        if (rdpg->rp_pages == NULL)
                GOTO(out, rc = -ENOMEM);
        for (i = 0; i < rdpg->rp_npages; i++) {
@@ -1213,8 +1264,7 @@ out:
                for (i = 0; i < rdpg->rp_npages; i++)
                        if (rdpg->rp_pages[i])
                                __free_page(rdpg->rp_pages[i]);
-               OBD_FREE(rdpg->rp_pages,
-                        rdpg->rp_npages * sizeof(rdpg->rp_pages[0]));
+               OBD_FREE_PTR_ARRAY(rdpg->rp_pages, rdpg->rp_npages);
        }
        return rc;
 }
@@ -1284,8 +1334,8 @@ static int tgt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 
        if (flag == LDLM_CB_CANCELING &&
            (lock->l_granted_mode & (LCK_EX | LCK_PW | LCK_GROUP)) &&
-           (tgt->lut_sync_lock_cancel == ALWAYS_SYNC_ON_CANCEL ||
-            (tgt->lut_sync_lock_cancel == BLOCKING_SYNC_ON_CANCEL &&
+           (tgt->lut_sync_lock_cancel == SYNC_LOCK_CANCEL_ALWAYS ||
+            (tgt->lut_sync_lock_cancel == SYNC_LOCK_CANCEL_BLOCKING &&
              ldlm_is_cbpending(lock))) &&
            ((exp_connect_flags(lock->l_export) & OBD_CONNECT_MDS_MDS) ||
             lock->l_resource->lr_type == LDLM_EXTENT)) {
@@ -1637,13 +1687,6 @@ int tgt_mdt_data_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
 }
 EXPORT_SYMBOL(tgt_mdt_data_lock);
 
-void tgt_mdt_data_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
-{
-       LASSERT(lustre_handle_is_used(lh));
-       ldlm_lock_decref(lh, mode);
-}
-EXPORT_SYMBOL(tgt_mdt_data_unlock);
-
 /**
  * Helper function for getting server side [start, start+count] DLM lock
  * if asked by client.
@@ -1681,30 +1724,41 @@ int tgt_extent_lock(const struct lu_env *env, struct ldlm_namespace *ns,
 }
 EXPORT_SYMBOL(tgt_extent_lock);
 
-void tgt_extent_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
+static int tgt_data_lock(const struct lu_env *env, struct obd_export *exp,
+                        struct ldlm_res_id *res_id, __u64 start, __u64 end,
+                        struct lustre_handle *lh, enum ldlm_mode mode)
+{
+       struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+       __u64 flags = 0;
+
+       /* MDT IO for data-on-mdt: an export whose connect flags include
+        * OBD_CONNECT_IBITS takes its lock through tgt_mdt_data_lock(),
+        * which is not given @start/@end; any other export takes an extent
+        * lock over [start, end] through tgt_extent_lock().
+        *
+        * Both calls use the namespace of the export's obd device and a
+        * local, zero-initialized flags word, and their return value is
+        * handed straight back to the caller. @lh is passed through to the
+        * lock call (presumably filled with the granted lock handle --
+        * confirm against tgt_mdt_data_lock()/tgt_extent_lock()).
+        */
+       if (exp->exp_connect_data.ocd_connect_flags & OBD_CONNECT_IBITS)
+               return tgt_mdt_data_lock(ns, res_id, lh, mode, &flags);
+
+       return tgt_extent_lock(env, ns, res_id, start, end, lh, mode, &flags);
+}
+
+void tgt_data_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
 {
        /* Callers must pass a handle that is in use (asserted below); the
         * lock reference for @mode is then dropped via ldlm_lock_decref().
         */
        LASSERT(lustre_handle_is_used(lh));
        ldlm_lock_decref(lh, mode);
 }
-EXPORT_SYMBOL(tgt_extent_unlock);
+EXPORT_SYMBOL(tgt_data_unlock);
 
 static int tgt_brw_lock(const struct lu_env *env, struct obd_export *exp,
                        struct ldlm_res_id *res_id, struct obd_ioobj *obj,
                        struct niobuf_remote *nb, struct lustre_handle *lh,
                        enum ldlm_mode mode)
 {
-       struct ldlm_namespace   *ns = exp->exp_obd->obd_namespace;
-       __u64                    flags = 0;
-       int                      nrbufs = obj->ioo_bufcnt;
-       int                      i;
-       int                      rc;
+       int nrbufs = obj->ioo_bufcnt;
+       int i;
 
        ENTRY;
 
        LASSERT(mode == LCK_PR || mode == LCK_PW);
        LASSERT(!lustre_handle_is_used(lh));
 
-       if (ns->ns_obd->obd_recovering)
+       if (exp->exp_obd->obd_recovering)
                RETURN(0);
 
        if (nrbufs == 0 || !(nb[0].rnb_flags & OBD_BRW_SRVLOCK))
@@ -1714,31 +1768,27 @@ static int tgt_brw_lock(const struct lu_env *env, struct obd_export *exp,
                if (!(nb[i].rnb_flags & OBD_BRW_SRVLOCK))
                        RETURN(-EFAULT);
 
-       /* MDT IO for data-on-mdt */
-       if (exp->exp_connect_data.ocd_connect_flags & OBD_CONNECT_IBITS)
-               rc = tgt_mdt_data_lock(ns, res_id, lh, mode, &flags);
-       else
-               rc = tgt_extent_lock(env, ns, res_id, nb[0].rnb_offset,
-                                    nb[nrbufs - 1].rnb_offset +
-                                    nb[nrbufs - 1].rnb_len - 1,
-                                    lh, mode, &flags);
-       RETURN(rc);
+       return tgt_data_lock(env, exp, res_id, nb[0].rnb_offset,
+                            nb[nrbufs - 1].rnb_offset +
+                            nb[nrbufs - 1].rnb_len - 1, lh, mode);
 }
 
-static void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob,
+static void tgt_brw_unlock(struct obd_export *exp, struct obd_ioobj *obj,
+                          struct niobuf_remote *niob,
                           struct lustre_handle *lh, enum ldlm_mode mode)
 {
        ENTRY;
 
        /* Only the read (LCK_PR) and write (LCK_PW) modes are accepted. */
        LASSERT(mode == LCK_PR || mode == LCK_PW);
        /* Consistency check: @lh must be in use exactly when the condition
         * on the left-hand side of the comparison holds, i.e. the handle
         * state has to agree with what the request buffers say about
         * server-side locking (OBD_BRW_SRVLOCK on the first remote niobuf).
         */
-       LASSERT((obj->ioo_bufcnt > 0 &&
-                (niob[0].rnb_flags & OBD_BRW_SRVLOCK)) ==
+       LASSERT((!exp->exp_obd->obd_recovering && obj->ioo_bufcnt &&
+                niob[0].rnb_flags & OBD_BRW_SRVLOCK) ==
                lustre_handle_is_used(lh));
 
        /* Nothing to release when no lock handle is in use. */
        if (lustre_handle_is_used(lh))
-               tgt_extent_unlock(lh, mode);
+               tgt_data_unlock(lh, mode);
        EXIT;
 }
+
 static int tgt_checksum_niobuf(struct lu_target *tgt,
                                 struct niobuf_local *local_nb, int npages,
                                 int opc, enum cksum_types cksum_type,
@@ -1767,13 +1817,12 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                        struct page *np = tgt_page_to_corrupt;
 
                        if (np) {
-                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
-                                                       KM_USER0);
+                               char *ptr = kmap_atomic(local_nb[i].lnb_page);
                                char *ptr2 = page_address(np);
 
                                memcpy(ptr2 + off, ptr + off, len);
                                memcpy(ptr2 + off, "bad3", min(4, len));
-                               ll_kunmap_atomic(ptr, KM_USER0);
+                               kunmap_atomic(ptr);
 
                                /* LU-8376 to preserve original index for
                                 * display in dump_all_bulk_pages() */
@@ -1800,13 +1849,12 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                        struct page *np = tgt_page_to_corrupt;
 
                        if (np) {
-                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
-                                                       KM_USER0);
+                               char *ptr = kmap_atomic(local_nb[i].lnb_page);
                                char *ptr2 = page_address(np);
 
                                memcpy(ptr2 + off, ptr + off, len);
                                memcpy(ptr2 + off, "bad4", min(4, len));
-                               ll_kunmap_atomic(ptr, KM_USER0);
+                               kunmap_atomic(ptr);
 
                                /* LU-8376 to preserve original index for
                                 * display in dump_all_bulk_pages() */
@@ -1843,15 +1891,15 @@ static void dump_all_bulk_pages(struct obdo *oa, int count,
         * file/fid, not during the resends/retries. */
        snprintf(dbgcksum_file_name, sizeof(dbgcksum_file_name),
                 "%s-checksum_dump-ost-"DFID":[%llu-%llu]-%x-%x",
-                (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 ?
-                 libcfs_debug_file_path_arr :
-                 LIBCFS_DEBUG_FILE_PATH_DEFAULT),
+                (strncmp(libcfs_debug_file_path, "NONE", 4) != 0 ?
+                 libcfs_debug_file_path : LIBCFS_DEBUG_FILE_PATH_DEFAULT),
                 oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
                 oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
                 oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
                 local_nb[0].lnb_file_offset,
                 local_nb[count-1].lnb_file_offset +
                 local_nb[count-1].lnb_len - 1, client_cksum, server_cksum);
+       CWARN("dumping checksum data to %s\n", dbgcksum_file_name);
        filp = filp_open(dbgcksum_file_name,
                         O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, 0600);
        if (IS_ERR(filp)) {
@@ -1878,8 +1926,6 @@ static void dump_all_bulk_pages(struct obdo *oa, int count,
                        }
                        len -= rc;
                        buf += rc;
-                       CDEBUG(D_INFO, "%s: wrote %d bytes\n",
-                              dbgcksum_file_name, rc);
                }
                kunmap(local_nb[i].lnb_page);
        }
@@ -1888,7 +1934,8 @@ static void dump_all_bulk_pages(struct obdo *oa, int count,
        if (rc)
                CERROR("%s: sync returns %d\n", dbgcksum_file_name, rc);
        filp_close(filp, NULL);
-       return;
+
+       libcfs_debug_dumplog();
 }
 
 static int check_read_checksum(struct niobuf_local *local_nb, int npages,
@@ -1956,9 +2003,9 @@ static int tgt_pages2shortio(struct niobuf_local *local, int npages,
                if (len > size)
                        return -EINVAL;
 
-               ptr = ll_kmap_atomic(local[i].lnb_page, KM_USER0);
-               memcpy(buf + off, ptr, len);
-               ll_kunmap_atomic(ptr, KM_USER0);
+               ptr = kmap_atomic(local[i].lnb_page);
+               memcpy(buf, ptr + off, len);
+               kunmap_atomic(ptr);
                buf += len;
                size -= len;
        }
@@ -1966,11 +2013,10 @@ static int tgt_pages2shortio(struct niobuf_local *local, int npages,
 }
 
 static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
-                                    struct niobuf_local *local_nb,
-                                    int npages, int opc,
-                                    obd_dif_csum_fn *fn,
-                                    int sector_size,
-                                    u32 *check_sum)
+                                    struct niobuf_local *local_nb, int npages,
+                                    int opc, obd_dif_csum_fn *fn,
+                                    int sector_size, u32 *check_sum,
+                                    bool resend)
 {
        enum cksum_types t10_cksum_type = tgt->lut_dt_conf.ddp_t10_cksum_type;
        unsigned char cfs_alg = cksum_obd2cfs(OBD_CKSUM_T10_TOP);
@@ -2001,7 +2047,11 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
        buffer = kmap(__page);
        guard_start = (__u16 *)buffer;
        guard_number = PAGE_SIZE / sizeof(*guard_start);
+       if (unlikely(resend))
+               CDEBUG(D_PAGE | D_HA, "GRD tags per page = %u\n", guard_number);
        for (i = 0; i < npages; i++) {
+               bool use_t10_grd;
+
                /* corrupt the data before we compute the checksum, to
                 * simulate a client->OST data error */
                if (i == 0 && opc == OST_WRITE &&
@@ -2011,13 +2061,12 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
                        struct page *np = tgt_page_to_corrupt;
 
                        if (np) {
-                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
-                                                       KM_USER0);
+                               char *ptr = kmap_atomic(local_nb[i].lnb_page);
                                char *ptr2 = page_address(np);
 
                                memcpy(ptr2 + off, ptr + off, len);
                                memcpy(ptr2 + off, "bad3", min(4, len));
-                               ll_kunmap_atomic(ptr, KM_USER0);
+                               kunmap_atomic(ptr);
 
                                /* LU-8376 to preserve original index for
                                 * display in dump_all_bulk_pages() */
@@ -2036,8 +2085,10 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
                 * The left guard number should be able to hold checksums of a
                 * whole page
                 */
-               if (t10_cksum_type && opc == OST_READ &&
-                   local_nb[i].lnb_guard_disk) {
+               use_t10_grd = t10_cksum_type && opc == OST_READ &&
+                             local_nb[i].lnb_len == PAGE_SIZE &&
+                             local_nb[i].lnb_guard_disk;
+               if (use_t10_grd) {
                        used = DIV_ROUND_UP(local_nb[i].lnb_len, sector_size);
                        if (used > (guard_number - used_number)) {
                                rc = -E2BIG;
@@ -2045,14 +2096,52 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
                        }
                        memcpy(guard_start + used_number,
                               local_nb[i].lnb_guards,
-                              used * sizeof(*local_nb[i].lnb_guards));
-               } else {
+                              used * sizeof(*guard_start));
+                       if (unlikely(resend))
+                               CDEBUG(D_PAGE | D_HA,
+                                      "lnb[%u]: used %u off %u+%u lnb checksum: %*phN\n",
+                                      i, used,
+                                      local_nb[i].lnb_page_offset,
+                                      local_nb[i].lnb_len,
+                                      (int)(used * sizeof(*guard_start)),
+                                      guard_start + used_number);
+               }
+               if (!use_t10_grd || unlikely(resend)) {
+                       __u16 guard_tmp[MAX_GUARD_NUMBER];
+                       __u16 *guards = guard_start + used_number;
+                       int used_tmp = -1, *usedp = &used;
+
+                       if (unlikely(use_t10_grd)) {
+                               guards = guard_tmp;
+                               usedp = &used_tmp;
+                       }
                        rc = obd_page_dif_generate_buffer(obd_name,
                                local_nb[i].lnb_page,
                                local_nb[i].lnb_page_offset & ~PAGE_MASK,
-                               local_nb[i].lnb_len, guard_start + used_number,
-                               guard_number - used_number, &used, sector_size,
+                               local_nb[i].lnb_len, guards,
+                               guard_number - used_number, usedp, sector_size,
                                fn);
+                       if (unlikely(resend)) {
+                               bool bad = use_t10_grd &&
+                                       memcmp(guard_tmp,
+                                              local_nb[i].lnb_guards,
+                                              used_tmp * sizeof(*guard_tmp));
+
+                               if (bad)
+                                       CERROR("lnb[%u]: used %u/%u off %u+%u tmp checksum: %*phN\n",
+                                              i, used, used_tmp,
+                                              local_nb[i].lnb_page_offset,
+                                              local_nb[i].lnb_len,
+                                              (int)(used_tmp * sizeof(*guard_start)),
+                                              guard_tmp);
+                               CDEBUG_LIMIT(D_PAGE | D_HA | (bad ? D_ERROR : 0),
+                                      "lnb[%u]: used %u/%u off %u+%u gen checksum: %*phN\n",
+                                      i, used, used_tmp,
+                                      local_nb[i].lnb_page_offset,
+                                      local_nb[i].lnb_len,
+                                      (int)(used * sizeof(*guard_start)),
+                                      guard_start + used_number);
+                       }
                        if (rc)
                                break;
                }
@@ -2092,13 +2181,12 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
                        struct page *np = tgt_page_to_corrupt;
 
                        if (np) {
-                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
-                                                       KM_USER0);
+                               char *ptr = kmap_atomic(local_nb[i].lnb_page);
                                char *ptr2 = page_address(np);
 
                                memcpy(ptr2 + off, ptr + off, len);
                                memcpy(ptr2 + off, "bad4", min(4, len));
-                               ll_kunmap_atomic(ptr, KM_USER0);
+                               kunmap_atomic(ptr);
 
                                /* LU-8376 to preserve original index for
                                 * display in dump_all_bulk_pages() */
@@ -2134,7 +2222,8 @@ out:
 static int tgt_checksum_niobuf_rw(struct lu_target *tgt,
                                  enum cksum_types cksum_type,
                                  struct niobuf_local *local_nb,
-                                 int npages, int opc, u32 *check_sum)
+                                 int npages, int opc, u32 *check_sum,
+                                 bool resend)
 {
        obd_dif_csum_fn *fn = NULL;
        int sector_size = 0;
@@ -2146,10 +2235,11 @@ static int tgt_checksum_niobuf_rw(struct lu_target *tgt,
        if (fn)
                rc = tgt_checksum_niobuf_t10pi(tgt, local_nb, npages,
                                               opc, fn, sector_size,
-                                              check_sum);
+                                              check_sum, resend);
        else
                rc = tgt_checksum_niobuf(tgt, local_nb, npages, opc,
                                         cksum_type, check_sum);
+
        RETURN(rc);
 }
 
@@ -2162,7 +2252,6 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        struct niobuf_local     *local_nb;
        struct obd_ioobj        *ioo;
        struct ost_body         *body, *repbody;
-       struct l_wait_info       lwi;
        struct lustre_handle     lockh = { 0 };
        int                      npages, nob = 0, rc, i, no_reply = 0,
                                 npages_read;
@@ -2182,8 +2271,22 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 
        req->rq_bulk_read = 1;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
-               RETURN(-EIO);
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK)) {
+               /* optionally use cfs_fail_val - 1 to select a specific OST on
+                * this server to fail requests.
+                */
+               char fail_ost_name[MAX_OBD_NAME];
+
+               if (cfs_fail_val > 0) {
+                       snprintf(fail_ost_name, MAX_OBD_NAME, "OST%04X",
+                                cfs_fail_val - 1);
+
+                       if (strstr(obd_name, fail_ost_name))
+                               RETURN(err_serious(-EIO));
+               } else {
+                       RETURN(err_serious(-EIO));
+               }
+       }
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK, cfs_fail_val > 0 ?
                         cfs_fail_val : (obd_timeout + 1) / 4);
@@ -2192,10 +2295,9 @@ int tgt_brw_read(struct tgt_session_info *tsi)
         * finish */
        if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
                /* We do not care how long it takes */
-               lwi = LWI_INTR(NULL, NULL);
-               rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
-                        !atomic_read(&exp->exp_obd->obd_evict_inprogress),
-                        &lwi);
+               wait_event_idle(
+                       exp->exp_obd->obd_evict_inprogress_waitq,
+                       !atomic_read(&exp->exp_obd->obd_evict_inprogress));
        }
 
        /* There must be big cache in current thread to process this request
@@ -2207,6 +2309,10 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        body = tsi->tsi_ost_body;
        LASSERT(body != NULL);
 
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_NORPC)
+               RETURN(0);
+
        ioo = req_capsule_client_get(tsi->tsi_pill, &RMF_OBD_IOOBJ);
        LASSERT(ioo != NULL); /* must exists after tgt_ost_body_unpack */
 
@@ -2243,12 +2349,12 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        if (rc != 0)
                GOTO(out_lock, rc);
 
-       if (body->oa.o_flags & OBD_FL_SHORT_IO) {
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_SHORT_IO) {
                desc = NULL;
        } else {
                desc = ptlrpc_prep_bulk_exp(req, npages, ioobj_max_brw_get(ioo),
-                                           PTLRPC_BULK_PUT_SOURCE |
-                                               PTLRPC_BULK_BUF_KIOV,
+                                           PTLRPC_BULK_PUT_SOURCE,
                                            OST_BULK_PORTAL,
                                            &ptlrpc_bulk_kiov_nopin_ops);
                if (desc == NULL)
@@ -2284,14 +2390,13 @@ int tgt_brw_read(struct tgt_session_info *tsi)
                        break;
                }
        }
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_READ_SIZE) &&
-           nob != cfs_fail_val)
-               rc = -E2BIG;
 
        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
                u32 flag = body->oa.o_valid & OBD_MD_FLFLAGS ?
                           body->oa.o_flags : 0;
                enum cksum_types cksum_type = obd_cksum_type_unpack(flag);
+               bool resend = (body->oa.o_valid & OBD_MD_FLFLAGS) &&
+                       (body->oa.o_flags & OBD_FL_RECOV_RESEND);
 
                repbody->oa.o_flags = obd_cksum_type_pack(obd_name,
                                                          cksum_type);
@@ -2299,17 +2404,17 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 
                rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
                                            local_nb, npages_read, OST_READ,
-                                           &repbody->oa.o_cksum);
+                                           &repbody->oa.o_cksum, resend);
                if (rc < 0)
                        GOTO(out_commitrw, rc);
-               CDEBUG(D_PAGE, "checksum at read origin: %x\n",
-                      repbody->oa.o_cksum);
+               CDEBUG(D_PAGE | (resend ? D_HA : 0),
+                      "checksum at read origin: %x (%x)\n",
+                      repbody->oa.o_cksum, cksum_type);
 
                /* if a resend it could be for a cksum error, so check Server
                 * cksum with returned Client cksum (this should even cover
                 * zero-cksum case) */
-               if ((body->oa.o_valid & OBD_MD_FLFLAGS) &&
-                   (body->oa.o_flags & OBD_FL_RECOV_RESEND))
+               if (resend)
                        check_read_checksum(local_nb, npages_read, exp,
                                            &body->oa, &req->rq_peer,
                                            body->oa.o_cksum,
@@ -2317,12 +2422,15 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        } else {
                repbody->oa.o_valid = 0;
        }
+       if (body->oa.o_valid & OBD_MD_FLGRANT)
+               repbody->oa.o_valid |= OBD_MD_FLGRANT;
        /* We're finishing using body->oa as an input variable */
 
        /* Check if client was evicted while we were doing i/o before touching
         * network */
        if (rc == 0) {
-               if (body->oa.o_flags & OBD_FL_SHORT_IO) {
+               if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+                   body->oa.o_flags & OBD_FL_SHORT_IO) {
                        unsigned char *short_io_buf;
                        int short_io_size;
 
@@ -2343,7 +2451,8 @@ int tgt_brw_read(struct tgt_session_info *tsi)
                }
                no_reply = rc != 0;
        } else {
-               if (body->oa.o_flags & OBD_FL_SHORT_IO)
+               if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+                   body->oa.o_flags & OBD_FL_SHORT_IO)
                        req_capsule_shrink(&req->rq_pill, &RMF_SHORT_IO, 0,
                                           RCL_SERVER);
        }
@@ -2353,7 +2462,7 @@ out_commitrw:
        rc = obd_commitrw(tsi->tsi_env, OBD_BRW_READ, exp, &repbody->oa, 1, ioo,
                          remote_nb, npages, local_nb, rc);
 out_lock:
-       tgt_brw_unlock(ioo, remote_nb, &lockh, LCK_PR);
+       tgt_brw_unlock(exp, ioo, remote_nb, &lockh, LCK_PR);
 
        if (desc && !CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2))
                ptlrpc_free_bulk(desc);
@@ -2377,14 +2486,22 @@ out_lock:
         * to reorder. */
        if (unlikely(CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2)) &&
            desc) {
-               wait_queue_head_t        waitq;
-               struct l_wait_info       lwi1;
+               /* Calculate checksum before request transfer, originally
+                * this is done by target_bulk_io() */
+               rc = sptlrpc_svc_wrap_bulk(req, desc);
+               if (OCD_HAS_FLAG(&exp->exp_connect_data, BULK_MBITS))
+                       req->rq_mbits = lustre_msg_get_mbits(req->rq_reqmsg);
+               else /* old version, bulk matchbits is rq_xid */
+                       req->rq_mbits = req->rq_xid;
+
+               req->rq_status = rc;
+               target_committed_to_req(req);
+               target_send_reply(req, 0, 0);
 
                CDEBUG(D_INFO, "reorder BULK\n");
-               init_waitqueue_head(&waitq);
+               OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2,
+                                cfs_fail_val ? : 3);
 
-               lwi1 = LWI_TIMEOUT_INTR(cfs_time_seconds(3), NULL, NULL, NULL);
-               l_wait_event(waitq, 0, &lwi1);
                target_bulk_io(exp, desc);
                ptlrpc_free_bulk(desc);
        }
@@ -2408,11 +2525,11 @@ static int tgt_shortio2pages(struct niobuf_local *local, int npages,
 
                CDEBUG(D_PAGE, "index %d offset = %d len = %d left = %d\n",
                       i, off, len, size);
-               ptr = ll_kmap_atomic(local[i].lnb_page, KM_USER0);
+               ptr = kmap_atomic(local[i].lnb_page);
                if (ptr == NULL)
                        return -EINVAL;
                memcpy(ptr + off, buf, len < size ? len : size);
-               ll_kunmap_atomic(ptr, KM_USER0);
+               kunmap_atomic(ptr);
                buf += len;
                size -= len;
        }
@@ -2478,12 +2595,15 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        struct lustre_handle     lockh = {0};
        __u32                   *rcs;
        int                      objcount, niocount, npages;
-       int                      rc, i, j;
+       int                      rc = 0;
+       int                      i, j;
        enum cksum_types cksum_type = OBD_CKSUM_CRC32;
        bool                     no_reply = false, mmap;
        struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
        bool wait_sync = false;
        const char *obd_name = exp->exp_obd->obd_name;
+       /* '1' for consistency with code that checks !mpflag to restore */
+       unsigned int mpflags = 1;
 
        ENTRY;
 
@@ -2504,9 +2624,25 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        req->rq_bulk_write = 1;
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
-               RETURN(err_serious(-EIO));
+               rc = -EIO;
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK2))
-               RETURN(err_serious(-EFAULT));
+               rc = -EFAULT;
+       if (rc < 0) {
+               /* optionally use cfs_fail_val - 1 to select a specific OST on
+                * this server to fail requests.
+                */
+               char fail_ost_name[MAX_OBD_NAME];
+
+               if (cfs_fail_val > 0) {
+                       snprintf(fail_ost_name, MAX_OBD_NAME, "OST%04X",
+                                cfs_fail_val - 1);
+
+                       if (strstr(obd_name, fail_ost_name))
+                               RETURN(err_serious(rc));
+               } else {
+                       RETURN(err_serious(rc));
+               }
+       }
 
        /* pause before transaction has been started */
        CFS_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK, cfs_fail_val > 0 ?
@@ -2524,6 +2660,11 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        body = tsi->tsi_ost_body;
        LASSERT(body != NULL);
 
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_NORPC)
+               RETURN(0);
+
+
        ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
        LASSERT(ioo != NULL); /* must exists after tgt_ost_body_unpack */
 
@@ -2542,7 +2683,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 
        if ((remote_nb[0].rnb_flags & OBD_BRW_MEMALLOC) &&
            ptlrpc_connection_is_local(exp->exp_connection))
-               memory_pressure_set();
+               mpflags = memalloc_noreclaim_save();
 
        req_capsule_set_size(&req->rq_pill, &RMF_RCS, RCL_SERVER,
                             niocount * sizeof(*rcs));
@@ -2593,7 +2734,8 @@ int tgt_brw_write(struct tgt_session_info *tsi)
                        objcount, ioo, remote_nb, &npages, local_nb);
        if (rc < 0)
                GOTO(out_lock, rc);
-       if (body->oa.o_flags & OBD_FL_SHORT_IO) {
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_SHORT_IO) {
                unsigned int short_io_size;
                unsigned char *short_io_buf;
 
@@ -2611,8 +2753,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
                desc = NULL;
        } else {
                desc = ptlrpc_prep_bulk_exp(req, npages, ioobj_max_brw_get(ioo),
-                                           PTLRPC_BULK_GET_SINK |
-                                           PTLRPC_BULK_BUF_KIOV,
+                                           PTLRPC_BULK_GET_SINK,
                                            OST_BULK_PORTAL,
                                            &ptlrpc_bulk_kiov_nopin_ops);
                if (desc == NULL)
@@ -2648,7 +2789,7 @@ skip_transfer:
 
                rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
                                            local_nb, npages, OST_WRITE,
-                                           &repbody->oa.o_cksum);
+                                           &repbody->oa.o_cksum, false);
                if (rc < 0)
                        GOTO(out_commitrw, rc);
 
@@ -2670,6 +2811,8 @@ skip_transfer:
                }
        }
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK2, cfs_fail_val);
+
 out_commitrw:
        /* Must commit after prep above in all cases */
        rc = obd_commitrw(tsi->tsi_env, OBD_BRW_WRITE, exp, &repbody->oa,
@@ -2715,7 +2858,7 @@ out_commitrw:
                ptlrpc_lprocfs_brw(req, nob);
        }
 out_lock:
-       tgt_brw_unlock(ioo, remote_nb, &lockh, LCK_PW);
+       tgt_brw_unlock(exp, ioo, remote_nb, &lockh, LCK_PW);
        if (desc)
                ptlrpc_free_bulk(desc);
 out:
@@ -2730,11 +2873,80 @@ out:
                                      obd_uuid2str(&exp->exp_client_uuid),
                                      obd_export_nid2str(exp), rc);
        }
-       memory_pressure_clr();
+
+       if (mpflags)
+               memalloc_noreclaim_restore(mpflags);
+
        RETURN(rc);
 }
 EXPORT_SYMBOL(tgt_brw_write);
 
+/**
+ * Common request handler for OST_SEEK RPC.
+ *
+ * Reads the seek offset from oa.o_size and the seek type from oa.o_mode
+ * of the request body, locates the object by the FID held in \a tsi,
+ * calls dt_lseek() on it and stores the returned value in oa.o_size of
+ * the reply body. Only SEEK_HOLE and SEEK_DATA are supported.
+ *
+ * The reply obdo starts out as a copy of the request obdo. When the
+ * request carries OBD_FL_SRVLOCK (with OBD_MD_FLFLAGS valid), an LCK_PR
+ * lock is requested through tgt_data_lock(), passing the offset and
+ * OBD_OBJECT_EOF, and it is released with tgt_data_unlock() on the
+ * common exit path.
+ *
+ * NOTE(review): the value returned by dt_lseek() is stored in o_size
+ * without any check and rc is then set to 0; if dt_lseek() can return a
+ * negative errno it reaches the client via o_size, not via the RPC
+ * status. Confirm that this is the intended protocol.
+ *
+ * \param[in] tsi      target session environment for this request
+ *
+ * \retval             0 if successful
+ * \retval             -EPROTO if the seek type is not SEEK_HOLE/SEEK_DATA
+ *                     or the offset is negative
+ * \retval             -ENOMEM if the reply body cannot be obtained
+ * \retval             -ENOENT if the object does not exist
+ * \retval             negative value on other errors (failure of
+ *                     tgt_data_lock() or dt_locate())
+ */
+int tgt_lseek(struct tgt_session_info *tsi)
+{
+       struct lustre_handle lh = { 0 };
+       struct dt_object *dob;
+       struct ost_body *repbody;
+       loff_t offset = tsi->tsi_ost_body->oa.o_size;
+       int whence = tsi->tsi_ost_body->oa.o_mode;
+       bool srvlock;
+       int rc = 0;
+
+       ENTRY;
+
+       if (whence != SEEK_HOLE && whence != SEEK_DATA)
+               RETURN(-EPROTO);
+
+       /* Negative offset is prohibited on wire and must be handled on client
+        * prior to sending the RPC.
+        */
+       if (offset < 0)
+               RETURN(-EPROTO);
+
+       repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
+       if (repbody == NULL)
+               RETURN(-ENOMEM);
+       repbody->oa = tsi->tsi_ost_body->oa; /* seed reply from request */
+
+       srvlock = tsi->tsi_ost_body->oa.o_valid & OBD_MD_FLFLAGS &&
+                 tsi->tsi_ost_body->oa.o_flags & OBD_FL_SRVLOCK;
+       if (srvlock) {
+               rc = tgt_data_lock(tsi->tsi_env, tsi->tsi_exp, &tsi->tsi_resid,
+                                  offset, OBD_OBJECT_EOF, &lh, LCK_PR);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       dob = dt_locate(tsi->tsi_env, tsi->tsi_tgt->lut_bottom, &tsi->tsi_fid);
+       if (IS_ERR(dob))
+               GOTO(out, rc = PTR_ERR(dob));
+
+       if (!dt_object_exists(dob))
+               GOTO(obj_put, rc = -ENOENT);
+
+       repbody->oa.o_size = dt_lseek(tsi->tsi_env, dob, offset, whence);
+       rc = 0; /* seek result is carried in o_size, not in rc */
+obj_put:
+       dt_object_put(tsi->tsi_env, dob);
+out:
+       if (srvlock) /* drop the lock requested above */
+               tgt_data_unlock(&lh, LCK_PR);
+
+       RETURN(rc);
+}
+EXPORT_SYMBOL(tgt_lseek);
+
 /* Check if request can be reconstructed from saved reply data
  * A copy of the reply data is returned in @trd if the pointer is not NULL
  */