Whamcloud - gitweb
LU-11697 ost: do not reuse T10PI guards of unaligned page write
[fs/lustre-release.git] / lustre / target / tgt_handler.c
index 3b8454d..64c0052 100644 (file)
@@ -991,6 +991,9 @@ int tgt_connect(struct tgt_session_info *tsi)
        reply = req_capsule_server_get(tsi->tsi_pill, &RMF_CONNECT_DATA);
        spin_lock(&tsi->tsi_exp->exp_lock);
        *exp_connect_flags_ptr(tsi->tsi_exp) = reply->ocd_connect_flags;
+       if (reply->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+               *exp_connect_flags2_ptr(tsi->tsi_exp) =
+                       reply->ocd_connect_flags2;
        tsi->tsi_exp->exp_connect_data.ocd_brw_size = reply->ocd_brw_size;
        spin_unlock(&tsi->tsi_exp->exp_lock);
 
@@ -1194,7 +1197,6 @@ out:
 
 struct tgt_handler tgt_obd_handlers[] = {
 TGT_OBD_HDL    (0,     OBD_PING,               tgt_obd_ping),
-TGT_OBD_HDL_VAR(0,     OBD_LOG_CANCEL,         tgt_obd_log_cancel),
 TGT_OBD_HDL    (0,     OBD_IDX_READ,           tgt_obd_idx_read)
 };
 EXPORT_SYMBOL(tgt_obd_handlers);
@@ -1268,7 +1270,7 @@ static int tgt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 
                rc = lu_env_init(&env, LCT_DT_THREAD);
                if (unlikely(rc != 0))
-                       RETURN(rc);
+                       GOTO(err, rc);
 
                ost_fid_from_resid(&fid, &lock->l_resource->lr_name,
                                   tgt->lut_lsd.lsd_osd_index);
@@ -1299,7 +1301,7 @@ err_put:
 err_env:
                lu_env_fini(&env);
        }
-
+err:
        rc = ldlm_server_blocking_ast(lock, desc, data, flag);
        RETURN(rc);
 }
@@ -1392,30 +1394,6 @@ int tgt_llog_open(struct tgt_session_info *tsi)
 }
 EXPORT_SYMBOL(tgt_llog_open);
 
-int tgt_llog_close(struct tgt_session_info *tsi)
-{
-       int rc;
-
-       ENTRY;
-
-       rc = llog_origin_handle_close(tgt_ses_req(tsi));
-
-       RETURN(rc);
-}
-EXPORT_SYMBOL(tgt_llog_close);
-
-
-int tgt_llog_destroy(struct tgt_session_info *tsi)
-{
-       int rc;
-
-       ENTRY;
-
-       rc = llog_origin_handle_destroy(tgt_ses_req(tsi));
-
-       RETURN(rc);
-}
-
 int tgt_llog_read_header(struct tgt_session_info *tsi)
 {
        int rc;
@@ -1458,8 +1436,6 @@ TGT_LLOG_HDL    (0,       LLOG_ORIGIN_HANDLE_CREATE,      tgt_llog_open),
 TGT_LLOG_HDL    (0,    LLOG_ORIGIN_HANDLE_NEXT_BLOCK,  tgt_llog_next_block),
 TGT_LLOG_HDL    (0,    LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header),
 TGT_LLOG_HDL    (0,    LLOG_ORIGIN_HANDLE_PREV_BLOCK,  tgt_llog_prev_block),
-TGT_LLOG_HDL    (0,    LLOG_ORIGIN_HANDLE_DESTROY,     tgt_llog_destroy),
-TGT_LLOG_HDL_VAR(0,    LLOG_ORIGIN_HANDLE_CLOSE,       tgt_llog_close),
 };
 EXPORT_SYMBOL(tgt_llog_handlers);
 
@@ -1628,7 +1604,7 @@ int tgt_mdt_data_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
        LASSERT(ns != NULL);
        LASSERT(!lustre_handle_is_used(lh));
 
-       rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, &policy, mode,
+       rc = ldlm_cli_enqueue_local(NULL, ns, res_id, LDLM_IBITS, &policy, mode,
                                    flags, ldlm_blocking_ast,
                                    ldlm_completion_ast, ldlm_glimpse_ast,
                                    NULL, 0, LVB_T_NONE, NULL, lh);
@@ -1648,9 +1624,9 @@ EXPORT_SYMBOL(tgt_mdt_data_unlock);
  * Helper function for getting server side [start, start+count] DLM lock
  * if asked by client.
  */
-int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
-                   __u64 start, __u64 end, struct lustre_handle *lh,
-                   int mode, __u64 *flags)
+int tgt_extent_lock(const struct lu_env *env, struct ldlm_namespace *ns,
+                   struct ldlm_res_id *res_id, __u64 start, __u64 end,
+                   struct lustre_handle *lh, int mode, __u64 *flags)
 {
        union ldlm_policy_data policy;
        int rc;
@@ -1673,8 +1649,8 @@ int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
        else
                policy.l_extent.end = end | ~PAGE_MASK;
 
-       rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_EXTENT, &policy, mode,
-                                   flags, ldlm_blocking_ast,
+       rc = ldlm_cli_enqueue_local(env, ns, res_id, LDLM_EXTENT, &policy,
+                                   mode, flags, ldlm_blocking_ast,
                                    ldlm_completion_ast, ldlm_glimpse_ast,
                                    NULL, 0, LVB_T_NONE, NULL, lh);
        RETURN(rc == ELDLM_OK ? 0 : -EIO);
@@ -1688,9 +1664,10 @@ void tgt_extent_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
 }
 EXPORT_SYMBOL(tgt_extent_unlock);
 
-static int tgt_brw_lock(struct obd_export *exp, struct ldlm_res_id *res_id,
-                       struct obd_ioobj *obj, struct niobuf_remote *nb,
-                       struct lustre_handle *lh, enum ldlm_mode mode)
+static int tgt_brw_lock(const struct lu_env *env, struct obd_export *exp,
+                       struct ldlm_res_id *res_id, struct obd_ioobj *obj,
+                       struct niobuf_remote *nb, struct lustre_handle *lh,
+                       enum ldlm_mode mode)
 {
        struct ldlm_namespace   *ns = exp->exp_obd->obd_namespace;
        __u64                    flags = 0;
@@ -1717,7 +1694,7 @@ static int tgt_brw_lock(struct obd_export *exp, struct ldlm_res_id *res_id,
        if (exp->exp_connect_data.ocd_connect_flags & OBD_CONNECT_IBITS)
                rc = tgt_mdt_data_lock(ns, res_id, lh, mode, &flags);
        else
-               rc = tgt_extent_lock(ns, res_id, nb[0].rnb_offset,
+               rc = tgt_extent_lock(env, ns, res_id, nb[0].rnb_offset,
                                     nb[nrbufs - 1].rnb_offset +
                                     nb[nrbufs - 1].rnb_len - 1,
                                     lh, mode, &flags);
@@ -1743,16 +1720,16 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                                 int opc, enum cksum_types cksum_type,
                                 __u32 *cksum)
 {
-       struct cfs_crypto_hash_desc     *hdesc;
+       struct ahash_request           *req;
        unsigned int                    bufsize;
        int                             i, err;
        unsigned char                   cfs_alg = cksum_obd2cfs(cksum_type);
 
-       hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0);
-       if (IS_ERR(hdesc)) {
+       req = cfs_crypto_hash_init(cfs_alg, NULL, 0);
+       if (IS_ERR(req)) {
                CERROR("%s: unable to initialize checksum hash %s\n",
                       tgt_name(tgt), cfs_crypto_hash_name(cfs_alg));
-               return PTR_ERR(hdesc);
+               return PTR_ERR(req);
        }
 
        CDEBUG(D_INFO, "Checksum for algo %s\n", cfs_crypto_hash_name(cfs_alg));
@@ -1778,7 +1755,7 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                                 * display in dump_all_bulk_pages() */
                                np->index = i;
 
-                               cfs_crypto_hash_update_page(hdesc, np, off,
+                               cfs_crypto_hash_update_page(req, np, off,
                                                            len);
                                continue;
                        } else {
@@ -1786,7 +1763,7 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                                       tgt_name(tgt));
                        }
                }
-               cfs_crypto_hash_update_page(hdesc, local_nb[i].lnb_page,
+               cfs_crypto_hash_update_page(req, local_nb[i].lnb_page,
                                  local_nb[i].lnb_page_offset & ~PAGE_MASK,
                                  local_nb[i].lnb_len);
 
@@ -1811,7 +1788,7 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
                                 * display in dump_all_bulk_pages() */
                                np->index = i;
 
-                               cfs_crypto_hash_update_page(hdesc, np, off,
+                               cfs_crypto_hash_update_page(req, np, off,
                                                            len);
                                continue;
                        } else {
@@ -1822,7 +1799,7 @@ static int tgt_checksum_niobuf(struct lu_target *tgt,
        }
 
        bufsize = sizeof(*cksum);
-       err = cfs_crypto_hash_final(hdesc, (unsigned char *)cksum, &bufsize);
+       err = cfs_crypto_hash_final(req, (unsigned char *)cksum, &bufsize);
 
        return 0;
 }
@@ -1912,8 +1889,8 @@ static int check_read_checksum(struct niobuf_local *local_nb, int npages,
                dump_all_bulk_pages(oa, npages, local_nb, server_cksum,
                                    client_cksum);
 
-       cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
-                                      oa->o_flags : 0);
+       cksum_type = obd_cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+                                          oa->o_flags : 0);
 
        if (cksum_type != server_cksum_type)
                msg = "the server may have not used the checksum type specified"
@@ -1964,6 +1941,194 @@ static int tgt_pages2shortio(struct niobuf_local *local, int npages,
        return copied - size;
 }
 
+/**
+ * Compute the T10-PI based bulk checksum for a set of local niobufs.
+ *
+ * Per-sector guard tags are collected into one scratch page.  For a read
+ * whose niobuf has lnb_guard_disk set the guards are copied from
+ * lnb_guards; otherwise they are generated from the page data by
+ * obd_page_dif_generate_buffer() using \a fn.  Every time the scratch page
+ * fills up, and once more for any remainder, the collected guards are fed
+ * into an OBD_CKSUM_T10_TOP hash; its digest is the resulting checksum.
+ *
+ * For full-page writes on a target with a T10 checksum type configured, the
+ * generated guards are also stored in the niobuf (lnb_guards, with
+ * lnb_guard_rpc set).
+ *
+ * \param[in]  tgt          target the I/O belongs to
+ * \param[in]  local_nb     array of local niobufs describing the pages
+ * \param[in]  npages       number of entries in \a local_nb
+ * \param[in]  opc          OST_READ or OST_WRITE
+ * \param[in]  fn           DIF guard function for each sector
+ * \param[in]  sector_size  number of data bytes covered by one guard
+ * \param[out] check_sum    resulting checksum, written only on success
+ *
+ * \retval 0        on success
+ * \retval -ENOMEM  if the scratch page cannot be allocated
+ * \retval -E2BIG   if the guards of one niobuf do not fit in the space left
+ * \retval negative other error from hash setup, guard generation or
+ *                  hash finalization
+ */
+static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
+                                    struct niobuf_local *local_nb,
+                                    int npages, int opc,
+                                    obd_dif_csum_fn *fn,
+                                    int sector_size,
+                                    u32 *check_sum)
+{
+       enum cksum_types t10_cksum_type = tgt->lut_dt_conf.ddp_t10_cksum_type;
+       unsigned char cfs_alg = cksum_obd2cfs(OBD_CKSUM_T10_TOP);
+       const char *obd_name = tgt->lut_obd->obd_name;
+       struct ahash_request *req;
+       unsigned int bufsize;
+       unsigned char *buffer;
+       struct page *__page;
+       __u16 *guard_start;
+       int guard_number;
+       int used_number = 0;
+       __u32 cksum;
+       int rc = 0;
+       int rc2;
+       int used;
+       int i;
+
+       __page = alloc_page(GFP_KERNEL);
+       if (__page == NULL)
+               return -ENOMEM;
+
+       req = cfs_crypto_hash_init(cfs_alg, NULL, 0);
+       if (IS_ERR(req)) {
+               CERROR("%s: unable to initialize checksum hash %s\n",
+                      tgt_name(tgt), cfs_crypto_hash_name(cfs_alg));
+               /* the scratch page is already allocated, do not leak it */
+               GOTO(out, rc = PTR_ERR(req));
+       }
+
+       buffer = kmap(__page);
+       guard_start = (__u16 *)buffer;
+       guard_number = PAGE_SIZE / sizeof(*guard_start);
+       for (i = 0; i < npages; i++) {
+               /* corrupt the data before we compute the checksum, to
+                * simulate a client->OST data error */
+               if (i == 0 && opc == OST_WRITE &&
+                   OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_RECEIVE)) {
+                       int off = local_nb[i].lnb_page_offset & ~PAGE_MASK;
+                       int len = local_nb[i].lnb_len;
+                       struct page *np = tgt_page_to_corrupt;
+
+                       if (np) {
+                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
+                                                       KM_USER0);
+                               char *ptr2 = page_address(np);
+
+                               memcpy(ptr2 + off, ptr + off, len);
+                               memcpy(ptr2 + off, "bad3", min(4, len));
+                               ll_kunmap_atomic(ptr, KM_USER0);
+
+                               /* LU-8376 to preserve original index for
+                                * display in dump_all_bulk_pages() */
+                               np->index = i;
+
+                               cfs_crypto_hash_update_page(req, np, off,
+                                                           len);
+                               continue;
+                       } else {
+                               CERROR("%s: can't alloc page for corruption\n",
+                                      tgt_name(tgt));
+                       }
+               }
+
+               /*
+                * The left guard number should be able to hold checksums of a
+                * whole page
+                */
+               if (t10_cksum_type && opc == OST_READ &&
+                   local_nb[i].lnb_guard_disk) {
+                       used = DIV_ROUND_UP(local_nb[i].lnb_len, sector_size);
+                       if (used > (guard_number - used_number)) {
+                               rc = -E2BIG;
+                               break;
+                       }
+                       memcpy(guard_start + used_number,
+                              local_nb[i].lnb_guards,
+                              used * sizeof(*local_nb[i].lnb_guards));
+               } else {
+                       rc = obd_page_dif_generate_buffer(obd_name,
+                               local_nb[i].lnb_page,
+                               local_nb[i].lnb_page_offset & ~PAGE_MASK,
+                               local_nb[i].lnb_len, guard_start + used_number,
+                               guard_number - used_number, &used, sector_size,
+                               fn);
+                       if (rc)
+                               break;
+               }
+
+               LASSERT(used <= MAX_GUARD_NUMBER);
+               /*
+                * If the disk supports T10PI checksums, copy guards to
+                * local_nb. If the write is partial page, do not use the
+                * guards for bio submission since the data might not be
+                * full-sector. The bio guards will be generated later based
+                * on the full sectors. If the sector size is 512B rather than
+                * 4 KB, or the page size is larger than 4KB, this might drop
+                * some useful guards for partial page write, but it will only
+                * add minimal extra time of checksum calculation.
+                */
+               if (t10_cksum_type && opc == OST_WRITE &&
+                   local_nb[i].lnb_len == PAGE_SIZE) {
+                       local_nb[i].lnb_guard_rpc = 1;
+                       memcpy(local_nb[i].lnb_guards,
+                              guard_start + used_number,
+                              used * sizeof(*local_nb[i].lnb_guards));
+               }
+
+               used_number += used;
+               if (used_number == guard_number) {
+                       /* scratch page is full: hash it and start over */
+                       cfs_crypto_hash_update_page(req, __page, 0,
+                               used_number * sizeof(*guard_start));
+                       used_number = 0;
+               }
+
+               /* corrupt the data after we compute the checksum, to
+                * simulate an OST->client data error */
+               if (unlikely(i == 0 && opc == OST_READ &&
+                            OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_SEND))) {
+                       int off = local_nb[i].lnb_page_offset & ~PAGE_MASK;
+                       int len = local_nb[i].lnb_len;
+                       struct page *np = tgt_page_to_corrupt;
+
+                       if (np) {
+                               char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
+                                                       KM_USER0);
+                               char *ptr2 = page_address(np);
+
+                               memcpy(ptr2 + off, ptr + off, len);
+                               memcpy(ptr2 + off, "bad4", min(4, len));
+                               ll_kunmap_atomic(ptr, KM_USER0);
+
+                               /* LU-8376 to preserve original index for
+                                * display in dump_all_bulk_pages() */
+                               np->index = i;
+
+                               cfs_crypto_hash_update_page(req, np, off,
+                                                           len);
+                               continue;
+                       } else {
+                               CERROR("%s: can't alloc page for corruption\n",
+                                      tgt_name(tgt));
+                       }
+               }
+       }
+       kunmap(__page);
+       if (rc)
+               GOTO(out_hash, rc);
+
+       /* hash whatever guards are left over from the last partial fill */
+       if (used_number != 0)
+               cfs_crypto_hash_update_page(req, __page, 0,
+                       used_number * sizeof(*guard_start));
+
+out_hash:
+       /*
+        * Finalize on the error path as well: nothing else in this function
+        * disposes of \a req, so skipping cfs_crypto_hash_final() would
+        * presumably leak the hash request (NOTE(review): confirm against
+        * the libcfs crypto API).  An earlier error takes precedence over
+        * the result of the finalization.
+        */
+       bufsize = sizeof(cksum);
+       rc2 = cfs_crypto_hash_final(req, (unsigned char *)&cksum, &bufsize);
+       if (rc == 0)
+               rc = rc2;
+       if (rc == 0)
+               *check_sum = cksum;
+out:
+       __free_page(__page);
+       return rc;
+}
+
+/**
+ * Compute the bulk checksum for a read or write, picking the implementation
+ * that matches \a cksum_type.
+ *
+ * obd_t10_cksum2dif() is asked for a DIF guard function and sector size for
+ * \a cksum_type.  If it supplies a function, the T10-PI path
+ * (tgt_checksum_niobuf_t10pi()) is used; otherwise the checksum is computed
+ * by tgt_checksum_niobuf().
+ *
+ * \param[in]  tgt         target the I/O belongs to
+ * \param[in]  cksum_type  checksum type to compute
+ * \param[in]  local_nb    array of local niobufs describing the pages
+ * \param[in]  npages      number of entries in \a local_nb
+ * \param[in]  opc         OST_READ or OST_WRITE
+ * \param[out] check_sum   where the computed checksum is stored
+ *
+ * \retval return value of the selected checksum routine
+ */
+static int tgt_checksum_niobuf_rw(struct lu_target *tgt,
+                                 enum cksum_types cksum_type,
+                                 struct niobuf_local *local_nb,
+                                 int npages, int opc, u32 *check_sum)
+{
+       obd_dif_csum_fn *dif_fn = NULL;
+       int dif_sector_size = 0;
+
+       ENTRY;
+       obd_t10_cksum2dif(cksum_type, &dif_fn, &dif_sector_size);
+
+       /* no DIF function for this type: use the plain checksum path */
+       if (dif_fn == NULL)
+               RETURN(tgt_checksum_niobuf(tgt, local_nb, npages, opc,
+                                          cksum_type, check_sum));
+
+       RETURN(tgt_checksum_niobuf_t10pi(tgt, local_nb, npages, opc, dif_fn,
+                                        dif_sector_size, check_sum));
+}
+
 int tgt_brw_read(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
@@ -1978,6 +2143,7 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        int                      npages, nob = 0, rc, i, no_reply = 0,
                                 npages_read;
        struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
+       const char *obd_name = exp->exp_obd->obd_name;
 
        ENTRY;
 
@@ -2025,8 +2191,8 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 
        local_nb = tbc->local;
 
-       rc = tgt_brw_lock(exp, &tsi->tsi_resid, ioo, remote_nb, &lockh,
-                         LCK_PR);
+       rc = tgt_brw_lock(tsi->tsi_env, exp, &tsi->tsi_resid, ioo, remote_nb,
+                         &lockh, LCK_PR);
        if (rc != 0)
                RETURN(rc);
 
@@ -2099,18 +2265,19 @@ int tgt_brw_read(struct tgt_session_info *tsi)
                rc = -E2BIG;
 
        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
-               enum cksum_types cksum_type =
-                       cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ?
-                                         body->oa.o_flags : 0);
+               u32 flag = body->oa.o_valid & OBD_MD_FLFLAGS ?
+                          body->oa.o_flags : 0;
+               enum cksum_types cksum_type = obd_cksum_type_unpack(flag);
 
-               repbody->oa.o_flags = cksum_type_pack(cksum_type);
+               repbody->oa.o_flags = obd_cksum_type_pack(obd_name,
+                                                         cksum_type);
                repbody->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
-               rc = tgt_checksum_niobuf(tsi->tsi_tgt, local_nb,
-                                        npages_read, OST_READ, cksum_type,
-                                        &repbody->oa.o_cksum);
+
+               rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
+                                           local_nb, npages_read, OST_READ,
+                                           &repbody->oa.o_cksum);
                if (rc < 0)
                        GOTO(out_commitrw, rc);
-
                CDEBUG(D_PAGE, "checksum at read origin: %x\n",
                       repbody->oa.o_cksum);
 
@@ -2177,7 +2344,7 @@ out_lock:
                ptlrpc_req_drop_rs(req);
                LCONSOLE_WARN("%s: Bulk IO read error with %s (at %s), "
                              "client will retry: rc %d\n",
-                             exp->exp_obd->obd_name,
+                             obd_name,
                              obd_uuid2str(&exp->exp_client_uuid),
                              obd_export_nid2str(exp), rc);
        }
@@ -2293,6 +2460,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        bool                     no_reply = false, mmap;
        struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
        bool wait_sync = false;
+       const char *obd_name = exp->exp_obd->obd_name;
 
        ENTRY;
 
@@ -2361,8 +2529,8 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 
        local_nb = tbc->local;
 
-       rc = tgt_brw_lock(exp, &tsi->tsi_resid, ioo, remote_nb, &lockh,
-                         LCK_PW);
+       rc = tgt_brw_lock(tsi->tsi_env, exp, &tsi->tsi_resid, ioo, remote_nb,
+                         &lockh, LCK_PW);
        if (rc != 0)
                GOTO(out, rc);
 
@@ -2445,14 +2613,16 @@ skip_transfer:
                static int cksum_counter;
 
                if (body->oa.o_valid & OBD_MD_FLFLAGS)
-                       cksum_type = cksum_type_unpack(body->oa.o_flags);
+                       cksum_type = obd_cksum_type_unpack(body->oa.o_flags);
 
                repbody->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
                repbody->oa.o_flags &= ~OBD_FL_CKSUM_ALL;
-               repbody->oa.o_flags |= cksum_type_pack(cksum_type);
-               rc = tgt_checksum_niobuf(tsi->tsi_tgt, local_nb,
-                                        npages, OST_WRITE, cksum_type,
-                                        &repbody->oa.o_cksum);
+               repbody->oa.o_flags |= obd_cksum_type_pack(obd_name,
+                                                          cksum_type);
+
+               rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
+                                           local_nb, npages, OST_WRITE,
+                                           &repbody->oa.o_cksum);
                if (rc < 0)
                        GOTO(out_commitrw, rc);
 
@@ -2530,7 +2700,7 @@ out:
                if (!exp->exp_obd->obd_no_transno)
                        LCONSOLE_WARN("%s: Bulk IO write error with %s (at %s),"
                                      " client will retry: rc = %d\n",
-                                     exp->exp_obd->obd_name,
+                                     obd_name,
                                      obd_uuid2str(&exp->exp_client_uuid),
                                      obd_export_nid2str(exp), rc);
        }