RETURN(-EPROTO);
}
+ /* The "last_xid" is the minimum xid among unreplied requests.
+ * If the request is from a previous connection, its xid can
+ * still be larger than "exp_last_xid", so the xid check above
+ * is not enough to determine whether the request is delayed.
+ *
+ * For example, if a replay request was delayed and caused a
+ * timeout at the client and the replay is restarted, the delayed
+ * replay request will have a larger xid than "exp_last_xid".
+ */
+ if (req->rq_export->exp_conn_cnt >
+ lustre_msg_get_conn_cnt(req->rq_reqmsg))
+ RETURN(-ESTALE);
+
/* try to release in-memory reply data */
if (tgt_is_multimodrpcs_client(req->rq_export)) {
tgt_handle_received_xid(req->rq_export,
bool is_connect = false;
ENTRY;
+ if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) {
+ if (cfs_fail_val == 0 &&
+ lustre_msg_get_opc(msg) != OBD_PING &&
+ lustre_msg_get_flags(msg) & MSG_REQ_REPLAY_DONE) {
+ struct l_wait_info lwi = { 0 };
+
+ cfs_fail_val = 1;
+ cfs_race_state = 0;
+ l_wait_event(cfs_race_waitq, (cfs_race_state == 1),
+ &lwi);
+ }
+ }
+
/* Refill the context, to make sure all thread keys are allocated */
lu_env_refill(req->rq_svc_thread->t_env);
reply = req_capsule_server_get(tsi->tsi_pill, &RMF_CONNECT_DATA);
spin_lock(&tsi->tsi_exp->exp_lock);
*exp_connect_flags_ptr(tsi->tsi_exp) = reply->ocd_connect_flags;
+ if (reply->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+ *exp_connect_flags2_ptr(tsi->tsi_exp) =
+ reply->ocd_connect_flags2;
tsi->tsi_exp->exp_connect_data.ocd_brw_size = reply->ocd_brw_size;
spin_unlock(&tsi->tsi_exp->exp_lock);
struct tgt_handler tgt_obd_handlers[] = {
TGT_OBD_HDL (0, OBD_PING, tgt_obd_ping),
-TGT_OBD_HDL_VAR(0, OBD_LOG_CANCEL, tgt_obd_log_cancel),
TGT_OBD_HDL (0, OBD_IDX_READ, tgt_obd_idx_read)
};
EXPORT_SYMBOL(tgt_obd_handlers);
rc = lu_env_init(&env, LCT_DT_THREAD);
if (unlikely(rc != 0))
- RETURN(rc);
+ GOTO(err, rc);
ost_fid_from_resid(&fid, &lock->l_resource->lr_name,
tgt->lut_lsd.lsd_osd_index);
err_env:
lu_env_fini(&env);
}
-
+err:
rc = ldlm_server_blocking_ast(lock, desc, data, flag);
RETURN(rc);
}
/* generic LDLM target handler */
struct tgt_handler tgt_dlm_handlers[] = {
TGT_DLM_HDL (HABEO_CLAVIS, LDLM_ENQUEUE, tgt_enqueue),
-TGT_DLM_HDL_VAR(HABEO_CLAVIS, LDLM_CONVERT, tgt_convert),
+TGT_DLM_HDL (HABEO_CLAVIS, LDLM_CONVERT, tgt_convert),
TGT_DLM_HDL_VAR(0, LDLM_BL_CALLBACK, tgt_bl_callback),
TGT_DLM_HDL_VAR(0, LDLM_CP_CALLBACK, tgt_cp_callback)
};
}
EXPORT_SYMBOL(tgt_llog_open);
-int tgt_llog_close(struct tgt_session_info *tsi)
-{
- int rc;
-
- ENTRY;
-
- rc = llog_origin_handle_close(tgt_ses_req(tsi));
-
- RETURN(rc);
-}
-EXPORT_SYMBOL(tgt_llog_close);
-
-
-int tgt_llog_destroy(struct tgt_session_info *tsi)
-{
- int rc;
-
- ENTRY;
-
- rc = llog_origin_handle_destroy(tgt_ses_req(tsi));
-
- RETURN(rc);
-}
-
int tgt_llog_read_header(struct tgt_session_info *tsi)
{
int rc;
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, tgt_llog_next_block),
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header),
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, tgt_llog_prev_block),
-TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_DESTROY, tgt_llog_destroy),
-TGT_LLOG_HDL_VAR(0, LLOG_ORIGIN_HANDLE_CLOSE, tgt_llog_close),
};
EXPORT_SYMBOL(tgt_llog_handlers);
LASSERT(ns != NULL);
LASSERT(!lustre_handle_is_used(lh));
- rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, &policy, mode,
+ rc = ldlm_cli_enqueue_local(NULL, ns, res_id, LDLM_IBITS, &policy, mode,
flags, ldlm_blocking_ast,
ldlm_completion_ast, ldlm_glimpse_ast,
NULL, 0, LVB_T_NONE, NULL, lh);
* Helper function for getting server side [start, start+count] DLM lock
* if asked by client.
*/
-int tgt_extent_lock(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
- __u64 start, __u64 end, struct lustre_handle *lh,
- int mode, __u64 *flags)
+int tgt_extent_lock(const struct lu_env *env, struct ldlm_namespace *ns,
+ struct ldlm_res_id *res_id, __u64 start, __u64 end,
+ struct lustre_handle *lh, int mode, __u64 *flags)
{
union ldlm_policy_data policy;
int rc;
else
policy.l_extent.end = end | ~PAGE_MASK;
- rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_EXTENT, &policy, mode,
- flags, ldlm_blocking_ast,
+ rc = ldlm_cli_enqueue_local(env, ns, res_id, LDLM_EXTENT, &policy,
+ mode, flags, ldlm_blocking_ast,
ldlm_completion_ast, ldlm_glimpse_ast,
NULL, 0, LVB_T_NONE, NULL, lh);
RETURN(rc == ELDLM_OK ? 0 : -EIO);
}
EXPORT_SYMBOL(tgt_extent_unlock);
-static int tgt_brw_lock(struct obd_export *exp, struct ldlm_res_id *res_id,
- struct obd_ioobj *obj, struct niobuf_remote *nb,
- struct lustre_handle *lh, enum ldlm_mode mode)
+static int tgt_brw_lock(const struct lu_env *env, struct obd_export *exp,
+ struct ldlm_res_id *res_id, struct obd_ioobj *obj,
+ struct niobuf_remote *nb, struct lustre_handle *lh,
+ enum ldlm_mode mode)
{
struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
__u64 flags = 0;
if (exp->exp_connect_data.ocd_connect_flags & OBD_CONNECT_IBITS)
rc = tgt_mdt_data_lock(ns, res_id, lh, mode, &flags);
else
- rc = tgt_extent_lock(ns, res_id, nb[0].rnb_offset,
+ rc = tgt_extent_lock(env, ns, res_id, nb[0].rnb_offset,
nb[nrbufs - 1].rnb_offset +
nb[nrbufs - 1].rnb_len - 1,
lh, mode, &flags);
int opc, enum cksum_types cksum_type,
__u32 *cksum)
{
- struct cfs_crypto_hash_desc *hdesc;
+ struct ahash_request *req;
unsigned int bufsize;
int i, err;
unsigned char cfs_alg = cksum_obd2cfs(cksum_type);
- hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0);
- if (IS_ERR(hdesc)) {
+ req = cfs_crypto_hash_init(cfs_alg, NULL, 0);
+ if (IS_ERR(req)) {
CERROR("%s: unable to initialize checksum hash %s\n",
tgt_name(tgt), cfs_crypto_hash_name(cfs_alg));
- return PTR_ERR(hdesc);
+ return PTR_ERR(req);
}
CDEBUG(D_INFO, "Checksum for algo %s\n", cfs_crypto_hash_name(cfs_alg));
* display in dump_all_bulk_pages() */
np->index = i;
- cfs_crypto_hash_update_page(hdesc, np, off,
+ cfs_crypto_hash_update_page(req, np, off,
len);
continue;
} else {
tgt_name(tgt));
}
}
- cfs_crypto_hash_update_page(hdesc, local_nb[i].lnb_page,
+ cfs_crypto_hash_update_page(req, local_nb[i].lnb_page,
local_nb[i].lnb_page_offset & ~PAGE_MASK,
local_nb[i].lnb_len);
* display in dump_all_bulk_pages() */
np->index = i;
- cfs_crypto_hash_update_page(hdesc, np, off,
+ cfs_crypto_hash_update_page(req, np, off,
len);
continue;
} else {
}
bufsize = sizeof(*cksum);
- err = cfs_crypto_hash_final(hdesc, (unsigned char *)cksum, &bufsize);
+ err = cfs_crypto_hash_final(req, (unsigned char *)cksum, &bufsize);
return 0;
}
dump_all_bulk_pages(oa, npages, local_nb, server_cksum,
client_cksum);
- cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
- oa->o_flags : 0);
+ cksum_type = obd_cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+ oa->o_flags : 0);
if (cksum_type != server_cksum_type)
msg = "the server may have not used the checksum type specified"
return copied - size;
}
+static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
+ struct niobuf_local *local_nb,
+ int npages, int opc,
+ obd_dif_csum_fn *fn,
+ int sector_size,
+ u32 *check_sum)
+{
+ enum cksum_types t10_cksum_type = tgt->lut_dt_conf.ddp_t10_cksum_type;
+ unsigned char cfs_alg = cksum_obd2cfs(OBD_CKSUM_T10_TOP);
+ const char *obd_name = tgt->lut_obd->obd_name;
+ struct ahash_request *req;
+ unsigned int bufsize;
+ unsigned char *buffer;
+ struct page *__page;
+ __u16 *guard_start;
+ int guard_number;
+ int used_number = 0;
+ __u32 cksum;
+ int rc = 0;
+ int used;
+ int i;
+
+ __page = alloc_page(GFP_KERNEL);
+ if (__page == NULL)
+ return -ENOMEM;
+
+ req = cfs_crypto_hash_init(cfs_alg, NULL, 0);
+ if (IS_ERR(req)) {
+ CERROR("%s: unable to initialize checksum hash %s\n",
+ tgt_name(tgt), cfs_crypto_hash_name(cfs_alg));
+ return PTR_ERR(req);
+ }
+
+ buffer = kmap(__page);
+ guard_start = (__u16 *)buffer;
+ guard_number = PAGE_SIZE / sizeof(*guard_start);
+ for (i = 0; i < npages; i++) {
+ /* corrupt the data before we compute the checksum, to
+ * simulate a client->OST data error */
+ if (i == 0 && opc == OST_WRITE &&
+ OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_RECEIVE)) {
+ int off = local_nb[i].lnb_page_offset & ~PAGE_MASK;
+ int len = local_nb[i].lnb_len;
+ struct page *np = tgt_page_to_corrupt;
+
+ if (np) {
+ char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
+ KM_USER0);
+ char *ptr2 = page_address(np);
+
+ memcpy(ptr2 + off, ptr + off, len);
+ memcpy(ptr2 + off, "bad3", min(4, len));
+ ll_kunmap_atomic(ptr, KM_USER0);
+
+ /* LU-8376 to preserve original index for
+ * display in dump_all_bulk_pages() */
+ np->index = i;
+
+ cfs_crypto_hash_update_page(req, np, off,
+ len);
+ continue;
+ } else {
+ CERROR("%s: can't alloc page for corruption\n",
+ tgt_name(tgt));
+ }
+ }
+
+ /*
+ * The left guard number should be able to hold checksums of a
+ * whole page
+ */
+ if (t10_cksum_type && opc == OST_READ &&
+ local_nb[i].lnb_guard_disk) {
+ used = DIV_ROUND_UP(local_nb[i].lnb_len, sector_size);
+ if (used > (guard_number - used_number)) {
+ rc = -E2BIG;
+ break;
+ }
+ memcpy(guard_start + used_number,
+ local_nb[i].lnb_guards,
+ used * sizeof(*local_nb[i].lnb_guards));
+ } else {
+ rc = obd_page_dif_generate_buffer(obd_name,
+ local_nb[i].lnb_page,
+ local_nb[i].lnb_page_offset & ~PAGE_MASK,
+ local_nb[i].lnb_len, guard_start + used_number,
+ guard_number - used_number, &used, sector_size,
+ fn);
+ if (rc)
+ break;
+ }
+
+ LASSERT(used <= MAX_GUARD_NUMBER);
+ /*
+ * If disk support T10PI checksum, copy guards to local_nb.
+ * If the write is partial page, do not use the guards for bio
+ * submission since the data might not be full-sector. The bio
+ * guards will be generated later based on the full sectors. If
+ * the sector size is 512B rather than 4 KB, or the page size
+ * is larger than 4KB, this might drop some useful guards for
+ * partial page write, but it will only add minimal extra time
+ * of checksum calculation.
+ */
+ if (t10_cksum_type && opc == OST_WRITE &&
+ local_nb[i].lnb_len == PAGE_SIZE) {
+ local_nb[i].lnb_guard_rpc = 1;
+ memcpy(local_nb[i].lnb_guards,
+ guard_start + used_number,
+ used * sizeof(*local_nb[i].lnb_guards));
+ }
+
+ used_number += used;
+ if (used_number == guard_number) {
+ cfs_crypto_hash_update_page(req, __page, 0,
+ used_number * sizeof(*guard_start));
+ used_number = 0;
+ }
+
+ /* corrupt the data after we compute the checksum, to
+ * simulate an OST->client data error */
+ if (unlikely(i == 0 && opc == OST_READ &&
+ OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_SEND))) {
+ int off = local_nb[i].lnb_page_offset & ~PAGE_MASK;
+ int len = local_nb[i].lnb_len;
+ struct page *np = tgt_page_to_corrupt;
+
+ if (np) {
+ char *ptr = ll_kmap_atomic(local_nb[i].lnb_page,
+ KM_USER0);
+ char *ptr2 = page_address(np);
+
+ memcpy(ptr2 + off, ptr + off, len);
+ memcpy(ptr2 + off, "bad4", min(4, len));
+ ll_kunmap_atomic(ptr, KM_USER0);
+
+ /* LU-8376 to preserve original index for
+ * display in dump_all_bulk_pages() */
+ np->index = i;
+
+ cfs_crypto_hash_update_page(req, np, off,
+ len);
+ continue;
+ } else {
+ CERROR("%s: can't alloc page for corruption\n",
+ tgt_name(tgt));
+ }
+ }
+ }
+ kunmap(__page);
+ if (rc)
+ GOTO(out, rc);
+
+ if (used_number != 0)
+ cfs_crypto_hash_update_page(req, __page, 0,
+ used_number * sizeof(*guard_start));
+
+ bufsize = sizeof(cksum);
+ rc = cfs_crypto_hash_final(req, (unsigned char *)&cksum, &bufsize);
+
+ if (rc == 0)
+ *check_sum = cksum;
+out:
+ __free_page(__page);
+ return rc;
+}
+
+static int tgt_checksum_niobuf_rw(struct lu_target *tgt,
+ enum cksum_types cksum_type,
+ struct niobuf_local *local_nb,
+ int npages, int opc, u32 *check_sum)
+{
+ obd_dif_csum_fn *fn = NULL;
+ int sector_size = 0;
+ int rc;
+
+ ENTRY;
+ obd_t10_cksum2dif(cksum_type, &fn, §or_size);
+
+ if (fn)
+ rc = tgt_checksum_niobuf_t10pi(tgt, local_nb, npages,
+ opc, fn, sector_size,
+ check_sum);
+ else
+ rc = tgt_checksum_niobuf(tgt, local_nb, npages, opc,
+ cksum_type, check_sum);
+ RETURN(rc);
+}
+
int tgt_brw_read(struct tgt_session_info *tsi)
{
struct ptlrpc_request *req = tgt_ses_req(tsi);
int npages, nob = 0, rc, i, no_reply = 0,
npages_read;
struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
+ const char *obd_name = exp->exp_obd->obd_name;
ENTRY;
local_nb = tbc->local;
- rc = tgt_brw_lock(exp, &tsi->tsi_resid, ioo, remote_nb, &lockh,
- LCK_PR);
+ rc = tgt_brw_lock(tsi->tsi_env, exp, &tsi->tsi_resid, ioo, remote_nb,
+ &lockh, LCK_PR);
if (rc != 0)
RETURN(rc);
rc = -E2BIG;
if (body->oa.o_valid & OBD_MD_FLCKSUM) {
- enum cksum_types cksum_type =
- cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ?
- body->oa.o_flags : 0);
+ u32 flag = body->oa.o_valid & OBD_MD_FLFLAGS ?
+ body->oa.o_flags : 0;
+ enum cksum_types cksum_type = obd_cksum_type_unpack(flag);
- repbody->oa.o_flags = cksum_type_pack(cksum_type);
+ repbody->oa.o_flags = obd_cksum_type_pack(obd_name,
+ cksum_type);
repbody->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
- rc = tgt_checksum_niobuf(tsi->tsi_tgt, local_nb,
- npages_read, OST_READ, cksum_type,
- &repbody->oa.o_cksum);
+
+ rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
+ local_nb, npages_read, OST_READ,
+ &repbody->oa.o_cksum);
if (rc < 0)
GOTO(out_commitrw, rc);
-
CDEBUG(D_PAGE, "checksum at read origin: %x\n",
repbody->oa.o_cksum);
ptlrpc_req_drop_rs(req);
LCONSOLE_WARN("%s: Bulk IO read error with %s (at %s), "
"client will retry: rc %d\n",
- exp->exp_obd->obd_name,
+ obd_name,
obd_uuid2str(&exp->exp_client_uuid),
obd_export_nid2str(exp), rc);
}
bool no_reply = false, mmap;
struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
bool wait_sync = false;
+ const char *obd_name = exp->exp_obd->obd_name;
ENTRY;
local_nb = tbc->local;
- rc = tgt_brw_lock(exp, &tsi->tsi_resid, ioo, remote_nb, &lockh,
- LCK_PW);
+ rc = tgt_brw_lock(tsi->tsi_env, exp, &tsi->tsi_resid, ioo, remote_nb,
+ &lockh, LCK_PW);
if (rc != 0)
GOTO(out, rc);
static int cksum_counter;
if (body->oa.o_valid & OBD_MD_FLFLAGS)
- cksum_type = cksum_type_unpack(body->oa.o_flags);
+ cksum_type = obd_cksum_type_unpack(body->oa.o_flags);
repbody->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
repbody->oa.o_flags &= ~OBD_FL_CKSUM_ALL;
- repbody->oa.o_flags |= cksum_type_pack(cksum_type);
- rc = tgt_checksum_niobuf(tsi->tsi_tgt, local_nb,
- npages, OST_WRITE, cksum_type,
- &repbody->oa.o_cksum);
+ repbody->oa.o_flags |= obd_cksum_type_pack(obd_name,
+ cksum_type);
+
+ rc = tgt_checksum_niobuf_rw(tsi->tsi_tgt, cksum_type,
+ local_nb, npages, OST_WRITE,
+ &repbody->oa.o_cksum);
if (rc < 0)
GOTO(out_commitrw, rc);
if (!exp->exp_obd->obd_no_transno)
LCONSOLE_WARN("%s: Bulk IO write error with %s (at %s),"
" client will retry: rc = %d\n",
- exp->exp_obd->obd_name,
+ obd_name,
obd_uuid2str(&exp->exp_client_uuid),
obd_export_nid2str(exp), rc);
}