From: Mikhail Pershin Date: Wed, 7 Nov 2018 13:31:57 +0000 (+0300) Subject: LU-11595 mdt: fix read-on-open for big PAGE_SIZE X-Git-Tag: 2.12.0-RC1~19 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4d7b022e373d265f4f3b9d90af44cddd0e65f9ae LU-11595 mdt: fix read-on-open for big PAGE_SIZE The client PAGE_SIZE can be larger than the server's, so data returned from the server along with OPEN can be misaligned on the client. This patch replaces the client-side assertion with a check and a graceful exit, raises MDC_DOM_DEF_INLINE_REPSIZE to at least PAGE_SIZE, and updates mdt_dom_read_on_open() to align the returned file tail to the larger of the client and server page sizes, returning it only if it fits into the reply. Signed-off-by: Mikhail Pershin Change-Id: Ic2c54b95c814d3b6df3b527527cac08488060651 Reviewed-on: https://review.whamcloud.com/33606 Tested-by: Jenkins Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 8007956..f866fba 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -443,8 +443,26 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req, if (rnb == NULL || rnb->rnb_len == 0) RETURN_EXIT; - CDEBUG(D_INFO, "Get data buffer along with open, len %i, i_size %llu\n", - rnb->rnb_len, i_size_read(inode)); + /* LU-11595: Server may return whole file and that is OK always or + * it may return just file tail and its offset must be aligned with + * client PAGE_SIZE to be used on that client, if server's PAGE_SIZE is + * smaller then offset may be not aligned and that data is just ignored. + */ + if (rnb->rnb_offset % PAGE_SIZE) + RETURN_EXIT; + + /* Server returns whole file or just file tail if it fills in + * reply buffer, in both cases total size should be inode size. 
+ */ + if (rnb->rnb_offset + rnb->rnb_len < i_size_read(inode)) { + CERROR("%s: server returns off/len %llu/%u < i_size %llu\n", + ll_get_fsname(inode->i_sb, NULL, 0), rnb->rnb_offset, + rnb->rnb_len, i_size_read(inode)); + RETURN_EXIT; + } + + CDEBUG(D_INFO, "Get data along with open at %llu len %i, i_size %llu\n", + rnb->rnb_offset, rnb->rnb_len, i_size_read(inode)); data = (char *)rnb + sizeof(*rnb); diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 18d3001..36ce3b6 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -173,7 +173,8 @@ int mdc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, int mdc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data); int mdc_fill_lvb(struct ptlrpc_request *req, struct ost_lvb *lvb); -#define MDC_DOM_DEF_INLINE_REPSIZE 8192 +/* the minimum inline repsize should be PAGE_SIZE at least */ +#define MDC_DOM_DEF_INLINE_REPSIZE max(8192UL, PAGE_SIZE) #define MDC_DOM_MAX_INLINE_REPSIZE XATTR_SIZE_MAX #endif diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 7d15b33..4caa22a 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -2169,7 +2169,7 @@ out_shrink: * Data-on-MDT optimization - read data along with OPEN and return it * in reply. Do that only if we have both DOM and LAYOUT locks. 
*/ - if (rc == 0 && op == REINT_OPEN && + if (rc == 0 && op == REINT_OPEN && !req_is_replay(pill->rc_req) && info->mti_attr.ma_lmm != NULL && mdt_lmm_dom_entry(info->mti_attr.ma_lmm) == LMM_DOM_ONLY) { rc = mdt_dom_read_on_open(info, info->mti_mdt, diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 1fc93b9..3124c8c 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -1465,9 +1465,26 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, len = mbo->mbo_dom_size; offset = 0; } else { - int tail = mbo->mbo_dom_size % PAGE_SIZE; + int tail, pgbits; + + /* File tail offset must be aligned with larger page size + * between client and server, so the maximum page size is + * used here to align offset. + * + * NB: DOM feature was introduced when server supports pagebits + * already, so it should be always non-zero value. Report error + * if it is not for some reason. + */ + if (!req->rq_export->exp_target_data.ted_pagebits) { + CERROR("%s: client page bits are not saved on server\n", + mdt_obd_name(mdt)); + RETURN(0); + } + pgbits = max_t(int, PAGE_SHIFT, + req->rq_export->exp_target_data.ted_pagebits); + tail = mbo->mbo_dom_size % (1 << pgbits); - /* no tail or tail can't fit in reply */ + /* no partial tail or tail can't fit in reply */ if (tail == 0 || len < tail) RETURN(0); @@ -1482,22 +1499,23 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, GOTO(out, rc = -E2BIG); } - /* re-take MDT_BODY buffer after the buffer growing above */ + /* re-take MDT_BODY and NIOBUF_INLINE buffers after the buffer grow */ mbo = req_capsule_server_get(pill, &RMF_MDT_BODY); fid = &mbo->mbo_fid1; if (!fid_is_sane(fid)) - RETURN(0); + GOTO(out, rc = -EINVAL); rnb = req_capsule_server_get(tsi->tsi_pill, &RMF_NIOBUF_INLINE); if (rnb == NULL) GOTO(out, rc = -EPROTO); + buf = (char *)rnb + sizeof(*rnb); rnb->rnb_len = len; rnb->rnb_offset = offset; mo = dt_locate(env, dt, fid); if (IS_ERR(mo)) - GOTO(out, rc = 
PTR_ERR(mo)); + GOTO(out_rnb, rc = PTR_ERR(mo)); LASSERT(mo != NULL); dt_read_lock(env, mo, 0); @@ -1535,11 +1553,14 @@ int mdt_dom_read_on_open(struct mdt_thread_info *mti, struct mdt_device *mdt, } CDEBUG(D_INFO, "Read %i (wanted %u) bytes from %llu\n", copied, len, offset); - if (copied < len) + if (copied < len) { CWARN("%s: read %i bytes for "DFID " but wanted %u, is size wrong?\n", tsi->tsi_exp->exp_obd->obd_name, copied, PFID(&tsi->tsi_fid), len); + /* Ignore partially copied data */ + copied = 0; + } EXIT; buf_put: dt_bufs_put(env, mo, lnb, nr_local); @@ -1548,9 +1569,15 @@ free: unlock: dt_read_unlock(env, mo); lu_object_put(env, &mo->do_lu); +out_rnb: + rnb->rnb_len = copied; out: - if (rnb != NULL) - rnb->rnb_len = copied; + /* Don't fail OPEN request if read-on-open is failed, but drop + * a message in log about the error. + */ + if (rc) + CDEBUG(D_INFO, "Read-on-open is failed, rc = %d", rc); + RETURN(0); }