* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2016, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
cl_io_is_mkwrite(io)))
RETURN(0);
+ /* FLR: check if it needs to send a write intent RPC to server.
+ * Writing to sync_pending file needs write intent RPC to change
+ * the file state back to write_pending, so that the layout version
+ * can be increased when the state changes to sync_pending at a later
+ * time. Otherwise there exists a chance that an evicted client may
+ * dirty the file data while resync client is working on it.
+ * Designated I/O is allowed for resync workload.
+ */
if (lov_flr_state(obj) == LCM_FL_RDONLY ||
- lov_flr_state(obj) == LCM_FL_SYNC_PENDING) {
+ (lov_flr_state(obj) == LCM_FL_SYNC_PENDING &&
+ io->ci_designated_mirror == 0)) {
io->ci_need_write_intent = 1;
RETURN(0);
}
RETURN(0);
}
+ /* transfer the layout version for verification */
+ if (io->ci_layout_version == 0)
+ io->ci_layout_version = obj->lo_lsm->lsm_layout_gen;
+
+ /* find the corresponding mirror for designated mirror IO */
+ if (io->ci_designated_mirror > 0) {
+ struct lov_mirror_entry *entry;
+
+ LASSERT(!io->ci_ndelay);
+
+ CDEBUG(D_LAYOUT, "designated I/O mirror state: %d\n",
+ lov_flr_state(obj));
+
+ if ((cl_io_is_trunc(io) || io->ci_type == CIT_WRITE) &&
+ (io->ci_layout_version != obj->lo_lsm->lsm_layout_gen)) {
+ /* For resync I/O, ci_layout_version is the layout
+ * version from when resync started. If it doesn't
+ * match the current object layout version, the
+ * layout has been changed. */
+ RETURN(-ESTALE);
+ }
+
+ io->ci_layout_version |= LU_LAYOUT_RESYNC;
+
+ index = 0;
+ lio->lis_mirror_index = -1;
+ lov_foreach_mirror_entry(obj, entry) {
+ if (entry->lre_mirror_id ==
+ io->ci_designated_mirror) {
+ lio->lis_mirror_index = index;
+ break;
+ }
+
+ index++;
+ }
+
+ RETURN(lio->lis_mirror_index < 0 ? -EINVAL : 0);
+ }
+
result = lov_io_mirror_write_intent(lio, obj, io);
if (result)
RETURN(result);
PFID(lu_object_fid(lov2lu(obj))),
lio->lis_pos, lio->lis_endpos);
+ if (cl_io_is_trunc(io)) {
+ /**
+ * for truncate, we use [size, EOF) to judge whether
+ * a write intent needs to be sent, but we need to
+ * restore the write extent to [0, size).
+ */
+ io->ci_write_intent.e_start = 0;
+ io->ci_write_intent.e_end =
+ io->u.ci_setattr.sa_attr.lvb_size;
+ }
/* stop cl_io_init() loop */
RETURN(1);
}
- /* transfer the layout version for verification */
- io->ci_layout_version = obj->lo_lsm->lsm_layout_gen;
-
if (io->ci_ndelay_tried == 0 || /* first time to try */
/* reset the mirror index if layout has changed */
lio->lis_mirror_layout_gen != obj->lo_lsm->lsm_layout_gen) {
static int lov_io_slice_init(struct lov_io *lio,
struct lov_object *obj, struct cl_io *io)
{
- struct lu_extent ext;
int index;
int result = 0;
ENTRY;
(cl_io_is_trunc(io) && io->u.ci_setattr.sa_attr.lvb_size > 0)))
GOTO(out, result = 0);
- ext.e_start = lio->lis_pos;
- ext.e_end = lio->lis_endpos;
-
/* for truncate, it only needs to instantiate the components
* before the truncated size. */
if (cl_io_is_trunc(io)) {
- ext.e_start = 0;
- ext.e_end = io->u.ci_setattr.sa_attr.lvb_size;
+ io->ci_write_intent.e_start = 0;
+ io->ci_write_intent.e_end = io->u.ci_setattr.sa_attr.lvb_size;
+ } else {
+ io->ci_write_intent.e_start = lio->lis_pos;
+ io->ci_write_intent.e_end = lio->lis_endpos;
}
index = 0;
- lov_foreach_io_layout(index, lio, &ext) {
+ lov_foreach_io_layout(index, lio, &io->ci_write_intent) {
if (!lsm_entry_inited(obj->lo_lsm, index)) {
io->ci_need_write_intent = 1;
- io->ci_write_intent = ext;
- GOTO(out, result = 1);
+ break;
}
}
+
+ if (io->ci_need_write_intent && io->ci_designated_mirror > 0) {
+ /* REINT_SYNC RPC has already tried to instantiate all of the
+ * components involved, obviously it didn't succeed. Skip this
+ * mirror for now. The server won't be able to figure out
+ * which mirror it should instantiate components for. */
+ CERROR(DFID": trying to instantiate components for designated "
+ "I/O, file state: %d\n",
+ PFID(lu_object_fid(lov2lu(obj))), lov_flr_state(obj));
+
+ io->ci_need_write_intent = 0;
+ GOTO(out, result = -EIO);
+ }
+
+ if (io->ci_need_write_intent)
+ GOTO(out, result = 1);
+
EXIT;
out:
ext.e_end = lio->lis_endpos;
lov_foreach_io_layout(index, lio, &ext) {
- struct lov_layout_raid0 *r0 = lov_r0(lio->lis_object, index);
+ struct lov_layout_entry *le = lov_entry(lio->lis_object, index);
+ struct lov_layout_raid0 *r0 = &le->lle_raid0;
u64 start;
u64 end;
int stripe;
continue;
}
+ if (!le->lle_valid && !ios->cis_io->ci_designated_mirror) {
+ CERROR("I/O to invalid component: %d, mirror: %d\n",
+ index, lio->lis_mirror_index);
+ RETURN(-EIO);
+ }
+
for (stripe = 0; stripe < r0->lo_nr; stripe++) {
if (!lov_stripe_intersects(lsm, index, stripe,
&ext, &start, &end))
RETURN(-ENODATA);
}
+ if (!lov_entry(lio->lis_object, index)->lle_valid &&
+ !io->ci_designated_mirror)
+ RETURN(io->ci_type == CIT_READ ? -EAGAIN : -EIO);
+
lse = lov_lse(lio->lis_object, index);
next = MAX_LFS_FILESIZE;
list_for_each_entry(sub, &lio->lis_active, sub_linkage) {
struct cl_data_version_io *sdv = &sub->sub_io.u.ci_data_version;
- lov_io_end_wrapper(env, &sub->sub_io);
+ lov_io_end_wrapper(sub->sub_env, &sub->sub_io);
pdv->dv_data_version += sdv->dv_data_version;
if (pdv->dv_layout_version > sdv->dv_layout_version)
if (lov_page_is_empty(page)) {
cl_page_list_move(&queue->c2_qout, qin, page);
- cl_page_prep(env, ios->cis_io, page, crt);
+ /* Only a mirror read can get here, so the pages
+ * will be transient. We don't care about the
+ * return code of cl_page_prep() at all. */
+ (void) cl_page_prep(env, ios->cis_io, page, crt);
cl_page_completion(env, page, crt, 0);
continue;
}