/* obtain new environment */
sub->sub_env = cl_env_get(&sub->sub_refcheck);
- if (IS_ERR(sub->sub_env))
+ if (IS_ERR(sub->sub_env)) {
result = PTR_ERR(sub->sub_env);
+ RETURN(result);
+ }
sub_obj = lovsub2cl(lov_r0(lov, index)->lo_sub[stripe]);
sub_io = &sub->sub_io;
sub_io->ci_type = io->ci_type;
sub_io->ci_no_srvlock = io->ci_no_srvlock;
sub_io->ci_noatime = io->ci_noatime;
- sub_io->ci_pio = io->ci_pio;
+ sub_io->ci_async_readahead = io->ci_async_readahead;
sub_io->ci_lock_no_expand = io->ci_lock_no_expand;
sub_io->ci_ndelay = io->ci_ndelay;
sub_io->ci_layout_version = io->ci_layout_version;
+ sub_io->ci_tried_all_mirrors = io->ci_tried_all_mirrors;
result = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj);
* Lov io operations.
*
*/
-
-int lov_page_index(const struct cl_page *page)
-{
- const struct cl_page_slice *slice;
- ENTRY;
-
- slice = cl_page_at(page, &lov_device_type);
- LASSERT(slice != NULL);
- LASSERT(slice->cpl_obj != NULL);
-
- RETURN(cl2lov_page(slice)->lps_index);
-}
-
static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
struct cl_io *io)
{
found = true;
break;
}
- }
-
+ } /* each component of the mirror */
if (found) {
index = (index + i) % comp->lo_mirror_count;
break;
}
- }
+ } /* each mirror */
+
if (i == comp->lo_mirror_count) {
CERROR(DFID": failed to find a component covering "
"I/O region at %llu\n",
* of this client has been partitioned. We should relinquish CPU for
* a while before trying again.
*/
- ++io->ci_ndelay_tried;
- if (io->ci_ndelay && io->ci_ndelay_tried >= comp->lo_mirror_count) {
+ if (io->ci_ndelay && io->ci_ndelay_tried > 0 &&
+ (io->ci_ndelay_tried % comp->lo_mirror_count == 0)) {
set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC)); /* 10ms */
+ schedule_timeout(cfs_time_seconds(1) / 100); /* 10ms */
if (signal_pending(current))
RETURN(-EINTR);
- /* reset retry counter */
- io->ci_ndelay_tried = 1;
+ /**
+ * we'd set ci_tried_all_mirrors to turn off fast mirror
+ * switching for read after we've tried all mirrors several
+ * rounds.
+ */
+ io->ci_tried_all_mirrors = io->ci_ndelay_tried %
+ (comp->lo_mirror_count * 4) == 0;
}
+ ++io->ci_ndelay_tried;
CDEBUG(D_VFSTRACE, "use %sdelayed RPC state for this IO\n",
io->ci_ndelay ? "non-" : "");
switch (io->ci_type) {
case CIT_READ:
case CIT_WRITE:
- lio->lis_pos = io->u.ci_rw.rw_range.cir_pos;
- lio->lis_endpos = lio->lis_pos + io->u.ci_rw.rw_range.cir_count;
+ lio->lis_pos = io->u.ci_rw.crw_pos;
+ lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
lio->lis_io_endpos = lio->lis_endpos;
if (cl_io_is_append(io)) {
LASSERT(io->ci_type == CIT_WRITE);
*/
if (cl_io_is_trunc(io)) {
io->ci_write_intent.e_start = 0;
- io->ci_write_intent.e_end = io->u.ci_setattr.sa_attr.lvb_size;
+ /* for writes, e_end is endpos, the location of the file
+ * pointer after the write is completed, so it is not accessed.
+ * For truncate, 'end' is the size, and *is* accessed.
+ * In other words, writes are [start, end), but truncate is
+ * [start, size], where both are included. So add 1 to the
+ * size when creating the write intent to account for this.
+ */
+ io->ci_write_intent.e_end =
+ io->u.ci_setattr.sa_attr.lvb_size + 1;
} else {
io->ci_write_intent.e_start = lio->lis_pos;
io->ci_write_intent.e_end = lio->lis_endpos;
int index = lov_comp_entry(sub->sub_subio_index);
int stripe = lov_comp_stripe(sub->sub_subio_index);
- io->ci_pio = parent->ci_pio;
switch (io->ci_type) {
case CIT_SETATTR: {
io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
}
case CIT_READ:
case CIT_WRITE: {
- io->u.ci_rw.rw_ptask = parent->u.ci_rw.rw_ptask;
- io->u.ci_rw.rw_iter = parent->u.ci_rw.rw_iter;
- io->u.ci_rw.rw_iocb = parent->u.ci_rw.rw_iocb;
- io->u.ci_rw.rw_file = parent->u.ci_rw.rw_file;
- io->u.ci_rw.rw_sync = parent->u.ci_rw.rw_sync;
+ io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent);
+ io->ci_tried_all_mirrors = parent->ci_tried_all_mirrors;
if (cl_io_is_append(parent)) {
- io->u.ci_rw.rw_append = 1;
+ io->u.ci_wr.wr_append = 1;
} else {
- io->u.ci_rw.rw_range.cir_pos = start;
- io->u.ci_rw.rw_range.cir_count = end - start;
+ io->u.ci_rw.crw_pos = start;
+ io->u.ci_rw.crw_count = end - start;
}
break;
}
if (rc != 0)
break;
- CDEBUG(D_VFSTRACE,
- "shrink stripe: {%d, %d} range: [%llu, %llu)\n",
- index, stripe, start, end);
+ CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n",
+ stripe, start, end);
list_add_tail(&sub->sub_linkage, &lio->lis_active);
}
static int lov_io_rw_iter_init(const struct lu_env *env,
const struct cl_io_slice *ios)
{
- struct cl_io *io = ios->cis_io;
struct lov_io *lio = cl2lov_io(env, ios);
+ struct cl_io *io = ios->cis_io;
struct lov_stripe_md_entry *lse;
- struct cl_io_range *range = &io->u.ci_rw.rw_range;
- loff_t start = range->cir_pos;
+ loff_t start = io->u.ci_rw.crw_pos;
loff_t next;
int index;
if (cl_io_is_append(io))
RETURN(lov_io_iter_init(env, ios));
- index = lov_io_layout_at(lio, range->cir_pos);
+ index = lov_io_layout_at(lio, io->u.ci_rw.crw_pos);
if (index < 0) { /* non-existing layout component */
if (io->ci_type == CIT_READ) {
/*
* then set the next pos
*/
io->ci_continue = 0;
- /* execute it in main thread */
- io->ci_pio = 0;
RETURN(lov_io_iter_init(env, ios));
}
next = MAX_LFS_FILESIZE;
}
- LASSERTF(range->cir_pos >= lse->lsme_extent.e_start,
- "pos %lld, [%lld, %lld)\n", range->cir_pos,
+ LASSERTF(io->u.ci_rw.crw_pos >= lse->lsme_extent.e_start,
+ "pos %lld, [%lld, %lld)\n", io->u.ci_rw.crw_pos,
lse->lsme_extent.e_start, lse->lsme_extent.e_end);
next = min_t(__u64, next, lse->lsme_extent.e_end);
next = min_t(loff_t, next, lio->lis_io_endpos);
- io->ci_continue = next < lio->lis_io_endpos;
- range->cir_count = next - range->cir_pos;
- lio->lis_pos = range->cir_pos;
- lio->lis_endpos = range->cir_pos + range->cir_count;
+ io->ci_continue = next < lio->lis_io_endpos;
+ io->u.ci_rw.crw_count = next - io->u.ci_rw.crw_pos;
+ lio->lis_pos = io->u.ci_rw.crw_pos;
+ lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
CDEBUG(D_VFSTRACE,
- "stripe: {%d, %llu} range: [%llu, %llu) end: %llu, count: %zd\n",
- index, start, lio->lis_pos, lio->lis_endpos,
- lio->lis_io_endpos, range->cir_count);
-
- if (!io->ci_continue) {
- /* the last piece of IO, execute it in main thread */
- io->ci_pio = 0;
- }
-
- if (io->ci_pio)
- RETURN(0);
+ "stripe: %llu chunk: [%llu, %llu) %llu, %zd\n",
+ (__u64)start, lio->lis_pos, lio->lis_endpos,
+ (__u64)lio->lis_io_endpos, io->u.ci_rw.crw_count);
/*
* XXX The following call should be optimized: we know, that
struct lov_io_sub *sub;
struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
struct cl_page *page;
+ struct cl_page *tmp;
int index;
int rc = 0;
ENTRY;
cl_2queue_init(cl2q);
cl_page_list_move(&cl2q->c2_qin, qin, page);
- index = lov_page_index(page);
- while (qin->pl_nr > 0) {
- page = cl_page_list_first(qin);
- if (index != lov_page_index(page))
- break;
+ index = page->cp_lov_index;
+ cl_page_list_for_each_safe(page, tmp, qin) {
+ /* this page is not on this stripe */
+ if (index != page->cp_lov_index)
+ continue;
cl_page_list_move(&cl2q->c2_qin, qin, page);
}
cl_page_list_move(plist, queue, page);
- index = lov_page_index(page);
+ index = page->cp_lov_index;
while (queue->pl_nr > 0) {
page = cl_page_list_first(queue);
- if (index != lov_page_index(page))
+ if (index != page->cp_lov_index)
break;
cl_page_list_move(plist, queue, page);
fio = &ios->cis_io->u.ci_fault;
lio = cl2lov_io(env, ios);
- sub = lov_sub_get(env, lio, lov_page_index(fio->ft_page));
+ sub = lov_sub_get(env, lio, fio->ft_page->cp_lov_index);
sub->sub_io.u.ci_fault.ft_nob = fio->ft_nob;
RETURN(lov_io_start(env, ios));