X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flov%2Flov_io.c;h=92b8ce5ed8fde2fa64940bd37f01a33cfa6ff204;hp=4f70a684960d81e7dccc4e01fb7aea201846f223;hb=39da3c06275e04e2a6e7f055cb27ee9dff1ea576;hpb=7e4ce69298e0fd716dcd5419d3e871112a4d250b diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 4f70a68..92b8ce5 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -122,8 +122,10 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, /* obtain new environment */ sub->sub_env = cl_env_get(&sub->sub_refcheck); - if (IS_ERR(sub->sub_env)) + if (IS_ERR(sub->sub_env)) { result = PTR_ERR(sub->sub_env); + RETURN(result); + } sub_obj = lovsub2cl(lov_r0(lov, index)->lo_sub[stripe]); sub_io = &sub->sub_io; @@ -136,10 +138,11 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, sub_io->ci_type = io->ci_type; sub_io->ci_no_srvlock = io->ci_no_srvlock; sub_io->ci_noatime = io->ci_noatime; - sub_io->ci_pio = io->ci_pio; + sub_io->ci_async_readahead = io->ci_async_readahead; sub_io->ci_lock_no_expand = io->ci_lock_no_expand; sub_io->ci_ndelay = io->ci_ndelay; sub_io->ci_layout_version = io->ci_layout_version; + sub_io->ci_tried_all_mirrors = io->ci_tried_all_mirrors; result = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj); @@ -189,19 +192,6 @@ out: * Lov io operations. 
* */ - -int lov_page_index(const struct cl_page *page) -{ - const struct cl_page_slice *slice; - ENTRY; - - slice = cl_page_at(page, &lov_device_type); - LASSERT(slice != NULL); - LASSERT(slice->cpl_obj != NULL); - - RETURN(cl2lov_page(slice)->lps_index); -} - static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, struct cl_io *io) { @@ -416,13 +406,13 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, found = true; break; } - } - + } /* each component of the mirror */ if (found) { index = (index + i) % comp->lo_mirror_count; break; } - } + } /* each mirror */ + if (i == comp->lo_mirror_count) { CERROR(DFID": failed to find a component covering " "I/O region at %llu\n", @@ -446,16 +436,22 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, * of this client has been partitioned. We should relinquish CPU for * a while before trying again. */ - ++io->ci_ndelay_tried; - if (io->ci_ndelay && io->ci_ndelay_tried >= comp->lo_mirror_count) { + if (io->ci_ndelay && io->ci_ndelay_tried > 0 && + (io->ci_ndelay_tried % comp->lo_mirror_count == 0)) { set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC)); /* 10ms */ + schedule_timeout(cfs_time_seconds(1) / 100); /* 10ms */ if (signal_pending(current)) RETURN(-EINTR); - /* reset retry counter */ - io->ci_ndelay_tried = 1; + /** + * we'd set ci_tried_all_mirrors to turn off fast mirror + * switching for read after we've tried all mirrors several + * rounds. + */ + io->ci_tried_all_mirrors = io->ci_ndelay_tried % + (comp->lo_mirror_count * 4) == 0; } + ++io->ci_ndelay_tried; CDEBUG(D_VFSTRACE, "use %sdelayed RPC state for this IO\n", io->ci_ndelay ? 
"non-" : ""); @@ -478,8 +474,8 @@ static int lov_io_slice_init(struct lov_io *lio, switch (io->ci_type) { case CIT_READ: case CIT_WRITE: - lio->lis_pos = io->u.ci_rw.rw_range.cir_pos; - lio->lis_endpos = lio->lis_pos + io->u.ci_rw.rw_range.cir_count; + lio->lis_pos = io->u.ci_rw.crw_pos; + lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count; lio->lis_io_endpos = lio->lis_endpos; if (cl_io_is_append(io)) { LASSERT(io->ci_type == CIT_WRITE); @@ -564,7 +560,15 @@ static int lov_io_slice_init(struct lov_io *lio, */ if (cl_io_is_trunc(io)) { io->ci_write_intent.e_start = 0; - io->ci_write_intent.e_end = io->u.ci_setattr.sa_attr.lvb_size; + /* for writes, e_end is endpos, the location of the file + * pointer after the write is completed, so it is not accessed. + * For truncate, 'end' is the size, and *is* accessed. + * In other words, writes are [start, end), but truncate is + * [start, size], where both are included. So add 1 to the + * size when creating the write intent to account for this. 
+ */ + io->ci_write_intent.e_end = + io->u.ci_setattr.sa_attr.lvb_size + 1; } else { io->ci_write_intent.e_start = lio->lis_pos; io->ci_write_intent.e_end = lio->lis_endpos; @@ -639,7 +643,6 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio, int index = lov_comp_entry(sub->sub_subio_index); int stripe = lov_comp_stripe(sub->sub_subio_index); - io->ci_pio = parent->ci_pio; switch (io->ci_type) { case CIT_SETATTR: { io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr; @@ -685,16 +688,13 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio, } case CIT_READ: case CIT_WRITE: { - io->u.ci_rw.rw_ptask = parent->u.ci_rw.rw_ptask; - io->u.ci_rw.rw_iter = parent->u.ci_rw.rw_iter; - io->u.ci_rw.rw_iocb = parent->u.ci_rw.rw_iocb; - io->u.ci_rw.rw_file = parent->u.ci_rw.rw_file; - io->u.ci_rw.rw_sync = parent->u.ci_rw.rw_sync; + io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent); + io->ci_tried_all_mirrors = parent->ci_tried_all_mirrors; if (cl_io_is_append(parent)) { - io->u.ci_rw.rw_append = 1; + io->u.ci_wr.wr_append = 1; } else { - io->u.ci_rw.rw_range.cir_pos = start; - io->u.ci_rw.rw_range.cir_count = end - start; + io->u.ci_rw.crw_pos = start; + io->u.ci_rw.crw_count = end - start; } break; } @@ -787,9 +787,8 @@ static int lov_io_iter_init(const struct lu_env *env, if (rc != 0) break; - CDEBUG(D_VFSTRACE, - "shrink stripe: {%d, %d} range: [%llu, %llu)\n", - index, stripe, start, end); + CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n", + stripe, start, end); list_add_tail(&sub->sub_linkage, &lio->lis_active); } @@ -802,11 +801,10 @@ static int lov_io_iter_init(const struct lu_env *env, static int lov_io_rw_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { - struct cl_io *io = ios->cis_io; struct lov_io *lio = cl2lov_io(env, ios); + struct cl_io *io = ios->cis_io; struct lov_stripe_md_entry *lse; - struct cl_io_range *range = &io->u.ci_rw.rw_range; - loff_t start = range->cir_pos; + loff_t start = 
io->u.ci_rw.crw_pos; loff_t next; int index; @@ -816,7 +814,7 @@ static int lov_io_rw_iter_init(const struct lu_env *env, if (cl_io_is_append(io)) RETURN(lov_io_iter_init(env, ios)); - index = lov_io_layout_at(lio, range->cir_pos); + index = lov_io_layout_at(lio, io->u.ci_rw.crw_pos); if (index < 0) { /* non-existing layout component */ if (io->ci_type == CIT_READ) { /* @@ -824,8 +822,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env, * then set the next pos */ io->ci_continue = 0; - /* execute it in main thread */ - io->ci_pio = 0; RETURN(lov_io_iter_init(env, ios)); } @@ -849,28 +845,20 @@ static int lov_io_rw_iter_init(const struct lu_env *env, next = MAX_LFS_FILESIZE; } - LASSERTF(range->cir_pos >= lse->lsme_extent.e_start, - "pos %lld, [%lld, %lld)\n", range->cir_pos, + LASSERTF(io->u.ci_rw.crw_pos >= lse->lsme_extent.e_start, + "pos %lld, [%lld, %lld)\n", io->u.ci_rw.crw_pos, lse->lsme_extent.e_start, lse->lsme_extent.e_end); next = min_t(__u64, next, lse->lsme_extent.e_end); next = min_t(loff_t, next, lio->lis_io_endpos); - io->ci_continue = next < lio->lis_io_endpos; - range->cir_count = next - range->cir_pos; - lio->lis_pos = range->cir_pos; - lio->lis_endpos = range->cir_pos + range->cir_count; + io->ci_continue = next < lio->lis_io_endpos; + io->u.ci_rw.crw_count = next - io->u.ci_rw.crw_pos; + lio->lis_pos = io->u.ci_rw.crw_pos; + lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count; CDEBUG(D_VFSTRACE, - "stripe: {%d, %llu} range: [%llu, %llu) end: %llu, count: %zd\n", - index, start, lio->lis_pos, lio->lis_endpos, - lio->lis_io_endpos, range->cir_count); - - if (!io->ci_continue) { - /* the last piece of IO, execute it in main thread */ - io->ci_pio = 0; - } - - if (io->ci_pio) - RETURN(0); + "stripe: %llu chunk: [%llu, %llu) %llu, %zd\n", + (__u64)start, lio->lis_pos, lio->lis_endpos, + (__u64)lio->lis_io_endpos, io->u.ci_rw.crw_count); /* * XXX The following call should be optimized: we know, that @@ -1118,6 +1106,7 @@ static int 
lov_io_submit(const struct lu_env *env, struct lov_io_sub *sub; struct cl_page_list *plist = &lov_env_info(env)->lti_plist; struct cl_page *page; + struct cl_page *tmp; int index; int rc = 0; ENTRY; @@ -1143,11 +1132,11 @@ static int lov_io_submit(const struct lu_env *env, cl_2queue_init(cl2q); cl_page_list_move(&cl2q->c2_qin, qin, page); - index = lov_page_index(page); - while (qin->pl_nr > 0) { - page = cl_page_list_first(qin); - if (index != lov_page_index(page)) - break; + index = page->cp_lov_index; + cl_page_list_for_each_safe(page, tmp, qin) { + /* this page is not on this stripe */ + if (index != page->cp_lov_index) + continue; cl_page_list_move(&cl2q->c2_qin, qin, page); } @@ -1210,10 +1199,10 @@ static int lov_io_commit_async(const struct lu_env *env, cl_page_list_move(plist, queue, page); - index = lov_page_index(page); + index = page->cp_lov_index; while (queue->pl_nr > 0) { page = cl_page_list_first(queue); - if (index != lov_page_index(page)) + if (index != page->cp_lov_index) break; cl_page_list_move(plist, queue, page); @@ -1259,7 +1248,7 @@ static int lov_io_fault_start(const struct lu_env *env, fio = &ios->cis_io->u.ci_fault; lio = cl2lov_io(env, ios); - sub = lov_sub_get(env, lio, lov_page_index(fio->ft_page)); + sub = lov_sub_get(env, lio, fio->ft_page->cp_lov_index); sub->sub_io.u.ci_fault.ft_nob = fio->ft_nob; RETURN(lov_io_start(env, ios));