X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flov%2Flov_io.c;h=e6996032d4ce2a65ff1eefdea3c7ec1f8b5d3798;hb=4c4c327b25f3414f20a9ae600e7311f1aa3a866d;hp=d67dfa7a5dc26b187406224f2f73c83432070d55;hpb=0c2cc920370e5dcf99e141610020d75082d2898c;p=fs%2Flustre-release.git diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index d67dfa7..e699603 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -142,6 +142,7 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, sub_io->ci_lock_no_expand = io->ci_lock_no_expand; sub_io->ci_ndelay = io->ci_ndelay; sub_io->ci_layout_version = io->ci_layout_version; + sub_io->ci_tried_all_mirrors = io->ci_tried_all_mirrors; result = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj); @@ -191,19 +192,6 @@ out: * Lov io operations. * */ - -int lov_page_index(const struct cl_page *page) -{ - const struct cl_page_slice *slice; - ENTRY; - - slice = cl_page_at(page, &lov_device_type); - LASSERT(slice != NULL); - LASSERT(slice->cpl_obj != NULL); - - RETURN(cl2lov_page(slice)->lps_index); -} - static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, struct cl_io *io) { @@ -269,8 +257,8 @@ static int lov_io_mirror_write_intent(struct lov_io *lio, if (!lu_extent_is_overlapped(ext, lle->lle_extent)) continue; - ext->e_start = MIN(ext->e_start, lle->lle_extent->e_start); - ext->e_end = MAX(ext->e_end, lle->lle_extent->e_end); + ext->e_start = min(ext->e_start, lle->lle_extent->e_start); + ext->e_end = max(ext->e_end, lle->lle_extent->e_end); ++count; } if (count == 0) { @@ -418,13 +406,13 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, found = true; break; } - } - + } /* each component of the mirror */ if (found) { index = (index + i) % comp->lo_mirror_count; break; } - } + } /* each mirror */ + if (i == comp->lo_mirror_count) { CERROR(DFID": failed to find a component covering " "I/O region at %llu\n", @@ -448,16 +436,22 @@ static int 
lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, * of this client has been partitioned. We should relinquish CPU for * a while before trying again. */ - ++io->ci_ndelay_tried; - if (io->ci_ndelay && io->ci_ndelay_tried >= comp->lo_mirror_count) { + if (io->ci_ndelay && io->ci_ndelay_tried > 0 && + (io->ci_ndelay_tried % comp->lo_mirror_count == 0)) { set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); /* 10ms */ + schedule_timeout(cfs_time_seconds(1) / 100); /* 10ms */ if (signal_pending(current)) RETURN(-EINTR); - /* reset retry counter */ - io->ci_ndelay_tried = 1; + /** + * we'd set ci_tried_all_mirrors to turn off fast mirror + * switching for read after we've tried all mirrors several + * rounds. + */ + io->ci_tried_all_mirrors = io->ci_ndelay_tried % + (comp->lo_mirror_count * 4) == 0; } + ++io->ci_ndelay_tried; CDEBUG(D_VFSTRACE, "use %sdelayed RPC state for this IO\n", io->ci_ndelay ? "non-" : ""); @@ -566,7 +560,15 @@ static int lov_io_slice_init(struct lov_io *lio, */ if (cl_io_is_trunc(io)) { io->ci_write_intent.e_start = 0; - io->ci_write_intent.e_end = io->u.ci_setattr.sa_attr.lvb_size; + /* for writes, e_end is endpos, the location of the file + * pointer after the write is completed, so it is not accessed. + * For truncate, 'end' is the size, and *is* accessed. + * In other words, writes are [start, end), but truncate is + * [start, size], where both are included. So add 1 to the + * size when creating the write intent to account for this. 
+ */ + io->ci_write_intent.e_end = + io->u.ci_setattr.sa_attr.lvb_size + 1; } else { io->ci_write_intent.e_start = lio->lis_pos; io->ci_write_intent.e_end = lio->lis_endpos; @@ -687,6 +689,7 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio, case CIT_READ: case CIT_WRITE: { io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent); + io->ci_tried_all_mirrors = parent->ci_tried_all_mirrors; if (cl_io_is_append(parent)) { io->u.ci_wr.wr_append = 1; } else { @@ -1041,7 +1044,8 @@ static int lov_io_read_ahead(const struct lu_env *env, ra); CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n", - PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc); + PFID(lu_object_fid(lov2lu(loo))), ra->cra_end_idx, + r0->lo_nr, rc); if (rc != 0) RETURN(rc); @@ -1053,29 +1057,29 @@ static int lov_io_read_ahead(const struct lu_env *env, */ /* cra_end is stripe level, convert it into file level */ - ra_end = ra->cra_end; + ra_end = ra->cra_end_idx; if (ra_end != CL_PAGE_EOF) - ra->cra_end = lov_stripe_pgoff(loo->lo_lsm, index, - ra_end, stripe); + ra->cra_end_idx = lov_stripe_pgoff(loo->lo_lsm, index, + ra_end, stripe); /* boundary of current component */ ra_end = cl_index(obj, (loff_t)lov_io_extent(lio, index)->e_end); - if (ra_end != CL_PAGE_EOF && ra->cra_end >= ra_end) - ra->cra_end = ra_end - 1; + if (ra_end != CL_PAGE_EOF && ra->cra_end_idx >= ra_end) + ra->cra_end_idx = ra_end - 1; if (r0->lo_nr == 1) /* single stripe file */ RETURN(0); pps = lov_lse(loo, index)->lsme_stripe_size >> PAGE_SHIFT; - CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, index = %u, " + CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, index = %d, " "stripe_size = %u, stripe no = %u, start index = %lu\n", - PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, pps, index, + PFID(lu_object_fid(lov2lu(loo))), ra->cra_end_idx, pps, index, lov_lse(loo, index)->lsme_stripe_size, stripe, start); /* never exceed the end of the stripe */ - ra->cra_end = min_t(pgoff_t, - ra->cra_end, 
start + pps - start % pps - 1); + ra->cra_end_idx = min_t(pgoff_t, ra->cra_end_idx, + start + pps - start % pps - 1); RETURN(0); } @@ -1103,6 +1107,7 @@ static int lov_io_submit(const struct lu_env *env, struct lov_io_sub *sub; struct cl_page_list *plist = &lov_env_info(env)->lti_plist; struct cl_page *page; + struct cl_page *tmp; int index; int rc = 0; ENTRY; @@ -1128,11 +1133,11 @@ static int lov_io_submit(const struct lu_env *env, cl_2queue_init(cl2q); cl_page_list_move(&cl2q->c2_qin, qin, page); - index = lov_page_index(page); - while (qin->pl_nr > 0) { - page = cl_page_list_first(qin); - if (index != lov_page_index(page)) - break; + index = page->cp_lov_index; + cl_page_list_for_each_safe(page, tmp, qin) { + /* this page is not on this stripe */ + if (index != page->cp_lov_index) + continue; cl_page_list_move(&cl2q->c2_qin, qin, page); } @@ -1195,10 +1200,10 @@ static int lov_io_commit_async(const struct lu_env *env, cl_page_list_move(plist, queue, page); - index = lov_page_index(page); + index = page->cp_lov_index; while (queue->pl_nr > 0) { page = cl_page_list_first(queue); - if (index != lov_page_index(page)) + if (index != page->cp_lov_index) break; cl_page_list_move(plist, queue, page); @@ -1244,7 +1249,7 @@ static int lov_io_fault_start(const struct lu_env *env, fio = &ios->cis_io->u.ci_fault; lio = cl2lov_io(env, ios); - sub = lov_sub_get(env, lio, lov_page_index(fio->ft_page)); + sub = lov_sub_get(env, lio, fio->ft_page->cp_lov_index); sub->sub_io.u.ci_fault.ft_nob = fio->ft_nob; RETURN(lov_io_start(env, ios));