Whamcloud - gitweb
LU-12328 flr: avoid reading unhealthy mirror
[fs/lustre-release.git] / lustre / lov / lov_io.c
index 3cc6ec0..92b8ce5 100644 (file)
@@ -142,6 +142,7 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio,
        sub_io->ci_lock_no_expand = io->ci_lock_no_expand;
        sub_io->ci_ndelay = io->ci_ndelay;
        sub_io->ci_layout_version = io->ci_layout_version;
+       sub_io->ci_tried_all_mirrors = io->ci_tried_all_mirrors;
 
        result = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj);
 
@@ -191,19 +192,6 @@ out:
  * Lov io operations.
  *
  */
-
-int lov_page_index(const struct cl_page *page)
-{
-       const struct cl_page_slice *slice;
-       ENTRY;
-
-       slice = cl_page_at(page, &lov_device_type);
-       LASSERT(slice != NULL);
-       LASSERT(slice->cpl_obj != NULL);
-
-       RETURN(cl2lov_page(slice)->lps_index);
-}
-
 static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
                              struct cl_io *io)
 {
@@ -418,13 +406,13 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
                                found = true;
                                break;
                        }
-               }
-
+               } /* each component of the mirror */
                if (found) {
                        index = (index + i) % comp->lo_mirror_count;
                        break;
                }
-       }
+       } /* each mirror */
+
        if (i == comp->lo_mirror_count) {
                CERROR(DFID": failed to find a component covering "
                       "I/O region at %llu\n",
@@ -448,16 +436,22 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
         * of this client has been partitioned. We should relinquish CPU for
         * a while before trying again.
         */
-       ++io->ci_ndelay_tried;
-       if (io->ci_ndelay && io->ci_ndelay_tried >= comp->lo_mirror_count) {
+       if (io->ci_ndelay && io->ci_ndelay_tried > 0 &&
+           (io->ci_ndelay_tried % comp->lo_mirror_count == 0)) {
                set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1)); /* 10ms */
+               schedule_timeout(cfs_time_seconds(1) / 100); /* 10ms */
                if (signal_pending(current))
                        RETURN(-EINTR);
 
-               /* reset retry counter */
-               io->ci_ndelay_tried = 1;
+               /**
+                * Set ci_tried_all_mirrors to turn off fast mirror
+                * switching for reads once all mirrors have been tried
+                * for several rounds.
+                */
+               io->ci_tried_all_mirrors = io->ci_ndelay_tried %
+                                          (comp->lo_mirror_count * 4) == 0;
        }
+       ++io->ci_ndelay_tried;
 
        CDEBUG(D_VFSTRACE, "use %sdelayed RPC state for this IO\n",
               io->ci_ndelay ? "non-" : "");
@@ -695,6 +689,7 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio,
        case CIT_READ:
        case CIT_WRITE: {
                io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent);
+               io->ci_tried_all_mirrors = parent->ci_tried_all_mirrors;
                if (cl_io_is_append(parent)) {
                        io->u.ci_wr.wr_append = 1;
                } else {
@@ -1137,10 +1132,10 @@ static int lov_io_submit(const struct lu_env *env,
                cl_2queue_init(cl2q);
                cl_page_list_move(&cl2q->c2_qin, qin, page);
 
-               index = lov_page_index(page);
+               index = page->cp_lov_index;
                cl_page_list_for_each_safe(page, tmp, qin) {
                        /* this page is not on this stripe */
-                       if (index != lov_page_index(page))
+                       if (index != page->cp_lov_index)
                                continue;
 
                        cl_page_list_move(&cl2q->c2_qin, qin, page);
@@ -1204,10 +1199,10 @@ static int lov_io_commit_async(const struct lu_env *env,
 
                cl_page_list_move(plist, queue, page);
 
-               index = lov_page_index(page);
+               index = page->cp_lov_index;
                while (queue->pl_nr > 0) {
                        page = cl_page_list_first(queue);
-                       if (index != lov_page_index(page))
+                       if (index != page->cp_lov_index)
                                break;
 
                        cl_page_list_move(plist, queue, page);
@@ -1253,7 +1248,7 @@ static int lov_io_fault_start(const struct lu_env *env,
 
        fio = &ios->cis_io->u.ci_fault;
        lio = cl2lov_io(env, ios);
-       sub = lov_sub_get(env, lio, lov_page_index(fio->ft_page));
+       sub = lov_sub_get(env, lio, fio->ft_page->cp_lov_index);
        sub->sub_io.u.ci_fault.ft_nob = fio->ft_nob;
 
        RETURN(lov_io_start(env, ios));