X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flov%2Flov_io.c;h=4253e215a5f6e23f68c616b77e29f5f4d5dd4689;hb=53935a5dbfb95e0f7c9a7ea9e690cfb2dd6f82cb;hp=346a99241e5220ab3fe4ef16f1b034d44a1e0ef1;hpb=fbf5870b9848929d352460f1f005b79c0b5ccc5a;p=fs%2Flustre-release.git diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 346a992..4253e21 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -36,32 +36,24 @@ * Implementation of cl_io for LOV layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_LOV #include "lov_cl_internal.h" -/** \addtogroup lov lov @{ */ +/** \addtogroup lov + * @{ + */ -static void lov_sub_enter(struct lov_io_sub *sub) +static inline void lov_sub_enter(struct lov_io_sub *sub) { - ENTRY; - if (sub->sub_reenter++ == 0) { - sub->sub_cookie = cl_env_reenter(); - cl_env_implant(sub->sub_env, &sub->sub_refcheck2); - } - EXIT; + sub->sub_reenter++; } - -static void lov_sub_exit(struct lov_io_sub *sub) +static inline void lov_sub_exit(struct lov_io_sub *sub) { - ENTRY; - if (--sub->sub_reenter == 0) { - cl_env_unplant(sub->sub_env, &sub->sub_refcheck2); - cl_env_reexit(sub->sub_cookie); - } - EXIT; + sub->sub_reenter--; } static void lov_io_sub_fini(const struct lu_env *env, struct lov_io *lio, @@ -93,16 +85,20 @@ static void lov_io_sub_fini(const struct lu_env *env, struct lov_io *lio, static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio, int stripe, loff_t start, loff_t end) { - struct lov_stripe_md *lsm = lov_r0(lio->lis_object)->lo_lsm; - struct cl_io *parent = lio->lis_cl.cis_io; + struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; + struct cl_io *parent = lio->lis_cl.cis_io; switch(io->ci_type) { - case CIT_TRUNC: { - size_t new_size = parent->u.ci_truncate.tr_size; - - new_size = lov_size_to_stripe(lsm, new_size, stripe); - io->u.ci_truncate.tr_capa = parent->u.ci_truncate.tr_capa; - io->u.ci_truncate.tr_size = new_size; + case CIT_SETATTR: { + io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr; + io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid; + io->u.ci_setattr.sa_capa = parent->u.ci_setattr.sa_capa; + if (cl_io_is_trunc(io)) { + loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size; + + new_size = lov_size_to_stripe(lsm, new_size, stripe); + io->u.ci_setattr.sa_attr.lvb_size = new_size; + } break; } case CIT_FAULT: { @@ -114,10 +110,23 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio, io->u.ci_fault.ft_index = cl_index(obj, off); break; } - case CIT_READ: - case CIT_WRITE: { - io->u.ci_rw.crw_pos = start; - io->u.ci_rw.crw_count = end - start; + case CIT_FSYNC: { + io->u.ci_fsync.fi_start = start; + io->u.ci_fsync.fi_end = end; + io->u.ci_fsync.fi_capa = parent->u.ci_fsync.fi_capa; + io->u.ci_fsync.fi_fid = parent->u.ci_fsync.fi_fid; + io->u.ci_fsync.fi_mode = parent->u.ci_fsync.fi_mode; + break; + } + case CIT_READ: + case CIT_WRITE: { + io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent); + if (cl_io_is_append(parent)) { + io->u.ci_wr.wr_append = 1; + } else { + io->u.ci_rw.crw_pos = start; + io->u.ci_rw.crw_count = end - start; + } break; } default: @@ -146,17 +155,8 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, sub->sub_io_initialized = 0; sub->sub_borrowed = 0; - /* - * First sub-io. Use ->lis_single_subio and current environment, to - * avoid dynamic allocation. - */ - if (lio->lis_active_subios == 0) { - sub->sub_io = &lio->lis_single_subio; - lio->lis_single_subio_index = stripe; - sub->sub_env = cl_env_get(&sub->sub_refcheck); - LASSERT(sub->sub_env == env); - } else if (lio->lis_mem_frozen) { - LASSERT(mutex_is_locked(&ld->ld_mutex)); + if (lio->lis_mem_frozen) { + LASSERT(mutex_is_locked(&ld->ld_mutex)); sub->sub_io = &ld->ld_emrg[stripe]->emrg_subio; sub->sub_env = ld->ld_emrg[stripe]->emrg_env; sub->sub_borrowed = 1; @@ -167,12 +167,23 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, cookie = cl_env_reenter(); sub->sub_env = cl_env_get(&sub->sub_refcheck); cl_env_reexit(cookie); - - OBD_ALLOC_PTR(sub->sub_io); if (IS_ERR(sub->sub_env)) result = PTR_ERR(sub->sub_env); - else if (sub->sub_io == NULL) - result = -ENOMEM; + + if (result == 0) { + /* + * First sub-io. Use ->lis_single_subio to + * avoid dynamic allocation. + */ + if (lio->lis_active_subios == 0) { + sub->sub_io = &lio->lis_single_subio; + lio->lis_single_subio_index = stripe; + } else { + OBD_ALLOC_PTR(sub->sub_io); + if (sub->sub_io == NULL) + result = -ENOMEM; + } + } } if (result == 0) { @@ -185,6 +196,8 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, sub_io->ci_parent = io; sub_io->ci_lockreq = io->ci_lockreq; sub_io->ci_type = io->ci_type; + sub_io->ci_no_srvlock = io->ci_no_srvlock; + sub_io->ci_noatime = io->ci_noatime; lov_sub_enter(sub); result = cl_io_sub_init(sub->sub_env, sub_io, @@ -201,8 +214,8 @@ static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, RETURN(result); } -static struct lov_io_sub *lov_sub_get(const struct lu_env *env, - struct lov_io *lio, int stripe) +struct lov_io_sub *lov_sub_get(const struct lu_env *env, + struct lov_io *lio, int stripe) { int rc; struct lov_io_sub *sub = &lio->lis_subs[stripe]; @@ -235,22 +248,24 @@ void lov_sub_put(struct lov_io_sub *sub) static int lov_page_stripe(const struct cl_page *page) { - struct lovsub_object *subobj; + struct lovsub_object *subobj; + const struct cl_page_slice *slice; + ENTRY; - ENTRY; - subobj = lu2lovsub( - lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header, - &lovsub_device_type)); - LASSERT(subobj != NULL); - RETURN(subobj->lso_index); + slice = cl_page_at(page, &lovsub_device_type); + LASSERT(slice != NULL); + LASSERT(slice->cpl_obj != NULL); + + subobj = cl2lovsub(slice->cpl_obj); + RETURN(subobj->lso_index); } struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, const struct cl_page_slice *slice) { - struct lov_stripe_md *lsm = lov_r0(lio->lis_object)->lo_lsm; - struct cl_page *page = slice->cpl_page; - int stripe; + struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; + struct cl_page *page = slice->cpl_page; + int stripe; LASSERT(lio->lis_cl.cis_io != NULL); LASSERT(cl2lov(slice->cpl_obj) == lio->lis_object); @@ -266,19 +281,19 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, struct cl_io *io) { - struct lov_object *lov = lio->lis_object; - struct lov_stripe_md *lsm = lov_r0(lov)->lo_lsm; - int result; + struct lov_stripe_md *lsm; + int result; + ENTRY; - LASSERT(lio->lis_object != NULL); - ENTRY; + LASSERT(lio->lis_object != NULL); + lsm = lio->lis_object->lo_lsm; /* * Need to be optimized, we can't afford to allocate a piece of memory * when writing a page. -jay */ - OBD_ALLOC(lio->lis_subs, - lsm->lsm_stripe_count * sizeof lio->lis_subs[0]); + OBD_ALLOC_LARGE(lio->lis_subs, + lsm->lsm_stripe_count * sizeof lio->lis_subs[0]); if (lio->lis_subs != NULL) { lio->lis_nr_subios = lio->lis_stripe_count; lio->lis_single_subio_index = -1; @@ -292,14 +307,13 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, static void lov_io_slice_init(struct lov_io *lio, struct lov_object *obj, struct cl_io *io) { - struct lov_stripe_md *lsm = lov_r0(obj)->lo_lsm; + ENTRY; - LASSERT(lsm != NULL); - ENTRY; + io->ci_result = 0; + lio->lis_object = obj; - io->ci_result = 0; - lio->lis_object = obj; - lio->lis_stripe_count = lsm->lsm_stripe_count; + LASSERT(obj->lo_lsm != NULL); + lio->lis_stripe_count = obj->lo_lsm->lsm_stripe_count; switch (io->ci_type) { case CIT_READ: @@ -314,8 +328,11 @@ static void lov_io_slice_init(struct lov_io *lio, } break; - case CIT_TRUNC: - lio->lis_pos = io->u.ci_truncate.tr_size; + case CIT_SETATTR: + if (cl_io_is_trunc(io)) + lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size; + else + lio->lis_pos = 0; lio->lis_endpos = OBD_OBJECT_EOF; break; @@ -326,6 +343,12 @@ static void lov_io_slice_init(struct lov_io *lio, break; } + case CIT_FSYNC: { + lio->lis_pos = io->u.ci_fsync.fi_start; + lio->lis_endpos = io->u.ci_fsync.fi_end; + break; + } + case CIT_MISC: lio->lis_pos = 0; lio->lis_endpos = OBD_OBJECT_EOF; @@ -340,18 +363,23 @@ static void lov_io_slice_init(struct lov_io *lio, static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) { - struct lov_io *lio = cl2lov_io(env, ios); - int i; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_object *lov = cl2lov(ios->cis_obj); + int i; ENTRY; if (lio->lis_subs != NULL) { for (i = 0; i < lio->lis_nr_subios; i++) lov_io_sub_fini(env, lio, &lio->lis_subs[i]); - OBD_FREE(lio->lis_subs, + OBD_FREE_LARGE(lio->lis_subs, lio->lis_nr_subios * sizeof lio->lis_subs[0]); lio->lis_nr_subios = 0; } - EXIT; + + LASSERT(cfs_atomic_read(&lov->lo_active_ios) > 0); + if (cfs_atomic_dec_and_test(&lov->lo_active_ios)) + wake_up_all(&lov->lo_waitq); + EXIT; } static obd_off lov_offset_mod(obd_off val, int delta) @@ -364,8 +392,8 @@ static obd_off lov_offset_mod(obd_off val, int delta) static int lov_io_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_stripe_md *lsm = lov_r0(lio->lis_object)->lo_lsm; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; struct lov_io_sub *sub; obd_off endpos; obd_off start; @@ -387,12 +415,13 @@ static int lov_io_iter_init(const struct lu_env *env, start, end); rc = cl_io_iter_init(sub->sub_env, sub->sub_io); lov_sub_put(sub); - CDEBUG(D_VFSTRACE, "shrink: %i [%llu, %llu)\n", + CDEBUG(D_VFSTRACE, "shrink: %d ["LPU64", "LPU64")\n", stripe, start, end); } else rc = PTR_ERR(sub); + if (!rc) - list_add_tail(&sub->sub_linkage, &lio->lis_active); + cfs_list_add_tail(&sub->sub_linkage, &lio->lis_active); else break; } @@ -402,12 +431,12 @@ static int lov_io_iter_init(const struct lu_env *env, static int lov_io_rw_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_io *io = ios->cis_io; - struct lov_stripe_md *lsm = lov_r0(cl2lov(ios->cis_obj))->lo_lsm; + struct lov_io *lio = cl2lov_io(env, ios); + struct cl_io *io = ios->cis_io; + struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; loff_t start = io->u.ci_rw.crw_pos; loff_t next; - int ssize = lsm->lsm_stripe_size; + unsigned long ssize = lsm->lsm_stripe_size; LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); ENTRY; @@ -415,42 +444,46 @@ static int lov_io_rw_iter_init(const struct lu_env *env, /* fast path for common case. */ if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) { - do_div(start, ssize); - next = (start + 1) * ssize; - if (next <= start * ssize) - next = ~0ull; + lov_do_div64(start, ssize); + next = (start + 1) * ssize; + if (next <= start * ssize) + next = ~0ull; io->ci_continue = next < lio->lis_io_endpos; io->u.ci_rw.crw_count = min_t(loff_t, lio->lis_io_endpos, next) - io->u.ci_rw.crw_pos; lio->lis_pos = io->u.ci_rw.crw_pos; lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count; - CDEBUG(D_VFSTRACE, "stripe: %llu chunk: [%llu, %llu) %llu\n", - (__u64)start, lio->lis_pos, lio->lis_endpos, - (__u64)lio->lis_io_endpos); - } - /* - * XXX The following call should be optimized: we know, that - * [lio->lis_pos, lio->lis_endpos) intersects with exactly one stripe. - */ - RETURN(lov_io_iter_init(env, ios)); + CDEBUG(D_VFSTRACE, "stripe: "LPU64" chunk: ["LPU64", "LPU64") " + LPU64"\n", (__u64)start, lio->lis_pos, lio->lis_endpos, + (__u64)lio->lis_io_endpos); + } + /* + * XXX The following call should be optimized: we know, that + * [lio->lis_pos, lio->lis_endpos) intersects with exactly one stripe. + */ + RETURN(lov_io_iter_init(env, ios)); } static int lov_io_call(const struct lu_env *env, struct lov_io *lio, int (*iofunc)(const struct lu_env *, struct cl_io *)) { + struct cl_io *parent = lio->lis_cl.cis_io; struct lov_io_sub *sub; int rc = 0; ENTRY; - list_for_each_entry(sub, &lio->lis_active, sub_linkage) { + cfs_list_for_each_entry(sub, &lio->lis_active, sub_linkage) { lov_sub_enter(sub); rc = iofunc(sub->sub_env, sub->sub_io); lov_sub_exit(sub); if (rc) break; - } - RETURN(rc); + + if (parent->ci_result == 0) + parent->ci_result = sub->sub_io->ci_result; + } + RETURN(rc); } static int lov_io_lock(const struct lu_env *env, const struct cl_io_slice *ios) @@ -509,8 +542,8 @@ static void lov_io_iter_fini(const struct lu_env *env, ENTRY; rc = lov_io_call(env, lio, lov_io_iter_fini_wrapper); LASSERT(rc == 0); - while (!list_empty(&lio->lis_active)) - list_del_init(lio->lis_active.next); + while (!cfs_list_empty(&lio->lis_active)) + cfs_list_del_init(lio->lis_active.next); EXIT; } @@ -525,14 +558,6 @@ static void lov_io_unlock(const struct lu_env *env, EXIT; } - -static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld, - struct cl_page_list *qin, - int idx, int alloc) -{ - return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list; -} - /** * lov implementation of cl_operations::cio_submit() method. It takes a list * of pages in \a queue, splits it into per-stripe sub-lists, invokes @@ -549,157 +574,147 @@ static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld, * lov_device::ld_mutex mutex. */ static int lov_io_submit(const struct lu_env *env, - const struct cl_io_slice *ios, - enum cl_req_type crt, struct cl_2queue *queue) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_object *obj = lio->lis_object; - struct lov_device *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev); - struct cl_page_list *qin = &queue->c2_qin; - struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - struct cl_page_list *stripes_qin = NULL; - struct cl_page *page; - struct cl_page *tmp; - int stripe; - -#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc) + const struct cl_io_slice *ios, + enum cl_req_type crt, struct cl_2queue *queue) +{ + struct cl_page_list *qin = &queue->c2_qin; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; + struct cl_page *page; + int stripe; + int rc = 0; + ENTRY; - int rc = 0; - int alloc = -#if defined(__KERNEL__) && defined(__linux__) - !(current->flags & PF_MEMALLOC); -#else - 1; -#endif - ENTRY; if (lio->lis_active_subios == 1) { int idx = lio->lis_single_subio_index; - struct lov_io_sub *sub; LASSERT(idx < lio->lis_nr_subios); sub = lov_sub_get(env, lio, idx); LASSERT(!IS_ERR(sub)); LASSERT(sub->sub_io == &lio->lis_single_subio); - rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, crt, queue); + rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, + crt, queue); lov_sub_put(sub); RETURN(rc); } LASSERT(lio->lis_subs != NULL); - if (alloc) { - OBD_ALLOC(stripes_qin, - sizeof(*stripes_qin) * lio->lis_nr_subios); - if (stripes_qin == NULL) - RETURN(-ENOMEM); - - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) - cl_page_list_init(&stripes_qin[stripe]); - } else { - /* - * If we get here, it means pageout & swap doesn't help. - * In order to not make things worse, even don't try to - * allocate the memory with __GFP_NOWARN. -jay - */ - mutex_lock(&ld->ld_mutex); - lio->lis_mem_frozen = 1; - } - cl_2queue_init(cl2q); - cl_page_list_for_each_safe(page, tmp, qin) { - stripe = lov_page_stripe(page); - cl_page_list_move(QIN(stripe), qin, page); - } + cl_page_list_init(plist); + while (qin->pl_nr > 0) { + struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct lov_io_sub *sub; - struct cl_page_list *sub_qin = QIN(stripe); + cl_2queue_init(cl2q); - if (list_empty(&sub_qin->pl_pages)) - continue; + page = cl_page_list_first(qin); + cl_page_list_move(&cl2q->c2_qin, qin, page); - cl_page_list_splice(sub_qin, &cl2q->c2_qin); - sub = lov_sub_get(env, lio, stripe); - if (!IS_ERR(sub)) { - rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, - crt, cl2q); - lov_sub_put(sub); - } else - rc = PTR_ERR(sub); - cl_page_list_splice(&cl2q->c2_qin, &queue->c2_qin); - cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout); - if (rc != 0) - break; - } - - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct cl_page_list *sub_qin = QIN(stripe); - - if (list_empty(&sub_qin->pl_pages)) - continue; - - cl_page_list_splice(sub_qin, qin); - } - - if (alloc) { - OBD_FREE(stripes_qin, - sizeof(*stripes_qin) * lio->lis_nr_subios); - } else { - int i; - - for (i = 0; i < lio->lis_nr_subios; i++) { - struct cl_io *cio = lio->lis_subs[i].sub_io; - - if (cio && cio == &ld->ld_emrg[i]->emrg_subio) - lov_io_sub_fini(env, lio, &lio->lis_subs[i]); - } - lio->lis_mem_frozen = 0; - mutex_unlock(&ld->ld_mutex); - } - - RETURN(rc); -#undef QIN -} + stripe = lov_page_stripe(page); + while (qin->pl_nr > 0) { + page = cl_page_list_first(qin); + if (stripe != lov_page_stripe(page)) + break; -static int lov_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; + cl_page_list_move(&cl2q->c2_qin, qin, page); + } - ENTRY; - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_prepare_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - RETURN(result); -} - -static int lov_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; - - ENTRY; - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_commit_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - RETURN(result); + sub = lov_sub_get(env, lio, stripe); + if (!IS_ERR(sub)) { + rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, + crt, cl2q); + lov_sub_put(sub); + } else { + rc = PTR_ERR(sub); + } + + cl_page_list_splice(&cl2q->c2_qin, plist); + cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout); + cl_2queue_fini(env, cl2q); + + if (rc != 0) + break; + } + + cl_page_list_splice(plist, qin); + cl_page_list_fini(env, plist); + + RETURN(rc); +} + +static int lov_io_commit_async(const struct lu_env *env, + const struct cl_io_slice *ios, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb) +{ + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page *page; + int rc = 0; + ENTRY; + + if (lio->lis_active_subios == 1) { + int idx = lio->lis_single_subio_index; + + LASSERT(idx < lio->lis_nr_subios); + sub = lov_sub_get(env, lio, idx); + LASSERT(!IS_ERR(sub)); + LASSERT(sub->sub_io == &lio->lis_single_subio); + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue, + from, to, cb); + lov_sub_put(sub); + RETURN(rc); + } + + LASSERT(lio->lis_subs != NULL); + + cl_page_list_init(plist); + while (queue->pl_nr > 0) { + int stripe_to = to; + int stripe; + + LASSERT(plist->pl_nr == 0); + page = cl_page_list_first(queue); + cl_page_list_move(plist, queue, page); + + stripe = lov_page_stripe(page); + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + if (stripe != lov_page_stripe(page)) + break; + + cl_page_list_move(plist, queue, page); + } + + if (queue->pl_nr > 0) /* still has more pages */ + stripe_to = PAGE_SIZE; + + sub = lov_sub_get(env, lio, stripe); + if (!IS_ERR(sub)) { + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, + plist, from, stripe_to, cb); + lov_sub_put(sub); + } else { + rc = PTR_ERR(sub); + break; + } + + if (plist->pl_nr > 0) /* short write */ + break; + + from = 0; + } + + /* for error case, add the page back into the qin list */ + LASSERT(ergo(rc == 0, plist->pl_nr == 0)); + while (plist->pl_nr > 0) { + /* error occurred, add the uncommitted pages back into queue */ + page = cl_page_list_last(plist); + cl_page_list_move_head(queue, plist, page); + } + + RETURN(rc); } static int lov_io_fault_start(const struct lu_env *env, @@ -718,6 +733,28 @@ static int lov_io_fault_start(const struct lu_env *env, RETURN(lov_io_start(env, ios)); } +static void lov_io_fsync_end(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + unsigned int *written = &ios->cis_io->u.ci_fsync.fi_nr_written; + ENTRY; + + *written = 0; + cfs_list_for_each_entry(sub, &lio->lis_active, sub_linkage) { + struct cl_io *subio = sub->sub_io; + + lov_sub_enter(sub); + lov_io_end_wrapper(sub->sub_env, subio); + lov_sub_exit(sub); + + if (subio->ci_result == 0) + *written += subio->u.ci_fsync.fi_nr_written; + } + RETURN_EXIT; +} + static const struct cl_io_operations lov_io_ops = { .op = { [CIT_READ] = { @@ -738,7 +775,7 @@ static const struct cl_io_operations lov_io_ops = { .cio_start = lov_io_start, .cio_end = lov_io_end }, - [CIT_TRUNC] = { + [CIT_SETATTR] = { .cio_fini = lov_io_fini, .cio_iter_init = lov_io_iter_init, .cio_iter_fini = lov_io_iter_fini, @@ -756,20 +793,21 @@ static const struct cl_io_operations lov_io_ops = { .cio_start = lov_io_fault_start, .cio_end = lov_io_end }, - [CIT_MISC] = { - .cio_fini = lov_io_fini - } - }, - .req_op = { - [CRT_READ] = { - .cio_submit = lov_io_submit - }, - [CRT_WRITE] = { - .cio_submit = lov_io_submit - } - }, - .cio_prepare_write = lov_io_prepare_write, - .cio_commit_write = lov_io_commit_write + [CIT_FSYNC] = { + .cio_fini = lov_io_fini, + .cio_iter_init = lov_io_iter_init, + .cio_iter_fini = lov_io_iter_fini, + .cio_lock = lov_io_lock, + .cio_unlock = lov_io_unlock, + .cio_start = lov_io_start, + .cio_end = lov_io_fsync_end + }, + [CIT_MISC] = { + .cio_fini = lov_io_fini + } + }, + .cio_submit = lov_io_submit, + .cio_commit_async = lov_io_commit_async, }; /***************************************************************************** @@ -781,8 +819,12 @@ static const struct cl_io_operations lov_io_ops = { static void lov_empty_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) { - ENTRY; - EXIT; + struct lov_object *lov = cl2lov(ios->cis_obj); + ENTRY; + + if (cfs_atomic_dec_and_test(&lov->lo_active_ios)) + wake_up_all(&lov->lo_waitq); + EXIT; } static void lov_empty_impossible(const struct lu_env *env, @@ -799,8 +841,8 @@ static void lov_empty_impossible(const struct lu_env *env, static const struct cl_io_operations lov_empty_io_ops = { .op = { [CIT_READ] = { -#if 0 .cio_fini = lov_empty_io_fini, +#if 0 .cio_iter_init = LOV_EMPTY_IMPOSSIBLE, .cio_lock = LOV_EMPTY_IMPOSSIBLE, .cio_start = LOV_EMPTY_IMPOSSIBLE, @@ -814,7 +856,7 @@ static const struct cl_io_operations lov_empty_io_ops = { .cio_start = LOV_EMPTY_IMPOSSIBLE, .cio_end = LOV_EMPTY_IMPOSSIBLE }, - [CIT_TRUNC] = { + [CIT_SETATTR] = { .cio_fini = lov_empty_io_fini, .cio_iter_init = LOV_EMPTY_IMPOSSIBLE, .cio_lock = LOV_EMPTY_IMPOSSIBLE, @@ -828,19 +870,15 @@ static const struct cl_io_operations lov_empty_io_ops = { .cio_start = LOV_EMPTY_IMPOSSIBLE, .cio_end = LOV_EMPTY_IMPOSSIBLE }, - [CIT_MISC] = { - .cio_fini = lov_empty_io_fini - } - }, - .req_op = { - [CRT_READ] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - }, - [CRT_WRITE] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - } - }, - .cio_commit_write = LOV_EMPTY_IMPOSSIBLE + [CIT_FSYNC] = { + .cio_fini = lov_empty_io_fini + }, + [CIT_MISC] = { + .cio_fini = lov_empty_io_fini + } + }, + .cio_submit = LOV_EMPTY_IMPOSSIBLE, + .cio_commit_async = LOV_EMPTY_IMPOSSIBLE }; int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, @@ -853,10 +891,11 @@ int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, CFS_INIT_LIST_HEAD(&lio->lis_active); lov_io_slice_init(lio, lov, io); if (io->ci_result == 0) { - LASSERT(lov_r0(lov)->lo_lsm != NULL); io->ci_result = lov_io_subio_init(env, lio, io); - if (io->ci_result == 0) + if (io->ci_result == 0) { cl_io_slice_add(io, &lio->lis_cl, obj, &lov_io_ops); + cfs_atomic_inc(&lov->lo_active_ios); + } } RETURN(io->ci_result); } @@ -864,19 +903,24 @@ int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { - struct lov_io *lio = lov_env_io(env); - int result; - - ENTRY; - switch (io->ci_type) { - default: - LBUG(); - case CIT_MISC: - case CIT_READ: - result = 0; - break; + struct lov_object *lov = cl2lov(obj); + struct lov_io *lio = lov_env_io(env); + int result; + ENTRY; + + lio->lis_object = lov; + switch (io->ci_type) { + default: + LBUG(); + case CIT_MISC: + case CIT_READ: + result = 0; + break; + case CIT_FSYNC: + case CIT_SETATTR: + result = +1; + break; case CIT_WRITE: - case CIT_TRUNC: result = -EBADF; break; case CIT_FAULT: @@ -885,10 +929,56 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, PFID(lu_object_fid(&obj->co_lu))); break; } - if (result == 0) + if (result == 0) { cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops); - io->ci_result = result; - RETURN(result != 0); + cfs_atomic_inc(&lov->lo_active_ios); + } + + io->ci_result = result < 0 ? result : 0; + RETURN(result != 0); +} + +int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io) +{ + struct lov_object *lov = cl2lov(obj); + struct lov_io *lio = lov_env_io(env); + int result; + ENTRY; + + LASSERT(lov->lo_lsm != NULL); + lio->lis_object = lov; + + switch (io->ci_type) { + default: + LASSERTF(0, "invalid type %d\n", io->ci_type); + case CIT_MISC: + case CIT_FSYNC: + result = 1; + break; + case CIT_SETATTR: + /* the truncate to 0 is managed by MDT: + * - in open, for open O_TRUNC + * - in setattr, for truncate + */ + /* the truncate is for size > 0 so triggers a restore */ + if (cl_io_is_trunc(io)) + io->ci_restore_needed = 1; + result = -ENODATA; + break; + case CIT_READ: + case CIT_WRITE: + case CIT_FAULT: + io->ci_restore_needed = 1; + result = -ENODATA; + break; + } + if (result == 0) { + cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops); + cfs_atomic_inc(&lov->lo_active_ios); + } + + io->ci_result = result < 0 ? result : 0; + RETURN(result != 0); } - /** @} lov */