X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosc%2Fosc_io.c;h=e7aeed8faf04cf448f70885a7c92b1ea8d3c243a;hb=32c51c0b6f61a5c026993706d06de8654868df79;hp=6190433311bf65eeb191033929af9d0fc1ab57b8;hpb=6e3ec5812ebd1b5ecf7cae584f429b013ffe7431;p=fs%2Flustre-release.git diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 6190433..e7aeed8 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -97,16 +97,9 @@ static void osc_io_unplug(const struct lu_env *env, struct osc_object *osc, { loi_list_maint(cli, osc->oo_oinfo); osc_check_rpcs(env, cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); } /** - * How many pages osc_io_submit() queues before checking whether an RPC is - * ready. - */ -#define OSC_QUEUE_GRAIN (32) - -/** * An implementation of cl_io_operations::cio_io_submit() method for osc * layer. Iterates over pages in the in-queue, prepares each for io by calling * cl_page_prep() and then either submits them through osc_io_submit_page() @@ -133,7 +126,7 @@ static int osc_io_submit(const struct lu_env *env, LASSERT(qin->pl_nr > 0); - CDEBUG(D_INFO, "%i %i\n", qin->pl_nr, crt); + CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, crt); /* * NOTE: here @page is a top-level page. This is done to avoid * creation of sub-page-list. @@ -156,26 +149,24 @@ static int osc_io_submit(const struct lu_env *env, oap->oap_async_flags |= ASYNC_HP; cfs_spin_unlock(&oap->oap_lock); } - /* - * This can be checked without cli->cl_loi_list_lock, because - * ->oap_*_item are always manipulated when the page is owned. - */ - if (!cfs_list_empty(&oap->oap_urgent_item) || - !cfs_list_empty(&oap->oap_rpc_item)) { - result = -EBUSY; - break; - } if (osc0 == NULL) { /* first iteration */ cli = &exp->exp_obd->u.cli; osc0 = osc; + client_obd_list_lock(&cli->cl_loi_list_lock); } else /* check that all pages are against the same object * (for now) */ LASSERT(osc == osc0); - if (queued++ == 0) - client_obd_list_lock(&cli->cl_loi_list_lock); + + if (!cfs_list_empty(&oap->oap_urgent_item) || + !cfs_list_empty(&oap->oap_rpc_item)) { + result = -EBUSY; + break; + } + result = cl_page_prep(env, io, page, crt); if (result == 0) { + ++queued; cl_page_list_move(qout, qin, page); if (cfs_list_empty(&oap->oap_pending_item)) { osc_io_submit_page(env, cl2osc_io(env, ios), @@ -187,7 +178,7 @@ static int osc_io_submit(const struct lu_env *env, OSC_FLAGS); /* * bug 18881: we can't just break out here when - * error occurrs after cl_page_prep has been + * error occurs after cl_page_prep has been * called against the page. The correct * way is to call page's completion routine, * as in osc_oap_interrupted. For simplicity, @@ -208,30 +199,18 @@ static int osc_io_submit(const struct lu_env *env, */ result = 0; } + /* - * Don't keep client_obd_list_lock() for too long. + * We might hold client_obd_list_lock() for too long and cause + * soft-lockups (see bug 16651). But on the other hand, pages + * are queued here with ASYNC_URGENT flag, thus will be sent + * out immediately once osc_io_unplug() be called, possibly + * resulting sub-optimal RPCs. * - * XXX client_obd_list lock has to be unlocked periodically to - * avoid soft-lockups that tend to happen otherwise (see bug - * 16651). On the other hand, osc_io_submit_page() queues a - * page with ASYNC_URGENT flag and so all pages queued up - * until this point are sent out immediately by - * osc_io_unplug() resulting in sub-optimal RPCs (sub-optimal - * RPCs only happen during `warm up' phase when less than - * cl_max_rpcs_in_flight RPCs are in flight). To balance these - * conflicting requirements, one might unplug once enough - * pages to form a large RPC were queued (i.e., use - * cli->cl_max_pages_per_rpc as OSC_QUEUE_GRAIN, see - * lop_makes_rpc()), or ignore soft-lockup issue altogether. - * - * XXX lock_need_resched() should be used here, but it is not - * available in the older of supported kernels. + * We think creating optimal-sized RPCs is more important than + * avoiding the transient soft-lockups, plus I believe the + * soft-locks only happen in full debug testing. */ - if (queued > OSC_QUEUE_GRAIN || cfs_need_resched()) { - queued = 0; - osc_io_unplug(env, osc, cli); - cfs_cond_resched(); - } } LASSERT(ergo(result == 0, cli != NULL)); @@ -239,7 +218,9 @@ static int osc_io_submit(const struct lu_env *env, if (queued > 0) osc_io_unplug(env, osc, cli); - CDEBUG(D_INFO, "%i/%i %i\n", qin->pl_nr, qout->pl_nr, result); + if (osc0) + client_obd_list_unlock(&cli->cl_loi_list_lock); + CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result); return qout->pl_nr > 0 ? 0 : result; } @@ -317,14 +298,24 @@ static int osc_io_prepare_write(const struct lu_env *env, { struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev); struct obd_import *imp = class_exp2cliimp(dev->od_exp); - + struct osc_io *oio = cl2osc_io(env, ios); + int result = 0; ENTRY; /* * This implements OBD_BRW_CHECK logic from old client. */ - RETURN(imp == NULL || imp->imp_invalid ? -EIO : 0); + if (imp == NULL || imp->imp_invalid) + result = -EIO; + if (result == 0 && oio->oi_lockless) + /* this page contains `invalid' data, but who cares? + * nobody can access the invalid data. + * in osc_io_commit_write(), we're going to write exact + * [from, to) bytes of this page to OST. -jay */ + cl_page_export(env, slice->cpl_page, 1); + + RETURN(result); } static int osc_io_commit_write(const struct lu_env *env, @@ -332,6 +323,7 @@ static int osc_io_commit_write(const struct lu_env *env, const struct cl_page_slice *slice, unsigned from, unsigned to) { + struct osc_io *oio = cl2osc_io(env, ios); struct osc_page *opg = cl2osc_page(slice); struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); struct osc_async_page *oap = &opg->ops_oap; @@ -349,6 +341,10 @@ static int osc_io_commit_write(const struct lu_env *env, cfs_capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags |= OBD_BRW_NOQUOTA; + if (oio->oi_lockless) + /* see osc_io_prepare_write() for lockless io handling. */ + cl_page_clip(env, slice->cpl_page, from, to); + RETURN(0); } @@ -362,7 +358,7 @@ static int osc_io_fault_start(const struct lu_env *env, io = ios->cis_io; fio = &io->u.ci_fault; - CDEBUG(D_INFO, "%lu %i %i\n", + CDEBUG(D_INFO, "%lu %d %d\n", fio->ft_index, fio->ft_writable, fio->ft_nob); /* * If mapping is writeable, adjust kms to cover this page, @@ -375,9 +371,9 @@ static int osc_io_fault_start(const struct lu_env *env, RETURN(0); } -static int osc_punch_upcall(void *a, int rc) +static int osc_setattr_upcall(void *a, int rc) { - struct osc_punch_cbargs *args = a; + struct osc_setattr_cbargs *args = a; args->opc_rc = rc; cfs_complete(&args->opc_sync); @@ -414,7 +410,7 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, * XXX this is quite expensive check. */ cl_page_list_init(list); - cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list, 0); + cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list); cl_page_list_for_each(page, list) CL_PAGE_DEBUG(D_ERROR, env, page, "exists %lu\n", start); @@ -433,7 +429,7 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, /* * XXX Linux specific debugging stuff. */ - CL_PAGE_DEBUG(D_ERROR, env, page, "%s/%i %lu\n", + CL_PAGE_DEBUG(D_ERROR, env, page, "%s/%d %lu\n", submitter->comm, submitter->pid, start); libcfs_debug_dumpstack(submitter); } @@ -444,8 +440,8 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, # define osc_trunc_check(env, io, oio, size) do {;} while (0) #endif -static int osc_io_trunc_start(const struct lu_env *env, - const struct cl_io_slice *slice) +static int osc_io_setattr_start(const struct lu_env *env, + const struct cl_io_slice *slice) { struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); @@ -453,56 +449,86 @@ static int osc_io_trunc_start(const struct lu_env *env, struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; struct cl_attr *attr = &osc_env_info(env)->oti_attr; struct obdo *oa = &oio->oi_oa; - struct osc_punch_cbargs *cbargs = &oio->oi_punch_cbarg; - struct obd_capa *capa; - loff_t size = io->u.ci_truncate.tr_size; + struct osc_setattr_cbargs *cbargs = &oio->oi_setattr_cbarg; + loff_t size = io->u.ci_setattr.sa_attr.lvb_size; + unsigned int ia_valid = io->u.ci_setattr.sa_valid; int result = 0; + struct obd_info oinfo = { { { 0 } } }; - osc_trunc_check(env, io, oio, size); + if (ia_valid & ATTR_SIZE) + osc_trunc_check(env, io, oio, size); if (oio->oi_lockless == 0) { cl_object_attr_lock(obj); result = cl_object_attr_get(env, obj, attr); if (result == 0) { - attr->cat_size = attr->cat_kms = size; - result = cl_object_attr_set(env, obj, attr, - CAT_SIZE|CAT_KMS); + unsigned int cl_valid = 0; + + if (ia_valid & ATTR_SIZE) { + attr->cat_size = attr->cat_kms = size; + cl_valid = (CAT_SIZE | CAT_KMS); + } + if (ia_valid & ATTR_MTIME_SET) { + attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime; + cl_valid |= CAT_MTIME; + } + if (ia_valid & ATTR_ATIME_SET) { + attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime; + cl_valid |= CAT_ATIME; + } + if (ia_valid & ATTR_CTIME_SET) { + attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime; + cl_valid |= CAT_CTIME; + } + result = cl_object_attr_set(env, obj, attr, cl_valid); } cl_object_attr_unlock(obj); } - memset(oa, 0, sizeof(*oa)); if (result == 0) { oa->o_id = loi->loi_id; - oa->o_gr = loi->loi_gr; + oa->o_seq = loi->loi_seq; oa->o_mtime = attr->cat_mtime; oa->o_atime = attr->cat_atime; oa->o_ctime = attr->cat_ctime; oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME | OBD_MD_FLCTIME | OBD_MD_FLMTIME; - if (oio->oi_lockless) { - oa->o_flags = OBD_FL_SRVLOCK; - oa->o_valid |= OBD_MD_FLFLAGS; + if (ia_valid & ATTR_SIZE) { + oa->o_size = size; + oa->o_blocks = OBD_OBJECT_EOF; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + + if (oio->oi_lockless) { + oa->o_flags = OBD_FL_SRVLOCK; + oa->o_valid |= OBD_MD_FLFLAGS; + } + } else { + LASSERT(oio->oi_lockless == 0); } - oa->o_size = size; - oa->o_blocks = OBD_OBJECT_EOF; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - capa = io->u.ci_truncate.tr_capa; + oinfo.oi_oa = oa; + oinfo.oi_capa = io->u.ci_setattr.sa_capa; cfs_init_completion(&cbargs->opc_sync); - result = osc_punch_base(osc_export(cl2osc(obj)), oa, capa, - osc_punch_upcall, cbargs, PTLRPCD_SET); + + if (ia_valid & ATTR_SIZE) + result = osc_punch_base(osc_export(cl2osc(obj)), + &oinfo, osc_setattr_upcall, + cbargs, PTLRPCD_SET); + else + result = osc_setattr_async_base(osc_export(cl2osc(obj)), + &oinfo, NULL, + osc_setattr_upcall, + cbargs, PTLRPCD_SET); } return result; } -static void osc_io_trunc_end(const struct lu_env *env, - const struct cl_io_slice *slice) +static void osc_io_setattr_end(const struct lu_env *env, + const struct cl_io_slice *slice) { struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); - struct osc_punch_cbargs *cbargs = &oio->oi_punch_cbarg; - struct obdo *oa = &oio->oi_oa; + struct osc_setattr_cbargs *cbargs = &oio->oi_setattr_cbarg; int result; cfs_wait_for_completion(&cbargs->opc_sync); @@ -510,56 +536,75 @@ static void osc_io_trunc_end(const struct lu_env *env, result = io->ci_result = cbargs->opc_rc; if (result == 0) { struct cl_object *obj = slice->cis_obj; - if (oio->oi_lockless == 0) { - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - int valid = 0; - - /* Update kms & size */ - if (oa->o_valid & OBD_MD_FLSIZE) { - attr->cat_size = oa->o_size; - attr->cat_kms = oa->o_size; - valid |= CAT_KMS|CAT_SIZE; - } - if (oa->o_valid & OBD_MD_FLBLOCKS) { - attr->cat_blocks = oa->o_blocks; - valid |= CAT_BLOCKS; - } - if (oa->o_valid & OBD_MD_FLMTIME) { - attr->cat_mtime = oa->o_mtime; - valid |= CAT_MTIME; - } - if (oa->o_valid & OBD_MD_FLCTIME) { - attr->cat_ctime = oa->o_ctime; - valid |= CAT_CTIME; - } - if (oa->o_valid & OBD_MD_FLATIME) { - attr->cat_atime = oa->o_atime; - valid |= CAT_ATIME; - } - cl_object_attr_lock(obj); - result = cl_object_attr_set(env, obj, attr, valid); - cl_object_attr_unlock(obj); - } else { /* lockless truncate */ + if (oio->oi_lockless) { + /* lockless truncate */ struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); + + LASSERT(cl_io_is_trunc(io)); /* XXX: Need a lock. */ osd->od_stats.os_lockless_truncates++; } } +} + +static int osc_io_read_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + int result = 0; + ENTRY; - /* return result; */ + if (oio->oi_lockless == 0) { + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + attr->cat_atime = LTIME_S(CFS_CURRENT_TIME); + result = cl_object_attr_set(env, obj, attr, + CAT_ATIME); + } + cl_object_attr_unlock(obj); + } + RETURN(result); +} + +static int osc_io_write_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + int result = 0; + ENTRY; + + if (oio->oi_lockless == 0) { + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + attr->cat_mtime = attr->cat_ctime = + LTIME_S(CFS_CURRENT_TIME); + result = cl_object_attr_set(env, obj, attr, + CAT_MTIME | CAT_CTIME); + } + cl_object_attr_unlock(obj); + } + RETURN(result); } static const struct cl_io_operations osc_io_ops = { .op = { [CIT_READ] = { + .cio_start = osc_io_read_start, .cio_fini = osc_io_fini }, [CIT_WRITE] = { + .cio_start = osc_io_write_start, .cio_fini = osc_io_fini }, - [CIT_TRUNC] = { - .cio_start = osc_io_trunc_start, - .cio_end = osc_io_trunc_end + [CIT_SETATTR] = { + .cio_start = osc_io_setattr_start, + .cio_end = osc_io_setattr_end }, [CIT_FAULT] = { .cio_fini = osc_io_fini, @@ -604,7 +649,7 @@ static void osc_req_completion(const struct lu_env *env, /** * Implementation of struct cl_req_operations::cro_attr_set() for osc - * layer. osc is responsible for struct obdo::o_id and struct obdo::o_gr + * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq * fields. */ static void osc_req_attr_set(const struct lu_env *env, @@ -627,7 +672,7 @@ static void osc_req_attr_set(const struct lu_env *env, oa->o_valid |= OBD_MD_FLID; } if (flags & OBD_MD_FLGROUP) { - oa->o_gr = oinfo->loi_gr; + oa->o_seq = oinfo->loi_seq; oa->o_valid |= OBD_MD_FLGROUP; } if (flags & OBD_MD_FLHANDLE) {