X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosc%2Fosc_io.c;h=81ea610c9ed558bdf8d487cb01e81d74164a18af;hb=7ec56e2628a908343c9c60396e63f8e7093821b5;hp=fd38a1c9062135cd3cde095bcc1a46b18c697553;hpb=89f659febd4220cc517a181f8ac6cc52235b76ca;p=fs%2Flustre-release.git diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index fd38a1c..81ea610 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -36,14 +36,17 @@ * Implementation of cl_io for OSC layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ -/** \addtogroup osc osc @{ */ - #define DEBUG_SUBSYSTEM S_OSC #include "osc_cl_internal.h" +/** \addtogroup osc + * @{ + */ + /***************************************************************************** * * Type conversions. @@ -85,59 +88,50 @@ static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io) { } -struct cl_page *osc_oap2cl_page(struct osc_async_page *oap) -{ - return container_of(oap, struct osc_page, ops_oap)->ops_cl.cpl_page; -} - -static void osc_io_unplug(const struct lu_env *env, struct osc_object *osc, - struct client_obd *cli) -{ - loi_list_maint(cli, osc->oo_oinfo); - osc_check_rpcs(env, cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); -} - -/** - * How many pages osc_io_submit() queues before checking whether an RPC is - * ready. - */ -#define OSC_QUEUE_GRAIN (32) - /** * An implementation of cl_io_operations::cio_io_submit() method for osc * layer. 
Iterates over pages in the in-queue, prepares each for io by calling * cl_page_prep() and then either submits them through osc_io_submit_page() * or, if page is already submitted, changes osc flags through - * osc_set_async_flags_base(). + * osc_set_async_flags(). */ static int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, - enum cl_req_type crt, struct cl_2queue *queue) + enum cl_req_type crt, struct cl_2queue *queue) { struct cl_page *page; struct cl_page *tmp; - struct osc_object *osc0 = NULL; struct client_obd *cli = NULL; struct osc_object *osc = NULL; /* to keep gcc happy */ struct osc_page *opg; struct cl_io *io; + CFS_LIST_HEAD (list); - struct cl_page_list *qin = &queue->c2_qin; - struct cl_page_list *qout = &queue->c2_qout; - int queued = 0; - int result = 0; + struct cl_page_list *qin = &queue->c2_qin; + struct cl_page_list *qout = &queue->c2_qout; + int queued = 0; + int result = 0; + int cmd; + int brw_flags; + int max_pages; - LASSERT(qin->pl_nr > 0); + LASSERT(qin->pl_nr > 0); + + CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt); + + osc = cl2osc(ios->cis_obj); + cli = osc_cli(osc); + max_pages = cli->cl_max_pages_per_rpc; + + cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ; + brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0; - CDEBUG(D_INFO, "%i %i\n", qin->pl_nr, crt); /* * NOTE: here @page is a top-level page. This is done to avoid * creation of sub-page-list. */ cl_page_list_for_each_safe(page, tmp, qin) { struct osc_async_page *oap; - struct obd_export *exp; /* Top level IO. */ io = page->cp_owner; @@ -145,42 +139,18 @@ static int osc_io_submit(const struct lu_env *env, opg = osc_cl_page_osc(page); oap = &opg->ops_oap; - osc = cl2osc(opg->ops_cl.cpl_obj); - exp = osc_export(osc); - - /* - * This can be checked without cli->cl_loi_list_lock, because - * ->oap_*_item are always manipulated when the page is owned. 
- */ - if (!list_empty(&oap->oap_urgent_item) || - !list_empty(&oap->oap_rpc_item)) { + LASSERT(osc == oap->oap_obj); + + if (!cfs_list_empty(&oap->oap_pending_item) || + !cfs_list_empty(&oap->oap_rpc_item)) { + CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", + oap, opg); result = -EBUSY; break; } - if (osc0 == NULL) { /* first iteration */ - cli = &exp->exp_obd->u.cli; - osc0 = osc; - } else /* check that all pages are against the same object - * (for now) */ - LASSERT(osc == osc0); - if (queued++ == 0) - client_obd_list_lock(&cli->cl_loi_list_lock); result = cl_page_prep(env, io, page, crt); - if (result == 0) { - cl_page_list_move(qout, qin, page); - if (list_empty(&oap->oap_pending_item)) { - osc_io_submit_page(env, cl2osc_io(env, ios), - opg, crt); - } else { - result = osc_set_async_flags_base(cli, - osc->oo_oinfo, - oap, - OSC_FLAGS); - if (result != 0) - break; - } - } else { + if (result != 0) { LASSERT(result < 0); if (result != -EALREADY) break; @@ -190,27 +160,29 @@ static int osc_io_submit(const struct lu_env *env, * is not dirty. */ result = 0; + continue; } - /* - * Don't keep client_obd_list_lock() for too long. - * - * XXX lock_need_resched() should be used here, but it is not - * available in the older of supported kernels. - */ - if (queued > OSC_QUEUE_GRAIN || cfs_need_resched()) { - queued = 0; - osc_io_unplug(env, osc, cli); - cfs_cond_resched(); - } - } - - LASSERT(ergo(result == 0, cli != NULL)); - LASSERT(ergo(result == 0, osc == osc0)); - if (queued > 0) - osc_io_unplug(env, osc, cli); - CDEBUG(D_INFO, "%i/%i %i\n", qin->pl_nr, qout->pl_nr, result); - return qout->pl_nr > 0 ? 
0 : result; + cl_page_list_move(qout, qin, page); + oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY; + oap->oap_async_flags |= ASYNC_COUNT_STABLE; + + osc_page_submit(env, opg, crt, brw_flags); + cfs_list_add_tail(&oap->oap_pending_item, &list); + if (++queued == max_pages) { + queued = 0; + result = osc_queue_sync_pages(env, osc, &list, cmd, + brw_flags); + if (result < 0) + break; + } + } + + if (queued > 0) + result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags); + + CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result); + return qout->pl_nr > 0 ? 0 : result; } static void osc_page_touch_at(const struct lu_env *env, @@ -287,14 +259,24 @@ static int osc_io_prepare_write(const struct lu_env *env, { struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev); struct obd_import *imp = class_exp2cliimp(dev->od_exp); - + struct osc_io *oio = cl2osc_io(env, ios); + int result = 0; ENTRY; /* * This implements OBD_BRW_CHECK logic from old client. */ - RETURN(imp == NULL || imp->imp_invalid ? -EIO : 0); + if (imp == NULL || imp->imp_invalid) + result = -EIO; + if (result == 0 && oio->oi_lockless) + /* this page contains `invalid' data, but who cares? + * nobody can access the invalid data. + * in osc_io_commit_write(), we're going to write exact + * [from, to) bytes of this page to OST. 
-jay */ + cl_page_export(env, slice->cpl_page, 1); + + RETURN(result); } static int osc_io_commit_write(const struct lu_env *env, @@ -302,6 +284,7 @@ static int osc_io_commit_write(const struct lu_env *env, const struct cl_page_slice *slice, unsigned from, unsigned to) { + struct osc_io *oio = cl2osc_io(env, ios); struct osc_page *opg = cl2osc_page(slice); struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); struct osc_async_page *oap = &opg->ops_oap; @@ -319,6 +302,10 @@ static int osc_io_commit_write(const struct lu_env *env, cfs_capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags |= OBD_BRW_NOQUOTA; + if (oio->oi_lockless) + /* see osc_io_prepare_write() for lockless io handling. */ + cl_page_clip(env, slice->cpl_page, from, to); + RETURN(0); } @@ -332,7 +319,7 @@ static int osc_io_fault_start(const struct lu_env *env, io = ios->cis_io; fio = &io->u.ci_fault; - CDEBUG(D_INFO, "%lu %i %i\n", + CDEBUG(D_INFO, "%lu %d %d\n", fio->ft_index, fio->ft_writable, fio->ft_nob); /* * If mapping is writeable, adjust kms to cover this page, @@ -345,73 +332,77 @@ static int osc_io_fault_start(const struct lu_env *env, RETURN(0); } -static int osc_punch_upcall(void *a, int rc) +static int osc_async_upcall(void *a, int rc) { - struct osc_punch_cbargs *args = a; + struct osc_async_cbargs *args = a; args->opc_rc = rc; - complete(&args->opc_sync); + complete(&args->opc_sync); return 0; } -#ifdef __KERNEL__ +#if defined(__KERNEL__) /** * Checks that there are no pages being written in the extent being truncated. 
*/ +static int trunc_check_cb(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, void *cbdata) +{ + const struct cl_page_slice *slice; + struct osc_page *ops; + struct osc_async_page *oap; + __u64 start = *(__u64 *)cbdata; + + slice = cl_page_at(page, &osc_device_type); + LASSERT(slice != NULL); + ops = cl2osc_page(slice); + oap = &ops->ops_oap; + + if (oap->oap_cmd & OBD_BRW_WRITE && + !cfs_list_empty(&oap->oap_pending_item)) + CL_PAGE_DEBUG(D_ERROR, env, page, "exists " LPU64 "/%s.\n", + start, current->comm); + +#ifdef __linux__ + { + cfs_page_t *vmpage = cl_page_vmpage(env, page); + if (PageLocked(vmpage)) + CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", + ops, page->cp_index, + (oap->oap_cmd & OBD_BRW_RWMASK)); + } +#endif + + return CLP_GANG_OKAY; +} + static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, - struct osc_io *oio, size_t size) + struct osc_io *oio, __u64 size) { - struct osc_page *cp; - struct osc_object *obj; - struct cl_object *clob; - struct cl_page *page; - struct cl_page_list *list; - int partial; - pgoff_t start; + struct cl_object *clob; + int partial; + pgoff_t start; clob = oio->oi_cl.cis_obj; - obj = cl2osc(clob); start = cl_index(clob, size); partial = cl_offset(clob, start) < size; - list = &osc_env_info(env)->oti_plist; /* * Complain if there are pages in the truncated region. - * - * XXX this is quite expensive check. */ - cl_page_list_init(list); - cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list); - - cl_page_list_for_each(page, list) - CL_PAGE_DEBUG(D_ERROR, env, page, "exists %lu\n", start); - - cl_page_list_disown(env, io, list); - cl_page_list_fini(env, list); - - spin_lock(&obj->oo_seatbelt); - list_for_each_entry(cp, &obj->oo_inflight[CRT_WRITE], ops_inflight) { - page = cp->ops_cl.cpl_page; - if (page->cp_index >= start + partial) { - cfs_task_t *submitter; - - submitter = cp->ops_submitter; - /* - * XXX Linux specific debugging stuff. 
- */ - CL_PAGE_DEBUG(D_ERROR, env, page, "%s/%i %lu\n", - submitter->comm, submitter->pid, start); - libcfs_debug_dumpstack(submitter); - } - } - spin_unlock(&obj->oo_seatbelt); + cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, + trunc_check_cb, (void *)&size); } #else /* __KERNEL__ */ -# define osc_trunc_check(env, io, oio, size) do {;} while (0) +static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, + struct osc_io *oio, __u64 size) +{ + return; +} #endif -static int osc_io_trunc_start(const struct lu_env *env, - const struct cl_io_slice *slice) +static int osc_io_setattr_start(const struct lu_env *env, + const struct cl_io_slice *slice) { struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); @@ -419,120 +410,294 @@ static int osc_io_trunc_start(const struct lu_env *env, struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; struct cl_attr *attr = &osc_env_info(env)->oti_attr; struct obdo *oa = &oio->oi_oa; - struct osc_punch_cbargs *cbargs = &oio->oi_punch_cbarg; - struct obd_capa *capa; - loff_t size = io->u.ci_truncate.tr_size; - int result = 0; - - - memset(oa, 0, sizeof(*oa)); - - osc_trunc_check(env, io, oio, size); - - if (oio->oi_lockless == 0) { - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - if (result == 0) { - attr->cat_size = attr->cat_kms = size; - result = cl_object_attr_set(env, obj, attr, - CAT_SIZE|CAT_KMS); + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + __u64 size = io->u.ci_setattr.sa_attr.lvb_size; + unsigned int ia_valid = io->u.ci_setattr.sa_valid; + int result = 0; + struct obd_info oinfo = { { { 0 } } }; + + /* truncate cache dirty pages first */ + if (cl_io_is_trunc(io)) + result = osc_cache_truncate_start(env, oio, cl2osc(obj), size); + + if (result == 0 && oio->oi_lockless == 0) { + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr; + unsigned int cl_valid = 
0; + + if (ia_valid & ATTR_SIZE) { + attr->cat_size = attr->cat_kms = size; + cl_valid = (CAT_SIZE | CAT_KMS); + } + if (ia_valid & ATTR_MTIME_SET) { + attr->cat_mtime = lvb->lvb_mtime; + cl_valid |= CAT_MTIME; + } + if (ia_valid & ATTR_ATIME_SET) { + attr->cat_atime = lvb->lvb_atime; + cl_valid |= CAT_ATIME; + } + if (ia_valid & ATTR_CTIME_SET) { + attr->cat_ctime = lvb->lvb_ctime; + cl_valid |= CAT_CTIME; + } + result = cl_object_attr_set(env, obj, attr, cl_valid); } cl_object_attr_unlock(obj); } - + memset(oa, 0, sizeof(*oa)); if (result == 0) { oa->o_id = loi->loi_id; - oa->o_gr = loi->loi_gr; + oa->o_seq = loi->loi_seq; oa->o_mtime = attr->cat_mtime; oa->o_atime = attr->cat_atime; oa->o_ctime = attr->cat_ctime; oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME | OBD_MD_FLCTIME | OBD_MD_FLMTIME; - if (oio->oi_lockless) { - oa->o_flags = OBD_FL_TRUNCLOCK; - oa->o_valid |= OBD_MD_FLFLAGS; + if (ia_valid & ATTR_SIZE) { + oa->o_size = size; + oa->o_blocks = OBD_OBJECT_EOF; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + + if (oio->oi_lockless) { + oa->o_flags = OBD_FL_SRVLOCK; + oa->o_valid |= OBD_MD_FLFLAGS; + } + } else { + LASSERT(oio->oi_lockless == 0); } - oa->o_size = size; - oa->o_blocks = OBD_OBJECT_EOF; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - - capa = io->u.ci_truncate.tr_capa; - init_completion(&cbargs->opc_sync); - result = osc_punch_base(osc_export(cl2osc(obj)), oa, capa, - osc_punch_upcall, cbargs, PTLRPCD_SET); + + oinfo.oi_oa = oa; + oinfo.oi_capa = io->u.ci_setattr.sa_capa; + init_completion(&cbargs->opc_sync); + + if (ia_valid & ATTR_SIZE) + result = osc_punch_base(osc_export(cl2osc(obj)), + &oinfo, osc_async_upcall, + cbargs, PTLRPCD_SET); + else + result = osc_setattr_async_base(osc_export(cl2osc(obj)), + &oinfo, NULL, + osc_async_upcall, + cbargs, PTLRPCD_SET); + cbargs->opc_rpc_sent = result == 0; } return result; } -static void osc_io_trunc_end(const struct lu_env *env, - const struct cl_io_slice *slice) +static void 
osc_io_setattr_end(const struct lu_env *env, + const struct cl_io_slice *slice) { - struct cl_io *io = slice->cis_io; - struct osc_io *oio = cl2osc_io(env, slice); - struct osc_punch_cbargs *cbargs = &oio->oi_punch_cbarg; - struct obdo *oa = &oio->oi_oa; - int result; - - wait_for_completion(&cbargs->opc_sync); + struct cl_io *io = slice->cis_io; + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + int result = 0; - result = io->ci_result = cbargs->opc_rc; + if (cbargs->opc_rpc_sent) { + wait_for_completion(&cbargs->opc_sync); + result = io->ci_result = cbargs->opc_rc; + } if (result == 0) { - struct cl_object *obj = slice->cis_obj; - if (oio->oi_lockless == 0) { - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - int valid = 0; - - /* Update kms & size */ - if (oa->o_valid & OBD_MD_FLSIZE) { - attr->cat_size = oa->o_size; - attr->cat_kms = oa->o_size; - valid |= CAT_KMS|CAT_SIZE; - } - if (oa->o_valid & OBD_MD_FLBLOCKS) { - attr->cat_blocks = oa->o_blocks; - valid |= CAT_BLOCKS; - } - if (oa->o_valid & OBD_MD_FLMTIME) { - attr->cat_mtime = oa->o_mtime; - valid |= CAT_MTIME; - } - if (oa->o_valid & OBD_MD_FLCTIME) { - attr->cat_ctime = oa->o_ctime; - valid |= CAT_CTIME; - } - if (oa->o_valid & OBD_MD_FLATIME) { - attr->cat_atime = oa->o_atime; - valid |= CAT_ATIME; - } - cl_object_attr_lock(obj); - result = cl_object_attr_set(env, obj, attr, valid); - cl_object_attr_unlock(obj); - } else { /* lockless truncate */ + if (oio->oi_lockless) { + /* lockless truncate */ struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); + + LASSERT(cl_io_is_trunc(io)); /* XXX: Need a lock. 
*/ osd->od_stats.os_lockless_truncates++; } } - /* return result; */ + if (cl_io_is_trunc(io)) { + __u64 size = io->u.ci_setattr.sa_attr.lvb_size; + osc_trunc_check(env, io, oio, size); + if (oio->oi_trunc != NULL) { + osc_cache_truncate_end(env, oio, cl2osc(obj)); + oio->oi_trunc = NULL; + } + } +} + +static int osc_io_read_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + int result = 0; + ENTRY; + + if (oio->oi_lockless == 0) { + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + attr->cat_atime = LTIME_S(CFS_CURRENT_TIME); + result = cl_object_attr_set(env, obj, attr, + CAT_ATIME); + } + cl_object_attr_unlock(obj); + } + RETURN(result); +} + +static int osc_io_write_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + int result = 0; + ENTRY; + + if (oio->oi_lockless == 0) { + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1); + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + attr->cat_mtime = attr->cat_ctime = + LTIME_S(CFS_CURRENT_TIME); + result = cl_object_attr_set(env, obj, attr, + CAT_MTIME | CAT_CTIME); + } + cl_object_attr_unlock(obj); + } + RETURN(result); +} + +static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj, + struct cl_fsync_io *fio) +{ + struct osc_io *oio = osc_env_io(env); + struct obdo *oa = &oio->oi_oa; + struct obd_info *oinfo = &oio->oi_info; + struct lov_oinfo *loi = obj->oo_oinfo; + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + int rc = 0; + ENTRY; + + memset(oa, 0, sizeof(*oa)); + oa->o_id = loi->loi_id; + oa->o_seq = loi->loi_seq; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; + + /* reload size 
and blocks for start and end of sync range */ + oa->o_size = fio->fi_start; + oa->o_blocks = fio->fi_end; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + + obdo_set_parent_fid(oa, fio->fi_fid); + + memset(oinfo, 0, sizeof(*oinfo)); + oinfo->oi_oa = oa; + oinfo->oi_capa = fio->fi_capa; + init_completion(&cbargs->opc_sync); + + rc = osc_sync_base(osc_export(obj), oinfo, osc_async_upcall, cbargs, + PTLRPCD_SET); + RETURN(rc); +} + +static int osc_io_fsync_start(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct cl_io *io = slice->cis_io; + struct cl_fsync_io *fio = &io->u.ci_fsync; + struct cl_object *obj = slice->cis_obj; + struct osc_object *osc = cl2osc(obj); + pgoff_t start = cl_index(obj, fio->fi_start); + pgoff_t end = cl_index(obj, fio->fi_end); + int result = 0; + ENTRY; + + if (fio->fi_end == OBD_OBJECT_EOF) + end = CL_PAGE_EOF; + + result = osc_cache_writeback_range(env, osc, start, end, 0, + fio->fi_mode == CL_FSYNC_DISCARD); + if (result > 0) { + fio->fi_nr_written += result; + result = 0; + } + if (fio->fi_mode == CL_FSYNC_ALL) { + int rc; + + /* we have to wait for writeback to finish before we can + * send OST_SYNC RPC. This is bad because it causes extents + * to be written osc by osc. However, we usually start + * writeback before CL_FSYNC_ALL so this won't have any real + * problem. 
*/ + rc = osc_cache_wait_range(env, osc, start, end); + if (result == 0) + result = rc; + rc = osc_fsync_ost(env, osc, fio); + if (result == 0) + result = rc; + } + + RETURN(result); +} + +static void osc_io_fsync_end(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync; + struct cl_object *obj = slice->cis_obj; + pgoff_t start = cl_index(obj, fio->fi_start); + pgoff_t end = cl_index(obj, fio->fi_end); + int result = 0; + + if (fio->fi_mode == CL_FSYNC_LOCAL) { + result = osc_cache_wait_range(env, cl2osc(obj), start, end); + } else if (fio->fi_mode == CL_FSYNC_ALL) { + struct osc_io *oio = cl2osc_io(env, slice); + struct osc_async_cbargs *cbargs = &oio->oi_cbarg; + + wait_for_completion(&cbargs->opc_sync); + if (result == 0) + result = cbargs->opc_rc; + } + slice->cis_io->ci_result = result; +} + +static void osc_io_end(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct osc_io *oio = cl2osc_io(env, slice); + + if (oio->oi_active) { + osc_extent_release(env, oio->oi_active); + oio->oi_active = NULL; + } } static const struct cl_io_operations osc_io_ops = { .op = { [CIT_READ] = { + .cio_start = osc_io_read_start, .cio_fini = osc_io_fini }, [CIT_WRITE] = { + .cio_start = osc_io_write_start, + .cio_end = osc_io_end, .cio_fini = osc_io_fini }, - [CIT_TRUNC] = { - .cio_start = osc_io_trunc_start, - .cio_end = osc_io_trunc_end + [CIT_SETATTR] = { + .cio_start = osc_io_setattr_start, + .cio_end = osc_io_setattr_end }, [CIT_FAULT] = { - .cio_fini = osc_io_fini, - .cio_start = osc_io_fault_start - }, + .cio_start = osc_io_fault_start, + .cio_end = osc_io_end, + .cio_fini = osc_io_fini + }, + [CIT_FSYNC] = { + .cio_start = osc_io_fsync_start, + .cio_end = osc_io_fsync_end, + .cio_fini = osc_io_fini + }, [CIT_MISC] = { .cio_fini = osc_io_fini } @@ -572,43 +737,73 @@ static void osc_req_completion(const struct lu_env *env, /** * Implementation of struct cl_req_operations::cro_attr_set() for 
osc - * layer. osc is responsible for struct obdo::o_id and struct obdo::o_gr + * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq * fields. */ static void osc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, obd_valid flags) + const struct cl_req_slice *slice, + const struct cl_object *obj, + struct cl_req_attr *attr, obd_valid flags) { - struct lov_oinfo *oinfo; - struct cl_req *clerq; - struct cl_page *apage; /* _some_ page in @clerq */ - struct cl_lock *lock; /* _some_ lock protecting @apage */ - struct osc_lock *olck; - struct osc_page *opg; - struct obdo *oa; - - oa = attr->cra_oa; - oinfo = cl2osc(obj)->oo_oinfo; + struct lov_oinfo *oinfo; + struct cl_req *clerq; + struct cl_page *apage; /* _some_ page in @clerq */ + struct cl_lock *lock; /* _some_ lock protecting @apage */ + struct osc_lock *olck; + struct osc_page *opg; + struct obdo *oa; + struct ost_lvb *lvb; + + oinfo = cl2osc(obj)->oo_oinfo; + lvb = &oinfo->loi_lvb; + oa = attr->cra_oa; + + if ((flags & OBD_MD_FLMTIME) != 0) { + oa->o_mtime = lvb->lvb_mtime; + oa->o_valid |= OBD_MD_FLMTIME; + } + if ((flags & OBD_MD_FLATIME) != 0) { + oa->o_atime = lvb->lvb_atime; + oa->o_valid |= OBD_MD_FLATIME; + } + if ((flags & OBD_MD_FLCTIME) != 0) { + oa->o_ctime = lvb->lvb_ctime; + oa->o_valid |= OBD_MD_FLCTIME; + } if (flags & OBD_MD_FLID) { oa->o_id = oinfo->loi_id; oa->o_valid |= OBD_MD_FLID; } if (flags & OBD_MD_FLGROUP) { - oa->o_gr = oinfo->loi_gr; + oa->o_seq = oinfo->loi_seq; oa->o_valid |= OBD_MD_FLGROUP; } if (flags & OBD_MD_FLHANDLE) { clerq = slice->crs_req; - LASSERT(!list_empty(&clerq->crq_pages)); + LASSERT(!cfs_list_empty(&clerq->crq_pages)); apage = container_of(clerq->crq_pages.next, struct cl_page, cp_flight); opg = osc_cl_page_osc(apage); apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */ lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1); - LASSERT(lock != NULL); + 
if (lock == NULL) { + struct cl_object_header *head; + struct cl_lock *scan; + + head = cl_object_header(apage->cp_obj); + cfs_list_for_each_entry(scan, &head->coh_locks, + cll_linkage) + CL_LOCK_DEBUG(D_ERROR, env, scan, + "no cover page!\n"); + CL_PAGE_DEBUG(D_ERROR, env, apage, + "dump uncover page!\n"); + libcfs_debug_dumpstack(NULL); + LBUG(); + } + olck = osc_lock_at(lock); LASSERT(olck != NULL); + LASSERT(ergo(opg->ops_srvlock, olck->ols_lock == NULL)); /* check for lockless io. */ if (olck->ols_lock != NULL) { oa->o_handle = olck->ols_lock->l_remote_handle; @@ -641,7 +836,7 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev, struct osc_req *or; int result; - OBD_SLAB_ALLOC_PTR(or, osc_req_kmem); + OBD_SLAB_ALLOC_PTR_GFP(or, osc_req_kmem, CFS_ALLOC_IO); if (or != NULL) { cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops); result = 0;