Whamcloud - gitweb
LU-1030 clio: reimplement ll_fsync in clio way
[fs/lustre-release.git] / lustre / osc / osc_io.c
index 5fe0eb2..5818b8d 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -36,6 +36,7 @@
  * Implementation of cl_io for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
@@ -92,20 +93,12 @@ struct cl_page *osc_oap2cl_page(struct osc_async_page *oap)
         return container_of(oap, struct osc_page, ops_oap)->ops_cl.cpl_page;
 }
 
-static void osc_io_unplug(const struct lu_env *env, struct osc_object *osc,
-                          struct client_obd *cli)
-{
-        loi_list_maint(cli, osc->oo_oinfo);
-        osc_check_rpcs(env, cli);
-        client_obd_list_unlock(&cli->cl_loi_list_lock);
-}
-
 /**
  * An implementation of cl_io_operations::cio_io_submit() method for osc
  * layer. Iterates over pages in the in-queue, prepares each for io by calling
  * cl_page_prep() and then either submits them through osc_io_submit_page()
  * or, if page is already submitted, changes osc flags through
- * osc_set_async_flags_base().
+ * osc_set_async_flags().
  */
 static int osc_io_submit(const struct lu_env *env,
                          const struct cl_io_slice *ios,
@@ -127,7 +120,7 @@ static int osc_io_submit(const struct lu_env *env,
 
         LASSERT(qin->pl_nr > 0);
 
-        CDEBUG(D_INFO, "%i %i\n", qin->pl_nr, crt);
+        CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, crt);
         /*
          * NOTE: here @page is a top-level page. This is done to avoid
          *       creation of sub-page-list.
@@ -150,44 +143,40 @@ static int osc_io_submit(const struct lu_env *env,
                         oap->oap_async_flags |= ASYNC_HP;
                         cfs_spin_unlock(&oap->oap_lock);
                 }
-                /*
-                 * This can be checked without cli->cl_loi_list_lock, because
-                 * ->oap_*_item are always manipulated when the page is owned.
-                 */
-                if (!cfs_list_empty(&oap->oap_urgent_item) ||
-                    !cfs_list_empty(&oap->oap_rpc_item)) {
-                        result = -EBUSY;
-                        break;
-                }
 
                 if (osc0 == NULL) { /* first iteration */
                         cli = &exp->exp_obd->u.cli;
                         osc0 = osc;
+                        client_obd_list_lock(&cli->cl_loi_list_lock);
                 } else /* check that all pages are against the same object
                         * (for now) */
                         LASSERT(osc == osc0);
-                if (queued++ == 0)
-                        client_obd_list_lock(&cli->cl_loi_list_lock);
+
+                if (!cfs_list_empty(&oap->oap_urgent_item) ||
+                    !cfs_list_empty(&oap->oap_rpc_item)) {
+                        result = -EBUSY;
+                        break;
+                }
+
                 result = cl_page_prep(env, io, page, crt);
                 if (result == 0) {
+                        ++queued;
                         cl_page_list_move(qout, qin, page);
                         if (cfs_list_empty(&oap->oap_pending_item)) {
                                 osc_io_submit_page(env, cl2osc_io(env, ios),
                                                    opg, crt);
                         } else {
-                                result = osc_set_async_flags_base(cli,
-                                                                  osc->oo_oinfo,
-                                                                  oap,
-                                                                  OSC_FLAGS);
-                                /*
-                                 * bug 18881: we can't just break out here when
-                                 * error occurrs after cl_page_prep has been
-                                 * called against the page. The correct
-                                 * way is to call page's completion routine,
-                                 * as in osc_oap_interrupted.  For simplicity,
-                                 * we just force osc_set_async_flags_base() to
-                                 * not return error.
-                                 */
+                               result = osc_set_async_flags(osc, opg,
+                                                            OSC_FLAGS);
+                               /*
+                                * bug 18881: we can't just break out here when
+                                * error occurs after cl_page_prep has been
+                                * called against the page. The correct
+                                * way is to call page's completion routine,
+                                * as in osc_oap_interrupted.  For simplicity,
+                                * we just force osc_set_async_flags() to
+                                * not return error.
+                                */
                                 LASSERT(result == 0);
                         }
                         opg->ops_submit_time = cfs_time_current();
@@ -220,8 +209,10 @@ static int osc_io_submit(const struct lu_env *env,
         LASSERT(ergo(result == 0, osc == osc0));
 
         if (queued > 0)
-                osc_io_unplug(env, osc, cli);
-        CDEBUG(D_INFO, "%i/%i %i\n", qin->pl_nr, qout->pl_nr, result);
+               osc_io_unplug(env, cli, osc, PDL_POLICY_ROUND);
+        if (osc0)
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
+        CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
         return qout->pl_nr > 0 ? 0 : result;
 }
 
@@ -299,14 +290,24 @@ static int osc_io_prepare_write(const struct lu_env *env,
 {
         struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
         struct obd_import *imp = class_exp2cliimp(dev->od_exp);
-
+        struct osc_io     *oio = cl2osc_io(env, ios);
+        int result = 0;
         ENTRY;
 
         /*
          * This implements OBD_BRW_CHECK logic from old client.
          */
 
-        RETURN(imp == NULL || imp->imp_invalid ? -EIO : 0);
+        if (imp == NULL || imp->imp_invalid)
+                result = -EIO;
+        if (result == 0 && oio->oi_lockless)
+                /* this page contains `invalid' data, but who cares?
+                 * nobody can access the invalid data.
+                 * in osc_io_commit_write(), we're going to write exact
+                 * [from, to) bytes of this page to OST. -jay */
+                cl_page_export(env, slice->cpl_page, 1);
+
+        RETURN(result);
 }
 
 static int osc_io_commit_write(const struct lu_env *env,
@@ -314,6 +315,7 @@ static int osc_io_commit_write(const struct lu_env *env,
                                const struct cl_page_slice *slice,
                                unsigned from, unsigned to)
 {
+        struct osc_io         *oio = cl2osc_io(env, ios);
         struct osc_page       *opg = cl2osc_page(slice);
         struct osc_object     *obj = cl2osc(opg->ops_cl.cpl_obj);
         struct osc_async_page *oap = &opg->ops_oap;
@@ -331,6 +333,10 @@ static int osc_io_commit_write(const struct lu_env *env,
             cfs_capable(CFS_CAP_SYS_RESOURCE))
                 oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
 
+        if (oio->oi_lockless)
+                /* see osc_io_prepare_write() for lockless io handling. */
+                cl_page_clip(env, slice->cpl_page, from, to);
+
         RETURN(0);
 }
 
@@ -344,7 +350,7 @@ static int osc_io_fault_start(const struct lu_env *env,
 
         io  = ios->cis_io;
         fio = &io->u.ci_fault;
-        CDEBUG(D_INFO, "%lu %i %i\n",
+        CDEBUG(D_INFO, "%lu %d %d\n",
                fio->ft_index, fio->ft_writable, fio->ft_nob);
         /*
          * If mapping is writeable, adjust kms to cover this page,
@@ -357,9 +363,9 @@ static int osc_io_fault_start(const struct lu_env *env,
         RETURN(0);
 }
 
-static int osc_setattr_upcall(void *a, int rc)
+static int osc_async_upcall(void *a, int rc)
 {
-        struct osc_setattr_cbargs *args = a;
+       struct osc_async_cbargs *args = a;
 
         args->opc_rc = rc;
         cfs_complete(&args->opc_sync);
@@ -396,7 +402,7 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
          * XXX this is quite expensive check.
          */
         cl_page_list_init(list);
-        cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list, 0);
+        cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list);
 
         cl_page_list_for_each(page, list)
                 CL_PAGE_DEBUG(D_ERROR, env, page, "exists %lu\n", start);
@@ -415,7 +421,7 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
                         /*
                          * XXX Linux specific debugging stuff.
                          */
-                        CL_PAGE_DEBUG(D_ERROR, env, page, "%s/%i %lu\n",
+                        CL_PAGE_DEBUG(D_ERROR, env, page, "%s/%d %lu\n",
                                       submitter->comm, submitter->pid, start);
                         libcfs_debug_dumpstack(submitter);
                 }
@@ -435,7 +441,7 @@ static int osc_io_setattr_start(const struct lu_env *env,
         struct lov_oinfo        *loi    = cl2osc(obj)->oo_oinfo;
         struct cl_attr          *attr   = &osc_env_info(env)->oti_attr;
         struct obdo             *oa     = &oio->oi_oa;
-        struct osc_setattr_cbargs *cbargs = &oio->oi_setattr_cbarg;
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
         loff_t                   size   = io->u.ci_setattr.sa_attr.lvb_size;
         unsigned int             ia_valid = io->u.ci_setattr.sa_valid;
         int                      result = 0;
@@ -498,12 +504,12 @@ static int osc_io_setattr_start(const struct lu_env *env,
 
                 if (ia_valid & ATTR_SIZE)
                         result = osc_punch_base(osc_export(cl2osc(obj)),
-                                                &oinfo, osc_setattr_upcall,
+                                               &oinfo, osc_async_upcall,
                                                 cbargs, PTLRPCD_SET);
                 else
                         result = osc_setattr_async_base(osc_export(cl2osc(obj)),
                                                         &oinfo, NULL,
-                                                        osc_setattr_upcall,
+                                                       osc_async_upcall,
                                                         cbargs, PTLRPCD_SET);
         }
         return result;
@@ -514,7 +520,7 @@ static void osc_io_setattr_end(const struct lu_env *env,
 {
         struct cl_io            *io     = slice->cis_io;
         struct osc_io           *oio    = cl2osc_io(env, slice);
-        struct osc_setattr_cbargs *cbargs = &oio->oi_setattr_cbarg;
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
         int result;
 
         cfs_wait_for_completion(&cbargs->opc_sync);
@@ -578,6 +584,52 @@ static int osc_io_write_start(const struct lu_env *env,
         RETURN(result);
 }
 
+/**
+ * An implementation of cl_io_operations::cio_start() method for CIT_FSYNC
+ * at the osc layer.
+ *
+ * Fills the per-io obdo with the object id/seq and the sync range taken
+ * from cl_io::u.ci_fsync, then hands the request to osc_sync_base() with
+ * osc_async_upcall() as the callback. osc_io_fsync_end() waits on the
+ * completion initialised here.
+ *
+ * \retval the value returned by osc_sync_base()
+ *
+ * NOTE(review): if osc_sync_base() fails without ever invoking the
+ * callback, nothing in this function completes cbargs->opc_sync -- confirm
+ * that osc_io_fsync_end() cannot block forever in that case.
+ */
+static int osc_io_fsync_start(const struct lu_env *env,
+                             const struct cl_io_slice *slice)
+{
+       struct cl_io     *io    = slice->cis_io;
+       struct osc_io    *oio   = cl2osc_io(env, slice);
+       struct obdo      *oa    = &oio->oi_oa;
+       struct obd_info  *oinfo = &oio->oi_info;
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+       struct cl_object *obj   = slice->cis_obj;
+       struct lov_oinfo *loi   = cl2osc(obj)->oo_oinfo;
+       int result = 0;
+       ENTRY;
+
+       /* start from a clean obdo and identify the object by id/seq */
+       memset(oa, 0, sizeof(*oa));
+       oa->o_id = loi->loi_id;
+       oa->o_seq = loi->loi_seq;
+       oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+
+       /* o_size and o_blocks are reused here to carry the start and the
+        * end of the sync range, so both are flagged as valid */
+       oa->o_size = io->u.ci_fsync.fi_start;
+       oa->o_blocks = io->u.ci_fsync.fi_end;
+       oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+
+       obdo_set_parent_fid(oa, io->u.ci_fsync.fi_fid);
+
+       memset(oinfo, 0, sizeof(*oinfo));
+       oinfo->oi_oa = oa;
+       oinfo->oi_capa = io->u.ci_fsync.fi_capa;
+       /* must be initialised before the request is handed over, because
+        * osc_async_upcall() signals this completion */
+       cfs_init_completion(&cbargs->opc_sync);
+
+       result = osc_sync_base(osc_export(cl2osc(obj)), oinfo,
+                              osc_async_upcall, cbargs, PTLRPCD_SET);
+       RETURN(result);
+}
+
+/**
+ * An implementation of cl_io_operations::cio_end() method for CIT_FSYNC
+ * at the osc layer.
+ *
+ * Blocks until the completion in the per-io callback arguments is
+ * signalled, then copies the result code stored there into
+ * cl_io::ci_result.
+ */
+static void osc_io_fsync_end(const struct lu_env *env,
+                            const struct cl_io_slice *slice)
+{
+       struct cl_io  *io  = slice->cis_io;
+       struct osc_io *oio = cl2osc_io(env, slice);
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+
+       /* opc_sync is the completion set up by osc_io_fsync_start(); opc_rc
+        * is filled in by osc_async_upcall() before it signals completion */
+       cfs_wait_for_completion(&cbargs->opc_sync);
+       io->ci_result = cbargs->opc_rc;
+}
+
 static const struct cl_io_operations osc_io_ops = {
         .op = {
                 [CIT_READ] = {
@@ -596,6 +648,11 @@ static const struct cl_io_operations osc_io_ops = {
                         .cio_fini   = osc_io_fini,
                         .cio_start  = osc_io_fault_start
                 },
+               [CIT_FSYNC] = {
+                       .cio_start  = osc_io_fsync_start,
+                       .cio_end    = osc_io_fsync_end,
+                       .cio_fini   = osc_io_fini
+               },
                 [CIT_MISC] = {
                         .cio_fini   = osc_io_fini
                 }