Whamcloud - gitweb
LU-3569 ofd: packing ost_idx in IDIF
[fs/lustre-release.git] / lustre / ofd / ofd_io.c
index 320deab..c21c903 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 
 #include "ofd_internal.h"
 
-static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
-                          struct lu_fid *fid, struct lu_attr *la, int niocount,
+static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
+                          struct ofd_device *ofd, struct lu_fid *fid,
+                          struct lu_attr *la, int niocount,
                           struct niobuf_remote *rnb, int *nr_local,
-                          struct niobuf_local *lnb)
+                          struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, rc, tot_bytes = 0;
@@ -63,18 +64,20 @@ static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
                GOTO(unlock, rc = -ENOENT);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < niocount; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i,
                                 lnb + j, 0, ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(buf_put, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
 
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
        rc = dt_attr_get(env, ofd_object_child(fo), la,
                         ofd_object_capa(env, fo));
@@ -84,8 +87,8 @@ static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
        rc = dt_read_prep(env, ofd_object_child(fo), lnb, *nr_local);
        if (unlikely(rc))
                GOTO(buf_put, rc);
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_READ_BYTES, tot_bytes);
+
+       ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, tot_bytes);
        RETURN(0);
 
 buf_put:
@@ -101,8 +104,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct lu_attr *la, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            struct niobuf_remote *rnb, int *nr_local,
-                           struct niobuf_local *lnb,
-                           struct obd_trans_info *oti)
+                           struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, k, rc = 0, tot_bytes = 0;
@@ -139,106 +141,136 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 
        ofd_read_lock(env, fo);
        if (!ofd_object_exists(fo)) {
-               CERROR("%s: BRW to missing obj "LPU64"/"LPU64"\n",
-                      exp->exp_obd->obd_name, obj->ioo_id, obj->ioo_seq);
+               CERROR("%s: BRW to missing obj "DOSTID"\n",
+                      exp->exp_obd->obd_name, POSTID(&obj->ioo_oid));
                ofd_read_unlock(env, fo);
                ofd_object_put(env, fo);
                GOTO(out, rc = -ENOENT);
        }
 
-       /* Always sync if syncjournal parameter is set */
-       oti->oti_sync_write = ofd->ofd_syncjournal;
-
        /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some
         * space back if possible */
        ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo),
                                 rnb + i, lnb + j, 1,
                                 ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(err, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                for (k = 0; k < rc; k++) {
                        lnb[j+k].lnb_flags = rnb[i].rnb_flags;
                        if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED))
                                lnb[j+k].lnb_rc = -ENOSPC;
-                       if (!(rnb[i].rnb_flags & OBD_BRW_ASYNC))
-                               oti->oti_sync_write = 1;
+
                        /* remote client can't break through quota */
                        if (exp_connect_rmtclient(exp))
                                lnb[j+k].lnb_flags &= ~OBD_BRW_NOQUOTA;
                }
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
 
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_WRITE_BYTES, tot_bytes);
        rc = dt_write_prep(env, ofd_object_child(fo), lnb, *nr_local);
-       if (unlikely(rc != 0)) {
-               dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
-               ofd_read_unlock(env, fo);
-               /* ofd_grant_prepare_write() was called, so we must commit */
-               ofd_grant_commit(env, exp, rc);
-       }
+       if (unlikely(rc != 0))
+               GOTO(err, rc);
 
-       RETURN(rc);
+       ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes);
+       RETURN(0);
+err:
+       dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
+       ofd_read_unlock(env, fo);
+       /* ofd_grant_prepare_write() was called, so we must commit */
+       ofd_grant_commit(env, exp, rc);
 out:
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
         * Just like a read request actually. */
        ofd_grant_prepare_read(env, exp, oa);
-       RETURN(rc);
+       return rc;
 }
 
-int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
+int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
               struct niobuf_local *lnb, struct obd_trans_info *oti,
               struct lustre_capa *capa)
 {
+       struct tgt_session_info *tsi = tgt_ses_info(env);
        struct ofd_device       *ofd = ofd_exp(exp);
        struct ofd_thread_info  *info;
+       char                    *jobid;
        int                      rc = 0;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT) &&
-           ofd->ofd_destroys_in_progress == 0) {
-               /* don't fail lookups for orphan recovery, it causes
-                * later LBUGs when objects still exist during precreate */
-               CDEBUG(D_INFO, "*** obd_fail_loc=%x ***\n",OBD_FAIL_OST_ENOENT);
-               RETURN(-ENOENT);
+       if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
+               CERROR("%s: bulk has too many pages %d, which exceeds the"
+                      "maximum pages per RPC of %d\n",
+                      exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
+               RETURN(-EPROTO);
        }
 
-       info = ofd_info_init(env, exp);
+       if (tgt_ses_req(tsi) == NULL) { /* echo client case */
+               LASSERT(oti != NULL);
+               lu_env_refill((struct lu_env *)env);
+               info = ofd_info_init(env, exp);
+               ofd_oti2info(info, oti);
+               jobid = oti->oti_jobid;
+       } else {
+               info = tsi2ofd_info(tsi);
+               jobid = tsi->tsi_jobid;
+       }
+
+       LASSERT(oa != NULL);
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
+               struct ofd_seq          *oseq;
+
+               oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
+               if (IS_ERR(oseq)) {
+                       CERROR("%s: Can not find seq for "DOSTID
+                              ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi),
+                              PTR_ERR(oseq));
+                       RETURN(-EINVAL);
+               }
+
+               if (oseq->os_destroys_in_progress == 0) {
+                       /* don't fail lookups for orphan recovery, it causes
+                        * later LBUGs when objects still exist during
+                        * precreate */
+                       ofd_seq_put(env, oseq);
+                       RETURN(-ENOENT);
+               }
+               ofd_seq_put(env, oseq);
+       }
 
        LASSERT(objcount == 1);
        LASSERT(obj->ioo_bufcnt > 0);
 
-       fid_ostid_unpack(&info->fti_fid, &oa->o_oi, 0);
+       info->fti_fid = oa->o_oi.oi_fid;
        if (cmd == OBD_BRW_WRITE) {
-               rc = ofd_auth_capa(exp, &info->fti_fid, oa->o_seq,
+               rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                                   capa, CAPA_OPC_OSS_WRITE);
                if (rc == 0) {
-                       LASSERT(oa != NULL);
                        la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
                        rc = ofd_preprw_write(env, exp, ofd, &info->fti_fid,
                                              &info->fti_attr, oa, objcount,
-                                             obj, rnb, nr_local, lnb, oti);
+                                             obj, rnb, nr_local, lnb, jobid);
                }
        } else if (cmd == OBD_BRW_READ) {
-               rc = ofd_auth_capa(exp, &info->fti_fid, oa->o_seq,
+               rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                                   capa, CAPA_OPC_OSS_READ);
                if (rc == 0) {
                        ofd_grant_prepare_read(env, exp, oa);
-                       rc = ofd_preprw_read(env, ofd, &info->fti_fid,
+                       rc = ofd_preprw_read(env, exp, ofd, &info->fti_fid,
                                             &info->fti_attr, obj->ioo_bufcnt,
-                                            rnb, nr_local, lnb);
+                                            rnb, nr_local, lnb, jobid);
                        obdo_from_la(oa, &info->fti_attr, LA_ATIME);
                }
        } else {
@@ -361,12 +393,57 @@ out:
        return rc;
 }
 
+struct ofd_soft_sync_callback { /* context wrapper used by ofd_cb_soft_sync() */
+       struct dt_txn_commit_cb  ossc_cb;  /* embedded descriptor; container_of() anchor */
+       struct obd_export       *ossc_exp; /* export obtained via class_export_cb_get() */
+};
+/*
+ * Transaction callback installed by ofd_soft_sync_cb_add() (as dcb_func).
+ * Resets the export's fed_soft_sync_count to zero, then releases what the
+ * registration acquired: the export callback reference and the wrapper
+ * allocation.  The env, th and err arguments are not used here.
+ * NOTE(review): presumably invoked by the dt layer once the transaction
+ * has committed -- confirm against dt_trans_cb_add().
+ */
+static void ofd_cb_soft_sync(struct lu_env *env, struct thandle *th,
+                            struct dt_txn_commit_cb *cb, int err)
+{
+       struct ofd_soft_sync_callback   *ossc;
+
+       ossc = container_of(cb, struct ofd_soft_sync_callback, ossc_cb);
+       /* start a fresh soft-sync count for this export */
+       CDEBUG(D_INODE, "export %p soft sync count is reset\n", ossc->ossc_exp);
+       atomic_set(&ossc->ossc_exp->exp_filter_data.fed_soft_sync_count, 0);
+       /* drop the reference taken at registration, then free the wrapper */
+       class_export_cb_put(ossc->ossc_exp);
+       OBD_FREE_PTR(ossc);
+}
+/*
+ * Attach a soft-sync callback for export exp to transaction handle th.
+ * Allocates an ofd_soft_sync_callback wrapper, takes an export callback
+ * reference with class_export_cb_get(), names the callback and registers
+ * ofd_cb_soft_sync() through dt_trans_cb_add().  When registration
+ * succeeds, ofd_cb_soft_sync() is what releases the reference and frees
+ * the wrapper; when it fails, both are released here.
+ * Returns -ENOMEM if the allocation fails, otherwise the result of
+ * dt_trans_cb_add().
+ * NOTE(review): the call site visible in ofd_commitrw_write() discards
+ * this return value.
+ */
+static int ofd_soft_sync_cb_add(struct thandle *th, struct obd_export *exp)
+{
+       struct ofd_soft_sync_callback           *ossc;
+       struct dt_txn_commit_cb                 *dcb;
+       int                                      rc;
+
+       OBD_ALLOC_PTR(ossc);
+       if (ossc == NULL)
+               return -ENOMEM;
+
+       ossc->ossc_exp = class_export_cb_get(exp);
+
+       dcb = &ossc->ossc_cb;
+       dcb->dcb_func = ofd_cb_soft_sync;
+       CFS_INIT_LIST_HEAD(&dcb->dcb_linkage);
+       strncpy(dcb->dcb_name, "ofd_cb_soft_sync", MAX_COMMIT_CB_STR_LEN);
+       dcb->dcb_name[MAX_COMMIT_CB_STR_LEN - 1] = '\0'; /* strncpy() may leave the buffer unterminated */
+
+       rc = dt_trans_cb_add(th, dcb);
+       if (rc) { /* registration failed: undo the reference and allocation */
+               class_export_cb_put(exp);
+               OBD_FREE_PTR(ossc);
+       }
+
+       return rc;
+}
+
 static int
-ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
-                  struct lu_fid *fid, struct lu_attr *la,
-                  struct filter_fid *ff, int objcount,
-                  int niocount, struct niobuf_local *lnb,
-                  struct obd_trans_info *oti, int old_rc)
+ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
+                  struct ofd_device *ofd, struct lu_fid *fid,
+                  struct lu_attr *la, struct filter_fid *ff, int objcount,
+                  int niocount, struct niobuf_local *lnb, int old_rc)
 {
        struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_object       *fo;
@@ -374,6 +451,10 @@ ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
        struct thandle          *th;
        int                      rc = 0;
        int                      retries = 0;
+       int                      i;
+       struct filter_export_data *fed = &exp->exp_filter_data;
+       bool                     soft_sync = false;
+       bool                     cb_registered = false;
 
        ENTRY;
 
@@ -406,7 +487,17 @@ retry:
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
 
-       th->th_sync |= oti->oti_sync_write;
+       th->th_sync |= ofd->ofd_syncjournal;
+       if (th->th_sync == 0) {
+               for (i = 0; i < niocount; i++) {
+                       if (!(lnb[i].lnb_flags & OBD_BRW_ASYNC)) {
+                               th->th_sync = 1;
+                               break;
+                       }
+                       if (lnb[i].lnb_flags & OBD_BRW_SOFT_SYNC)
+                               soft_sync = true;
+               }
+       }
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET))
                GOTO(out_stop, rc = -EINPROGRESS);
@@ -437,7 +528,7 @@ retry:
        }
 
        /* get attr to return */
-       dt_attr_get(env, o, la, ofd_object_capa(env, fo));
+       rc = dt_attr_get(env, o, la, ofd_object_capa(env, fo));
 
 out_stop:
        /* Force commit to make the just-deleted blocks
@@ -445,6 +536,12 @@ out_stop:
        if (rc == -ENOSPC)
                th->th_sync = 1;
 
+       /* do this before trans stop in case commit has finished */
+       if (!th->th_sync && soft_sync && !cb_registered) {
+               ofd_soft_sync_cb_add(th, exp);
+               cb_registered = true;
+       }
+
        ofd_trans_stop(env, ofd, th, rc);
        if (rc == -ENOSPC && retries++ < 3) {
                CDEBUG(D_INODE, "retry after force commit, retries:%d\n",
@@ -452,6 +549,13 @@ out_stop:
                goto retry;
        }
 
+       if (!soft_sync)
+               /* reset fed_soft_sync_count upon non-SOFT_SYNC RPC */
+               atomic_set(&fed->fed_soft_sync_count, 0);
+       else if (atomic_inc_return(&fed->fed_soft_sync_count) ==
+                ofd->ofd_soft_sync_limit)
+               dt_commit_async(env, ofd->ofd_osd);
+
 out:
        dt_bufs_put(env, o, lnb, niocount);
        ofd_read_unlock(env, fo);
@@ -468,19 +572,16 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                 struct niobuf_local *lnb, struct obd_trans_info *oti,
                 int old_rc)
 {
-       struct ofd_thread_info  *info;
+       struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_mod_data     *fmd;
        __u64                    valid;
        struct ofd_device       *ofd = ofd_exp(exp);
        struct filter_fid       *ff = NULL;
        int                      rc = 0;
 
-       info = ofd_info(env);
-       ofd_oti2info(info, oti);
-
        LASSERT(npages > 0);
 
-       fid_ostid_unpack(&info->fti_fid, &oa->o_oi, 0);
+       info->fti_fid = oa->o_oi.oi_fid;
        if (cmd == OBD_BRW_WRITE) {
                /* Don't update timestamps if this write is older than a
                 * setattr which modifies the timestamps. b=10150 */
@@ -502,9 +603,9 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        ofd_prepare_fidea(ff, oa);
                }
 
-               rc = ofd_commitrw_write(env, ofd, &info->fti_fid,
+               rc = ofd_commitrw_write(env, exp, ofd, &info->fti_fid,
                                        &info->fti_attr, ff, objcount, npages,
-                                       lnb, oti, old_rc);
+                                       lnb, old_rc);
                if (rc == 0)
                        obdo_from_la(oa, &info->fti_attr,
                                     OFD_VALID_FLAGS | LA_GID | LA_UID);
@@ -541,7 +642,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                if (oa && ns && ns->ns_lvbo && ns->ns_lvbo->lvbo_update) {
                         struct ldlm_resource *rs = NULL;
 
-                       ofd_build_resid(&info->fti_fid, &info->fti_resid);
+                       ost_fid_build_resid(&info->fti_fid, &info->fti_resid);
                        rs = ldlm_resource_get(ns, NULL, &info->fti_resid,
                                               LDLM_EXTENT, 0);
                        if (rs != NULL) {
@@ -550,7 +651,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        }
                }
                rc = ofd_commitrw_read(env, ofd, &info->fti_fid, objcount,
-                                         npages, lnb);
+                                      npages, lnb);
                if (old_rc)
                        rc = old_rc;
        } else {
@@ -558,6 +659,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                rc = -EPROTO;
        }
 
-       ofd_info2oti(info, oti);
+       if (oti != NULL)
+               ofd_info2oti(info, oti);
        RETURN(rc);
 }