Whamcloud - gitweb
LU-3569 ofd: packing ost_idx in IDIF
[fs/lustre-release.git] / lustre / ofd / ofd_io.c
index 598a551..c21c903 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -46,8 +46,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
                           struct ofd_device *ofd, struct lu_fid *fid,
                           struct lu_attr *la, int niocount,
                           struct niobuf_remote *rnb, int *nr_local,
-                          struct niobuf_local *lnb,
-                          struct obd_trans_info *oti)
+                          struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, rc, tot_bytes = 0;
@@ -65,18 +64,20 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
                GOTO(unlock, rc = -ENOENT);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < niocount; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i,
                                 lnb + j, 0, ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(buf_put, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
 
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
        rc = dt_attr_get(env, ofd_object_child(fo), la,
                         ofd_object_capa(env, fo));
@@ -86,10 +87,8 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        rc = dt_read_prep(env, ofd_object_child(fo), lnb, *nr_local);
        if (unlikely(rc))
                GOTO(buf_put, rc);
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_READ_BYTES, tot_bytes);
-       ofd_counter_incr(exp, LPROC_OFD_STATS_READ,
-                        oti->oti_jobid, tot_bytes);
+
+       ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, tot_bytes);
        RETURN(0);
 
 buf_put:
@@ -105,8 +104,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct lu_attr *la, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            struct niobuf_remote *rnb, int *nr_local,
-                           struct niobuf_local *lnb,
-                           struct obd_trans_info *oti)
+                           struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, k, rc = 0, tot_bytes = 0;
@@ -150,52 +148,47 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                GOTO(out, rc = -ENOENT);
        }
 
-       /* Always sync if syncjournal parameter is set */
-       oti->oti_sync_write = ofd->ofd_syncjournal;
-
        /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some
         * space back if possible */
        ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo),
                                 rnb + i, lnb + j, 1,
                                 ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(err, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                for (k = 0; k < rc; k++) {
                        lnb[j+k].lnb_flags = rnb[i].rnb_flags;
                        if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED))
                                lnb[j+k].lnb_rc = -ENOSPC;
-                       if (!(rnb[i].rnb_flags & OBD_BRW_ASYNC))
-                               oti->oti_sync_write = 1;
+
                        /* remote client can't break through quota */
                        if (exp_connect_rmtclient(exp))
                                lnb[j+k].lnb_flags &= ~OBD_BRW_NOQUOTA;
                }
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
 
        rc = dt_write_prep(env, ofd_object_child(fo), lnb, *nr_local);
-       if (unlikely(rc != 0)) {
-               dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
-               ofd_read_unlock(env, fo);
-               /* ofd_grant_prepare_write() was called, so we must commit */
-               ofd_grant_commit(env, exp, rc);
-               GOTO(out, rc);
-       }
+       if (unlikely(rc != 0))
+               GOTO(err, rc);
 
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_WRITE_BYTES, tot_bytes);
-       ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE,
-                        oti->oti_jobid, tot_bytes);
+       ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes);
        RETURN(0);
+err:
+       dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
+       ofd_read_unlock(env, fo);
+       /* ofd_grant_prepare_write() was called, so we must commit */
+       ofd_grant_commit(env, exp, rc);
 out:
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
@@ -204,24 +197,41 @@ out:
        return rc;
 }
 
-int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
+int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
               struct niobuf_local *lnb, struct obd_trans_info *oti,
               struct lustre_capa *capa)
 {
+       struct tgt_session_info *tsi = tgt_ses_info(env);
        struct ofd_device       *ofd = ofd_exp(exp);
        struct ofd_thread_info  *info;
+       char                    *jobid;
        int                      rc = 0;
 
-       rc = lu_env_refill((struct lu_env *)env);
-       LASSERT(rc == 0);
-       info = ofd_info_init(env, exp);
+       if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
+               CERROR("%s: bulk has too many pages %d, which exceeds the"
+                      "maximum pages per RPC of %d\n",
+                      exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
+               RETURN(-EPROTO);
+       }
+
+       if (tgt_ses_req(tsi) == NULL) { /* echo client case */
+               LASSERT(oti != NULL);
+               lu_env_refill((struct lu_env *)env);
+               info = ofd_info_init(env, exp);
+               ofd_oti2info(info, oti);
+               jobid = oti->oti_jobid;
+       } else {
+               info = tsi2ofd_info(tsi);
+               jobid = tsi->tsi_jobid;
+       }
 
        LASSERT(oa != NULL);
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
                struct ofd_seq          *oseq;
+
                oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
                if (IS_ERR(oseq)) {
                        CERROR("%s: Can not find seq for "DOSTID
@@ -243,10 +253,7 @@ int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
        LASSERT(objcount == 1);
        LASSERT(obj->ioo_bufcnt > 0);
 
-       rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0);
-       if (unlikely(rc != 0))
-               RETURN(rc);
-
+       info->fti_fid = oa->o_oi.oi_fid;
        if (cmd == OBD_BRW_WRITE) {
                rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                                   capa, CAPA_OPC_OSS_WRITE);
@@ -254,7 +261,7 @@ int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
                        la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
                        rc = ofd_preprw_write(env, exp, ofd, &info->fti_fid,
                                              &info->fti_attr, oa, objcount,
-                                             obj, rnb, nr_local, lnb, oti);
+                                             obj, rnb, nr_local, lnb, jobid);
                }
        } else if (cmd == OBD_BRW_READ) {
                rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
@@ -263,7 +270,7 @@ int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
                        ofd_grant_prepare_read(env, exp, oa);
                        rc = ofd_preprw_read(env, exp, ofd, &info->fti_fid,
                                             &info->fti_attr, obj->ioo_bufcnt,
-                                            rnb, nr_local, lnb, oti);
+                                            rnb, nr_local, lnb, jobid);
                        obdo_from_la(oa, &info->fti_attr, LA_ATIME);
                }
        } else {
@@ -386,12 +393,57 @@ out:
        return rc;
 }
 
+struct ofd_soft_sync_callback { /* state carried by one ofd_cb_soft_sync() callback */
+       struct dt_txn_commit_cb  ossc_cb; /* embedded; ofd_cb_soft_sync() recovers the container from it */
+       struct obd_export       *ossc_exp; /* export obtained via class_export_cb_get() */
+};
+/* Callback installed by ofd_soft_sync_cb_add(): zero the export's soft sync count, drop its ref, free ossc. */
+static void ofd_cb_soft_sync(struct lu_env *env, struct thandle *th,
+                            struct dt_txn_commit_cb *cb, int err) /* env, th and err are not used here */
+{
+       struct ofd_soft_sync_callback   *ossc;
+
+       ossc = container_of(cb, struct ofd_soft_sync_callback, ossc_cb); /* cb is the embedded ossc_cb member */
+
+       CDEBUG(D_INODE, "export %p soft sync count is reset\n", ossc->ossc_exp);
+       atomic_set(&ossc->ossc_exp->exp_filter_data.fed_soft_sync_count, 0);
+
+       class_export_cb_put(ossc->ossc_exp); /* pairs with class_export_cb_get() in ofd_soft_sync_cb_add() */
+       OBD_FREE_PTR(ossc); /* allocated with OBD_ALLOC_PTR() in ofd_soft_sync_cb_add() */
+}
+/* Attach an ofd_cb_soft_sync() callback for @exp to @th; returns 0, -ENOMEM, or the nonzero rc of dt_trans_cb_add(). */
+static int ofd_soft_sync_cb_add(struct thandle *th, struct obd_export *exp)
+{
+       struct ofd_soft_sync_callback           *ossc;
+       struct dt_txn_commit_cb                 *dcb;
+       int                                      rc;
+
+       OBD_ALLOC_PTR(ossc);
+       if (ossc == NULL)
+               return -ENOMEM;
+
+       ossc->ossc_exp = class_export_cb_get(exp); /* released by ofd_cb_soft_sync() or by the failure path below */
+
+       dcb = &ossc->ossc_cb;
+       dcb->dcb_func = ofd_cb_soft_sync;
+       CFS_INIT_LIST_HEAD(&dcb->dcb_linkage);
+       strncpy(dcb->dcb_name, "ofd_cb_soft_sync", MAX_COMMIT_CB_STR_LEN);
+       dcb->dcb_name[MAX_COMMIT_CB_STR_LEN - 1] = '\0'; /* strncpy() alone does not guarantee termination */
+
+       rc = dt_trans_cb_add(th, dcb);
+       if (rc) { /* nonzero rc: undo the reference and the allocation here */
+               class_export_cb_put(exp);
+               OBD_FREE_PTR(ossc);
+       }
+
+       return rc;
+}
+
 static int
-ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
-                  struct lu_fid *fid, struct lu_attr *la,
-                  struct filter_fid *ff, int objcount,
-                  int niocount, struct niobuf_local *lnb,
-                  struct obd_trans_info *oti, int old_rc)
+ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
+                  struct ofd_device *ofd, struct lu_fid *fid,
+                  struct lu_attr *la, struct filter_fid *ff, int objcount,
+                  int niocount, struct niobuf_local *lnb, int old_rc)
 {
        struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_object       *fo;
@@ -399,6 +451,10 @@ ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
        struct thandle          *th;
        int                      rc = 0;
        int                      retries = 0;
+       int                      i;
+       struct filter_export_data *fed = &exp->exp_filter_data;
+       bool                     soft_sync = false;
+       bool                     cb_registered = false;
 
        ENTRY;
 
@@ -431,7 +487,17 @@ retry:
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
 
-       th->th_sync |= oti->oti_sync_write;
+       th->th_sync |= ofd->ofd_syncjournal;
+       if (th->th_sync == 0) {
+               for (i = 0; i < niocount; i++) {
+                       if (!(lnb[i].lnb_flags & OBD_BRW_ASYNC)) {
+                               th->th_sync = 1;
+                               break;
+                       }
+                       if (lnb[i].lnb_flags & OBD_BRW_SOFT_SYNC)
+                               soft_sync = true;
+               }
+       }
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET))
                GOTO(out_stop, rc = -EINPROGRESS);
@@ -470,6 +536,12 @@ out_stop:
        if (rc == -ENOSPC)
                th->th_sync = 1;
 
+       /* do this before trans stop in case commit has finished */
+       if (!th->th_sync && soft_sync && !cb_registered) {
+               ofd_soft_sync_cb_add(th, exp);
+               cb_registered = true;
+       }
+
        ofd_trans_stop(env, ofd, th, rc);
        if (rc == -ENOSPC && retries++ < 3) {
                CDEBUG(D_INODE, "retry after force commit, retries:%d\n",
@@ -477,6 +549,13 @@ out_stop:
                goto retry;
        }
 
+       if (!soft_sync)
+               /* reset fed_soft_sync_count upon non-SOFT_SYNC RPC */
+               atomic_set(&fed->fed_soft_sync_count, 0);
+       else if (atomic_inc_return(&fed->fed_soft_sync_count) ==
+                ofd->ofd_soft_sync_limit)
+               dt_commit_async(env, ofd->ofd_osd);
+
 out:
        dt_bufs_put(env, o, lnb, niocount);
        ofd_read_unlock(env, fo);
@@ -493,21 +572,16 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                 struct niobuf_local *lnb, struct obd_trans_info *oti,
                 int old_rc)
 {
-       struct ofd_thread_info  *info;
+       struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_mod_data     *fmd;
        __u64                    valid;
        struct ofd_device       *ofd = ofd_exp(exp);
        struct filter_fid       *ff = NULL;
        int                      rc = 0;
 
-       info = ofd_info(env);
-       ofd_oti2info(info, oti);
-
        LASSERT(npages > 0);
 
-       rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0);
-       if (unlikely(rc != 0))
-               RETURN(rc);
+       info->fti_fid = oa->o_oi.oi_fid;
        if (cmd == OBD_BRW_WRITE) {
                /* Don't update timestamps if this write is older than a
                 * setattr which modifies the timestamps. b=10150 */
@@ -529,9 +603,9 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        ofd_prepare_fidea(ff, oa);
                }
 
-               rc = ofd_commitrw_write(env, ofd, &info->fti_fid,
+               rc = ofd_commitrw_write(env, exp, ofd, &info->fti_fid,
                                        &info->fti_attr, ff, objcount, npages,
-                                       lnb, oti, old_rc);
+                                       lnb, old_rc);
                if (rc == 0)
                        obdo_from_la(oa, &info->fti_attr,
                                     OFD_VALID_FLAGS | LA_GID | LA_UID);
@@ -577,7 +651,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        }
                }
                rc = ofd_commitrw_read(env, ofd, &info->fti_fid, objcount,
-                                         npages, lnb);
+                                      npages, lnb);
                if (old_rc)
                        rc = old_rc;
        } else {
@@ -585,6 +659,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                rc = -EPROTO;
        }
 
-       ofd_info2oti(info, oti);
+       if (oti != NULL)
+               ofd_info2oti(info, oti);
        RETURN(rc);
 }