Whamcloud - gitweb
LU-3467 ofd: use unified handler for OST requests
[fs/lustre-release.git] / lustre / ofd / ofd_io.c
index fa74959..a67e6c7 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 
 #include "ofd_internal.h"
 
-static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
-                          struct lu_fid *fid, struct lu_attr *la, int niocount,
+static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
+                          struct ofd_device *ofd, struct lu_fid *fid,
+                          struct lu_attr *la, int niocount,
                           struct niobuf_remote *rnb, int *nr_local,
-                          struct niobuf_local *lnb)
+                          struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, rc, tot_bytes = 0;
@@ -63,18 +64,20 @@ static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
                GOTO(unlock, rc = -ENOENT);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < niocount; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i,
                                 lnb + j, 0, ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(buf_put, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
 
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
        rc = dt_attr_get(env, ofd_object_child(fo), la,
                         ofd_object_capa(env, fo));
@@ -84,8 +87,8 @@ static int ofd_preprw_read(const struct lu_env *env, struct ofd_device *ofd,
        rc = dt_read_prep(env, ofd_object_child(fo), lnb, *nr_local);
        if (unlikely(rc))
                GOTO(buf_put, rc);
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_READ_BYTES, tot_bytes);
+
+       ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, tot_bytes);
        RETURN(0);
 
 buf_put:
@@ -101,8 +104,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct lu_attr *la, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            struct niobuf_remote *rnb, int *nr_local,
-                           struct niobuf_local *lnb,
-                           struct obd_trans_info *oti)
+                           struct niobuf_local *lnb, char *jobid)
 {
        struct ofd_object       *fo;
        int                      i, j, k, rc = 0, tot_bytes = 0;
@@ -139,103 +141,138 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 
        ofd_read_lock(env, fo);
        if (!ofd_object_exists(fo)) {
-               CERROR("%s: BRW to missing obj "LPU64"/"LPU64"\n",
-                      exp->exp_obd->obd_name, obj->ioo_id, obj->ioo_seq);
+               CERROR("%s: BRW to missing obj "DOSTID"\n",
+                      exp->exp_obd->obd_name, POSTID(&obj->ioo_oid));
                ofd_read_unlock(env, fo);
                ofd_object_put(env, fo);
                GOTO(out, rc = -ENOENT);
        }
 
-       /* Always sync if syncjournal parameter is set */
-       oti->oti_sync_write = ofd->ofd_syncjournal;
-
        /* Process incoming grant info, set OBD_BRW_GRANTED flag and grant some
         * space back if possible */
        ofd_grant_prepare_write(env, exp, oa, rnb, obj->ioo_bufcnt);
 
        /* parse remote buffers to local buffers and prepare the latter */
+       *nr_local = 0;
        for (i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
                rc = dt_bufs_get(env, ofd_object_child(fo),
                                 rnb + i, lnb + j, 1,
                                 ofd_object_capa(env, fo));
-               LASSERT(rc > 0);
+               if (unlikely(rc < 0))
+                       GOTO(err, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                for (k = 0; k < rc; k++) {
                        lnb[j+k].lnb_flags = rnb[i].rnb_flags;
                        if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED))
                                lnb[j+k].lnb_rc = -ENOSPC;
-                       if (!(rnb[i].rnb_flags & OBD_BRW_ASYNC))
-                               oti->oti_sync_write = 1;
+                       /* remote client can't break through quota */
+                       if (exp_connect_rmtclient(exp))
+                               lnb[j+k].lnb_flags &= ~OBD_BRW_NOQUOTA;
                }
                j += rc;
+               *nr_local += rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
-       *nr_local = j;
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
 
-       lprocfs_counter_add(ofd_obd(ofd)->obd_stats,
-                           LPROC_OFD_WRITE_BYTES, tot_bytes);
        rc = dt_write_prep(env, ofd_object_child(fo), lnb, *nr_local);
-       if (unlikely(rc != 0)) {
-               dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
-               ofd_read_unlock(env, fo);
-               /* ofd_grant_prepare_write() was called, so we must commit */
-               ofd_grant_commit(env, exp, rc);
-       }
+       if (unlikely(rc != 0))
+               GOTO(err, rc);
 
-       RETURN(rc);
+       ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, tot_bytes);
+       RETURN(0);
+err:
+       dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
+       ofd_read_unlock(env, fo);
+       /* ofd_grant_prepare_write() was called, so we must commit */
+       ofd_grant_commit(env, exp, rc);
 out:
        /* let's still process incoming grant information packed in the oa,
         * but without enforcing grant since we won't proceed with the write.
         * Just like a read request actually. */
        ofd_grant_prepare_read(env, exp, oa);
-       RETURN(rc);
+       return rc;
 }
 
-int ofd_preprw(const struct lu_env* env, int cmd, struct obd_export *exp,
+int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
               struct niobuf_local *lnb, struct obd_trans_info *oti,
               struct lustre_capa *capa)
 {
+       struct tgt_session_info *tsi = tgt_ses_info(env);
        struct ofd_device       *ofd = ofd_exp(exp);
        struct ofd_thread_info  *info;
+       char                    *jobid;
        int                      rc = 0;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT) &&
-           ofd->ofd_destroys_in_progress == 0) {
-               /* don't fail lookups for orphan recovery, it causes
-                * later LBUGs when objects still exist during precreate */
-               CDEBUG(D_INFO, "*** obd_fail_loc=%x ***\n",OBD_FAIL_OST_ENOENT);
-               RETURN(-ENOENT);
+       if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
+               CERROR("%s: bulk has too many pages %d, which exceeds the"
+                      "maximum pages per RPC of %d\n",
+                      exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
+               RETURN(-EPROTO);
+       }
+
+       if (tgt_ses_req(tsi) == NULL) { /* echo client case */
+               LASSERT(oti != NULL);
+               lu_env_refill((struct lu_env *)env);
+               info = ofd_info_init(env, exp);
+               ofd_oti2info(info, oti);
+               jobid = oti->oti_jobid;
+       } else {
+               info = tsi2ofd_info(tsi);
+               jobid = tsi->tsi_jobid;
        }
 
-       info = ofd_info_init(env, exp);
+       LASSERT(oa != NULL);
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
+               struct ofd_seq          *oseq;
+
+               oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
+               if (IS_ERR(oseq)) {
+                       CERROR("%s: Can not find seq for "DOSTID
+                              ": rc = %ld\n", ofd_name(ofd), POSTID(&oa->o_oi),
+                              PTR_ERR(oseq));
+                       RETURN(-EINVAL);
+               }
+
+               if (oseq->os_destroys_in_progress == 0) {
+                       /* don't fail lookups for orphan recovery, it causes
+                        * later LBUGs when objects still exist during
+                        * precreate */
+                       ofd_seq_put(env, oseq);
+                       RETURN(-ENOENT);
+               }
+               ofd_seq_put(env, oseq);
+       }
 
        LASSERT(objcount == 1);
        LASSERT(obj->ioo_bufcnt > 0);
 
-       fid_ostid_unpack(&info->fti_fid, &oa->o_oi, 0);
+       rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0);
+       if (unlikely(rc != 0))
+               RETURN(rc);
+
        if (cmd == OBD_BRW_WRITE) {
-               rc = ofd_auth_capa(exp, &info->fti_fid, oa->o_seq,
+               rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                                   capa, CAPA_OPC_OSS_WRITE);
                if (rc == 0) {
-                       LASSERT(oa != NULL);
                        la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
                        rc = ofd_preprw_write(env, exp, ofd, &info->fti_fid,
                                              &info->fti_attr, oa, objcount,
-                                             obj, rnb, nr_local, lnb, oti);
+                                             obj, rnb, nr_local, lnb, jobid);
                }
        } else if (cmd == OBD_BRW_READ) {
-               rc = ofd_auth_capa(exp, &info->fti_fid, oa->o_seq,
+               rc = ofd_auth_capa(exp, &info->fti_fid, ostid_seq(&oa->o_oi),
                                   capa, CAPA_OPC_OSS_READ);
                if (rc == 0) {
                        ofd_grant_prepare_read(env, exp, oa);
-                       rc = ofd_preprw_read(env, ofd, &info->fti_fid,
+                       rc = ofd_preprw_read(env, exp, ofd, &info->fti_fid,
                                             &info->fti_attr, obj->ioo_bufcnt,
-                                            rnb, nr_local, lnb);
+                                            rnb, nr_local, lnb, jobid);
                        obdo_from_la(oa, &info->fti_attr, LA_ATIME);
                }
        } else {
@@ -362,8 +399,7 @@ static int
 ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
                   struct lu_fid *fid, struct lu_attr *la,
                   struct filter_fid *ff, int objcount,
-                  int niocount, struct niobuf_local *lnb,
-                  struct obd_trans_info *oti, int old_rc)
+                  int niocount, struct niobuf_local *lnb, int old_rc)
 {
        struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_object       *fo;
@@ -371,6 +407,7 @@ ofd_commitrw_write(const struct lu_env *env, struct ofd_device *ofd,
        struct thandle          *th;
        int                      rc = 0;
        int                      retries = 0;
+       int                      i;
 
        ENTRY;
 
@@ -403,7 +440,15 @@ retry:
        if (IS_ERR(th))
                GOTO(out, rc = PTR_ERR(th));
 
-       th->th_sync |= oti->oti_sync_write;
+       th->th_sync |= ofd->ofd_syncjournal;
+       if (th->th_sync == 0) {
+               for (i = 0; i < niocount; i++) {
+                       if (!(lnb[i].lnb_flags & OBD_BRW_ASYNC)) {
+                               th->th_sync = 1;
+                               break;
+                       }
+               }
+       }
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET))
                GOTO(out_stop, rc = -EINPROGRESS);
@@ -434,7 +479,7 @@ retry:
        }
 
        /* get attr to return */
-       dt_attr_get(env, o, la, ofd_object_capa(env, fo));
+       rc = dt_attr_get(env, o, la, ofd_object_capa(env, fo));
 
 out_stop:
        /* Force commit to make the just-deleted blocks
@@ -465,19 +510,18 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                 struct niobuf_local *lnb, struct obd_trans_info *oti,
                 int old_rc)
 {
-       struct ofd_thread_info  *info;
+       struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_mod_data     *fmd;
        __u64                    valid;
        struct ofd_device       *ofd = ofd_exp(exp);
        struct filter_fid       *ff = NULL;
        int                      rc = 0;
 
-       info = ofd_info(env);
-       ofd_oti2info(info, oti);
-
        LASSERT(npages > 0);
 
-       fid_ostid_unpack(&info->fti_fid, &oa->o_oi, 0);
+       rc = ostid_to_fid(&info->fti_fid, &oa->o_oi, 0);
+       if (unlikely(rc != 0))
+               RETURN(rc);
        if (cmd == OBD_BRW_WRITE) {
                /* Don't update timestamps if this write is older than a
                 * setattr which modifies the timestamps. b=10150 */
@@ -501,19 +545,34 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
 
                rc = ofd_commitrw_write(env, ofd, &info->fti_fid,
                                        &info->fti_attr, ff, objcount, npages,
-                                       lnb, oti, old_rc);
+                                       lnb, old_rc);
                if (rc == 0)
                        obdo_from_la(oa, &info->fti_attr,
                                     OFD_VALID_FLAGS | LA_GID | LA_UID);
                else
                        obdo_from_la(oa, &info->fti_attr, LA_GID | LA_UID);
 
-               if (ofd_grant_prohibit(exp, ofd))
-                       /* Trick to prevent clients from waiting for bulk write
-                        * in flight since they won't get any grant in the reply
-                        * anyway so they had better firing the sync write RPC
-                        * straight away */
+               /* don't report overquota flag if we failed before reaching
+                * commit */
+               if (old_rc == 0 && (rc == 0 || rc == -EDQUOT)) {
+                       /* return the overquota flags to client */
+                       if (lnb[0].lnb_flags & OBD_BRW_OVER_USRQUOTA) {
+                               if (oa->o_valid & OBD_MD_FLFLAGS)
+                                       oa->o_flags |= OBD_FL_NO_USRQUOTA;
+                               else
+                                       oa->o_flags = OBD_FL_NO_USRQUOTA;
+                       }
+
+                       if (lnb[0].lnb_flags & OBD_BRW_OVER_GRPQUOTA) {
+                               if (oa->o_valid & OBD_MD_FLFLAGS)
+                                       oa->o_flags |= OBD_FL_NO_GRPQUOTA;
+                               else
+                                       oa->o_flags = OBD_FL_NO_GRPQUOTA;
+                       }
+
+                       oa->o_valid |= OBD_MD_FLFLAGS;
                        oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
+               }
        } else if (cmd == OBD_BRW_READ) {
                struct ldlm_namespace *ns = ofd->ofd_namespace;
 
@@ -523,7 +582,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                if (oa && ns && ns->ns_lvbo && ns->ns_lvbo->lvbo_update) {
                         struct ldlm_resource *rs = NULL;
 
-                       ofd_build_resid(&info->fti_fid, &info->fti_resid);
+                       ost_fid_build_resid(&info->fti_fid, &info->fti_resid);
                        rs = ldlm_resource_get(ns, NULL, &info->fti_resid,
                                               LDLM_EXTENT, 0);
                        if (rs != NULL) {
@@ -532,7 +591,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        }
                }
                rc = ofd_commitrw_read(env, ofd, &info->fti_fid, objcount,
-                                         npages, lnb);
+                                      npages, lnb);
                if (old_rc)
                        rc = old_rc;
        } else {
@@ -540,6 +599,7 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                rc = -EPROTO;
        }
 
-       ofd_info2oti(info, oti);
+       if (oti != NULL)
+               ofd_info2oti(info, oti);
        RETURN(rc);
 }