Whamcloud - gitweb
LU-13309 ofd: optimize the brw codepath
[fs/lustre-release.git] / lustre / ofd / ofd_io.c
index 092e592..d86bb5c 100644 (file)
@@ -165,7 +165,6 @@ static int ofd_inconsistency_verification_main(void *args)
        struct ptlrpc_thread *thread = &ofd->ofd_inconsistency_thread;
        struct ofd_inconsistency_item *oii;
        struct lfsck_req_local *lrl = NULL;
-       struct l_wait_info lwi = { 0 };
        int rc;
        ENTRY;
 
@@ -200,10 +199,9 @@ static int ofd_inconsistency_verification_main(void *args)
                }
 
                spin_unlock(&ofd->ofd_inconsistency_lock);
-               l_wait_event(thread->t_ctl_waitq,
-                            !list_empty(&ofd->ofd_inconsistency_list) ||
-                            !thread_is_running(thread),
-                            &lwi);
+               wait_event_idle(thread->t_ctl_waitq,
+                               !list_empty(&ofd->ofd_inconsistency_list) ||
+                               !thread_is_running(thread));
                spin_lock(&ofd->ofd_inconsistency_lock);
        }
 
@@ -254,7 +252,6 @@ out:
 int ofd_start_inconsistency_verification_thread(struct ofd_device *ofd)
 {
        struct ptlrpc_thread    *thread = &ofd->ofd_inconsistency_thread;
-       struct l_wait_info       lwi    = { 0 };
        struct task_struct      *task;
        int                      rc;
 
@@ -275,10 +272,9 @@ int ofd_start_inconsistency_verification_thread(struct ofd_device *ofd)
                       ofd_name(ofd), rc);
        } else {
                rc = 0;
-               l_wait_event(thread->t_ctl_waitq,
-                            thread_is_running(thread) ||
-                            thread_is_stopped(thread),
-                            &lwi);
+               wait_event_idle(thread->t_ctl_waitq,
+                               thread_is_running(thread) ||
+                               thread_is_stopped(thread));
        }
 
        return rc;
@@ -295,7 +291,6 @@ int ofd_start_inconsistency_verification_thread(struct ofd_device *ofd)
 int ofd_stop_inconsistency_verification_thread(struct ofd_device *ofd)
 {
        struct ptlrpc_thread    *thread = &ofd->ofd_inconsistency_thread;
-       struct l_wait_info       lwi    = { 0 };
 
        spin_lock(&ofd->ofd_inconsistency_lock);
        if (thread_is_init(thread) || thread_is_stopped(thread)) {
@@ -307,9 +302,8 @@ int ofd_stop_inconsistency_verification_thread(struct ofd_device *ofd)
        thread_set_flags(thread, SVC_STOPPING);
        spin_unlock(&ofd->ofd_inconsistency_lock);
        wake_up_all(&thread->t_ctl_waitq);
-       l_wait_event(thread->t_ctl_waitq,
-                    thread_is_stopped(thread),
-                    &lwi);
+       wait_event_idle(thread->t_ctl_waitq,
+                       thread_is_stopped(thread));
 
        return 0;
 }
@@ -511,6 +505,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
        struct ofd_object *fo;
        int i, j, rc, tot_bytes = 0;
        enum dt_bufs_type dbt = DT_BUFS_TYPE_READ;
+       int maxlnb = *nr_local;
 
        ENTRY;
        LASSERT(env != NULL);
@@ -520,6 +515,8 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
                RETURN(PTR_ERR(fo));
        LASSERT(fo != NULL);
 
+       ofd_info(env)->fti_obj = fo;
+
        ofd_read_lock(env, fo);
        if (!ofd_object_exists(fo))
                GOTO(unlock, rc = -ENOENT);
@@ -534,23 +531,24 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
                dbt |= DT_BUFS_TYPE_LOCAL;
 
        for (*nr_local = 0, i = 0, j = 0; i < niocount; i++) {
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF))
+                       rnb[i].rnb_len = 100 * 1024 * 1024;
+
                rc = dt_bufs_get(env, ofd_object_child(fo), rnb + i,
-                                lnb + j, dbt);
+                                lnb + j, maxlnb, dbt);
                if (unlikely(rc < 0))
                        GOTO(buf_put, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
                /* correct index for local buffers to continue with */
                j += rc;
                *nr_local += rc;
+               maxlnb -= rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
 
        LASSERT(*nr_local > 0 && *nr_local <= PTLRPC_MAX_BRW_PAGES);
-       rc = dt_attr_get(env, ofd_object_child(fo), la);
-       if (unlikely(rc))
-               GOTO(buf_put, rc);
-
        rc = dt_read_prep(env, ofd_object_child(fo), lnb, *nr_local);
        if (unlikely(rc))
                GOTO(buf_put, rc);
@@ -599,6 +597,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        struct ofd_object *fo;
        int i, j, k, rc = 0, tot_bytes = 0;
        enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE;
+       int maxlnb = *nr_local;
 
        ENTRY;
        LASSERT(env != NULL);
@@ -671,6 +670,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                GOTO(out, rc = PTR_ERR(fo));
        LASSERT(fo != NULL);
 
+       ofd_info(env)->fti_obj = fo;
+
        ofd_read_lock(env, fo);
        if (!ofd_object_exists(fo)) {
                CERROR("%s: BRW to missing obj "DOSTID"\n",
@@ -706,8 +707,10 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 
        /* parse remote buffers to local buffers and prepare the latter */
        for (*nr_local = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
+               if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF))
+                       rnb[i].rnb_len += PAGE_SIZE;
                rc = dt_bufs_get(env, ofd_object_child(fo),
-                                rnb + i, lnb + j, dbt);
+                                rnb + i, lnb + j, maxlnb, dbt);
                if (unlikely(rc < 0))
                        GOTO(err, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
@@ -720,6 +723,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                }
                j += rc;
                *nr_local += rc;
+               maxlnb -= rc;
                LASSERT(j <= PTLRPC_MAX_BRW_PAGES);
                tot_bytes += rnb[i].rnb_len;
        }
@@ -778,8 +782,7 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
        int                      rc = 0;
 
        if (*nr_local > PTLRPC_MAX_BRW_PAGES) {
-               CERROR("%s: bulk has too many pages %d, which exceeds the"
-                      "maximum pages per RPC of %d\n",
+               CERROR("%s: bulk has too many pages %d, which exceeds the maximum pages per RPC of %d\n",
                       exp->exp_obd->obd_name, *nr_local, PTLRPC_MAX_BRW_PAGES);
                RETURN(-EPROTO);
        }
@@ -827,7 +830,6 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
                rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa,
                                     obj->ioo_bufcnt, rnb, nr_local, lnb,
                                     jobid);
-               obdo_from_la(oa, &info->fti_attr, LA_ATIME);
        } else {
                CERROR("%s: wrong cmd %d received!\n",
                       exp->exp_obd->obd_name, cmd);
@@ -862,17 +864,13 @@ ofd_commitrw_read(const struct lu_env *env, struct ofd_device *ofd,
 
        LASSERT(niocount > 0);
 
-       fo = ofd_object_find(env, ofd, fid);
-       if (IS_ERR(fo))
-               RETURN(PTR_ERR(fo));
+       fo = ofd_info(env)->fti_obj;
        LASSERT(fo != NULL);
        LASSERT(ofd_object_exists(fo));
        dt_bufs_put(env, ofd_object_child(fo), lnb, niocount);
 
        ofd_read_unlock(env, fo);
        ofd_object_put(env, fo);
-       /* second put is pair to object_get in ofd_preprw_read */
-       ofd_object_put(env, fo);
 
        RETURN(0);
 }
@@ -933,11 +931,13 @@ ofd_write_attr_set(const struct lu_env *env, struct ofd_device *ofd,
                        GOTO(out_tx, rc);
        }
 
-       rc = dt_declare_xattr_set(env, dt_obj, &info->fti_buf,
-                       XATTR_NAME_FID, 0, th);
-       if (rc)
-               GOTO(out_tx, rc);
-
+       if (oa->o_valid & (OBD_MD_FLFID | OBD_MD_FLOSTLAYOUT |
+                          OBD_MD_LAYOUT_VERSION)) {
+               rc = dt_declare_xattr_set(env, dt_obj, &info->fti_buf,
+                                         XATTR_NAME_FID, 0, th);
+               if (rc)
+                       GOTO(out_tx, rc);
+       }
        /* We don't need a transno for this operation which will be re-executed
         * anyway when the OST_WRITE (with a transno assigned) is replayed */
        rc = dt_trans_start_local(env, ofd->ofd_osd , th);
@@ -946,6 +946,17 @@ ofd_write_attr_set(const struct lu_env *env, struct ofd_device *ofd,
 
        ofd_read_lock(env, ofd_obj);
 
+       rc = ofd_attr_handle_id(env, ofd_obj, la, 0 /* !is_setattr */);
+       if (rc != 0)
+               GOTO(out_unlock, rc);
+
+       if (!la->la_valid && !(oa->o_valid &
+           (OBD_MD_FLFID | OBD_MD_FLOSTLAYOUT | OBD_MD_LAYOUT_VERSION)))
+               /* no attributes to set */
+               GOTO(out_unlock, rc = 0);
+
+
+
        /* set uid/gid/projid */
        if (la->la_valid) {
                rc = dt_attr_set(env, dt_obj, la, th);
@@ -1087,6 +1098,7 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
                   int niocount, struct niobuf_local *lnb,
                   unsigned long granted, int old_rc)
 {
+       struct ofd_thread_info *info = ofd_info(env);
        struct filter_export_data *fed = &exp->exp_filter_data;
        struct ofd_object *fo;
        struct dt_object *o;
@@ -1103,7 +1115,7 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
 
        LASSERT(objcount == 1);
 
-       fo = ofd_object_find(env, ofd, fid);
+       fo = ofd_info(env)->fti_obj;
        LASSERT(fo != NULL);
 
        o = ofd_object_child(fo);
@@ -1195,7 +1207,9 @@ retry:
                        GOTO(out_unlock, rc);
        }
 
-       if (la->la_valid) {
+       /* Don't update timestamps if this write is older than a
+        * setattr which modifies the timestamps. b=10150 */
+       if (la->la_valid && tgt_fmd_check(exp, fid, info->fti_xid)) {
                rc = dt_attr_set(env, o, la, th);
                if (rc)
                        GOTO(out_unlock, rc);
@@ -1242,8 +1256,6 @@ out_stop:
 out:
        dt_bufs_put(env, o, lnb, niocount);
        ofd_object_put(env, fo);
-       /* second put is pair to object_get in ofd_preprw_write */
-       ofd_object_put(env, fo);
        if (granted > 0)
                tgt_grant_commit(exp, granted, old_rc);
        RETURN(rc);
@@ -1279,6 +1291,8 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
        struct ofd_thread_info *info = ofd_info(env);
        struct ofd_device *ofd = ofd_exp(exp);
        const struct lu_fid *fid = &oa->o_oi.oi_fid;
+       struct ldlm_namespace *ns = ofd->ofd_namespace;
+       struct ldlm_resource *rs = NULL;
        __u64 valid;
        int rc = 0;
 
@@ -1287,13 +1301,8 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
        if (cmd == OBD_BRW_WRITE) {
                struct lu_nodemap *nodemap;
 
-               /* Don't update timestamps if this write is older than a
-                * setattr which modifies the timestamps. b=10150 */
-               valid = OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLPROJID;
-               if (tgt_fmd_check(exp, fid, info->fti_xid))
-                       valid |= OBD_MD_FLATIME | OBD_MD_FLMTIME |
-                                OBD_MD_FLCTIME;
-
+               valid = OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLPROJID |
+                       OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
                la_from_obdo(&info->fti_attr, oa, valid);
 
                rc = ofd_commitrw_write(env, exp, ofd, fid, &info->fti_attr,
@@ -1335,6 +1344,23 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        oa->o_valid |= OBD_MD_FLALLQUOTA;
                }
 
+               /**
+                * Update the LVB after a write under a server lock finishes;
+                * see the comments in ldlm_lock_decref_internal(). If this
+                * is a local lock on a server namespace and this was the
+                * last reference, the lock is destroyed directly, so
+                * ldlm_request_cancel() never gets a chance to update the LVB.
+                */
+               if (rc == 0 && (rnb[0].rnb_flags & OBD_BRW_SRVLOCK)) {
+                       ost_fid_build_resid(fid, &info->fti_resid);
+                       rs = ldlm_resource_get(ns, NULL, &info->fti_resid,
+                                              LDLM_EXTENT, 0);
+                       if (!IS_ERR(rs)) {
+                               ldlm_res_lvbo_update(rs, NULL, 1);
+                               ldlm_resource_putref(rs);
+                       }
+               }
+
                /* Convert back to client IDs. LU-9671.
                 * nodemap_get_from_exp() may fail due to nodemap deactivated,
                 * server ID will be returned back to client in that case. */
@@ -1349,22 +1375,6 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                        nodemap_putref(nodemap);
                }
        } else if (cmd == OBD_BRW_READ) {
-               struct ldlm_namespace *ns = ofd->ofd_namespace;
-
-               /* If oa != NULL then ofd_preprw_read updated the inode
-                * atime and we should update the lvb so that other glimpses
-                * will also get the updated value. bug 5972 */
-               if (oa && ns && ns->ns_lvbo && ns->ns_lvbo->lvbo_update) {
-                        struct ldlm_resource *rs = NULL;
-
-                       ost_fid_build_resid(fid, &info->fti_resid);
-                       rs = ldlm_resource_get(ns, NULL, &info->fti_resid,
-                                              LDLM_EXTENT, 0);
-                       if (!IS_ERR(rs)) {
-                               ldlm_res_lvbo_update(rs, NULL, 1);
-                               ldlm_resource_putref(rs);
-                       }
-               }
                rc = ofd_commitrw_read(env, ofd, fid, objcount,
                                       npages, lnb);
                if (old_rc)