Whamcloud - gitweb
LU-12681 osc: wrong cache of LVB attrs, part2
[fs/lustre-release.git] / lustre / mdc / mdc_dev.c
index 0bb7920..036dc92 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2017 Intel Corporation.
+ * Copyright (c) 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -34,6 +34,7 @@
 
 #include <obd_class.h>
 #include <lustre_osc.h>
+#include <uapi/linux/lustre/lustre_param.h>
 
 #include "mdc_internal.h"
 
@@ -62,11 +63,17 @@ static void mdc_lock_build_einfo(const struct lu_env *env,
        einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
 }
 
+static void mdc_lock_lvb_update(const struct lu_env *env,
+                               struct osc_object *osc,
+                               struct ldlm_lock *dlmlock,
+                               struct ost_lvb *lvb);
+
 static int mdc_set_dom_lock_data(struct ldlm_lock *lock, void *data)
 {
        int set = 0;
 
        LASSERT(lock != NULL);
+       LASSERT(lock->l_glimpse_ast == mdc_ldlm_glimpse_ast);
 
        lock_res_and_lock(lock);
 
@@ -80,9 +87,10 @@ static int mdc_set_dom_lock_data(struct ldlm_lock *lock, void *data)
        return set;
 }
 
-int mdc_dom_lock_match(struct obd_export *exp, struct ldlm_res_id *res_id,
-                      enum ldlm_type type, union ldlm_policy_data *policy,
-                      enum ldlm_mode mode, __u64 *flags, void *data,
+int mdc_dom_lock_match(const struct lu_env *env, struct obd_export *exp,
+                      struct ldlm_res_id *res_id, enum ldlm_type type,
+                      union ldlm_policy_data *policy, enum ldlm_mode mode,
+                      __u64 *flags, struct osc_object *obj,
                       struct lustre_handle *lockh, int unref)
 {
        struct obd_device *obd = exp->exp_obd;
@@ -96,11 +104,19 @@ int mdc_dom_lock_match(struct obd_export *exp, struct ldlm_res_id *res_id,
        if (rc == 0 || lflags & LDLM_FL_TEST_LOCK)
                RETURN(rc);
 
-       if (data != NULL) {
+       if (obj != NULL) {
                struct ldlm_lock *lock = ldlm_handle2lock(lockh);
 
                LASSERT(lock != NULL);
-               if (!mdc_set_dom_lock_data(lock, data)) {
+               if (mdc_set_dom_lock_data(lock, obj)) {
+                       lock_res_and_lock(lock);
+                       if (!ldlm_is_lvb_cached(lock)) {
+                               LASSERT(lock->l_ast_data == obj);
+                               mdc_lock_lvb_update(env, obj, lock, NULL);
+                               ldlm_set_lvb_cached(lock);
+                       }
+                       unlock_res_and_lock(lock);
+               } else {
                        ldlm_lock_decref(lockh, rc);
                        rc = 0;
                }
@@ -139,8 +155,9 @@ again:
        /* If we're trying to read, we also search for an existing PW lock.  The
         * VFS and page cache already protect us locally, so lots of readers/
         * writers can share a single PW lock. */
-       mode = mdc_dom_lock_match(osc_export(obj), resname, LDLM_IBITS, policy,
-                                 LCK_PR | LCK_PW, &flags, obj, &lockh,
+       mode = mdc_dom_lock_match(env, osc_export(obj), resname, LDLM_IBITS,
+                                 policy, LCK_PR | LCK_PW | LCK_GROUP, &flags,
+                                 obj, &lockh,
                                  dap_flags & OSC_DAP_FL_CANCELING);
        if (mode != 0) {
                lock = ldlm_handle2lock(&lockh);
@@ -248,7 +265,9 @@ static int mdc_lock_flush(const struct lu_env *env, struct osc_object *obj,
                        result = 0;
        }
 
-       rc = mdc_lock_discard_pages(env, obj, start, end, discard);
+       /* Avoid lock matching with CLM_WRITE, there can be no other locks */
+       rc = mdc_lock_discard_pages(env, obj, start, end,
+                                   mode == CLM_WRITE || discard);
        if (result == 0 && rc < 0)
                result = rc;
 
@@ -279,7 +298,7 @@ void mdc_lock_lockless_cancel(const struct lu_env *env,
  */
 static int mdc_dlm_blocking_ast0(const struct lu_env *env,
                                 struct ldlm_lock *dlmlock,
-                                void *data, int flag)
+                                int flag)
 {
        struct cl_object *obj = NULL;
        int result = 0;
@@ -370,7 +389,7 @@ int mdc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
                        break;
                }
 
-               rc = mdc_dlm_blocking_ast0(env, dlmlock, data, flag);
+               rc = mdc_dlm_blocking_ast0(env, dlmlock, flag);
                cl_env_put(env, &refcheck);
                break;
        }
@@ -388,16 +407,15 @@ int mdc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
  *
  * Called under lock and resource spin-locks.
  */
-static void mdc_lock_lvb_update(const struct lu_env *env,
-                               struct osc_object *osc,
-                               struct ldlm_lock *dlmlock,
-                               struct ost_lvb *lvb)
+void mdc_lock_lvb_update(const struct lu_env *env, struct osc_object *osc,
+                        struct ldlm_lock *dlmlock, struct ost_lvb *lvb)
 {
        struct cl_object *obj = osc2cl(osc);
        struct lov_oinfo *oinfo = osc->oo_oinfo;
        struct cl_attr *attr = &osc_env_info(env)->oti_attr;
        unsigned valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME |
                         CAT_SIZE;
+       unsigned int setkms = 0;
 
        ENTRY;
 
@@ -415,25 +433,31 @@ static void mdc_lock_lvb_update(const struct lu_env *env,
                size = lvb->lvb_size;
 
                if (size >= oinfo->loi_kms) {
-                       LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu,"
-                                  " kms=%llu", lvb->lvb_size, size);
                        valid |= CAT_KMS;
                        attr->cat_kms = size;
-               } else {
-                       LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu,"
-                                  " leaving kms=%llu, end=%llu",
-                                  lvb->lvb_size, oinfo->loi_kms,
-                                  dlmlock->l_policy_data.l_extent.end);
+                       setkms = 1;
                }
        }
+
+       /* The size should not be less than the kms */
+       if (attr->cat_size < oinfo->loi_kms)
+               attr->cat_size = oinfo->loi_kms;
+
+       LDLM_DEBUG(dlmlock, "acquired size %llu, setting rss=%llu;%s "
+                  "kms=%llu, end=%llu", lvb->lvb_size, attr->cat_size,
+                  setkms ? "" : " leaving",
+                  setkms ? attr->cat_kms : oinfo->loi_kms,
+                  dlmlock ? dlmlock->l_policy_data.l_extent.end : -1ull);
+
        cl_object_attr_update(env, obj, attr, valid);
        cl_object_attr_unlock(obj);
        EXIT;
 }
 
 static void mdc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
-                            struct lustre_handle *lockh, bool lvb_update)
+                            struct lustre_handle *lockh)
 {
+       struct osc_object *osc = cl2osc(oscl->ols_cl.cls_obj);
        struct ldlm_lock *dlmlock;
 
        ENTRY;
@@ -472,10 +496,11 @@ static void mdc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
                descr->cld_end = CL_PAGE_EOF;
 
                /* no lvb update for matched lock */
-               if (lvb_update) {
+               if (!ldlm_is_lvb_cached(dlmlock)) {
                        LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
-                       mdc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
-                                           dlmlock, NULL);
+                       LASSERT(osc == dlmlock->l_ast_data);
+                       mdc_lock_lvb_update(env, osc, dlmlock, NULL);
+                       ldlm_set_lvb_cached(dlmlock);
                }
        }
        unlock_res_and_lock(dlmlock);
@@ -516,7 +541,7 @@ static int mdc_lock_upcall(void *cookie, struct lustre_handle *lockh,
 
        CDEBUG(D_INODE, "rc %d, err %d\n", rc, errcode);
        if (rc == 0)
-               mdc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
+               mdc_lock_granted(env, oscl, lockh);
 
        /* Error handling, some errors are tolerable. */
        if (oscl->ols_locklessable && rc == -EUSERS) {
@@ -556,8 +581,9 @@ int mdc_fill_lvb(struct ptlrpc_request *req, struct ost_lvb *lvb)
        lvb->lvb_mtime = body->mbo_mtime;
        lvb->lvb_atime = body->mbo_atime;
        lvb->lvb_ctime = body->mbo_ctime;
-       lvb->lvb_blocks = body->mbo_blocks;
-       lvb->lvb_size = body->mbo_size;
+       lvb->lvb_blocks = body->mbo_dom_blocks;
+       lvb->lvb_size = body->mbo_dom_size;
+
        RETURN(0);
 }
 
@@ -608,8 +634,9 @@ int mdc_enqueue_fini(struct ptlrpc_request *req, osc_enqueue_upcall_f upcall,
 }
 
 int mdc_enqueue_interpret(const struct lu_env *env, struct ptlrpc_request *req,
-                         struct osc_enqueue_args *aa, int rc)
+                         void *args, int rc)
 {
+       struct osc_enqueue_args *aa = args;
        struct ldlm_lock *lock;
        struct lustre_handle *lockh = &aa->oa_lockh;
        enum ldlm_mode mode = aa->oa_mode;
@@ -659,8 +686,9 @@ int mdc_enqueue_interpret(const struct lu_env *env, struct ptlrpc_request *req,
  * when other sync requests do not get released lock from a client, the client
  * is excluded from the cluster -- such scenarious make the life difficult, so
  * release locks just after they are obtained. */
-int mdc_enqueue_send(struct obd_export *exp, struct ldlm_res_id *res_id,
-                    __u64 *flags, union ldlm_policy_data *policy,
+int mdc_enqueue_send(const struct lu_env *env, struct obd_export *exp,
+                    struct ldlm_res_id *res_id, __u64 *flags,
+                    union ldlm_policy_data *policy,
                     struct ost_lvb *lvb, int kms_valid,
                     osc_enqueue_upcall_f upcall, void *cookie,
                     struct ldlm_enqueue_info *einfo, int async)
@@ -672,19 +700,21 @@ int mdc_enqueue_send(struct obd_export *exp, struct ldlm_res_id *res_id,
        enum ldlm_mode mode;
        bool glimpse = *flags & LDLM_FL_HAS_INTENT;
        __u64 match_flags = *flags;
-       int rc;
+       struct list_head cancels = LIST_HEAD_INIT(cancels);
+       int rc, count;
 
        ENTRY;
 
-       if (!kms_valid)
-               goto no_match;
-
        mode = einfo->ei_mode;
        if (einfo->ei_mode == LCK_PR)
                mode |= LCK_PW;
 
-       if (!glimpse)
+       if (glimpse)
                match_flags |= LDLM_FL_BLOCK_GRANTED;
+       /* DOM locking uses LDLM_FL_KMS_IGNORE to mark locks which have no
+        * valid LVB information, e.g. canceled locks or locks of a just-pruned
+        * object; such locks should be skipped.
+        */
        mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id,
                               einfo->ei_type, policy, mode, &lockh, 0);
        if (mode) {
@@ -694,6 +724,10 @@ int mdc_enqueue_send(struct obd_export *exp, struct ldlm_res_id *res_id,
                        RETURN(ELDLM_OK);
 
                matched = ldlm_handle2lock(&lockh);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GLIMPSE_DDOS))
+                       ldlm_set_kms_ignore(matched);
+
                if (mdc_set_dom_lock_data(matched, einfo->ei_cbdata)) {
                        *flags |= LDLM_FL_LVB_READY;
 
@@ -703,13 +737,11 @@ int mdc_enqueue_send(struct obd_export *exp, struct ldlm_res_id *res_id,
                        ldlm_lock_decref(&lockh, mode);
                        LDLM_LOCK_PUT(matched);
                        RETURN(ELDLM_OK);
-               } else {
-                       ldlm_lock_decref(&lockh, mode);
-                       LDLM_LOCK_PUT(matched);
                }
+               ldlm_lock_decref(&lockh, mode);
+               LDLM_LOCK_PUT(matched);
        }
 
-no_match:
        if (*flags & (LDLM_FL_TEST_LOCK | LDLM_FL_MATCH_LOCK))
                RETURN(-ENOLCK);
 
@@ -717,7 +749,15 @@ no_match:
        if (req == NULL)
                RETURN(-ENOMEM);
 
-       rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+       /* For WRITE lock cancel other locks on resource early if any */
+       if (einfo->ei_mode & LCK_PW)
+               count = mdc_resource_get_unused_res(exp, res_id, &cancels,
+                                                   einfo->ei_mode,
+                                                   MDS_INODELOCK_DOM);
+       else
+               count = 0;
+
+       rc = ldlm_prep_enqueue_req(exp, req, &cancels, count);
        if (rc < 0) {
                ptlrpc_request_free(req);
                RETURN(rc);
@@ -741,8 +781,7 @@ no_match:
                if (!rc) {
                        struct osc_enqueue_args *aa;
 
-                       CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
-                       aa = ptlrpc_req_async_args(req);
+                       aa = ptlrpc_req_async_args(aa, req);
                        aa->oa_exp = exp;
                        aa->oa_mode = einfo->ei_mode;
                        aa->oa_type = einfo->ei_type;
@@ -753,8 +792,7 @@ no_match:
                        aa->oa_flags = flags;
                        aa->oa_lvb = lvb;
 
-                       req->rq_interpret_reply =
-                               (ptlrpc_interpterer_t)mdc_enqueue_interpret;
+                       req->rq_interpret_reply = mdc_enqueue_interpret;
                        ptlrpcd_add_req(req);
                } else {
                        ptlrpc_req_finished(req);
@@ -848,9 +886,9 @@ enqueue_base:
        fid_build_reg_res_name(lu_object_fid(osc2lu(osc)), resname);
        mdc_lock_build_policy(env, policy);
        LASSERT(!oscl->ols_speculative);
-       result = mdc_enqueue_send(osc_export(osc), resname, &oscl->ols_flags,
-                                 policy, &oscl->ols_lvb,
-                                 osc->oo_oinfo->loi_kms_valid,
+       result = mdc_enqueue_send(env, osc_export(osc), resname,
+                                 &oscl->ols_flags, policy,
+                                 &oscl->ols_lvb, osc->oo_oinfo->loi_kms_valid,
                                  upcall, cookie, &oscl->ols_einfo, async);
        if (result == 0) {
                if (osc_lock_is_lockless(oscl)) {
@@ -949,6 +987,48 @@ static int mdc_async_upcall(void *a, int rc)
        return 0;
 }
 
+/**
+ * Find the DOM lock protecting page \a index of \a osc and copy its remote
+ * handle into \a lh.
+ *
+ * \retval 0       a lock was found, \a lh is filled in
+ * \retval -ENOENT no lock was found; the resource, if it exists, and the
+ *                 stack are dumped for diagnosis
+ */
+static int mdc_get_lock_handle(const struct lu_env *env, struct osc_object *osc,
+                              pgoff_t index, struct lustre_handle *lh)
+{
+       struct ldlm_lock *lock;
+
+       /* find DOM lock protecting object */
+       lock = mdc_dlmlock_at_pgoff(env, osc, index,
+                                   OSC_DAP_FL_TEST_LOCK |
+                                   OSC_DAP_FL_CANCELING);
+       if (lock == NULL) {
+               struct ldlm_resource *res;
+               struct ldlm_res_id *resname;
+
+               resname = &osc_env_info(env)->oti_resname;
+               fid_build_reg_res_name(lu_object_fid(osc2lu(osc)), resname);
+               res = ldlm_resource_get(osc_export(osc)->exp_obd->obd_namespace,
+                                       NULL, resname, LDLM_IBITS, 0);
+               /* The lookup may fail, so never dump an error pointer; and
+                * drop the reference it took, since this path returns to
+                * the caller instead of asserting.
+                */
+               if (!IS_ERR_OR_NULL(res)) {
+                       ldlm_resource_dump(D_ERROR, res);
+                       ldlm_resource_putref(res);
+               }
+               libcfs_debug_dumpstack(NULL);
+               return -ENOENT;
+       }
+
+       *lh = lock->l_remote_handle;
+       LDLM_LOCK_PUT(lock);
+       return 0;
+}
+
 static int mdc_io_setattr_start(const struct lu_env *env,
                                const struct cl_io_slice *slice)
 {
@@ -960,7 +1025,8 @@ static int mdc_io_setattr_start(const struct lu_env *env,
        struct obdo *oa = &oio->oi_oa;
        struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
        __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
-       unsigned int ia_valid = io->u.ci_setattr.sa_valid;
+       unsigned int ia_avalid = io->u.ci_setattr.sa_avalid;
+       enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid;
        int rc;
 
        /* silently ignore non-truncate setattr for Data-on-MDT object */
@@ -979,19 +1045,20 @@ static int mdc_io_setattr_start(const struct lu_env *env,
                        struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr;
                        unsigned int cl_valid = 0;
 
-                       if (ia_valid & ATTR_SIZE) {
-                               attr->cat_size = attr->cat_kms = size;
+                       if (ia_avalid & ATTR_SIZE) {
+                               attr->cat_size = size;
+                               attr->cat_kms = size;
                                cl_valid = (CAT_SIZE | CAT_KMS);
                        }
-                       if (ia_valid & ATTR_MTIME_SET) {
+                       if (ia_avalid & ATTR_MTIME_SET) {
                                attr->cat_mtime = lvb->lvb_mtime;
                                cl_valid |= CAT_MTIME;
                        }
-                       if (ia_valid & ATTR_ATIME_SET) {
+                       if (ia_avalid & ATTR_ATIME_SET) {
                                attr->cat_atime = lvb->lvb_atime;
                                cl_valid |= CAT_ATIME;
                        }
-                       if (ia_valid & ATTR_CTIME_SET) {
+                       if (ia_xvalid & OP_XVALID_CTIME_SET) {
                                attr->cat_ctime = lvb->lvb_ctime;
                                cl_valid |= CAT_CTIME;
                        }
@@ -1002,7 +1069,7 @@ static int mdc_io_setattr_start(const struct lu_env *env,
                        return rc;
        }
 
-       if (!(ia_valid & ATTR_SIZE))
+       if (!(ia_avalid & ATTR_SIZE))
                return 0;
 
        memset(oa, 0, sizeof(*oa));
@@ -1019,6 +1086,11 @@ static int mdc_io_setattr_start(const struct lu_env *env,
        if (oio->oi_lockless) {
                oa->o_flags = OBD_FL_SRVLOCK;
                oa->o_valid |= OBD_MD_FLFLAGS;
+       } else {
+               rc = mdc_get_lock_handle(env, cl2osc(obj), CL_PAGE_EOF,
+                                        &oa->o_handle);
+               if (!rc)
+                       oa->o_valid |= OBD_MD_FLHANDLE;
        }
 
        init_completion(&cbargs->opc_sync);
@@ -1058,16 +1130,171 @@ static int mdc_io_read_ahead(const struct lu_env *env,
        RETURN(0);
 }
 
+int mdc_io_fsync_start(const struct lu_env *env,
+                      const struct cl_io_slice *slice)
+{
+       struct cl_io *io = slice->cis_io;
+       struct cl_fsync_io *fio = &io->u.ci_fsync;
+       struct cl_object *obj = slice->cis_obj;
+       struct osc_object *osc = cl2osc(obj);
+       int result = 0;
+
+       ENTRY;
+
+       /* An MDC lock always covers the whole object, so sync the whole
+        * possible range regardless of the supplied start/end values.
+        */
+       result = osc_cache_writeback_range(env, osc, 0, CL_PAGE_EOF, 0,
+                                          fio->fi_mode == CL_FSYNC_DISCARD);
+       if (result > 0) {
+               fio->fi_nr_written += result;
+               result = 0;
+       }
+       if (fio->fi_mode == CL_FSYNC_ALL) {
+               int rc;
+
+               rc = osc_cache_wait_range(env, osc, 0, CL_PAGE_EOF);
+               if (result == 0)
+                       result = rc;
+               /* Use OSC sync code because it is asynchronous.
+                * An equivalent is to be added to MDC to avoid the use of
+                * OST_SYNC at both MDC and MDT.
+                */
+               rc = osc_fsync_ost(env, osc, fio);
+               if (result == 0)
+                       result = rc;
+       }
+
+       RETURN(result);
+}
+
+struct mdc_data_version_args {
+       struct osc_io *dva_oio;
+};
+
+static int
+mdc_data_version_interpret(const struct lu_env *env, struct ptlrpc_request *req,
+                          void *args, int rc)
+{
+       struct mdc_data_version_args *dva = args;
+       struct osc_io *oio = dva->dva_oio;
+       const struct mdt_body *body;
+
+       ENTRY;
+       if (rc < 0)
+               GOTO(out, rc);
+
+       body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+       if (body == NULL)
+               GOTO(out, rc = -EPROTO);
+
+       /* Prepare OBDO from mdt_body for CLIO */
+       oio->oi_oa.o_valid = body->mbo_valid;
+       oio->oi_oa.o_flags = body->mbo_flags;
+       oio->oi_oa.o_data_version = body->mbo_version;
+       oio->oi_oa.o_layout_version = body->mbo_layout_gen;
+       EXIT;
+out:
+       oio->oi_cbarg.opc_rc = rc;
+       complete(&oio->oi_cbarg.opc_sync);
+       return 0;
+}
+
+static int mdc_io_data_version_start(const struct lu_env *env,
+                                    const struct cl_io_slice *slice)
+{
+       struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
+       struct osc_io *oio = cl2osc_io(env, slice);
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+       struct osc_object *obj = cl2osc(slice->cis_obj);
+       struct obd_export *exp = osc_export(obj);
+       struct ptlrpc_request *req;
+       struct mdt_body *body;
+       struct mdc_data_version_args *dva;
+       int rc;
+
+       ENTRY;
+
+       memset(&oio->oi_oa, 0, sizeof(oio->oi_oa));
+       oio->oi_oa.o_oi.oi_fid = *lu_object_fid(osc2lu(obj));
+       oio->oi_oa.o_valid = OBD_MD_FLID;
+
+       init_completion(&cbargs->opc_sync);
+
+       req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR);
+       if (req == NULL)
+               RETURN(-ENOMEM);
+
+       rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR);
+       if (rc < 0) {
+               ptlrpc_request_free(req);
+               RETURN(rc);
+       }
+
+       body = req_capsule_client_get(&req->rq_pill, &RMF_MDT_BODY);
+       body->mbo_fid1 = *lu_object_fid(osc2lu(obj));
+       body->mbo_valid = OBD_MD_FLID;
+       /* Indicate that data version is needed */
+       body->mbo_valid |= OBD_MD_FLDATAVERSION;
+       body->mbo_flags = 0;
+
+       if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) {
+               body->mbo_valid |= OBD_MD_FLFLAGS;
+               body->mbo_flags |= OBD_FL_SRVLOCK;
+               if (dv->dv_flags & LL_DV_WR_FLUSH)
+                       body->mbo_flags |= OBD_FL_FLUSH;
+       }
+
+       req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0);
+       req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, 0);
+       ptlrpc_request_set_replen(req);
+
+       req->rq_interpret_reply = mdc_data_version_interpret;
+       dva = ptlrpc_req_async_args(dva, req);
+       dva->dva_oio = oio;
+
+       ptlrpcd_add_req(req);
+
+       RETURN(0);
+}
+
+static void mdc_io_data_version_end(const struct lu_env *env,
+                                   const struct cl_io_slice *slice)
+{
+       struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
+       struct osc_io *oio = cl2osc_io(env, slice);
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+
+       ENTRY;
+       wait_for_completion(&cbargs->opc_sync);
+
+       if (cbargs->opc_rc != 0) {
+               slice->cis_io->ci_result = cbargs->opc_rc;
+       } else {
+               slice->cis_io->ci_result = 0;
+               if (!(oio->oi_oa.o_valid &
+                     (OBD_MD_LAYOUT_VERSION | OBD_MD_FLDATAVERSION)))
+                       slice->cis_io->ci_result = -ENOTSUPP;
+
+               if (oio->oi_oa.o_valid & OBD_MD_LAYOUT_VERSION)
+                       dv->dv_layout_version = oio->oi_oa.o_layout_version;
+               if (oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)
+                       dv->dv_data_version = oio->oi_oa.o_data_version;
+       }
+
+       EXIT;
+}
+
 static struct cl_io_operations mdc_io_ops = {
        .op = {
                [CIT_READ] = {
-                       .cio_iter_init = osc_io_iter_init,
-                       .cio_iter_fini = osc_io_iter_fini,
+                       .cio_iter_init = osc_io_rw_iter_init,
+                       .cio_iter_fini = osc_io_rw_iter_fini,
                        .cio_start     = osc_io_read_start,
                },
                [CIT_WRITE] = {
-                       .cio_iter_init = osc_io_write_iter_init,
-                       .cio_iter_fini = osc_io_write_iter_fini,
+                       .cio_iter_init = osc_io_rw_iter_init,
+                       .cio_iter_fini = osc_io_rw_iter_fini,
                        .cio_start     = osc_io_write_start,
                        .cio_end       = osc_io_end,
                },
@@ -1077,10 +1304,9 @@ static struct cl_io_operations mdc_io_ops = {
                        .cio_start     = mdc_io_setattr_start,
                        .cio_end       = osc_io_setattr_end,
                },
-               /* no support for data version so far */
                [CIT_DATA_VERSION] = {
-                       .cio_start = NULL,
-                       .cio_end   = NULL,
+                       .cio_start = mdc_io_data_version_start,
+                       .cio_end   = mdc_io_data_version_end,
                },
                [CIT_FAULT] = {
                        .cio_iter_init = osc_io_iter_init,
@@ -1089,7 +1315,7 @@ static struct cl_io_operations mdc_io_ops = {
                        .cio_end       = osc_io_end,
                },
                [CIT_FSYNC] = {
-                       .cio_start = osc_io_fsync_start,
+                       .cio_start = mdc_io_fsync_start,
                        .cio_end   = osc_io_fsync_end,
                },
        },
@@ -1134,35 +1360,22 @@ static void mdc_req_attr_set(const struct lu_env *env, struct cl_object *obj,
                attr->cra_oa->o_valid |= OBD_MD_FLID;
 
        if (flags & OBD_MD_FLHANDLE) {
-               struct ldlm_lock *lock;  /* _some_ lock protecting @apage */
                struct osc_page *opg;
 
                opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj));
-               lock = mdc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
-                               OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING);
-               if (lock == NULL && !opg->ops_srvlock) {
-                       struct ldlm_resource *res;
-                       struct ldlm_res_id *resname;
-
-                       CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page,
-                                     "uncovered page!\n");
-
-                       resname = &osc_env_info(env)->oti_resname;
-                       mdc_build_res_name(cl2osc(obj), resname);
-                       res = ldlm_resource_get(
-                               osc_export(cl2osc(obj))->exp_obd->obd_namespace,
-                               NULL, resname, LDLM_IBITS, 0);
-                       ldlm_resource_dump(D_ERROR, res);
-
-                       libcfs_debug_dumpstack(NULL);
-                       LBUG();
-               }
-
-               /* check for lockless io. */
-               if (lock != NULL) {
-                       attr->cra_oa->o_handle = lock->l_remote_handle;
-                       attr->cra_oa->o_valid |= OBD_MD_FLHANDLE;
-                       LDLM_LOCK_PUT(lock);
+               if (!opg->ops_srvlock) {
+                       int rc;
+
+                       rc = mdc_get_lock_handle(env, cl2osc(obj),
+                                                osc_index(opg),
+                                                &attr->cra_oa->o_handle);
+                       if (rc) {
+                               CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page,
+                                             "uncovered page!\n");
+                               LBUG();
+                       } else {
+                               attr->cra_oa->o_valid |= OBD_MD_FLHANDLE;
+                       }
                }
        }
 }
@@ -1178,6 +1391,59 @@ static int mdc_attr_get(const struct lu_env *env, struct cl_object *obj,
        return osc_attr_get(env, obj, attr);
 }
 
+static int mdc_object_ast_clear(struct ldlm_lock *lock, void *data)
+{
+       struct osc_object *osc = (struct osc_object *)data;
+       struct ost_lvb *lvb = &lock->l_ost_lvb;
+       struct lov_oinfo *oinfo;
+       ENTRY;
+
+       if (lock->l_ast_data == data) {
+               lock->l_ast_data = NULL;
+
+               LASSERT(osc != NULL);
+               LASSERT(osc->oo_oinfo != NULL);
+               LASSERT(lvb != NULL);
+
+               /* Updates lvb in lock by the cached oinfo */
+               oinfo = osc->oo_oinfo;
+
+               LDLM_DEBUG(lock, "update lock size %llu blocks %llu [cma]time: "
+                          "%llu %llu %llu by oinfo size %llu blocks %llu "
+                          "[cma]time %llu %llu %llu", lvb->lvb_size,
+                          lvb->lvb_blocks, lvb->lvb_ctime, lvb->lvb_mtime,
+                          lvb->lvb_atime, oinfo->loi_lvb.lvb_size,
+                          oinfo->loi_lvb.lvb_blocks, oinfo->loi_lvb.lvb_ctime,
+                          oinfo->loi_lvb.lvb_mtime, oinfo->loi_lvb.lvb_atime);
+               LASSERT(oinfo->loi_lvb.lvb_size >= oinfo->loi_kms);
+
+               cl_object_attr_lock(&osc->oo_cl);
+               memcpy(lvb, &oinfo->loi_lvb, sizeof(oinfo->loi_lvb));
+               cl_object_attr_unlock(&osc->oo_cl);
+               ldlm_clear_lvb_cached(lock);
+       }
+       RETURN(LDLM_ITER_CONTINUE);
+}
+
+int mdc_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+       struct osc_object *osc = cl2osc(obj);
+       struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname;
+
+       /* DLM locks don't hold a reference to the osc_object, so l_ast_data
+        * has to be cleared before the object is destroyed. */
+       osc_build_res_name(osc, resname);
+       ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
+                             mdc_object_ast_clear, osc);
+       return 0;
+}
+
+static int mdc_object_flush(const struct lu_env *env, struct cl_object *obj,
+                           struct ldlm_lock *lock)
+{
+       RETURN(mdc_dlm_blocking_ast0(env, lock, LDLM_CB_CANCELING));
+}
+
 static const struct cl_object_operations mdc_ops = {
        .coo_page_init = osc_page_init,
        .coo_lock_init = mdc_lock_init,
@@ -1186,7 +1452,8 @@ static const struct cl_object_operations mdc_ops = {
        .coo_attr_update = osc_attr_update,
        .coo_glimpse = osc_object_glimpse,
        .coo_req_attr_set = mdc_req_attr_set,
-       .coo_prune = osc_object_prune,
+       .coo_prune = mdc_object_prune,
+       .coo_object_flush = mdc_object_flush
 };
 
 static const struct osc_object_operations mdc_object_ops = {
@@ -1242,15 +1509,17 @@ struct lu_object *mdc_object_alloc(const struct lu_env *env,
        return obj;
 }
 
-static int mdc_cl_process_config(const struct lu_env *env,
-                                struct lu_device *d, struct lustre_cfg *cfg)
+static int mdc_process_config(const struct lu_env *env, struct lu_device *d,
+                             struct lustre_cfg *cfg)
 {
-       return mdc_process_config(d->ld_obd, 0, cfg);
+       ssize_t count = class_modify_config(cfg, PARAM_MDC,
+                                           &d->ld_obd->obd_kset.kobj);
+       return count > 0 ? 0 : count; /* signed: -errno is not lost */
 }
 
 const struct lu_device_operations mdc_lu_ops = {
        .ldo_object_alloc = mdc_object_alloc,
-       .ldo_process_config = mdc_cl_process_config,
+       .ldo_process_config = mdc_process_config,
        .ldo_recovery_complete = NULL,
 };