Whamcloud - gitweb
LU-6179 llite: Implement ladvise lockahead
[fs/lustre-release.git] / lustre / osc / osc_lock.c
index eebc0d6..6fd75da 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -44,8 +40,9 @@
 #include <libcfs/libcfs.h>
 /* fid_build_reg_res_name() */
 #include <lustre_fid.h>
+#include <lustre_osc.h>
 
-#include "osc_cl_internal.h"
+#include "osc_internal.h"
 
 /** \addtogroup osc
  *  @{
@@ -163,11 +160,13 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
 {
        __u64 result = 0;
 
+       CDEBUG(D_DLMTRACE, "flags: %x\n", enqflags);
+
        LASSERT((enqflags & ~CEF_MASK) == 0);
 
        if (enqflags & CEF_NONBLOCK)
                result |= LDLM_FL_BLOCK_NOWAIT;
-       if (enqflags & CEF_ASYNC)
+       if (enqflags & CEF_GLIMPSE)
                result |= LDLM_FL_HAS_INTENT;
        if (enqflags & CEF_DISCARD_DATA)
                result |= LDLM_FL_AST_DISCARD_DATA;
@@ -175,6 +174,10 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
                result |= LDLM_FL_TEST_LOCK;
        if (enqflags & CEF_LOCK_MATCH)
                result |= LDLM_FL_MATCH_LOCK;
+       if (enqflags & CEF_LOCK_NO_EXPAND)
+               result |= LDLM_FL_NO_EXPANSION;
+       if (enqflags & CEF_SPECULATIVE)
+               result |= LDLM_FL_SPECULATIVE;
        return result;
 }
 
@@ -221,13 +224,13 @@ static void osc_lock_lvb_update(const struct lu_env *env,
                 if (size > dlmlock->l_policy_data.l_extent.end)
                         size = dlmlock->l_policy_data.l_extent.end + 1;
                 if (size >= oinfo->loi_kms) {
-                        LDLM_DEBUG(dlmlock, "lock acquired, setting rss="LPU64
-                                   ", kms="LPU64, lvb->lvb_size, size);
+                       LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu"
+                                  ", kms=%llu", lvb->lvb_size, size);
                         valid |= CAT_KMS;
                         attr->cat_kms = size;
                 } else {
                         LDLM_DEBUG(dlmlock, "lock acquired, setting rss="
-                                   LPU64"; leaving kms="LPU64", end="LPU64,
+                                  "%llu; leaving kms=%llu, end=%llu",
                                    lvb->lvb_size, oinfo->loi_kms,
                                    dlmlock->l_policy_data.l_extent.end);
                 }
@@ -305,12 +308,11 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
        struct osc_lock         *oscl  = cookie;
        struct cl_lock_slice    *slice = &oscl->ols_cl;
        struct lu_env           *env;
-       struct cl_env_nest      nest;
        int                     rc;
 
        ENTRY;
 
-       env = cl_env_nested_get(&nest);
+       env = cl_env_percpu_get();
        /* should never happen, similar to osc_ldlm_blocking_ast(). */
        LASSERT(!IS_ERR(env));
 
@@ -349,21 +351,22 @@ static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
 
        if (oscl->ols_owner != NULL)
                cl_sync_io_note(env, oscl->ols_owner, rc);
-       cl_env_nested_put(&nest, env);
+       cl_env_percpu_put(env);
 
        RETURN(rc);
 }
 
-static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
-                              int errcode)
+static int osc_lock_upcall_speculative(void *cookie,
+                                      struct lustre_handle *lockh,
+                                      int errcode)
 {
        struct osc_object       *osc = cookie;
        struct ldlm_lock        *dlmlock;
        struct lu_env           *env;
-       struct cl_env_nest       nest;
+       __u16                    refcheck;
        ENTRY;
 
-       env = cl_env_nested_get(&nest);
+       env = cl_env_get(&refcheck);
        LASSERT(!IS_ERR(env));
 
        if (errcode == ELDLM_LOCK_MATCHED)
@@ -378,7 +381,7 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
        lock_res_and_lock(dlmlock);
        LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
 
-       /* there is no osc_lock associated with AGL lock */
+       /* there is no osc_lock associated with speculative locks */
        osc_lock_lvb_update(env, osc, dlmlock, NULL);
 
        unlock_res_and_lock(dlmlock);
@@ -386,21 +389,21 @@ static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
 
 out:
        cl_object_put(env, osc2cl(osc));
-       cl_env_nested_put(&nest, env);
+       cl_env_put(env, &refcheck);
        RETURN(ldlm_error2errno(errcode));
 }
 
 static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
-                         enum cl_lock_mode mode, int discard)
+                         enum cl_lock_mode mode, bool discard)
 {
        struct lu_env           *env;
-       struct cl_env_nest      nest;
+       __u16                   refcheck;
        int                     rc = 0;
        int                     rc2 = 0;
 
        ENTRY;
 
-       env = cl_env_nested_get(&nest);
+       env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));
 
@@ -414,11 +417,11 @@ static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
                        rc = 0;
        }
 
-       rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
+       rc2 = osc_lock_discard_pages(env, obj, start, end, discard);
        if (rc == 0 && rc2 < 0)
                rc = rc2;
 
-       cl_env_nested_put(&nest, env);
+       cl_env_put(env, &refcheck);
        RETURN(rc);
 }
 
@@ -432,7 +435,7 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 {
        struct cl_object        *obj = NULL;
        int                     result = 0;
-       int                     discard;
+       bool                    discard;
        enum cl_lock_mode       mode = CLM_READ;
        ENTRY;
 
@@ -550,7 +553,7 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
        }
        case LDLM_CB_CANCELING: {
                struct lu_env     *env;
-               struct cl_env_nest nest;
+               __u16              refcheck;
 
                /*
                 * This can be called in the context of outer IO, e.g.,
@@ -563,14 +566,14 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
                 * new environment has to be created to not corrupt outer
                 * context.
                 */
-               env = cl_env_nested_get(&nest);
+               env = cl_env_get(&refcheck);
                if (IS_ERR(env)) {
                        result = PTR_ERR(env);
                        break;
                }
 
                result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
-               cl_env_nested_put(&nest, env);
+               cl_env_put(env, &refcheck);
                break;
        }
        default:
@@ -582,61 +585,62 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
 static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 {
        struct ptlrpc_request   *req  = data;
-       struct cl_env_nest      nest;
        struct lu_env           *env;
        struct ost_lvb          *lvb;
        struct req_capsule      *cap;
+       struct cl_object        *obj = NULL;
        int                     result;
+       __u16                   refcheck;
 
        ENTRY;
 
        LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);
 
-       env = cl_env_nested_get(&nest);
-       if (!IS_ERR(env)) {
-               struct cl_object *obj = NULL;
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               GOTO(out, result = PTR_ERR(env));
 
-               lock_res_and_lock(dlmlock);
-               if (dlmlock->l_ast_data != NULL) {
-                       obj = osc2cl(dlmlock->l_ast_data);
-                       cl_object_get(obj);
+
+       lock_res_and_lock(dlmlock);
+       if (dlmlock->l_ast_data != NULL) {
+               obj = osc2cl(dlmlock->l_ast_data);
+               cl_object_get(obj);
+       }
+       unlock_res_and_lock(dlmlock);
+
+       if (obj != NULL) {
+               /* Build the server-side reply with room for an LVB and let
+                * cl_object_glimpse() fill it in.  The cl_lock mutex is
+                * deliberately not taken for glimpse; see LU-1274.
+                * It is okay for cl_lock to be cancelled during this
+                * period because the server can handle this race; see
+                * ldlm_server_glimpse_ast() for details. */
+               cap = &req->rq_pill;
+               req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
+               req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
+                                       sizeof *lvb);
+               result = req_capsule_server_pack(cap);
+               if (result == 0) {
+                       lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
+                       result = cl_object_glimpse(env, obj, lvb);
                }
-               unlock_res_and_lock(dlmlock);
+               if (!exp_connect_lvb_type(req->rq_export))
+                       req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB,
+                                       sizeof(struct ost_lvb_v1), RCL_SERVER);
+               cl_object_put(env, obj);
+       } else {
+               /*
+                * No object is attached to the lock (l_ast_data was NULL).
+                * This is a normal race, so pack a plain reply instead of
+                * calling ptlrpc_error(), which would fill the console.
+                */
+               lustre_pack_reply(req, 1, NULL, NULL);
+               result = -ELDLM_NO_LOCK_DATA;
+       }
+       cl_env_put(env, &refcheck);
+       EXIT;
 
-               if (obj != NULL) {
-                        /* Do not grab the mutex of cl_lock for glimpse.
-                         * See LU-1274 for details.
-                         * BTW, it's okay for cl_lock to be cancelled during
-                         * this period because server can handle this race.
-                         * See ldlm_server_glimpse_ast() for details.
-                         * cl_lock_mutex_get(env, lock); */
-                        cap = &req->rq_pill;
-                        req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
-                        req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
-                                             sizeof *lvb);
-                        result = req_capsule_server_pack(cap);
-                        if (result == 0) {
-                                lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
-                                result = cl_object_glimpse(env, obj, lvb);
-                        }
-                       if (!exp_connect_lvb_type(req->rq_export))
-                               req_capsule_shrink(&req->rq_pill,
-                                                  &RMF_DLM_LVB,
-                                                  sizeof(struct ost_lvb_v1),
-                                                  RCL_SERVER);
-                       cl_object_put(env, obj);
-                } else {
-                        /*
-                         * These errors are normal races, so we don't want to
-                         * fill the console with messages by calling
-                         * ptlrpc_error()
-                         */
-                        lustre_pack_reply(req, 1, NULL, NULL);
-                        result = -ELDLM_NO_LOCK_DATA;
-                }
-                cl_env_nested_put(&nest, env);
-       } else
-               result = PTR_ERR(env);
+out:
        req->rq_status = result;
        RETURN(result);
 }
@@ -692,12 +696,12 @@ static unsigned long osc_lock_weight(const struct lu_env *env,
  */
 unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
 {
-       struct cl_env_nest       nest;
        struct lu_env           *env;
        struct osc_object       *obj;
        struct osc_lock         *oscl;
        unsigned long            weight;
        bool                    found = false;
+       __u16                   refcheck;
        ENTRY;
 
        might_sleep();
@@ -708,13 +712,18 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
         * the upper context because cl_lock_put doesn't modify environment
         * variables. But just in case ...
         */
-       env = cl_env_nested_get(&nest);
+       env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                /* Mostly because lack of memory, do not eliminate this lock */
                RETURN(1);
 
        LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
+       lock_res_and_lock(dlmlock);
        obj = dlmlock->l_ast_data;
+       if (obj)
+               cl_object_get(osc2cl(obj));
+       unlock_res_and_lock(dlmlock);
+
        if (obj == NULL)
                GOTO(out, weight = 1);
 
@@ -736,7 +745,10 @@ unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
        EXIT;
 
 out:
-       cl_env_nested_put(&nest, env);
+       if (obj)
+               cl_object_put(env, osc2cl(obj));
+
+       cl_env_put(env, &refcheck);
        return weight;
 }
 
@@ -812,7 +824,7 @@ static bool osc_lock_compatible(const struct osc_lock *qing,
        struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
        struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
 
-       if (qed->ols_glimpse)
+       if (qed->ols_glimpse || qed->ols_speculative)
                return true;
 
        if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
@@ -930,6 +942,7 @@ static int osc_lock_enqueue(const struct lu_env *env,
        struct osc_io                   *oio   = osc_env_io(env);
        struct osc_object               *osc   = cl2osc(slice->cls_obj);
        struct osc_lock                 *oscl  = cl2osc_lock(slice);
+       struct obd_export               *exp   = osc_export(osc);
        struct cl_lock                  *lock  = slice->cls_lock;
        struct ldlm_res_id              *resname = &info->oti_resname;
        union ldlm_policy_data          *policy  = &info->oti_policy;
@@ -946,11 +959,22 @@ static int osc_lock_enqueue(const struct lu_env *env,
        if (oscl->ols_state == OLS_GRANTED)
                RETURN(0);
 
+       if ((oscl->ols_flags & LDLM_FL_NO_EXPANSION) &&
+           !(exp_connect_lockahead_old(exp) || exp_connect_lockahead(exp))) {
+               result = -EOPNOTSUPP;
+               CERROR("%s: server does not support lockahead/locknoexpand:"
+                      "rc = %d\n", exp->exp_obd->obd_name, result);
+               RETURN(result);
+       }
+
        if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
                GOTO(enqueue_base, 0);
 
-       if (oscl->ols_glimpse) {
-               LASSERT(equi(oscl->ols_agl, anchor == NULL));
+       /* For glimpse and/or speculative locks, do not wait for reply from
+        * server on LDLM request */
+       if (oscl->ols_glimpse || oscl->ols_speculative) {
+               /* Within this branch, only speculative locks lack an anchor */
+               LASSERT(equi(oscl->ols_speculative, anchor == NULL));
                async = true;
                GOTO(enqueue_base, 0);
        }
@@ -976,25 +1000,31 @@ enqueue_base:
 
        /**
         * DLM lock's ast data must be osc_object;
-        * if glimpse or AGL lock, async of osc_enqueue_base() must be true,
+        * if glimpse or speculative lock, async of osc_enqueue_base()
+        * must be true
+        *
+        * For non-speculative locks:
         * DLM's enqueue callback set to osc_lock_upcall() with cookie as
         * osc_lock.
+        * For speculative locks:
+        * osc_lock_upcall_speculative & cookie is the osc object, since
+        * there is no osc_lock
         */
        ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
        osc_lock_build_policy(env, lock, policy);
-       if (oscl->ols_agl) {
+       if (oscl->ols_speculative) {
                oscl->ols_einfo.ei_cbdata = NULL;
                /* hold a reference for callback */
                cl_object_get(osc2cl(osc));
-               upcall = osc_lock_upcall_agl;
+               upcall = osc_lock_upcall_speculative;
                cookie = osc;
        }
-       result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
+       result = osc_enqueue_base(exp, resname, &oscl->ols_flags,
                                  policy, &oscl->ols_lvb,
                                  osc->oo_oinfo->loi_kms_valid,
                                  upcall, cookie,
                                  &oscl->ols_einfo, PTLRPCD_SET, async,
-                                 oscl->ols_agl);
+                                 oscl->ols_speculative);
        if (result == 0) {
                if (osc_lock_is_lockless(oscl)) {
                        oio->oi_lockless = 1;
@@ -1003,9 +1033,12 @@ enqueue_base:
                        LASSERT(oscl->ols_hold);
                        LASSERT(oscl->ols_dlmlock != NULL);
                }
-       } else if (oscl->ols_agl) {
+       } else if (oscl->ols_speculative) {
                cl_object_put(env, osc2cl(osc));
-               result = 0;
+               if (oscl->ols_glimpse) {
+                       /* hide error for AGL request */
+                       result = 0;
+               }
        }
 
 out:
@@ -1086,7 +1119,7 @@ static int osc_lock_print(const struct lu_env *env, void *cookie,
 {
        struct osc_lock *lock = cl2osc_lock(slice);
 
-       (*p)(env, cookie, "%p "LPX64" "LPX64" %d %p ",
+       (*p)(env, cookie, "%p %#llx %#llx %d %p ",
             lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
             lock->ols_state, lock->ols_owner);
        osc_lvb_print(env, cookie, p, &lock->ols_lvb);
@@ -1110,7 +1143,7 @@ static void osc_lock_lockless_cancel(const struct lu_env *env,
 
        LASSERT(ols->ols_dlmlock == NULL);
        result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
-                               descr->cld_mode, 0);
+                               descr->cld_mode, false);
         if (result)
                 CERROR("Pages for lockless lock %p were not purged(%d)\n",
                        ols, result);
@@ -1137,20 +1170,17 @@ static void osc_lock_set_writer(const struct lu_env *env,
                return;
 
        if (likely(io->ci_type == CIT_WRITE)) {
-               io_start = cl_index(obj, io->u.ci_rw.crw_pos);
-               io_end = cl_index(obj, io->u.ci_rw.crw_pos +
-                                               io->u.ci_rw.crw_count - 1);
-               if (cl_io_is_append(io)) {
-                       io_start = 0;
-                       io_end = CL_PAGE_EOF;
-               }
+               io_start = cl_index(obj, io->u.ci_rw.rw_range.cir_pos);
+               io_end = cl_index(obj, io->u.ci_rw.rw_range.cir_pos +
+                                 io->u.ci_rw.rw_range.cir_count - 1);
        } else {
                LASSERT(cl_io_is_mkwrite(io));
                io_start = io_end = io->u.ci_fault.ft_index;
        }
 
        if (descr->cld_mode >= CLM_WRITE &&
-           descr->cld_start <= io_start && descr->cld_end >= io_end) {
+           (cl_io_is_append(io) ||
+            (descr->cld_start <= io_start && descr->cld_end >= io_end))) {
                struct osc_io *oio = osc_env_io(env);
 
                /* There must be only one lock to match the write region */
@@ -1176,10 +1206,15 @@ int osc_lock_init(const struct lu_env *env,
        INIT_LIST_HEAD(&oscl->ols_wait_entry);
        INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
 
+       /* Speculative lock requests must be either no_expand or glimpse
+        * requests (CEF_GLIMPSE).  Speculative extent locks that are neither
+        * glimpse nor no_expand will break ofd_intent_cb (see comment there). */
+       LASSERT(ergo((enqflags & CEF_SPECULATIVE) != 0,
+               (enqflags & (CEF_LOCK_NO_EXPAND | CEF_GLIMPSE)) != 0));
+
        oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
-       oscl->ols_agl = !!(enqflags & CEF_AGL);
-       if (oscl->ols_agl)
-               oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+       oscl->ols_speculative = !!(enqflags & CEF_SPECULATIVE);
+
        if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
                oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
                oscl->ols_glimpse = 1;
@@ -1197,7 +1232,7 @@ int osc_lock_init(const struct lu_env *env,
        if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
                osc_lock_set_writer(env, io, obj, oscl);
 
-       LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags "LPX64"\n",
+       LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %#llx",
                          lock, oscl, oscl->ols_flags);
 
        return 0;
@@ -1233,9 +1268,8 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
         * with a uniq gid and it conflicts with all other lock modes too
         */
 again:
-       mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
-                              flags, resname, LDLM_EXTENT, policy,
-                              LCK_PR | LCK_PW | LCK_GROUP, &lockh,
+       mode = osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy,
+                              LCK_PR | LCK_PW | LCK_GROUP, &flags, obj, &lockh,
                               dap_flags & OSC_DAP_FL_CANCELING);
        if (mode != 0) {
                lock = ldlm_handle2lock(&lockh);