LU-744 clio: save memory allocations for cl_page
diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c
index 7f92431..4b1d3af 100644
@@ -27,7 +27,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -67,10 +67,8 @@ struct lov_layout_operations {
                             union lov_layout_state *state);
         int  (*llo_print)(const struct lu_env *env, void *cookie,
                           lu_printer_t p, const struct lu_object *o);
-        struct cl_page *(*llo_page_init)(const struct lu_env *env,
-                                         struct cl_object *obj,
-                                         struct cl_page *page,
-                                         cfs_page_t *vmpage);
+        int  (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
+                               struct cl_page *page, cfs_page_t *vmpage);
         int  (*llo_lock_init)(const struct lu_env *env,
                               struct cl_object *obj, struct cl_lock *lock,
                               const struct cl_io *io);
@@ -107,7 +105,6 @@ static void lov_install_raid0(const struct lu_env *env,
                               struct lov_object *lov,
                               union  lov_layout_state *state)
 {
-        lov->u = *state;
 }
 
 static struct cl_object *lov_sub_find(const struct lu_env *env,
@@ -133,6 +130,17 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
         struct lov_oinfo        *oinfo;
         int result;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
+               /* For sanity test_206:
+                * do not leave the object in cache, to avoid accessing
+                * freed memory. osc_object still refers to the lov_oinfo
+                * in lsm_stripe_data, which will be freed because of
+                * this failure. */
+               cl_object_kill(env, stripe);
+               cl_object_put(env, stripe);
+               return -EIO;
+       }
+
         hdr    = cl_object_header(lov2cl(lov));
         subhdr = cl_object_header(stripe);
         parent = subhdr->coh_parent;
@@ -197,7 +205,7 @@ static int lov_init_raid0(const struct lu_env *env,
         if (r0->lo_sub != NULL) {
                 result = 0;
                 subconf->coc_inode = conf->coc_inode;
-                cfs_spin_lock_init(&r0->lo_sub_lock);
+               spin_lock_init(&r0->lo_sub_lock);
                 /*
                  * Create stripe cl_objects.
                  */
@@ -211,6 +219,9 @@ static int lov_init_raid0(const struct lu_env *env,
                         subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
                         subconf->u.coc_oinfo = oinfo;
                         LASSERTF(subdev != NULL, "not init ost %d\n", ost_idx);
+                       /* In the function below, .hs_keycmp resolves to
+                        * lu_obj_hop_keycmp() */
+                       /* coverity[overrun-buffer-val] */
                         stripe = lov_sub_find(env, subdev, ofid, subconf);
                         if (!IS_ERR(stripe))
                                 result = lov_init_sub(env, lov, stripe, r0, i);
@@ -226,6 +237,7 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
                            union lov_layout_state *state)
 {
        LASSERT(lov->lo_type == LLT_EMPTY);
+       cl_object_prune(env, &lov->lo_cl);
        return 0;
 }
 
@@ -261,12 +273,12 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov,
                         /* this wait-queue is signaled at the end of
                          * lu_object_free(). */
                         cfs_set_current_state(CFS_TASK_UNINT);
-                        cfs_spin_lock(&r0->lo_sub_lock);
-                        if (r0->lo_sub[idx] == los) {
-                                cfs_spin_unlock(&r0->lo_sub_lock);
-                                cfs_waitq_wait(waiter, CFS_TASK_UNINT);
-                        } else {
-                                cfs_spin_unlock(&r0->lo_sub_lock);
+                       spin_lock(&r0->lo_sub_lock);
+                       if (r0->lo_sub[idx] == los) {
+                               spin_unlock(&r0->lo_sub_lock);
+                               cfs_waitq_wait(waiter, CFS_TASK_UNINT);
+                       } else {
+                               spin_unlock(&r0->lo_sub_lock);
                                 cfs_set_current_state(CFS_TASK_RUNNING);
                                 break;
                         }
@@ -286,19 +298,21 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
        ENTRY;
 
        dump_lsm(D_INODE, lsm);
-       if (cfs_atomic_read(&lsm->lsm_refc) > 1)
+       if (lov->lo_lsm_invalid && cfs_atomic_read(&lsm->lsm_refc) > 1)
                RETURN(-EBUSY);
 
         if (r0->lo_sub != NULL) {
                 for (i = 0; i < r0->lo_nr; ++i) {
                         struct lovsub_object *los = r0->lo_sub[i];
 
-                        if (los != NULL)
+                        if (los != NULL) {
+                               cl_locks_prune(env, &los->lso_cl, 1);
                                 /*
                                  * If top-level object is to be evicted from
                                  * the cache, so are its sub-objects.
                                  */
                                 lov_subobject_kill(env, lov, los, i);
+                       }
                 }
         }
        RETURN(0);
@@ -313,20 +327,16 @@ static void lov_fini_empty(const struct lu_env *env, struct lov_object *lov,
 static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov,
                            union lov_layout_state *state)
 {
-        struct lov_layout_raid0 *r0 = &state->raid0;
-
-        ENTRY;
+       struct lov_layout_raid0 *r0 = &state->raid0;
+       ENTRY;
 
-        if (r0->lo_sub != NULL) {
-                OBD_FREE_LARGE(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
-                r0->lo_sub = NULL;
-        }
+       if (r0->lo_sub != NULL) {
+               OBD_FREE_LARGE(r0->lo_sub, r0->lo_nr * sizeof r0->lo_sub[0]);
+               r0->lo_sub = NULL;
+       }
 
-       LASSERTF(cfs_atomic_read(&lov->lo_lsm->lsm_refc) == 1,
-               "actual %d proc %p.\n",
-               cfs_atomic_read(&lov->lo_lsm->lsm_refc), cfs_current());
+       dump_lsm(D_INODE, lov->lo_lsm);
        lov_free_memmd(&lov->lo_lsm);
-       lov->lo_lsm = NULL;
 
        EXIT;
 }
@@ -385,8 +395,14 @@ static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj,
         ENTRY;
 
        /* this is called w/o holding type guard mutex, so it must be inside
-        * an on going IO otherwise lsm may be replaced. */
-       LASSERT(cfs_atomic_read(&lsm->lsm_refc) > 1);
+        * an ongoing IO; otherwise the lsm may be replaced.
+        * LU-2117: it turns out there is one exception. For mmapped files,
+        * a lock on one file may be requested in another file's IO
+        * context, and since this function is called from ccc_lock_state(),
+        * it would hit this assertion.
+        * It is still okay to call attr_get without the type guard, as the
+        * layout cannot change while locks exist. */
+       /* LASSERT(cfs_atomic_read(&lsm->lsm_refc) > 1); */
 
         if (!r0->lo_attr_valid) {
                 /*
@@ -430,7 +446,7 @@ const static struct lov_layout_operations lov_dispatch[] = {
                 .llo_install   = lov_install_empty,
                 .llo_print     = lov_print_empty,
                 .llo_page_init = lov_page_init_empty,
-                .llo_lock_init = NULL,
+                .llo_lock_init = lov_lock_init_empty,
                 .llo_io_init   = lov_io_init_empty,
                 .llo_getattr   = lov_attr_get_empty
         },
@@ -464,13 +480,13 @@ const static struct lov_layout_operations lov_dispatch[] = {
 static inline void lov_conf_freeze(struct lov_object *lov)
 {
        if (lov->lo_owner != cfs_current())
-               cfs_down_read(&lov->lo_type_guard);
+               down_read(&lov->lo_type_guard);
 }
 
 static inline void lov_conf_thaw(struct lov_object *lov)
 {
        if (lov->lo_owner != cfs_current())
-               cfs_up_read(&lov->lo_type_guard);
+               up_read(&lov->lo_type_guard);
 }
 
 #define LOV_2DISPATCH_MAYLOCK(obj, op, lock, ...)                       \
@@ -505,6 +521,20 @@ do {                                                                    \
        lov_conf_thaw(__obj);                                           \
 } while (0)
 
+static void lov_conf_lock(struct lov_object *lov)
+{
+       LASSERT(lov->lo_owner != cfs_current());
+       down_write(&lov->lo_type_guard);
+       LASSERT(lov->lo_owner == NULL);
+       lov->lo_owner = cfs_current();
+}
+
+static void lov_conf_unlock(struct lov_object *lov)
+{
+       lov->lo_owner = NULL;
+       up_write(&lov->lo_type_guard);
+}
+
 static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov)
 {
        struct l_wait_info lwi = { 0 };
@@ -514,22 +544,33 @@ static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov)
        if (!lov->lo_lsm_invalid || lsm == NULL)
                RETURN(0);
 
-       l_wait_event(lov->lo_waitq, cfs_atomic_read(&lsm->lsm_refc) == 1, &lwi);
+       LASSERT(cfs_atomic_read(&lsm->lsm_refc) > 0);
+       while (cfs_atomic_read(&lsm->lsm_refc) > 1 && lov->lo_lsm_invalid) {
+               lov_conf_unlock(lov);
+
+               CDEBUG(D_INODE, "file:"DFID" wait for active IO, now: %d.\n",
+                       PFID(lu_object_fid(lov2lu(lov))),
+                       cfs_atomic_read(&lsm->lsm_refc));
+
+               l_wait_event(lov->lo_waitq,
+                            cfs_atomic_read(&lsm->lsm_refc) == 1, &lwi);
+               lov_conf_lock(lov);
+       }
        RETURN(0);
 }
 
-static int lov_layout_change(const struct lu_env *env,
+static int lov_layout_change(const struct lu_env *unused,
                              struct lov_object *lov, enum lov_layout_type llt,
                              const struct cl_object_conf *conf)
 {
        int result;
-       union lov_layout_state *state = &lov_env_info(env)->lti_state;
+       union lov_layout_state *state = &lov->u;
        const struct lov_layout_operations *old_ops;
        const struct lov_layout_operations *new_ops;
 
        struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
        void *cookie;
-       struct lu_env *nested;
+       struct lu_env *env;
        int refcheck;
 
        LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch));
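The loop added to lov_layout_wait() above drops the layout guard, sleeps until in-flight IO drains its lsm references down to one, then retakes the guard and re-checks, since new IO may have started in the unlocked window. A short pthread-based sketch of the same idea (hypothetical names); pthread_cond_wait() bundles the unlock/sleep/relock steps that the kernel code spells out with lov_conf_unlock(), l_wait_event() and lov_conf_lock():

#include <pthread.h>

struct layout_state {
        pthread_mutex_t guard;   /* analogue of lo_type_guard (write side) */
        pthread_cond_t  drained; /* analogue of lo_waitq */
        int             refc;    /* analogue of lsm_refc */
        int             invalid; /* analogue of lo_lsm_invalid */
};

/* called with st->guard held; returns with it held again */
static void demo_layout_wait(struct layout_state *st)
{
        while (st->refc > 1 && st->invalid)
                pthread_cond_wait(&st->drained, &st->guard);
}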
@@ -537,13 +578,11 @@ static int lov_layout_change(const struct lu_env *env,
        ENTRY;
 
        cookie = cl_env_reenter();
-       nested = cl_env_get(&refcheck);
-       if (!IS_ERR(nested))
-               cl_object_prune(nested, &lov->lo_cl);
-       else
-               result = PTR_ERR(nested);
-       cl_env_put(nested, &refcheck);
-       cl_env_reexit(cookie);
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env)) {
+               cl_env_reexit(cookie);
+               RETURN(PTR_ERR(env));
+       }
 
        old_ops = &lov_dispatch[lov->lo_type];
        new_ops = &lov_dispatch[llt];
@@ -555,14 +594,22 @@ static int lov_layout_change(const struct lu_env *env,
                LASSERT(hdr->coh_tree.rnode == NULL);
                LASSERT(hdr->coh_pages == 0);
 
+               lov->lo_type = LLT_EMPTY;
                result = new_ops->llo_init(env,
                                        lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
                                        lov, conf, state);
                if (result == 0) {
                        new_ops->llo_install(env, lov, state);
                        lov->lo_type = llt;
+               } else {
+                       new_ops->llo_delete(env, lov, state);
+                       new_ops->llo_fini(env, lov, state);
+                       /* this file becomes an EMPTY file. */
                }
        }
+
+       cl_env_put(env, &refcheck);
+       cl_env_reexit(cookie);
        RETURN(result);
 }
 
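The hunk above also changes the failure handling of lov_layout_change(): lo_type is set to LLT_EMPTY before the new layout is initialized, and if llo_init() fails the partial state is deleted and finalized, so the object degrades to a valid empty layout instead of being left half-built. A compact sketch of that "downgrade on failure" shape, with hypothetical names rather than the CLIO API:

struct layout_ops {                       /* mirrors llo_init/llo_delete/llo_fini */
        int  (*init)(void *obj);
        void (*del)(void *obj);
        void (*fini)(void *obj);
};

enum layout_type { LT_EMPTY, LT_RAID0 };

static int demo_layout_change(void *obj, enum layout_type *type,
                              enum layout_type want,
                              const struct layout_ops *ops)
{
        int rc;

        *type = LT_EMPTY;               /* assume the worst until init succeeds */
        rc = ops[want].init(obj);
        if (rc == 0) {
                *type = want;           /* upgrade only on success */
        } else {
                ops[want].del(obj);     /* tear down whatever init built */
                ops[want].fini(obj);    /* the object stays a valid EMPTY one */
        }
        return rc;
}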
@@ -578,54 +625,66 @@ int lov_object_init(const struct lu_env *env, struct lu_object *obj,
         struct lov_device            *dev   = lu2lov_dev(obj->lo_dev);
         struct lov_object            *lov   = lu2lov(obj);
         const struct cl_object_conf  *cconf = lu2cl_conf(conf);
-        union  lov_layout_state      *set   = &lov_env_info(env)->lti_state;
+        union  lov_layout_state      *set   = &lov->u;
         const struct lov_layout_operations *ops;
         int result;
 
         ENTRY;
-        cfs_init_rwsem(&lov->lo_type_guard);
+       init_rwsem(&lov->lo_type_guard);
        cfs_waitq_init(&lov->lo_waitq);
 
+       cl_object_page_init(lu2cl(obj), sizeof(struct lov_page));
+
         /* no locking is necessary, as object is being created */
         lov->lo_type = cconf->u.coc_md->lsm != NULL ? LLT_RAID0 : LLT_EMPTY;
         ops = &lov_dispatch[lov->lo_type];
         result = ops->llo_init(env, dev, lov, cconf, set);
         if (result == 0)
                 ops->llo_install(env, lov, set);
-        else
-                ops->llo_fini(env, lov, set);
         RETURN(result);
 }
 
 static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
                         const struct cl_object_conf *conf)
 {
-       struct lov_stripe_md *lsm = conf->u.coc_md->lsm;
+       struct lov_stripe_md *lsm = NULL;
        struct lov_object *lov = cl2lov(obj);
        int result = 0;
        ENTRY;
 
-       /*
-        * Only LLT_EMPTY <-> LLT_RAID0 transitions are supported.
-        */
-       LASSERT(lov->lo_owner != cfs_current());
-       cfs_down_write(&lov->lo_type_guard);
-       LASSERT(lov->lo_owner == NULL);
-       lov->lo_owner = cfs_current();
-
-       if (conf->coc_invalidate) {
+       lov_conf_lock(lov);
+       if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
                lov->lo_lsm_invalid = 1;
                GOTO(out, result = 0);
        }
 
-       if (conf->coc_validate_only) {
-               if (!lov->lo_lsm_invalid)
-                       GOTO(out, result = 0);
+       if (conf->coc_opc == OBJECT_CONF_WAIT) {
+               result = lov_layout_wait(env, lov);
+               GOTO(out, result);
+       }
+
+       LASSERT(conf->coc_opc == OBJECT_CONF_SET);
 
-               lov_layout_wait(env, lov);
-               /* fall through to set up new layout */
+       if (conf->u.coc_md != NULL)
+               lsm = conf->u.coc_md->lsm;
+       if ((lsm == NULL && lov->lo_lsm == NULL) ||
+           (lsm != NULL && lov->lo_lsm != NULL &&
+            lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen)) {
+               /* same version of layout */
+               lov->lo_lsm_invalid = 0;
+               GOTO(out, result = 0);
        }
 
+       /* will change layout - check if there still exists active IO. */
+       if (lov->lo_lsm != NULL &&
+           cfs_atomic_read(&lov->lo_lsm->lsm_refc) > 1) {
+               lov->lo_lsm_invalid = 1;
+               GOTO(out, result = -EBUSY);
+       }
+
+       /*
+        * Only LLT_EMPTY <-> LLT_RAID0 transitions are supported.
+        */
        switch (lov->lo_type) {
        case LLT_EMPTY:
                if (lsm != NULL)
@@ -644,8 +703,7 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
        EXIT;
 
 out:
-       lov->lo_owner = NULL;
-       cfs_up_write(&lov->lo_type_guard);
+       lov_conf_unlock(lov);
        RETURN(result);
 }
 
@@ -675,11 +733,11 @@ static int lov_object_print(const struct lu_env *env, void *cookie,
         return LOV_2DISPATCH(lu2lov(o), llo_print, env, cookie, p, o);
 }
 
-struct cl_page *lov_page_init(const struct lu_env *env, struct cl_object *obj,
-                              struct cl_page *page, cfs_page_t *vmpage)
+int lov_page_init(const struct lu_env *env, struct cl_object *obj,
+               struct cl_page *page, cfs_page_t *vmpage)
 {
-        return LOV_2DISPATCH(cl2lov(obj),
-                             llo_page_init, env, obj, page, vmpage);
+        return LOV_2DISPATCH_NOLOCK(cl2lov(obj),
+                                   llo_page_init, env, obj, page, vmpage);
 }
 
 /**
@@ -687,29 +745,11 @@ struct cl_page *lov_page_init(const struct lu_env *env, struct cl_object *obj,
  * layer. Dispatches to the appropriate layout io initialization method.
  */
 int lov_io_init(const struct lu_env *env, struct cl_object *obj,
-                struct cl_io *io)
+               struct cl_io *io)
 {
-       struct lov_io *lio = lov_env_io(env);
-
-        CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
-
-       /* hold lsm before initializing because io relies on it */
-       lio->lis_lsm = lov_lsm_addref(cl2lov(obj));
-
-        /*
-         * Do not take lock in case of CIT_MISC io, because
-         *
-         *     - if this is an io for a glimpse, then we don't care;
-         *
-         *     - if this not a glimpse (writepage or lock cancellation), then
-         *       layout change cannot happen because a page or a lock
-         *       already exist; and
-         *
-         *     - lock ordering (lock mutex nests within layout rw-semaphore)
-         *       is obeyed in case of lock cancellation.
-         */
-        return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
-                                     io->ci_type != CIT_MISC, env, obj, io);
+       CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
+       return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
+                                    !io->ci_ignore_layout, env, obj, io);
 }
 
 /**
@@ -735,9 +775,11 @@ static int lov_attr_set(const struct lu_env *env, struct cl_object *obj,
 }
 
 int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
-                  struct cl_lock *lock, const struct cl_io *io)
+                 struct cl_lock *lock, const struct cl_io *io)
 {
-        return LOV_2DISPATCH(cl2lov(obj), llo_lock_init, env, obj, lock, io);
+       /* No need to lock because we already hold a reference on the layout. */
+       return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_lock_init, env, obj, lock,
+                                   io);
 }
 
 static const struct cl_object_operations lov_ops = {
@@ -788,10 +830,11 @@ struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
        struct lov_stripe_md *lsm = NULL;
 
        lov_conf_freeze(lov);
-       if (!lov->lo_lsm_invalid && lov->lo_lsm != NULL) {
+       if (lov->lo_lsm != NULL) {
                lsm = lsm_addref(lov->lo_lsm);
-               CDEBUG(D_INODE, "lsm %p addref %d by %p.\n",
-                       lsm, cfs_atomic_read(&lsm->lsm_refc), cfs_current());
+               CDEBUG(D_INODE, "lsm %p addref %d/%d by %p.\n",
+                       lsm, cfs_atomic_read(&lsm->lsm_refc),
+                       lov->lo_lsm_invalid, cfs_current());
        }
        lov_conf_thaw(lov);
        return lsm;