Whamcloud - gitweb
LU-6271 osc: handle osc eviction correctly 89/14989/6
author Jinshan Xiong <jinshan.xiong@intel.com>
Fri, 29 May 2015 04:26:40 +0000 (21:26 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 16 Sep 2015 01:06:18 +0000 (01:06 +0000)
Clean up everything if an OSC is being evicted.

Group lock is not well supported yet.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: I24f815a9a1bf13cb565109b2dae7b4d07b57f921
Reviewed-on: http://review.whamcloud.com/14989
Tested-by: Jenkins
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_lock.c
lustre/osc/osc_cache.c
lustre/osc/osc_cl_internal.h
lustre/osc/osc_internal.h
lustre/osc/osc_io.c
lustre/osc/osc_object.c
lustre/osc/osc_request.c
lustre/tests/recovery-small.sh

index d75352c..39abaef 100644 (file)
@@ -1207,8 +1207,7 @@ static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data)
        if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE))
                return INTERVAL_ITER_CONT;
 
        if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE))
                return INTERVAL_ITER_CONT;
 
-       if ((data->lmd_flags & LDLM_FL_LOCAL_ONLY) &&
-           !ldlm_is_local(lock))
+       if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock)))
                return INTERVAL_ITER_CONT;
 
        if (data->lmd_flags & LDLM_FL_TEST_LOCK) {
                return INTERVAL_ITER_CONT;
 
        if (data->lmd_flags & LDLM_FL_TEST_LOCK) {
index a4ae79b..dd28035 100644 (file)
@@ -226,7 +226,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
        if (ext->oe_sync && ext->oe_grants > 0)
                GOTO(out, rc = 90);
 
        if (ext->oe_sync && ext->oe_grants > 0)
                GOTO(out, rc = 90);
 
-       if (ext->oe_dlmlock != NULL) {
+       if (ext->oe_dlmlock != NULL && !ldlm_is_failed(ext->oe_dlmlock)) {
                struct ldlm_extent *extent;
 
                extent = &ext->oe_dlmlock->l_policy_data.l_extent;
                struct ldlm_extent *extent;
 
                extent = &ext->oe_dlmlock->l_policy_data.l_extent;
@@ -2670,8 +2670,8 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
 /**
  * Called by osc_io_setattr_start() to freeze and destroy covering extents.
  */
 /**
  * Called by osc_io_setattr_start() to freeze and destroy covering extents.
  */
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
-                            struct osc_object *obj, __u64 size)
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
+                            __u64 size, struct osc_extent **extp)
 {
        struct client_obd *cli = osc_cli(obj);
        struct osc_extent *ext;
 {
        struct client_obd *cli = osc_cli(obj);
        struct osc_extent *ext;
@@ -2765,9 +2765,11 @@ again:
 
                        /* we need to hold this extent in OES_TRUNC state so
                         * that no writeback will happen. This is to avoid
 
                        /* we need to hold this extent in OES_TRUNC state so
                         * that no writeback will happen. This is to avoid
-                        * BUG 17397. */
-                       LASSERT(oio->oi_trunc == NULL);
-                       oio->oi_trunc = osc_extent_get(ext);
+                        * BUG 17397.
+                        * Only partial truncate can reach here, if @size is
+                        * not zero, the caller should provide a valid @extp. */
+                       LASSERT(*extp == NULL);
+                       *extp = osc_extent_get(ext);
                        OSC_EXTENT_DUMP(D_CACHE, ext,
                                        "trunc at "LPU64"\n", size);
                }
                        OSC_EXTENT_DUMP(D_CACHE, ext,
                                        "trunc at "LPU64"\n", size);
                }
@@ -2792,13 +2794,10 @@ again:
 /**
  * Called after osc_io_setattr_end to add oio->oi_trunc back to cache.
  */
 /**
  * Called after osc_io_setattr_end to add oio->oi_trunc back to cache.
  */
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
-                           struct osc_object *obj)
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext)
 {
 {
-       struct osc_extent *ext = oio->oi_trunc;
-
-       oio->oi_trunc = NULL;
        if (ext != NULL) {
        if (ext != NULL) {
+               struct osc_object *obj = ext->oe_obj;
                bool unplug = false;
 
                EASSERT(ext->oe_nr_pages > 0, ext);
                bool unplug = false;
 
                EASSERT(ext->oe_nr_pages > 0, ext);
@@ -3136,8 +3135,10 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io,
        /* page is top page. */
        info->oti_next_index = osc_index(ops) + 1;
        if (cl_page_own(env, io, page) == 0) {
        /* page is top page. */
        info->oti_next_index = osc_index(ops) + 1;
        if (cl_page_own(env, io, page) == 0) {
-               KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
-                             !PageDirty(cl_page_vmpage(page))));
+               if (!ergo(page->cp_type == CPT_CACHEABLE,
+                         !PageDirty(cl_page_vmpage(page))))
+                       CL_PAGE_DEBUG(D_ERROR, env, page,
+                                       "discard dirty page?\n");
 
                /* discard the page */
                cl_page_discard(env, io, page);
 
                /* discard the page */
                cl_page_discard(env, io, page);
index 41edbbe..65658f6 100644 (file)
@@ -172,6 +172,10 @@ struct osc_object {
        /* Protect osc_lock this osc_object has */
        spinlock_t              oo_ol_spin;
        struct list_head        oo_ol_list;
        /* Protect osc_lock this osc_object has */
        spinlock_t              oo_ol_spin;
        struct list_head        oo_ol_list;
+
+       /** number of active IOs of this object */
+       atomic_t                oo_nr_ios;
+       wait_queue_head_t       oo_io_waitq;
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
@@ -420,10 +424,9 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
                         struct osc_page *ops);
 int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
                         struct list_head *list, int cmd, int brw_flags);
                         struct osc_page *ops);
 int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
                         struct list_head *list, int cmd, int brw_flags);
-int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
-                            struct osc_object *obj, __u64 size);
-void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
-                           struct osc_object *obj);
+int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj,
+                            __u64 size, struct osc_extent **extp);
+void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext);
 int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
                              pgoff_t start, pgoff_t end, int hp, int discard);
 int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
 int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
                              pgoff_t start, pgoff_t end, int hp, int discard);
 int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
index 6dddf2b..9a03ba3 100644 (file)
@@ -236,5 +236,5 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
                                       struct osc_object *obj, pgoff_t index,
                                       enum osc_dap_flags flags);
 void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa);
                                       struct osc_object *obj, pgoff_t index,
                                       enum osc_dap_flags flags);
 void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa);
-
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
 #endif /* OSC_INTERNAL_H */
 #endif /* OSC_INTERNAL_H */
index 23ef9f4..3837da2 100644 (file)
@@ -352,8 +352,25 @@ static int osc_io_commit_async(const struct lu_env *env,
        RETURN(result);
 }
 
        RETURN(result);
 }
 
-static int osc_io_rw_iter_init(const struct lu_env *env,
-                               const struct cl_io_slice *ios)
+static int osc_io_iter_init(const struct lu_env *env,
+                           const struct cl_io_slice *ios)
+{
+       struct osc_object *osc = cl2osc(ios->cis_obj);
+       struct obd_import *imp = osc_cli(osc)->cl_import;
+       int rc = -EIO;
+
+       spin_lock(&imp->imp_lock);
+       if (likely(!imp->imp_invalid)) {
+               atomic_inc(&osc->oo_nr_ios);
+               rc = 0;
+       }
+       spin_unlock(&imp->imp_lock);
+
+       return rc;
+}
+
+static int osc_io_write_iter_init(const struct lu_env *env,
+                                 const struct cl_io_slice *ios)
 {
        struct cl_io *io = ios->cis_io;
        struct osc_io *oio = osc_env_io(env);
 {
        struct cl_io *io = ios->cis_io;
        struct osc_io *oio = osc_env_io(env);
@@ -365,7 +382,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
        ENTRY;
 
        if (cl_io_is_append(io))
        ENTRY;
 
        if (cl_io_is_append(io))
-               RETURN(0);
+               RETURN(osc_io_iter_init(env, ios));
 
        npages = io->u.ci_rw.crw_count >> PAGE_CACHE_SHIFT;
        if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
 
        npages = io->u.ci_rw.crw_count >> PAGE_CACHE_SHIFT;
        if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
@@ -395,11 +412,21 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
                (void)ptlrpcd_queue_work(cli->cl_lru_work);
        }
 
                (void)ptlrpcd_queue_work(cli->cl_lru_work);
        }
 
-       RETURN(0);
+       RETURN(osc_io_iter_init(env, ios));
 }
 
 }
 
-static void osc_io_rw_iter_fini(const struct lu_env *env,
-                               const struct cl_io_slice *ios)
+static void osc_io_iter_fini(const struct lu_env *env,
+                            const struct cl_io_slice *ios)
+{
+       struct osc_object *osc = cl2osc(ios->cis_obj);
+
+       LASSERT(atomic_read(&osc->oo_nr_ios) > 0);
+       if (atomic_dec_and_test(&osc->oo_nr_ios))
+               wake_up_all(&osc->oo_io_waitq);
+}
+
+static void osc_io_write_iter_fini(const struct lu_env *env,
+                                  const struct cl_io_slice *ios)
 {
        struct osc_io *oio = osc_env_io(env);
        struct osc_object *osc = cl2osc(ios->cis_obj);
 {
        struct osc_io *oio = osc_env_io(env);
        struct osc_object *osc = cl2osc(ios->cis_obj);
@@ -410,6 +437,8 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
                oio->oi_lru_reserved = 0;
        }
        oio->oi_write_osclock = NULL;
                oio->oi_lru_reserved = 0;
        }
        oio->oi_write_osclock = NULL;
+
+       osc_io_iter_fini(env, ios);
 }
 
 static int osc_io_fault_start(const struct lu_env *env,
 }
 
 static int osc_io_fault_start(const struct lu_env *env,
@@ -501,7 +530,8 @@ static int osc_io_setattr_start(const struct lu_env *env,
 
        /* truncate cache dirty pages first */
        if (cl_io_is_trunc(io))
 
        /* truncate cache dirty pages first */
        if (cl_io_is_trunc(io))
-               result = osc_cache_truncate_start(env, oio, cl2osc(obj), size);
+               result = osc_cache_truncate_start(env, cl2osc(obj), size,
+                                                 &oio->oi_trunc);
 
        if (result == 0 && oio->oi_lockless == 0) {
                cl_object_attr_lock(obj);
 
        if (result == 0 && oio->oi_lockless == 0) {
                cl_object_attr_lock(obj);
@@ -611,10 +641,8 @@ static void osc_io_setattr_end(const struct lu_env *env,
        if (cl_io_is_trunc(io)) {
                __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
                osc_trunc_check(env, io, oio, size);
        if (cl_io_is_trunc(io)) {
                __u64 size = io->u.ci_setattr.sa_attr.lvb_size;
                osc_trunc_check(env, io, oio, size);
-               if (oio->oi_trunc != NULL) {
-                       osc_cache_truncate_end(env, oio, cl2osc(obj));
-                       oio->oi_trunc = NULL;
-               }
+               osc_cache_truncate_end(env, oio->oi_trunc);
+               oio->oi_trunc = NULL;
        }
 }
 
        }
 }
 
@@ -861,17 +889,21 @@ static void osc_io_end(const struct lu_env *env,
 static const struct cl_io_operations osc_io_ops = {
        .op = {
                [CIT_READ] = {
 static const struct cl_io_operations osc_io_ops = {
        .op = {
                [CIT_READ] = {
+                       .cio_iter_init = osc_io_iter_init,
+                       .cio_iter_fini = osc_io_iter_fini,
                        .cio_start  = osc_io_read_start,
                        .cio_fini   = osc_io_fini
                },
                [CIT_WRITE] = {
                        .cio_start  = osc_io_read_start,
                        .cio_fini   = osc_io_fini
                },
                [CIT_WRITE] = {
-                       .cio_iter_init = osc_io_rw_iter_init,
-                       .cio_iter_fini = osc_io_rw_iter_fini,
+                       .cio_iter_init = osc_io_write_iter_init,
+                       .cio_iter_fini = osc_io_write_iter_fini,
                        .cio_start  = osc_io_write_start,
                        .cio_end    = osc_io_end,
                        .cio_fini   = osc_io_fini
                },
                [CIT_SETATTR] = {
                        .cio_start  = osc_io_write_start,
                        .cio_end    = osc_io_end,
                        .cio_fini   = osc_io_fini
                },
                [CIT_SETATTR] = {
+                       .cio_iter_init = osc_io_iter_init,
+                       .cio_iter_fini = osc_io_iter_fini,
                        .cio_start  = osc_io_setattr_start,
                        .cio_end    = osc_io_setattr_end
                },
                        .cio_start  = osc_io_setattr_start,
                        .cio_end    = osc_io_setattr_end
                },
@@ -880,6 +912,8 @@ static const struct cl_io_operations osc_io_ops = {
                        .cio_end    = osc_io_data_version_end,
                },
                [CIT_FAULT] = {
                        .cio_end    = osc_io_data_version_end,
                },
                [CIT_FAULT] = {
+                       .cio_iter_init = osc_io_iter_init,
+                       .cio_iter_fini = osc_io_iter_fini,
                        .cio_start  = osc_io_fault_start,
                        .cio_end    = osc_io_end,
                        .cio_fini   = osc_io_fini
                        .cio_start  = osc_io_fault_start,
                        .cio_end    = osc_io_end,
                        .cio_fini   = osc_io_fini
index f840257..16bb564 100644 (file)
@@ -97,6 +97,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
        spin_lock_init(&osc->oo_ol_spin);
        INIT_LIST_HEAD(&osc->oo_ol_list);
 
        spin_lock_init(&osc->oo_ol_spin);
        INIT_LIST_HEAD(&osc->oo_ol_list);
 
+       atomic_set(&osc->oo_nr_ios, 0);
+       init_waitqueue_head(&osc->oo_io_waitq);
+
        cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
 
        return 0;
        cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
 
        return 0;
@@ -119,6 +122,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
        LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
        LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
        LASSERT(list_empty(&osc->oo_ol_list));
        LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
        LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
        LASSERT(list_empty(&osc->oo_ol_list));
+       LASSERT(atomic_read(&osc->oo_nr_ios) == 0);
 
        lu_object_fini(obj);
        OBD_SLAB_FREE_PTR(osc, osc_object_kmem);
 
        lu_object_fini(obj);
        OBD_SLAB_FREE_PTR(osc, osc_object_kmem);
@@ -408,4 +412,20 @@ struct lu_object *osc_object_alloc(const struct lu_env *env,
        return obj;
 }
 
        return obj;
 }
 
+int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc)
+{
+       struct l_wait_info lwi = { 0 };
+       ENTRY;
+
+       CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n",
+              osc, atomic_read(&osc->oo_nr_ios));
+
+       l_wait_event(osc->oo_io_waitq, atomic_read(&osc->oo_nr_ios) == 0, &lwi);
+
+       /* Discard all pages of this object. */
+       osc_cache_truncate_start(env, osc, 0, NULL);
+
+       RETURN(0);
+}
+
 /** @} osc */
 /** @} osc */
index cadef12..42d132d 100644 (file)
@@ -2513,6 +2513,33 @@ static int osc_disconnect(struct obd_export *exp)
         return rc;
 }
 
         return rc;
 }
 
+static int osc_ldlm_resource_invalidate(struct cfs_hash *hs,
+       struct cfs_hash_bd *bd, struct hlist_node *hnode, void *arg)
+{
+       struct lu_env *env = arg;
+       struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+       struct ldlm_lock *lock;
+       struct osc_object *osc = NULL;
+       ENTRY;
+
+       lock_res(res);
+       list_for_each_entry(lock, &res->lr_granted, l_res_link) {
+               if (lock->l_ast_data != NULL && osc == NULL) {
+                       osc = lock->l_ast_data;
+                       cl_object_get(osc2cl(osc));
+               }
+               lock->l_ast_data = NULL;
+       }
+       unlock_res(res);
+
+       if (osc != NULL) {
+               osc_object_invalidate(env, osc);
+               cl_object_put(env, osc2cl(osc));
+       }
+
+       RETURN(0);
+}
+
 static int osc_import_event(struct obd_device *obd,
                             struct obd_import *imp,
                             enum obd_import_event event)
 static int osc_import_event(struct obd_device *obd,
                             struct obd_import *imp,
                             enum obd_import_event event)
@@ -2541,16 +2568,18 @@ static int osc_import_event(struct obd_device *obd,
                 struct lu_env         *env;
                 int                    refcheck;
 
                 struct lu_env         *env;
                 int                    refcheck;
 
+               ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+
                 env = cl_env_get(&refcheck);
                 if (!IS_ERR(env)) {
                 env = cl_env_get(&refcheck);
                 if (!IS_ERR(env)) {
-                        /* Reset grants */
-                        cli = &obd->u.cli;
-                        /* all pages go to failing rpcs due to the invalid
-                         * import */
-                       osc_io_unplug(env, cli, NULL);
-
-                        ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
-                        cl_env_put(env, &refcheck);
+                       osc_io_unplug(env, &obd->u.cli, NULL);
+
+                       cfs_hash_for_each_nolock(ns->ns_rs_hash,
+                                                osc_ldlm_resource_invalidate,
+                                                env, 0);
+                       cl_env_put(env, &refcheck);
+
+                       ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
                 } else
                         rc = PTR_ERR(env);
                 break;
                 } else
                         rc = PTR_ERR(env);
                 break;
index a2fb151..af5c209 100755 (executable)
@@ -1822,6 +1822,23 @@ test_107 () {
 }
 run_test 107 "drop reint reply, then restart MDT"
 
 }
 run_test 107 "drop reint reply, then restart MDT"
 
+test_108() {
+       mkdir -p $DIR/$tdir
+       $SETSTRIPE -c 1 -i 0 $DIR/$tdir
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=256 &
+       local dd_pid=$!
+       sleep 0.1
+
+       ost_evict_client
+
+       wait $dd_pid
+
+       client_up || error "reconnect failed"
+       rm -f $DIR/$tdir/$tfile
+}
+run_test 108 "client eviction don't crash"
+
 test_110a () {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
        local remote_dir=$DIR/$tdir/remote_dir
 test_110a () {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
        local remote_dir=$DIR/$tdir/remote_dir