From 8f01f8b51d114b0d2d54a5ab7db3161782e52447 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Thu, 28 May 2015 21:26:40 -0700 Subject: [PATCH] LU-6271 osc: handle osc eviction correctly Cleanup everything if an OSC is being evicted. Group lock is not well supported yet. Signed-off-by: Jinshan Xiong Change-Id: I24f815a9a1bf13cb565109b2dae7b4d07b57f921 Reviewed-on: http://review.whamcloud.com/14989 Tested-by: Jenkins Reviewed-by: John L. Hammond Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lock.c | 3 +-- lustre/osc/osc_cache.c | 27 ++++++++++--------- lustre/osc/osc_cl_internal.h | 11 +++++--- lustre/osc/osc_internal.h | 2 +- lustre/osc/osc_io.c | 60 +++++++++++++++++++++++++++++++++--------- lustre/osc/osc_object.c | 20 ++++++++++++++ lustre/osc/osc_request.c | 45 +++++++++++++++++++++++++------ lustre/tests/recovery-small.sh | 17 ++++++++++++ 8 files changed, 144 insertions(+), 41 deletions(-) diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index d75352c..39abaef 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1207,8 +1207,7 @@ static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data) if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE)) return INTERVAL_ITER_CONT; - if ((data->lmd_flags & LDLM_FL_LOCAL_ONLY) && - !ldlm_is_local(lock)) + if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock))) return INTERVAL_ITER_CONT; if (data->lmd_flags & LDLM_FL_TEST_LOCK) { diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index a4ae79b..dd28035 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -226,7 +226,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, if (ext->oe_sync && ext->oe_grants > 0) GOTO(out, rc = 90); - if (ext->oe_dlmlock != NULL) { + if (ext->oe_dlmlock != NULL && !ldlm_is_failed(ext->oe_dlmlock)) { struct ldlm_extent *extent; extent = &ext->oe_dlmlock->l_policy_data.l_extent; @@ -2670,8 +2670,8 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, /** * Called by osc_io_setattr_start() to freeze and destroy covering extents. */ -int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj, __u64 size) +int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, + __u64 size, struct osc_extent **extp) { struct client_obd *cli = osc_cli(obj); struct osc_extent *ext; @@ -2765,9 +2765,11 @@ again: /* we need to hold this extent in OES_TRUNC state so * that no writeback will happen. This is to avoid - * BUG 17397. */ - LASSERT(oio->oi_trunc == NULL); - oio->oi_trunc = osc_extent_get(ext); + * BUG 17397. + * Only partial truncate can reach here, if @size is + * not zero, the caller should provide a valid @extp. */ + LASSERT(*extp == NULL); + *extp = osc_extent_get(ext); OSC_EXTENT_DUMP(D_CACHE, ext, "trunc at "LPU64"\n", size); } @@ -2792,13 +2794,10 @@ again: /** * Called after osc_io_setattr_end to add oio->oi_trunc back to cache. */ -void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj) +void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext) { - struct osc_extent *ext = oio->oi_trunc; - - oio->oi_trunc = NULL; if (ext != NULL) { + struct osc_object *obj = ext->oe_obj; bool unplug = false; EASSERT(ext->oe_nr_pages > 0, ext); @@ -3136,8 +3135,10 @@ static int discard_cb(const struct lu_env *env, struct cl_io *io, /* page is top page. */ info->oti_next_index = osc_index(ops) + 1; if (cl_page_own(env, io, page) == 0) { - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageDirty(cl_page_vmpage(page)))); + if (!ergo(page->cp_type == CPT_CACHEABLE, + !PageDirty(cl_page_vmpage(page)))) + CL_PAGE_DEBUG(D_ERROR, env, page, + "discard dirty page?\n"); /* discard the page */ cl_page_discard(env, io, page); diff --git a/lustre/osc/osc_cl_internal.h b/lustre/osc/osc_cl_internal.h index 41edbbe..65658f6 100644 --- a/lustre/osc/osc_cl_internal.h +++ b/lustre/osc/osc_cl_internal.h @@ -172,6 +172,10 @@ struct osc_object { /* Protect osc_lock this osc_object has */ spinlock_t oo_ol_spin; struct list_head oo_ol_list; + + /** number of active IOs of this object */ + atomic_t oo_nr_ios; + wait_queue_head_t oo_io_waitq; }; static inline void osc_object_lock(struct osc_object *obj) @@ -420,10 +424,9 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, struct osc_page *ops); int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, struct list_head *list, int cmd, int brw_flags); -int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj, __u64 size); -void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio, - struct osc_object *obj); +int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, + __u64 size, struct osc_extent **extp); +void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext); int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end, int hp, int discard); int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 6dddf2b..9a03ba3 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -236,5 +236,5 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, struct osc_object *obj, pgoff_t index, enum osc_dap_flags flags); void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa); - +int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc); #endif /* OSC_INTERNAL_H */ diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 23ef9f4..3837da2 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -352,8 +352,25 @@ static int osc_io_commit_async(const struct lu_env *env, RETURN(result); } -static int osc_io_rw_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) +static int osc_io_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_object *osc = cl2osc(ios->cis_obj); + struct obd_import *imp = osc_cli(osc)->cl_import; + int rc = -EIO; + + spin_lock(&imp->imp_lock); + if (likely(!imp->imp_invalid)) { + atomic_inc(&osc->oo_nr_ios); + rc = 0; + } + spin_unlock(&imp->imp_lock); + + return rc; +} + +static int osc_io_write_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; struct osc_io *oio = osc_env_io(env); @@ -365,7 +382,7 @@ static int osc_io_rw_iter_init(const struct lu_env *env, ENTRY; if (cl_io_is_append(io)) - RETURN(0); + RETURN(osc_io_iter_init(env, ios)); npages = io->u.ci_rw.crw_count >> PAGE_CACHE_SHIFT; if (io->u.ci_rw.crw_pos & ~PAGE_MASK) @@ -395,11 +412,21 @@ static int osc_io_rw_iter_init(const struct lu_env *env, (void)ptlrpcd_queue_work(cli->cl_lru_work); } - RETURN(0); + RETURN(osc_io_iter_init(env, ios)); } -static void osc_io_rw_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) +static void osc_io_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_object *osc = cl2osc(ios->cis_obj); + + LASSERT(atomic_read(&osc->oo_nr_ios) > 0); + if (atomic_dec_and_test(&osc->oo_nr_ios)) + wake_up_all(&osc->oo_io_waitq); +} + +static void osc_io_write_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) { struct osc_io *oio = osc_env_io(env); struct osc_object *osc = cl2osc(ios->cis_obj); @@ -410,6 +437,8 @@ static void osc_io_rw_iter_fini(const struct lu_env *env, oio->oi_lru_reserved = 0; } oio->oi_write_osclock = NULL; + + osc_io_iter_fini(env, ios); } static int osc_io_fault_start(const struct lu_env *env, @@ -501,7 +530,8 @@ static int osc_io_setattr_start(const struct lu_env *env, /* truncate cache dirty pages first */ if (cl_io_is_trunc(io)) - result = osc_cache_truncate_start(env, oio, cl2osc(obj), size); + result = osc_cache_truncate_start(env, cl2osc(obj), size, + &oio->oi_trunc); if (result == 0 && oio->oi_lockless == 0) { cl_object_attr_lock(obj); @@ -611,10 +641,8 @@ static void osc_io_setattr_end(const struct lu_env *env, if (cl_io_is_trunc(io)) { __u64 size = io->u.ci_setattr.sa_attr.lvb_size; osc_trunc_check(env, io, oio, size); - if (oio->oi_trunc != NULL) { - osc_cache_truncate_end(env, oio, cl2osc(obj)); - oio->oi_trunc = NULL; - } + osc_cache_truncate_end(env, oio->oi_trunc); + oio->oi_trunc = NULL; } } @@ -861,17 +889,21 @@ static void osc_io_end(const struct lu_env *env, static const struct cl_io_operations osc_io_ops = { .op = { [CIT_READ] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_read_start, .cio_fini = osc_io_fini }, [CIT_WRITE] = { - .cio_iter_init = osc_io_rw_iter_init, - .cio_iter_fini = osc_io_rw_iter_fini, + .cio_iter_init = osc_io_write_iter_init, + .cio_iter_fini = osc_io_write_iter_fini, .cio_start = osc_io_write_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini }, [CIT_SETATTR] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_setattr_start, .cio_end = osc_io_setattr_end }, @@ -880,6 +912,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_end = osc_io_data_version_end, }, [CIT_FAULT] = { + .cio_iter_init = osc_io_iter_init, + .cio_iter_fini = osc_io_iter_fini, .cio_start = osc_io_fault_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini diff --git a/lustre/osc/osc_object.c b/lustre/osc/osc_object.c index f840257..16bb564 100644 --- a/lustre/osc/osc_object.c +++ b/lustre/osc/osc_object.c @@ -97,6 +97,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, spin_lock_init(&osc->oo_ol_spin); INIT_LIST_HEAD(&osc->oo_ol_list); + atomic_set(&osc->oo_nr_ios, 0); + init_waitqueue_head(&osc->oo_io_waitq); + cl_object_page_init(lu2cl(obj), sizeof(struct osc_page)); return 0; @@ -119,6 +122,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj) LASSERT(atomic_read(&osc->oo_nr_reads) == 0); LASSERT(atomic_read(&osc->oo_nr_writes) == 0); LASSERT(list_empty(&osc->oo_ol_list)); + LASSERT(atomic_read(&osc->oo_nr_ios) == 0); lu_object_fini(obj); OBD_SLAB_FREE_PTR(osc, osc_object_kmem); @@ -408,4 +412,20 @@ struct lu_object *osc_object_alloc(const struct lu_env *env, return obj; } +int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc) +{ + struct l_wait_info lwi = { 0 }; + ENTRY; + + CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n", + osc, atomic_read(&osc->oo_nr_ios)); + + l_wait_event(osc->oo_io_waitq, atomic_read(&osc->oo_nr_ios) == 0, &lwi); + + /* Discard all pages of this object. */ + osc_cache_truncate_start(env, osc, 0, NULL); + + RETURN(0); +} + /** @} osc */ diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index cadef12..42d132d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2513,6 +2513,33 @@ static int osc_disconnect(struct obd_export *exp) return rc; } +static int osc_ldlm_resource_invalidate(struct cfs_hash *hs, + struct cfs_hash_bd *bd, struct hlist_node *hnode, void *arg) +{ + struct lu_env *env = arg; + struct ldlm_resource *res = cfs_hash_object(hs, hnode); + struct ldlm_lock *lock; + struct osc_object *osc = NULL; + ENTRY; + + lock_res(res); + list_for_each_entry(lock, &res->lr_granted, l_res_link) { + if (lock->l_ast_data != NULL && osc == NULL) { + osc = lock->l_ast_data; + cl_object_get(osc2cl(osc)); + } + lock->l_ast_data = NULL; + } + unlock_res(res); + + if (osc != NULL) { + osc_object_invalidate(env, osc); + cl_object_put(env, osc2cl(osc)); + } + + RETURN(0); +} + static int osc_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) @@ -2541,16 +2568,18 @@ static int osc_import_event(struct obd_device *obd, struct lu_env *env; int refcheck; + ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); + env = cl_env_get(&refcheck); if (!IS_ERR(env)) { - /* Reset grants */ - cli = &obd->u.cli; - /* all pages go to failing rpcs due to the invalid - * import */ - osc_io_unplug(env, cli, NULL); - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); - cl_env_put(env, &refcheck); + osc_io_unplug(env, &obd->u.cli, NULL); + + cfs_hash_for_each_nolock(ns->ns_rs_hash, + osc_ldlm_resource_invalidate, + env, 0); + cl_env_put(env, &refcheck); + + ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); } else rc = PTR_ERR(env); break; diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index a2fb151..af5c209 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1822,6 +1822,23 @@ test_107 () { } run_test 107 "drop reint reply, then restart MDT" +test_108() { + mkdir -p $DIR/$tdir + $SETSTRIPE -c 1 -i 0 $DIR/$tdir + + dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=256 & + local dd_pid=$! + sleep 0.1 + + ost_evict_client + + wait $dd_pid + + client_up || error "reconnect failed" + rm -f $DIR/$tdir/$tfile +} +run_test 108 "client eviction don't crash" + test_110a () { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 local remote_dir=$DIR/$tdir/remote_dir -- 1.8.3.1