From f0137d89fd40ae66aa1d3a180e4e5a6240009dcc Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 24 Sep 2014 17:30:56 +0800 Subject: [PATCH] LU-5885 lfsck: deadlock when remove striped dir There is a potential deadlock race condition between removing a striped directory and the namespace LFSCK. Consider the following scenario: 1) The LFSCK thread first obtains the master object; at that time, the master object has not been destroyed yet. 2) An RPC service thread destroys the master object and all its slave objects (shards). Because the LFSCK is still referencing the master object, the master object is marked as dying in RAM. In turn, because the master object is referencing all its slave objects, all the slave objects are marked as dying in RAM as well. 3) The LFSCK thread tries to find some slave object while still holding its reference on the master object, and finds that the slave object is dying. According to the object visibility rules, an object with the dying flag cannot be returned to others. So the LFSCK thread has to wait until the dying object has been purged from RAM before it can allocate a new object (with the same FID) in RAM. Unfortunately, the LFSCK thread itself is referencing the master object, which prevents the master object from being purged, which in turn prevents the slave object from being purged. So the LFSCK thread falls into deadlock. To resolve this, the LFSCK should use the non-blocking version of lu_object_find() to locate the slave objects of the striped dir, and return failure immediately (instead of waiting) when it finds a dying (slave) object. This patch also controls the async pipeline depth between the LFSCK main engine and the namespace assistant thread to avoid potential RAM pressure. It also includes some other code adjustments to avoid potential data overflow that may cause weird LFSCK statistics information.
Signed-off-by: Fan Yong Change-Id: I00c601eca8ade5d2e4260c729463f7ecdba0ed53 Reviewed-on: http://review.whamcloud.com/12741 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/lfsck/lfsck_internal.h | 33 ++++++++++++++++++++++++++++++--- lustre/lfsck/lfsck_layout.c | 8 ++++---- lustre/lfsck/lfsck_lib.c | 8 ++++---- lustre/lfsck/lfsck_namespace.c | 31 +++++++++++++++++++++++++------ lustre/lfsck/lfsck_striped_dir.c | 13 ++++++++++--- lustre/obdclass/lu_object.c | 9 +++------ 6 files changed, 76 insertions(+), 26 deletions(-) diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 2e71189..f9473d6 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -1184,9 +1184,9 @@ static inline struct dt_object *lfsck_object_find(const struct lu_env *env, return lfsck_object_find_by_dev(env, lfsck->li_next, fid); } -static inline struct dt_object * -lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck, - const struct lu_fid *fid) +static inline struct dt_device * +lfsck_find_dev_by_fid(const struct lu_env *env, struct lfsck_instance *lfsck, + const struct lu_fid *fid) { struct dt_device *dev; int idx; @@ -1207,9 +1207,36 @@ lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck, dev = ltd->ltd_tgt; } + return dev; +} + +static inline struct dt_object * +lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck, + const struct lu_fid *fid) +{ + struct dt_device *dev; + + dev = lfsck_find_dev_by_fid(env, lfsck, fid); + if (IS_ERR(dev)) + return (struct dt_object *)dev; + return lfsck_object_find_by_dev(env, dev, fid); } +static inline struct dt_object * +lfsck_object_find_bottom_nowait(const struct lu_env *env, + struct lfsck_instance *lfsck, + const struct lu_fid *fid) +{ + struct dt_device *dev; + + dev = lfsck_find_dev_by_fid(env, lfsck, fid); + if (IS_ERR(dev)) + return (struct 
dt_object *)dev; + + return lfsck_object_find_by_dev_nowait(env, dev, fid); +} + static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds, __u32 index) { diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index c9dfeba..b83bc87 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -4773,8 +4773,8 @@ static int lfsck_layout_dump(const struct lu_env *env, __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked; __u64 speed = checked; - __u64 new_checked = msecs_to_jiffies(com->lc_new_checked * - MSEC_PER_SEC); + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = lo->ll_run_time_phase1 + cfs_duration_sec(duration + HALF_SEC); @@ -4817,8 +4817,8 @@ static int lfsck_layout_dump(const struct lu_env *env, com->lc_new_checked; __u64 speed1 = lo->ll_objs_checked_phase1; __u64 speed2 = checked; - __u64 new_checked = msecs_to_jiffies(com->lc_new_checked * - MSEC_PER_SEC); + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = lo->ll_run_time_phase2 + cfs_duration_sec(duration + HALF_SEC); diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 2a64ea7..60f1df6 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -1646,13 +1646,13 @@ bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit) if (limit != LFSCK_SPEED_NO_LIMIT) { if (limit > msecs_to_jiffies(MSEC_PER_SEC)) { - lfsck->li_sleep_rate = jiffies_to_msecs(limit) / - MSEC_PER_SEC; + lfsck->li_sleep_rate = limit / + msecs_to_jiffies(MSEC_PER_SEC); lfsck->li_sleep_jif = 1; } else { lfsck->li_sleep_rate = 1; - lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC / - limit); + lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC) / + limit; } } else { lfsck->li_sleep_jif = 0; diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index 7032827..5e37a78 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ 
b/lustre/lfsck/lfsck_namespace.c @@ -4150,9 +4150,28 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, struct lfsck_component *com, struct lu_dirent *ent, __u16 type) { - struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_assistant_data *lad = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_namespace_req *lnr; - bool wakeup = false; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &lad->lad_thread; + struct l_wait_info lwi = { 0 }; + bool wakeup = false; + + l_wait_event(mthread->t_ctl_waitq, + bk->lb_async_windows == 0 || + lad->lad_prefetched < bk->lb_async_windows || + !thread_is_running(mthread) || + thread_is_stopped(athread), + &lwi); + + if (unlikely(!thread_is_running(mthread)) || + thread_is_stopped(athread)) + return 0; + + if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir))) + return 0; lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, ent, type); if (IS_ERR(lnr)) { @@ -4304,8 +4323,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, lfsck->li_time_last_checkpoint; __u64 checked = ns->ln_items_checked + com->lc_new_checked; __u64 speed = checked; - __u64 new_checked = msecs_to_jiffies(com->lc_new_checked * - MSEC_PER_SEC); + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = ns->ln_run_time_phase1 + cfs_duration_sec(duration + HALF_SEC); @@ -4359,8 +4378,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, com->lc_new_checked; __u64 speed1 = ns->ln_items_checked; __u64 speed2 = checked; - __u64 new_checked = msecs_to_jiffies(com->lc_new_checked * - MSEC_PER_SEC); + __u64 new_checked = com->lc_new_checked * + msecs_to_jiffies(MSEC_PER_SEC); __u32 rtime = ns->ln_run_time_phase2 + cfs_duration_sec(duration + HALF_SEC); diff --git a/lustre/lfsck/lfsck_striped_dir.c b/lustre/lfsck/lfsck_striped_dir.c index 
c42d233..b0103df 100644 --- a/lustre/lfsck/lfsck_striped_dir.c +++ b/lustre/lfsck/lfsck_striped_dir.c @@ -1935,8 +1935,11 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env, cname = lfsck_name_get_const(env, info->lti_tmpbuf, len); memcpy(lnr->lnr_name, info->lti_tmpbuf, len); - obj = lfsck_object_find_bottom(env, lfsck, cfid); + obj = lfsck_object_find_bottom_nowait(env, lfsck, cfid); if (IS_ERR(obj)) { + if (lfsck_is_dead_obj(dir)) + RETURN(0); + rc1 = PTR_ERR(obj); goto next; } @@ -2241,9 +2244,13 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env, dev = ltd->ltd_tgt; } - obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid); - if (IS_ERR(obj)) + obj = lfsck_object_find_by_dev_nowait(env, dev, &lnr->lnr_fid); + if (IS_ERR(obj)) { + if (lfsck_is_dead_obj(dir)) + RETURN(0); + GOTO(fail_lmv, rc = PTR_ERR(obj)); + } if (!dt_object_exists(obj)) { stripe = lfsck_shard_name_to_index(env, lnr->lnr_name, diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 75e422a..eec1bbb 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -796,13 +796,10 @@ struct lu_object *lu_object_find_at(const struct lu_env *env, struct lu_object *obj; wait_queue_t wait; - while (1) { - if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT) { - obj = lu_object_find_try(env, dev, f, conf, NULL); - - return obj; - } + if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT) + return lu_object_find_try(env, dev, f, conf, NULL); + while (1) { obj = lu_object_find_try(env, dev, f, conf, &wait); if (obj != ERR_PTR(-EAGAIN)) return obj; -- 1.8.3.1