There is a potential deadlock race condition between removing a
striped directory and the namespace LFSCK. Consider the following
scenario:
1) The LFSCK thread obtains the master object first; at that
time, the master object has not been destroyed yet.
2) One RPC service thread destroys the master and all its
slave objects (shards). Because the LFSCK thread is referencing
the master object, the master object will be marked
as dying in RAM. In turn, because the master object is
referencing all its slave objects, all the slave objects
will be marked as dying in RAM as well.
3) The LFSCK thread tries to find some slave object while holding
the master object reference. It will then find that the slave
object is dying. According to the object visibility rules,
an object with the dying flag cannot be returned to others.
So the LFSCK thread has to wait until the dying object has
been purged from RAM before it can allocate a new object (with
the same FID) in RAM. Unfortunately, the LFSCK thread itself
is referencing the master object, which prevents the master object
from being purged, which in turn prevents the slave object from
being purged. So the LFSCK thread will fall into deadlock.
To resolve this problem, the LFSCK should use the non-blocking version
of lu_object_find() to locate the slave object of the striped dir,
and return failure immediately (instead of waiting) when it finds
a dying (slave) object.
This patch also controls the async pipeline depth between the
LFSCK main engine and the namespace assistant thread to avoid
potential RAM pressure.
It also makes some other code adjustments to avoid potential data
overflow that may cause incorrect LFSCK statistics information.
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I00c601eca8ade5d2e4260c729463f7ecdba0ed53
Reviewed-on: http://review.whamcloud.com/12741
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
return lfsck_object_find_by_dev(env, lfsck->li_next, fid);
}
return lfsck_object_find_by_dev(env, lfsck->li_next, fid);
}
-static inline struct dt_object *
-lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck,
- const struct lu_fid *fid)
+static inline struct dt_device *
+lfsck_find_dev_by_fid(const struct lu_env *env, struct lfsck_instance *lfsck,
+ const struct lu_fid *fid)
{
struct dt_device *dev;
int idx;
{
struct dt_device *dev;
int idx;
+ return dev;
+}
+
+static inline struct dt_object *
+lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck,
+ const struct lu_fid *fid)
+{
+ struct dt_device *dev;
+
+ dev = lfsck_find_dev_by_fid(env, lfsck, fid);
+ if (IS_ERR(dev))
+ return (struct dt_object *)dev;
+
return lfsck_object_find_by_dev(env, dev, fid);
}
return lfsck_object_find_by_dev(env, dev, fid);
}
+static inline struct dt_object *
+lfsck_object_find_bottom_nowait(const struct lu_env *env,
+ struct lfsck_instance *lfsck,
+ const struct lu_fid *fid)
+{
+ struct dt_device *dev;
+
+ dev = lfsck_find_dev_by_fid(env, lfsck, fid);
+ if (IS_ERR(dev))
+ return (struct dt_object *)dev;
+
+ return lfsck_object_find_by_dev_nowait(env, dev, fid);
+}
+
static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds,
__u32 index)
{
static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds,
__u32 index)
{
__u64 checked = lo->ll_objs_checked_phase1 +
com->lc_new_checked;
__u64 speed = checked;
__u64 checked = lo->ll_objs_checked_phase1 +
com->lc_new_checked;
__u64 speed = checked;
- __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
- MSEC_PER_SEC);
+ __u64 new_checked = com->lc_new_checked *
+ msecs_to_jiffies(MSEC_PER_SEC);
__u32 rtime = lo->ll_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
__u32 rtime = lo->ll_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
com->lc_new_checked;
__u64 speed1 = lo->ll_objs_checked_phase1;
__u64 speed2 = checked;
com->lc_new_checked;
__u64 speed1 = lo->ll_objs_checked_phase1;
__u64 speed2 = checked;
- __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
- MSEC_PER_SEC);
+ __u64 new_checked = com->lc_new_checked *
+ msecs_to_jiffies(MSEC_PER_SEC);
__u32 rtime = lo->ll_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
__u32 rtime = lo->ll_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
if (limit != LFSCK_SPEED_NO_LIMIT) {
if (limit > msecs_to_jiffies(MSEC_PER_SEC)) {
if (limit != LFSCK_SPEED_NO_LIMIT) {
if (limit > msecs_to_jiffies(MSEC_PER_SEC)) {
- lfsck->li_sleep_rate = jiffies_to_msecs(limit) /
- MSEC_PER_SEC;
+ lfsck->li_sleep_rate = limit /
+ msecs_to_jiffies(MSEC_PER_SEC);
lfsck->li_sleep_jif = 1;
} else {
lfsck->li_sleep_rate = 1;
lfsck->li_sleep_jif = 1;
} else {
lfsck->li_sleep_rate = 1;
- lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC /
- limit);
+ lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC) /
+ limit;
}
} else {
lfsck->li_sleep_jif = 0;
}
} else {
lfsck->li_sleep_jif = 0;
struct lfsck_component *com,
struct lu_dirent *ent, __u16 type)
{
struct lfsck_component *com,
struct lu_dirent *ent, __u16 type)
{
- struct lfsck_assistant_data *lad = com->lc_data;
+ struct lfsck_assistant_data *lad = com->lc_data;
+ struct lfsck_instance *lfsck = com->lc_lfsck;
struct lfsck_namespace_req *lnr;
struct lfsck_namespace_req *lnr;
+ struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
+ struct ptlrpc_thread *mthread = &lfsck->li_thread;
+ struct ptlrpc_thread *athread = &lad->lad_thread;
+ struct l_wait_info lwi = { 0 };
+ bool wakeup = false;
+
+ l_wait_event(mthread->t_ctl_waitq,
+ bk->lb_async_windows == 0 ||
+ lad->lad_prefetched < bk->lb_async_windows ||
+ !thread_is_running(mthread) ||
+ thread_is_stopped(athread),
+ &lwi);
+
+ if (unlikely(!thread_is_running(mthread)) ||
+ thread_is_stopped(athread))
+ return 0;
+
+ if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir)))
+ return 0;
lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, ent, type);
if (IS_ERR(lnr)) {
lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, ent, type);
if (IS_ERR(lnr)) {
lfsck->li_time_last_checkpoint;
__u64 checked = ns->ln_items_checked + com->lc_new_checked;
__u64 speed = checked;
lfsck->li_time_last_checkpoint;
__u64 checked = ns->ln_items_checked + com->lc_new_checked;
__u64 speed = checked;
- __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
- MSEC_PER_SEC);
+ __u64 new_checked = com->lc_new_checked *
+ msecs_to_jiffies(MSEC_PER_SEC);
__u32 rtime = ns->ln_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
__u32 rtime = ns->ln_run_time_phase1 +
cfs_duration_sec(duration + HALF_SEC);
com->lc_new_checked;
__u64 speed1 = ns->ln_items_checked;
__u64 speed2 = checked;
com->lc_new_checked;
__u64 speed1 = ns->ln_items_checked;
__u64 speed2 = checked;
- __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
- MSEC_PER_SEC);
+ __u64 new_checked = com->lc_new_checked *
+ msecs_to_jiffies(MSEC_PER_SEC);
__u32 rtime = ns->ln_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
__u32 rtime = ns->ln_run_time_phase2 +
cfs_duration_sec(duration + HALF_SEC);
cname = lfsck_name_get_const(env, info->lti_tmpbuf, len);
memcpy(lnr->lnr_name, info->lti_tmpbuf, len);
cname = lfsck_name_get_const(env, info->lti_tmpbuf, len);
memcpy(lnr->lnr_name, info->lti_tmpbuf, len);
- obj = lfsck_object_find_bottom(env, lfsck, cfid);
+ obj = lfsck_object_find_bottom_nowait(env, lfsck, cfid);
+ if (lfsck_is_dead_obj(dir))
+ RETURN(0);
+
rc1 = PTR_ERR(obj);
goto next;
}
rc1 = PTR_ERR(obj);
goto next;
}
- obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid);
- if (IS_ERR(obj))
+ obj = lfsck_object_find_by_dev_nowait(env, dev, &lnr->lnr_fid);
+ if (IS_ERR(obj)) {
+ if (lfsck_is_dead_obj(dir))
+ RETURN(0);
+
GOTO(fail_lmv, rc = PTR_ERR(obj));
GOTO(fail_lmv, rc = PTR_ERR(obj));
if (!dt_object_exists(obj)) {
stripe = lfsck_shard_name_to_index(env, lnr->lnr_name,
if (!dt_object_exists(obj)) {
stripe = lfsck_shard_name_to_index(env, lnr->lnr_name,
struct lu_object *obj;
wait_queue_t wait;
struct lu_object *obj;
wait_queue_t wait;
- while (1) {
- if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT) {
- obj = lu_object_find_try(env, dev, f, conf, NULL);
-
- return obj;
- }
+ if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT)
+ return lu_object_find_try(env, dev, f, conf, NULL);
obj = lu_object_find_try(env, dev, f, conf, &wait);
if (obj != ERR_PTR(-EAGAIN))
return obj;
obj = lu_object_find_try(env, dev, f, conf, &wait);
if (obj != ERR_PTR(-EAGAIN))
return obj;