The LFSCK start logic will hold li_mutex on the lfsck instance
during LFSCK start processing. The LFSCK stop logic also needs
to take the li_mutex on the lfsck instance when stop the LFSCK.
If someone triggers lfsck_stop (such as when umount the target)
before the lfsck_start return, then lfsck_stop will be blocked
on the li_mutex. And if the li_mutex holder is blocked by other
things, for example, it may be waiting for the LFSCK RPC to be
handled by remote server (MDT/OST) but the connection or remote
server is not ready yet, then the lfsck_stop will be blocked.
To avoid such cascade block trouble, the patch makes lfsck_stop
can go ahead without taking li_mutex, then it can directly tell
related LFSCK engines the stop event even if former lfsck_start
does not complete yet.
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I6e168d955db33d74778142235a8ed2802d3577d9
Reviewed-on: https://review.whamcloud.com/30420
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
current_pid());
spin_lock(&lfsck->li_lock);
current_pid());
spin_lock(&lfsck->li_lock);
+ if (unlikely(!thread_is_starting(thread))) {
+ spin_unlock(&lfsck->li_lock);
+ GOTO(fini_oit, rc = 0);
+ }
+
thread_set_flags(thread, SVC_RUNNING);
spin_unlock(&lfsck->li_lock);
wake_up_all(&thread->t_ctl_waitq);
thread_set_flags(thread, SVC_RUNNING);
spin_unlock(&lfsck->li_lock);
wake_up_all(&thread->t_ctl_waitq);
l_wait_event(mthread->t_ctl_waitq,
thread_is_running(athread) ||
l_wait_event(mthread->t_ctl_waitq,
thread_is_running(athread) ||
- thread_is_stopped(athread),
+ thread_is_stopped(athread) ||
+ !thread_is_starting(mthread),
- if (unlikely(!thread_is_running(athread)))
+ if (unlikely(!thread_is_starting(mthread)))
+ /* stopped by race */
+ rc = -ESRCH;
+ else if (unlikely(!thread_is_running(athread)))
rc = lad->lad_assistant_status;
else
rc = 0;
rc = lad->lad_assistant_status;
else
rc = 0;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
set_current_state(TASK_RUNNING);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
set_current_state(TASK_RUNNING);
- if (!signal_pending(current))
+ if (!signal_pending(current) &&
+ thread_is_running(&lfsck->li_thread))
GOTO(put, rc = -EINPROGRESS);
/* start == NULL means auto trigger paused LFSCK. */
GOTO(put, rc = -EINPROGRESS);
/* start == NULL means auto trigger paused LFSCK. */
- if ((start == NULL) &&
- (list_empty(&lfsck->li_list_scan) ||
- OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
- GOTO(put, rc = 0);
+ if (!start) {
+ if (list_empty(&lfsck->li_list_scan) ||
+ OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO))
+ GOTO(put, rc = 0);
+ } else if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) {
+ CERROR("%s: only allow to specify '-A | -o' via MDS\n",
+ lfsck_lfsck2name(lfsck));
+
+ GOTO(put, rc = -EPERM);
+ }
bk = &lfsck->li_bookmark_ram;
thread = &lfsck->li_thread;
mutex_lock(&lfsck->li_mutex);
spin_lock(&lfsck->li_lock);
bk = &lfsck->li_bookmark_ram;
thread = &lfsck->li_thread;
mutex_lock(&lfsck->li_mutex);
spin_lock(&lfsck->li_lock);
+ if (unlikely(thread_is_stopping(thread))) {
+ /* Someone is stopping the LFSCK. */
+ spin_unlock(&lfsck->li_lock);
+ GOTO(out, rc = -EBUSY);
+ }
+
if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
rc = -EALREADY;
if (unlikely(start == NULL)) {
if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
rc = -EALREADY;
if (unlikely(start == NULL)) {
if (start == NULL)
goto trigger;
if (start == NULL)
goto trigger;
- if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) {
- CERROR("%s: only allow to specify '-A | -o' via MDS\n",
- lfsck_lfsck2name(lfsck));
-
- GOTO(out, rc = -EPERM);
- }
-
start->ls_version = bk->lb_version;
if (start->ls_active != 0) {
start->ls_version = bk->lb_version;
if (start->ls_active != 0) {
flags |= DOIF_OUTUSED;
lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
flags |= DOIF_OUTUSED;
lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
- thread_set_flags(thread, 0);
lta = lfsck_thread_args_init(lfsck, NULL, lsp);
if (IS_ERR(lta))
GOTO(out, rc = PTR_ERR(lta));
__lfsck_set_speed(lfsck, bk->lb_speed_limit);
lta = lfsck_thread_args_init(lfsck, NULL, lsp);
if (IS_ERR(lta))
GOTO(out, rc = PTR_ERR(lta));
__lfsck_set_speed(lfsck, bk->lb_speed_limit);
+ spin_lock(&lfsck->li_lock);
+ thread_set_flags(thread, SVC_STARTING);
+ spin_unlock(&lfsck->li_lock);
task = kthread_run(lfsck_master_engine, lta, "lfsck");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
task = kthread_run(lfsck_master_engine, lta, "lfsck");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
RETURN(-ENXIO);
thread = &lfsck->li_thread;
RETURN(-ENXIO);
thread = &lfsck->li_thread;
- /* release lfsck::li_mutex to avoid deadlock. */
- if (stop != NULL && stop->ls_flags & LPF_BROADCAST) {
- if (!lfsck->li_master) {
- CERROR("%s: only allow to specify '-A' via MDS\n",
- lfsck_lfsck2name(lfsck));
-
- GOTO(out, rc = -EPERM);
- }
-
- rc1 = lfsck_stop_all(env, lfsck, stop);
+ if (stop && stop->ls_flags & LPF_BROADCAST && !lfsck->li_master) {
+ CERROR("%s: only allow to specify '-A' via MDS\n",
+ lfsck_lfsck2name(lfsck));
+ GOTO(put, rc = -EPERM);
- mutex_lock(&lfsck->li_mutex);
spin_lock(&lfsck->li_lock);
spin_lock(&lfsck->li_lock);
- /* no error if LFSCK is already stopped, or was never started */
- if (thread_is_init(thread) || thread_is_stopped(thread)) {
- spin_unlock(&lfsck->li_lock);
- GOTO(out, rc = 0);
- }
+ if (thread_is_init(thread) || thread_is_stopped(thread))
+ /* no error if LFSCK stopped already, or not started */
+ GOTO(unlock, rc = 0);
+ if (thread_is_stopping(thread))
+ /* Someone is stopping LFSCK. */
+ GOTO(unlock, rc = -EINPROGRESS);
+
+ if (stop) {
lfsck->li_status = stop->ls_status;
lfsck->li_flags = stop->ls_flags;
} else {
lfsck->li_status = stop->ls_status;
lfsck->li_flags = stop->ls_flags;
} else {
+ wake_up_all(&thread->t_ctl_waitq);
spin_unlock(&lfsck->li_lock);
spin_unlock(&lfsck->li_lock);
+ if (stop && stop->ls_flags & LPF_BROADCAST)
+ rc1 = lfsck_stop_all(env, lfsck, stop);
- wake_up_all(&thread->t_ctl_waitq);
+ /* It was me set the status as 'stopping' just now, if it is not
+ * 'stopping' now, then either stopped, or re-started by race. */
l_wait_event(thread->t_ctl_waitq,
l_wait_event(thread->t_ctl_waitq,
- thread_is_stopped(thread),
+ !thread_is_stopping(thread),
-out:
- mutex_unlock(&lfsck->li_mutex);
+unlock:
+ spin_unlock(&lfsck->li_lock);
+put:
lfsck_instance_put(env, lfsck);
return rc != 0 ? rc : rc1;
lfsck_instance_put(env, lfsck);
return rc != 0 ? rc : rc1;