set_producer_func set_producer;
/** opaq argument passed to the producer callback */
void *set_producer_arg;
+ unsigned int set_allow_intr:1;
};
/**
* status */
rq_allow_replay:1,
/* bulk request, sent to server, but uncommitted */
- rq_unstable:1;
+ rq_unstable:1,
+ rq_allow_intr:1;
/** @} */
/** server-side flags @{ */
#define OBD_FAIL_LFSCK_LOST_SLAVE_LMV 0x162a
#define OBD_FAIL_LFSCK_BAD_SLAVE_LMV 0x162b
#define OBD_FAIL_LFSCK_BAD_SLAVE_NAME 0x162c
+#define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 0x162d
#define OBD_FAIL_LFSCK_NOTIFY_NET 0x16f0
#define OBD_FAIL_LFSCK_QUERY_NET 0x16f1
}
spin_lock(&lad->lad_lock);
+ lad->lad_task = current;
thread_set_flags(athread, SVC_RUNNING);
spin_unlock(&lad->lad_lock);
wake_up_all(&mthread->t_ctl_waitq);
lad->lad_assistant_status = (rc1 != 0 ? rc1 : rc);
thread_set_flags(athread, SVC_STOPPED);
wake_up_all(&mthread->t_ctl_waitq);
+ lad->lad_task = NULL;
spin_unlock(&lad->lad_lock);
CDEBUG(D_LFSCK, "%s: %s LFSCK assistant thread exit: rc = %d\n",
const char *lad_name;
struct ptlrpc_thread lad_thread;
+ struct task_struct *lad_task;
struct lfsck_assistant_operations *lad_ops;
if (lso->lso_dead)
RETURN(0);
+ CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_ASSISTANT_DIRECT, cfs_fail_val);
+
rc = dt_attr_get(env, child, cla);
if (rc == -ENOENT) {
parent = lfsck_assistant_object_load(env, lfsck, lso);
up_write(&com->lc_sem);
CDEBUG(D_LFSCK, "%s: layout LFSCK master checkpoint at the pos ["
- LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
- lfsck->li_pos_current.lp_oit_cookie, rc);
+ LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck),
+ lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc);
return rc;
}
up_write(&com->lc_sem);
CDEBUG(D_LFSCK, "%s: layout LFSCK slave checkpoint at the pos ["
- LPU64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
- lfsck->li_pos_current.lp_oit_cookie, rc);
+ LPU64"], status = %d: rc = %d\n", lfsck_lfsck2name(lfsck),
+ lfsck->li_pos_current.lp_oit_cookie, lo->ll_status, rc);
return rc;
}
goto next;
}
- rc = dt_declare_attr_get(env, cobj);
- if (rc != 0)
- goto next;
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_ASSISTANT_DIRECT)) {
+ rc = dt_declare_attr_get(env, cobj);
+ if (rc != 0)
+ goto next;
- rc = dt_declare_xattr_get(env, cobj, &buf, XATTR_NAME_FID);
- if (rc != 0)
- goto next;
+ rc = dt_declare_xattr_get(env, cobj, &buf,
+ XATTR_NAME_FID);
+ if (rc != 0)
+ goto next;
+ }
if (lso == NULL) {
struct lu_attr *attr = &info->lti_la;
int rc;
bool done = false;
+ down_write(&com->lc_sem);
rc = lfsck_layout_lastid_store(env, com);
if (rc != 0)
result = rc;
LASSERT(lfsck->li_out_notify != NULL);
- down_write(&com->lc_sem);
spin_lock(&lfsck->li_lock);
if (!init)
lo->ll_pos_last_checkpoint =
LASSERT(llsd != NULL);
+ down_write(&com->lc_sem);
list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
lls_list) {
list_del_init(&lls->lls_list);
lfsck_object_put(env, lls->lls_lastid_obj);
OBD_FREE_PTR(lls);
}
+ up_write(&com->lc_sem);
spin_lock(&llsd->llsd_lock);
while (!list_empty(&llsd->llsd_master_list)) {
#define DEBUG_SUBSYSTEM S_LFSCK
#include <linux/kthread.h>
+#include <linux/sched.h>
#include <libcfs/list.h>
#include <lu_object.h>
#include <dt_object.h>
lad->lad_exit = 1;
lad->lad_to_post = 1;
+ CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s post, rc = %d\n",
+ lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result);
+
wake_up_all(&athread->t_ctl_waitq);
l_wait_event(mthread->t_ctl_waitq,
(*result > 0 && list_empty(&lad->lad_req_list)) ||
if (lad->lad_assistant_status < 0)
*result = lad->lad_assistant_status;
+
+ CDEBUG(D_LFSCK, "%s: the assistant has done %s post, rc = %d\n",
+ lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, *result);
}
int lfsck_double_scan_generic(const struct lu_env *env,
else
lad->lad_to_double_scan = 1;
+ CDEBUG(D_LFSCK, "%s: waiting for assistant to do %s double_scan, "
+ "status %d\n",
+ lfsck_lfsck2name(com->lc_lfsck), lad->lad_name, status);
+
wake_up_all(&athread->t_ctl_waitq);
l_wait_event(mthread->t_ctl_waitq,
lad->lad_in_double_scan ||
thread_is_stopped(athread),
&lwi);
+ CDEBUG(D_LFSCK, "%s: the assistant has done %s double_scan, "
+ "status %d\n", lfsck_lfsck2name(com->lc_lfsck), lad->lad_name,
+ lad->lad_assistant_status);
+
if (lad->lad_assistant_status < 0)
return lad->lad_assistant_status;
}
thread_set_flags(thread, SVC_STOPPING);
+
+ if (lfsck->li_master) {
+ struct lfsck_component *com;
+ struct lfsck_assistant_data *lad;
+
+ list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+ lad = com->lc_data;
+ spin_lock(&lad->lad_lock);
+ if (lad->lad_task != NULL)
+ force_sig(SIGINT, lad->lad_task);
+ spin_unlock(&lad->lad_lock);
+ }
+
+ list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
+ lad = com->lc_data;
+ spin_lock(&lad->lad_lock);
+ if (lad->lad_task != NULL)
+ force_sig(SIGINT, lad->lad_task);
+ spin_unlock(&lad->lad_lock);
+ }
+ }
+
spin_unlock(&lfsck->li_lock);
wake_up_all(&thread->t_ctl_waitq);
log:
CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos ["LPU64
- ", "DFID", "LPX64"]: rc = %d\n", lfsck_lfsck2name(lfsck),
- lfsck->li_pos_current.lp_oit_cookie,
+ ", "DFID", "LPX64"], status = %d: rc = %d\n",
+ lfsck_lfsck2name(lfsck), lfsck->li_pos_current.lp_oit_cookie,
PFID(&lfsck->li_pos_current.lp_dir_parent),
- lfsck->li_pos_current.lp_dir_cookie, rc);
+ lfsck->li_pos_current.lp_dir_cookie, ns->ln_status, rc);
return rc > 0 ? 0 : rc;
}
list_for_each_entry(lwp, &lsi->lsi_lwp_list, obd_lwp_list) {
char *ptr = strstr(lwp->obd_name, lwp_name);
- if (ptr != NULL) {
+ if (ptr != NULL && lwp->obd_lwp_export != NULL) {
exp = class_export_get(lwp->obd_lwp_export);
break;
}
* might be used to retrieve update log during recovery process, so
* it will be allowed to send during recovery process */
req->rq_allow_replay = 1;
+ req->rq_allow_intr = 1;
/* Note: some dt index api might return non-zero result here, like
* osd_index_ea_lookup, so we should only check rc < 0 here */
*/
cond_resched();
+ /* If the caller allows the request to be interrupted by force
+ * and it has really been interrupted, then move the request
+ * to RQ_PHASE_INTERPRET phase in spite of what the current
+ * phase is. */
+ if (unlikely(req->rq_allow_intr && req->rq_intr)) {
+ req->rq_status = -EINTR;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+ GOTO(interpret, req->rq_status);
+ }
+
if (req->rq_phase == RQ_PHASE_NEW &&
ptlrpc_send_new_req(req)) {
force_timer_recalc = 1;
list_entry(tmp, struct ptlrpc_request, rq_set_chain);
if (req->rq_phase != RQ_PHASE_RPC &&
- req->rq_phase != RQ_PHASE_UNREGISTERING)
+ req->rq_phase != RQ_PHASE_UNREGISTERING &&
+ !req->rq_allow_intr)
continue;
ptlrpc_mark_interrupted(req);
lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
ptlrpc_expired_set,
ptlrpc_interrupted_set, set);
+ else if (set->set_allow_intr)
+ lwi = LWI_TIMEOUT_INTR_ALL(
+ cfs_time_seconds(timeout ? timeout : 1),
+ ptlrpc_expired_set,
+ ptlrpc_interrupted_set, set);
else
/*
* At least one request is in flight, so no
/* LU-769 - if we ignored the signal because it was already
* pending when we started, we need to handle it now or we risk
* it being ignored forever */
- if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
+ if (rc == -ETIMEDOUT &&
+ (!lwi.lwi_allow_intr || set->set_allow_intr) &&
signal_pending(current)) {
sigset_t blocked_sigs =
cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
RETURN(-ENOMEM);
}
+ if (req->rq_allow_intr)
+ set->set_allow_intr = 1;
+
/* for distributed debugging */
lustre_msg_set_status(req->rq_reqmsg, current_pid());
}
run_test 31h "Repair the corrupted shard's name entry"
+test_32()
+{
+ # Verify that layout LFSCK can still be stopped after an OST has been
+ # stopped while the scan is in phase 1.
+ # NOTE(review): lfsck_prep presumably creates the test directories and
+ # files; confirm its arguments against the helper's definition.
+ lfsck_prep 5 5
+ umount_client $MOUNT
+
+ # Arm the fail_loc on the MDS. The C side passes fail_val to
+ # CFS_FAIL_TIMEOUT at this fail_loc, so fail_val=3 is presumably a
+ # 3-second stall in the assistant -- TODO confirm the unit.
+ #define OBD_FAIL_LFSCK_ASSISTANT_DIRECT 0x162d
+ do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x162d
+ $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!"
+
+ # The scan must still be in its first phase before the OST is stopped.
+ local STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }')
+ [ "$STATUS" == "scanning-phase1" ] ||
+ error "(3) Expect 'scanning-phase1', but got '$STATUS'"
+
+ echo "stop ost1"
+ stop ost1 > /dev/null || error "(4) Fail to stop OST1!"
+
+ # Clear the fail_loc, then wait briefly before issuing the stop.
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+ sleep 1
+
+ # The actual check: stopping LFSCK must succeed although ost1 is down.
+ echo "stop LFSCK"
+ $STOP_LFSCK || error "(5) Fail to stop LFSCK!"
+}
+run_test 32 "stop LFSCK when some OST failed"
+
# restore MDS/OST size
MDSSIZE=${SAVED_MDSSIZE}
OSTSIZE=${SAVED_OSTSIZE}