Sometimes, the LFSCK start request may come (from a remote server)
before the local target is initialized. If we start the LFSCK right
away on the current server, the LFSCK engine may dereference a NULL
pointer, such as when looking up a FID with a NULL 'ss_server_fld'.
To avoid such trouble, the LFSCK start logic will return -EINPROGRESS
to the request sponsor. It is the sponsor's duty to retry the start
request some time later.
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: If7bc44e025b5f3c4f977b3a35e3784ada548a2df
Reviewed-on: https://review.whamcloud.com/30259
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
__u32 ltd_layout_gen;
__u32 ltd_namespace_gen;
unsigned int ltd_dead:1,
__u32 ltd_layout_gen;
__u32 ltd_namespace_gen;
unsigned int ltd_dead:1,
ltd_layout_done:1,
ltd_namespace_done:1,
ltd_synced_failures:1;
ltd_layout_done:1,
ltd_namespace_done:1,
ltd_synced_failures:1;
switch (lr->lr_event) {
case LE_START:
switch (lr->lr_event) {
case LE_START:
+ if (unlikely(rc == -EINPROGRESS)) {
+ ltd->ltd_retry_start = 1;
+ break;
+ }
+
if (rc != 0) {
CDEBUG(D_LFSCK, "%s: fail to notify %s %x for %s "
"start: rc = %d\n",
if (rc != 0) {
CDEBUG(D_LFSCK, "%s: fail to notify %s %x for %s "
"start: rc = %d\n",
struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
__u32 idx;
int rc = 0;
struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
__u32 idx;
int rc = 0;
ENTRY;
LASSERT(start->ls_flags & LPF_BROADCAST);
ENTRY;
LASSERT(start->ls_flags & LPF_BROADCAST);
- set = ptlrpc_prep_set();
- if (unlikely(set == NULL))
- RETURN(-ENOMEM);
-
memset(lr, 0, sizeof(*lr));
lr->lr_event = LE_START;
lr->lr_index = lfsck_dev_idx(lfsck);
memset(lr, 0, sizeof(*lr));
lr->lr_event = LE_START;
lr->lr_index = lfsck_dev_idx(lfsck);
laia->laia_lr = lr;
laia->laia_shared = 1;
laia->laia_lr = lr;
laia->laia_shared = 1;
+again:
+ set = ptlrpc_prep_set();
+ if (unlikely(!set))
+ RETURN(-ENOMEM);
+
down_read(&ltds->ltd_rw_sem);
cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
ltd = lfsck_tgt_get(ltds, idx);
LASSERT(ltd != NULL);
down_read(&ltds->ltd_rw_sem);
cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
ltd = lfsck_tgt_get(ltds, idx);
LASSERT(ltd != NULL);
+ if (retry && !ltd->ltd_retry_start) {
+ lfsck_tgt_put(ltd);
+ continue;
+ }
+
+ ltd->ltd_retry_start = 0;
ltd->ltd_layout_done = 0;
ltd->ltd_namespace_done = 0;
ltd->ltd_synced_failures = 0;
ltd->ltd_layout_done = 0;
ltd->ltd_namespace_done = 0;
ltd->ltd_synced_failures = 0;
if (rc == 0)
rc = laia->laia_result;
if (rc == 0)
rc = laia->laia_result;
+ if (unlikely(rc == -EINPROGRESS)) {
+ retry = true;
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
+ set_current_state(TASK_RUNNING);
+ if (!signal_pending(current))
+ goto again;
+
+ rc = -EINTR;
+ }
+
if (rc != 0) {
struct lfsck_stop *stop = &info->lti_stop;
if (rc != 0) {
struct lfsck_stop *stop = &info->lti_stop;
RETURN(-ENXIO);
/* System is not ready, try again later. */
RETURN(-ENXIO);
/* System is not ready, try again later. */
- if (unlikely(lfsck->li_namespace == NULL))
- GOTO(put, rc = -EAGAIN);
+ if (unlikely(lfsck->li_namespace == NULL ||
+ lfsck_dev_site(lfsck)->ss_server_fld == NULL))
+ GOTO(put, rc = -EINPROGRESS);
/* start == NULL means auto trigger paused LFSCK. */
if ((start == NULL) &&
/* start == NULL means auto trigger paused LFSCK. */
if ((start == NULL) &&