#include <obd_class.h>
#include <lustre_nodemap.h>
#include <sys/dsl_dataset.h>
+#include <sys/zap_impl.h>
+#include <sys/zap.h>
+#include <sys/zap_leaf.h>
#include "osd_internal.h"
#define OSD_OTABLE_MAX_HASH ((1ULL << 48) - 1)
#define OTABLE_PREFETCH 256
-#define DTO_INDEX_INSERT 1
-#define DTO_INDEX_DELETE 2
-#define DTO_INDEX_UPDATE 3
-
static inline bool osd_scrub_has_window(struct osd_otable_it *it)
{
return it->ooi_prefetched < OTABLE_PREFETCH;
* \retval 0, changed successfully
* \retval -ve, on error
*/
-static int osd_scrub_refresh_mapping(const struct lu_env *env,
- struct osd_device *dev,
- const struct lu_fid *fid,
- uint64_t oid, int ops,
- bool force, const char *name)
+int osd_scrub_refresh_mapping(const struct lu_env *env,
+ struct osd_device *dev,
+ const struct lu_fid *fid,
+ uint64_t oid, enum dt_txn_op ops,
+ bool force, const char *name)
{
struct osd_thread_info *info = osd_oti_get(env);
struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
GOTO(out, rc);
}
+ spin_lock(&scrub->os_lock);
scrub->os_full_speed = 1;
+ spin_unlock(&scrub->os_lock);
+
sf->sf_flags |= SF_INCONSISTENT;
} else if (oid == oid2) {
GOTO(out, rc = 0);
}
update:
+ spin_lock(&scrub->os_lock);
scrub->os_full_speed = 1;
+ spin_unlock(&scrub->os_lock);
sf->sf_flags |= SF_INCONSISTENT;
}
GOTO(out, rc);
out:
+ if (dev->od_is_ost) {
+ sa_handle_t *hdl;
+ uint64_t nlink, mode;
+
+ rc = -sa_handle_get(dev->od_os, oid, NULL, SA_HDL_PRIVATE,
+ &hdl);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ rc = -sa_lookup(hdl, SA_ZPL_MODE(dev), &mode, sizeof(mode));
+ if (rc || !S_ISREG(mode)) {
+ sa_handle_destroy(hdl);
+ GOTO(cleanup, rc);
+ }
+
+ rc = -sa_lookup(hdl, SA_ZPL_LINKS(dev), &nlink, sizeof(nlink));
+ if (rc == 0 && nlink > 1)
+ scrub->os_has_ml_file = 1;
+
+ sa_handle_destroy(hdl);
+ }
+
+cleanup:
if (nvbuf)
nvlist_free(nvbuf);
RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
}
-static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
-{
- struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
- struct scrub_file *sf = &scrub->os_file;
- __u32 flags = scrub->os_start_flags;
- int rc;
- bool drop_dryrun = false;
- ENTRY;
-
- CDEBUG(D_LFSCK, "%s: OI scrub prep, flags = 0x%x\n",
- scrub->os_name, flags);
-
- down_write(&scrub->os_rwsem);
- if (flags & SS_SET_FAILOUT)
- sf->sf_param |= SP_FAILOUT;
- else if (flags & SS_CLEAR_FAILOUT)
- sf->sf_param &= ~SP_FAILOUT;
-
- if (flags & SS_SET_DRYRUN) {
- sf->sf_param |= SP_DRYRUN;
- } else if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) {
- sf->sf_param &= ~SP_DRYRUN;
- drop_dryrun = true;
- }
-
- if (flags & SS_RESET)
- scrub_file_reset(scrub, dev->od_uuid, 0);
-
- scrub->os_partial_scan = 0;
- if (flags & SS_AUTO_FULL) {
- scrub->os_full_speed = 1;
- sf->sf_flags |= SF_AUTO;
- } else if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
- SF_UPGRADE)) {
- scrub->os_full_speed = 1;
- } else {
- scrub->os_full_speed = 0;
- }
-
- spin_lock(&scrub->os_lock);
- scrub->os_in_prior = 0;
- scrub->os_waiting = 0;
- scrub->os_paused = 0;
- scrub->os_in_join = 0;
- scrub->os_full_scrub = 0;
- spin_unlock(&scrub->os_lock);
- scrub->os_new_checked = 0;
- if (drop_dryrun && sf->sf_pos_first_inconsistent != 0)
- sf->sf_pos_latest_start = sf->sf_pos_first_inconsistent;
- else if (sf->sf_pos_last_checkpoint != 0)
- sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
- else
- sf->sf_pos_latest_start = 1;
-
- scrub->os_pos_current = sf->sf_pos_latest_start;
- sf->sf_status = SS_SCANNING;
- sf->sf_time_latest_start = cfs_time_current_sec();
- sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
- sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1;
- rc = scrub_file_store(env, scrub);
- if (!rc) {
- spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_RUNNING);
- spin_unlock(&scrub->os_lock);
- wake_up_all(&thread->t_ctl_waitq);
- }
- up_write(&scrub->os_rwsem);
-
- RETURN(rc);
-}
-
-static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev,
- int result)
-{
- struct lustre_scrub *scrub = &dev->od_scrub;
- struct scrub_file *sf = &scrub->os_file;
- int rc;
- ENTRY;
-
- CDEBUG(D_LFSCK, "%s: OI scrub post with result = %d\n",
- scrub->os_name, result);
-
- down_write(&scrub->os_rwsem);
- spin_lock(&scrub->os_lock);
- thread_set_flags(&scrub->os_thread, SVC_STOPPING);
- spin_unlock(&scrub->os_lock);
- if (scrub->os_new_checked > 0) {
- sf->sf_items_checked += scrub->os_new_checked;
- scrub->os_new_checked = 0;
- sf->sf_pos_last_checkpoint = scrub->os_pos_current;
- }
- sf->sf_time_last_checkpoint = cfs_time_current_sec();
- if (result > 0) {
- sf->sf_status = SS_COMPLETED;
- if (!(sf->sf_param & SP_DRYRUN)) {
- memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
- sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
- SF_UPGRADE | SF_AUTO);
- }
- sf->sf_time_last_complete = sf->sf_time_last_checkpoint;
- sf->sf_success_count++;
- } else if (result == 0) {
- if (scrub->os_paused)
- sf->sf_status = SS_PAUSED;
- else
- sf->sf_status = SS_STOPPED;
- } else {
- sf->sf_status = SS_FAILED;
- }
- sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
- scrub->os_time_last_checkpoint);
- rc = scrub_file_store(env, scrub);
- up_write(&scrub->os_rwsem);
-
- RETURN(rc < 0 ? rc : result);
-}
-
/* iteration engine */
static inline int
spin_lock(&scrub->os_lock);
if (osd_scrub_has_window(it) ||
!list_empty(&scrub->os_inconsistent_items) ||
- it->ooi_waiting || !thread_is_running(&scrub->os_thread))
+ it->ooi_waiting || kthread_should_stop())
scrub->os_waiting = 0;
else
scrub->os_waiting = 1;
static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev,
struct lu_fid *fid, uint64_t *oid)
{
- struct l_wait_info lwi = { 0 };
struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct osd_otable_it *it = dev->od_otable_it;
struct lustre_mdt_attrs *lma = NULL;
nvlist_t *nvbuf = NULL;
ENTRY;
if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
- lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
- if (likely(lwi.lwi_timeout > 0)) {
- l_wait_event(thread->t_ctl_waitq,
- !list_empty(&scrub->os_inconsistent_items) ||
- !thread_is_running(thread),
- &lwi);
- if (unlikely(!thread_is_running(thread)))
- RETURN(SCRUB_NEXT_EXIT);
- }
+ wait_var_event_timeout(
+ scrub,
+ !list_empty(&scrub->os_inconsistent_items) ||
+ kthread_should_stop(),
+ cfs_time_seconds(cfs_fail_val));
+
+ if (kthread_should_stop())
+ RETURN(SCRUB_NEXT_EXIT);
}
if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_STOPPING);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
RETURN(SCRUB_NEXT_CRASH);
}
spin_unlock(&scrub->os_lock);
}
- if (!scrub->os_full_speed && !osd_scrub_has_window(it)) {
- memset(&lwi, 0, sizeof(lwi));
- l_wait_event(thread->t_ctl_waitq,
- osd_scrub_wakeup(scrub, it),
- &lwi);
- }
+ if (!scrub->os_full_speed && !osd_scrub_has_window(it))
+ wait_var_event(scrub, osd_scrub_wakeup(scrub, it));
- if (unlikely(!thread_is_running(thread)))
+ if (kthread_should_stop())
GOTO(out, rc = SCRUB_NEXT_EXIT);
rc = -dmu_object_next(dev->od_os, &scrub->os_pos_current, B_FALSE, 0);
it->ooi_prefetched++;
if (it->ooi_waiting) {
it->ooi_waiting = 0;
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
}
spin_unlock(&scrub->os_lock);
}
const struct lu_fid *fid, uint64_t oid, int rc)
{
struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct osd_otable_it *it = dev->od_otable_it;
rc = osd_scrub_check_update(env, dev, fid, oid, rc);
it->ooi_prefetched++;
if (it->ooi_waiting) {
it->ooi_waiting = 0;
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
}
spin_unlock(&scrub->os_lock);
}
} else {
+ spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
+ spin_unlock(&scrub->os_lock);
}
if (rc)
return 0;
}
+static int osd_scan_ml_file_main(const struct lu_env *env,
+ struct osd_device *dev);
+
static int osd_scrub_main(void *args)
{
struct lu_env env;
struct osd_device *dev = (struct osd_device *)args;
struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct lu_fid *fid;
uint64_t oid;
- int rc = 0;
+ int rc = 0, ret;
ENTRY;
rc = lu_env_init(&env, LCT_LOCAL | LCT_DT_THREAD);
GOTO(noenv, rc);
}
- rc = osd_scrub_prep(&env, dev);
+ rc = scrub_thread_prep(&env, scrub, dev->od_uuid, 1);
if (rc) {
CDEBUG(D_LFSCK, "%s: OI scrub fail to scrub prep: rc = %d\n",
scrub->os_name, rc);
}
if (!scrub->os_full_speed) {
- struct l_wait_info lwi = { 0 };
struct osd_otable_it *it = dev->od_otable_it;
- l_wait_event(thread->t_ctl_waitq,
- it->ooi_user_ready || !thread_is_running(thread),
- &lwi);
- if (unlikely(!thread_is_running(thread)))
+ wait_var_event(scrub,
+ it->ooi_user_ready ||
+ kthread_should_stop());
+
+ if (kthread_should_stop())
GOTO(post, rc = 0);
scrub->os_pos_current = it->ooi_pos;
scrub->os_pos_current);
fid = &osd_oti_get(&env)->oti_fid;
- while (!rc && thread_is_running(thread)) {
+ while (!rc && !kthread_should_stop()) {
rc = osd_scrub_next(&env, dev, fid, &oid);
switch (rc) {
case SCRUB_NEXT_EXIT:
GOTO(post, rc = 0);
case SCRUB_NEXT_CRASH:
spin_lock(&scrub->os_lock);
- thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
GOTO(out, rc = -EINVAL);
case SCRUB_NEXT_FATAL:
GOTO(post, rc);
post:
- rc = osd_scrub_post(&env, dev, rc);
+ if (scrub->os_has_ml_file) {
+ ret = osd_scan_ml_file_main(&env, dev);
+ if (ret != 0)
+ rc = ret;
+ }
+
+ rc = scrub_thread_post(&env, &dev->od_scrub, rc);
CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d\n",
scrub->os_name, scrub->os_pos_current, rc);
noenv:
spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_STOPPED);
- wake_up_all(&thread->t_ctl_waitq);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
+ if (xchg(&scrub->os_task, NULL) == NULL)
+ /* scrub_stop is waiting, we need to synchronize */
+ wait_var_event(scrub, kthread_should_stop());
+ wake_up_var(scrub);
return rc;
}
/* PENDING */
{
- .olm_name = "PENDING",
+ .olm_name = MDT_ORPHAN_DIR,
},
/* ROOT */
/* LFSCK */
{
.olm_name = LFSCK_DIR,
- .olm_flags = OLF_SCAN_SUBITEMS,
+ .olm_flags = OLF_SCAN_SUBITEMS | OLF_NOT_BACKUP,
.olm_scan_dir = osd_ios_general_sd,
.olm_handle_dirent = osd_ios_varfid_hd,
},
.olm_name = LUSTRE_NODEMAP_NAME,
},
+ /* index_backup */
+ {
+ .olm_name = INDEX_BACKUP_DIR,
+ .olm_fid = {
+ .f_seq = FID_SEQ_LOCAL_FILE,
+ .f_oid = INDEX_BACKUP_OID,
+ },
+ .olm_flags = OLF_SCAN_SUBITEMS | OLF_NOT_BACKUP,
+ .olm_scan_dir = osd_ios_general_sd,
+ .olm_handle_dirent = osd_ios_varfid_hd,
+ },
+
{
.olm_name = NULL
}
return 0;
}
+/*
+ * Check whether the index object @oid must be re-created from its backup.
+ *
+ * Probe the object's ZAP with a single cursor retrieve: any failure other
+ * than -ENOENT means the ZAP itself is unreadable/corrupt and the index
+ * needs re-creation. An empty index (-ENOENT) or a readable first entry
+ * is considered healthy.
+ *
+ * \retval true  the index is corrupted and should be restored
+ * \retval false the index is usable as-is
+ */
+static bool osd_index_need_recreate(const struct lu_env *env,
+				    struct osd_device *dev, uint64_t oid)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	zap_attribute_t *za = &info->oti_za2;
+	zap_cursor_t *zc = &info->oti_zc2;
+	int rc;
+	ENTRY;
+
+	zap_cursor_init_serialized(zc, dev->od_os, oid, 0);
+	rc = -zap_cursor_retrieve(zc, za);
+	zap_cursor_fini(zc);
+	/* -ENOENT just means an empty index, which is not corruption */
+	if (rc && rc != -ENOENT)
+		RETURN(true);
+
+	RETURN(false);
+}
+
+/*
+ * Detect whether object @oid is a Lustre index file and, if so, register
+ * it with the index backup framework via osd_index_register().
+ *
+ * An index file here is a regular file (S_ISREG) whose body is a fat ZAP
+ * with 64-bit binary keys. Objects that are unavailable, not ZAPs, not
+ * regular files, empty, micro ZAPs, or string-keyed ZAPs are skipped
+ * (the rc = 1 paths below, logged at debug level only).
+ */
+static void osd_ios_index_register(const struct lu_env *env,
+				   struct osd_device *osd,
+				   const struct lu_fid *fid, uint64_t oid)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	zap_attribute_t *za = &info->oti_za2;
+	zap_cursor_t *zc = &info->oti_zc2;
+	struct zap_leaf_entry *le;
+	dnode_t *dn = NULL;
+	sa_handle_t *hdl;
+	__u64 mode = 0;
+	__u32 keysize = 0;
+	__u32 recsize = 0;
+	int rc;
+	ENTRY;
+
+	rc = __osd_obj2dnode(osd->od_os, oid, &dn);
+	/* NOTE(review): -EEXIST/-ENOENT are treated as "object not usable,
+	 * skip silently" - confirm the -EEXIST semantics of __osd_obj2dnode */
+	if (rc == -EEXIST || rc == -ENOENT)
+		RETURN_EXIT;
+
+	if (rc < 0)
+		GOTO(log, rc);
+
+	if (!osd_object_is_zap(dn))
+		GOTO(log, rc = 1);
+
+	rc = -sa_handle_get(osd->od_os, oid, NULL, SA_HDL_PRIVATE, &hdl);
+	if (rc)
+		GOTO(log, rc);
+
+	rc = -sa_lookup(hdl, SA_ZPL_MODE(osd), &mode, sizeof(mode));
+	sa_handle_destroy(hdl);
+	if (rc)
+		GOTO(log, rc);
+
+	/* only regular files can back a Lustre index */
+	if (!S_ISREG(mode))
+		GOTO(log, rc = 1);
+
+	zap_cursor_init_serialized(zc, osd->od_os, oid, 0);
+	rc = -zap_cursor_retrieve(zc, za);
+	if (rc)
+		/* Skip empty index object */
+		GOTO(fini, rc = (rc == -ENOENT ? 1 : rc));
+
+	/* index files must be fat ZAPs with binary (uint64) keys */
+	if (zc->zc_zap->zap_ismicro ||
+	    !(zap_f_phys(zc->zc_zap)->zap_flags & ZAP_FLAG_UINT64_KEY))
+		GOTO(fini, rc = 1);
+
+	/* key size in bytes: le_name_numints 64-bit words of 8 bytes each */
+	le = ZAP_LEAF_ENTRY(zc->zc_leaf, 0);
+	keysize = le->le_name_numints * 8;
+	recsize = za->za_integer_length * za->za_num_integers;
+	if (likely(keysize && recsize))
+		rc = osd_index_register(osd, fid, keysize, recsize);
+
+	GOTO(fini, rc);
+
+fini:
+	zap_cursor_fini(zc);
+
+log:
+	if (dn)
+		osd_dnode_rele(dn);
+	if (rc < 0)
+		CWARN("%s: failed to register index "DFID" (%u/%u): rc = %d\n",
+		      osd_name(osd), PFID(fid), keysize, recsize, rc);
+	else if (!rc)
+		CDEBUG(D_LFSCK, "%s: registered index "DFID" (%u/%u)\n",
+		       osd_name(osd), PFID(fid), keysize, recsize);
+}
+
+/*
+ * Restore one corrupted index object from its backup copy.
+ *
+ * Look up the backup directory entry keyed by the target index FID
+ * (serialized via lustre_fid2lbx), resolve the backup object's own FID
+ * from its dnode number, cache the OI mapping in RAM only (see comment
+ * below), then hand off to lustre_index_restore() to rebuild the index.
+ * Errors are reported through the trailing CDEBUG; the caller continues
+ * with the remaining restore units regardless.
+ */
+static void osd_index_restore(const struct lu_env *env, struct osd_device *dev,
+			      struct lustre_index_restore_unit *liru, void *buf,
+			      int bufsize)
+{
+	struct luz_direntry *zde = &osd_oti_get(env)->oti_zde;
+	struct lu_fid *tgt_fid = &liru->liru_cfid;
+	struct lu_fid bak_fid;
+	int rc;
+	ENTRY;
+
+	lustre_fid2lbx(buf, tgt_fid, bufsize);
+	rc = -zap_lookup(dev->od_os, dev->od_index_backup_id, buf, 8,
+			 sizeof(*zde) / 8, (void *)zde);
+	if (rc)
+		GOTO(log, rc);
+
+	rc = osd_get_fid_by_oid(env, dev, zde->lzd_reg.zde_dnode, &bak_fid);
+	if (rc)
+		GOTO(log, rc);
+
+	/* The OI mapping for index may be invalid, since it will be
+	 * re-created, not update the OI mapping, just cache it in RAM. */
+	rc = osd_idc_find_and_init_with_oid(env, dev, tgt_fid,
+					    liru->liru_clid);
+	if (!rc)
+		rc = lustre_index_restore(env, &dev->od_dt_dev,
+					  &liru->liru_pfid, tgt_fid, &bak_fid,
+					  liru->liru_name,
+					  &dev->od_index_backup_list,
+					  &dev->od_lock, buf, bufsize);
+	GOTO(log, rc);
+
+log:
+	CDEBUG(D_WARNING, "%s: restore index '%s' with "DFID": rc = %d\n",
+	       osd_name(dev), liru->liru_name, PFID(tgt_fid), rc);
+}
+
/**
* verify FID-in-LMA and OI entry for one object
*
RETURN(0);
}
+ if (lma->lma_compat & LMAC_IDX_BACKUP &&
+ osd_index_need_recreate(env, dev, oid)) {
+ if (parent == dev->od_root) {
+ lu_local_obj_fid(&tfid,
+ OSD_FS_ROOT_OID);
+ } else {
+ rc = osd_get_fid_by_oid(env, dev,
+ parent, &tfid);
+ if (rc) {
+ nvlist_free(nvbuf);
+ RETURN(rc);
+ }
+ }
+
+ rc = lustre_liru_new(
+ &dev->od_index_restore_list,
+ &tfid, &lma->lma_self_fid, oid,
+ name, strlen(name));
+ nvlist_free(nvbuf);
+ RETURN(rc);
+ }
+
tfid = lma->lma_self_fid;
+ if (!(flags & OLF_NOT_BACKUP))
+ osd_ios_index_register(env, dev, &tfid, oid);
}
nvlist_free(nvbuf);
}
sizeof(*zde) / 8, (void *)zde);
if (rc) {
if (rc != -ENOENT)
- CWARN("%s: initial OI scrub failed to find"
- "the entry %s under .lustre: rc = %d\n",
+ CWARN("%s: initial OI scrub failed to find the entry %s under .lustre: rc = %d\n",
osd_name(dev), map->olm_name, rc);
else if (!fid_is_zero(&map->olm_fid))
/* Try to remove the stale OI mapping. */
OBD_FREE_PTR(item);
}
+ if (!list_empty(&dev->od_index_restore_list)) {
+ char *buf;
+
+ OBD_ALLOC_LARGE(buf, INDEX_BACKUP_BUFSIZE);
+ if (!buf)
+ CERROR("%s: not enough RAM for rebuild index\n",
+ osd_name(dev));
+
+ while (!list_empty(&dev->od_index_restore_list)) {
+ struct lustre_index_restore_unit *liru;
+
+ liru = list_entry(dev->od_index_restore_list.next,
+ struct lustre_index_restore_unit,
+ liru_link);
+ list_del(&liru->liru_link);
+ if (buf)
+ osd_index_restore(env, dev, liru, buf,
+ INDEX_BACKUP_BUFSIZE);
+ OBD_FREE(liru, liru->liru_len);
+ }
+
+ if (buf)
+ OBD_FREE_LARGE(buf, INDEX_BACKUP_BUFSIZE);
+ }
+
EXIT;
}
RETURN(rc == -EALREADY ? 0 : rc);
}
-static void osd_scrub_stop(struct osd_device *dev)
+void osd_scrub_stop(struct osd_device *dev)
{
struct lustre_scrub *scrub = &dev->od_scrub;
ENTRY;
/* od_otable_sem: prevent concurrent start/stop */
down(&dev->od_otable_sem);
+ spin_lock(&scrub->os_lock);
scrub->os_paused = 1;
+ spin_unlock(&scrub->os_lock);
scrub_stop(scrub);
up(&dev->od_otable_sem);
static const char osd_scrub_name[] = "OI_scrub";
-int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
+int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
+ bool resetoi)
{
struct osd_thread_info *info = osd_oti_get(env);
struct lustre_scrub *scrub = &dev->od_scrub;
bool dirty = false;
ENTRY;
- memcpy(dev->od_uuid,
+ memcpy(dev->od_uuid.b,
&dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid,
sizeof(dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid));
memset(&dev->od_scrub, 0, sizeof(struct lustre_scrub));
- init_waitqueue_head(&scrub->os_thread.t_ctl_waitq);
init_rwsem(&scrub->os_rwsem);
spin_lock_init(&scrub->os_lock);
INIT_LIST_HEAD(&scrub->os_inconsistent_items);
if (IS_ERR_OR_NULL(obj))
RETURN(obj ? PTR_ERR(obj) : -ENOENT);
+ obj->do_body_ops = &osd_body_scrub_ops;
scrub->os_obj = obj;
rc = scrub_file_load(env, scrub);
if (rc == -ENOENT || rc == -EFAULT) {
} else if (rc < 0) {
GOTO(cleanup_obj, rc);
} else {
- if (memcmp(sf->sf_uuid, dev->od_uuid, 16) != 0) {
- struct obd_uuid *old_uuid;
- struct obd_uuid *new_uuid;
-
- OBD_ALLOC_PTR(old_uuid);
- OBD_ALLOC_PTR(new_uuid);
- if (!old_uuid || !new_uuid) {
- CERROR("%s: UUID has been changed, but"
- "failed to allocate RAM for report\n",
- osd_name(dev));
- } else {
- class_uuid_unparse(sf->sf_uuid, old_uuid);
- class_uuid_unparse(dev->od_uuid, new_uuid);
- CDEBUG(D_LFSCK, "%s: UUID has been changed "
- "from %s to %s\n", osd_name(dev),
- old_uuid->uuid, new_uuid->uuid);
- }
+ if (!uuid_equal(&sf->sf_uuid, &dev->od_uuid)) {
+ CDEBUG(D_LFSCK,
+ "%s: UUID has been changed from %pU to %pU\n",
+ osd_name(dev), &sf->sf_uuid, &dev->od_uuid);
scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT);
dirty = true;
- if (old_uuid)
- OBD_FREE_PTR(old_uuid);
- if (new_uuid)
- OBD_FREE_PTR(new_uuid);
} else if (sf->sf_status == SS_SCANNING) {
sf->sf_status = SS_CRASHED;
dirty = true;
}
/* Initialize OI files. */
- rc = osd_oi_init(env, dev);
+ rc = osd_oi_init(env, dev, resetoi);
if (rc < 0)
GOTO(cleanup_obj, rc);
{
spin_lock(&scrub->os_lock);
if (it->ooi_pos < scrub->os_pos_current || scrub->os_waiting ||
- !thread_is_running(&scrub->os_thread))
+ !scrub->os_running)
it->ooi_waiting = 0;
else
it->ooi_waiting = 1;
struct osd_otable_it *it = (struct osd_otable_it *)di;
struct osd_device *dev = it->ooi_dev;
struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
- struct l_wait_info lwi = { 0 };
struct lustre_mdt_attrs *lma = NULL;
nvlist_t *nvbuf = NULL;
- int size = 0;
- int rc;
+ int rc, size = 0;
+ bool locked;
ENTRY;
LASSERT(it->ooi_user_ready);
}
if (it->ooi_pos >= scrub->os_pos_current)
- l_wait_event(thread->t_ctl_waitq,
- osd_otable_it_wakeup(scrub, it),
- &lwi);
+ wait_var_event(scrub,
+ osd_otable_it_wakeup(scrub, it));
- if (!thread_is_running(thread) && !it->ooi_used_outside)
+ if (!scrub->os_running && !it->ooi_used_outside)
GOTO(out, rc = 1);
rc = -dmu_object_next(dev->od_os, &it->ooi_pos, B_FALSE, 0);
rc = __osd_xattr_load_by_oid(dev, it->ooi_pos, &nvbuf);
- if (!scrub->os_full_speed)
+ locked = false;
+ if (!scrub->os_full_speed) {
spin_lock(&scrub->os_lock);
+ locked = true;
+ }
it->ooi_prefetched--;
if (!scrub->os_full_speed) {
if (scrub->os_waiting) {
scrub->os_waiting = 0;
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
}
- spin_unlock(&scrub->os_lock);
}
+ if (locked)
+ spin_unlock(&scrub->os_lock);
if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
goto again;
it->ooi_prefetched_dnode = 0;
it->ooi_user_ready = 1;
if (!scrub->os_full_speed)
- wake_up_all(&scrub->os_thread.t_ctl_waitq);
+ wake_up_var(scrub);
/* Unplug OSD layer iteration by the first next() call. */
rc = osd_otable_it_next(env, (struct dt_it *)it);
const struct lu_fid *fid, uint64_t oid, bool insert)
{
struct lustre_scrub *scrub = &dev->od_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct osd_inconsistent_item *oii;
bool wakeup = false;
ENTRY;
oii->oii_insert = insert;
spin_lock(&scrub->os_lock);
- if (unlikely(!thread_is_running(thread))) {
+ if (!scrub->os_running) {
spin_unlock(&scrub->os_lock);
OBD_FREE_PTR(oii);
RETURN(-EAGAIN);
spin_unlock(&scrub->os_lock);
if (wakeup)
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
RETURN(0);
}
RETURN(ret);
}
+
+/* per-entry callback invoked by osd_scan_dir() for each directory entry */
+typedef int (*scan_dir_helper_t)(const struct lu_env *env,
+				 struct osd_device *dev, uint64_t dir_oid,
+				 struct osd_zap_it *ozi);
+
+/*
+ * Iterate all entries of the directory ZAP @id, invoking @cb for each.
+ *
+ * "." and ".." are skipped. Every other entry's value must be an array of
+ * 8-byte integers (a luz_direntry); anything else fails with -EIO. The
+ * entry name and direntry are stashed in @it (ozi_name/ozi_zde) before
+ * the callback runs. Iteration stops at the first callback error.
+ *
+ * \retval 0	whole directory scanned (or empty)
+ * \retval -ve	cursor/lookup failure or first error returned by @cb
+ */
+static int osd_scan_dir(const struct lu_env *env, struct osd_device *dev,
+			uint64_t id, scan_dir_helper_t cb)
+{
+	struct osd_zap_it *it;
+	struct luz_direntry *zde;
+	zap_attribute_t *za;
+	int rc;
+
+	ENTRY;
+
+	OBD_SLAB_ALLOC_PTR_GFP(it, osd_zapit_cachep, GFP_NOFS);
+	if (it == NULL)
+		RETURN(-ENOMEM);
+
+	rc = osd_zap_cursor_init(&it->ozi_zc, dev->od_os, id, 0);
+	if (rc != 0)
+		GOTO(out, rc);
+
+	za = &it->ozi_za;
+	zde = &it->ozi_zde;
+	while (1) {
+		rc = -zap_cursor_retrieve(it->ozi_zc, za);
+		if (unlikely(rc)) {
+			/* -ENOENT marks normal end of iteration */
+			if (rc == -ENOENT)
+				rc = 0;
+
+			break;
+		}
+
+		if (name_is_dot_or_dotdot(za->za_name, strlen(za->za_name))) {
+			zap_cursor_advance(it->ozi_zc);
+			continue;
+		}
+
+		/* NOTE(review): strncpy may leave ozi_name unterminated if
+		 * za_name fills the buffer - confirm ozi_name sizing vs
+		 * the maximum ZAP name length */
+		strncpy(it->ozi_name, za->za_name, sizeof(it->ozi_name));
+		if (za->za_integer_length != 8) {
+			rc = -EIO;
+			break;
+		}
+
+		rc = osd_zap_lookup(dev, it->ozi_zc->zc_zapobj, NULL,
+				    za->za_name, za->za_integer_length,
+				    sizeof(*zde) / za->za_integer_length, zde);
+		if (rc)
+			break;
+
+		rc = cb(env, dev, id, it);
+		if (rc)
+			break;
+
+		zap_cursor_advance(it->ozi_zc);
+	}
+	osd_zap_cursor_fini(it->ozi_zc);
+
+out:
+	OBD_SLAB_FREE_PTR(it, osd_zapit_cachep);
+	RETURN(rc);
+}
+
+/*
+ * Remove one bogus hard link @name (an entry in directory ZAP @dir) to
+ * the multi-link OST object @id/@fid: decrement the object's link count
+ * and delete the directory entry within a single ZFS transaction.
+ *
+ * The object (if resolvable) is held and its oo_guard taken for read to
+ * keep its attributes stable while the link count is updated.
+ *
+ * \retval 0	link removed, or the object turned out not to be
+ *		multi-linked (logged, treated as success)
+ * \retval -ve	SA/tx/ZAP failure
+ */
+static int osd_remove_ml_file(const struct lu_env *env, struct osd_device *dev,
+			      uint64_t dir, uint64_t id, struct lu_fid *fid,
+			      char *name)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct dt_object *dt;
+	struct osd_object *obj = NULL;
+	dmu_tx_t *tx;
+	sa_handle_t *hdl;
+	uint64_t nlink;
+	int rc;
+
+	rc = -sa_handle_get(dev->od_os, id, NULL, SA_HDL_PRIVATE, &hdl);
+	if (rc)
+		RETURN(rc);
+
+	dt = lu2dt(lu_object_find_slice(env, osd2lu_dev(dev), fid, NULL));
+	if (IS_ERR(dt)) {
+		/* don't leak the SA handle acquired above */
+		sa_handle_destroy(hdl);
+		RETURN(PTR_ERR(dt));
+	}
+
+	if (dt) {
+		obj = osd_dt_obj(dt);
+		down_read(&obj->oo_guard);
+	}
+
+	rc = -sa_lookup(hdl, SA_ZPL_LINKS(dev), &nlink, sizeof(nlink));
+	if (rc)
+		GOTO(out, rc);
+
+	/* sanity check: the caller believed this object was multi-linked */
+	if (nlink <= 1) {
+		CERROR("%s: multi-link file O/%s/%s/%s has nlink %llu\n",
+		       osd_name(dev), info->oti_seq_name, info->oti_dir_name,
+		       name, nlink);
+		GOTO(out, rc = 0);
+	}
+
+	tx = dmu_tx_create(dev->od_os);
+	if (!tx) {
+		CERROR("%s: fail to create tx to remove multi-link file!\n",
+		       osd_name(dev));
+		GOTO(out, rc = -ENOMEM);
+	}
+
+	dmu_tx_hold_zap(tx, dir, FALSE, NULL);
+	rc = -dmu_tx_assign(tx, TXG_WAIT);
+	if (rc)
+		GOTO(abort, rc);
+
+	/* drop one link, then remove the stale directory entry */
+	nlink--;
+	rc = -sa_update(hdl, SA_ZPL_LINKS(dev), &nlink, sizeof(nlink), tx);
+	if (rc)
+		GOTO(abort, rc);
+
+	rc = -zap_remove(dev->od_os, dir, name, tx);
+	if (rc)
+		GOTO(abort, rc);
+
+	dmu_tx_commit(tx);
+	GOTO(out, rc);
+
+abort:
+	dmu_tx_abort(tx);
+
+out:
+	if (dt) {
+		up_read(&obj->oo_guard);
+		dt_object_put_nocache(env, dt);
+	}
+
+	sa_handle_destroy(hdl);
+	RETURN(rc);
+}
+
+/*
+ * Verify that one OST object file sits at its canonical location
+ * O/<seq>/d<idx>/<objid>, where <idx> = objid % OSD_OST_MAP_SIZE.
+ *
+ * The expected seq/dir/object names are rebuilt from the object's FID
+ * (resolved from its dnode) and compared against the path components the
+ * enclosing scan recorded in oti_seq_name/oti_dir_name and the current
+ * entry name. On any mismatch the entry is a stale extra hard link and
+ * is removed via osd_remove_ml_file().
+ */
+static int osd_scan_ml_file(const struct lu_env *env, struct osd_device *dev,
+			    uint64_t dir_oid, struct osd_zap_it *ozi)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lu_fid *fid = &info->oti_fid;
+	struct ost_id *ostid = &info->oti_ostid;
+	char name[32];
+	u64 seq;
+	int rc = 0;
+
+	ENTRY;
+
+	rc = osd_get_fid_by_oid(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode, fid);
+	if (rc)
+		RETURN(rc);
+
+	seq = fid_seq(fid);
+	fid_to_ostid(fid, ostid);
+
+	/* expected sequence directory name: decimal for rsvd/mdt0, hex
+	 * otherwise; IDIF sequences all live under "0" */
+	snprintf(name, sizeof(name), (fid_seq_is_rsvd(seq) ||
+				      fid_seq_is_mdt0(seq)) ? "%llu" : "%llx",
+		 fid_seq_is_idif(seq) ? 0 : seq);
+	if (strcmp(info->oti_seq_name, name) != 0)
+		GOTO(fix, rc);
+
+	/* expected hashed subdirectory name */
+	snprintf(name, sizeof(name), "d%d",
+		 (int)ostid_id(ostid) % OSD_OST_MAP_SIZE);
+	if (strcmp(info->oti_dir_name, name) != 0)
+		GOTO(fix, rc);
+
+	/* expected object file name */
+	snprintf(name, sizeof(name), "%llu", ostid_id(ostid));
+	if (strcmp(ozi->ozi_name, name) == 0)
+		RETURN(0);
+
+fix:
+	CDEBUG(D_LFSCK, "%s: the file O/%s/%s/%s is corrupted\n",
+	       osd_name(dev), info->oti_seq_name, info->oti_dir_name,
+	       ozi->ozi_name);
+
+	rc = osd_remove_ml_file(env, dev, dir_oid,
+				ozi->ozi_zde.lzd_reg.zde_dnode, fid,
+				ozi->ozi_name);
+	RETURN(rc);
+}
+
+/*
+ * osd_scan_dir() callback for entries of an O/<seq> directory: descend
+ * into each d<idx> subdirectory (non-directories are skipped), recording
+ * its name in oti_dir_name for osd_scan_ml_file() to validate against.
+ */
+static int osd_scan_ml_file_dir(const struct lu_env *env,
+				struct osd_device *dev, uint64_t dir_oid,
+				struct osd_zap_it *ozi)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+
+	/* NOTE(review): cpu_to_le16() around DTTOIF looks endian-suspect
+	 * (zde_type is on-disk data) - confirm against the zde layout */
+	if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+		return 0;
+
+	info->oti_dir_name = ozi->ozi_name;
+	return osd_scan_dir(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode,
+			    osd_scan_ml_file);
+}
+
+/*
+ * osd_scan_dir() callback for entries of the O/ directory: descend into
+ * each <seq> subdirectory (non-directories are skipped), recording its
+ * name in oti_seq_name for osd_scan_ml_file() to validate against.
+ */
+static int osd_scan_ml_file_seq(const struct lu_env *env,
+				struct osd_device *dev, uint64_t dir_oid,
+				struct osd_zap_it *ozi)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+
+	/* NOTE(review): cpu_to_le16() around DTTOIF looks endian-suspect
+	 * (zde_type is on-disk data) - confirm against the zde layout */
+	if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+		return 0;
+
+	info->oti_seq_name = ozi->ozi_name;
+	return osd_scan_dir(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode,
+			    osd_scan_ml_file_dir);
+}
+
+/*
+ * Entry point for the multi-link OST object cleanup pass run after OI
+ * scrub (when os_has_ml_file was set): walk the whole O/ hierarchy,
+ * O/<seq>/d<idx>/<objid>, removing stale extra hard links found there.
+ */
+static int osd_scan_ml_file_main(const struct lu_env *env,
+				 struct osd_device *dev)
+{
+	return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_ml_file_seq);
+}