X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_scrub.c;h=9df6f7487075bc1bac7fefaad8ba86b223731829;hb=23a6e1ed8eec2c07653ed07c35bb109ecb87a5b7;hp=b2cda0a203e691fed665c295fadf016289e3562a;hpb=89ead218ebe99a955afc0bc7f6aba83ef35019fb;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_scrub.c b/lustre/osd-zfs/osd_scrub.c index b2cda0a..9df6f74 100644 --- a/lustre/osd-zfs/osd_scrub.c +++ b/lustre/osd-zfs/osd_scrub.c @@ -46,6 +46,9 @@ #include #include #include +#include +#include +#include #include "osd_internal.h" @@ -193,7 +196,10 @@ zget: GOTO(out, rc); } + spin_lock(&scrub->os_lock); scrub->os_full_speed = 1; + spin_unlock(&scrub->os_lock); + sf->sf_flags |= SF_INCONSISTENT; } else if (oid == oid2) { GOTO(out, rc = 0); @@ -224,7 +230,9 @@ zget: } update: + spin_lock(&scrub->os_lock); scrub->os_full_speed = 1; + spin_unlock(&scrub->os_lock); sf->sf_flags |= SF_INCONSISTENT; } @@ -301,6 +309,7 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev) if (flags & SS_RESET) scrub_file_reset(scrub, dev->od_uuid, 0); + spin_lock(&scrub->os_lock); scrub->os_partial_scan = 0; if (flags & SS_AUTO_FULL) { scrub->os_full_speed = 1; @@ -312,7 +321,6 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev) scrub->os_full_speed = 0; } - spin_lock(&scrub->os_lock); scrub->os_in_prior = 0; scrub->os_waiting = 0; scrub->os_paused = 0; @@ -329,7 +337,7 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev) scrub->os_pos_current = sf->sf_pos_latest_start; sf->sf_status = SS_SCANNING; - sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_latest_start = ktime_get_real_seconds(); sf->sf_time_last_checkpoint = sf->sf_time_latest_start; sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1; rc = scrub_file_store(env, scrub); @@ -364,7 +372,7 @@ static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev, scrub->os_new_checked = 0; sf->sf_pos_last_checkpoint = scrub->os_pos_current; } - sf->sf_time_last_checkpoint = cfs_time_current_sec(); + sf->sf_time_last_checkpoint = ktime_get_real_seconds(); if (result > 0) { sf->sf_status = SS_COMPLETED; if (!(sf->sf_param & SP_DRYRUN)) { @@ -382,8 +390,9 @@ static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev, } else { sf->sf_status = SS_FAILED; } - sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC - - scrub->os_time_last_checkpoint); + sf->sf_run_time += ktime_get_seconds() - + scrub->os_time_last_checkpoint; + rc = scrub_file_store(env, scrub); up_write(&scrub->os_rwsem); @@ -410,7 +419,6 @@ osd_scrub_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it) static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev, struct lu_fid *fid, uint64_t *oid) { - struct l_wait_info lwi = { 0 }; struct lustre_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; struct osd_otable_it *it = dev->od_otable_it; @@ -421,15 +429,14 @@ static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev, ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) { - lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL); - if (likely(lwi.lwi_timeout > 0)) { - l_wait_event(thread->t_ctl_waitq, - !list_empty(&scrub->os_inconsistent_items) || - !thread_is_running(thread), - &lwi); - if (unlikely(!thread_is_running(thread))) - RETURN(SCRUB_NEXT_EXIT); - } + wait_event_idle_timeout( + thread->t_ctl_waitq, + !list_empty(&scrub->os_inconsistent_items) || + !thread_is_running(thread), + cfs_time_seconds(cfs_fail_val)); + + if (unlikely(!thread_is_running(thread))) + RETURN(SCRUB_NEXT_EXIT); } if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) { @@ -466,12 +473,9 @@ again: spin_unlock(&scrub->os_lock); } - if (!scrub->os_full_speed && !osd_scrub_has_window(it)) { - memset(&lwi, 0, sizeof(lwi)); - l_wait_event(thread->t_ctl_waitq, - osd_scrub_wakeup(scrub, it), - &lwi); - } + if (!scrub->os_full_speed && !osd_scrub_has_window(it)) + wait_event_idle(thread->t_ctl_waitq, + osd_scrub_wakeup(scrub, it)); if (unlikely(!thread_is_running(thread))) GOTO(out, rc = SCRUB_NEXT_EXIT); @@ -539,7 +543,9 @@ static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev, spin_unlock(&scrub->os_lock); } } else { + spin_lock(&scrub->os_lock); scrub->os_in_prior = 0; + spin_unlock(&scrub->os_lock); } if (rc) @@ -581,12 +587,12 @@ static int osd_scrub_main(void *args) } if (!scrub->os_full_speed) { - struct l_wait_info lwi = { 0 }; struct osd_otable_it *it = dev->od_otable_it; - l_wait_event(thread->t_ctl_waitq, - it->ooi_user_ready || !thread_is_running(thread), - &lwi); + wait_event_idle(thread->t_ctl_waitq, + it->ooi_user_ready || + !thread_is_running(thread)); + if (unlikely(!thread_is_running(thread))) GOTO(post, rc = 0); @@ -697,7 +703,7 @@ static const struct osd_lf_map osd_lf_maps[] = { /* PENDING */ { - .olm_name = "PENDING", + .olm_name = MDT_ORPHAN_DIR, }, /* ROOT */ @@ -749,7 +755,7 @@ static const struct osd_lf_map osd_lf_maps[] = { /* LFSCK */ { .olm_name = LFSCK_DIR, - .olm_flags = OLF_SCAN_SUBITEMS, + .olm_flags = OLF_SCAN_SUBITEMS | OLF_NOT_BACKUP, .olm_scan_dir = osd_ios_general_sd, .olm_handle_dirent = osd_ios_varfid_hd, }, @@ -803,6 +809,18 @@ static const struct osd_lf_map osd_lf_maps[] = { .olm_name = LUSTRE_NODEMAP_NAME, }, + /* index_backup */ + { + .olm_name = INDEX_BACKUP_DIR, + .olm_fid = { + .f_seq = FID_SEQ_LOCAL_FILE, + .f_oid = INDEX_BACKUP_OID, + }, + .olm_flags = OLF_SCAN_SUBITEMS | OLF_NOT_BACKUP, + .olm_scan_dir = osd_ios_general_sd, + .olm_handle_dirent = osd_ios_varfid_hd, + }, + { .olm_name = NULL } @@ -864,6 +882,130 @@ static int osd_ios_new_item(struct osd_device *dev, uint64_t parent, return 0; } +static bool osd_index_need_recreate(const struct lu_env *env, + struct osd_device *dev, uint64_t oid) +{ + struct osd_thread_info *info = osd_oti_get(env); + zap_attribute_t *za = &info->oti_za2; + zap_cursor_t *zc = &info->oti_zc2; + int rc; + ENTRY; + + zap_cursor_init_serialized(zc, dev->od_os, oid, 0); + rc = -zap_cursor_retrieve(zc, za); + zap_cursor_fini(zc); + if (rc && rc != -ENOENT) + RETURN(true); + + RETURN(false); +} + +static void osd_ios_index_register(const struct lu_env *env, + struct osd_device *osd, + const struct lu_fid *fid, uint64_t oid) +{ + struct osd_thread_info *info = osd_oti_get(env); + zap_attribute_t *za = &info->oti_za2; + zap_cursor_t *zc = &info->oti_zc2; + struct zap_leaf_entry *le; + dnode_t *dn = NULL; + sa_handle_t *hdl; + __u64 mode = 0; + __u32 keysize = 0; + __u32 recsize = 0; + int rc; + ENTRY; + + rc = __osd_obj2dnode(osd->od_os, oid, &dn); + if (rc == -EEXIST || rc == -ENOENT) + RETURN_EXIT; + + if (rc < 0) + GOTO(log, rc); + + if (!osd_object_is_zap(dn)) + GOTO(log, rc = 1); + + rc = -sa_handle_get(osd->od_os, oid, NULL, SA_HDL_PRIVATE, &hdl); + if (rc) + GOTO(log, rc); + + rc = -sa_lookup(hdl, SA_ZPL_MODE(osd), &mode, sizeof(mode)); + sa_handle_destroy(hdl); + if (rc) + GOTO(log, rc); + + if (!S_ISREG(mode)) + GOTO(log, rc = 1); + + zap_cursor_init_serialized(zc, osd->od_os, oid, 0); + rc = -zap_cursor_retrieve(zc, za); + if (rc) + /* Skip empty index object */ + GOTO(fini, rc = (rc == -ENOENT ? 1 : rc)); + + if (zc->zc_zap->zap_ismicro || + !(zap_f_phys(zc->zc_zap)->zap_flags & ZAP_FLAG_UINT64_KEY)) + GOTO(fini, rc = 1); + + le = ZAP_LEAF_ENTRY(zc->zc_leaf, 0); + keysize = le->le_name_numints * 8; + recsize = za->za_integer_length * za->za_num_integers; + if (likely(keysize && recsize)) + rc = osd_index_register(osd, fid, keysize, recsize); + + GOTO(fini, rc); + +fini: + zap_cursor_fini(zc); + +log: + if (dn) + osd_dnode_rele(dn); + if (rc < 0) + CWARN("%s: failed to register index "DFID" (%u/%u): rc = %d\n", + osd_name(osd), PFID(fid), keysize, recsize, rc); + else if (!rc) + CDEBUG(D_LFSCK, "%s: registered index "DFID" (%u/%u)\n", + osd_name(osd), PFID(fid), keysize, recsize); +} + +static void osd_index_restore(const struct lu_env *env, struct osd_device *dev, + struct lustre_index_restore_unit *liru, void *buf, + int bufsize) +{ + struct luz_direntry *zde = &osd_oti_get(env)->oti_zde; + struct lu_fid *tgt_fid = &liru->liru_cfid; + struct lu_fid bak_fid; + int rc; + ENTRY; + + lustre_fid2lbx(buf, tgt_fid, bufsize); + rc = -zap_lookup(dev->od_os, dev->od_index_backup_id, buf, 8, + sizeof(*zde) / 8, (void *)zde); + if (rc) + GOTO(log, rc); + + rc = osd_get_fid_by_oid(env, dev, zde->lzd_reg.zde_dnode, &bak_fid); + if (rc) + GOTO(log, rc); + + /* The OI mapping for index may be invalid, since it will be + * re-created, not update the OI mapping, just cache it in RAM. */ + rc = osd_idc_find_and_init_with_oid(env, dev, tgt_fid, + liru->liru_clid); + if (!rc) + rc = lustre_index_restore(env, &dev->od_dt_dev, + &liru->liru_pfid, tgt_fid, &bak_fid, + liru->liru_name, &dev->od_index_backup_list, + &dev->od_lock, buf, bufsize); + GOTO(log, rc); + +log: + CDEBUG(D_WARNING, "%s: restore index '%s' with "DFID": rc = %d\n", + osd_name(dev), liru->liru_name, PFID(tgt_fid), rc); +} + /** * verify FID-in-LMA and OI entry for one object * @@ -912,7 +1054,31 @@ static int osd_ios_scan_one(const struct lu_env *env, struct osd_device *dev, RETURN(0); } + if (lma->lma_compat & LMAC_IDX_BACKUP && + osd_index_need_recreate(env, dev, oid)) { + if (parent == dev->od_root) { + lu_local_obj_fid(&tfid, + OSD_FS_ROOT_OID); + } else { + rc = osd_get_fid_by_oid(env, dev, + parent, &tfid); + if (rc) { + nvlist_free(nvbuf); + RETURN(rc); + } + } + + rc = lustre_liru_new( + &dev->od_index_restore_list, + &tfid, &lma->lma_self_fid, oid, + name, strlen(name)); + nvlist_free(nvbuf); + RETURN(rc); + } + tfid = lma->lma_self_fid; + if (!(flags & OLF_NOT_BACKUP)) + osd_ios_index_register(env, dev, &tfid, oid); } nvlist_free(nvbuf); } @@ -1109,8 +1275,7 @@ static int osd_ios_ROOT_sd(const struct lu_env *env, struct osd_device *dev, sizeof(*zde) / 8, (void *)zde); if (rc) { if (rc != -ENOENT) - CWARN("%s: initial OI scrub failed to find" - "the entry %s under .lustre: rc = %d\n", + CWARN("%s: initial OI scrub failed to find the entry %s under .lustre: rc = %d\n", osd_name(dev), map->olm_name, rc); else if (!fid_is_zero(&map->olm_fid)) /* Try to remove the stale OI mapping. */ @@ -1176,6 +1341,31 @@ static void osd_initial_OI_scrub(const struct lu_env *env, OBD_FREE_PTR(item); } + if (!list_empty(&dev->od_index_restore_list)) { + char *buf; + + OBD_ALLOC_LARGE(buf, INDEX_BACKUP_BUFSIZE); + if (!buf) + CERROR("%s: not enough RAM for rebuild index\n", + osd_name(dev)); + + while (!list_empty(&dev->od_index_restore_list)) { + struct lustre_index_restore_unit *liru; + + liru = list_entry(dev->od_index_restore_list.next, + struct lustre_index_restore_unit, + liru_link); + list_del(&liru->liru_link); + if (buf) + osd_index_restore(env, dev, liru, buf, + INDEX_BACKUP_BUFSIZE); + OBD_FREE(liru, liru->liru_len); + } + + if (buf) + OBD_FREE_LARGE(buf, INDEX_BACKUP_BUFSIZE); + } + EXIT; } @@ -1198,14 +1388,16 @@ int osd_scrub_start(const struct lu_env *env, struct osd_device *dev, RETURN(rc == -EALREADY ? 0 : rc); } -static void osd_scrub_stop(struct osd_device *dev) +void osd_scrub_stop(struct osd_device *dev) { struct lustre_scrub *scrub = &dev->od_scrub; ENTRY; /* od_otable_sem: prevent concurrent start/stop */ down(&dev->od_otable_sem); + spin_lock(&scrub->os_lock); scrub->os_paused = 1; + spin_unlock(&scrub->os_lock); scrub_stop(scrub); up(&dev->od_otable_sem); @@ -1228,7 +1420,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) bool dirty = false; ENTRY; - memcpy(dev->od_uuid, + memcpy(dev->od_uuid.b, &dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid, sizeof(dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid)); memset(&dev->od_scrub, 0, sizeof(struct lustre_scrub)); @@ -1259,6 +1451,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) if (IS_ERR_OR_NULL(obj)) RETURN(obj ? PTR_ERR(obj) : -ENOENT); + obj->do_body_ops = &osd_body_scrub_ops; scrub->os_obj = obj; rc = scrub_file_load(env, scrub); if (rc == -ENOENT || rc == -EFAULT) { @@ -1267,29 +1460,12 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) } else if (rc < 0) { GOTO(cleanup_obj, rc); } else { - if (memcmp(sf->sf_uuid, dev->od_uuid, 16) != 0) { - struct obd_uuid *old_uuid; - struct obd_uuid *new_uuid; - - OBD_ALLOC_PTR(old_uuid); - OBD_ALLOC_PTR(new_uuid); - if (!old_uuid || !new_uuid) { - CERROR("%s: UUID has been changed, but" - "failed to allocate RAM for report\n", - osd_name(dev)); - } else { - class_uuid_unparse(sf->sf_uuid, old_uuid); - class_uuid_unparse(dev->od_uuid, new_uuid); - CDEBUG(D_LFSCK, "%s: UUID has been changed " - "from %s to %s\n", osd_name(dev), - old_uuid->uuid, new_uuid->uuid); - } + if (!uuid_equal(&sf->sf_uuid, &dev->od_uuid)) { + CDEBUG(D_LFSCK, + "%s: UUID has been changed from %pU to %pU\n", + osd_name(dev), &sf->sf_uuid, &dev->od_uuid); scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT); dirty = true; - if (old_uuid) - OBD_FREE_PTR(old_uuid); - if (new_uuid) - OBD_FREE_PTR(new_uuid); } else if (sf->sf_status == SS_SCANNING) { sf->sf_status = SS_CRASHED; dirty = true; @@ -1509,11 +1685,10 @@ static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di) struct osd_device *dev = it->ooi_dev; struct lustre_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; - struct l_wait_info lwi = { 0 }; struct lustre_mdt_attrs *lma = NULL; nvlist_t *nvbuf = NULL; - int size = 0; - int rc; + int rc, size = 0; + bool locked; ENTRY; LASSERT(it->ooi_user_ready); @@ -1531,9 +1706,8 @@ again: } if (it->ooi_pos >= scrub->os_pos_current) - l_wait_event(thread->t_ctl_waitq, - osd_otable_it_wakeup(scrub, it), - &lwi); + wait_event_idle(thread->t_ctl_waitq, + osd_otable_it_wakeup(scrub, it)); if (!thread_is_running(thread) && !it->ooi_used_outside) GOTO(out, rc = 1); @@ -1550,16 +1724,20 @@ again: rc = __osd_xattr_load_by_oid(dev, it->ooi_pos, &nvbuf); - if (!scrub->os_full_speed) + locked = false; + if (!scrub->os_full_speed) { spin_lock(&scrub->os_lock); + locked = true; + } it->ooi_prefetched--; if (!scrub->os_full_speed) { if (scrub->os_waiting) { scrub->os_waiting = 0; wake_up_all(&thread->t_ctl_waitq); } - spin_unlock(&scrub->os_lock); } + if (locked) + spin_unlock(&scrub->os_lock); if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA) goto again;