#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
#define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
#define OBD_FAIL_OSD_OST_EA_FID_SET 0x197
+#define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
if (!lfsck->li_current_oit_processed && !init)
pos->lp_oit_cookie--;
- LASSERT(pos->lp_oit_cookie > 0);
+ if (unlikely(pos->lp_oit_cookie == 0))
+ pos->lp_oit_cookie = 1;
if (lfsck->li_di_dir != NULL) {
struct dt_object *dto = lfsck->li_obj_dir;
err_ops:
lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0);
if (!cfs_hash_is_empty(mgs2lu_dev(mgs)->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL);
lu_site_print(env, mgs2lu_dev(mgs)->ld_site, &msgdata,
lu_cdebug_printer);
}
lu_site_purge(env, d->ld_site, ~0);
if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL);
lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
}
lu_site_purge(env, top->ld_site, ~0);
if (!cfs_hash_is_empty(top->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL);
lu_site_print(env, top->ld_site, &msgdata, lu_cdebug_printer);
}
obj->oo_dt.do_body_ops = &osd_body_ops;
}
- if (result == 0)
+ if (!result && !CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY))
result = __osd_oi_insert(env, obj, fid, th);
/* a small optimization - dt_insert() isn't usually applied
MODULES := osd_zfs
osd_zfs-objs := osd_handler.o osd_lproc.o osd_quota.o
osd_zfs-objs += osd_object.o osd_io.o osd_oi.o osd_xattr.o osd_index.o
+osd_zfs-objs += osd_scrub.o
EXTRA_PRE_CFLAGS += -include @SPL_OBJ@/spl_config.h
EXTRA_PRE_CFLAGS += -include @ZFS_OBJ@/zfs_config.h
if (rc >= sizeof(o->od_svname))
RETURN(-E2BIG);
+ o->od_index = -1; /* -1 means index is invalid */
+ rc = server_name2index(o->od_svname, &o->od_index, NULL);
str = strstr(str, ":");
if (str) {
unsigned long flags;
LCONSOLE_WARN("%s: set dev_rdonly on this device\n",
svname);
}
+
+ if (flags & LMD_FLG_NOSCRUB)
+ o->od_auto_scrub_interval = AS_NEVER;
}
if (server_name_is_ost(o->od_svname))
}
#endif
- /* 1. initialize oi before any file create or file open */
- rc = osd_oi_init(env, o);
- if (rc)
- GOTO(err, rc);
-
rc = lu_site_init(&o->od_site, osd2lu_dev(o));
if (rc)
GOTO(err, rc);
if (rc)
GOTO(err, rc);
+ o->od_in_init = 1;
+ rc = osd_scrub_setup(env, o);
+ o->od_in_init = 0;
+ if (rc)
+ GOTO(err, rc);
+
rc = osd_procfs_init(o, o->od_svname);
if (rc)
GOTO(err, rc);
l->ld_ops = &osd_lu_ops;
o->od_dt_dev.dd_ops = &osd_dt_ops;
+ sema_init(&o->od_otable_sem, 1);
+ INIT_LIST_HEAD(&o->od_ios_list);
+ o->od_auto_scrub_interval = AS_DEFAULT;
out:
RETURN(rc);
/* now with all the callbacks completed we can cleanup the remainings */
osd_shutdown(env, o);
- osd_oi_fini(env, o);
+ osd_scrub_cleanup(env, o);
rc = osd_procfs_fini(o);
if (rc) {
lu_kmem_fini(osd_caches);
}
-extern unsigned int osd_oi_count;
module_param(osd_oi_count, int, 0444);
MODULE_PARM_DESC(osd_oi_count, "Number of Object Index containers to be created, it's only valid for new filesystem.");
*/
static int osd_find_parent_by_dnode(const struct lu_env *env,
struct dt_object *o,
- struct lu_fid *fid)
+ struct lu_fid *fid, uint64_t *oid)
{
struct osd_object *obj = osd_dt_obj(o);
struct osd_device *osd = osd_obj2dev(obj);
if (rc != 0)
RETURN(rc);
rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_PARENT(osd), &dnode, 8);
- if (rc == 0)
+ if (!rc) {
+ if (oid)
+ *oid = dnode;
rc = osd_get_fid_by_oid(env, osd, dnode, fid);
+ }
RETURN(rc);
}
static int osd_find_parent_fid(const struct lu_env *env, struct dt_object *o,
- struct lu_fid *fid)
+ struct lu_fid *fid, uint64_t *oid)
{
struct link_ea_header *leh;
struct link_ea_entry *lee;
if (rc == 0) {
struct lu_fid fid2;
int rc2;
- rc2 = osd_find_parent_by_dnode(env, o, &fid2);
+ rc2 = osd_find_parent_by_dnode(env, o, &fid2, oid);
if (rc2 == 0)
if (lu_fid_eq(fid, &fid2) == 0)
CERROR("wrong parent: "DFID" != "DFID"\n",
/* no LinkEA is found, let's try to find the fid in parent's LMA */
if (unlikely(rc != 0))
- rc = osd_find_parent_by_dnode(env, o, fid);
+ rc = osd_find_parent_by_dnode(env, o, fid, oid);
RETURN(rc);
}
+/*
+ * When lookup item under striped directory, we need to locate the master
+ * MDT-object of the striped directory firstly, then the client will send
+ * lookup (getattr_by_name) RPC to the MDT with some slave MDT-object's FID
+ * and the item's name. If the system is restored from MDT file level backup,
+ * then before the OI scrub completely built the OI files, the OI mappings of
+ * the master MDT-object and slave MDT-object may be invalid. Usually, it is
+ * not a problem for the master MDT-object. Because when locate the master
+ * MDT-object, we will do name based lookup (for the striped directory itself)
+ * firstly, during such process we can setup the correct OI mapping for the
+ * master MDT-object. But it will be trouble for the slave MDT-object. Because
+ * the client will not trigger name based lookup on the MDT to locate the slave
+ * MDT-object before locating item under the striped directory, then when
+ * osd_fid_lookup(), it will find that the OI mapping for the slave MDT-object
+ * is invalid and does not know what the right OI mapping is, then the MDT has
+ * to return -EINPROGRESS to the client to notify that the OI scrub is rebuilding
+ * the OI file, related OI mapping is unknown yet, please try again later. And
+ * then client will re-try the RPC again and again until related OI mapping has
+ * been updated. That is quite inefficient.
+ *
+ * To resolve above trouble, we will handle it as the following two cases:
+ *
+ * 1) The slave MDT-object and the master MDT-object are on different MDTs.
+ * It is relatively easy. Being one of the remote MDT-objects, the slave MDT-object
+ * is linked under /REMOTE_PARENT_DIR with the name of its FID string.
+ * We can locate the slave MDT-object via lookup the /REMOTE_PARENT_DIR
+ * directly. Please check osd_fid_lookup().
+ *
+ * 2) The slave MDT-object and the master MDT-object reside on the same MDT.
+ * Under such case, during lookup the master MDT-object, we will lookup the
+ * slave MDT-object via readdir against the master MDT-object, because the
+ * slave MDT-objects information are stored as sub-directories with the name
+ * "${FID}:${index}". Then when find the local slave MDT-object, its OI
+ * mapping will be recorded. Then subsequent osd_fid_lookup() will know
+ * the correct OI mapping for the slave MDT-object.
+ */
+static int osd_check_lmv(const struct lu_env *env, struct osd_device *osd,
+ uint64_t oid, const struct lu_fid *fid)
+{
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct luz_direntry *zde = &info->oti_zde;
+ zap_attribute_t *za = &info->oti_za;
+ zap_cursor_t *zc = &info->oti_zc;
+ struct lu_fid *tfid = &info->oti_fid;
+ nvlist_t *nvbuf = NULL;
+ struct lmv_mds_md_v1 *lmv = NULL;
+ int size;
+ int rc;
+ ENTRY;
+
+ /* Load the xattrs of @oid; if it has none (or is going away),
+ * there is nothing to check here. */
+ rc = __osd_xattr_load_by_oid(osd, oid, &nvbuf);
+ if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+ RETURN(0);
+
+ if (rc)
+ RETURN(rc);
+
+ /* Only striped directories carry the LMV xattr; plain directories
+ * are not our concern. */
+ rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMV,
+ (uchar_t **)&lmv, &size);
+ if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+ GOTO(out_nvbuf, rc = 0);
+
+ /* Note: rc == 0 with a non-V1 magic also exits here (rc stays 0). */
+ if (rc || le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+ GOTO(out_nvbuf, rc);
+
+ zap_cursor_init_serialized(zc, osd->od_os, oid, 0);
+ rc = -zap_cursor_retrieve(zc, za);
+ /* -ENOENT on the very first retrieve means serialized position 0
+ * holds no entry; step the cursor to the first real entry. Any
+ * other error aborts the check. */
+ if (rc == -ENOENT) {
+ zap_cursor_advance(zc);
+ } else if (rc) {
+ CERROR("%s: fail to init for check LMV "DFID"(%llu): rc = %d\n",
+ osd_name(osd), PFID(fid), oid, rc);
+ GOTO(out_zc, rc);
+ }
+
+ /* Walk the directory entries looking for a local slave MDT-object,
+ * whose entry name encodes its FID (see the comment above). */
+ while (1) {
+ rc = -zap_cursor_retrieve(zc, za);
+ if (rc == -ENOENT)
+ GOTO(out_zc, rc = 0);
+
+ if (rc) {
+ CERROR("%s: fail to locate next for check LMV "
+ DFID"(%llu): rc = %d\n",
+ osd_name(osd), PFID(fid), oid, rc);
+ GOTO(out_zc, rc);
+ }
+
+ /* Parse the FID out of the entry name. NOTE(review): the
+ * first byte of za_name is skipped before parsing -- confirm
+ * against the on-disk "${FID}:${index}" name format. */
+ fid_zero(tfid);
+ sscanf(za->za_name + 1, SFID, RFID(tfid));
+ if (fid_is_sane(tfid) && !osd_remote_fid(env, osd, tfid)) {
+ rc = osd_zap_lookup(osd, oid, NULL, za->za_name,
+ za->za_integer_length,
+ sizeof(*zde) / za->za_integer_length,
+ (void *)zde);
+ if (rc) {
+ CERROR("%s: fail to lookup for check LMV "
+ DFID"(%llu): rc = %d\n",
+ osd_name(osd), PFID(fid), oid, rc);
+ GOTO(out_zc, rc);
+ }
+
+ /* Queue the slave's FID <=> dnode mapping for the
+ * OI scrub, then stop after the first local slave. */
+ rc = osd_oii_insert(env, osd, tfid,
+ zde->lzd_reg.zde_dnode, false);
+ GOTO(out_zc, rc);
+ }
+
+ zap_cursor_advance(zc);
+ }
+
+out_zc:
+ zap_cursor_fini(zc);
+out_nvbuf:
+ nvlist_free(nvbuf);
+
+ return rc;
+}
+
+/* Verify that the OI file maps @fid to @oid (the dnode just found via a
+ * name based lookup). If the mapping is missing or stale, either queue
+ * the correct mapping for the running OI scrub, or trigger an automatic
+ * OI scrub (rate limited). Returns 0 when consistent or queued; negative
+ * errno on failure. */
+static int
+osd_consistency_check(const struct lu_env *env, struct osd_device *osd,
+ struct osd_object *obj, const struct lu_fid *fid,
+ uint64_t oid, bool is_dir)
+{
+ struct lustre_scrub *scrub = &osd->od_scrub;
+ dnode_t *dn = NULL;
+ uint64_t oid2;
+ int once = 0;
+ bool insert;
+ int rc;
+ ENTRY;
+
+ /* Only normal and IGIF FIDs are checked against the OI files. */
+ if (!fid_is_norm(fid) && !fid_is_igif(fid))
+ RETURN(0);
+
+ /* oid == ZFS_NO_OBJECT must be for lookup ".." case */
+ if (oid == ZFS_NO_OBJECT) {
+ rc = osd_sa_handle_get(obj);
+ if (rc)
+ RETURN(rc);
+
+ rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_PARENT(osd), &oid, 8);
+ if (rc)
+ RETURN(rc);
+ }
+
+ if (thread_is_running(&scrub->os_thread)) {
+ /* The running scrub has already passed this dnode, so its
+ * OI mapping can be trusted. */
+ if (scrub->os_pos_current > oid)
+ RETURN(0);
+ } else if (osd->od_auto_scrub_interval == AS_NEVER) {
+ /* Automatic OI scrub is disabled. */
+ RETURN(0);
+ } else {
+ /* Rate-limit automatic triggering after a completed scrub. */
+ if (cfs_time_before(cfs_time_current_sec(),
+ scrub->os_file.sf_time_last_complete +
+ osd->od_auto_scrub_interval))
+ RETURN(0);
+ }
+
+again:
+ rc = osd_fid_lookup(env, osd, fid, &oid2);
+ if (rc == -ENOENT) {
+ /* No OI mapping at all: it needs to be inserted. */
+ insert = true;
+ if (dn)
+ goto trigger;
+
+ rc = __osd_obj2dnode(osd->od_os, oid, &dn);
+ /* The object has been removed (by race maybe). */
+ if (rc)
+ RETURN(rc = (rc == -EEXIST ? -ENOENT : rc));
+
+ goto trigger;
+ } else if (rc || oid == oid2) {
+ /* Lookup failure, or the OI mapping already matches: done. */
+ GOTO(out, rc);
+ }
+
+ /* A mapping exists but points at another dnode: update it. */
+ insert = false;
+
+trigger:
+ if (thread_is_running(&scrub->os_thread)) {
+ if (!dn) {
+ rc = __osd_obj2dnode(osd->od_os, oid, &dn);
+ /* The object has been removed (by race maybe). */
+ if (rc)
+ RETURN(rc = (rc == -EEXIST ? -ENOENT : rc));
+ }
+
+ rc = osd_oii_insert(env, osd, fid, oid, insert);
+ /* There is race condition between osd_oi_lookup and OI scrub.
+ * The OI scrub finished just after osd_oi_lookup() failure.
+ * Under such case, it is unnecessary to trigger OI scrub again,
+ * but try to call osd_oi_lookup() again. */
+ if (unlikely(rc == -EAGAIN))
+ goto again;
+
+ /* For a striped directory, also queue the OI mappings of its
+ * local slave MDT-objects. */
+ if (is_dir)
+ rc = osd_check_lmv(env, osd, oid, fid);
+ else
+ rc = 0;
+
+ GOTO(out, rc);
+ }
+
+ /* No scrub running: start one ourselves, but only try once. */
+ if (osd->od_auto_scrub_interval != AS_NEVER && ++once == 1) {
+ rc = osd_scrub_start(env, osd, SS_AUTO_FULL |
+ SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
+ CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING,
+ "%s: trigger partial OI scrub for RPC inconsistency "
+ "checking FID "DFID": rc = %d\n",
+ osd_name(osd), PFID(fid), rc);
+ if (!rc)
+ goto again;
+ }
+
+ GOTO(out, rc);
+
+out:
+ if (dn)
+ osd_dnode_rele(dn);
+
+ return rc;
+}
+
static int osd_dir_lookup(const struct lu_env *env, struct dt_object *dt,
struct dt_rec *rec, const struct dt_key *key)
{
struct osd_thread_info *oti = osd_oti_get(env);
- struct osd_object *obj = osd_dt_obj(dt);
- struct osd_device *osd = osd_obj2dev(obj);
- char *name = (char *)key;
- int rc;
+ struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
+ struct lu_fid *fid = (struct lu_fid *)rec;
+ char *name = (char *)key;
+ uint64_t oid = ZFS_NO_OBJECT;
+ int rc;
ENTRY;
if (name[0] == '.') {
memcpy(rec, f, sizeof(*f));
RETURN(1);
} else if (name[1] == '.' && name[2] == 0) {
- rc = osd_find_parent_fid(env, dt, (struct lu_fid *)rec);
- RETURN(rc == 0 ? 1 : rc);
+ rc = osd_find_parent_fid(env, dt, fid, &oid);
+ GOTO(out, rc);
}
}
if (rc != 0)
RETURN(rc);
+ oid = oti->oti_zde.lzd_reg.zde_dnode;
if (likely(fid_is_sane(&oti->oti_zde.lzd_fid))) {
memcpy(rec, &oti->oti_zde.lzd_fid, sizeof(struct lu_fid));
- RETURN(1);
+ GOTO(out, rc = 0);
}
- rc = osd_get_fid_by_oid(env, osd, oti->oti_zde.lzd_reg.zde_dnode,
- (struct lu_fid *)rec);
+ rc = osd_get_fid_by_oid(env, osd, oti->oti_zde.lzd_reg.zde_dnode, fid);
+
+ GOTO(out, rc);
+
+out:
+ if (!rc && !osd_remote_fid(env, osd, fid)) {
+ rc = osd_consistency_check(env, osd, obj, fid, oid,
+ S_ISDIR(DTTOIF(oti->oti_zde.lzd_reg.zde_type)));
+ /* Only -ENOENT error will affect the lookup result. */
+ if (rc != -ENOENT)
+ rc = 0;
+ }
- RETURN(rc == 0 ? 1 : (rc == -ENOENT ? -ENODATA : rc));
+ return rc == 0 ? 1 : (rc == -ENOENT ? -ENODATA : rc);
}
/*
lde->lde_hash = cpu_to_le64(2);
strcpy(lde->lde_name, "..");
lde->lde_namelen = cpu_to_le16(2);
- rc = osd_find_parent_fid(env, &it->ozi_obj->oo_dt, fid);
+ rc = osd_find_parent_fid(env, &it->ozi_obj->oo_dt, fid, NULL);
if (!rc) {
fid_cpu_to_le(&lde->lde_fid, fid);
lde->lde_attrs = LUDA_FID;
}
};
-struct osd_metadnode_it {
- struct osd_device *mit_dev;
- __u64 mit_pos;
- struct lu_fid mit_fid;
- int mit_prefetched;
- __u64 mit_prefetched_dnode;
-};
-
-static struct dt_it *osd_zfs_otable_it_init(const struct lu_env *env,
- struct dt_object *dt, __u32 attr)
-{
- struct osd_device *dev = osd_dev(dt->do_lu.lo_dev);
- struct osd_metadnode_it *it;
- ENTRY;
-
- OBD_ALLOC_PTR(it);
- if (unlikely(it == NULL))
- RETURN(ERR_PTR(-ENOMEM));
-
- it->mit_dev = dev;
-
- /* XXX: dmu_object_next() does NOT find dnodes allocated
- * in the current non-committed txg, so we force txg
- * commit to find all existing dnodes ... */
- if (!dev->od_dt_dev.dd_rdonly)
- txg_wait_synced(dmu_objset_pool(dev->od_os), 0ULL);
-
- RETURN((struct dt_it *)it);
-}
-
-static void osd_zfs_otable_it_fini(const struct lu_env *env, struct dt_it *di)
-{
- struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
-
- OBD_FREE_PTR(it);
-}
-
-static int osd_zfs_otable_it_get(const struct lu_env *env,
- struct dt_it *di, const struct dt_key *key)
-{
- return 0;
-}
-
-static void osd_zfs_otable_it_put(const struct lu_env *env, struct dt_it *di)
-{
-}
-
-#define OTABLE_PREFETCH 256
-
-static void osd_zfs_otable_prefetch(const struct lu_env *env,
- struct osd_metadnode_it *it)
-{
- struct osd_device *dev = it->mit_dev;
- int rc;
-
- /* can go negative on the very first access to the iterator
- * or if some non-Lustre objects were found */
- if (unlikely(it->mit_prefetched < 0))
- it->mit_prefetched = 0;
-
- if (it->mit_prefetched >= (OTABLE_PREFETCH >> 1))
- return;
-
- if (it->mit_prefetched_dnode == 0)
- it->mit_prefetched_dnode = it->mit_pos;
-
- while (it->mit_prefetched < OTABLE_PREFETCH) {
- rc = -dmu_object_next(dev->od_os, &it->mit_prefetched_dnode,
- B_FALSE, 0);
- if (unlikely(rc != 0))
- break;
-
- osd_dmu_prefetch(dev->od_os, it->mit_prefetched_dnode,
- 0, 0, 0, ZIO_PRIORITY_ASYNC_READ);
-
- it->mit_prefetched++;
- }
-}
-
-static int osd_zfs_otable_it_next(const struct lu_env *env, struct dt_it *di)
-{
- struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
- struct lustre_mdt_attrs *lma;
- struct osd_device *dev = it->mit_dev;
- nvlist_t *nvbuf = NULL;
- uchar_t *v;
- __u64 dnode;
- int rc, s;
-
- memset(&it->mit_fid, 0, sizeof(it->mit_fid));
-
- dnode = it->mit_pos;
- do {
- rc = -dmu_object_next(dev->od_os, &it->mit_pos, B_FALSE, 0);
- if (unlikely(rc != 0))
- GOTO(out, rc = 1);
- it->mit_prefetched--;
-
- /* LMA is required for this to be a Lustre object.
- * If there is no xattr skip it. */
- rc = __osd_xattr_load_by_oid(dev, it->mit_pos, &nvbuf);
- if (unlikely(rc != 0))
- continue;
-
- LASSERT(nvbuf != NULL);
- rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, &v, &s);
- if (likely(rc == 0)) {
- /* Lustre object */
- lma = (struct lustre_mdt_attrs *)v;
- lustre_lma_swab(lma);
- if (likely(!(lma->lma_compat & LMAC_NOT_IN_OI) &&
- !(lma->lma_incompat & LMAI_AGENT))) {
- it->mit_fid = lma->lma_self_fid;
- nvlist_free(nvbuf);
- break;
- }
- }
-
- /* not a Lustre visible object, try next one */
- nvlist_free(nvbuf);
- } while (1);
-
-
- /* we aren't prefetching in the above loop because the number of
- * non-Lustre objects is very small and we will be repeating very
- * rare. in case we want to use this to iterate over non-Lustre
- * objects (i.e. when we convert regular ZFS in Lustre) it makes
- * sense to initiate prefetching in the loop */
-
- /* 0 - there are more items, +1 - the end */
- if (likely(rc == 0))
- osd_zfs_otable_prefetch(env, it);
-
- CDEBUG(D_OTHER, "advance: %llu -> %llu "DFID": %d\n", dnode,
- it->mit_pos, PFID(&it->mit_fid), rc);
-
-out:
- return rc;
-}
-
-static struct dt_key *osd_zfs_otable_it_key(const struct lu_env *env,
- const struct dt_it *di)
-{
- return NULL;
-}
-
-static int osd_zfs_otable_it_key_size(const struct lu_env *env,
- const struct dt_it *di)
-{
- return sizeof(__u64);
-}
-
-static int osd_zfs_otable_it_rec(const struct lu_env *env,
- const struct dt_it *di,
- struct dt_rec *rec, __u32 attr)
-{
- struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
- struct lu_fid *fid = (struct lu_fid *)rec;
- ENTRY;
-
- *fid = it->mit_fid;
-
- RETURN(0);
-}
-
-
-static __u64 osd_zfs_otable_it_store(const struct lu_env *env,
- const struct dt_it *di)
-{
- struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
-
- return it->mit_pos;
-}
-
-static int osd_zfs_otable_it_load(const struct lu_env *env,
- const struct dt_it *di, __u64 hash)
-{
- struct osd_metadnode_it *it = (struct osd_metadnode_it *)di;
-
- it->mit_pos = hash;
- it->mit_prefetched = 0;
- it->mit_prefetched_dnode = 0;
-
- return osd_zfs_otable_it_next(env, (struct dt_it *)di);
-}
-
-static int osd_zfs_otable_it_key_rec(const struct lu_env *env,
- const struct dt_it *di, void *key_rec)
-{
- return 0;
-}
-
-const struct dt_index_operations osd_zfs_otable_ops = {
- .dio_it = {
- .init = osd_zfs_otable_it_init,
- .fini = osd_zfs_otable_it_fini,
- .get = osd_zfs_otable_it_get,
- .put = osd_zfs_otable_it_put,
- .next = osd_zfs_otable_it_next,
- .key = osd_zfs_otable_it_key,
- .key_size = osd_zfs_otable_it_key_size,
- .rec = osd_zfs_otable_it_rec,
- .store = osd_zfs_otable_it_store,
- .load = osd_zfs_otable_it_load,
- .key_rec = osd_zfs_otable_it_key_rec,
- }
-};
-
int osd_index_try(const struct lu_env *env, struct dt_object *dt,
const struct dt_index_features *feat)
{
GOTO(out, rc = -ERANGE);
if (unlikely(feat == &dt_otable_features)) {
- dt->do_index_ops = &osd_zfs_otable_ops;
+ dt->do_index_ops = &osd_otable_ops;
GOTO(out, rc = 0);
}
#include <dt_object.h>
#include <md_object.h>
#include <lustre_quota.h>
+#include <lustre_scrub.h>
+#include <obd.h>
#ifdef SHRINK_STOP
#undef SHRINK_STOP
#endif
oic_remote:1; /* FID isn't local */
};
+/* One FID <=> oid mapping known to be missing or stale, queued for the
+ * OI scrub thread to repair. */
+struct osd_inconsistent_item {
+ /* link into lustre_scrub::os_inconsistent_items,
+ * protected by lustre_scrub::os_lock. */
+ struct list_head oii_list;
+
+ /* The right FID <=> oid mapping. */
+ struct osd_idmap_cache oii_cache;
+
+ unsigned int oii_insert:1; /* insert or update mapping. */
+};
+
+/* State of an object-table iteration (used when dt_otable_features is
+ * requested via osd_index_try). */
+struct osd_otable_it {
+ struct osd_device *ooi_dev;
+ struct lu_fid ooi_fid; /* FID at the current position */
+ __u64 ooi_pos; /* current dnode number */
+ __u64 ooi_prefetched_dnode; /* next dnode to prefetch */
+ int ooi_prefetched; /* prefetched-but-unvisited count */
+
+ /* The following bits can be updated/checked w/o lock protection.
+ * If more bits will be introduced in the future and need lock to
+ * protect, please add comment. */
+ unsigned int ooi_used_outside:1, /* Some user out of OSD
+ * uses the iteration. */
+ ooi_all_cached:1, /* No more entries can be
+ * filled into cache. */
+ ooi_user_ready:1, /* The user out of OSD is
+ * ready to iterate. */
+ ooi_waiting:1; /* it::next is waiting. */
+};
+
+extern const struct dt_index_operations osd_otable_ops;
+
/* max.number of regular attributes the callers may ask for */
# define OSD_MAX_IN_BULK (sizeof(struct osa_attr)/sizeof(uint64_t))
int oti_ins_cache_size;
int oti_ins_cache_used;
struct lu_buf oti_xattr_lbuf;
+ zap_cursor_t oti_zc;
};
extern struct lu_context_key osd_key;
od_prop_rdonly:1, /**< ZFS property readonly */
od_xattr_in_sa:1,
od_is_ost:1,
+ od_in_init:1,
od_posix_acl:1;
unsigned int od_dnsize;
char od_mntdev[128];
char od_svname[128];
+ char od_uuid[16];
int od_connects;
+ int od_index;
+ __s64 od_auto_scrub_interval;
struct lu_site od_site;
dnode_t *od_groupused_dn;
/* osd seq instance */
struct lu_client_seq *od_cl_seq;
+
+ struct semaphore od_otable_sem;
+ struct osd_otable_it *od_otable_it;
+ struct lustre_scrub od_scrub;
+ struct list_head od_ios_list;
};
enum osd_destroy_type {
};
uint64_t oo_parent; /* used only at object creation */
};
+ struct lu_object_header *oo_header;
};
int osd_statfs(const struct lu_env *, struct dt_device *, struct obd_statfs *);
static inline char *osd_name(struct osd_device *osd)
{
- return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name;
+ /* NOTE(review): od_svname avoids dereferencing ld_obd, which is
+ * presumably not yet set up during early device configuration --
+ * confirm against the setup ordering. */
+ return osd->od_svname;
+}
+
+/* Bit helpers over a byte-addressed bitmap (e.g. sf_oi_bitmap).
+ * NOTE(review): casting __u8 * to unsigned long * assumes the buffer is
+ * suitably aligned for the kernel bitops -- confirm at allocation sites. */
+static inline void zfs_set_bit(int nr, __u8 *addr)
+{
+ set_bit(nr, (unsigned long *)addr);
+}
+
+static inline int zfs_test_bit(int nr, __u8 *addr)
+{
+ return test_bit(nr, (const unsigned long *)addr);
+}
+
+/* Map a FID to the index of its Object Index container. The mask only
+ * works if od_oi_count is a power of two -- presumably guaranteed by
+ * osd_oi_init(); TODO confirm. */
+static inline int osd_oi_fid2idx(struct osd_device *dev,
+ const struct lu_fid *fid)
+{
+ return fid->f_seq & (dev->od_oi_count - 1);
+}
+
+/* Return the OI container that holds the mapping for @fid. */
+static inline struct osd_oi *osd_fid2oi(struct osd_device *osd,
+ const struct lu_fid *fid)
+{
+ LASSERTF(osd->od_oi_table && osd->od_oi_count >= 1,
+ "%s: "DFID", oi_count %d\n",
+ osd_name(osd), PFID(fid), osd->od_oi_count);
+
+ return osd->od_oi_table[osd_oi_fid2idx(osd, fid)];
}
#ifdef CONFIG_PROC_FS
int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
struct osd_object *obj, sa_handle_t *sa_hdl, dmu_tx_t *tx,
struct lu_attr *la, uint64_t parent, nvlist_t *);
+int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
+ uint64_t oid, dnode_t **dnp);
+int osd_object_init0(const struct lu_env *env, struct osd_object *obj);
/* osd_oi.c */
int osd_oi_init(const struct lu_env *env, struct osd_device *o);
struct osd_idmap_cache *osd_idc_find(const struct lu_env *env,
struct osd_device *osd,
const struct lu_fid *fid);
+int osd_idc_find_and_init_with_oid(const struct lu_env *env,
+ struct osd_device *osd,
+ const struct lu_fid *fid,
+ uint64_t oid);
+int fid_is_on_ost(const struct lu_env *env, struct osd_device *osd,
+ const struct lu_fid *fid);
+int osd_obj_find_or_create(const struct lu_env *env, struct osd_device *o,
+ uint64_t parent, const char *name, uint64_t *child,
+ const struct lu_fid *fid, bool isdir);
+
+extern unsigned int osd_oi_count;
/* osd_index.c */
int osd_index_try(const struct lu_env *env, struct dt_object *dt,
struct osd_device *osd,
struct osd_object *obj,
struct osd_thandle *oh, bool destroy);
+int __osd_xattr_load_by_oid(struct osd_device *osd, uint64_t oid,
+ nvlist_t **sa);
+
+/* osd_scrub.c */
+int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev);
+void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
+int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
+ __u32 flags);
+int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
+ const struct lu_fid *fid, uint64_t oid, bool insert);
+int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
+ uint64_t *oid);
/* osd_xattr.c */
int __osd_sa_xattr_schedule_update(const struct lu_env *env,
#include <obd.h>
#include <obd_class.h>
#include <lprocfs_status.h>
+#include <lustre_scrub.h>
#include "osd_internal.h"
RETURN(result);
}
+/* lprocfs "auto_scrub" read: show od_auto_scrub_interval, the automatic
+ * OI scrub trigger interval in seconds (AS_NEVER disables it). */
+static int zfs_osd_auto_scrub_seq_show(struct seq_file *m, void *data)
+{
+ struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
+
+ LASSERT(dev != NULL);
+ /* Device not fully set up (or already torn down). */
+ if (unlikely(!dev->od_os))
+ return -EINPROGRESS;
+
+ seq_printf(m, "%lld\n", dev->od_auto_scrub_interval);
+ return 0;
+}
+
+/* lprocfs "auto_scrub" write: set od_auto_scrub_interval, the automatic
+ * OI scrub trigger interval in seconds (AS_NEVER disables it). */
+static ssize_t
+zfs_osd_auto_scrub_seq_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *m = file->private_data;
+ struct dt_device *dt = m->private;
+ struct osd_device *dev = osd_dt_dev(dt);
+ int rc;
+ __s64 val;
+
+ LASSERT(dev != NULL);
+ /* Device not fully set up (or already torn down). */
+ if (unlikely(!dev->od_os))
+ return -EINPROGRESS;
+
+ rc = lprocfs_str_to_s64(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ /* NOTE(review): no range check -- any __s64 is accepted. Consider
+ * rejecting values that are neither AS_NEVER nor a non-negative
+ * interval; confirm the intended valid range. */
+ dev->od_auto_scrub_interval = val;
+ return count;
+}
+LPROC_SEQ_FOPS(zfs_osd_auto_scrub);
+
+/* lprocfs "oi_scrub" read: dump the OI scrub status/statistics. */
+static int zfs_osd_oi_scrub_seq_show(struct seq_file *m, void *data)
+{
+ struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
+
+ LASSERT(dev != NULL);
+ /* Device not fully set up (or already torn down). */
+ if (unlikely(!dev->od_os))
+ return -EINPROGRESS;
+
+ scrub_dump(m, &dev->od_scrub);
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(zfs_osd_oi_scrub);
+
static int zfs_osd_fstype_seq_show(struct seq_file *m, void *data)
{
seq_puts(m, "zfs\n");
.fops = &zfs_dt_filestotal_fops },
{ .name = "filesfree",
.fops = &zfs_dt_filesfree_fops },
+ { .name = "auto_scrub",
+ .fops = &zfs_osd_auto_scrub_fops },
+ { .name = "oi_scrub",
+ .fops = &zfs_osd_oi_scrub_fops },
{ .name = "fstype",
.fops = &zfs_osd_fstype_fops },
{ .name = "mntdev",
OBD_SLAB_ALLOC_PTR_GFP(mo, osd_object_kmem, GFP_NOFS);
if (mo != NULL) {
struct lu_object *l;
+ struct lu_object_header *h;
+ struct osd_device *o = osd_dev(d);
l = &mo->oo_dt.do_lu;
- dt_object_init(&mo->oo_dt, NULL, d);
+ if (unlikely(o->od_in_init)) {
+ OBD_ALLOC_PTR(h);
+ if (!h) {
+ OBD_FREE_PTR(mo);
+ return NULL;
+ }
+
+ lu_object_header_init(h);
+ lu_object_init(l, h, d);
+ lu_object_add_top(h, l);
+ mo->oo_header = h;
+ } else {
+ dt_object_init(&mo->oo_dt, NULL, d);
+ mo->oo_header = NULL;
+ }
+
mo->oo_dt.do_ops = &osd_obj_ops;
l->lo_ops = &osd_lu_obj_ops;
INIT_LIST_HEAD(&mo->oo_sa_linkage);
struct lu_buf buf;
int rc;
struct lustre_mdt_attrs *lma;
+ const struct lu_fid *rfid = lu_object_fid(&obj->oo_dt.do_lu);
ENTRY;
CLASSERT(sizeof(info->oti_buf) >= sizeof(*lma));
CWARN("%s: unsupported incompat LMA feature(s) %#x for "
"fid = "DFID"\n", osd_obj2dev(obj)->od_svname,
lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
- PFID(lu_object_fid(&obj->oo_dt.do_lu)));
+ PFID(rfid));
rc = -EOPNOTSUPP;
+ } else if (unlikely(!lu_fid_eq(rfid, &lma->lma_self_fid))) {
+ CERROR("%s: FID-in-LMA "DFID" does not match the "
+ "object self-fid "DFID"\n",
+ osd_obj2dev(obj)->od_svname,
+ PFID(&lma->lma_self_fid), PFID(rfid));
+ rc = -EREMCHG;
} else {
struct osd_device *osd = osd_obj2dev(obj);
struct osd_object *obj = osd_obj(l);
struct osd_device *osd = osd_obj2dev(obj);
const struct lu_fid *fid = lu_object_fid(l);
+ struct lustre_scrub *scrub = &osd->od_scrub;
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct luz_direntry *zde = &info->oti_zde;
+ struct osd_idmap_cache *idc;
+ char *name = info->oti_str;
uint64_t oid;
int rc = 0;
+ int rc1;
+ bool remote = false;
ENTRY;
LASSERT(osd_invariant(obj));
if (fid_is_otable_it(&l->lo_header->loh_fid)) {
obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
l->lo_header->loh_attr |= LOHA_EXISTS;
- RETURN(0);
+
+ GOTO(out, rc = 0);
}
- if (conf != NULL && conf->loc_flags & LOC_F_NEW)
+ if (conf && conf->loc_flags & LOC_F_NEW)
GOTO(out, rc = 0);
if (unlikely(fid_is_acct(fid))) {
GOTO(out, rc = 0);
}
- rc = osd_fid_lookup(env, osd, fid, &oid);
- if (rc == 0) {
- LASSERT(obj->oo_dn == NULL);
- rc = __osd_obj2dnode(osd->od_os, oid, &obj->oo_dn);
- /* EEXIST will be returned if object is being deleted in ZFS */
- if (rc == -EEXIST) {
- rc = 0;
- GOTO(out, rc);
+ idc = osd_idc_find(env, osd, fid);
+ if (idc && !idc->oic_remote && idc->oic_dnode != ZFS_NO_OBJECT) {
+ oid = idc->oic_dnode;
+ goto zget;
+ }
+
+ rc = -ENOENT;
+ if (!list_empty(&osd->od_scrub.os_inconsistent_items))
+ rc = osd_oii_lookup(osd, fid, &oid);
+
+ if (rc)
+ rc = osd_fid_lookup(env, osd, fid, &oid);
+
+ if (rc == -ENOENT) {
+ if (likely(!(fid_is_norm(fid) || fid_is_igif(fid)) ||
+ fid_is_on_ost(env, osd, fid) ||
+ !zfs_test_bit(osd_oi_fid2idx(osd, fid),
+ scrub->os_file.sf_oi_bitmap)))
+ GOTO(out, rc = 0);
+
+ rc = -EREMCHG;
+ goto trigger;
+ }
+
+ if (rc)
+ GOTO(out, rc);
+
+zget:
+ LASSERT(obj->oo_dn == NULL);
+
+ rc = __osd_obj2dnode(osd->od_os, oid, &obj->oo_dn);
+ /* EEXIST will be returned if object is being deleted in ZFS */
+ if (rc == -EEXIST)
+ GOTO(out, rc = 0);
+
+ if (rc) {
+ CERROR("%s: lookup "DFID"/%#llx failed: rc = %d\n",
+ osd->od_svname, PFID(lu_object_fid(l)), oid, rc);
+ GOTO(out, rc);
+ }
+
+ rc = osd_object_init0(env, obj);
+ if (rc)
+ GOTO(out, rc);
+
+ if (unlikely(obj->oo_header))
+ GOTO(out, rc = 0);
+
+ rc = osd_check_lma(env, obj);
+ if ((!rc && !remote) || (rc != -EREMCHG))
+ GOTO(out, rc);
+
+trigger:
+ /* We still have chance to get the valid dnode: for the object that is
+ * referenced by remote name entry, the object on the local MDT will be
+ * linked under the dir /REMOTE_PARENT_DIR with its FID string as name.
+ *
+ * During the OI scrub, if we cannot find the OI mapping, we may still
+ * have a chance to map the FID to the local OID via looking up the dir
+ * /REMOTE_PARENT_DIR. */
+ if (!remote && !fid_is_on_ost(env, osd, fid)) {
+ osd_fid2str(name, fid, sizeof(info->oti_str));
+ rc = osd_zap_lookup(osd, osd->od_remote_parent_dir,
+ NULL, name, 8, 3, (void *)zde);
+ if (!rc) {
+ oid = zde->lzd_reg.zde_dnode;
+ osd_dnode_rele(obj->oo_dn);
+ obj->oo_dn = NULL;
+ remote = true;
+ goto zget;
}
- if (rc != 0) {
- CERROR("%s: lookup "DFID"/%#llx failed: rc = %d\n",
- osd->od_svname, PFID(lu_object_fid(l)), oid, rc);
- GOTO(out, rc);
+ }
+
+ /* The case someone triggered the OI scrub already. */
+ if (thread_is_running(&scrub->os_thread)) {
+ if (!rc) {
+ LASSERT(remote);
+
+ lu_object_set_agent_entry(l);
+ osd_oii_insert(env, osd, fid, oid, false);
+ } else {
+ rc = -EINPROGRESS;
}
- rc = osd_object_init0(env, obj);
- if (rc != 0)
- GOTO(out, rc);
- rc = osd_check_lma(env, obj);
- if (rc != 0)
- GOTO(out, rc);
- } else if (rc == -ENOENT) {
- rc = 0;
+ GOTO(out, rc);
}
- LASSERT(osd_invariant(obj));
+
+ /* The case NOT allow to trigger OI scrub automatically. */
+ if (osd->od_auto_scrub_interval == AS_NEVER)
+ GOTO(out, rc);
+
+ /* It is me to trigger the OI scrub. */
+ rc1 = osd_scrub_start(env, osd, SS_CLEAR_DRYRUN |
+ SS_CLEAR_FAILOUT | SS_AUTO_FULL);
+ LCONSOLE_WARN("%s: trigger OI scrub by RPC for the "DFID": rc = %d\n",
+ osd_name(osd), PFID(fid), rc1);
+ if (!rc) {
+ LASSERT(remote);
+
+ lu_object_set_agent_entry(l);
+ if (!rc1)
+ osd_oii_insert(env, osd, fid, oid, false);
+ } else {
+ if (!rc1)
+ rc = -EINPROGRESS;
+ else
+ rc = -EREMCHG;
+ }
+
+ GOTO(out, rc);
+
out:
RETURN(rc);
}
static void osd_object_free(const struct lu_env *env, struct lu_object *l)
{
struct osd_object *obj = osd_obj(l);
+ struct lu_object_header *h = obj->oo_header;
LASSERT(osd_invariant(obj));
dt_object_fini(&obj->oo_dt);
OBD_SLAB_FREE_PTR(obj, osd_object_kmem);
+ /* Objects allocated with a private header (the od_in_init case in
+ * object allocation) own it: save it before freeing the object,
+ * then release it last. */
+ if (unlikely(h)) {
+ lu_object_header_fini(h);
+ OBD_FREE_PTR(h);
+ }
}
static int
/* remove obj ref from index dir (it depends) */
zapid = osd_get_name_n_idx(env, osd, fid, buf,
sizeof(info->oti_str), &zdn);
- rc = osd_zap_remove(osd, zapid, zdn, buf, oh->ot_tx);
- if (rc) {
- CERROR("%s: zap_remove(%s) failed: rc = %d\n",
- osd->od_svname, buf, rc);
- GOTO(out, rc);
- }
-
rc = osd_xattrs_destroy(env, obj, oh);
if (rc) {
CERROR("%s: cannot destroy xattrs for %s: rc = %d\n",
osd->od_svname, buf, oid, rc);
}
+ /* Remove the OI mapping after the destroy to handle the race with
+ * OI scrub that may insert missed OI mapping during the interval. */
+ rc = osd_zap_remove(osd, zapid, zdn, buf, oh->ot_tx);
+ if (unlikely(rc == -ENOENT))
+ rc = 0;
+ if (rc)
+ CERROR("%s: zap_remove(%s) failed: rc = %d\n",
+ osd->od_svname, buf, rc);
+
+ GOTO(out, rc);
+
out:
/* not needed in the cache anymore */
set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags);
transaction group. */
LASSERT(oh->ot_tx->tx_txg != 0);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_FID_MAPPING) && !osd->od_is_ost) {
+ struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
+ char *buf = info->oti_str;
+ dnode_t *zdn = NULL;
+ uint64_t zapid;
+
+ zapid = osd_get_name_n_idx(env, osd, lu_object_fid(&dt->do_lu),
+ buf, sizeof(info->oti_str), &zdn);
+ rc = osd_zap_lookup(osd, zapid, zdn, buf, 8,
+ sizeof(*zde) / 8, zde);
+ if (!rc) {
+ zde->zde_dnode -= 1;
+ rc = -zap_update(osd->od_os, zapid, buf, 8,
+ sizeof(*zde) / 8, zde, oh->ot_tx);
+ }
+ up_read(&obj->oo_guard);
+
+ RETURN(rc > 0 ? 0 : rc);
+ }
+
/* Only allow set size for regular file */
if (!S_ISREG(dt->do_lu.lo_header->loh_attr))
valid &= ~(LA_SIZE | LA_BLOCKS);
return rc;
}
-static int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
- uint64_t oid, dnode_t **dnp)
+int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
+ uint64_t oid, dnode_t **dnp)
{
dmu_tx_hold_t *txh;
int rc = 0;
dnode_t *dn = NULL, *zdn = NULL;
uint64_t zapid, parent = 0;
int rc;
+ __u32 compat = 0;
ENTRY;
zapid = osd_get_name_n_idx(env, osd, fid, buf,
sizeof(info->oti_str), &zdn);
- rc = osd_zap_add(osd, zapid, zdn, buf, 8, 1, zde, oh->ot_tx);
- if (rc)
- GOTO(out, rc);
+ if (!CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY)) {
+ if (osd->od_is_ost &&
+ OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
+ zde->zde_dnode++;
+
+ if (!osd->od_is_ost ||
+ !OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY)) {
+ rc = osd_zap_add(osd, zapid, zdn, buf, 8, 1,
+ zde, oh->ot_tx);
+ if (rc)
+ GOTO(out, rc);
+ }
+ }
+
obj->oo_dn = dn;
/* Now add in all of the "SA" attributes */
rc = osd_sa_handle_get(obj);
GOTO(out, rc);
/* initialize LMA */
- lustre_lma_init(lma, fid, 0, 0);
+ if (fid_is_idif(fid) || (fid_is_norm(fid) && osd->od_is_ost))
+ compat |= LMAC_FID_ON_OST;
+ lustre_lma_init(lma, fid, compat, 0);
lustre_lma_swab(lma);
rc = -nvlist_add_byte_array(obj->oo_sa_xattr, XATTR_NAME_LMA,
(uchar_t *)lma, sizeof(*lma));
return 0;
}
-/**
- * Create a new OI with the given name.
- */
-static int
-osd_oi_create(const struct lu_env *env, struct osd_device *o,
-	      uint64_t parent, const char *name, uint64_t *child)
+/**
+ * Create a local object @name under @parent: either a 64-bit-hash ZAP
+ * directory or a plain metadata dnode, per @isdir. When @fid is given,
+ * the object's LMA is initialized with that FID plus LMAC_NOT_IN_OI
+ * (and LMAC_FID_ON_OST on an OST), so the object is skipped by OI scrub.
+ * Generalizes the former osd_oi_create(). On success *child is the new
+ * dnode number.
+ */
+static int osd_obj_create(const struct lu_env *env, struct osd_device *o,
+			  uint64_t parent, const char *name, uint64_t *child,
+			  const struct lu_fid *fid, bool isdir)
{
-	struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg;
-	struct lu_attr *la = &osd_oti_get(env)->oti_la;
-	sa_handle_t *sa_hdl = NULL;
-	dmu_tx_t *tx;
-	uint64_t oid;
-	int rc;
-
-	/* verify it doesn't already exist */
-	rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde);
-	if (rc == 0)
-		return -EEXIST;
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
+	struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+	struct lu_attr *la = &info->oti_la;
+	sa_handle_t *sa_hdl = NULL;
+	nvlist_t *nvbuf = NULL;
+	dmu_tx_t *tx;
+	uint64_t oid;
+	__u32 compat = LMAC_NOT_IN_OI;
+	int rc;
+	ENTRY;
if (o->od_dt_dev.dd_rdonly)
-		return -EROFS;
+		RETURN(-EROFS);
+
+	memset(la, 0, sizeof(*la));
+	la->la_valid = LA_MODE | LA_UID | LA_GID;
+	la->la_mode = S_IRUGO | S_IWUSR | (isdir ? S_IXUGO | S_IFDIR : S_IFREG);
+
+	/* Prepare the LMA xattr outside the transaction. */
+	if (fid) {
+		rc = -nvlist_alloc(&nvbuf, NV_UNIQUE_NAME, KM_SLEEP);
+		if (rc)
+			RETURN(rc);
+
+		if (o->od_is_ost)
+			compat |= LMAC_FID_ON_OST;
+		lustre_lma_init(lma, fid, compat, 0);
+		lustre_lma_swab(lma);
+		rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA,
+					    (uchar_t *)lma, sizeof(*lma));
+		if (rc)
+			GOTO(out, rc);
+	}
/* create fid-to-dnode index */
tx = dmu_tx_create(o->od_os);
-	if (tx == NULL)
-		return -ENOMEM;
+	if (!tx)
+		GOTO(out, rc = -ENOMEM);
-	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 1, NULL);
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
dmu_tx_hold_bonus(tx, parent);
dmu_tx_hold_zap(tx, parent, TRUE, name);
dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
-
rc = -dmu_tx_assign(tx, TXG_WAIT);
if (rc) {
dmu_tx_abort(tx);
-		return rc;
+		GOTO(out, rc);
}
-	oid = osd_zap_create_flags(o->od_os, 0, ZAP_FLAG_HASH64,
-				   DMU_OT_DIRECTORY_CONTENTS,
-				   14, /* == ZFS fzap_default_block_shift */
-				   DN_MAX_INDBLKSHIFT,
-				   0, tx);
-
+	if (isdir)
+		/* 14 == ZFS fzap_default_block_shift */
+		oid = osd_zap_create_flags(o->od_os, 0, ZAP_FLAG_HASH64,
+					   DMU_OT_DIRECTORY_CONTENTS,
+					   14, DN_MAX_INDBLKSHIFT, 0, tx);
+	else
+		oid = osd_dmu_object_alloc(o->od_os, DMU_OTN_UINT8_METADATA,
+					   0, 0, tx);
rc = -sa_handle_get(o->od_os, oid, NULL, SA_HDL_PRIVATE, &sa_hdl);
if (rc)
-		goto commit;
-	memset(la, 0, sizeof(*la));
-	la->la_valid = LA_MODE | LA_UID | LA_GID;
-	la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-	rc = __osd_attr_init(env, o, NULL, sa_hdl, tx, la, parent, NULL);
+		GOTO(commit, rc);
+
+	rc = __osd_attr_init(env, o, NULL, sa_hdl, tx, la, parent, nvbuf);
sa_handle_destroy(sa_hdl);
if (rc)
-		goto commit;
+		GOTO(commit, rc);
zde->zde_dnode = oid;
zde->zde_pad = 0;
-	zde->zde_type = IFTODT(S_IFDIR);
-
+	zde->zde_type = IFTODT(isdir ? S_IFDIR : S_IFREG);
rc = -zap_add(o->od_os, parent, name, 8, 1, (void *)zde, tx);
+	GOTO(commit, rc);
+
commit:
if (rc)
dmu_object_free(o->od_os, oid, tx);
-	dmu_tx_commit(tx);
-
-	if (rc == 0)
+	else
*child = oid;
-
+	dmu_tx_commit(tx);
+out:
+	if (nvbuf)
+		nvlist_free(nvbuf);
return rc;
}
if (rc == 0)
*child = oi.oi_zapid;
else if (rc == -ENOENT)
- rc = osd_oi_create(env, o, parent, name, child);
+ rc = osd_obj_create(env, o, parent, name, child, NULL, true);
+
+ return rc;
+}
+
+/* Look up local object @name under @parent; if it does not exist yet,
+ * create it via osd_obj_create() (optionally stamping @fid into LMA).
+ * On success *child holds the object's dnode number. */
+int osd_obj_find_or_create(const struct lu_env *env, struct osd_device *o,
+			   uint64_t parent, const char *name, uint64_t *child,
+			   const struct lu_fid *fid, bool isdir)
+{
+	struct osd_oi oi;
+	int rc;
+
+	rc = osd_oi_lookup(env, o, parent, name, &oi);
+	if (!rc)
+		*child = oi.oi_zapid;
+	else if (rc == -ENOENT)
+		rc = osd_obj_create(env, o, parent, name, child, fid, isdir);
+
+	return rc;
+}
return 0;
}
- LASSERT(ss != NULL);
+ /* The seq_server_site may be NOT ready during initial OI scrub */
+ if (unlikely(!ss || !ss->ss_server_fld ||
+ !ss->ss_server_fld->lsf_cache))
+ return -ENOENT;
+
fld_range_set_any(range);
/* OSD will only do local fld lookup */
return fld_local_lookup(env, ss->ss_server_fld, seq, range);
RETURN(1);
if (unlikely(fid_is_local_file(fid) || fid_is_llog(fid)) ||
- fid_is_name_llog(fid) || fid_is_quota(fid))
+ fid_is_name_llog(fid) || fid_is_quota(fid) ||
+ fid_is_igif(fid))
RETURN(0);
rc = osd_fld_lookup(env, osd, fid_seq(fid), range);
{
struct osd_oi *oi;
- LASSERT(osd->od_oi_table != NULL);
- oi = osd->od_oi_table[fid_seq(fid) & (osd->od_oi_count - 1)];
+ oi = osd_fid2oi(osd, fid);
if (buf)
osd_fid2str(buf, fid, bufsize);
if (zdn)
/**
* Determine if the type and number of OIs used by this file system.
*/
-static int
-osd_oi_probe(const struct lu_env *env, struct osd_device *o, int *count)
+static int osd_oi_probe(const struct lu_env *env, struct osd_device *o)
{
- uint64_t root_oid = o->od_root;
- struct osd_oi oi;
- char name[16];
- int rc;
+ struct lustre_scrub *scrub = &o->od_scrub;
+ struct scrub_file *sf = &scrub->os_file;
+ struct osd_oi oi;
+ char name[16];
+ int max = sf->sf_oi_count > 0 ? sf->sf_oi_count : OSD_OI_FID_NR_MAX;
+ int count;
+ int rc;
ENTRY;
/*
* The only safeguard is that we know the number of OIs must be a
* power of two and this is checked for basic sanity.
*/
- for (*count = 0; *count < OSD_OI_FID_NR_MAX; (*count)++) {
- sprintf(name, "%s.%d", DMU_OSD_OI_NAME_BASE, *count);
- rc = osd_oi_lookup(env, o, root_oid, name, &oi);
- if (rc == 0)
+ for (count = 0; count < max; count++) {
+ snprintf(name, 15, "%s.%d", DMU_OSD_OI_NAME_BASE, count);
+ rc = osd_oi_lookup(env, o, o->od_root, name, &oi);
+ if (!rc)
continue;
if (rc == -ENOENT) {
- if (*count == 0)
- break;
-
- if ((*count & (*count - 1)) != 0)
- RETURN(-EDOM);
+ if (sf->sf_oi_count == 0)
+ RETURN(count);
- RETURN(0);
+ zfs_set_bit(count, sf->sf_oi_bitmap);
+ continue;
}
- RETURN(rc);
+ if (rc)
+ RETURN(rc);
}
- /*
- * No OIs exist, this must be a new filesystem.
- */
- *count = 0;
-
- RETURN(0);
+ RETURN(count);
}
static void osd_ost_seq_fini(const struct lu_env *env, struct osd_device *osd)
*/
int osd_oi_init(const struct lu_env *env, struct osd_device *o)
{
- char *key = osd_oti_get(env)->oti_buf;
- int i, rc, count = 0;
+ struct lustre_scrub *scrub = &o->od_scrub;
+ struct scrub_file *sf = &scrub->os_file;
+ char *key = osd_oti_get(env)->oti_buf;
+ uint64_t sdb;
+ int i, rc, count;
ENTRY;
+ LASSERTF((sf->sf_oi_count & (sf->sf_oi_count - 1)) == 0,
+ "Invalid OI count in scrub file %d\n", sf->sf_oi_count);
+
osd_oi_init_remote_parent(env, o);
- rc = osd_oi_probe(env, o, &count);
+ rc = osd_oi_init_compat(env, o);
if (rc)
RETURN(rc);
- if (count == 0) {
- uint64_t odb, sdb;
+ count = osd_oi_probe(env, o);
+ if (count < 0)
+ GOTO(out, rc = count);
- count = osd_oi_count;
- odb = o->od_root;
+ if (count > 0) {
+ if (count == sf->sf_oi_count)
+ goto open;
- for (i = 0; i < count; i++) {
- sprintf(key, "%s.%d", DMU_OSD_OI_NAME_BASE, i);
- rc = osd_oi_find_or_create(env, o, odb, key, &sdb);
- if (rc)
- RETURN(rc);
+ if (sf->sf_oi_count == 0) {
+ if (likely((count & (count - 1)) == 0)) {
+ sf->sf_oi_count = count;
+ rc = scrub_file_store(env, scrub);
+ if (rc)
+ GOTO(out, rc);
+
+ goto open;
+ }
+
+ LCONSOLE_ERROR("%s: invalid oi count %d. You can "
+ "remove all OIs, then remount it\n",
+ osd_name(o), count);
+ GOTO(out, rc = -EDOM);
+ }
+
+ scrub_file_reset(scrub, o->od_uuid, SF_RECREATED);
+ count = sf->sf_oi_count;
+ } else {
+ if (sf->sf_oi_count > 0) {
+ count = sf->sf_oi_count;
+ memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
+ for (i = 0; i < count; i++)
+ zfs_set_bit(i, sf->sf_oi_bitmap);
+ scrub_file_reset(scrub, o->od_uuid, SF_RECREATED);
+ } else {
+ count = sf->sf_oi_count = osd_oi_count;
}
}
- rc = osd_oi_init_compat(env, o);
+ rc = scrub_file_store(env, scrub);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
+ for (i = 0; i < count; i++) {
+ LASSERT(sizeof(osd_oti_get(env)->oti_buf) >= 32);
+
+ snprintf(key, sizeof(osd_oti_get(env)->oti_buf) - 1,
+ "%s.%d", DMU_OSD_OI_NAME_BASE, i);
+ rc = osd_oi_find_or_create(env, o, o->od_root, key, &sdb);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+open:
LASSERT((count & (count - 1)) == 0);
o->od_oi_count = count;
OBD_ALLOC(o->od_oi_table, sizeof(struct osd_oi *) * count);
if (o->od_oi_table == NULL)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
rc = osd_oi_open_table(env, o, count);
+
+ GOTO(out, rc);
+
+out:
if (rc) {
- OBD_FREE(o->od_oi_table, sizeof(struct osd_oi *) * count);
- o->od_oi_table = NULL;
+ osd_ost_seq_fini(env, o);
+
+ if (o->od_oi_table) {
+ OBD_FREE(o->od_oi_table,
+ sizeof(struct osd_oi *) * count);
+ o->od_oi_table = NULL;
+ }
}
- RETURN(rc);
+ return rc;
}
void osd_oi_fini(const struct lu_env *env, struct osd_device *o)
return 0;
}
+
+/* Find (or add) the idmap cache entry for @fid and bind it to the known
+ * local dnode @oid, clearing the remote flag. */
+int osd_idc_find_and_init_with_oid(const struct lu_env *env,
+				   struct osd_device *osd,
+				   const struct lu_fid *fid,
+				   uint64_t oid)
+{
+	struct osd_idmap_cache *idc;
+
+	idc = osd_idc_find(env, osd, fid);
+	if (!idc) {
+		idc = osd_idc_add(env, osd, fid);
+		if (IS_ERR(idc))
+			return PTR_ERR(idc);
+	}
+
+	idc->oic_dnode = oid;
+	idc->oic_remote = 0;
+
+	return 0;
+}
--- /dev/null
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ */
+/*
+ * lustre/osd-zfs/osd_scrub.c
+ *
+ * Top-level entry points into osd module
+ *
+ * The OI scrub is used for rebuilding Object Index files when restoring the
+ * MDT from a file-level backup.
+ *
+ * The otable based iterator scans ZFS objects to feed up layer LFSCK.
+ *
+ * Author: Fan Yong <fan.yong@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LFSCK
+
+#include <linux/kthread.h>
+#include <uapi/linux/lustre/lustre_idl.h>
+#include <lustre_disk.h>
+#include <dt_object.h>
+#include <linux/xattr.h>
+#include <lustre_scrub.h>
+#include <obd_class.h>
+#include <lustre_nodemap.h>
+#include <sys/dsl_dataset.h>
+
+#include "osd_internal.h"
+
+#define OSD_OTABLE_MAX_HASH ((1ULL << 48) - 1)
+#define OTABLE_PREFETCH 256
+
+#define DTO_INDEX_INSERT 1
+#define DTO_INDEX_DELETE 2
+#define DTO_INDEX_UPDATE 3
+
+static inline bool osd_scrub_has_window(struct osd_otable_it *it)
+{
+	/* The scrub may run at most OTABLE_PREFETCH objects ahead of the
+	 * up-layer LFSCK consumer; true while prefetching may continue. */
+	return it->ooi_prefetched < OTABLE_PREFETCH;
+}
+
+/**
+ * update/insert/delete the specified OI mapping (@fid @id) according to the ops
+ *
+ * Runs one ZFS transaction that applies the requested change to the OI ZAP
+ * selected by @fid. ENOENT on update/delete and EEXIST on insert are treated
+ * as benign races with concurrent unlink/create.
+ *
+ * \retval 1, changed nothing
+ * \retval 0, changed successfully
+ * \retval -ve, on error
+ */
+static int osd_scrub_refresh_mapping(const struct lu_env *env,
+				     struct osd_device *dev,
+				     const struct lu_fid *fid,
+				     uint64_t oid, int ops,
+				     bool force, const char *name)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
+	char *buf = info->oti_str;
+	dmu_tx_t *tx = NULL;
+	dnode_t *dn = NULL;
+	uint64_t zapid;
+	int rc;
+	ENTRY;
+
+	/* Under dryrun mode the OI table is left untouched unless the
+	 * caller explicitly forces the repair. */
+	if (dev->od_scrub.os_file.sf_param & SP_DRYRUN && !force)
+		GOTO(log, rc = 0);
+
+	tx = dmu_tx_create(dev->od_os);
+	if (!tx)
+		GOTO(log, rc = -ENOMEM);
+
+	zapid = osd_get_name_n_idx(env, dev, fid, buf,
+				   sizeof(info->oti_str), &dn);
+	osd_tx_hold_zap(tx, zapid, dn,
+			ops == DTO_INDEX_INSERT ? TRUE : FALSE, NULL);
+	rc = -dmu_tx_assign(tx, TXG_WAIT);
+	if (rc) {
+		dmu_tx_abort(tx);
+		GOTO(log, rc);
+	}
+
+	switch (ops) {
+	case DTO_INDEX_UPDATE:
+		zde->zde_pad = 0;
+		zde->zde_dnode = oid;
+		zde->zde_type = 0; /* The type in OI mapping is useless. */
+		rc = -zap_update(dev->od_os, zapid, buf, 8, sizeof(*zde) / 8,
+				 zde, tx);
+		if (unlikely(rc == -ENOENT)) {
+			/* Some unlink thread may have removed the OI mapping. */
+			rc = 1;
+		}
+		break;
+	case DTO_INDEX_INSERT:
+		zde->zde_pad = 0;
+		zde->zde_dnode = oid;
+		zde->zde_type = 0; /* The type in OI mapping is useless. */
+		rc = osd_zap_add(dev, zapid, dn, buf, 8, sizeof(*zde) / 8,
+				 zde, tx);
+		if (unlikely(rc == -EEXIST))
+			rc = 1;
+		break;
+	case DTO_INDEX_DELETE:
+		rc = osd_zap_remove(dev, zapid, dn, buf, tx);
+		if (rc == -ENOENT) {
+			/* It is normal that the unlink thread has removed the
+			 * OI mapping already. */
+			rc = 1;
+		}
+		break;
+	default:
+		LASSERTF(0, "Unexpected ops %d\n", ops);
+		rc = -EINVAL;
+		break;
+	}
+
+	dmu_tx_commit(tx);
+	GOTO(log, rc);
+
+log:
+	CDEBUG(D_LFSCK, "%s: refresh OI map for scrub, op %d, force %s, "
+	       DFID" => %llu (%s): rc = %d\n", osd_name(dev), ops,
+	       force ? "yes" : "no", PFID(fid), oid, name ? name : "null", rc);
+
+	return rc;
+}
+
+/*
+ * Check the OI mapping for one scanned object (@fid => @oid) and repair it
+ * (insert or update) when inconsistent. @val is the pre-check result from
+ * the caller; a negative @val is accounted as a failure. Serialized by
+ * os_rwsem. Returns the error only when SP_FAILOUT is set, otherwise 0.
+ */
+static int
+osd_scrub_check_update(const struct lu_env *env, struct osd_device *dev,
+		       const struct lu_fid *fid, uint64_t oid, int val)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct scrub_file *sf = &scrub->os_file;
+	struct osd_inconsistent_item *oii = NULL;
+	nvlist_t *nvbuf = NULL;
+	dnode_t *dn = NULL;
+	uint64_t oid2;
+	int ops = DTO_INDEX_UPDATE;
+	int rc;
+	ENTRY;
+
+	down_write(&scrub->os_rwsem);
+	scrub->os_new_checked++;
+	if (val < 0)
+		GOTO(out, rc = val);
+
+	/* os_in_prior means this pair came from the inconsistent list head. */
+	if (scrub->os_in_prior)
+		oii = list_entry(scrub->os_inconsistent_items.next,
+				 struct osd_inconsistent_item, oii_list);
+
+	if (oid < sf->sf_pos_latest_start && !oii)
+		GOTO(out, rc = 0);
+
+	if (oii && oii->oii_insert) {
+		ops = DTO_INDEX_INSERT;
+		goto zget;
+	}
+
+	rc = osd_fid_lookup(env, dev, fid, &oid2);
+	if (rc) {
+		if (rc != -ENOENT)
+			GOTO(out, rc);
+
+		/* No OI mapping at all: insert one. */
+		ops = DTO_INDEX_INSERT;
+
+zget:
+		rc = __osd_obj2dnode(dev->od_os, oid, &dn);
+		if (rc) {
+			/* Someone removed the object by race. */
+			if (rc == -ENOENT || rc == -EEXIST)
+				rc = 0;
+			GOTO(out, rc);
+		}
+
+		scrub->os_full_speed = 1;
+		sf->sf_flags |= SF_INCONSISTENT;
+	} else if (oid == oid2) {
+		/* The OI mapping already points at this object. */
+		GOTO(out, rc = 0);
+	} else {
+		struct lustre_mdt_attrs *lma = NULL;
+		int size;
+
+		/* The OI maps @fid to a different dnode @oid2; check whether
+		 * that object really claims the FID before overwriting. */
+		rc = __osd_xattr_load_by_oid(dev, oid2, &nvbuf);
+		if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+			goto update;
+		if (rc)
+			GOTO(out, rc);
+
+		rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
+					       (uchar_t **)&lma, &size);
+		if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+			goto update;
+		if (rc)
+			GOTO(out, rc);
+
+		lustre_lma_swab(lma);
+		if (unlikely(lu_fid_eq(&lma->lma_self_fid, fid))) {
+			CDEBUG(D_LFSCK, "%s: the FID "DFID" is used by "
+			       "two objects: %llu and %llu (in OI)\n",
+			       osd_name(dev), PFID(fid), oid, oid2);
+
+			GOTO(out, rc = -EEXIST);
+		}
+
+update:
+		scrub->os_full_speed = 1;
+		sf->sf_flags |= SF_INCONSISTENT;
+	}
+
+	rc = osd_scrub_refresh_mapping(env, dev, fid, oid, ops, false, NULL);
+	if (!rc) {
+		if (scrub->os_in_prior)
+			sf->sf_items_updated_prior++;
+		else
+			sf->sf_items_updated++;
+	}
+
+	GOTO(out, rc);
+
+out:
+	if (nvbuf)
+		nvlist_free(nvbuf);
+
+	if (rc < 0) {
+		sf->sf_items_failed++;
+		if (sf->sf_pos_first_inconsistent == 0 ||
+		    sf->sf_pos_first_inconsistent > oid)
+			sf->sf_pos_first_inconsistent = oid;
+	} else {
+		rc = 0;
+	}
+
+	/* A conflicting unlink may happen during the OI scrub; if that
+	 * happened, remove the newly added OI mapping. */
+	if (ops == DTO_INDEX_INSERT && dn && dn->dn_free_txg)
+		osd_scrub_refresh_mapping(env, dev, fid, oid,
+					  DTO_INDEX_DELETE, false, NULL);
+	up_write(&scrub->os_rwsem);
+
+	if (dn)
+		osd_dnode_rele(dn);
+
+	if (oii) {
+		spin_lock(&scrub->os_lock);
+		if (likely(!list_empty(&oii->oii_list)))
+			list_del(&oii->oii_list);
+		spin_unlock(&scrub->os_lock);
+		OBD_FREE_PTR(oii);
+	}
+
+	RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
+}
+
+/* Prepare one scrub run: apply the start flags to the scrub file, pick
+ * full/partial speed, compute the start position, persist the file and
+ * mark the scrub thread SVC_RUNNING on success. */
+static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct scrub_file *sf = &scrub->os_file;
+	__u32 flags = scrub->os_start_flags;
+	int rc;
+	bool drop_dryrun = false;
+	ENTRY;
+
+	CDEBUG(D_LFSCK, "%s: OI scrub prep, flags = 0x%x\n",
+	       scrub->os_name, flags);
+
+	down_write(&scrub->os_rwsem);
+	if (flags & SS_SET_FAILOUT)
+		sf->sf_param |= SP_FAILOUT;
+	else if (flags & SS_CLEAR_FAILOUT)
+		sf->sf_param &= ~SP_FAILOUT;
+
+	if (flags & SS_SET_DRYRUN) {
+		sf->sf_param |= SP_DRYRUN;
+	} else if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) {
+		/* Leaving dryrun: rescan from the first known inconsistency. */
+		sf->sf_param &= ~SP_DRYRUN;
+		drop_dryrun = true;
+	}
+
+	if (flags & SS_RESET)
+		scrub_file_reset(scrub, dev->od_uuid, 0);
+
+	scrub->os_partial_scan = 0;
+	if (flags & SS_AUTO_FULL) {
+		scrub->os_full_speed = 1;
+		sf->sf_flags |= SF_AUTO;
+	} else if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
+				   SF_UPGRADE)) {
+		scrub->os_full_speed = 1;
+	} else {
+		scrub->os_full_speed = 0;
+	}
+
+	spin_lock(&scrub->os_lock);
+	scrub->os_in_prior = 0;
+	scrub->os_waiting = 0;
+	scrub->os_paused = 0;
+	scrub->os_in_join = 0;
+	scrub->os_full_scrub = 0;
+	spin_unlock(&scrub->os_lock);
+	scrub->os_new_checked = 0;
+	if (drop_dryrun && sf->sf_pos_first_inconsistent != 0)
+		sf->sf_pos_latest_start = sf->sf_pos_first_inconsistent;
+	else if (sf->sf_pos_last_checkpoint != 0)
+		sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
+	else
+		sf->sf_pos_latest_start = 1;
+
+	scrub->os_pos_current = sf->sf_pos_latest_start;
+	sf->sf_status = SS_SCANNING;
+	sf->sf_time_latest_start = cfs_time_current_sec();
+	sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
+	sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1;
+	rc = scrub_file_store(env, scrub);
+	if (!rc) {
+		spin_lock(&scrub->os_lock);
+		thread_set_flags(thread, SVC_RUNNING);
+		spin_unlock(&scrub->os_lock);
+		wake_up_all(&thread->t_ctl_waitq);
+	}
+	up_write(&scrub->os_rwsem);
+
+	RETURN(rc);
+}
+
+/* Finalize one scrub run: fold counters into the scrub file, record the
+ * final status according to @result (>0 completed, 0 stopped/paused,
+ * <0 failed) and persist the file. Returns @result unless storing fails. */
+static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev,
+			  int result)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct scrub_file *sf = &scrub->os_file;
+	int rc;
+	ENTRY;
+
+	CDEBUG(D_LFSCK, "%s: OI scrub post with result = %d\n",
+	       scrub->os_name, result);
+
+	down_write(&scrub->os_rwsem);
+	spin_lock(&scrub->os_lock);
+	thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+	spin_unlock(&scrub->os_lock);
+	if (scrub->os_new_checked > 0) {
+		sf->sf_items_checked += scrub->os_new_checked;
+		scrub->os_new_checked = 0;
+		sf->sf_pos_last_checkpoint = scrub->os_pos_current;
+	}
+	sf->sf_time_last_checkpoint = cfs_time_current_sec();
+	if (result > 0) {
+		sf->sf_status = SS_COMPLETED;
+		if (!(sf->sf_param & SP_DRYRUN)) {
+			/* A complete non-dryrun pass clears the repair flags. */
+			memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
+			sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
+					  SF_UPGRADE | SF_AUTO);
+		}
+		sf->sf_time_last_complete = sf->sf_time_last_checkpoint;
+		sf->sf_success_count++;
+	} else if (result == 0) {
+		if (scrub->os_paused)
+			sf->sf_status = SS_PAUSED;
+		else
+			sf->sf_status = SS_STOPPED;
+	} else {
+		sf->sf_status = SS_FAILED;
+	}
+	sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
+					    scrub->os_time_last_checkpoint);
+	rc = scrub_file_store(env, scrub);
+	up_write(&scrub->os_rwsem);
+
+	RETURN(rc < 0 ? rc : result);
+}
+
+/* iteration engine */
+
+/* Wait condition for the scrub thread: wake when the prefetch window has
+ * room, prior inconsistent items are queued, the iterator user is waiting,
+ * or the thread is being stopped. */
+static inline int
+osd_scrub_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
+{
+	spin_lock(&scrub->os_lock);
+	if (osd_scrub_has_window(it) ||
+	    !list_empty(&scrub->os_inconsistent_items) ||
+	    it->ooi_waiting || !thread_is_running(&scrub->os_thread))
+		scrub->os_waiting = 0;
+	else
+		scrub->os_waiting = 1;
+	spin_unlock(&scrub->os_lock);
+
+	return !scrub->os_waiting;
+}
+
+/* Produce the next (@fid, @oid) pair to verify: prior inconsistent items
+ * first, then the forward dnode scan via dmu_object_next(). Honours the
+ * fail-injection hooks and, in partial-speed mode, the prefetch window.
+ * Returns 0 on success or a SCRUB_NEXT_* action code / -ve error. */
+static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev,
+			  struct lu_fid *fid, uint64_t *oid)
+{
+	struct l_wait_info lwi = { 0 };
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct osd_otable_it *it = dev->od_otable_it;
+	struct lustre_mdt_attrs *lma = NULL;
+	nvlist_t *nvbuf = NULL;
+	int size = 0;
+	int rc = 0;
+	ENTRY;
+
+	/* Fault injection: delay the scan by cfs_fail_val seconds. */
+	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
+		lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
+		if (likely(lwi.lwi_timeout > 0)) {
+			l_wait_event(thread->t_ctl_waitq,
+				!list_empty(&scrub->os_inconsistent_items) ||
+				!thread_is_running(thread),
+				&lwi);
+			if (unlikely(!thread_is_running(thread)))
+				RETURN(SCRUB_NEXT_EXIT);
+		}
+	}
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
+		spin_lock(&scrub->os_lock);
+		thread_set_flags(thread, SVC_STOPPING);
+		spin_unlock(&scrub->os_lock);
+		RETURN(SCRUB_NEXT_CRASH);
+	}
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL))
+		RETURN(SCRUB_NEXT_FATAL);
+
+again:
+	if (nvbuf) {
+		nvlist_free(nvbuf);
+		nvbuf = NULL;
+		lma = NULL;
+	}
+
+	/* Prior inconsistent items (reported by RPC handlers) take
+	 * precedence over the forward scan. */
+	if (!list_empty(&scrub->os_inconsistent_items)) {
+		spin_lock(&scrub->os_lock);
+		if (likely(!list_empty(&scrub->os_inconsistent_items))) {
+			struct osd_inconsistent_item *oii;
+
+			oii = list_entry(scrub->os_inconsistent_items.next,
+				struct osd_inconsistent_item, oii_list);
+			*fid = oii->oii_cache.oic_fid;
+			*oid = oii->oii_cache.oic_dnode;
+			scrub->os_in_prior = 1;
+			spin_unlock(&scrub->os_lock);
+
+			GOTO(out, rc = 0);
+		}
+		spin_unlock(&scrub->os_lock);
+	}
+
+	if (!scrub->os_full_speed && !osd_scrub_has_window(it)) {
+		memset(&lwi, 0, sizeof(lwi));
+		l_wait_event(thread->t_ctl_waitq,
+			     osd_scrub_wakeup(scrub, it),
+			     &lwi);
+	}
+
+	if (unlikely(!thread_is_running(thread)))
+		GOTO(out, rc = SCRUB_NEXT_EXIT);
+
+	rc = -dmu_object_next(dev->od_os, &scrub->os_pos_current, B_FALSE, 0);
+	if (rc)
+		GOTO(out, rc = (rc == -ESRCH ? SCRUB_NEXT_BREAK : rc));
+
+	rc = __osd_xattr_load_by_oid(dev, scrub->os_pos_current, &nvbuf);
+	if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+		goto again;
+
+	if (rc)
+		GOTO(out, rc);
+
+	LASSERT(nvbuf != NULL);
+	rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
+				       (uchar_t **)&lma, &size);
+	if (!rc) {
+		lustre_lma_swab(lma);
+		/* Skip objects excluded from OI and agent objects. */
+		if (likely(!(lma->lma_compat & LMAC_NOT_IN_OI) &&
+			   !(lma->lma_incompat & LMAI_AGENT))) {
+			*fid = lma->lma_self_fid;
+			*oid = scrub->os_pos_current;
+
+			GOTO(out, rc = 0);
+		}
+	}
+
+	if (!scrub->os_full_speed) {
+		spin_lock(&scrub->os_lock);
+		it->ooi_prefetched++;
+		if (it->ooi_waiting) {
+			it->ooi_waiting = 0;
+			wake_up_all(&thread->t_ctl_waitq);
+		}
+		spin_unlock(&scrub->os_lock);
+	}
+
+	goto again;
+
+out:
+	if (nvbuf)
+		nvlist_free(nvbuf);
+
+	return rc;
+}
+
+/* Verify/repair one (@fid, @oid) pair, advance the prefetch accounting in
+ * partial-speed mode, and take a periodic checkpoint. Checkpoint failures
+ * are logged but do not stop the scrub. */
+static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev,
+			  const struct lu_fid *fid, uint64_t oid, int rc)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct osd_otable_it *it = dev->od_otable_it;
+
+	rc = osd_scrub_check_update(env, dev, fid, oid, rc);
+	if (!scrub->os_in_prior) {
+		if (!scrub->os_full_speed) {
+			spin_lock(&scrub->os_lock);
+			it->ooi_prefetched++;
+			if (it->ooi_waiting) {
+				it->ooi_waiting = 0;
+				wake_up_all(&thread->t_ctl_waitq);
+			}
+			spin_unlock(&scrub->os_lock);
+		}
+	} else {
+		scrub->os_in_prior = 0;
+	}
+
+	if (rc)
+		return rc;
+
+	rc = scrub_checkpoint(env, scrub);
+	if (rc) {
+		CDEBUG(D_LFSCK, "%s: fail to checkpoint, pos = %llu: "
+		       "rc = %d\n", scrub->os_name, scrub->os_pos_current, rc);
+		/* Continue, as long as the scrub itself can go ahead. */
+	}
+
+	return 0;
+}
+
+/* Body of the OI scrub kthread: prepare, optionally wait for the otable
+ * iterator user, loop over osd_scrub_next()/osd_scrub_exec(), then post
+ * the result and drain any leftover inconsistent items. */
+static int osd_scrub_main(void *args)
+{
+	struct lu_env env;
+	struct osd_device *dev = (struct osd_device *)args;
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct lu_fid *fid;
+	uint64_t oid;
+	int rc = 0;
+	ENTRY;
+
+	rc = lu_env_init(&env, LCT_LOCAL | LCT_DT_THREAD);
+	if (rc) {
+		CDEBUG(D_LFSCK, "%s: OI scrub fail to init env: rc = %d\n",
+		       scrub->os_name, rc);
+		GOTO(noenv, rc);
+	}
+
+	rc = osd_scrub_prep(&env, dev);
+	if (rc) {
+		CDEBUG(D_LFSCK, "%s: OI scrub fail to scrub prep: rc = %d\n",
+		       scrub->os_name, rc);
+		GOTO(out, rc);
+	}
+
+	if (!scrub->os_full_speed) {
+		/* Partial speed: wait until the otable iterator user is
+		 * ready, then start from the user's position. */
+		struct l_wait_info lwi = { 0 };
+		struct osd_otable_it *it = dev->od_otable_it;
+
+		l_wait_event(thread->t_ctl_waitq,
+			     it->ooi_user_ready || !thread_is_running(thread),
+			     &lwi);
+		if (unlikely(!thread_is_running(thread)))
+			GOTO(post, rc = 0);
+
+		scrub->os_pos_current = it->ooi_pos;
+	}
+
+	CDEBUG(D_LFSCK, "%s: OI scrub start, flags = 0x%x, pos = %llu\n",
+	       scrub->os_name, scrub->os_start_flags,
+	       scrub->os_pos_current);
+
+	fid = &osd_oti_get(&env)->oti_fid;
+	while (!rc && thread_is_running(thread)) {
+		rc = osd_scrub_next(&env, dev, fid, &oid);
+		switch (rc) {
+		case SCRUB_NEXT_EXIT:
+			GOTO(post, rc = 0);
+		case SCRUB_NEXT_CRASH:
+			spin_lock(&scrub->os_lock);
+			thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+			spin_unlock(&scrub->os_lock);
+			GOTO(out, rc = -EINVAL);
+		case SCRUB_NEXT_FATAL:
+			GOTO(post, rc = -EINVAL);
+		case SCRUB_NEXT_BREAK:
+			GOTO(post, rc = 1);
+		}
+
+		rc = osd_scrub_exec(&env, dev, fid, oid, rc);
+	}
+
+	GOTO(post, rc);
+
+post:
+	rc = osd_scrub_post(&env, dev, rc);
+	CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d\n",
+	       scrub->os_name, scrub->os_pos_current, rc);
+
+out:
+	while (!list_empty(&scrub->os_inconsistent_items)) {
+		struct osd_inconsistent_item *oii;
+
+		oii = list_entry(scrub->os_inconsistent_items.next,
+				 struct osd_inconsistent_item, oii_list);
+		list_del_init(&oii->oii_list);
+		OBD_FREE_PTR(oii);
+	}
+
+	lu_env_fini(&env);
+
+noenv:
+	spin_lock(&scrub->os_lock);
+	thread_set_flags(thread, SVC_STOPPED);
+	wake_up_all(&thread->t_ctl_waitq);
+	spin_unlock(&scrub->os_lock);
+	return rc;
+}
+
+/* initial OI scrub */
+
+struct osd_lf_map;
+
+/* Callback to handle one directory entry found during initial OI scrub. */
+typedef int (*handle_dirent_t)(const struct lu_env *, struct osd_device *,
+			       const char *, uint64_t, uint64_t,
+			       enum osd_lf_flags, bool);
+static int osd_ios_varfid_hd(const struct lu_env *, struct osd_device *,
+			     const char *, uint64_t, uint64_t,
+			     enum osd_lf_flags, bool);
+static int osd_ios_uld_hd(const struct lu_env *, struct osd_device *,
+			  const char *, uint64_t, uint64_t,
+			  enum osd_lf_flags, bool);
+
+/* Callback to scan one directory during initial OI scrub. */
+typedef int (*scan_dir_t)(const struct lu_env *, struct osd_device *,
+			  uint64_t, handle_dirent_t, enum osd_lf_flags);
+static int osd_ios_general_sd(const struct lu_env *, struct osd_device *,
+			      uint64_t, handle_dirent_t, enum osd_lf_flags);
+static int osd_ios_ROOT_sd(const struct lu_env *, struct osd_device *,
+			   uint64_t, handle_dirent_t, enum osd_lf_flags);
+
+/* Describes one known local file/directory and how to scan it. */
+struct osd_lf_map {
+	char *olm_name;				/* local file name */
+	struct lu_fid olm_fid;			/* well-known FID, if any */
+	enum osd_lf_flags olm_flags;
+	scan_dir_t olm_scan_dir;		/* how to scan its sub-items */
+	handle_dirent_t olm_handle_dirent;	/* how to handle one entry */
+};
+
+/* Known local files handled by the initial OI scrub; add newly introduced
+ * local files to this list in the future. Terminated by a NULL olm_name. */
+static const struct osd_lf_map osd_lf_maps[] = {
+	/* CONFIGS */
+	{
+		.olm_name		= MOUNT_CONFIGS_DIR,
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_LOCAL_FILE,
+			.f_oid	= MGS_CONFIGS_OID,
+		},
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_varfid_hd,
+	},
+
+	/* NIDTBL_VERSIONS */
+	{
+		.olm_name		= MGS_NIDTBL_DIR,
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_varfid_hd,
+	},
+
+	/* PENDING */
+	{
+		.olm_name		= "PENDING",
+	},
+
+	/* ROOT */
+	{
+		.olm_name		= "ROOT",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_ROOT,
+			.f_oid	= FID_OID_ROOT,
+		},
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_ROOT_sd,
+	},
+
+	/* fld */
+	{
+		.olm_name		= "fld",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_LOCAL_FILE,
+			.f_oid	= FLD_INDEX_OID,
+		},
+	},
+
+	/* changelog_catalog */
+	{
+		.olm_name		= CHANGELOG_CATALOG,
+	},
+
+	/* changelog_users */
+	{
+		.olm_name		= CHANGELOG_USERS,
+	},
+
+	/* quota_master */
+	{
+		.olm_name		= QMT_DIR,
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_varfid_hd,
+	},
+
+	/* quota_slave */
+	{
+		.olm_name		= QSD_DIR,
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_varfid_hd,
+	},
+
+	/* LFSCK */
+	{
+		.olm_name		= LFSCK_DIR,
+		.olm_flags		= OLF_SCAN_SUBITEMS,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_varfid_hd,
+	},
+
+	/* lfsck_bookmark */
+	{
+		.olm_name		= LFSCK_BOOKMARK,
+	},
+
+	/* lfsck_layout */
+	{
+		.olm_name		= LFSCK_LAYOUT,
+	},
+
+	/* lfsck_namespace */
+	{
+		.olm_name		= LFSCK_NAMESPACE,
+	},
+
+	/* OSP update logs update_log{_dir} use f_seq = FID_SEQ_UPDATE_LOG{_DIR}
+	 * and f_oid = index for their log files. See lu_update_log{_dir}_fid()
+	 * for more details. */
+
+	/* update_log */
+	{
+		.olm_name		= "update_log",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_UPDATE_LOG,
+		},
+		.olm_flags		= OLF_IDX_IN_FID,
+	},
+
+	/* update_log_dir */
+	{
+		.olm_name		= "update_log_dir",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_UPDATE_LOG_DIR,
+		},
+		.olm_flags		= OLF_SCAN_SUBITEMS | OLF_IDX_IN_FID,
+		.olm_scan_dir		= osd_ios_general_sd,
+		.olm_handle_dirent	= osd_ios_uld_hd,
+	},
+
+	/* hsm_actions */
+	{
+		.olm_name		= HSM_ACTIONS,
+	},
+
+	/* nodemap */
+	{
+		.olm_name		= LUSTRE_NODEMAP_NAME,
+	},
+
+	{
+		.olm_name		= NULL
+	}
+};
+
+/* Known files under .lustre/; add newly introduced ones to this list in the
+ * future. Terminated by a NULL olm_name. */
+static const struct osd_lf_map osd_dl_maps[] = {
+	/* .lustre/fid */
+	{
+		.olm_name		= "fid",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_DOT_LUSTRE,
+			.f_oid	= FID_OID_DOT_LUSTRE_OBF,
+		},
+	},
+
+	/* .lustre/lost+found */
+	{
+		.olm_name		= "lost+found",
+		.olm_fid		= {
+			.f_seq	= FID_SEQ_DOT_LUSTRE,
+			.f_oid	= FID_OID_DOT_LUSTRE_LPF,
+		},
+	},
+
+	{
+		.olm_name		= NULL
+	}
+};
+
+/* One directory queued for later scanning, linked on od_ios_list. */
+struct osd_ios_item {
+	struct list_head oii_list;		/* link on od_ios_list */
+	uint64_t	 oii_parent;		/* dnode of the directory */
+	enum osd_lf_flags oii_flags;
+	scan_dir_t	 oii_scan_dir;		/* scanner for this dir */
+	handle_dirent_t	 oii_handle_dirent;	/* handler for its entries */
+};
+
+/* Queue directory @parent on od_ios_list for later scanning during the
+ * initial OI scrub. Returns 0 or -ENOMEM. */
+static int osd_ios_new_item(struct osd_device *dev, uint64_t parent,
+			    enum osd_lf_flags flags, scan_dir_t scan_dir,
+			    handle_dirent_t handle_dirent)
+{
+	struct osd_ios_item *item;
+
+	OBD_ALLOC_PTR(item);
+	if (!item) {
+		CWARN("%s: initial OI scrub failed to add item for %llu\n",
+		      osd_name(dev), parent);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&item->oii_list);
+	item->oii_parent = parent;
+	item->oii_flags = flags;
+	item->oii_scan_dir = scan_dir;
+	item->oii_handle_dirent = handle_dirent;
+	list_add_tail(&item->oii_list, &dev->od_ios_list);
+
+	return 0;
+}
+
+/**
+ * verify FID-in-LMA and OI entry for one object
+ *
+ * ios: Initial OI Scrub.
+ *
+ * Loads the object's LMA to find its self FID, falling back to @fid when
+ * no LMA is present (possibly with f_oid replaced by od_index under
+ * OLF_IDX_IN_FID). Then checks the OI mapping and inserts/updates it when
+ * missing or stale, marking the scrub file SF_RECREATED/SF_INCONSISTENT.
+ */
+static int osd_ios_scan_one(const struct lu_env *env, struct osd_device *dev,
+			    const struct lu_fid *fid, uint64_t parent,
+			    uint64_t oid, const char *name,
+			    enum osd_lf_flags flags)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct scrub_file *sf = &scrub->os_file;
+	struct lustre_mdt_attrs *lma = NULL;
+	nvlist_t *nvbuf = NULL;
+	struct lu_fid tfid;
+	uint64_t oid2 = 0;
+	__u64 flag = 0;
+	int size = 0;
+	int op = 0;
+	int rc;
+	ENTRY;
+
+	rc = __osd_xattr_load_by_oid(dev, oid, &nvbuf);
+	if (unlikely(rc == -ENOENT || rc == -EEXIST))
+		RETURN(0);
+
+	if (rc && rc != -ENODATA) {
+		CWARN("%s: initial OI scrub failed to get lma for %llu: "
+		      "rc = %d\n", osd_name(dev), oid, rc);
+
+		RETURN(rc);
+	}
+
+	if (!rc) {
+		LASSERT(nvbuf != NULL);
+		rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
+					       (uchar_t **)&lma, &size);
+		if (rc || size == 0) {
+			LASSERT(lma == NULL);
+			rc = -ENODATA;
+		} else {
+			LASSERTF(lma != NULL, "corrupted LMA, size %d\n", size);
+			lustre_lma_swab(lma);
+			if (lma->lma_compat & LMAC_NOT_IN_OI) {
+				/* Objects out of OI need no mapping. */
+				nvlist_free(nvbuf);
+				RETURN(0);
+			}
+
+			tfid = lma->lma_self_fid;
+		}
+		nvlist_free(nvbuf);
+	}
+
+	if (rc == -ENODATA) {
+		if (!fid) {
+			/* Skip the object without FID-in-LMA */
+			CDEBUG(D_LFSCK, "%s: %llu has no FID-in-LMA, skip it\n",
+			       osd_name(dev), oid);
+
+			RETURN(0);
+		}
+
+		LASSERT(!fid_is_zero(fid));
+
+		tfid = *fid;
+		if (flags & OLF_IDX_IN_FID) {
+			LASSERT(dev->od_index >= 0);
+
+			tfid.f_oid = dev->od_index;
+		}
+	}
+
+	rc = osd_fid_lookup(env, dev, &tfid, &oid2);
+	if (rc) {
+		if (rc != -ENOENT) {
+			CWARN("%s: initial OI scrub failed to lookup fid for "
+			      DFID"=>%llu: rc = %d\n",
+			      osd_name(dev), PFID(&tfid), oid, rc);
+
+			RETURN(rc);
+		}
+
+		flag = SF_RECREATED;
+		op = DTO_INDEX_INSERT;
+	} else {
+		if (oid == oid2)
+			RETURN(0);
+
+		flag = SF_INCONSISTENT;
+		op = DTO_INDEX_UPDATE;
+	}
+
+	/* Record the inconsistency kind in the scrub file before repairing. */
+	if (!(sf->sf_flags & flag)) {
+		scrub_file_reset(scrub, dev->od_uuid, flag);
+		rc = scrub_file_store(env, scrub);
+		if (rc)
+			RETURN(rc);
+	}
+
+	rc = osd_scrub_refresh_mapping(env, dev, &tfid, oid, op, true, name);
+
+	RETURN(rc > 0 ? 0 : rc);
+}
+
+/*
+ * Dirent handler for directories whose entries have variable FIDs (the
+ * FID comes from each object's own LMA, so NULL is passed as @fid).
+ * Sub-directories are queued for recursive scanning with this same handler.
+ */
+static int osd_ios_varfid_hd(const struct lu_env *env, struct osd_device *dev,
+			     const char *name, uint64_t parent, uint64_t oid,
+			     enum osd_lf_flags flags, bool is_dir)
+{
+	int rc;
+	ENTRY;
+
+	rc = osd_ios_scan_one(env, dev, NULL, parent, oid, name, 0);
+	if (!rc && is_dir)
+		rc = osd_ios_new_item(dev, oid, flags, osd_ios_general_sd,
+				      osd_ios_varfid_hd);
+
+	RETURN(rc);
+}
+
+/*
+ * Dirent handler for directories whose entry names encode the FID in
+ * "[seq:oid:ver]" (DFID) format, e.g. update-log dirs. The FID parsed
+ * from the name is verified against the OI.
+ */
+static int osd_ios_uld_hd(const struct lu_env *env, struct osd_device *dev,
+			  const char *name, uint64_t parent, uint64_t oid,
+			  enum osd_lf_flags flags, bool is_dir)
+{
+	struct lu_fid tfid;
+	int rc;
+	ENTRY;
+
+	/* skip any non-DFID format name */
+	if (name[0] != '[')
+		RETURN(0);
+
+	/* skip the start '[' */
+	sscanf(&name[1], SFID, RFID(&tfid));
+	if (fid_is_sane(&tfid))
+		rc = osd_ios_scan_one(env, dev, &tfid, parent, oid, name, 0);
+	else
+		rc = -EIO;
+
+	RETURN(rc);
+}
+
+/*
+ * General scanner for the directories except /ROOT during initial OI scrub.
+ * It scans the name entries under the given directory one by one. For each
+ * entry, verifies its OI mapping via the given @handle_dirent.
+ */
+static int osd_ios_general_sd(const struct lu_env *env, struct osd_device *dev,
+			      uint64_t parent, handle_dirent_t handle_dirent,
+			      enum osd_lf_flags flags)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct luz_direntry *zde = &info->oti_zde;
+	zap_attribute_t *za = &info->oti_za;
+	zap_cursor_t *zc = &info->oti_zc;
+	int rc;
+	ENTRY;
+
+	zap_cursor_init_serialized(zc, dev->od_os, parent, 0);
+	rc = -zap_cursor_retrieve(zc, za);
+	/* -ENOENT at the initial position means no entry there yet;
+	 * step forward and let the loop below decide. Any other error
+	 * aborts the scan of this directory. */
+	if (rc == -ENOENT)
+		zap_cursor_advance(zc);
+	else if (rc)
+		GOTO(log, rc);
+
+	while (1) {
+		rc = -zap_cursor_retrieve(zc, za);
+		/* -ENOENT here means the cursor ran past the last entry:
+		 * normal end of the directory, report success. */
+		if (rc)
+			GOTO(log, rc = (rc == -ENOENT ? 0 : rc));
+
+		/* skip the entry started with '.' */
+		if (likely(za->za_name[0] != '.')) {
+			rc = osd_zap_lookup(dev, parent, NULL, za->za_name,
+					za->za_integer_length,
+					sizeof(*zde) / za->za_integer_length,
+					(void *)zde);
+			if (rc) {
+				/* Lookup failure for one entry is logged
+				 * but does not stop the directory scan. */
+				CWARN("%s: initial OI scrub failed to lookup "
+				      "%s under %llu: rc = %d\n",
+				      osd_name(dev), za->za_name, parent, rc);
+				continue;
+			}
+
+			rc = handle_dirent(env, dev, za->za_name, parent,
+					zde->lzd_reg.zde_dnode, flags,
+					S_ISDIR(DTTOIF(zde->lzd_reg.zde_type)) ?
+					true : false);
+			CDEBUG(D_LFSCK, "%s: initial OI scrub handled %s under "
+			       "%llu: rc = %d\n",
+			       osd_name(dev), za->za_name, parent, rc);
+		}
+
+		zap_cursor_advance(zc);
+	}
+
+log:
+	if (rc)
+		CWARN("%s: initial OI scrub failed to scan the directory %llu: "
+		      "rc = %d\n", osd_name(dev), parent, rc);
+	zap_cursor_fini(zc);
+
+	return rc;
+}
+
+/*
+ * The scanner for the /ROOT directory. Not all items under /ROOT are
+ * scanned during the initial OI scrub; only the .lustre directory and
+ * the sub-items under .lustre are handled.
+ *
+ * Returns 0 on success (including a missing .lustre, which is re-created
+ * later by the MDT); otherwise the last scan failure is returned.
+ */
+static int osd_ios_ROOT_sd(const struct lu_env *env, struct osd_device *dev,
+			   uint64_t parent, handle_dirent_t handle_dirent,
+			   enum osd_lf_flags flags)
+{
+	struct luz_direntry *zde = &osd_oti_get(env)->oti_zde;
+	const struct osd_lf_map *map;
+	uint64_t oid;
+	int rc;
+	int rc1 = 0;
+	ENTRY;
+
+	rc = osd_zap_lookup(dev, parent, NULL, dot_lustre_name, 8,
+			    sizeof(*zde) / 8, (void *)zde);
+	if (rc == -ENOENT) {
+		/* The .lustre directory is lost. That is not fatal. It can
+		 * be re-created in the subsequent MDT start processing. */
+		RETURN(0);
+	}
+
+	if (rc) {
+		CWARN("%s: initial OI scrub failed to find .lustre: "
+		      "rc = %d\n", osd_name(dev), rc);
+
+		RETURN(rc);
+	}
+
+	oid = zde->lzd_reg.zde_dnode;
+	rc = osd_ios_scan_one(env, dev, &LU_DOT_LUSTRE_FID, parent, oid,
+			      dot_lustre_name, 0);
+	if (rc)
+		RETURN(rc);
+
+	/* Verify each known file under .lustre (see osd_dl_maps). */
+	for (map = osd_dl_maps; map->olm_name; map++) {
+		rc = osd_zap_lookup(dev, oid, NULL, map->olm_name, 8,
+				    sizeof(*zde) / 8, (void *)zde);
+		if (rc) {
+			if (rc != -ENOENT)
+				CWARN("%s: initial OI scrub failed to find "
+				      "the entry %s under .lustre: rc = %d\n",
+				      osd_name(dev), map->olm_name, rc);
+			else if (!fid_is_zero(&map->olm_fid))
+				/* Try to remove the stale OI mapping. */
+				osd_scrub_refresh_mapping(env, dev,
+						&map->olm_fid, 0,
+						DTO_INDEX_DELETE, true,
+						map->olm_name);
+			continue;
+		}
+
+		rc = osd_ios_scan_one(env, dev, &map->olm_fid, oid,
+				      zde->lzd_reg.zde_dnode, map->olm_name,
+				      map->olm_flags);
+		if (rc)
+			rc1 = rc;
+	}
+
+	RETURN(rc1);
+}
+
+/*
+ * Run the initial (boot-time) OI scrub: verify the OI mappings of the
+ * known server-local files in osd_lf_maps, then drain the queue of
+ * sub-directories (od_ios_list) populated by the handlers.
+ */
+static void osd_initial_OI_scrub(const struct lu_env *env,
+				 struct osd_device *dev)
+{
+	struct luz_direntry *zde = &osd_oti_get(env)->oti_zde;
+	const struct osd_lf_map *map;
+	int rc;
+	ENTRY;
+
+	for (map = osd_lf_maps; map->olm_name; map++) {
+		rc = osd_zap_lookup(dev, dev->od_root, NULL, map->olm_name, 8,
+				    sizeof(*zde) / 8, (void *)zde);
+		if (rc) {
+			if (rc != -ENOENT)
+				CWARN("%s: initial OI scrub failed "
+				      "to find the entry %s: rc = %d\n",
+				      osd_name(dev), map->olm_name, rc);
+			else if (!fid_is_zero(&map->olm_fid))
+				/* Try to remove the stale OI mapping. */
+				osd_scrub_refresh_mapping(env, dev,
+						&map->olm_fid, 0,
+						DTO_INDEX_DELETE, true,
+						map->olm_name);
+			continue;
+		}
+
+		rc = osd_ios_scan_one(env, dev, &map->olm_fid, dev->od_root,
+				      zde->lzd_reg.zde_dnode, map->olm_name,
+				      map->olm_flags);
+		if (!rc && map->olm_flags & OLF_SCAN_SUBITEMS)
+			osd_ios_new_item(dev, zde->lzd_reg.zde_dnode,
+					 map->olm_flags, map->olm_scan_dir,
+					 map->olm_handle_dirent);
+	}
+
+	/* Process queued directories; handlers may queue more while we
+	 * iterate, so loop until the list is empty. */
+	while (!list_empty(&dev->od_ios_list)) {
+		struct osd_ios_item *item;
+
+		item = list_entry(dev->od_ios_list.next,
+				  struct osd_ios_item, oii_list);
+		list_del_init(&item->oii_list);
+		item->oii_scan_dir(env, dev, item->oii_parent,
+				   item->oii_handle_dirent, item->oii_flags);
+		OBD_FREE_PTR(item);
+	}
+
+	EXIT;
+}
+
+/* OI scrub start/stop */
+
+/*
+ * Start the OI scrub thread on @dev with the given SS_* @flags.
+ * Already-running scrub (-EALREADY) is treated as success.
+ * Returns -EROFS on a read-only device.
+ */
+int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
+		    __u32 flags)
+{
+	int rc;
+	ENTRY;
+
+	if (dev->od_dt_dev.dd_rdonly)
+		RETURN(-EROFS);
+
+	/* od_otable_sem: prevent concurrent start/stop */
+	down(&dev->od_otable_sem);
+	rc = scrub_start(osd_scrub_main, &dev->od_scrub, dev, flags);
+	up(&dev->od_otable_sem);
+
+	RETURN(rc == -EALREADY ? 0 : rc);
+}
+
+/*
+ * Stop the OI scrub thread on @dev. os_paused is set first so the
+ * checkpoint records a paused (resumable) state rather than a failure.
+ */
+static void osd_scrub_stop(struct osd_device *dev)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	ENTRY;
+
+	/* od_otable_sem: prevent concurrent start/stop */
+	down(&dev->od_otable_sem);
+	scrub->os_paused = 1;
+	scrub_stop(scrub);
+	up(&dev->od_otable_sem);
+
+	EXIT;
+}
+
+/* OI scrub setup/cleanup */
+
+/* Name of the on-disk scrub status file under the OSD root. */
+static const char osd_scrub_name[] = "OI_scrub";
+
+/*
+ * Set up OI scrub for @dev at mount time:
+ * - find or create the OI_scrub status object and load the scrub file;
+ * - reset the scrub file on UUID change (device was copied/restored) or
+ *   mark it SS_CRASHED if a previous scan did not complete;
+ * - initialize the OI tables, run the initial OI scrub, and auto-start
+ *   a full scrub when the recorded state requires it.
+ *
+ * Returns 0 on success; on error the status object and OI tables that
+ * were set up are torn down again.
+ */
+int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct scrub_file *sf = &scrub->os_file;
+	struct lu_fid *fid = &info->oti_fid;
+	struct dt_object *obj;
+	uint64_t oid;
+	int rc = 0;
+	bool dirty = false;
+	ENTRY;
+
+	/* Use the dataset GUID as the device UUID for the scrub file. */
+	memcpy(dev->od_uuid,
+	       &dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid,
+	       sizeof(dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid));
+	memset(&dev->od_scrub, 0, sizeof(struct lustre_scrub));
+	init_waitqueue_head(&scrub->os_thread.t_ctl_waitq);
+	init_rwsem(&scrub->os_rwsem);
+	spin_lock_init(&scrub->os_lock);
+	INIT_LIST_HEAD(&scrub->os_inconsistent_items);
+	scrub->os_name = osd_name(dev);
+
+	/* 'What the @fid is' is not important, because the object
+	 * has no OI mapping, and only is visible inside the OSD.*/
+	fid->f_seq = FID_SEQ_IGIF_MAX;
+	if (dev->od_is_ost)
+		fid->f_oid = ((1 << 31) | dev->od_index) + 1;
+	else
+		fid->f_oid = dev->od_index + 1;
+	fid->f_ver = 0;
+	rc = osd_obj_find_or_create(env, dev, dev->od_root,
+				    osd_scrub_name, &oid, fid, false);
+	if (rc)
+		RETURN(rc);
+
+	rc = osd_idc_find_and_init_with_oid(env, dev, fid, oid);
+	if (rc)
+		RETURN(rc);
+
+	obj = lu2dt(lu_object_find_slice(env, osd2lu_dev(dev), fid, NULL));
+	if (IS_ERR_OR_NULL(obj))
+		RETURN(obj ? PTR_ERR(obj) : -ENOENT);
+
+	scrub->os_obj = obj;
+	rc = scrub_file_load(env, scrub);
+	if (rc == -ENOENT || rc == -EFAULT) {
+		/* Missing or corrupted scrub file: start fresh. */
+		scrub_file_init(scrub, dev->od_uuid);
+		dirty = true;
+	} else if (rc < 0) {
+		GOTO(cleanup_obj, rc);
+	} else {
+		if (memcmp(sf->sf_uuid, dev->od_uuid, 16) != 0) {
+			struct obd_uuid *old_uuid;
+			struct obd_uuid *new_uuid;
+
+			OBD_ALLOC_PTR(old_uuid);
+			OBD_ALLOC_PTR(new_uuid);
+			if (!old_uuid || !new_uuid) {
+				CERROR("%s: UUID has been changed, but "
+				       "failed to allocate RAM for report\n",
+				       osd_name(dev));
+			} else {
+				class_uuid_unparse(sf->sf_uuid, old_uuid);
+				class_uuid_unparse(dev->od_uuid, new_uuid);
+				CDEBUG(D_LFSCK, "%s: UUID has been changed "
+				       "from %s to %s\n", osd_name(dev),
+				       old_uuid->uuid, new_uuid->uuid);
+			}
+			/* Device identity changed: all OI data is suspect. */
+			scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT);
+			dirty = true;
+			if (old_uuid)
+				OBD_FREE_PTR(old_uuid);
+			if (new_uuid)
+				OBD_FREE_PTR(new_uuid);
+		} else if (sf->sf_status == SS_SCANNING) {
+			/* Scan was interrupted by a crash/power loss. */
+			sf->sf_status = SS_CRASHED;
+			dirty = true;
+		}
+
+		/* OI count must be a power of two; repair if corrupted. */
+		if ((sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0) {
+			LCONSOLE_WARN("%s: invalid oi count %d, set it to %d\n",
+				      osd_name(dev), sf->sf_oi_count,
+				      osd_oi_count);
+			sf->sf_oi_count = osd_oi_count;
+			dirty = true;
+		}
+	}
+
+	if (sf->sf_pos_last_checkpoint != 0)
+		scrub->os_pos_current = sf->sf_pos_last_checkpoint + 1;
+	else
+		scrub->os_pos_current = 1;
+
+	if (dirty) {
+		rc = scrub_file_store(env, scrub);
+		if (rc)
+			GOTO(cleanup_obj, rc);
+	}
+
+	/* Initialize OI files. */
+	rc = osd_oi_init(env, dev);
+	if (rc < 0)
+		GOTO(cleanup_obj, rc);
+
+	if (!dev->od_dt_dev.dd_rdonly)
+		osd_initial_OI_scrub(env, dev);
+
+	/* Auto-restart a full scrub when a previous run was paused,
+	 * crashed mid-repair, or left unrepaired inconsistencies. */
+	if (!dev->od_dt_dev.dd_rdonly &&
+	    dev->od_auto_scrub_interval != AS_NEVER &&
+	    ((sf->sf_status == SS_PAUSED) ||
+	     (sf->sf_status == SS_CRASHED &&
+	      sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
+			      SF_UPGRADE | SF_AUTO)) ||
+	     (sf->sf_status == SS_INIT &&
+	      sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
+			      SF_UPGRADE))))
+		rc = osd_scrub_start(env, dev, SS_AUTO_FULL);
+
+	if (rc)
+		GOTO(cleanup_oi, rc);
+
+	RETURN(0);
+
+cleanup_oi:
+	osd_oi_fini(env, dev);
+cleanup_obj:
+	dt_object_put_nocache(env, scrub->os_obj);
+	scrub->os_obj = NULL;
+
+	return rc;
+}
+
+/*
+ * Tear down OI scrub state at umount: stop the scrub thread, release the
+ * OI_scrub status object, and finalize the OI tables. Must not be called
+ * while an otable iterator is active (asserted).
+ */
+void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+
+	LASSERT(!dev->od_otable_it);
+
+	if (scrub->os_obj) {
+		osd_scrub_stop(dev);
+		dt_object_put_nocache(env, scrub->os_obj);
+		scrub->os_obj = NULL;
+	}
+
+	if (dev->od_oi_table)
+		osd_oi_fini(env, dev);
+}
+
+/* object table based iteration APIs */
+
+/*
+ * Create the object-table iterator used by LFSCK to walk all dnodes.
+ * The @attr word packs DOIF_* flags (upper bits) and DOIV_* valid bits;
+ * the scrub thread is (re)started to drive the iteration. Only one
+ * iterator may exist per device (-EALREADY otherwise).
+ */
+static struct dt_it *osd_otable_it_init(const struct lu_env *env,
+					struct dt_object *dt, __u32 attr)
+{
+	enum dt_otable_it_flags flags = attr >> DT_OTABLE_IT_FLAGS_SHIFT;
+	enum dt_otable_it_valid valid = attr & ~DT_OTABLE_IT_FLAGS_MASK;
+	struct osd_device *dev = osd_dev(dt->do_lu.lo_dev);
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct osd_otable_it *it;
+	__u32 start = 0;
+	int rc;
+	ENTRY;
+
+	if (dev->od_dt_dev.dd_rdonly)
+		RETURN(ERR_PTR(-EROFS));
+
+	/* od_otable_sem: prevent concurrent init/fini */
+	down(&dev->od_otable_sem);
+	if (dev->od_otable_it)
+		GOTO(out, it = ERR_PTR(-EALREADY));
+
+	OBD_ALLOC_PTR(it);
+	if (!it)
+		GOTO(out, it = ERR_PTR(-ENOMEM));
+
+	if (flags & DOIF_OUTUSED)
+		it->ooi_used_outside = 1;
+
+	if (flags & DOIF_RESET)
+		start |= SS_RESET;
+
+	if (valid & DOIV_ERROR_HANDLE) {
+		if (flags & DOIF_FAILOUT)
+			start |= SS_SET_FAILOUT;
+		else
+			start |= SS_CLEAR_FAILOUT;
+	}
+
+	if (valid & DOIV_DRYRUN) {
+		if (flags & DOIF_DRYRUN)
+			start |= SS_SET_DRYRUN;
+		else
+			start |= SS_CLEAR_DRYRUN;
+	}
+
+	/* XXX: dmu_object_next() does NOT find dnodes allocated
+	 *	in the current non-committed txg, so we force txg
+	 *	commit to find all existing dnodes ... */
+	txg_wait_synced(dmu_objset_pool(dev->od_os), 0ULL);
+
+	dev->od_otable_it = it;
+	it->ooi_dev = dev;
+	rc = scrub_start(osd_scrub_main, scrub, dev, start & ~SS_AUTO_PARTIAL);
+	if (rc == -EALREADY) {
+		/* Scrub already running: iterate from the beginning. */
+		it->ooi_pos = 1;
+	} else if (rc < 0) {
+		dev->od_otable_it = NULL;
+		OBD_FREE_PTR(it);
+		it = ERR_PTR(rc);
+	} else {
+		it->ooi_pos = scrub->os_pos_current;
+	}
+
+	GOTO(out, it);
+
+out:
+	up(&dev->od_otable_sem);
+	return (struct dt_it *)it;
+}
+
+/* Destroy the object-table iterator and stop the driving scrub thread. */
+static void osd_otable_it_fini(const struct lu_env *env, struct dt_it *di)
+{
+	struct osd_otable_it *it = (struct osd_otable_it *)di;
+	struct osd_device *dev = it->ooi_dev;
+
+	/* od_otable_sem: prevent concurrent init/fini */
+	down(&dev->od_otable_sem);
+	scrub_stop(&dev->od_scrub);
+	LASSERT(dev->od_otable_it == it);
+
+	dev->od_otable_it = NULL;
+	up(&dev->od_otable_sem);
+	OBD_FREE_PTR(it);
+}
+
+/* No-op: positioning is done via ->load(), not ->get(). */
+static int osd_otable_it_get(const struct lu_env *env,
+			     struct dt_it *di, const struct dt_key *key)
+{
+	return 0;
+}
+
+/* No-op: nothing is pinned by ->get(). */
+static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di)
+{
+}
+
+/*
+ * Keep up to OTABLE_PREFETCH dnodes ahead of the iterator prefetched
+ * via async reads, refilling once the backlog drops below half.
+ */
+static void osd_otable_it_preload(const struct lu_env *env,
+				  struct osd_otable_it *it)
+{
+	struct osd_device *dev = it->ooi_dev;
+	int rc;
+
+	/* can go negative on the very first access to the iterator
+	 * or if some non-Lustre objects were found */
+	if (unlikely(it->ooi_prefetched < 0))
+		it->ooi_prefetched = 0;
+
+	if (it->ooi_prefetched >= (OTABLE_PREFETCH >> 1))
+		return;
+
+	if (it->ooi_prefetched_dnode == 0)
+		it->ooi_prefetched_dnode = it->ooi_pos;
+
+	while (it->ooi_prefetched < OTABLE_PREFETCH) {
+		rc = -dmu_object_next(dev->od_os, &it->ooi_prefetched_dnode,
+				      B_FALSE, 0);
+		if (rc)
+			break;
+
+		osd_dmu_prefetch(dev->od_os, it->ooi_prefetched_dnode,
+				 0, 0, 0, ZIO_PRIORITY_ASYNC_READ);
+		it->ooi_prefetched++;
+	}
+}
+
+/*
+ * Wait condition for ->next(): wake when the scrub has scanned past the
+ * iterator position, the scrub itself is waiting, or it stopped running.
+ * Returns non-zero when the iterator should stop waiting.
+ */
+static inline int
+osd_otable_it_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
+{
+	spin_lock(&scrub->os_lock);
+	if (it->ooi_pos < scrub->os_pos_current || scrub->os_waiting ||
+	    !thread_is_running(&scrub->os_thread))
+		it->ooi_waiting = 0;
+	else
+		it->ooi_waiting = 1;
+	spin_unlock(&scrub->os_lock);
+
+	return !it->ooi_waiting;
+}
+
+/*
+ * Advance the iterator to the next Lustre-visible dnode and cache its
+ * FID (from the LMA xattr) in it->ooi_fid. Objects without LMA, marked
+ * LMAC_NOT_IN_OI, or agent objects are skipped transparently.
+ *
+ * Returns 0 with a valid FID, 1 at end of the object table, or a
+ * negative errno.
+ */
+static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di)
+{
+	struct osd_otable_it *it = (struct osd_otable_it *)di;
+	struct osd_device *dev = it->ooi_dev;
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct l_wait_info lwi = { 0 };
+	struct lustre_mdt_attrs *lma = NULL;
+	nvlist_t *nvbuf = NULL;
+	int size = 0;
+	int rc;
+	ENTRY;
+
+	LASSERT(it->ooi_user_ready);
+	fid_zero(&it->ooi_fid);
+
+	if (unlikely(it->ooi_all_cached))
+		RETURN(1);
+
+again:
+	/* Drop state from a previous (skipped) object before retrying. */
+	if (nvbuf) {
+		nvlist_free(nvbuf);
+		nvbuf = NULL;
+		lma = NULL;
+		size = 0;
+	}
+
+	/* Do not run ahead of the scrub thread's scan position. */
+	if (it->ooi_pos >= scrub->os_pos_current)
+		l_wait_event(thread->t_ctl_waitq,
+			     osd_otable_it_wakeup(scrub, it),
+			     &lwi);
+
+	if (!thread_is_running(thread) && !it->ooi_used_outside)
+		GOTO(out, rc = 1);
+
+	rc = -dmu_object_next(dev->od_os, &it->ooi_pos, B_FALSE, 0);
+	if (rc) {
+		/* -ESRCH: no more allocated dnodes, iteration is done. */
+		if (unlikely(rc == -ESRCH)) {
+			it->ooi_all_cached = 1;
+			rc = 1;
+		}
+
+		GOTO(out, rc);
+	}
+
+	rc = __osd_xattr_load_by_oid(dev, it->ooi_pos, &nvbuf);
+
+	/* NOTE(review): ooi_prefetched is decremented under os_lock only
+	 * when not in full-speed mode — presumably in full-speed mode the
+	 * iterator is the sole accessor; confirm against the scrub thread. */
+	if (!scrub->os_full_speed)
+		spin_lock(&scrub->os_lock);
+	it->ooi_prefetched--;
+	if (!scrub->os_full_speed) {
+		if (scrub->os_waiting) {
+			scrub->os_waiting = 0;
+			wake_up_all(&thread->t_ctl_waitq);
+		}
+		spin_unlock(&scrub->os_lock);
+	}
+
+	/* Object disappeared, is mid-creation, or has no xattrs: skip. */
+	if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+		goto again;
+
+	if (rc)
+		GOTO(out, rc);
+
+	LASSERT(nvbuf != NULL);
+	rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
+				       (uchar_t **)&lma, &size);
+	if (rc || size == 0)
+		/* It is either non-Lustre object or OSD internal object,
+		 * ignore it, go ahead */
+		goto again;
+
+	LASSERTF(lma != NULL, "corrupted LMA, size %d\n", size);
+	lustre_lma_swab(lma);
+	if (unlikely(lma->lma_compat & LMAC_NOT_IN_OI ||
+		     lma->lma_incompat & LMAI_AGENT))
+		goto again;
+
+	it->ooi_fid = lma->lma_self_fid;
+
+	GOTO(out, rc = 0);
+
+out:
+	if (nvbuf)
+		nvlist_free(nvbuf);
+
+	if (!rc && scrub->os_full_speed)
+		osd_otable_it_preload(env, it);
+
+	return rc;
+}
+
+/* The otable iterator has no key; records are fetched via ->rec(). */
+static struct dt_key *osd_otable_it_key(const struct lu_env *env,
+					const struct dt_it *di)
+{
+	return NULL;
+}
+
+/* Nominal key size: the 64-bit dnode position. */
+static int osd_otable_it_key_size(const struct lu_env *env,
+				  const struct dt_it *di)
+{
+	return sizeof(__u64);
+}
+
+/* Return the FID cached by the last successful ->next() call. */
+static int osd_otable_it_rec(const struct lu_env *env, const struct dt_it *di,
+			     struct dt_rec *rec, __u32 attr)
+{
+	struct osd_otable_it *it = (struct osd_otable_it *)di;
+	struct lu_fid *fid = (struct lu_fid *)rec;
+
+	*fid = it->ooi_fid;
+	return 0;
+}
+
+/* Return the current iterator position (dnode number) for checkpointing. */
+static __u64 osd_otable_it_store(const struct lu_env *env,
+				 const struct dt_it *di)
+{
+	struct osd_otable_it *it = (struct osd_otable_it *)di;
+
+	return it->ooi_pos;
+}
+
+/**
+ * Set the OSD layer iteration start position as the specified hash.
+ *
+ * @hash is the last checkpointed position; iteration resumes at the
+ * following dnode. May only be called before the first ->next()
+ * (-EPERM afterwards). The first next() is issued here to "unplug"
+ * the iteration.
+ */
+static int osd_otable_it_load(const struct lu_env *env,
+			      const struct dt_it *di, __u64 hash)
+{
+	struct osd_otable_it *it = (struct osd_otable_it *)di;
+	struct osd_device *dev = it->ooi_dev;
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	int rc;
+	ENTRY;
+
+	/* Forbid to set iteration position after iteration started. */
+	if (it->ooi_user_ready)
+		RETURN(-EPERM);
+
+	if (hash > OSD_OTABLE_MAX_HASH)
+		hash = OSD_OTABLE_MAX_HASH;
+
+	/* The hash is the last checkpoint position,
+	 * we will start from the next one. */
+	it->ooi_pos = hash + 1;
+	it->ooi_prefetched = 0;
+	it->ooi_prefetched_dnode = 0;
+	it->ooi_user_ready = 1;
+	if (!scrub->os_full_speed)
+		wake_up_all(&scrub->os_thread.t_ctl_waitq);
+
+	/* Unplug OSD layer iteration by the first next() call. */
+	rc = osd_otable_it_next(env, (struct dt_it *)it);
+
+	RETURN(rc);
+}
+
+/* No-op: the otable iterator exposes no key record. */
+static int osd_otable_it_key_rec(const struct lu_env *env,
+				 const struct dt_it *di, void *key_rec)
+{
+	return 0;
+}
+
+/* dt_index_operations vector wiring the otable iterator into the dt API. */
+const struct dt_index_operations osd_otable_ops = {
+	.dio_it = {
+		.init     = osd_otable_it_init,
+		.fini     = osd_otable_it_fini,
+		.get      = osd_otable_it_get,
+		.put      = osd_otable_it_put,
+		.next     = osd_otable_it_next,
+		.key      = osd_otable_it_key,
+		.key_size = osd_otable_it_key_size,
+		.rec      = osd_otable_it_rec,
+		.store    = osd_otable_it_store,
+		.load     = osd_otable_it_load,
+		.key_rec  = osd_otable_it_key_rec,
+	}
+};
+
+/* high priority inconsistent items list APIs */
+
+/*
+ * Queue an inconsistent FID<->dnode mapping for urgent repair by the
+ * running scrub thread. @insert selects OI insert vs. update. Returns
+ * -EAGAIN if the scrub thread is not running, -ENOMEM on allocation
+ * failure, 0 on success.
+ */
+int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
+		   const struct lu_fid *fid, uint64_t oid, bool insert)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct ptlrpc_thread *thread = &scrub->os_thread;
+	struct osd_inconsistent_item *oii;
+	bool wakeup = false;
+	ENTRY;
+
+	osd_idc_find_and_init_with_oid(env, dev, fid, oid);
+	OBD_ALLOC_PTR(oii);
+	if (unlikely(!oii))
+		RETURN(-ENOMEM);
+
+	INIT_LIST_HEAD(&oii->oii_list);
+	oii->oii_cache.oic_dev = dev;
+	oii->oii_cache.oic_fid = *fid;
+	oii->oii_cache.oic_dnode = oid;
+	oii->oii_insert = insert;
+
+	spin_lock(&scrub->os_lock);
+	if (unlikely(!thread_is_running(thread))) {
+		spin_unlock(&scrub->os_lock);
+		OBD_FREE_PTR(oii);
+		RETURN(-EAGAIN);
+	}
+
+	/* Only wake the scrub thread on an empty->non-empty transition. */
+	if (list_empty(&scrub->os_inconsistent_items))
+		wakeup = true;
+	list_add_tail(&oii->oii_list, &scrub->os_inconsistent_items);
+	spin_unlock(&scrub->os_lock);
+
+	if (wakeup)
+		wake_up_all(&thread->t_ctl_waitq);
+
+	RETURN(0);
+}
+
+/*
+ * Look up @fid in the pending inconsistent-items list; on a hit store
+ * the cached dnode number in @oid and return 0, else return -ENOENT.
+ */
+int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
+		   uint64_t *oid)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct osd_inconsistent_item *oii;
+	int ret = -ENOENT;
+	ENTRY;
+
+	spin_lock(&scrub->os_lock);
+	list_for_each_entry(oii, &scrub->os_inconsistent_items, oii_list) {
+		if (lu_fid_eq(fid, &oii->oii_cache.oic_fid)) {
+			*oid = oii->oii_cache.oic_dnode;
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&scrub->os_lock);
+
+	RETURN(ret);
+}
exit 0
fi
-[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
- skip "ldiskfs only test" && exit 0
-
-[ $(facet_fstype ost1) != "ldiskfs" ] &&
- skip "ldiskfs only test" && exit 0
-
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
- skip "Need MDS version at least 2.2.90" && exit 0
+stopall
SAVED_MDSSIZE=${MDSSIZE}
SAVED_OSTSIZE=${OSTSIZE}
SAVED_OSTCOUNT=${OSTCOUNT}
+
# use small MDS + OST size to speed formatting time
# do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
-# 200M MDT device can guarantee uninitialized groups during the OI scrub
-MDSSIZE=200000
-OSTSIZE=100000
-# no need too much OSTs, to reduce the format/start/stop overhead
-stopall
-[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
+# 400M MDT device can guarantee uninitialized groups during the OI scrub
+MDSSIZE=400000
+OSTSIZE=200000
-MOUNT_2=""
+# no need too many OSTs, to reduce the format/start/stop overhead
+[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
# build up a clean test environment.
formatall
setupall
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
-
-[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
-
-[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
-
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
-[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
-
-[[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
-
-[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
- SCRUB_ONLY="-t scrub"
-
build_test_filter
MDT_DEV="${FSNAME}-MDT0000"
# use "lfsck_start -A" when we no longer need testing interop
for n in $(seq $MDSCOUNT); do
do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
- $SCRUB_ONLY "$@" ||
+ -t scrub "$@" ||
error "($error_id) Failed to start OI scrub on mds$n"
done
}
scrub_status() {
local n=$1
- do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
}
-START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
-START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
+START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
+START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
SHOW_SCRUB="do_facet $SINGLEMDS \
- $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
+ $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
SHOW_SCRUB_ON_OST="do_facet ost1 \
- $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
+ $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
MOUNT_OPTS_SCRUB="-o user_xattr"
MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
scrub_prep() {
local nfiles=$1
+ local inject=$2
local n
check_mount_and_prep
fi
done
echo "prepared $(date)."
+
+ [ ! -z $inject ] && [ $inject -eq 2 ] && {
+ #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
+ do_nodes $(comma_list $(mdts_nodes)) \
+ $LCTL set_param fail_loc=0x198
+
+ for n in $(seq $MDSCOUNT); do
+ cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
+ error "Fail to copy runas to MDS$n"
+ done
+
+ do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
+ }
+
+ [ ! -z $inject ] && [ $inject -eq 1 ] &&
+ [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
+ #define OBD_FAIL_OSD_FID_MAPPING 0x193
+ do_nodes $(comma_list $(mdts_nodes)) \
+ $LCTL set_param fail_loc=0x193
+
+ for n in $(seq $MDSCOUNT); do
+ chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
+ chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
+ done
+
+ do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
+ }
+
cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
# sync local transactions on every MDT
echo "stop mds$n"
stop mds$n > /dev/null || error "Fail to stop MDS$n!"
done
+
+ [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
+ if [ $inject -eq 1 ]; then
+ for n in $(seq $MDSCOUNT); do
+ mds_backup_restore mds$n ||
+ error "Backup/restore on mds$n failed"
+ done
+ elif [ $inject -eq 2 ]; then
+ scrub_remove_ois 1
+ fi
+ }
}
scrub_start_mds() {
for n in $(seq $MDSCOUNT); do
wait_update_facet mds$n "$LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+ osd-*.$(facet_svc mds$n).oi_scrub |
awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
error "($error_id) Expected '$expected' on mds$n"
done
for n in $(seq $MDSCOUNT); do
actual=$(do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+ osd-*.$(facet_svc mds$n).oi_scrub |
awk '/^flags/ { print $2 }')
if [ "$actual" != "$expected" ]; then
error "($error_id) Expected '$expected' on mds$n, but" \
for n in $(seq $MDSCOUNT); do
actual=$(do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+ osd-*.$(facet_svc mds$n).oi_scrub |
awk '/^param/ { print $2 }')
if [ "$actual" != "$expected" ]; then
error "($error_id) Expected '$expected' on mds$n, but" \
for n in $(seq $MDSCOUNT); do
if [ $dryrun -eq 1 ]; then
actual=$(do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+ osd-*.$(facet_svc mds$n).oi_scrub |
awk '/^inconsistent:/ { print $2 }')
else
actual=$(do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+ osd-*.$(facet_svc mds$n).oi_scrub |
awk '/^updated:/ { print $2 }')
fi
}
scrub_remove_ois() {
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
+
local error_id=$1
local index=$2
local n
done
}
-scrub_backup_restore() {
- local error_id=$1
- local igif=$2
- local n
-
- for n in $(seq $MDSCOUNT); do
- mds_backup_restore mds$n $igif ||
- error "($error_id) Backup/restore on mds$n failed"
- done
-}
-
scrub_enable_auto() {
do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
- osd-ldiskfs.*.auto_scrub=1
+ osd-*.*.auto_scrub=1
}
full_scrub_ratio() {
- [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
- return
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
local ratio=$1
do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
- osd-ldiskfs.*.full_scrub_ratio=$ratio
+ osd-*.*.full_scrub_ratio=$ratio
}
full_scrub_threshold_rate() {
- [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
- return
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
local rate=$1
do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
- osd-ldiskfs.*.full_scrub_threshold_rate=$rate
+ osd-*.*.full_scrub_threshold_rate=$rate
}
test_0() {
run_test 1a "Auto trigger initial OI scrub when server mounts"
test_1b() {
- scrub_prep 0
- scrub_remove_ois 1
+ scrub_prep 0 2
echo "start MDTs without disabling OI scrub"
scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
- scrub_check_status 3 completed
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_status 3 completed
mount_client $MOUNT || error "(4) Fail to start client!"
- scrub_check_data 5
+ scrub_check_data2 runas 5
+ scrub_check_status 6 completed
}
run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
test_1c() {
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
local index
# OI files to be removed:
run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
test_2() {
- scrub_prep 0
- scrub_backup_restore 1
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
+ scrub_prep 0 1
echo "starting MDTs without disabling OI scrub"
scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
scrub_check_status 3 completed
formatall > /dev/null
setupall > /dev/null
- scrub_prep 0
- scrub_backup_restore 1
+ scrub_prep 0 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
}
#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
test_4a() {
- scrub_prep 0
- scrub_backup_restore 1
+ scrub_prep 0 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
test_4b() {
- scrub_prep 5
- scrub_backup_restore 1
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
+ scrub_prep 5 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_flags 4 recreated,inconsistent
echo "OI scrub on MDS$n status for the 1st time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
done
scrub_check_data2 sanity-scrub.sh 9
echo "OI scrub on MDS$n status for the 2nd time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
error "(12) Auto trigger full scrub unexpectedly"
echo "OI scrub on MDS$n status for the 3rd time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
error "(16) Auto trigger full scrub unexpectedly"
[ ${updated0[$n]} -eq ${updated1[$n]} ] || {
echo "OI scrub on MDS$n status for the 4th time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
error "(18) NOT auto trigger full scrub as expected"
}
run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
test_4c() {
- scrub_prep 500
- scrub_backup_restore 1
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
+ scrub_prep 500 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_flags 4 recreated,inconsistent
echo "OI scrub on MDS$n status for the 1st time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
done
scrub_check_data2 sanity-scrub.sh 9
echo "OI scrub on MDS$n status for the 2nd time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
error "(12) Auto trigger full scrub unexpectedly"
echo "OI scrub on MDS$n status for the 3rd time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
error "(16) Auto trigger full scrub unexpectedly"
[ ${updated0[$n]} -eq ${updated1[$n]} ] || {
echo "OI scrub on MDS$n status for the 4th time:"
do_facet mds$n $LCTL get_param -n \
- osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+ osd-*.$(facet_svc mds$n).oi_scrub
error "(18) NOT auto trigger full scrub as expected"
}
formatall > /dev/null
setupall > /dev/null
- scrub_prep 1000
- scrub_backup_restore 1
+ scrub_prep 100 1
echo "starting MDTs with OI scrub disabled (1)"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
declare -a pids
for n in $(seq $MDSCOUNT); do
- stat $DIR/$tdir/mds$n/${tfile}800 &
+ stat $DIR/$tdir/mds$n/sanity-scrub.sh &
pids[$n]=$!
done
for n in $(seq $MDSCOUNT); do
- wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
+ wait ${pids[$n]} ||
+ error "(18) Fail to stat mds$n/sanity-scrub.sh"
done
scrub_check_status 19 completed
run_test 5 "OI scrub state machine"
test_6() {
- scrub_prep 1000
- scrub_backup_restore 1
+ scrub_prep 100 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
local n
for n in $(seq $MDSCOUNT); do
# stat will re-trigger OI scrub
- stat $DIR/$tdir/mds$n/${tfile}800 ||
- error "(8) Failed to stat mds$n/${tfile}800"
+ stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
+ error "(8) Failed to stat mds$n/sanity-scrub.sh"
done
umount_client $MOUNT || error "(9) Fail to stop client!"
run_test 6 "OI scrub resumes from last checkpoint"
test_7() {
- scrub_prep 500
- scrub_backup_restore 1
+ scrub_prep 500 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
done
scrub_check_status 8 scanning
- scrub_check_flags 9 recreated,inconsistent,auto
+ if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
+ scrub_check_flags 9 inconsistent,auto
+ else
+ scrub_check_flags 9 recreated,inconsistent,auto
+ fi
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_loc=0 fail_val=0
run_test 7 "System is available during OI scrub scanning"
test_8() {
- scrub_prep 128
- scrub_backup_restore 1
+ scrub_prep 128 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
run_test 8 "Control OI scrub manually"
test_9() {
+ # Skip scrub speed test for ZFS because of unstable performance
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "test scrub speed only on ldiskfs" && return
+
if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
skip "Testing on UP system, the speed may be inaccurate."
return 0
fi
- scrub_prep 6000
- scrub_backup_restore 1
+ scrub_prep 6000 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
run_test 9 "OI scrub speed control"
test_10a() {
- scrub_prep 0
- scrub_backup_restore 1
+ scrub_prep 0 1
echo "starting mds$n with OI scrub disabled (1)"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
# test_10b is obsolete; it will be covered by related sanity-lfsck tests.
test_10b() {
- scrub_prep 0
- scrub_backup_restore 1
+ scrub_prep 0 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
#run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
test_11() {
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
local CREATED=100
local n
do_facet ost1 $LCTL set_param fail_loc=0
wait_update_facet ost1 "$LCTL get_param -n \
- osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+ osd-*.$(facet_svc ost1).oi_scrub |
awk '/^status/ { print \\\$2 }'" "completed" 6 ||
error "(7) Expected '$expected' on ost1"
$START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
wait_update_facet ost1 "$LCTL get_param -n \
- osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+ osd-*.$(facet_svc ost1).oi_scrub |
awk '/^status/ { print \\\$2 }'" "completed" 6 ||
error "(7) Expected '$expected' on ost1"
run_test 13 "OI scrub can rebuild missed /O entries"
test_14() {
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "ldiskfs special test" && return
+
check_mount_and_prep
$SETSTRIPE -c 1 -i 0 $DIR/$tdir
run_test 14 "OI scrub can repair objects under lost+found"
test_15() {
- local server_version=$(lustre_version_code $SINGLEMDS)
- scrub_prep 20
- scrub_backup_restore 1
+ local repaired
+
+ formatall > /dev/null
+ setupall > /dev/null
+
+ scrub_prep 20 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 recreated,inconsistent
+ [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
+ scrub_check_flags 4 recreated,inconsistent
# run under dryrun mode
- if [ $server_version -lt $(version_code 2.5.58) ]; then
- scrub_start 5 --dryrun on
+ scrub_start 5 --dryrun
+ scrub_check_status 6 completed
+ if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
+ scrub_check_flags 7 inconsistent
+ repaired=2
else
- scrub_start 5 --dryrun
+ scrub_check_flags 7 recreated,inconsistent
+ repaired=20
fi
- scrub_check_status 6 completed
- scrub_check_flags 7 recreated,inconsistent
scrub_check_params 8 dryrun
- scrub_check_repaired 9 20 1
+ scrub_check_repaired 9 $repaired 1
# run under dryrun mode again
- if [ $server_version -lt $(version_code 2.5.58) ]; then
- scrub_start 10 --dryrun on
+ scrub_start 10 --dryrun
+ scrub_check_status 11 completed
+ if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
+ scrub_check_flags 12 inconsistent
else
- scrub_start 10 --dryrun
+ scrub_check_flags 12 recreated,inconsistent
fi
- scrub_check_status 11 completed
- scrub_check_flags 12 recreated,inconsistent
scrub_check_params 13 dryrun
- scrub_check_repaired 14 20 1
+ scrub_check_repaired 14 $repaired 1
# run under normal mode
- #
- # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
- # work under Lustre-2.y (y >= 6), the test script should be fixed as
- # "-noff" or "--dryrun=off" or nothing by default.
- if [ $server_version -lt $(version_code 2.5.58) ]; then
- scrub_start 15 --dryrun off
- else
- scrub_start 15
- fi
+ scrub_start 15
scrub_check_status 16 completed
scrub_check_flags 17 ""
scrub_check_params 18 ""
- scrub_check_repaired 19 20 0
+ scrub_check_repaired 19 $repaired 0
# run under normal mode again
- if [ $server_version -lt $(version_code 2.5.58) ]; then
- scrub_start 20 --dryrun off
- else
- scrub_start 20
- fi
+ scrub_start 20
scrub_check_status 21 completed
scrub_check_flags 22 ""
scrub_check_params 23 ""