+/*
+ * When looking up an item under a striped directory, the client first
+ * locates the master MDT-object of the striped directory, then sends a
+ * lookup (getattr_by_name) RPC to the MDT with some slave MDT-object's FID
+ * and the item's name. If the system has been restored from an MDT file
+ * level backup, then until the OI scrub has completely rebuilt the OI files,
+ * the OI mappings of the master MDT-object and the slave MDT-objects may be
+ * invalid. Usually this is not a problem for the master MDT-object: locating
+ * it involves a name based lookup (for the striped directory itself), and
+ * during that process the correct OI mapping for the master MDT-object can
+ * be set up. It is a problem for the slave MDT-object, though. The client
+ * does not trigger a name based lookup on the MDT to locate the slave
+ * MDT-object before locating an item under the striped directory, so
+ * osd_fid_lookup() finds that the OI mapping for the slave MDT-object is
+ * invalid and does not know what the right OI mapping is. The MDT then has
+ * to return -EINPROGRESS to tell the client that the OI scrub is rebuilding
+ * the OI file and that the related OI mapping is not known yet. The client
+ * retries the RPC again and again until the related OI mapping has been
+ * updated. That is quite inefficient.
+ *
+ * To resolve the above trouble, the following two cases are handled:
+ *
+ * 1) The slave MDT-object and the master MDT-object are on different MDTs.
+ *    This is relatively easy. As one of the remote MDT-objects, the slave
+ *    MDT-object is linked under /REMOTE_PARENT_DIR with its FID string as
+ *    the name, so it can be located by looking up /REMOTE_PARENT_DIR
+ *    directly. Please check osd_fid_lookup().
+ *
+ * 2) The slave MDT-object and the master MDT-object reside on the same MDT.
+ *    In this case, while looking up the master MDT-object, the slave
+ *    MDT-object is searched for via readdir against the master MDT-object,
+ *    because the slave MDT-objects are stored as sub-directories named
+ *    "${FID}:${index}". When the local slave MDT-object is found, its OI
+ *    mapping is recorded, so a subsequent osd_fid_lookup() will know the
+ *    correct OI mapping for the slave MDT-object.
+ *
+ * \param[in] env	execution environment, used to fetch the per-thread
+ *			scratch buffers
+ * \param[in] osd	the OSD device that holds the directory
+ * \param[in] oid	ZFS object number of the (master) directory to scan
+ * \param[in] fid	FID of that directory, only used in error messages
+ *
+ * \retval 0		the object has no xattrs or no LMV xattr, or the
+ *			directory was scanned without finding a sane local
+ *			FID entry, or the entry found was handed over to
+ *			osd_oii_insert() successfully
+ * \retval -EINVAL	the LMV xattr is too short or is not LMV_MAGIC_V1
+ * \retval negative	other failure from the xattr, ZAP or osd_oii_insert()
+ *			calls
+ */
+static int osd_check_lmv(const struct lu_env *env, struct osd_device *osd,
+			 uint64_t oid, const struct lu_fid *fid)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct luz_direntry *zde = &info->oti_zde;
+	zap_attribute_t *za = &info->oti_za;
+	zap_cursor_t *zc = &info->oti_zc;
+	struct lu_fid *tfid = &info->oti_fid;
+	nvlist_t *nvbuf = NULL;
+	struct lmv_mds_md_v1 *lmv = NULL;
+	/* nvlist_lookup_byte_array() reports the length as an unsigned count */
+	unsigned int size = 0;
+	int rc;
+	ENTRY;
+
+	rc = __osd_xattr_load_by_oid(osd, oid, &nvbuf);
+	if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+		RETURN(0);
+
+	if (rc)
+		RETURN(rc);
+
+	rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMV,
+				       (uchar_t **)&lmv, &size);
+	if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
+		GOTO(out_nvbuf, rc = 0);
+
+	if (rc)
+		GOTO(out_nvbuf, rc);
+
+	/* Do not read the magic out of a truncated LMV xattr. */
+	if (size < sizeof(*lmv))
+		GOTO(out_nvbuf, rc = -EINVAL);
+
+	if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+		GOTO(out_nvbuf, rc = -EINVAL);
+
+	/*
+	 * Probe the entry at the start of the directory ZAP: on -ENOENT the
+	 * cursor is advanced, any other failure aborts the scan.
+	 */
+	zap_cursor_init_serialized(zc, osd->od_os, oid, 0);
+	rc = -zap_cursor_retrieve(zc, za);
+	if (rc == -ENOENT) {
+		zap_cursor_advance(zc);
+	} else if (rc) {
+		CERROR("%s: fail to init for check LMV "DFID"(%llu): rc = %d\n",
+		       osd_name(osd), PFID(fid), oid, rc);
+		GOTO(out_zc, rc);
+	}
+
+	while (1) {
+		rc = -zap_cursor_retrieve(zc, za);
+		/* -ENOENT here means there is no further entry to visit. */
+		if (rc == -ENOENT)
+			GOTO(out_zc, rc = 0);
+
+		if (rc) {
+			CERROR("%s: fail to locate next for check LMV "
+			       DFID"(%llu): rc = %d\n",
+			       osd_name(osd), PFID(fid), oid, rc);
+			GOTO(out_zc, rc);
+		}
+
+		/*
+		 * The first character of the name is skipped before parsing;
+		 * presumably it is the '[' of a bracketed FID in the
+		 * "${FID}:${index}" name -- TODO confirm against the code
+		 * that creates the stripe entries. Only a name that yields
+		 * all three FID fields is considered, so that a partially
+		 * parsed name cannot pass for a stripe FID.
+		 */
+		fid_zero(tfid);
+		if (za->za_name[0] != '\0' &&
+		    sscanf(za->za_name + 1, SFID, RFID(tfid)) == 3 &&
+		    fid_is_sane(tfid) && !osd_remote_fid(env, osd, tfid)) {
+			rc = osd_zap_lookup(osd, oid, NULL, za->za_name,
+					za->za_integer_length,
+					sizeof(*zde) / za->za_integer_length,
+					(void *)zde);
+			if (rc) {
+				CERROR("%s: fail to lookup for check LMV "
+				       DFID"(%llu): rc = %d\n",
+				       osd_name(osd), PFID(fid), oid, rc);
+				GOTO(out_zc, rc);
+			}
+
+			/* The scan stops at the first sane, local FID. */
+			rc = osd_oii_insert(env, osd, tfid,
+					    zde->lzd_reg.zde_dnode, false);
+			GOTO(out_zc, rc);
+		}
+
+		zap_cursor_advance(zc);
+	}
+
+out_zc:
+	zap_cursor_fini(zc);
+out_nvbuf:
+	nvlist_free(nvbuf);
+
+	return rc;
+}
+
+/*
+ * Check whether the OI mapping of \a fid agrees with the object \a oid and,
+ * if it does not, either hand the pair to the running OI scrub or try to
+ * start an OI scrub.
+ *
+ * Nothing is done (0 is returned) when \a fid is neither a normal FID nor an
+ * IGIF, when the running scrub position is already beyond \a oid, when auto
+ * scrub is disabled (AS_NEVER) and no scrub is running, or when the auto
+ * scrub interval since the last completed scrub has not elapsed yet.
+ *
+ * \param[in] env	execution environment
+ * \param[in] osd	the OSD device
+ * \param[in] obj	object used to read the parent object number when
+ *			\a oid is ZFS_NO_OBJECT
+ * \param[in] fid	FID whose OI mapping is verified
+ * \param[in] oid	object number \a fid is expected to map to, or
+ *			ZFS_NO_OBJECT
+ * \param[in] is_dir	when true and a scrub is running, osd_check_lmv() is
+ *			also called for \a oid
+ *
+ * \retval 0		no check was needed or the mapping is consistent
+ * \retval negative	error code from the lookup, dnode, LMV check or
+ *			scrub start calls
+ */
+static int
+osd_consistency_check(const struct lu_env *env, struct osd_device *osd,
+		      struct osd_object *obj, const struct lu_fid *fid,
+		      uint64_t oid, bool is_dir)
+{
+	struct lustre_scrub *scrub = &osd->od_scrub;
+	dnode_t *dn = NULL;
+	uint64_t mapped_oid;
+	int attempts = 0;
+	bool need_insert;
+	int rc;
+	ENTRY;
+
+	if (!(fid_is_norm(fid) || fid_is_igif(fid)))
+		RETURN(0);
+
+	/* oid == ZFS_NO_OBJECT must be for lookup ".." case */
+	if (oid == ZFS_NO_OBJECT) {
+		rc = osd_sa_handle_get(obj);
+		if (rc)
+			RETURN(rc);
+
+		rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_PARENT(osd), &oid,
+				sizeof(oid));
+		if (rc)
+			RETURN(rc);
+	}
+
+	if (scrub->os_running) {
+		/* The running scrub position is already beyond this object. */
+		if (oid < scrub->os_pos_current)
+			RETURN(0);
+	} else {
+		if (osd->od_auto_scrub_interval == AS_NEVER)
+			RETURN(0);
+
+		/* Too early since the last completed scrub. */
+		if (ktime_get_real_seconds() <
+		    scrub->os_file.sf_time_last_complete +
+		    osd->od_auto_scrub_interval)
+			RETURN(0);
+	}
+
+again:
+	rc = osd_fid_lookup(env, osd, fid, &mapped_oid);
+	if (rc == -ENOENT) {
+		/* No mapping found for the FID. */
+		need_insert = true;
+		if (!dn) {
+			rc = __osd_obj2dnode(osd->od_os, oid, &dn);
+			/* The object has been removed (by race maybe). */
+			if (rc)
+				RETURN(rc == -EEXIST ? -ENOENT : rc);
+		}
+	} else if (rc || oid == mapped_oid) {
+		/* Lookup failure, or the mapping is already consistent. */
+		GOTO(out, rc);
+	} else {
+		/* A mapping exists but refers to a different object. */
+		need_insert = false;
+	}
+
+	if (scrub->os_running) {
+		if (!dn) {
+			rc = __osd_obj2dnode(osd->od_os, oid, &dn);
+			/* The object has been removed (by race maybe). */
+			if (rc)
+				RETURN(rc == -EEXIST ? -ENOENT : rc);
+		}
+
+		rc = osd_oii_insert(env, osd, fid, oid, need_insert);
+		/*
+		 * The OI lookup races with the OI scrub: the scrub may have
+		 * finished just after the lookup failed. In that case there
+		 * is no need to trigger the OI scrub again, simply repeat
+		 * the lookup.
+		 */
+		if (unlikely(rc == -EAGAIN))
+			goto again;
+
+		rc = is_dir ? osd_check_lmv(env, osd, oid, fid) : 0;
+
+		GOTO(out, rc);
+	}
+
+	/* The scrub is started at most once per call. */
+	if (osd->od_auto_scrub_interval != AS_NEVER && ++attempts == 1) {
+		rc = osd_scrub_start(env, osd, SS_AUTO_FULL |
+				     SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
+		CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING,
+		       "%s: trigger partial OI scrub for RPC inconsistency "
+		       "checking FID "DFID": rc = %d\n",
+		       osd_name(osd), PFID(fid), rc);
+		if (!rc)
+			goto again;
+	}
+
+	GOTO(out, rc);
+
+out:
+	if (dn)
+		osd_dnode_rele(dn);
+
+	return rc;
+}
+