+
+struct lfsck_orphan_it { /* cursor state of one orphan OST-object iteration */
+ struct lfsck_component *loi_com; /* layout component, set on successful init */
+ struct lfsck_rbtree_node *loi_lrn; /* rbtree node being scanned; NULL forces a new search */
+ struct lfsck_layout_slave_target *loi_llst; /* slave target taken at init, put at fini */
+ struct lu_fid loi_key; /* FID of the current position */
+ struct lu_orphan_rec loi_rec; /* record for the current position */
+ __u64 loi_hash; /* incremented for each record returned by next() */
+ unsigned int loi_over:1; /* set once the iteration has reached the end */
+};
+
+/**
+ * Check whether the given FID belongs to the MDT with the given index.
+ *
+ * A FID that is not a normal FID is always attributed to MDT0. Otherwise the
+ * FID's sequence is looked up in the server FLD and the index of the range
+ * found is compared with \a idx.
+ *
+ * \param[in] env	execution environment
+ * \param[in] lfsck	LFSCK instance whose bottom device locates the site
+ * \param[in] fid	the FID to be checked
+ * \param[in] idx	the MDT index to compare against
+ *
+ * \retval 1: the FID matches \a idx
+ * \retval 0: the FID does not match \a idx
+ * \retval -ENOTCONN: the site has no sequence server site
+ * \retval -EINVAL: the range found is not an MDT range
+ * \retval other -ve: the FLD lookup failed
+ */
+static int lfsck_fid_match_idx(const struct lu_env *env,
+			       struct lfsck_instance *lfsck,
+			       const struct lu_fid *fid, int idx)
+{
+	struct lu_seq_range	 range = { 0 };
+	struct seq_server_site	*site;
+	struct lu_server_fld	*fld;
+	int			 rc;
+
+	/* All abnormal cases will be returned to MDT0. */
+	if (!fid_is_norm(fid))
+		return idx == 0;
+
+	site = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site);
+	if (unlikely(site == NULL))
+		return -ENOTCONN;
+
+	fld = site->ss_server_fld;
+	LASSERT(fld != NULL);
+
+	fld_range_set_any(&range);
+	rc = fld_server_lookup(env, fld, fid_seq(fid), &range);
+	if (rc != 0)
+		return rc;
+
+	if (!fld_range_is_mdt(&range))
+		return -EINVAL;
+
+	/* The comparison result is exactly the 1/0 the callers expect. */
+	return range.lsr_index == idx;
+}
+
+/**
+ * Destroy an orphan object inside a local transaction.
+ *
+ * The reference deletion and the destroy are declared, the transaction is
+ * started locally, and both operations run under the object's write lock.
+ * Failures are not reported to the caller; the transaction is stopped on
+ * every path once it has been created.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	device on which the transaction is created
+ * \param[in] obj	the object to be destroyed
+ */
+static void lfsck_layout_destroy_orphan(const struct lu_env *env,
+					struct dt_device *dev,
+					struct dt_object *obj)
+{
+	struct thandle	*th;
+	int		 rc;
+	ENTRY;
+
+	th = dt_trans_create(env, dev);
+	if (IS_ERR(th))
+		RETURN_EXIT;
+
+	/* Declare everything the transaction is going to do. */
+	rc = dt_declare_ref_del(env, obj, th);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	rc = dt_declare_destroy(env, obj, th);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	rc = dt_trans_start_local(env, dev, th);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	/* Destroy only if dropping the reference succeeded. */
+	dt_write_lock(env, obj, 0);
+	rc = dt_ref_del(env, obj, th);
+	if (rc == 0)
+		rc = dt_destroy(env, obj, th);
+	dt_write_unlock(env, obj);
+
+	GOTO(stop, rc);
+
+stop:
+	dt_trans_stop(env, dev, th);
+
+	RETURN_EXIT;
+}
+
+/**
+ * Lookup is not supported on the orphan index.
+ *
+ * \retval -EOPNOTSUPP: always
+ */
+static int
+lfsck_orphan_index_lookup(const struct lu_env *env, struct dt_object *dt,
+			  struct dt_rec *rec, const struct dt_key *key,
+			  struct lustre_capa *capa)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * Declaring an insert is not supported on the orphan index.
+ *
+ * \retval -EOPNOTSUPP: always
+ */
+static int
+lfsck_orphan_index_declare_insert(const struct lu_env *env,
+				  struct dt_object *dt,
+				  const struct dt_rec *rec,
+				  const struct dt_key *key,
+				  struct thandle *handle)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * Insert is not supported on the orphan index.
+ *
+ * \retval -EOPNOTSUPP: always
+ */
+static int
+lfsck_orphan_index_insert(const struct lu_env *env, struct dt_object *dt,
+			  const struct dt_rec *rec, const struct dt_key *key,
+			  struct thandle *handle, struct lustre_capa *capa,
+			  int ignore_quota)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * Declaring a delete is not supported on the orphan index.
+ *
+ * \retval -EOPNOTSUPP: always
+ */
+static int
+lfsck_orphan_index_declare_delete(const struct lu_env *env,
+				  struct dt_object *dt,
+				  const struct dt_key *key,
+				  struct thandle *handle)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * Delete is not supported on the orphan index.
+ *
+ * \retval -EOPNOTSUPP: always
+ */
+static int
+lfsck_orphan_index_delete(const struct lu_env *env, struct dt_object *dt,
+			  const struct dt_key *key, struct thandle *handle,
+			  struct lustre_capa *capa)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * Remove from the rbtree the nodes which do NOT need to be handled.
+ *
+ * Takes the rbtree write lock and, if the device is still recording FID
+ * accesses, turns that recording off and erases and frees every node whose
+ * known count does not exceed its accessed count.
+ *
+ * \param[in] dev	device carrying the dd_record_fid_accessed flag
+ * \param[in] llsd	layout slave data owning the rbtree and its lock
+ */
+static void lfsck_orphan_rbtree_prune(struct dt_device *dev,
+				      struct lfsck_layout_slave_data *llsd)
+{
+	struct lfsck_rbtree_node	*lrn;
+	struct rb_node			*cur;
+	struct rb_node			*succ;
+
+	write_lock(&llsd->llsd_rb_lock);
+	/* Re-check under the lock before scanning. */
+	if (dev->dd_record_fid_accessed) {
+		/* No need to record the fid accessing anymore. */
+		dev->dd_record_fid_accessed = 0;
+
+		for (cur = rb_first(&llsd->llsd_rb_root); cur != NULL;
+		     cur = succ) {
+			/* Fetch the successor first: cur may be erased. */
+			succ = rb_next(cur);
+			lrn = rb_entry(cur, struct lfsck_rbtree_node,
+				       lrn_node);
+			if (atomic_read(&lrn->lrn_known_count) <=
+			    atomic_read(&lrn->lrn_accessed_count)) {
+				rb_erase(cur, &llsd->llsd_rb_root);
+				lfsck_rbtree_free(lrn);
+			}
+		}
+	}
+	write_unlock(&llsd->llsd_rb_lock);
+}
+
+/**
+ * Create an iterator over the orphan OST-objects recorded in the rbtree.
+ *
+ * Finds the LFSCK instance and its layout component for the device of \a dt,
+ * allocates the iterator and takes the slave target selected by \a attr.
+ * On the first iteration against the rbtree, the nodes that need no handling
+ * are removed. On success the rbtree read lock is held and the component
+ * reference is kept by the iterator; both are released by the fini method.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	object whose device identifies the LFSCK instance
+ * \param[in] attr	passed to lfsck_layout_llst_find_and_del()
+ * \param[in] capa	unused
+ *
+ * \retval the new iterator on success
+ * \retval ERR_PTR(-ENODEV): no LFSCK instance, or no slave target found
+ * \retval ERR_PTR(-ENOENT): no layout component
+ * \retval ERR_PTR(-ESRCH): the rbtree is not valid
+ * \retval ERR_PTR(-ENOMEM): the iterator cannot be allocated
+ */
+static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env,
+					  struct dt_object *dt,
+					  __u32 attr,
+					  struct lustre_capa *capa)
+{
+	struct dt_device		*dev = lu2dt_dev(dt->do_lu.lo_dev);
+	struct lfsck_orphan_it		*it = NULL;
+	struct lfsck_component		*com = NULL;
+	struct lfsck_layout_slave_data	*llsd;
+	struct lfsck_instance		*lfsck;
+	int				 rc = 0;
+	ENTRY;
+
+	lfsck = lfsck_instance_find(dev, true, false);
+	if (unlikely(lfsck == NULL))
+		RETURN(ERR_PTR(-ENODEV));
+
+	com = lfsck_component_find(lfsck, LT_LAYOUT);
+	if (unlikely(com == NULL))
+		GOTO(out, rc = -ENOENT);
+
+	llsd = com->lc_data;
+	if (!llsd->llsd_rbtree_valid)
+		GOTO(out, rc = -ESRCH);
+
+	OBD_ALLOC_PTR(it);
+	if (it == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	it->loi_llst = lfsck_layout_llst_find_and_del(llsd, attr, false);
+	if (it->loi_llst == NULL)
+		GOTO(out, rc = -ENODEV);
+
+	/* The first iteration against the rbtree, scan the whole rbtree
+	 * to remove the nodes which do NOT need to be handled. */
+	if (dev->dd_record_fid_accessed)
+		lfsck_orphan_rbtree_prune(dev, llsd);
+
+	/* read lock the rbtree when init, and unlock when fini */
+	read_lock(&llsd->llsd_rb_lock);
+	/* The iterator takes over the component reference. */
+	it->loi_com = com;
+	com = NULL;
+
+	GOTO(out, rc = 0);
+
+out:
+	if (com != NULL)
+		lfsck_component_put(env, com);
+	lfsck_instance_put(env, lfsck);
+	if (rc == 0)
+		return (struct dt_it *)it;
+
+	if (it != NULL)
+		OBD_FREE_PTR(it);
+
+	return (struct dt_it *)ERR_PTR(rc);
+}
+
+/**
+ * Release the orphan iterator.
+ *
+ * When the iterator holds a component, the rbtree read lock is dropped, the
+ * current key and hash are saved into the slave target for a later
+ * iteration, and the slave target and component references are put. The
+ * iterator itself is always freed.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ */
+static void lfsck_orphan_it_fini(const struct lu_env *env,
+				 struct dt_it *di)
+{
+	struct lfsck_orphan_it	*it = (struct lfsck_orphan_it *)di;
+	struct lfsck_component	*com = it->loi_com;
+
+	if (com != NULL) {
+		struct lfsck_layout_slave_data	 *llsd = com->lc_data;
+		struct lfsck_layout_slave_target *llst;
+
+		read_unlock(&llsd->llsd_rb_lock);
+		llst = it->loi_llst;
+		LASSERT(llst != NULL);
+
+		/* Save the key and hash for iterate next. */
+		llst->llst_fid = it->loi_key;
+		llst->llst_hash = it->loi_hash;
+		lfsck_layout_llst_put(llst);
+		lfsck_component_put(env, com);
+	}
+
+	OBD_FREE_PTR(it);
+}
+
+/**
+ * Advance the orphan iterator to the next OST-object to be returned.
+ *
+ * Walks the known bitmaps of the rbtree nodes starting from it->loi_key and
+ * skips the entries marked in the accessed bitmap, the objects that cannot
+ * be found or do not exist, and the objects whose parent FID does not match
+ * this target index (it->loi_llst->llst_index). When a record is returned,
+ * the position is left in it->loi_key, the parent FID and owner in
+ * it->loi_rec, and it->loi_hash is incremented.
+ *
+ * \param[in] env execution environment
+ * \param[in] di the iterator, a struct lfsck_orphan_it
+ *
+ * \retval +1: the iteration finished
+ * \retval 0: on success, not finished
+ * \retval -ve: on error
+ */
+static int lfsck_orphan_it_next(const struct lu_env *env,
+ struct dt_it *di)
+{
+ struct lfsck_thread_info *info = lfsck_env_info(env);
+ struct filter_fid_old *pfid = &info->lti_old_pfid;
+ struct lu_attr *la = &info->lti_la;
+ struct lfsck_orphan_it *it = (struct lfsck_orphan_it *)di;
+ struct lu_fid *key = &it->loi_key;
+ struct lu_orphan_rec *rec = &it->loi_rec;
+ struct lfsck_component *com = it->loi_com;
+ struct lfsck_instance *lfsck = com->lc_lfsck;
+ struct lfsck_layout_slave_data *llsd = com->lc_data;
+ struct dt_object *obj;
+ struct lfsck_rbtree_node *lrn;
+ int pos;
+ int rc;
+ __u32 save;
+ __u32 idx = it->loi_llst->llst_index;
+ bool exact = false;
+ ENTRY;
+
+ if (it->loi_over)
+ RETURN(1);
+ /* Pick the rbtree node to scan: search the tree for the key when no
+ * node is cached, otherwise step the key forward by one OID within
+ * the cached node. */
+again0:
+ lrn = it->loi_lrn;
+ if (lrn == NULL) {
+ lrn = lfsck_rbtree_search(llsd, key, &exact);
+ if (lrn == NULL) {
+ it->loi_over = 1;
+ RETURN(1);
+ }
+
+ it->loi_lrn = lrn;
+ if (!exact) {
+ key->f_seq = lrn->lrn_seq;
+ key->f_oid = lrn->lrn_first_oid;
+ key->f_ver = 0;
+ }
+ } else {
+ key->f_oid++;
+ if (unlikely(key->f_oid == 0)) { /* the OID wrapped around */
+ key->f_seq++;
+ it->loi_lrn = NULL;
+ goto again0;
+ }
+
+ if (key->f_oid >=
+ lrn->lrn_first_oid + LFSCK_RBTREE_BITMAP_WIDTH) { /* stepped past this node */
+ it->loi_lrn = NULL;
+ goto again0;
+ }
+ }
+ /* If this node's known count does not exceed its accessed count,
+ * walk forward to the first node whose known count does and restart
+ * from that node's first OID; finish if there is no such node. */
+ if (unlikely(atomic_read(&lrn->lrn_known_count) <=
+ atomic_read(&lrn->lrn_accessed_count))) {
+ struct rb_node *next = rb_next(&lrn->lrn_node);
+
+ while (next != NULL) {
+ lrn = rb_entry(next, struct lfsck_rbtree_node,
+ lrn_node);
+ if (atomic_read(&lrn->lrn_known_count) >
+ atomic_read(&lrn->lrn_accessed_count))
+ break;
+ next = rb_next(next);
+ }
+
+ if (next == NULL) {
+ it->loi_over = 1;
+ RETURN(1);
+ }
+
+ it->loi_lrn = lrn;
+ key->f_seq = lrn->lrn_seq;
+ key->f_oid = lrn->lrn_first_oid;
+ key->f_ver = 0;
+ }
+
+ pos = key->f_oid - lrn->lrn_first_oid;
+ /* Look for the next set bit in the known bitmap at or after pos. */
+again1:
+ pos = find_next_bit(lrn->lrn_known_bitmap,
+ LFSCK_RBTREE_BITMAP_WIDTH, pos);
+ if (pos >= LFSCK_RBTREE_BITMAP_WIDTH) { /* nothing more in this node */
+ key->f_oid = lrn->lrn_first_oid + pos;
+ if (unlikely(key->f_oid < lrn->lrn_first_oid)) { /* the OID wrapped around */
+ key->f_seq++;
+ key->f_oid = 0;
+ }
+ it->loi_lrn = NULL;
+ goto again0;
+ }
+
+ if (test_bit(pos, lrn->lrn_accessed_bitmap)) { /* already accessed: skip */
+ pos++;
+ goto again1;
+ }
+
+ key->f_oid = lrn->lrn_first_oid + pos;
+ obj = lfsck_object_find(env, lfsck, key);
+ if (IS_ERR(obj)) {
+ rc = PTR_ERR(obj);
+ if (rc == -ENOENT) { /* no such object: try the next bit */
+ pos++;
+ goto again1;
+ }
+ RETURN(rc);
+ }
+
+ dt_read_lock(env, obj, 0);
+ if (!dt_object_exists(obj)) {
+ dt_read_unlock(env, obj);
+ lfsck_object_put(env, obj);
+ pos++;
+ goto again1;
+ }
+
+ rc = dt_attr_get(env, obj, la, BYPASS_CAPA);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ rc = dt_xattr_get(env, obj, lfsck_buf_get(env, pfid, sizeof(*pfid)),
+ XATTR_NAME_FID, BYPASS_CAPA);
+ if (rc == -ENODATA) {
+ /* For the pre-created OST-object, update the bitmap to avoid
+ * others LFSCK (second phase) iteration to touch it again. */
+ if (la->la_ctime == 0) {
+ if (!test_and_set_bit(pos, lrn->lrn_accessed_bitmap))
+ atomic_inc(&lrn->lrn_accessed_count);
+
+ /* For the race between repairing dangling referenced
+ * MDT-object and unlink the file, it may left orphan
+ * OST-object there. Destroy it now! */
+ if (unlikely(!(la->la_mode & S_ISUID))) {
+ dt_read_unlock(env, obj);
+ lfsck_layout_destroy_orphan(env,
+ lfsck->li_bottom,
+ obj);
+ lfsck_object_put(env, obj);
+ pos++;
+ goto again1;
+ }
+ } else if (idx == 0) {
+ /* If the orphan OST-object has no parent information,
+ * regard it as referenced by the MDT-object on MDT0. */
+ fid_zero(&rec->lor_fid);
+ rec->lor_uid = la->la_uid;
+ rec->lor_gid = la->la_gid;
+ GOTO(out, rc = 0);
+ }
+
+ dt_read_unlock(env, obj);
+ lfsck_object_put(env, obj);
+ pos++;
+ goto again1;
+ }
+ /* Any other negative value is an error from reading the xattr. */
+ if (rc < 0)
+ GOTO(out, rc);
+ /* The xattr length must be that of one of the two filter_fid
+ * layouts. */
+ if (rc != sizeof(struct filter_fid) &&
+ rc != sizeof(struct filter_fid_old))
+ GOTO(out, rc = -EINVAL);
+
+ fid_le_to_cpu(&rec->lor_fid, &pfid->ff_parent);
+ /* In fact, the ff_parent::f_ver is not the real parent FID::f_ver,
+ * instead, it is the OST-object index in its parent MDT-object
+ * layout EA. */
+ save = rec->lor_fid.f_ver;
+ rec->lor_fid.f_ver = 0;
+ rc = lfsck_fid_match_idx(env, lfsck, &rec->lor_fid, idx);
+ /* If the orphan OST-object does not claim the MDT, then next.
+ *
+ * If we do not know whether it matches or not, then return it
+ * to the MDT for further check. */
+ if (rc == 0) {
+ dt_read_unlock(env, obj);
+ lfsck_object_put(env, obj);
+ pos++;
+ goto again1;
+ }
+
+ rec->lor_fid.f_ver = save;
+ rec->lor_uid = la->la_uid;
+ rec->lor_gid = la->la_gid;
+
+ CDEBUG(D_LFSCK, "%s: return orphan "DFID", PFID "DFID", owner %u:%u\n",
+ lfsck_lfsck2name(com->lc_lfsck), PFID(key), PFID(&rec->lor_fid),
+ rec->lor_uid, rec->lor_gid);
+
+ GOTO(out, rc = 0);
+ /* Common exit: drop the object lock and reference; the hash is
+ * advanced only when a record is being returned. */
+out:
+ dt_read_unlock(env, obj);
+ lfsck_object_put(env, obj);
+ if (rc == 0)
+ it->loi_hash++;
+
+ return rc;
+}
+
+/**
+ * Position the orphan iterator at, or after, the given key.
+ *
+ * The key is copied into the iterator and any cursor state left by an
+ * earlier positioning (the cached rbtree node and the end-of-iteration mark)
+ * is discarded, so that the search really starts from \a key.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ * \param[in] key	the FID (struct lu_fid) to start from
+ *
+ * \retval +1: locate to the exactly position
+ * \retval 0: cannot locate to the exactly position,
+ *	      call next() to move to a valid position.
+ * \retval -ve: on error
+ */
+static int lfsck_orphan_it_get(const struct lu_env *env,
+			       struct dt_it *di,
+			       const struct dt_key *key)
+{
+	struct lfsck_orphan_it	*it = (struct lfsck_orphan_it *)di;
+	int			 rc;
+
+	it->loi_key = *(const struct lu_fid *)key;
+	/* Forget the cached node and the "finished" mark. Otherwise next()
+	 * would step forward from the old node instead of searching for the
+	 * new key, or would report the end of the iteration right away. */
+	it->loi_lrn = NULL;
+	it->loi_over = 0;
+
+	rc = lfsck_orphan_it_next(env, di);
+	/* next() reports "finished" as 1 and "positioned" as 0, which is
+	 * the opposite of what get() has to return. */
+	if (rc == 1)
+		return 0;
+
+	if (rc == 0)
+		return 1;
+
+	return rc;
+}
+
+/**
+ * Nothing is held per position, so there is nothing to release here.
+ */
+static void
+lfsck_orphan_it_put(const struct lu_env *env, struct dt_it *di)
+{
+}
+
+/**
+ * Return the key (a struct lu_fid) of the current position.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ *
+ * \retval pointer to the key stored inside the iterator
+ */
+static struct dt_key *
+lfsck_orphan_it_key(const struct lu_env *env, const struct dt_it *di)
+{
+	struct lu_fid *fid = &((struct lfsck_orphan_it *)di)->loi_key;
+
+	return (struct dt_key *)fid;
+}
+
+/**
+ * Return the size of the iteration key.
+ *
+ * \retval the size of struct lu_fid, the type of the loi_key field
+ */
+static int
+lfsck_orphan_it_key_size(const struct lu_env *env, const struct dt_it *di)
+{
+	/* sizeof does not evaluate its operand, so the NULL is never used. */
+	return sizeof(((struct lfsck_orphan_it *)NULL)->loi_key);
+}
+
+/**
+ * Copy the record of the current position to the caller's buffer.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ * \param[out] rec	buffer receiving a struct lu_orphan_rec
+ * \param[in] attr	unused
+ *
+ * \retval 0: always
+ */
+static int
+lfsck_orphan_it_rec(const struct lu_env *env, const struct dt_it *di,
+		    struct dt_rec *rec, __u32 attr)
+{
+	const struct lfsck_orphan_it	*it = (struct lfsck_orphan_it *)di;
+	struct lu_orphan_rec		*out = (struct lu_orphan_rec *)rec;
+
+	*out = it->loi_rec;
+
+	return 0;
+}
+
+/**
+ * Return the hash of the current position.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ *
+ * \retval the iterator's loi_hash value
+ */
+static __u64
+lfsck_orphan_it_store(const struct lu_env *env, const struct dt_it *di)
+{
+	return ((struct lfsck_orphan_it *)di)->loi_hash;
+}
+
+/**
+ * Position the orphan iterator from a previously stored hash.
+ *
+ * The key and hash saved in the slave target are used as the starting
+ * point. If \a hash differs from the saved hash, a warning is printed and
+ * the saved key and hash are reset so that the iteration starts over. Any
+ * cursor state left by an earlier positioning (the cached rbtree node and
+ * the end-of-iteration mark) is discarded before searching.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator, a struct lfsck_orphan_it
+ * \param[in] hash	the hash to resume from
+ *
+ * \retval +1: locate to the exactly position
+ * \retval 0: cannot locate to the exactly position,
+ *	      call next() to move to a valid position.
+ * \retval -ve: on error
+ */
+static int lfsck_orphan_it_load(const struct lu_env *env,
+				const struct dt_it *di,
+				__u64 hash)
+{
+	struct lfsck_orphan_it		 *it = (struct lfsck_orphan_it *)di;
+	struct lfsck_layout_slave_target *llst = it->loi_llst;
+	int				  rc;
+
+	LASSERT(llst != NULL);
+
+	if (hash != llst->llst_hash) {
+		CWARN("%s: the given hash "LPU64" for orphan iteration does "
+		      "not match the one when fini "LPU64", to be reset.\n",
+		      lfsck_lfsck2name(it->loi_com->lc_lfsck), hash,
+		      llst->llst_hash);
+		fid_zero(&llst->llst_fid);
+		llst->llst_hash = 0;
+	}
+
+	it->loi_key = llst->llst_fid;
+	it->loi_hash = llst->llst_hash;
+	/* Forget the cached node and the "finished" mark. Otherwise next()
+	 * would step forward from the old node instead of searching for the
+	 * reloaded key, or would report the end of the iteration right
+	 * away. */
+	it->loi_lrn = NULL;
+	it->loi_over = 0;
+
+	rc = lfsck_orphan_it_next(env, (struct dt_it *)di);
+	/* next() reports "finished" as 1 and "positioned" as 0, which is
+	 * the opposite of what load() has to return. */
+	if (rc == 1)
+		return 0;
+
+	if (rc == 0)
+		return 1;
+
+	return rc;
+}
+
+/**
+ * Does nothing with \a key_rec.
+ *
+ * \retval 0: always
+ */
+static int
+lfsck_orphan_it_key_rec(const struct lu_env *env, const struct dt_it *di,
+			void *key_rec)
+{
+	return 0;
+}
+
+const struct dt_index_operations lfsck_orphan_index_ops = { /* index operations for the orphan iteration */
+ .dio_lookup = lfsck_orphan_index_lookup, /* returns -EOPNOTSUPP */
+ .dio_declare_insert = lfsck_orphan_index_declare_insert, /* returns -EOPNOTSUPP */
+ .dio_insert = lfsck_orphan_index_insert, /* returns -EOPNOTSUPP */
+ .dio_declare_delete = lfsck_orphan_index_declare_delete, /* returns -EOPNOTSUPP */
+ .dio_delete = lfsck_orphan_index_delete, /* returns -EOPNOTSUPP */
+ .dio_it = { /* the iterator methods */
+ .init = lfsck_orphan_it_init,
+ .fini = lfsck_orphan_it_fini,
+ .get = lfsck_orphan_it_get,
+ .put = lfsck_orphan_it_put, /* empty */
+ .next = lfsck_orphan_it_next,
+ .key = lfsck_orphan_it_key,
+ .key_size = lfsck_orphan_it_key_size,
+ .rec = lfsck_orphan_it_rec,
+ .store = lfsck_orphan_it_store,
+ .load = lfsck_orphan_it_load,
+ .key_rec = lfsck_orphan_it_key_rec, /* returns 0 without touching its argument */
+ }
+};