+/**
+ * Implementation of dt_index_operations::dio_it.init
+ *
+ * Set up the iterator for a striped directory. The lod_striped_it_xxx()
+ * methods only locate the current stripe and forward the call to the
+ * corresponding method of the next lower layer. Iteration starts on
+ * stripe 0.
+ *
+ * The iterator state is kept in the thread info returned by
+ * lod_env_info() (lti_it), so only one striped iterator can be active
+ * per thread at a time; this is asserted via lit_obj.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	the striped directory object to be iterated
+ * \param[in] attr	the attribute of the iterator; it is saved in
+ *			lit_attr and passed to the stripe's iterator
+ * \param[in] capa	capability, passed through to the lower layer
+ *
+ * \retval		the initialized iterator (dt_it) on success;
+ *			lit_stripe_index indicates the current position
+ *			among the stripes
+ * \retval		the ERR_PTR() value returned by the lower layer
+ *			if the initialization failed
+ */
+static struct dt_it *lod_striped_it_init(const struct lu_env *env,
+					 struct dt_object *dt, __u32 attr,
+					 struct lustre_capa *capa)
+{
+	struct lod_object *lo = lod_dt_obj(dt);
+	struct dt_object *next;
+	struct lod_it *it = &lod_env_info(env)->lti_it;
+	struct dt_it *it_next;
+	ENTRY;
+
+	LASSERT(lo->ldo_stripenr > 0);
+	next = lo->ldo_stripe[0];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	it_next = next->do_index_ops->dio_it.init(env, next, attr, capa);
+	if (IS_ERR(it_next))
+		/* leave through RETURN() so that it pairs with ENTRY */
+		RETURN(it_next);
+
+	/* currently we do not use more than one iterator per thread
+	 * so we store it in thread info. if at some point we need
+	 * more active iterators in a single thread, we can allocate
+	 * additional ones */
+	LASSERT(it->lit_obj == NULL);
+
+	it->lit_stripe_index = 0;
+	it->lit_attr = attr;
+	it->lit_it = it_next;
+	it->lit_obj = dt;
+
+	RETURN((struct dt_it *)it);
+}
+
+#define LOD_CHECK_STRIPED_IT(env, it, lo) \
+do { /* sanity-check a striped-directory iterator; env is not referenced */ \
+ LASSERT((it)->lit_obj != NULL); /* iterator is bound to an object */ \
+ LASSERT((it)->lit_it != NULL); /* a lower-layer iterator is attached */ \
+ LASSERT((lo)->ldo_stripenr > 0); /* the object has at least one stripe */ \
+ LASSERT((it)->lit_stripe_index < (lo)->ldo_stripenr); /* index in range */ \
+} while (0)
+
+/**
+ * Implementation of dt_index_operations::dio_it.fini
+ *
+ * Finish the iterator of a striped directory: the lower-layer iterator
+ * of the current stripe is finalized, then the iterator state is reset
+ * so that it is marked as unused.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ */
+static void lod_striped_it_fini(const struct lu_env *env, struct dt_it *di)
+{
+	struct lod_it *it = (struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* release the iterator of the stripe we are currently positioned on */
+	iops = &next->do_index_ops->dio_it;
+	iops->fini(env, it->lit_it);
+
+	/* the iterator is no longer in use: clear its whole state */
+	it->lit_stripe_index = 0;
+	it->lit_it = NULL;
+	it->lit_obj = NULL;
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.get
+ *
+ * Position the iterator at the given key. The request is forwarded to
+ * the iterator of the stripe selected by lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ * \param[in] key	the key the iterator will be positioned at
+ *
+ * \retval		the value returned by the stripe's dio_it.get()
+ *			method (negative error number on failure)
+ */
+static int lod_striped_it_get(const struct lu_env *env, struct dt_it *di,
+			      const struct dt_key *key)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	struct dt_object *next;
+	ENTRY;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* leave through RETURN() so that it pairs with ENTRY */
+	RETURN(next->do_index_ops->dio_it.get(env, it->lit_it, key));
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.put
+ *
+ * This function is the counterpart of it_get. It forwards the call to
+ * the dio_it.put() method of the stripe selected by lit_stripe_index.
+ * The original note on this function says the lower-layer methods
+ * (see osd_it_ea_put or osd_index_it_put) currently do nothing.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ */
+static void lod_striped_it_put(const struct lu_env *env, struct dt_it *di)
+{
+	struct lod_it *it = (struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* plain call: a void function must not return an expression */
+	next->do_index_ops->dio_it.put(env, it->lit_it);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.next
+ *
+ * Move the iterator to the next entry. When next() of the current stripe
+ * returns a positive value the stripe is exhausted: its iterator is
+ * released, the following stripe is prepared with do_index_try() and a
+ * new lower-layer iterator is initialized on it. On every stripe other
+ * than stripe 0 the "." and ".." entries are skipped.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ *
+ * \retval 0		if the iterator is positioned at the next entry
+ * \retval 1		if the last stripe has been exhausted
+ * \retval negative	error number on failure
+ */
+static int lod_striped_it_next(const struct lu_env *env, struct dt_it *di)
+{
+	struct lod_it *it = (struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	struct dt_object *next;
+	struct dt_it *it_next;
+	int rc;
+	ENTRY;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+again:
+	rc = next->do_index_ops->dio_it.next(env, it->lit_it);
+	if (rc < 0)
+		RETURN(rc);
+
+	/* stripe 0 reports every entry, including "." and ".." */
+	if (rc == 0 && it->lit_stripe_index == 0)
+		RETURN(rc);
+
+	if (rc == 0 && it->lit_stripe_index > 0) {
+		struct lu_dirent *ent;
+		__u16 namelen;
+
+		ent = (struct lu_dirent *)lod_env_info(env)->lti_key;
+
+		rc = next->do_index_ops->dio_it.rec(env, it->lit_it,
+						    (struct dt_rec *)ent,
+						    it->lit_attr);
+		if (rc != 0)
+			RETURN(rc);
+
+		namelen = le16_to_cpu(ent->lde_namelen);
+
+		/* skip . and .. for slave stripe */
+		if ((strncmp(ent->lde_name, ".", namelen) == 0 &&
+		     namelen == 1) ||
+		    (strncmp(ent->lde_name, "..", namelen) == 0 &&
+		     namelen == 2))
+			goto again;
+
+		RETURN(rc);
+	}
+
+	/* go to next stripe */
+	if (it->lit_stripe_index + 1 >= lo->ldo_stripenr)
+		RETURN(1);
+
+	it->lit_stripe_index++;
+
+	/* "next" still refers to the exhausted stripe: release its iterator */
+	next->do_index_ops->dio_it.put(env, it->lit_it);
+	next->do_index_ops->dio_it.fini(env, it->lit_it);
+
+	/*
+	 * Switch to the new stripe BEFORE do_index_try(): it is the new
+	 * stripe whose index operations must be set up. lod_index_try()
+	 * skips stripes that do not exist, so do_index_ops of the new
+	 * stripe can only be asserted after do_index_try() succeeded.
+	 *
+	 * NOTE(review): on the two error returns below it->lit_it still
+	 * holds the iterator that was just finalized - confirm how the
+	 * callers' fini() is expected to cope with that.
+	 */
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+
+	rc = next->do_ops->do_index_try(env, next, &dt_directory_features);
+	if (rc != 0)
+		RETURN(rc);
+
+	LASSERT(next->do_index_ops != NULL);
+
+	it_next = next->do_index_ops->dio_it.init(env, next, it->lit_attr,
+						  BYPASS_CAPA);
+	if (IS_ERR(it_next))
+		RETURN(PTR_ERR(it_next));
+
+	it->lit_it = it_next;
+	goto again;
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.key
+ *
+ * Get the key at the current position of the iterator. The request is
+ * forwarded to the iterator of the stripe selected by lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ *
+ * \retval		the value returned by the stripe's dio_it.key()
+ *			method, passed through unchanged
+ */
+static struct dt_key *lod_striped_it_key(const struct lu_env *env,
+					 const struct dt_it *di)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->key(env, it->lit_it);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.key_size
+ *
+ * Get the size of the key at the current position of the iterator. The
+ * request is forwarded to the iterator of the stripe selected by
+ * lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ *
+ * \retval		the value returned by the stripe's
+ *			dio_it.key_size() method, passed through unchanged
+ */
+static int lod_striped_it_key_size(const struct lu_env *env,
+				   const struct dt_it *di)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->key_size(env, it->lit_it);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.rec
+ *
+ * Get the record at the current position of the iterator. The request
+ * is forwarded to the iterator of the stripe selected by
+ * lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ * \param[out] rec	buffer that receives the record
+ * \param[in] attr	the attribute passed on to the stripe's rec()
+ *			method
+ *
+ * \retval		the value returned by the stripe's dio_it.rec()
+ *			method (callers in this file treat 0 as success)
+ */
+static int lod_striped_it_rec(const struct lu_env *env, const struct dt_it *di,
+			      struct dt_rec *rec, __u32 attr)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->rec(env, it->lit_it, rec, attr);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.rec_size
+ *
+ * Get the size of the record at the current position of the iterator.
+ * The request is forwarded to the iterator of the stripe selected by
+ * lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ * \param[in] attr	the attribute passed on to the stripe's
+ *			rec_size() method
+ *
+ * \retval		the value returned by the stripe's
+ *			dio_it.rec_size() method, passed through unchanged
+ */
+static int lod_striped_it_rec_size(const struct lu_env *env,
+				   const struct dt_it *di, __u32 attr)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->rec_size(env, it->lit_it, attr);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.store
+ *
+ * Return a cookie for the current position of the iterator, so that the
+ * user can use this cookie to load/start the iterator next time. The
+ * cookie is obtained from the iterator of the stripe selected by
+ * lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ *
+ * \retval		the cookie
+ */
+static __u64 lod_striped_it_store(const struct lu_env *env,
+				  const struct dt_it *di)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->store(env, it->lit_it);
+}
+
+/**
+ * Implementation of dt_index_operations::dio_it.load
+ *
+ * Position the iterator at the given hash (usually obtained from
+ * store). The request is forwarded to the iterator of the stripe
+ * selected by lit_stripe_index.
+ *
+ * \param[in] env	execution environment
+ * \param[in] di	the iterator for the striped directory
+ * \param[in] hash	the given hash
+ *
+ * \retval		the value returned by the stripe's dio_it.load()
+ *			method, passed through unchanged (negative on
+ *			failure)
+ */
+static int lod_striped_it_load(const struct lu_env *env,
+			       const struct dt_it *di, __u64 hash)
+{
+	const struct lod_it *it = (const struct lod_it *)di;
+	struct lod_object *lo = lod_dt_obj(it->lit_obj);
+	const struct dt_it_ops *iops;
+	struct dt_object *next;
+
+	LOD_CHECK_STRIPED_IT(env, it, lo);
+
+	next = lo->ldo_stripe[it->lit_stripe_index];
+	LASSERT(next != NULL);
+	LASSERT(next->do_index_ops != NULL);
+
+	/* delegate to the iterator methods of the current stripe */
+	iops = &next->do_index_ops->dio_it;
+
+	return iops->load(env, it->lit_it, hash);
+}
+
+static struct dt_index_operations lod_striped_index_ops = { /* installed by lod_index_try() when ldo_stripenr > 0 */
+ .dio_lookup = lod_index_lookup, /* lookup/insert/delete use the lod_index_*() handlers */
+ .dio_declare_insert = lod_declare_index_insert,
+ .dio_insert = lod_index_insert,
+ .dio_declare_delete = lod_declare_index_delete,
+ .dio_delete = lod_index_delete,
+ .dio_it = { /* iteration goes through the lod_striped_it_*() methods */
+ .init = lod_striped_it_init,
+ .fini = lod_striped_it_fini,
+ .get = lod_striped_it_get,
+ .put = lod_striped_it_put,
+ .next = lod_striped_it_next,
+ .key = lod_striped_it_key,
+ .key_size = lod_striped_it_key_size,
+ .rec = lod_striped_it_rec,
+ .rec_size = lod_striped_it_rec_size,
+ .store = lod_striped_it_store,
+ .load = lod_striped_it_load,
+ }
+};
+
+/**
+ * Append the FID for each shard of the striped directory after the
+ * given LMV EA header.
+ *
+ * To simplify striped directory and the consistency verification,
+ * we only store the LMV EA header on disk, for both master object
+ * and slave objects. When someone wants to know the whole LMV EA,
+ * such as client readdir(), we can build the entire LMV EA on the
+ * MDT side (in RAM) via iterating the sub-directory entries that
+ * are contained in the master object of the striped directory.
+ *
+ * For the master object of the striped directory, the valid name
+ * for each shard is composed of the ${shard_FID}:${shard_idx}.
+ *
+ * There may be holes in the LMV EA if some shards' name entries
+ * are corrupted or lost.
+ *
+ * If \a buf is too small and \a resize is set, a new buffer is allocated
+ * with lu_buf_alloc() and the old content is copied into it. The previous
+ * buffer is NOT released here - presumably the caller still owns it
+ * (TODO confirm against the callers).
+ *
+ * \param[in] env	pointer to the thread context
+ * \param[in] lo	pointer to the master object of the striped directory
+ * \param[in] buf	pointer to the lu_buf which will hold the LMV EA;
+ *			it must already contain the LMV EA header
+ * \param[in] resize	whether re-allocate the buffer if it is not big enough
+ *
+ * \retval		positive size of the LMV EA
+ * \retval		0 for nothing to be loaded
+ * \retval		negative error number on failure
+ */
+int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
+			struct lu_buf *buf, bool resize)
+{
+	struct lu_dirent *ent =
+			(struct lu_dirent *)lod_env_info(env)->lti_key;
+	struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+	struct dt_object *obj = dt_object_child(&lo->ldo_obj);
+	struct lmv_mds_md_v1 *lmv1 = buf->lb_buf;
+	struct dt_it *it;
+	const struct dt_it_ops *iops;
+	__u32 stripes;
+	__u32 magic = le32_to_cpu(lmv1->lmv_magic);
+	int size;
+	int rc;
+	ENTRY;
+
+	/* If it is not a striped directory, then load nothing. */
+	if (magic != LMV_MAGIC_V1)
+		RETURN(0);
+
+	/* If it is in migration (or failure), then load nothing. */
+	if (le32_to_cpu(lmv1->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION)
+		RETURN(0);
+
+	stripes = le32_to_cpu(lmv1->lmv_stripe_count);
+	if (stripes < 1)
+		RETURN(0);
+
+	size = lmv_mds_md_size(stripes, magic);
+	if (buf->lb_len < size) {
+		struct lu_buf tbuf;
+
+		if (!resize)
+			RETURN(-ERANGE);
+
+		/* remember the old buffer, then allocate a bigger one */
+		tbuf = *buf;
+		buf->lb_buf = NULL;
+		buf->lb_len = 0;
+		lu_buf_alloc(buf, size);
+		lmv1 = buf->lb_buf;
+		if (lmv1 == NULL)
+			RETURN(-ENOMEM);
+
+		memcpy(buf->lb_buf, tbuf.lb_buf, tbuf.lb_len);
+	}
+
+	if (unlikely(!dt_try_as_dir(env, obj)))
+		RETURN(-ENOTDIR);
+
+	/* start from empty slots; slots that are never filled remain holes */
+	memset(&lmv1->lmv_stripe_fids[0], 0, stripes * sizeof(struct lu_fid));
+	iops = &obj->do_index_ops->dio_it;
+	it = iops->init(env, obj, LUDA_64BITHASH, BYPASS_CAPA);
+	if (IS_ERR(it))
+		RETURN(PTR_ERR(it));
+
+	rc = iops->load(env, it, 0);
+	if (rc == 0)
+		rc = iops->next(env, it);
+	else if (rc > 0)
+		rc = 0;
+
+	while (rc == 0) {
+		char name[FID_LEN + 2] = "";
+		struct lu_fid fid;
+		__u32 index;
+		int len;
+
+		rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_64BITHASH);
+		if (rc != 0)
+			break;
+
+		/* Every path that leaves the loop body without reaching
+		 * the "next" label terminates the loop with -EIO. */
+		rc = -EIO;
+
+		fid_le_to_cpu(&fid, &ent->lde_fid);
+		ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
+		if (ent->lde_name[0] == '.') {
+			if (ent->lde_namelen == 1)
+				goto next;
+
+			if (ent->lde_namelen == 2 && ent->lde_name[1] == '.')
+				goto next;
+		}
+
+		/* Build the expected "${FID}:" prefix from the CPU-order FID
+		 * (ent->lde_fid is little-endian) and allow the whole
+		 * buffer to be used, so that the trailing ':' cannot be
+		 * truncated. */
+		len = snprintf(name, sizeof(name), DFID":", PFID(&fid));
+		/* The ent->lde_name is composed of ${FID}:${index} */
+		if (len >= (int)sizeof(name) ||
+		    ent->lde_namelen < len + 1 ||
+		    memcmp(ent->lde_name, name, len) != 0) {
+			CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO,
+			       "%s: invalid shard name %.*s with the FID "DFID
+			       " for the striped directory "DFID", %s\n",
+			       lod2obd(lod)->obd_name, ent->lde_namelen,
+			       ent->lde_name, PFID(&fid),
+			       PFID(lu_object_fid(&obj->do_lu)),
+			       lod->lod_lmv_failout ? "failout" : "skip");
+
+			if (lod->lod_lmv_failout)
+				break;
+
+			goto next;
+		}
+
+		/* parse the decimal shard index that follows the ':' */
+		index = 0;
+		do {
+			if (ent->lde_name[len] < '0' ||
+			    ent->lde_name[len] > '9') {
+				CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO,
+				       "%s: invalid shard name %.*s with the "
+				       "FID "DFID" for the striped directory "
+				       DFID", %s\n",
+				       lod2obd(lod)->obd_name, ent->lde_namelen,
+				       ent->lde_name, PFID(&fid),
+				       PFID(lu_object_fid(&obj->do_lu)),
+				       lod->lod_lmv_failout ?
+				       "failout" : "skip");
+
+				/* This only leaves the do-while loop: len is
+				 * still less than lde_namelen, so the block
+				 * below is skipped and the outer loop stops
+				 * because rc is -EIO. */
+				if (lod->lod_lmv_failout)
+					break;
+
+				goto next;
+			}
+
+			index = index * 10 + ent->lde_name[len++] - '0';
+		} while (len < ent->lde_namelen);
+
+		if (len == ent->lde_namelen) {
+			/* Out of LMV EA range. */
+			if (index >= stripes) {
+				CERROR("%s: the shard %.*s for the striped "
+				       "directory "DFID" is out of the known "
+				       "LMV EA range [0 - %u], failout\n",
+				       lod2obd(lod)->obd_name, ent->lde_namelen,
+				       ent->lde_name,
+				       PFID(lu_object_fid(&obj->do_lu)),
+				       stripes - 1);
+
+				break;
+			}
+
+			/* The slot has been occupied. */
+			if (!fid_is_zero(&lmv1->lmv_stripe_fids[index])) {
+				struct lu_fid fid0;
+
+				fid_le_to_cpu(&fid0,
+					      &lmv1->lmv_stripe_fids[index]);
+				CERROR("%s: both the shard "DFID" and "DFID
+				       " for the striped directory "DFID
+				       " claim the same LMV EA slot at the "
+				       "index %d, failout\n",
+				       lod2obd(lod)->obd_name,
+				       PFID(&fid0), PFID(&fid),
+				       PFID(lu_object_fid(&obj->do_lu)), index);
+
+				break;
+			}
+
+			/* stored as LE mode */
+			lmv1->lmv_stripe_fids[index] = ent->lde_fid;
+
+next:
+			rc = iops->next(env, it);
+		}
+	}
+
+	iops->put(env, it);
+	iops->fini(env, it);
+
+	/* rc > 0 means that the iteration reached the end of the directory */
+	RETURN(rc > 0 ? lmv_mds_md_size(stripes, magic) : rc);
+}
+
+/**
+ * Implementation of dt_object_operations::do_index_try
+ *
+ * Set up the index operations of the given object; the index API may be
+ * used on the object once this has succeeded. The striping is loaded
+ * first, then do_index_try() of the next lower layer is called. For a
+ * striped directory do_index_try() is also called on every stripe that
+ * exists, and the object gets lod_striped_index_ops; otherwise it gets
+ * lod_index_ops.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	the index object to be initialized
+ * \param[in] feat	the features of this object, for example fixed or
+ *			variable key size etc.
+ *
+ * \retval 0		if the initialization is successful
+ * \retval non-zero	error returned by lod_load_striping_locked() or
+ *			by do_index_try() of a lower-layer object
+ */
+static int lod_index_try(const struct lu_env *env, struct dt_object *dt,
+			 const struct dt_index_features *feat)
+{
+	struct lod_object *lo = lod_dt_obj(dt);
+	struct dt_object *next = dt_object_child(dt);
+	int i;
+	int rc;
+	ENTRY;
+
+	LASSERT(next->do_ops);
+	LASSERT(next->do_ops->do_index_try);
+
+	rc = lod_load_striping_locked(env, lo);
+	if (rc != 0)
+		RETURN(rc);
+
+	rc = next->do_ops->do_index_try(env, next, feat);
+	if (rc != 0)
+		RETURN(rc);
+
+	/* no stripes: the plain index operations are sufficient */
+	if (!(lo->ldo_stripenr > 0)) {
+		dt->do_index_ops = &lod_index_ops;
+		RETURN(rc);
+	}
+
+	for (i = 0; i < lo->ldo_stripenr; i++) {
+		struct dt_object *stripe = lo->ldo_stripe[i];
+
+		/* stripes that do not exist are left untouched */
+		if (!dt_object_exists(stripe))
+			continue;
+
+		rc = stripe->do_ops->do_index_try(env, stripe, feat);
+		if (rc != 0)
+			RETURN(rc);
+	}
+	dt->do_index_ops = &lod_striped_index_ops;
+
+	RETURN(rc);
+}
+