#include <lustre_eacl.h>
#include <lustre_quota.h>
#include <lustre_linkea.h>
+#include <lustre_lmv.h>
struct mdt_object;
* list */
struct mutex cdt_restore_lock; /**< protect restore
* list */
- time_t cdt_loop_period; /**< llog scan period */
- time_t cdt_grace_delay; /**< request grace
+ timeout_t cdt_loop_period; /**< llog scan period */
+ timeout_t cdt_grace_delay; /**< request grace
* delay */
- time_t cdt_active_req_timeout; /**< request timeout */
+ timeout_t cdt_active_req_timeout; /**< request timeout */
__u32 cdt_default_archive_id; /**< archive id used
* when none are
* specified */
__u64 msf_age;
};
+/* split directory automatically when sub file count exceeds 50k */
+#define DIR_SPLIT_COUNT_DEFAULT 50000
+
+/* by default, each directory auto-split allocates this many new stripes */
+#define DIR_SPLIT_DELTA_DEFAULT 4
+
+struct mdt_dir_restriper { /* directory restripe state; struct mdt_device
+			     * embeds one instance as mdt_restriper */
+ struct lu_env mdr_env; /* NOTE(review): presumably the environment the
+			  * restriper task runs with - confirm */
+ struct lu_context mdr_session; /* NOTE(review): presumably the session
+				  * context used with mdr_env - confirm */
+ struct task_struct *mdr_task; /* NOTE(review): presumably the task handled
+				 * by mdt_restriper_start() and
+				 * mdt_restriper_stop() - confirm */
+ /* lock protecting the fields below */
+ spinlock_t mdr_lock;
+ /* auto split when the sub file count of a plain dir or shard exceeds
+  * this threshold; NOTE(review): presumably initialised from
+  * DIR_SPLIT_COUNT_DEFAULT - confirm */
+ u64 mdr_dir_split_count;
+ /* auto split growth delta; NOTE(review): presumably initialised from
+  * DIR_SPLIT_DELTA_DEFAULT - confirm */
+ u32 mdr_dir_split_delta;
+ /* directories to split; per the mdt_object comments, objects are linked
+  * into this and the two lists below via mot_restripe_linkage */
+ struct list_head mdr_auto_splitting;
+ /* directories under which sub files are migrating */
+ struct list_head mdr_migrating;
+ /* directories waiting to update layout after migration */
+ struct list_head mdr_updating;
+ /* time to update directory layout after migration */
+ time64_t mdr_update_time;
+ /* LMV buffer ("lum") used in split/migrate/layout_change */
+ union lmv_mds_md mdr_lmv;
+ /* page used in readdir */
+ struct page *mdr_page;
+};
+
struct mdt_device {
/* super-class */
struct lu_device mdt_lu_dev;
mdt_enable_remote_dir:1,
mdt_enable_striped_dir:1,
mdt_enable_dir_migration:1,
+ mdt_enable_dir_restripe:1,
+ mdt_enable_dir_auto_split:1,
mdt_enable_remote_rename:1,
mdt_skip_lfsck:1,
- mdt_readonly:1;
+ mdt_readonly:1,
+ /* dir restripe migrate dirent only */
+ mdt_dir_restripe_nsonly:1;
/* user with gid can create remote/striped
* dir, and set default dir stripe */
atomic_t mdt_async_commit_count;
struct mdt_object *mdt_md_root;
+
+ struct mdt_dir_restriper mdt_restriper;
};
#define MDT_SERVICE_WATCHDOG_FACTOR (2)
struct lu_object_header mot_header;
struct lu_object mot_obj;
unsigned int mot_lov_created:1, /* lov object created */
- mot_cache_attr:1; /* enable remote object
+ mot_cache_attr:1, /* enable remote object
* attribute cache */
+ mot_restriping:1, /* dir restriping */
+ /* dir auto-split disabled */
+ mot_auto_split_disabled:1;
int mot_write_count;
spinlock_t mot_write_lock;
- /* Lock to protect object's SOM update. */
- struct mutex mot_som_mutex;
/* Lock to protect create_data */
struct mutex mot_lov_mutex;
+ /* Lock to protect object's SOM update. */
+ struct mutex mot_som_mutex;
/* lock to protect read/write stages for Data-on-MDT files */
struct rw_semaphore mot_dom_sem;
/* Lock to protect lease open.
struct rw_semaphore mot_open_sem;
atomic_t mot_lease_count;
atomic_t mot_open_count;
+ /* directory offset, used in sub file migration in dir restripe */
+ loff_t mot_restripe_offset;
+ /* link to mdt_restriper auto_splitting/migrating/updating */
+ struct list_head mot_restripe_linkage;
};
struct mdt_lock_handle {
};
enum mdt_reint_flag {
- MRF_OPEN_TRUNC = 1 << 0,
+ MRF_OPEN_TRUNC = BIT(0),
};
/*
/* FLR: layout change API */
struct md_layout_change mti_mlc;
+
+ struct lu_seq_range mti_range;
};
extern struct lu_context_key mdt_thread_key;
return exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE;
}
-enum {
- LMM_NO_DOM,
- LMM_DOM_ONLY,
- LMM_DOM_OST
-};
+__u32 mdt_lmm_dom_entry_check(struct lov_mds_md *lmm, int *dom_only);
-/* XXX Look into layout in MDT layer. This must be done in LOD. */
-static inline int mdt_lmm_dom_entry(struct lov_mds_md *lmm)
+static inline bool mdt_lmm_dom_only(struct lov_mds_md *lmm)
{
- struct lov_comp_md_v1 *comp_v1;
- struct lov_mds_md *v1;
- __u32 off;
- bool has_dom = true;
- int i;
-
- if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1)
- return LMM_NO_DOM;
-
- comp_v1 = (struct lov_comp_md_v1 *)lmm;
- off = le32_to_cpu(comp_v1->lcm_entries[0].lcme_offset);
- v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
-
- /* DoM entry is the first entry always */
- if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) != LOV_PATTERN_MDT &&
- le16_to_cpu(comp_v1->lcm_mirror_count) == 0)
- return LMM_NO_DOM;
-
- for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
- int j;
+ int dom_only = 0; /* value reported if the check leaves the flag untouched */
- off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
- v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
-
- if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) ==
- LOV_PATTERN_MDT)
- has_dom = true;
+ mdt_lmm_dom_entry_check(lmm, &dom_only); /* __u32 return value is ignored here */
+ return dom_only; /* int converted to bool: any non-zero value yields true */
+}
- for (j = 0; j < le16_to_cpu(v1->lmm_stripe_count); j++) {
- /* if there is any object on OST */
- if (le32_to_cpu(v1->lmm_objects[j].l_ost_idx) !=
- (__u32)-1UL)
- return LMM_DOM_OST;
- }
- }
- return has_dom ? LMM_DOM_ONLY : LMM_NO_DOM;
+static inline __u32 mdt_lmm_dom_stripesize(struct lov_mds_md *lmm)
+{
+ return mdt_lmm_dom_entry_check(lmm, NULL); /* result is passed through unchanged;
+					      * assumes the callee accepts a NULL
+					      * dom_only pointer - TODO confirm */
}
static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm)
struct mdt_object *o, struct md_attr *ma);
int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
const char *name);
+int __mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
+ struct md_attr *ma, const char *name);
int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
struct md_attr *ma, const char *name);
int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o,
struct lu_fid *pfid);
+int mdt_attr_get_pfid_name(struct mdt_thread_info *info, struct mdt_object *o,
+ struct lu_fid *pfid, struct lu_name *lname);
int mdt_write_get(struct mdt_object *o);
void mdt_write_put(struct mdt_object *o);
int mdt_write_read(struct mdt_object *o);
void mdt_version_get_save(struct mdt_thread_info *, struct mdt_object *, int);
int mdt_version_get_check_save(struct mdt_thread_info *, struct mdt_object *,
int);
+int mdt_lookup_version_check(struct mdt_thread_info *info,
+ struct mdt_object *p,
+ const struct lu_name *lname,
+ struct lu_fid *fid, int idx);
void mdt_thread_info_init(struct ptlrpc_request *req,
struct mdt_thread_info *mti);
void mdt_thread_info_fini(struct mdt_thread_info *mti);
/*
 * Map a generic lu_device pointer to the struct mdt_device that embeds it
 * as its mdt_lu_dev member.
 * NOTE(review): container_of_safe() presumably passes NULL/error pointers
 * through unchanged - confirm against its definition.
 */
static inline struct mdt_device *mdt_dev(struct lu_device *d)
{
- return container_of0(d, struct mdt_device, mdt_lu_dev);
+ return container_of_safe(d, struct mdt_device, mdt_lu_dev);
}
/*
 * Map a generic lu_object pointer to the struct mdt_object that embeds it
 * as its mot_obj member.
 * NOTE(review): container_of_safe() presumably passes NULL/error pointers
 * through unchanged - confirm against its definition.
 */
static inline struct mdt_object *mdt_obj(struct lu_object *o)
{
- return container_of0(o, struct mdt_object, mot_obj);
+ return container_of_safe(o, struct mdt_object, mot_obj);
}
static inline struct dt_object *mdt_obj2dt(struct mdt_object *mo)
LPROC_MDT_IO_READ,
LPROC_MDT_IO_WRITE,
LPROC_MDT_IO_PUNCH,
+ LPROC_MDT_MIGRATE,
LPROC_MDT_LAST,
};
u64 want, bool conservative);
extern struct kmem_cache *ldlm_glimpse_work_kmem;
+/*
+ * Tell whether the request described by @info comes from a root
+ * administrator: uid 0, gid 0 and CFS_CAP_SYS_ADMIN.
+ *
+ * The user credentials are set up with mdt_init_ucred() from the request
+ * body and released with mdt_exit_ucred() before returning.
+ *
+ * \param[in] info	thread info of the request, may be NULL
+ *
+ * \retval true		uid and gid are 0 and CFS_CAP_SYS_ADMIN is held;
+ *			also returned when @info or its body is missing and
+ *			LUSTRE_VERSION_CODE is below 2.17.3
+ * \retval false	otherwise, including when mdt_init_ucred() fails
+ */
+static inline bool mdt_is_rootadmin(struct mdt_thread_info *info)
+{
+	struct lu_ucred *uc = NULL;
+	bool is_admin;
+	int rc;
+
+	/* braces keep both preprocessor branches visibly inside the if */
+	if (info == NULL || info->mti_body == NULL) {
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 17, 3, 0)
+		/* return true in case old client did not send mdt body */
+		return true;
+#else
+		return false;
+#endif
+	}
+
+	rc = mdt_init_ucred(info, (struct mdt_body *)info->mti_body);
+	if (rc < 0)
+		return false;
+
+	uc = mdt_ucred(info);
+	is_admin = (uc->uc_uid == 0 && uc->uc_gid == 0 &&
+		    md_capable(uc, CFS_CAP_SYS_ADMIN));
+
+	/* credentials are only needed for the check above */
+	mdt_exit_ucred(info);
+
+	return is_admin;
+}
+
+int mdt_reint_migrate(struct mdt_thread_info *info,
+ struct mdt_lock_handle *unused);
+int mdt_dir_layout_update(struct mdt_thread_info *info);
+
+/* directory restripe */
+int mdt_restripe_internal(struct mdt_thread_info *info,
+ struct mdt_object *parent,
+ struct mdt_object *child,
+ const struct lu_name *lname,
+ struct lu_fid *tfid,
+ struct md_op_spec *spec,
+ struct md_attr *ma);
+int mdt_restriper_start(struct mdt_device *mdt);
+void mdt_restriper_stop(struct mdt_device *mdt);
+void mdt_auto_split_add(struct mdt_thread_info *info, struct mdt_object *o);
+void mdt_restripe_migrate_add(struct mdt_thread_info *info,
+ struct mdt_object *o);
+void mdt_restripe_update_add(struct mdt_thread_info *info,
+ struct mdt_object *o);
+int mdt_is_remote_object(struct mdt_thread_info *info,
+ struct mdt_object *parent,
+ struct mdt_object *child);
+
#endif /* _MDT_INTERNAL_H */