#include <lustre_req_layout.h>
#include <lustre_sec.h>
#include <lustre_idmap.h>
-#include <lustre_eacl.h>
#include <lustre_quota.h>
#include <lustre_linkea.h>
+#include <lustre_lmv.h>
struct mdt_object;
+/* Modes for taking a DoM lock at open time.
+ * NOTE(review): purpose inferred from the enumerator names — confirm.
+ */
enum {
NO_DOM_LOCK_ON_OPEN = 0,
TRYLOCK_DOM_ON_OPEN = 1,
+ /* not used anymore, left here for compatibility */
ALWAYS_DOM_LOCK_ON_OPEN = 2,
+ /* implicitly 3: one past the last mode listed above */
NUM_DOM_LOCK_ON_OPEN_MODES
};
__u64 msf_age;
};
+/* split directory automatically when sub file count exceeds 50k */
+#define DIR_SPLIT_COUNT_DEFAULT 50000
+
+/* each directory auto-split allocates this many (delta) new stripes */
+#define DIR_SPLIT_DELTA_DEFAULT 4
+
+/*
+ * State for directory restriping: the lists of directories to auto-split,
+ * to migrate sub files under, and to update layout for, plus the tunables
+ * and scratch buffers used while doing so. struct mdt_device embeds one
+ * instance as mdt_restriper.
+ */
+struct mdt_dir_restriper {
+ /* NOTE(review): presumably the env the restriper task runs with — confirm */
+ struct lu_env mdr_env;
+ /* NOTE(review): presumably the session context paired with mdr_env — confirm */
+ struct lu_context mdr_session;
+ /* NOTE(review): presumably the task managed by mdt_restriper_start() and
+  * mdt_restriper_stop() — confirm
+  */
+ struct task_struct *mdr_task;
+ /* lock for below fields */
+ spinlock_t mdr_lock;
+ /* auto split when plain dir/shard sub files exceed threshold */
+ u64 mdr_dir_split_count;
+ /* auto split growth delta */
+ u32 mdr_dir_split_delta;
+ /* directories to split */
+ struct list_head mdr_auto_splitting;
+ /* directories under which sub files are migrating */
+ struct list_head mdr_migrating;
+ /* directories waiting to update layout after migration */
+ struct list_head mdr_updating;
+ /* time to update directory layout after migration */
+ time64_t mdr_update_time;
+ /* lum used in split/migrate/layout_change */
+ union lmv_mds_md mdr_lmv;
+ /* page used in readdir */
+ struct page *mdr_page;
+};
+
struct mdt_device {
/* super-class */
struct lu_device mdt_lu_dev;
mdt_enable_striped_dir:1,
mdt_enable_dir_migration:1,
mdt_enable_dir_restripe:1,
+ mdt_enable_dir_auto_split:1,
mdt_enable_remote_rename:1,
mdt_skip_lfsck:1,
- mdt_readonly:1;
+ mdt_readonly:1,
+ /* dir restripe migrate dirent only */
+ mdt_dir_restripe_nsonly:1,
+ /* subdirectory mount of remote dir */
+ mdt_enable_remote_subdir_mount:1;
/* user with gid can create remote/striped
* dir, and set default dir stripe */
atomic_t mdt_async_commit_count;
struct mdt_object *mdt_md_root;
+
+ struct mdt_dir_restriper mdt_restriper;
};
#define MDT_SERVICE_WATCHDOG_FACTOR (2)
struct lu_object_header mot_header;
struct lu_object mot_obj;
unsigned int mot_lov_created:1, /* lov object created */
- mot_cache_attr:1; /* enable remote object
+ mot_cache_attr:1, /* enable remote object
* attribute cache */
+ mot_restriping:1, /* dir restriping */
+ /* dir auto-split disabled */
+ mot_auto_split_disabled:1;
int mot_write_count;
spinlock_t mot_write_lock;
- /* Lock to protect object's SOM update. */
- struct mutex mot_som_mutex;
/* Lock to protect create_data */
struct mutex mot_lov_mutex;
+ /* Lock to protect object's SOM update. */
+ struct mutex mot_som_mutex;
/* lock to protect read/write stages for Data-on-MDT files */
struct rw_semaphore mot_dom_sem;
/* Lock to protect lease open.
struct rw_semaphore mot_open_sem;
atomic_t mot_lease_count;
atomic_t mot_open_count;
+ /* directory offset, used in sub file migration in dir restripe */
+ loff_t mot_restripe_offset;
+ /* link to mdt_restriper auto_splitting/migrating/updating */
+ struct list_head mot_restripe_linkage;
};
struct mdt_lock_handle {
/* Regular lock */
struct lustre_handle mlh_reg_lh;
enum ldlm_mode mlh_reg_mode;
+ __u64 mlh_gid;
/* Pdirops lock */
struct lustre_handle mlh_pdo_lh;
/* FLR: layout change API */
struct md_layout_change mti_mlc;
+
+ struct lu_seq_range mti_range;
};
extern struct lu_context_key mdt_thread_key;
}
+/* Progress of a request: the extents moved so far are tracked in a tree
+ * (crp_root) that is protected by crp_lock.
+ */
struct cdt_req_progress {
- struct mutex crp_lock; /**< protect tree */
- struct interval_node *crp_root; /**< tree to track extent
+ spinlock_t crp_lock; /**< protect tree */
+ struct interval_tree_root crp_root; /**< tree to track extent
* moved */
- struct interval_node **crp_node; /**< buffer for tree nodes
- * vector of fixed size
- * vectors */
- int crp_cnt; /**< # of used nodes */
- int crp_max; /**< # of allocated nodes */
+ /* NOTE(review): presumably the total amount moved so far, summed over
+  * the extents in crp_root — confirm against the code that updates it
+  */
+ __u64 crp_total;
};
struct cdt_agent_req {
comp_v1 = (struct lov_comp_md_v1 *)lmm;
- for (i = 1; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
+ for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
const struct lu_name *lname);
void mdt_lock_reg_init(struct mdt_lock_handle *lh, enum ldlm_mode lm);
+void mdt_lh_reg_init(struct mdt_lock_handle *lh, struct ldlm_lock *lock);
int mdt_lock_setup(struct mdt_thread_info *info, struct mdt_object *mo,
struct mdt_lock_handle *lh);
struct mdt_object *o, const struct lu_fid *fid,
struct lustre_handle *lh,
enum ldlm_mode mode, __u64 ibits, bool cache);
+int mdt_object_local_lock(struct mdt_thread_info *info, struct mdt_object *o,
+ struct mdt_lock_handle *lh, __u64 *ibits,
+ __u64 trybits, bool cos_incompat);
int mdt_reint_striped_lock(struct mdt_thread_info *info,
struct mdt_object *o,
struct mdt_lock_handle *lh,
struct mdt_object *o, struct md_attr *ma);
int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
const char *name);
+int __mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
+ struct md_attr *ma, const char *name);
int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
struct md_attr *ma, const char *name);
int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o,
struct lu_fid *pfid);
+int mdt_attr_get_pfid_name(struct mdt_thread_info *info, struct mdt_object *o,
+ struct lu_fid *pfid, struct lu_name *lname);
int mdt_write_get(struct mdt_object *o);
void mdt_write_put(struct mdt_object *o);
int mdt_write_read(struct mdt_object *o);
bool allow_client_chgrp(struct mdt_thread_info *info, struct lu_ucred *uc);
int mdt_check_ucred(struct mdt_thread_info *);
int mdt_init_ucred(struct mdt_thread_info *, struct mdt_body *);
-int mdt_init_ucred_intent_getattr(struct mdt_thread_info *, struct mdt_body *);
int mdt_init_ucred_reint(struct mdt_thread_info *);
void mdt_exit_ucred(struct mdt_thread_info *);
int mdt_version_get_check(struct mdt_thread_info *, struct mdt_object *, int);
void mdt_version_get_save(struct mdt_thread_info *, struct mdt_object *, int);
int mdt_version_get_check_save(struct mdt_thread_info *, struct mdt_object *,
int);
+int mdt_lookup_version_check(struct mdt_thread_info *info,
+ struct mdt_object *p,
+ const struct lu_name *lname,
+ struct lu_fid *fid, int idx);
void mdt_thread_info_init(struct ptlrpc_request *req,
struct mdt_thread_info *mti);
void mdt_thread_info_fini(struct mdt_thread_info *mti);
struct mdt_body *repbody);
int mdt_pack_secctx_in_reply(struct mdt_thread_info *info,
struct mdt_object *child);
+int mdt_pack_encctx_in_reply(struct mdt_thread_info *info,
+ struct mdt_object *child);
+/* Return the mdt_device whose mdt_lu_dev member is @d. How a NULL or error
+ * pointer is treated is up to container_of_safe(), which is not visible here.
+ */
static inline struct mdt_device *mdt_dev(struct lu_device *d)
{
- return container_of0(d, struct mdt_device, mdt_lu_dev);
+ return container_of_safe(d, struct mdt_device, mdt_lu_dev);
}
+/* Return the mdt_object whose mot_obj member is @o. How a NULL or error
+ * pointer is treated is up to container_of_safe(), which is not visible here.
+ */
static inline struct mdt_object *mdt_obj(struct lu_object *o)
{
- return container_of0(o, struct mdt_object, mot_obj);
+ return container_of_safe(o, struct mdt_object, mot_obj);
}
static inline struct dt_object *mdt_obj2dt(struct mdt_object *mo)
void mdt_cdt_free_request(struct cdt_agent_req *car);
int mdt_cdt_add_request(struct coordinator *cdt, struct cdt_agent_req *new_car);
struct cdt_agent_req *mdt_cdt_find_request(struct coordinator *cdt, u64 cookie);
-void mdt_cdt_get_work_done(struct cdt_agent_req *car, __u64 *done_sz);
void mdt_cdt_get_request(struct cdt_agent_req *car);
void mdt_cdt_put_request(struct cdt_agent_req *car);
struct cdt_agent_req *mdt_cdt_update_request(struct coordinator *cdt,
LPROC_MDT_LAST,
};
-void mdt_counter_incr(struct ptlrpc_request *req, int opcode);
-void mdt_stats_counter_init(struct lprocfs_stats *stats);
+void mdt_counter_incr(struct ptlrpc_request *req, int opcode, long amount);
+void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset);
int mdt_tunables_init(struct mdt_device *mdt, const char *name);
void mdt_tunables_fini(struct mdt_device *mdt);
void mdt_rename_counter_tally(struct mdt_thread_info *info,
struct mdt_device *mdt,
struct ptlrpc_request *req,
- struct mdt_object *src, struct mdt_object *tgt);
+ struct mdt_object *src, struct mdt_object *tgt,
+ long count);
static inline struct obd_device *mdt2obd_dev(const struct mdt_device *mdt)
{
uc = mdt_ucred(info);
is_admin = (uc->uc_uid == 0 && uc->uc_gid == 0 &&
- md_capable(uc, CFS_CAP_SYS_ADMIN));
+ md_capable(uc, CAP_SYS_ADMIN));
mdt_exit_ucred(info);
return is_admin;
}
+int mdt_reint_migrate(struct mdt_thread_info *info,
+ struct mdt_lock_handle *unused);
+int mdt_dir_layout_update(struct mdt_thread_info *info);
+
+/* directory restripe */
+int mdt_restripe_internal(struct mdt_thread_info *info,
+ struct mdt_object *parent,
+ struct mdt_object *child,
+ const struct lu_name *lname,
+ struct lu_fid *tfid,
+ struct md_op_spec *spec,
+ struct md_attr *ma);
+int mdt_restriper_start(struct mdt_device *mdt);
+void mdt_restriper_stop(struct mdt_device *mdt);
+void mdt_auto_split_add(struct mdt_thread_info *info, struct mdt_object *o);
+void mdt_restripe_migrate_add(struct mdt_thread_info *info,
+ struct mdt_object *o);
+void mdt_restripe_update_add(struct mdt_thread_info *info,
+ struct mdt_object *o);
+int mdt_is_remote_object(struct mdt_thread_info *info,
+ struct mdt_object *parent,
+ struct mdt_object *child);
#endif /* _MDT_INTERNAL_H */