X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=inline;f=lustre%2Fmdt%2Fmdt_internal.h;h=f712f00d5264cab4c3955946b8f536096e428e86;hb=f9a2da63abab5b8b687842166a0b5b5e434ad441;hp=7fe6b87e70d98a3567f6fb053d475864b3e7b380;hpb=23773b32bfe15fccc9c029d722dc1bd5b0144779;p=fs%2Flustre-release.git

Note: this copy of the patch was re-wrapped from a whitespace-collapsed
extraction. Indentation and blank context lines are reconstructed (blank
lines are placed so that every hunk matches its "@@" line counts), and the
header names of the bare "#include" lines in the first hunk are missing in
this copy. It also differs from the recorded commit in one added line: the
"#else" branch of mdt_is_rootadmin() now ends in "return false;" (the
terminating ';' was missing, which breaks the build as soon as that branch
is selected), so the "index" hashes no longer describe the post-image.

diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h
index 7fe6b87..f712f00 100644
--- a/lustre/mdt/mdt_internal.h
+++ b/lustre/mdt/mdt_internal.h
@@ -59,6 +59,7 @@
 #include
 #include
 #include
+#include
 
 struct mdt_object;
 
@@ -149,10 +150,10 @@ struct coordinator {
 						       * list */
 	struct mutex		 cdt_restore_lock;    /**< protect restore
 						       * list */
-	time_t			 cdt_loop_period;     /**< llog scan period */
-	time_t			 cdt_grace_delay;     /**< request grace
+	timeout_t		 cdt_loop_period;     /**< llog scan period */
+	timeout_t		 cdt_grace_delay;     /**< request grace
 						       * delay */
-	time_t			 cdt_active_req_timeout; /**< request timeout */
+	timeout_t		 cdt_active_req_timeout; /**< request timeout */
 	__u32			 cdt_default_archive_id; /**< archive id used
 						       * when none are
 						       * specified */
@@ -205,6 +206,36 @@ struct mdt_statfs_cache {
 	__u64 msf_age;
 };
 
+/* split directory automatically when sub file count exceeds 50k */
+#define DIR_SPLIT_COUNT_DEFAULT	50000
+
+/* directory auto-split allocate delta new stripes each time */
+#define DIR_SPLIT_DELTA_DEFAULT	4
+
+struct mdt_dir_restriper {
+	struct lu_env		mdr_env;
+	struct lu_context	mdr_session;
+	struct task_struct     *mdr_task;
+	/* lock for below fields */
+	spinlock_t		mdr_lock;
+	/* auto split when plain dir/shard sub files exceed threshold */
+	u64			mdr_dir_split_count;
+	/* auto split growth delta */
+	u32			mdr_dir_split_delta;
+	/* directories to split */
+	struct list_head	mdr_auto_splitting;
+	/* directories under which sub files are migrating */
+	struct list_head	mdr_migrating;
+	/* directories waiting to update layout after migration */
+	struct list_head	mdr_updating;
+	/* time to update directory layout after migration */
+	time64_t		mdr_update_time;
+	/* lum used in split/migrate/layout_change */
+	union lmv_mds_md	mdr_lmv;
+	/* page used in readdir */
+	struct page	       *mdr_page;
+};
+
 struct mdt_device {
 	/* super-class */
 	struct lu_device	   mdt_lu_dev;
@@ -255,13 +286,19 @@ struct mdt_device {
 				   mdt_enable_remote_dir:1,
 				   mdt_enable_striped_dir:1,
 				   mdt_enable_dir_migration:1,
+				   mdt_enable_dir_restripe:1,
+				   mdt_enable_dir_auto_split:1,
 				   mdt_enable_remote_rename:1,
 				   mdt_skip_lfsck:1,
-				   mdt_readonly:1;
+				   mdt_readonly:1,
+				   /* dir restripe migrate dirent only */
+				   mdt_dir_restripe_nsonly:1;
 
 	/* user with gid can create remote/striped
 	 * dir, and set default dir stripe */
 	gid_t			   mdt_enable_remote_dir_gid;
+	/* user with this gid can change projid */
+	gid_t			   mdt_enable_chprojid_gid;
 
 	/* lock for osfs and md_root */
 	spinlock_t		   mdt_lock;
@@ -290,6 +327,8 @@ struct mdt_device {
 	atomic_t		   mdt_async_commit_count;
 
 	struct mdt_object	  *mdt_md_root;
+
+	struct mdt_dir_restriper   mdt_restriper;
 };
 
 #define MDT_SERVICE_WATCHDOG_FACTOR	(2)
@@ -301,14 +340,17 @@ struct mdt_object {
 	struct lu_object_header	mot_header;
 	struct lu_object	mot_obj;
 	unsigned int		mot_lov_created:1,  /* lov object created */
-				mot_cache_attr:1;   /* enable remote object
+				mot_cache_attr:1,   /* enable remote object
 						     * attribute cache */
+				mot_restriping:1,   /* dir restriping */
+				/* dir auto-split disabled */
+				mot_auto_split_disabled:1;
 	int			mot_write_count;
 	spinlock_t		mot_write_lock;
-	/* Lock to protect object's SOM update. */
-	struct mutex		mot_som_mutex;
 	/* Lock to protect create_data */
 	struct mutex		mot_lov_mutex;
+	/* Lock to protect object's SOM update. */
+	struct mutex		mot_som_mutex;
 	/* lock to protect read/write stages for Data-on-MDT files */
 	struct rw_semaphore	mot_dom_sem;
 	/* Lock to protect lease open.
@@ -316,6 +358,10 @@ struct mdt_object {
 	struct rw_semaphore	mot_open_sem;
 	atomic_t		mot_lease_count;
 	atomic_t		mot_open_count;
+	/* directory offset, used in sub file migration in dir restripe */
+	loff_t			mot_restripe_offset;
+	/* link to mdt_restriper auto_splitting/migrating/updating */
+	struct list_head	mot_restripe_linkage;
 };
 
 struct mdt_lock_handle {
@@ -374,7 +420,7 @@ struct mdt_reint_record {
 };
 
 enum mdt_reint_flag {
-	MRF_OPEN_TRUNC = 1 << 0,
+	MRF_OPEN_TRUNC = BIT(0),
 };
 
 /*
@@ -512,19 +558,16 @@ struct mdt_thread_info {
 	struct tg_reply_data *mti_reply_data;
 
 	/* FLR: layout change API */
-	struct md_layout_change	mti_layout;
+	struct md_layout_change	mti_mlc;
+
+	struct lu_seq_range	mti_range;
 };
 
 extern struct lu_context_key mdt_thread_key;
 
 static inline struct mdt_thread_info *mdt_th_info(const struct lu_env *env)
 {
-	struct mdt_thread_info *mti;
-
-	lu_env_refill((void *)env);
-	mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
-	LASSERT(mti);
-	return mti;
+	return lu_env_info(env, &mdt_thread_key);
 }
 
 struct cdt_req_progress {
@@ -661,51 +704,19 @@ static inline bool mdt_is_striped_client(struct obd_export *exp)
 	return exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE;
 }
 
-enum {
-	LMM_NO_DOM,
-	LMM_DOM_ONLY,
-	LMM_DOM_OST
-};
+__u32 mdt_lmm_dom_entry_check(struct lov_mds_md *lmm, int *dom_only);
 
-/* XXX Look into layout in MDT layer. This must be done in LOD. */
-static inline int mdt_lmm_dom_entry(struct lov_mds_md *lmm)
+static inline bool mdt_lmm_dom_only(struct lov_mds_md *lmm)
 {
-	struct lov_comp_md_v1 *comp_v1;
-	struct lov_mds_md *v1;
-	__u32 off;
-	bool has_dom = true;
-	int i;
-
-	if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1)
-		return LMM_NO_DOM;
-
-	comp_v1 = (struct lov_comp_md_v1 *)lmm;
-	off = le32_to_cpu(comp_v1->lcm_entries[0].lcme_offset);
-	v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
-
-	/* DoM entry is the first entry always */
-	if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) != LOV_PATTERN_MDT &&
-	    le16_to_cpu(comp_v1->lcm_mirror_count) == 0)
-		return LMM_NO_DOM;
+	int dom_only = 0;
 
-	for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
-		int j;
-
-		off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
-		v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
-
-		if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) ==
-		    LOV_PATTERN_MDT)
-			has_dom = true;
+	mdt_lmm_dom_entry_check(lmm, &dom_only);
+	return dom_only;
+}
 
-		for (j = 0; j < le16_to_cpu(v1->lmm_stripe_count); j++) {
-			/* if there is any object on OST */
-			if (le32_to_cpu(v1->lmm_objects[j].l_ost_idx) !=
-			    (__u32)-1UL)
-				return LMM_DOM_OST;
-		}
-	}
-	return has_dom ? LMM_DOM_ONLY : LMM_NO_DOM;
+static inline __u32 mdt_lmm_dom_stripesize(struct lov_mds_md *lmm)
+{
+	return mdt_lmm_dom_entry_check(lmm, NULL);
 }
 
 static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm)
@@ -883,10 +894,14 @@ int mdt_attr_get_complex(struct mdt_thread_info *info, struct mdt_object *o,
 			 struct md_attr *ma);
 int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o,
 		      const char *name);
+int __mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
+		     struct md_attr *ma, const char *name);
 int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
 		   struct md_attr *ma, const char *name);
 int mdt_attr_get_pfid(struct mdt_thread_info *info, struct mdt_object *o,
 		      struct lu_fid *pfid);
+int mdt_attr_get_pfid_name(struct mdt_thread_info *info, struct mdt_object *o,
+			   struct lu_fid *pfid, struct lu_name *lname);
 int mdt_write_get(struct mdt_object *o);
 void mdt_write_put(struct mdt_object *o);
 int mdt_write_read(struct mdt_object *o);
@@ -901,6 +916,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
 			   struct md_attr *);
 void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
 int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+		      struct mdt_lock_handle *lhc,
 		      struct md_layout_change *spec);
 int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt);
 
@@ -921,6 +937,10 @@ int mdt_version_get_check(struct mdt_thread_info *, struct mdt_object *, int);
 void mdt_version_get_save(struct mdt_thread_info *, struct mdt_object *, int);
 int mdt_version_get_check_save(struct mdt_thread_info *, struct mdt_object *,
 			       int);
+int mdt_lookup_version_check(struct mdt_thread_info *info,
+			     struct mdt_object *p,
+			     const struct lu_name *lname,
+			     struct lu_fid *fid, int idx);
 void mdt_thread_info_init(struct ptlrpc_request *req,
 			  struct mdt_thread_info *mti);
 void mdt_thread_info_fini(struct mdt_thread_info *mti);
@@ -944,17 +964,17 @@ int mdt_links_read(struct mdt_thread_info *info,
 		   struct linkea_data *ldata);
 int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
 		       struct mdt_body *repbody);
-void mdt_pack_secctx_in_reply(struct mdt_thread_info *info,
-			      struct mdt_object *child);
+int mdt_pack_secctx_in_reply(struct mdt_thread_info *info,
+			     struct mdt_object *child);
 
 static inline struct mdt_device *mdt_dev(struct lu_device *d)
 {
-	return container_of0(d, struct mdt_device, mdt_lu_dev);
+	return container_of_safe(d, struct mdt_device, mdt_lu_dev);
 }
 
 static inline struct mdt_object *mdt_obj(struct lu_object *o)
 {
-	return container_of0(o, struct mdt_object, mot_obj);
+	return container_of_safe(o, struct mdt_object, mot_obj);
 }
 
 static inline struct dt_object *mdt_obj2dt(struct mdt_object *mo)
@@ -1215,7 +1235,7 @@ static inline void mdt_fid_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
 
 static inline bool mdt_slc_is_enabled(struct mdt_device *mdt)
 {
-	return mdt->mdt_lut.lut_sync_lock_cancel == BLOCKING_SYNC_ON_CANCEL;
+	return mdt->mdt_lut.lut_sync_lock_cancel == SYNC_LOCK_CANCEL_BLOCKING;
 }
 
 extern mdl_mode_t mdt_mdl_lock_modes[];
@@ -1274,6 +1294,7 @@ enum mdt_stat_idx {
 	LPROC_MDT_IO_READ,
 	LPROC_MDT_IO_WRITE,
 	LPROC_MDT_IO_PUNCH,
+	LPROC_MDT_MIGRATE,
 	LPROC_MDT_LAST,
 };
 
@@ -1365,4 +1386,54 @@ long mdt_grant_connect(const struct lu_env *env, struct obd_export *exp,
 		       u64 want, bool conservative);
 extern struct kmem_cache *ldlm_glimpse_work_kmem;
 
+static inline bool mdt_is_rootadmin(struct mdt_thread_info *info)
+{
+	struct lu_ucred *uc = NULL;
+	bool is_admin;
+	int rc;
+
+	if (info == NULL || info->mti_body == NULL)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 17, 3, 0)
+		/* return true in case old client did not send mdt body */
+		return true;
+#else
+		return false;
+#endif
+
+	rc = mdt_init_ucred(info, (struct mdt_body *)info->mti_body);
+	if (rc < 0)
+		return false;
+
+	uc = mdt_ucred(info);
+	is_admin = (uc->uc_uid == 0 && uc->uc_gid == 0 &&
+		    md_capable(uc, CFS_CAP_SYS_ADMIN));
+
+	mdt_exit_ucred(info);
+
+	return is_admin;
+}
+
+int mdt_reint_migrate(struct mdt_thread_info *info,
+		      struct mdt_lock_handle *unused);
+int mdt_dir_layout_update(struct mdt_thread_info *info);
+
+/* directory restripe */
+int mdt_restripe_internal(struct mdt_thread_info *info,
+			  struct mdt_object *parent,
+			  struct mdt_object *child,
+			  const struct lu_name *lname,
+			  struct lu_fid *tfid,
+			  struct md_op_spec *spec,
+			  struct md_attr *ma);
+int mdt_restriper_start(struct mdt_device *mdt);
+void mdt_restriper_stop(struct mdt_device *mdt);
+void mdt_auto_split_add(struct mdt_thread_info *info, struct mdt_object *o);
+void mdt_restripe_migrate_add(struct mdt_thread_info *info,
+			      struct mdt_object *o);
+void mdt_restripe_update_add(struct mdt_thread_info *info,
+			     struct mdt_object *o);
+int mdt_is_remote_object(struct mdt_thread_info *info,
+			 struct mdt_object *parent,
+			 struct mdt_object *child);
+
 #endif /* _MDT_INTERNAL_H */